gem5  v20.0.0.2
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
GPUCoalescer.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #ifndef __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
35 #define __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
36 
37 #include <iostream>
38 #include <unordered_map>
39 
40 #include "base/statistics.hh"
41 #include "mem/request.hh"
44 #include "mem/ruby/protocol/HSAScope.hh"
45 #include "mem/ruby/protocol/HSASegment.hh"
46 #include "mem/ruby/protocol/PrefetchBit.hh"
47 #include "mem/ruby/protocol/RubyAccessMode.hh"
48 #include "mem/ruby/protocol/RubyRequestType.hh"
49 #include "mem/ruby/protocol/SequencerRequestType.hh"
51 #include "mem/token_port.hh"
52 
53 class DataBlock;
54 class CacheMsg;
55 class MachineID;
56 class CacheMemory;
57 
58 class RubyGPUCoalescerParams;
59 
60 HSAScope reqScopeToHSAScope(const RequestPtr &req);
61 HSASegment reqSegmentToHSASegment(const RequestPtr &req);
62 
63 // List of packets that belong to a specific instruction.
65 
67 {
68  public:
71 
72  void insertPacket(PacketPtr pkt);
73  bool packetAvailable();
74  void printRequestTable(std::stringstream& ss);
75 
76  // Returns a pointer to the list of packets corresponding to an
77  // instruction in the instruction map or nullptr if there are no
78  // instructions at the offset.
80  void updateResources();
81 
82  // Check if a packet hasn't been removed from instMap in too long.
83  // Panics if a deadlock is detected and returns nothing otherwise.
84  void checkDeadlock(Tick threshold);
85 
86  private:
88 
89  // Maps an instruction's unique sequence number to a queue of packets
90  // which need responses. This data structure assumes the sequence number
91  // is monotonically increasing (which is true for CU class) in order to
92  // issue packets in age order.
93  std::map<uint64_t, PerInstPackets> instMap;
94 };
95 
97 {
98  public:
// Builds a coalesced request tagged with sequence number _seqNum.
// The issue time starts at Cycles(0) and the Ruby request type at
// RubyRequestType_NULL; both are expected to be filled in later through
// setIssueTime() / setRubyType().
// NOTE(review): this single-argument constructor is not marked `explicit`,
// so a bare uint64_t converts implicitly -- confirm that is intended.
99  CoalescedRequest(uint64_t _seqNum)
100  : seqNum(_seqNum), issueTime(Cycles(0)),
101  rubyType(RubyRequestType_NULL)
102  {}
104 
105  void insertPacket(PacketPtr pkt) { pkts.push_back(pkt); }
106  void setSeqNum(uint64_t _seqNum) { seqNum = _seqNum; }
107  void setIssueTime(Cycles _issueTime) { issueTime = _issueTime; }
108  void setRubyType(RubyRequestType type) { rubyType = type; }
109 
110  uint64_t getSeqNum() const { return seqNum; }
// Returns the first packet stored in pkts (element 0). The access is an
// unchecked pkts[0], so at least one packet must already have been added
// with insertPacket(); calling this on an empty request is undefined.
111  PacketPtr getFirstPkt() const { return pkts[0]; }
112  Cycles getIssueTime() const { return issueTime; }
113  RubyRequestType getRubyType() const { return rubyType; }
114  std::vector<PacketPtr>& getPackets() { return pkts; }
115 
116  private:
117  uint64_t seqNum;
119  RubyRequestType rubyType;
121 };
122 
123 class GPUCoalescer : public RubyPort
124 {
125  public:
127  {
128  public:
129  GMTokenPort(const std::string& name, ClockedObject *owner,
130  PortID id = InvalidPortID)
131  : TokenSlavePort(name, owner, id)
132  { }
134 
135  protected:
// Atomic-request handler stub: the packet argument is unnamed and ignored,
// and the reported latency is always Tick(0).
136  Tick recvAtomic(PacketPtr) { return Tick(0); }
// Timing-request handler stub: the packet argument is unnamed and ignored,
// and the call always returns false.
138  bool recvTimingReq(PacketPtr) { return false; }
140  {
141  AddrRangeList ranges;
142  return ranges;
143  }
144  };
145 
146  typedef RubyGPUCoalescerParams Params;
147  GPUCoalescer(const Params *);
148  ~GPUCoalescer();
149 
150  Port &getPort(const std::string &if_name,
151  PortID idx = InvalidPortID) override;
152 
153  // Public Methods
154  void wakeup(); // Used only for deadlock detection
155  void printRequestTable(std::stringstream& ss);
156 
157  void printProgress(std::ostream& out) const;
158  void resetStats() override;
159  void collateStats();
160  void regStats() override;
161 
162  void writeCallback(Addr address, DataBlock& data);
163 
164  void writeCallback(Addr address,
165  MachineType mach,
166  DataBlock& data);
167 
168  void writeCallback(Addr address,
169  MachineType mach,
170  DataBlock& data,
171  Cycles initialRequestTime,
172  Cycles forwardRequestTime,
173  Cycles firstResponseTime,
174  bool isRegion);
175 
176  void writeCallback(Addr address,
177  MachineType mach,
178  DataBlock& data,
179  Cycles initialRequestTime,
180  Cycles forwardRequestTime,
181  Cycles firstResponseTime);
182 
183  void readCallback(Addr address, DataBlock& data);
184 
185  void readCallback(Addr address,
186  MachineType mach,
187  DataBlock& data);
188 
189  void readCallback(Addr address,
190  MachineType mach,
191  DataBlock& data,
192  Cycles initialRequestTime,
193  Cycles forwardRequestTime,
194  Cycles firstResponseTime);
195 
196  void readCallback(Addr address,
197  MachineType mach,
198  DataBlock& data,
199  Cycles initialRequestTime,
200  Cycles forwardRequestTime,
201  Cycles firstResponseTime,
202  bool isRegion);
203  /* atomics need their own callback because the data
204  might be const coming from SLICC */
205  void atomicCallback(Addr address,
206  MachineType mach,
207  const DataBlock& data);
208 
209  void recordCPReadCallBack(MachineID myMachID, MachineID senderMachID);
210  void recordCPWriteCallBack(MachineID myMachID, MachineID senderMachID);
211 
212  // Alternate implementations in VIPER Coalescer
213  virtual RequestStatus makeRequest(PacketPtr pkt) override;
214 
215  int outstandingCount() const override { return m_outstanding_count; }
216 
// Reports whether deadlockCheckEvent is currently scheduled by forwarding
// to its scheduled() query. Does not modify the coalescer.
217  bool
218  isDeadlockEventScheduled() const override
219  {
220  return deadlockCheckEvent.scheduled();
221  }
222 
223  void
225  {
226  deschedule(deadlockCheckEvent);
227  }
228 
229  bool empty() const;
230 
231  void print(std::ostream& out) const;
232 
233  void evictionCallback(Addr address);
234  void completeIssue();
235 
236  void insertKernel(int wavefront_id, PacketPtr pkt);
237 
238  GMTokenPort& getGMTokenPort() { return gmTokenPort; }
239 
240  void recordRequestType(SequencerRequestType requestType);
241  Stats::Histogram& getOutstandReqHist() { return m_outstandReqHist; }
242 
243  Stats::Histogram& getLatencyHist() { return m_latencyHist; }
245  { return *m_typeLatencyHist[t]; }
246 
248  { return m_missLatencyHist; }
250  { return *m_missTypeLatencyHist[t]; }
251 
253  { return *m_missMachLatencyHist[t]; }
254 
256  getMissTypeMachLatencyHist(uint32_t r, uint32_t t) const
257  { return *m_missTypeMachLatencyHist[r][t]; }
258 
260  { return *m_IssueToInitialDelayHist[t]; }
261 
263  getInitialToForwardDelayHist(const MachineType t) const
264  { return *m_InitialToForwardDelayHist[t]; }
265 
267  getForwardRequestToFirstResponseHist(const MachineType t) const
268  { return *m_ForwardToFirstResponseDelayHist[t]; }
269 
271  getFirstResponseToCompletionDelayHist(const MachineType t) const
272  { return *m_FirstResponseToCompletionDelayHist[t]; }
273 
274  // Changed to protected to enable inheritance by VIPER Coalescer
275  protected:
276  bool tryCacheAccess(Addr addr, RubyRequestType type,
277  Addr pc, RubyAccessMode access_mode,
278  int size, DataBlock*& data_ptr);
279  // Alternate implementations in VIPER Coalescer
280  virtual void issueRequest(CoalescedRequest* crequest);
281 
282  void kernelCallback(int wavfront_id);
283 
284  void hitCallback(CoalescedRequest* crequest,
285  MachineType mach,
286  DataBlock& data,
287  bool success,
288  Cycles initialRequestTime,
289  Cycles forwardRequestTime,
290  Cycles firstResponseTime,
291  bool isRegion);
292  void recordMissLatency(CoalescedRequest* crequest,
293  MachineType mach,
294  Cycles initialRequestTime,
295  Cycles forwardRequestTime,
296  Cycles firstResponseTime,
297  bool success, bool isRegion);
298  void completeHitCallback(std::vector<PacketPtr> & mylist);
299 
300 
301  virtual RubyRequestType getRequestType(PacketPtr pkt);
302 
303  // Attempt to remove a packet from the uncoalescedTable and coalesce
304  // with a previous request from the same instruction. If there is no
305  // previous instruction and the max number of outstanding requests has
306  // not been reached, a new coalesced request is created and added to the
307  // "target" list of the coalescedTable.
308  bool coalescePacket(PacketPtr pkt);
309 
311 
312 
313  // Changed to protected to enable inheritance by VIPER Coalescer
314  protected:
317 
320 
321  // coalescingWindow is the maximum number of instructions that are
322  // allowed to be coalesced in a single cycle.
324 
325  // The uncoalescedTable contains several "columns" which hold memory
326  // request packets for an instruction. The maximum size is the number of
327  // columns * the wavefront size.
329 
330  // An MSHR-like struct for holding coalesced requests. The requests in
331  // this table may or may not be outstanding in the memory hierarchy. The
332  // maximum size is equal to the maximum outstanding requests for a CU
333  // (typically the number of blocks in TCP). If there are duplicates of
334  // an address, they are serviced in age order.
335  std::map<Addr, std::deque<CoalescedRequest*>> coalescedTable;
336 
337  // Global outstanding request count, across all request tables
340  std::unordered_map<int, PacketPtr> kernelEndList;
342 
347 
349 
352 
353  // m5 style stats for TCP hit/miss counts
358 
363 
368 
373 
376 
380 
385 
390 
396 
397  private:
398  // Token port is used to send/receive tokens to/from GPU's global memory
399  // pipeline across the port boundary. There is one per <wave size> data
400  // ports in the CU.
402 
403  // Private copy constructor and assignment operator
404  GPUCoalescer(const GPUCoalescer& obj);
405  GPUCoalescer& operator=(const GPUCoalescer& obj);
406 };
407 
// Stream-insertion operator for GPUCoalescer. Delegates the formatting to
// obj.print(out), then flushes the stream and returns it so further
// insertions can be chained.
408 inline std::ostream&
409 operator<<(std::ostream& out, const GPUCoalescer& obj)
410 {
411  obj.print(out);
412  out << std::flush;
413  return out;
414 }
415 
416 #endif // __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
GMTokenPort & getGMTokenPort()
Stats::Histogram & getLatencyHist()
CoalescedRequest(uint64_t _seqNum)
Definition: GPUCoalescer.hh:99
Ports are used to interface objects to each other.
Definition: port.hh:56
Stats::Scalar CP_TCCStHits
EventFunctionWrapper issueEvent
Stats::Scalar GPU_TCPStHits
void setSeqNum(uint64_t _seqNum)
Cycles is a wrapper class for representing cycle counts, i.e.
Definition: types.hh:81
bool recvTimingReq(PacketPtr)
Receive a timing request from the peer.
UncoalescedTable(GPUCoalescer *gc)
const std::string & name()
Definition: trace.cc:50
Stats::Histogram m_missLatencyHist
Histogram for holding latency profile of all requests that miss in the controller connected to this s...
std::vector< Stats::Histogram * > m_ForwardToFirstResponseDelayHist
const PortID InvalidPortID
Definition: types.hh:236
Stats::Histogram & getIssueToInitialDelayHist(uint32_t t) const
Stats::Histogram & getFirstResponseToCompletionDelayHist(const MachineType t) const
GMTokenPort(const std::string &name, ClockedObject *owner, PortID id=InvalidPortID)
Declaration of a request, the overall memory request consisting of the parts of the request that are ...
HSASegment reqSegmentToHSASegment(const RequestPtr &req)
Definition: GPUCoalescer.cc:91
std::shared_ptr< Request > RequestPtr
Definition: request.hh:81
Stats::Scalar GPU_TCPLdHits
Stats::Scalar CP_StMiss
ip6_addr_t addr
Definition: inet.hh:330
Stats::Scalar GPU_TCCStHits
EventFunctionWrapper deadlockCheckEvent
Stats::Histogram & getMissTypeLatencyHist(uint32_t t)
uint64_t getSeqNum() const
Stats::Scalar CP_LdMiss
Stats::Scalar GPU_TCPLdTransfers
Stats::Histogram & getForwardRequestToFirstResponseHist(const MachineType t) const
Bitfield< 23, 0 > offset
Definition: types.hh:152
Stats::Scalar CP_TCPLdTransfers
std::vector< Stats::Histogram * > m_missMachLatencyHist
Histograms for profiling the latencies for requests that required external messages.
int m_max_outstanding_requests
Stats::Histogram & getMissTypeMachLatencyHist(uint32_t r, uint32_t t) const
Stats::Histogram m_latencyHist
Histogram for holding latency profile of all requests.
void checkDeadlock(Tick threshold)
std::vector< PacketPtr > & getPackets()
Declaration of Statistics objects.
This is a simple scalar statistic, like a counter.
Definition: statistics.hh:2505
STL vector class.
Definition: stl.hh:37
Bitfield< 33 > id
Stats::Scalar CP_TCPStTransfers
RubyGPUCoalescerParams Params
Stats::Histogram & getMissMachLatencyHist(uint32_t t) const
uint8_t type
Definition: inet.hh:328
Bitfield< 4 > pc
void setRubyType(RubyRequestType type)
Cycles getIssueTime() const
std::map< uint64_t, PerInstPackets > instMap
Definition: GPUCoalescer.hh:93
void setIssueTime(Cycles _issueTime)
Stats::Scalar GPU_LdMiss
CacheMemory * m_dataCache_ptr
uint64_t Tick
Tick count type.
Definition: types.hh:61
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
bool assumingRfOCoherence
PerInstPackets * getInstPackets(int offset)
Stats::Scalar GPU_TCCLdHits
Tick recvAtomic(PacketPtr)
Receive an atomic request packet from the peer.
void descheduleDeadlockEvent() override
int m_store_waiting_on_load_cycles
A simple histogram stat.
Definition: statistics.hh:2626
Stats::Histogram m_outstandReqHist
Histogram for number of outstanding requests per cycle.
std::vector< std::vector< Stats::Histogram * > > m_missTypeMachLatencyHist
Bitfield< 21 > ss
bool isDeadlockEventScheduled() const override
STL list class.
Definition: stl.hh:51
std::vector< Stats::Histogram * > m_InitialToForwardDelayHist
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:140
int m_load_waiting_on_load_cycles
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
Definition: packet.hh:249
GPUCoalescer * coalescer
Definition: GPUCoalescer.hh:87
std::unordered_map< int, PacketPtr > kernelEndList
Stats::Scalar GPU_StMiss
std::vector< Stats::Histogram * > m_IssueToInitialDelayHist
Histograms for recording the breakdown of miss latency.
Stats::Histogram & getOutstandReqHist()
Stats::Scalar CP_TCPLdHits
RubyRequestType getRubyType() const
std::vector< PacketPtr > pkts
void insertPacket(PacketPtr pkt)
std::vector< Stats::Histogram * > m_typeLatencyHist
void insertPacket(PacketPtr pkt)
std::list< PacketPtr > PerInstPackets
Definition: GPUCoalescer.hh:64
std::ostream & operator<<(std::ostream &out, const GPUCoalescer &obj)
Stats::Histogram & getMissLatencyHist()
Stats::Histogram & getTypeLatencyHist(uint32_t t)
std::vector< int > newKernelEnds
PacketPtr getFirstPkt() const
GMTokenPort gmTokenPort
HSAScope reqScopeToHSAScope(const RequestPtr &req)
Definition: GPUCoalescer.cc:71
AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
int outstandingCount() const override
Stats::Scalar CP_TCPStHits
int m_load_waiting_on_store_cycles
void printRequestTable(std::stringstream &ss)
Stats::Histogram & getInitialToForwardDelayHist(const MachineType t) const
void recvFunctional(PacketPtr)
Receive a functional request packet from the peer.
RubyRequestType rubyType
CacheMemory * m_instCache_ptr
int m_outstanding_count
UncoalescedTable uncoalescedTable
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Definition: types.hh:235
Cycles m_deadlock_threshold
Bitfield< 5 > t
void print(std::ostream &out) const
bool m_runningGarnetStandalone
std::map< Addr, std::deque< CoalescedRequest * > > coalescedTable
std::vector< Stats::Histogram * > m_FirstResponseToCompletionDelayHist
std::vector< Stats::Histogram * > m_missTypeLatencyHist
int m_store_waiting_on_store_cycles
const char data[]
Stats::Scalar GPU_TCPStTransfers
bool m_deadlock_check_scheduled
Stats::Scalar CP_TCCLdHits

Generated on Mon Jun 8 2020 15:45:12 for gem5 by doxygen 1.8.13