gem5  v21.0.1.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
GPUCoalescer.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #ifndef __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
35 #define __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
36 
37 #include <iostream>
38 #include <unordered_map>
39 
40 #include "base/statistics.hh"
42 #include "gpu-compute/misc.hh"
43 #include "mem/request.hh"
46 #include "mem/ruby/protocol/PrefetchBit.hh"
47 #include "mem/ruby/protocol/RubyAccessMode.hh"
48 #include "mem/ruby/protocol/RubyRequestType.hh"
49 #include "mem/ruby/protocol/SequencerRequestType.hh"
51 #include "mem/token_port.hh"
52 
53 class DataBlock;
54 class CacheMsg;
55 struct MachineID;
56 class CacheMemory;
57 
58 struct RubyGPUCoalescerParams;
59 
60 // List of packets that belong to a specific instruction.
62 
64 {
65  public:
68 
69  void insertPacket(PacketPtr pkt);
70  bool packetAvailable();
71  void printRequestTable(std::stringstream& ss);
72 
73  // Modify packets remaining map. Init sets value iff the seqNum has not
74  // yet been seen before. get/set act as a regular getter/setter.
75  void initPacketsRemaining(InstSeqNum seqNum, int count);
76  int getPacketsRemaining(InstSeqNum seqNum);
77  void setPacketsRemaining(InstSeqNum seqNum, int count);
78 
79  // Returns a pointer to the list of packets corresponding to an
80  // instruction in the instruction map or nullptr if there are no
81  // instructions at the offset.
83  void updateResources();
84  bool areRequestsDone(const InstSeqNum instSeqNum);
85 
86  // Check if a packet hasn't been removed from instMap in too long.
87  // Panics if a deadlock is detected and returns nothing otherwise.
88  void checkDeadlock(Tick threshold);
89 
90  private:
92 
93  // Maps an instruction's unique sequence number to a queue of packets
94  // which need responses. This data structure assumes the sequence number
95  // is monotonically increasing (which is true for CU class) in order to
96  // issue packets in age order.
97  std::map<InstSeqNum, PerInstPackets> instMap;
98 
99  std::map<InstSeqNum, int> instPktsRemaining;
100 };
101 
103 {
104  public:
105  CoalescedRequest(uint64_t _seqNum)
106  : seqNum(_seqNum), issueTime(Cycles(0)),
107  rubyType(RubyRequestType_NULL)
108  {}
110 
111  void insertPacket(PacketPtr pkt) { pkts.push_back(pkt); }
112  void setSeqNum(uint64_t _seqNum) { seqNum = _seqNum; }
113  void setIssueTime(Cycles _issueTime) { issueTime = _issueTime; }
114  void setRubyType(RubyRequestType type) { rubyType = type; }
115 
116  uint64_t getSeqNum() const { return seqNum; }
117  PacketPtr getFirstPkt() const { return pkts[0]; }
118  Cycles getIssueTime() const { return issueTime; }
119  RubyRequestType getRubyType() const { return rubyType; }
121 
122  private:
123  uint64_t seqNum;
125  RubyRequestType rubyType;
127 };
128 
129 // PendingWriteInst tracks the number of outstanding Ruby requests
130 // per write instruction. Once all requests associated with one instruction
131 // are completely done in Ruby, we call back the requestor to mark
132 // that this instruction is complete.
134 {
135  public:
137  : numPendingStores(0),
138  originalPort(nullptr),
139  gpuDynInstPtr(nullptr)
140  {}
141 
143  {}
144 
145  void
147  bool usingRubyTester)
148  {
149  assert(port);
150  originalPort = port;
151 
152  if (!usingRubyTester) {
153  gpuDynInstPtr = inst;
154  }
155 
157  }
158 
159  // return true if no more ack is expected
160  bool
162  {
163  assert(numPendingStores > 0);
165  return (numPendingStores == 0) ? true : false;
166  }
167 
168  // ack the original requestor that this write instruction is complete
169  void
170  ackWriteCompletion(bool usingRubyTester)
171  {
172  assert(numPendingStores == 0);
173 
174  // make a response packet
175  PacketPtr pkt = new Packet(std::make_shared<Request>(),
177 
178  if (!usingRubyTester) {
179  assert(gpuDynInstPtr);
182  (gpuDynInstPtr, 0, nullptr);
183  pkt->senderState = ss;
184  }
185 
186  // send the ack response to the requestor
188  }
189 
190  int
192  return numPendingStores;
193  }
194 
195  private:
196  // the number of stores waiting for writeCompleteCallback
198  // The original port that sent one of the packets associated with this
199  // write instruction. We may have more than one packet per instruction,
200  // which implies multiple ports per instruction. However, we need
201  // only 1 of the ports to call back the CU. Therefore, here we keep
202  // track of the port that sent the first packet of this instruction.
204  // similar to the originalPort, this gpuDynInstPtr is set only for
205  // the first packet of this instruction.
207 };
208 
209 class GPUCoalescer : public RubyPort
210 {
211  public:
213  {
214  public:
215  GMTokenPort(const std::string& name, ClockedObject *owner,
216  PortID id = InvalidPortID)
218  { }
220 
221  protected:
222  Tick recvAtomic(PacketPtr) { return Tick(0); }
224  bool recvTimingReq(PacketPtr) { return false; }
226  {
227  AddrRangeList ranges;
228  return ranges;
229  }
230  };
231 
232  typedef RubyGPUCoalescerParams Params;
233  GPUCoalescer(const Params &);
234  ~GPUCoalescer();
235 
236  Port &getPort(const std::string &if_name,
237  PortID idx = InvalidPortID) override;
238 
239  // Public Methods
240  void wakeup(); // Used only for deadlock detection
241  void printRequestTable(std::stringstream& ss);
242 
243  void printProgress(std::ostream& out) const;
244  void resetStats() override;
245  void collateStats();
246 
247  // each store request needs two callbacks:
248  // (1) writeCallback is called when the store is received and processed
249  // by TCP. This writeCallback does not guarantee the store is actually
250  // completed at its destination cache or memory. writeCallback helps
251  // release hardware resources (e.g., its entry in coalescedTable)
252  // allocated for the store so that subsequent requests will not be
253  // blocked unnecessarily due to hardware resource constraints.
254  // (2) writeCompleteCallback is called when the store is fully completed
255  // at its destination cache or memory. writeCompleteCallback
256  // guarantees that the store is fully completed. This callback
257  // will decrement hardware counters in CU
258  void writeCallback(Addr address, DataBlock& data);
259 
260  void writeCallback(Addr address,
261  MachineType mach,
262  DataBlock& data);
263 
264  void writeCallback(Addr address,
265  MachineType mach,
266  DataBlock& data,
267  Cycles initialRequestTime,
268  Cycles forwardRequestTime,
269  Cycles firstResponseTime,
270  bool isRegion);
271 
272  void writeCallback(Addr address,
273  MachineType mach,
274  DataBlock& data,
275  Cycles initialRequestTime,
276  Cycles forwardRequestTime,
277  Cycles firstResponseTime);
278 
279  void writeCompleteCallback(Addr address,
280  uint64_t instSeqNum,
281  MachineType mach);
282 
283  void readCallback(Addr address, DataBlock& data);
284 
285  void readCallback(Addr address,
286  MachineType mach,
287  DataBlock& data);
288 
289  void readCallback(Addr address,
290  MachineType mach,
291  DataBlock& data,
292  Cycles initialRequestTime,
293  Cycles forwardRequestTime,
294  Cycles firstResponseTime);
295 
296  void readCallback(Addr address,
297  MachineType mach,
298  DataBlock& data,
299  Cycles initialRequestTime,
300  Cycles forwardRequestTime,
301  Cycles firstResponseTime,
302  bool isRegion);
303 
304  /* atomics need their own callback because the data
305  might be const coming from SLICC */
306  virtual void atomicCallback(Addr address,
307  MachineType mach,
308  const DataBlock& data);
309 
310  RequestStatus makeRequest(PacketPtr pkt) override;
311  int outstandingCount() const override { return m_outstanding_count; }
312 
313  bool
314  isDeadlockEventScheduled() const override
315  {
316  return deadlockCheckEvent.scheduled();
317  }
318 
319  void
321  {
323  }
324 
325  bool empty() const;
326 
327  void print(std::ostream& out) const;
328 
329  void evictionCallback(Addr address);
330  void completeIssue();
331 
332  void insertKernel(int wavefront_id, PacketPtr pkt);
333 
335 
337 
340  { return *m_typeLatencyHist[t]; }
341 
343  { return m_missLatencyHist; }
345  { return *m_missTypeLatencyHist[t]; }
346 
348  { return *m_missMachLatencyHist[t]; }
349 
351  getMissTypeMachLatencyHist(uint32_t r, uint32_t t) const
352  { return *m_missTypeMachLatencyHist[r][t]; }
353 
355  { return *m_IssueToInitialDelayHist[t]; }
356 
358  getInitialToForwardDelayHist(const MachineType t) const
359  { return *m_InitialToForwardDelayHist[t]; }
360 
362  getForwardRequestToFirstResponseHist(const MachineType t) const
364 
366  getFirstResponseToCompletionDelayHist(const MachineType t) const
368 
369  protected:
370  bool tryCacheAccess(Addr addr, RubyRequestType type,
371  Addr pc, RubyAccessMode access_mode,
372  int size, DataBlock*& data_ptr);
373 
374  // since the two following issue functions are protocol-specific,
375  // they must be implemented in a derived coalescer
376  virtual void issueRequest(CoalescedRequest* crequest) = 0;
377  virtual void issueMemSyncRequest(PacketPtr pkt) {}
378 
379  void kernelCallback(int wavefront_id);
380 
381  void hitCallback(CoalescedRequest* crequest,
382  MachineType mach,
383  DataBlock& data,
384  bool success,
385  Cycles initialRequestTime,
386  Cycles forwardRequestTime,
387  Cycles firstResponseTime,
388  bool isRegion);
389  void recordMissLatency(CoalescedRequest* crequest,
390  MachineType mach,
391  Cycles initialRequestTime,
392  Cycles forwardRequestTime,
393  Cycles firstResponseTime,
394  bool success, bool isRegion);
396 
397  virtual RubyRequestType getRequestType(PacketPtr pkt);
398 
400 
401  // Attempt to remove a packet from the uncoalescedTable and coalesce
402  // with a previous request from the same instruction. If there is no
403  // previous instruction and the max number of outstanding requests has
404  // not been reached, a new coalesced request is created and added to the
405  // "target" list of the coalescedTable.
406  bool coalescePacket(PacketPtr pkt);
407 
409 
410  protected:
413 
416 
417  // coalescingWindow is the maximum number of instructions that are
418  // allowed to be coalesced in a single cycle.
420 
421  // The uncoalescedTable contains several "columns" which hold memory
422  // request packets for an instruction. The maximum size is the number of
423  // columns * the wavefront size.
425 
426  // An MSHR-like struct for holding coalesced requests. The requests in
427  // this table may or may not be outstanding in the memory hierarchy. The
428  // maximum size is equal to the maximum outstanding requests for a CU
429  // (typically the number of blocks in TCP). If there are duplicates of
430  // an address, they are serviced in age order.
431  std::map<Addr, std::deque<CoalescedRequest*>> coalescedTable;
432  // Map of instruction sequence number to coalesced requests that get
433  // created in coalescePacket, used in completeIssue to send the fully
434  // coalesced request
435  std::unordered_map<uint64_t, std::deque<CoalescedRequest*>> coalescedReqs;
436 
437  // a map between an instruction sequence number and PendingWriteInst
438  // this is used to do a final call back for each write when it is
439  // completely done in the memory system
440  std::unordered_map<uint64_t, PendingWriteInst> pendingWriteInsts;
441 
442  // Global outstanding request count, across all request tables
445  std::unordered_map<int, PacketPtr> kernelEndList;
447 
452 
454 
457 
458 // TODO - Need to update the following stats once the VIPER protocol
459 // is re-integrated.
460 // // m5 style stats for TCP hit/miss counts
461 // Stats::Scalar GPU_TCPLdHits;
462 // Stats::Scalar GPU_TCPLdTransfers;
463 // Stats::Scalar GPU_TCCLdHits;
464 // Stats::Scalar GPU_LdMiss;
465 //
466 // Stats::Scalar GPU_TCPStHits;
467 // Stats::Scalar GPU_TCPStTransfers;
468 // Stats::Scalar GPU_TCCStHits;
469 // Stats::Scalar GPU_StMiss;
470 //
471 // Stats::Scalar CP_TCPLdHits;
472 // Stats::Scalar CP_TCPLdTransfers;
473 // Stats::Scalar CP_TCCLdHits;
474 // Stats::Scalar CP_LdMiss;
475 //
476 // Stats::Scalar CP_TCPStHits;
477 // Stats::Scalar CP_TCPStTransfers;
478 // Stats::Scalar CP_TCCStHits;
479 // Stats::Scalar CP_StMiss;
480 
483 
487 
492 
497 
503 
504 // TODO - Need to update the following stats once the VIPER protocol
505 // is re-integrated.
506 // Stats::Distribution numHopDelays;
507 // Stats::Distribution tcpToTccDelay;
508 // Stats::Distribution tccToSdDelay;
509 // Stats::Distribution sdToSdDelay;
510 // Stats::Distribution sdToTccDelay;
511 // Stats::Distribution tccToTcpDelay;
512 //
513 // Stats::Average avgTcpToTcc;
514 // Stats::Average avgTccToSd;
515 // Stats::Average avgSdToSd;
516 // Stats::Average avgSdToTcc;
517 // Stats::Average avgTccToTcp;
518 
519  private:
520  // Token port is used to send/receive tokens to/from GPU's global memory
521  // pipeline across the port boundary. There is one per <wave size> data
522  // ports in the CU.
524 
525  // Private copy constructor and assignment operator
526  GPUCoalescer(const GPUCoalescer& obj);
527  GPUCoalescer& operator=(const GPUCoalescer& obj);
528 };
529 
530 inline std::ostream&
531 operator<<(std::ostream& out, const GPUCoalescer& obj)
532 {
533  obj.print(out); // delegate the formatting to GPUCoalescer::print
534  out << std::flush; // flush the stream before handing it back
535  return out; // return the same stream so insertions can be chained
536 }
537 
538 #endif // __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
CoalescedRequest::seqNum
uint64_t seqNum
Definition: GPUCoalescer.hh:123
UncoalescedTable::~UncoalescedTable
~UncoalescedTable()
Definition: GPUCoalescer.hh:67
GPUCoalescer::m_deadlock_threshold
Cycles m_deadlock_threshold
Definition: GPUCoalescer.hh:412
Event::scheduled
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:462
GPUCoalescer::issueMemSyncRequest
virtual void issueMemSyncRequest(PacketPtr pkt)
Definition: GPUCoalescer.hh:377
GPUCoalescer::m_load_waiting_on_load_cycles
int m_load_waiting_on_load_cycles
Definition: GPUCoalescer.hh:451
GPUCoalescer::collateStats
void collateStats()
GPUCoalescer::GMTokenPort::recvFunctional
void recvFunctional(PacketPtr)
Receive a functional request packet from the peer.
Definition: GPUCoalescer.hh:223
GPUCoalescer::m_latencyHist
Stats::Histogram m_latencyHist
Histogram for holding latency profile of all requests.
Definition: GPUCoalescer.hh:485
data
const char data[]
Definition: circlebuf.test.cc:47
UncoalescedTable::instPktsRemaining
std::map< InstSeqNum, int > instPktsRemaining
Definition: GPUCoalescer.hh:99
CoalescedRequest::getRubyType
RubyRequestType getRubyType() const
Definition: GPUCoalescer.hh:119
CoalescedRequest::insertPacket
void insertPacket(PacketPtr pkt)
Definition: GPUCoalescer.hh:111
ResponsePort::sendTimingResp
bool sendTimingResp(PacketPtr pkt)
Attempt to send a timing response to the request port by calling its corresponding receive function.
Definition: port.hh:367
GPUCoalescer::m_missTypeMachLatencyHist
std::vector< std::vector< Stats::Histogram * > > m_missTypeMachLatencyHist
Definition: GPUCoalescer.hh:496
CoalescedRequest::~CoalescedRequest
~CoalescedRequest()
Definition: GPUCoalescer.hh:109
InvalidPortID
const PortID InvalidPortID
Definition: types.hh:244
GPUCoalescer::writeCompleteCallback
void writeCompleteCallback(Addr address, uint64_t instSeqNum, MachineType mach)
Definition: GPUCoalescer.cc:427
CoalescedRequest::pkts
std::vector< PacketPtr > pkts
Definition: GPUCoalescer.hh:126
PendingWriteInst::receiveWriteCompleteAck
bool receiveWriteCompleteAck()
Definition: GPUCoalescer.hh:161
GPUCoalescer::evictionCallback
void evictionCallback(Addr address)
Definition: GPUCoalescer.cc:864
GPUCoalescer::getGMTokenPort
GMTokenPort & getGMTokenPort()
Definition: GPUCoalescer.hh:334
GPUCoalescer::getOutstandReqHist
Stats::Histogram & getOutstandReqHist()
Definition: GPUCoalescer.hh:336
UncoalescedTable::coalescer
GPUCoalescer * coalescer
Definition: GPUCoalescer.hh:91
GPUCoalescer
Definition: GPUCoalescer.hh:209
UncoalescedTable
Definition: GPUCoalescer.hh:63
GPUCoalescer::printRequestTable
void printRequestTable(std::stringstream &ss)
Definition: GPUCoalescer.cc:302
misc.hh
GPUCoalescer::Params
RubyGPUCoalescerParams Params
Definition: GPUCoalescer.hh:232
Tick
uint64_t Tick
Tick count type.
Definition: types.hh:59
GPUCoalescer::printProgress
void printProgress(std::ostream &out) const
Definition: GPUCoalescer.cc:351
GPUCoalescer::getInitialToForwardDelayHist
Stats::Histogram & getInitialToForwardDelayHist(const MachineType t) const
Definition: GPUCoalescer.hh:358
GPUCoalescer::GMTokenPort
Definition: GPUCoalescer.hh:212
PortID
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Definition: types.hh:243
UncoalescedTable::insertPacket
void insertPacket(PacketPtr pkt)
Definition: GPUCoalescer.cc:63
UncoalescedTable::getPacketsRemaining
int getPacketsRemaining(InstSeqNum seqNum)
Definition: GPUCoalescer.cc:87
GPUCoalescer::newKernelEnds
std::vector< int > newKernelEnds
Definition: GPUCoalescer.hh:446
GPUCoalescer::gmTokenPort
GMTokenPort gmTokenPort
Definition: GPUCoalescer.hh:523
EventManager::deschedule
void deschedule(Event &event)
Definition: eventq.hh:1025
std::vector
STL vector class.
Definition: stl.hh:37
GPUCoalescer::kernelCallback
void kernelCallback(int wavefront_id)
Definition: GPUCoalescer.cc:870
PendingWriteInst::getNumPendingStores
int getNumPendingStores()
Definition: GPUCoalescer.hh:191
GPUCoalescer::getIssueToInitialDelayHist
Stats::Histogram & getIssueToInitialDelayHist(uint32_t t) const
Definition: GPUCoalescer.hh:354
GPUCoalescer::m_outstanding_count
int m_outstanding_count
Definition: GPUCoalescer.hh:443
X86ISA::count
count
Definition: misc.hh:703
GPUCoalescer::getPort
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
Definition: GPUCoalescer.cc:265
PendingWriteInst::addPendingReq
void addPendingReq(RubyPort::MemResponsePort *port, GPUDynInstPtr inst, bool usingRubyTester)
Definition: GPUCoalescer.hh:146
PendingWriteInst
Definition: GPUCoalescer.hh:133
CoalescedRequest::getFirstPkt
PacketPtr getFirstPkt() const
Definition: GPUCoalescer.hh:117
MachineID
Definition: MachineID.hh:50
GPUCoalescer::m_store_waiting_on_load_cycles
int m_store_waiting_on_load_cycles
Definition: GPUCoalescer.hh:448
request.hh
GPUCoalescer::outstandingCount
int outstandingCount() const override
Definition: GPUCoalescer.hh:311
ClockedObject
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
Definition: clocked_object.hh:231
DataBlock
Definition: DataBlock.hh:54
EventFunctionWrapper
Definition: eventq.hh:1112
Stats::Histogram
A simple histogram stat.
Definition: statistics.hh:2126
GPUCoalescer::deadlockCheckEvent
EventFunctionWrapper deadlockCheckEvent
Definition: GPUCoalescer.hh:455
PendingWriteInst::PendingWriteInst
PendingWriteInst()
Definition: GPUCoalescer.hh:136
CoalescedRequest::rubyType
RubyRequestType rubyType
Definition: GPUCoalescer.hh:125
GPUCoalescer::m_InitialToForwardDelayHist
std::vector< Stats::Histogram * > m_InitialToForwardDelayHist
Definition: GPUCoalescer.hh:500
MemCmd::WriteCompleteResp
@ WriteCompleteResp
Definition: packet.hh:88
GPUCoalescer::m_outstandReqHist
Stats::Histogram m_outstandReqHist
Histogram for number of outstanding requests per cycle.
Definition: GPUCoalescer.hh:482
GPUCoalescer::completeHitCallback
void completeHitCallback(std::vector< PacketPtr > &mylist)
Definition: GPUCoalescer.cc:909
GPUCoalescer::isDeadlockEventScheduled
bool isDeadlockEventScheduled() const override
Definition: GPUCoalescer.hh:314
RubyPort::MemResponsePort
Definition: RubyPort.hh:75
UncoalescedTable::printRequestTable
void printRequestTable(std::stringstream &ss)
Definition: GPUCoalescer.cc:150
GPUCoalescer::coalescedTable
std::map< Addr, std::deque< CoalescedRequest * > > coalescedTable
Definition: GPUCoalescer.hh:431
ArmISA::ss
Bitfield< 21 > ss
Definition: miscregs_types.hh:56
GPUCoalescer::m_IssueToInitialDelayHist
std::vector< Stats::Histogram * > m_IssueToInitialDelayHist
Histograms for recording the breakdown of miss latency.
Definition: GPUCoalescer.hh:499
UncoalescedTable::packetAvailable
bool packetAvailable()
Definition: GPUCoalescer.cc:73
GPUCoalescer::getForwardRequestToFirstResponseHist
Stats::Histogram & getForwardRequestToFirstResponseHist(const MachineType t) const
Definition: GPUCoalescer.hh:362
GPUCoalescer::~GPUCoalescer
~GPUCoalescer()
Definition: GPUCoalescer.cc:260
GPUCoalescer::m_store_waiting_on_store_cycles
int m_store_waiting_on_store_cycles
Definition: GPUCoalescer.hh:449
GPUCoalescer::atomicCallback
virtual void atomicCallback(Addr address, MachineType mach, const DataBlock &data)
Definition: GPUCoalescer.cc:880
SenderState
RubyTester::SenderState SenderState
Definition: Check.cc:37
GPUCoalescer::print
void print(std::ostream &out) const
Definition: GPUCoalescer.cc:678
GPUCoalescer::m_runningGarnetStandalone
bool m_runningGarnetStandalone
Definition: GPUCoalescer.hh:453
MipsISA::pc
Bitfield< 4 > pc
Definition: pra_constants.hh:240
GPUCoalescer::wakeup
void wakeup()
Definition: GPUCoalescer.cc:276
statistics.hh
GPUCoalescer::getLatencyHist
Stats::Histogram & getLatencyHist()
Definition: GPUCoalescer.hh:338
RubyPort
Definition: RubyPort.hh:58
UncoalescedTable::setPacketsRemaining
void setPacketsRemaining(InstSeqNum seqNum, int count)
Definition: GPUCoalescer.cc:93
Port
Ports are used to interface objects to each other.
Definition: port.hh:56
CoalescedRequest::setSeqNum
void setSeqNum(uint64_t _seqNum)
Definition: GPUCoalescer.hh:112
MipsISA::r
r
Definition: pra_constants.hh:95
GPUCoalescer::m_typeLatencyHist
std::vector< Stats::Histogram * > m_typeLatencyHist
Definition: GPUCoalescer.hh:486
GPUCoalescer::getRequestType
virtual RubyRequestType getRequestType(PacketPtr pkt)
Definition: GPUCoalescer.cc:589
UncoalescedTable::UncoalescedTable
UncoalescedTable(GPUCoalescer *gc)
Definition: GPUCoalescer.cc:57
GPUCoalescer::pendingWriteInsts
std::unordered_map< uint64_t, PendingWriteInst > pendingWriteInsts
Definition: GPUCoalescer.hh:440
GPUCoalescer::operator=
GPUCoalescer & operator=(const GPUCoalescer &obj)
gpu_dyn_inst.hh
GPUCoalescer::insertKernel
void insertKernel(int wavefront_id, PacketPtr pkt)
Definition: GPUCoalescer.cc:357
GPUCoalescer::m_deadlock_check_scheduled
bool m_deadlock_check_scheduled
Definition: GPUCoalescer.hh:444
CacheMemory
Definition: CacheMemory.hh:63
GPUCoalescer::getDynInst
GPUDynInstPtr getDynInst(PacketPtr pkt) const
Definition: GPUCoalescer.cc:686
GPUCoalescer::issueEvent
EventFunctionWrapper issueEvent
Definition: GPUCoalescer.hh:408
CoalescedRequest::CoalescedRequest
CoalescedRequest(uint64_t _seqNum)
Definition: GPUCoalescer.hh:105
GPUCoalescer::GMTokenPort::getAddrRanges
AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
Definition: GPUCoalescer.hh:225
Port::id
const PortID id
A numeric identifier to distinguish ports in a vector, and set to InvalidPortID in case this port is ...
Definition: port.hh:74
GPUCoalescer::m_ForwardToFirstResponseDelayHist
std::vector< Stats::Histogram * > m_ForwardToFirstResponseDelayHist
Definition: GPUCoalescer.hh:501
ComputeUnit::DataPort::SenderState
Definition: compute_unit.hh:515
UncoalescedTable::instMap
std::map< InstSeqNum, PerInstPackets > instMap
Definition: GPUCoalescer.hh:97
GPUCoalescer::resetStats
void resetStats() override
Callback to reset stats.
Definition: GPUCoalescer.cc:328
GPUCoalescer::getFirstResponseToCompletionDelayHist
Stats::Histogram & getFirstResponseToCompletionDelayHist(const MachineType t) const
Definition: GPUCoalescer.hh:366
InstSeqNum
uint64_t InstSeqNum
Definition: inst_seq.hh:37
GPUCoalescer::m_instCache_ptr
CacheMemory * m_instCache_ptr
Definition: GPUCoalescer.hh:415
GPUCoalescer::m_load_waiting_on_store_cycles
int m_load_waiting_on_store_cycles
Definition: GPUCoalescer.hh:450
ProbePoints::Packet
ProbePointArg< PacketInfo > Packet
Packet probe point.
Definition: mem.hh:103
Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:148
Port::name
const std::string name() const
Return port name (for DPRINTF).
Definition: port.hh:106
PendingWriteInst::ackWriteCompletion
void ackWriteCompletion(bool usingRubyTester)
Definition: GPUCoalescer.hh:170
GPUCoalescer::writeCallback
void writeCallback(Addr address, DataBlock &data)
Definition: GPUCoalescer.cc:371
GPUCoalescer::makeRequest
RequestStatus makeRequest(PacketPtr pkt) override
Definition: GPUCoalescer.cc:617
GPUCoalescer::readCallback
void readCallback(Addr address, DataBlock &data)
Definition: GPUCoalescer.cc:457
PendingWriteInst::originalPort
RubyPort::MemResponsePort * originalPort
Definition: GPUCoalescer.hh:203
ResponsePort::owner
SimObject & owner
Definition: port.hh:276
GPUCoalescer::coalescePacket
bool coalescePacket(PacketPtr pkt)
Definition: GPUCoalescer.cc:699
X86ISA::addr
Bitfield< 3 > addr
Definition: types.hh:80
GPUCoalescer::getMissTypeLatencyHist
Stats::Histogram & getMissTypeLatencyHist(uint32_t t)
Definition: GPUCoalescer.hh:344
GPUCoalescer::getMissTypeMachLatencyHist
Stats::Histogram & getMissTypeMachLatencyHist(uint32_t r, uint32_t t) const
Definition: GPUCoalescer.hh:351
GPUCoalescer::m_missTypeLatencyHist
std::vector< Stats::Histogram * > m_missTypeLatencyHist
Definition: GPUCoalescer.hh:491
GPUCoalescer::recordMissLatency
void recordMissLatency(CoalescedRequest *crequest, MachineType mach, Cycles initialRequestTime, Cycles forwardRequestTime, Cycles firstResponseTime, bool success, bool isRegion)
Definition: GPUCoalescer.cc:942
PendingWriteInst::numPendingStores
int numPendingStores
Definition: GPUCoalescer.hh:197
PerInstPackets
std::list< PacketPtr > PerInstPackets
Definition: GPUCoalescer.hh:58
CoalescedRequest
Definition: GPUCoalescer.hh:102
Consumer.hh
Address.hh
ArmISA::t
Bitfield< 5 > t
Definition: miscregs_types.hh:67
GPUCoalescer::m_FirstResponseToCompletionDelayHist
std::vector< Stats::Histogram * > m_FirstResponseToCompletionDelayHist
Definition: GPUCoalescer.hh:502
GPUCoalescer::tryCacheAccess
bool tryCacheAccess(Addr addr, RubyRequestType type, Addr pc, RubyAccessMode access_mode, int size, DataBlock *&data_ptr)
Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:258
CoalescedRequest::getSeqNum
uint64_t getSeqNum() const
Definition: GPUCoalescer.hh:116
GPUCoalescer::kernelEndList
std::unordered_map< int, PacketPtr > kernelEndList
Definition: GPUCoalescer.hh:445
GPUCoalescer::m_missMachLatencyHist
std::vector< Stats::Histogram * > m_missMachLatencyHist
Histograms for profiling the latencies for requests that required external messages.
Definition: GPUCoalescer.hh:495
PendingWriteInst::gpuDynInstPtr
GPUDynInstPtr gpuDynInstPtr
Definition: GPUCoalescer.hh:206
GPUCoalescer::m_max_outstanding_requests
int m_max_outstanding_requests
Definition: GPUCoalescer.hh:411
token_port.hh
UncoalescedTable::initPacketsRemaining
void initPacketsRemaining(InstSeqNum seqNum, int count)
Definition: GPUCoalescer.cc:79
PendingWriteInst::~PendingWriteInst
~PendingWriteInst()
Definition: GPUCoalescer.hh:142
GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:48
Cycles
Cycles is a wrapper class for representing cycle counts, i.e.
Definition: types.hh:79
GPUCoalescer::completeIssue
void completeIssue()
Definition: GPUCoalescer.cc:793
UncoalescedTable::checkDeadlock
void checkDeadlock(Tick threshold)
Definition: GPUCoalescer.cc:161
X86ISA::type
type
Definition: misc.hh:727
GPUCoalescer::empty
bool empty() const
Definition: GPUCoalescer.cc:583
GPUCoalescer::GMTokenPort::GMTokenPort
GMTokenPort(const std::string &name, ClockedObject *owner, PortID id=InvalidPortID)
Definition: GPUCoalescer.hh:215
CoalescedRequest::issueTime
Cycles issueTime
Definition: GPUCoalescer.hh:124
operator<<
std::ostream & operator<<(std::ostream &out, const GPUCoalescer &obj)
Definition: GPUCoalescer.hh:531
CoalescedRequest::getPackets
std::vector< PacketPtr > & getPackets()
Definition: GPUCoalescer.hh:120
GPUCoalescer::getMissLatencyHist
Stats::Histogram & getMissLatencyHist()
Definition: GPUCoalescer.hh:342
GPUCoalescer::getTypeLatencyHist
Stats::Histogram & getTypeLatencyHist(uint32_t t)
Definition: GPUCoalescer.hh:339
GPUCoalescer::issueRequest
virtual void issueRequest(CoalescedRequest *crequest)=0
GPUCoalescer::coalescingWindow
int coalescingWindow
Definition: GPUCoalescer.hh:419
Packet::senderState
SenderState * senderState
This packet's sender state.
Definition: packet.hh:509
std::list
STL list class.
Definition: stl.hh:51
GPUCoalescer::m_missLatencyHist
Stats::Histogram m_missLatencyHist
Histogram for holding latency profile of all requests that miss in the controller connected to this s...
Definition: GPUCoalescer.hh:490
TokenResponsePort
Definition: token_port.hh:90
GPUCoalescer::GMTokenPort::recvTimingReq
bool recvTimingReq(PacketPtr)
Receive a timing request from the peer.
Definition: GPUCoalescer.hh:224
CoalescedRequest::setIssueTime
void setIssueTime(Cycles _issueTime)
Definition: GPUCoalescer.hh:113
GPUCoalescer::GPUCoalescer
GPUCoalescer(const Params &)
Definition: GPUCoalescer.cc:183
GPUCoalescer::getMissMachLatencyHist
Stats::Histogram & getMissMachLatencyHist(uint32_t t) const
Definition: GPUCoalescer.hh:347
GPUCoalescer::GMTokenPort::recvAtomic
Tick recvAtomic(PacketPtr)
Receive an atomic request packet from the peer.
Definition: GPUCoalescer.hh:222
UncoalescedTable::getInstPackets
PerInstPackets * getInstPackets(int offset)
Definition: GPUCoalescer.cc:99
CoalescedRequest::getIssueTime
Cycles getIssueTime() const
Definition: GPUCoalescer.hh:118
GPUCoalescer::m_dataCache_ptr
CacheMemory * m_dataCache_ptr
Definition: GPUCoalescer.hh:414
UncoalescedTable::areRequestsDone
bool areRequestsDone(const InstSeqNum instSeqNum)
Definition: GPUCoalescer.cc:137
GPUCoalescer::assumingRfOCoherence
bool assumingRfOCoherence
Definition: GPUCoalescer.hh:456
GPUCoalescer::coalescedReqs
std::unordered_map< uint64_t, std::deque< CoalescedRequest * > > coalescedReqs
Definition: GPUCoalescer.hh:435
GPUCoalescer::GMTokenPort::~GMTokenPort
~GMTokenPort()
Definition: GPUCoalescer.hh:219
CoalescedRequest::setRubyType
void setRubyType(RubyRequestType type)
Definition: GPUCoalescer.hh:114
UncoalescedTable::updateResources
void updateResources()
Definition: GPUCoalescer.cc:112
ArmISA::offset
Bitfield< 23, 0 > offset
Definition: types.hh:153
GPUCoalescer::descheduleDeadlockEvent
void descheduleDeadlockEvent() override
Definition: GPUCoalescer.hh:320
GPUCoalescer::uncoalescedTable
UncoalescedTable uncoalescedTable
Definition: GPUCoalescer.hh:424
GPUCoalescer::hitCallback
void hitCallback(CoalescedRequest *crequest, MachineType mach, DataBlock &data, bool success, Cycles initialRequestTime, Cycles forwardRequestTime, Cycles firstResponseTime, bool isRegion)
Definition: GPUCoalescer.cc:524
Sequencer.hh

Generated on Tue Jun 22 2021 15:28:30 for gem5 by doxygen 1.8.17