gem5  v20.1.0.0
GPUCoalescer.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #ifndef __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
35 #define __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
36 
37 #include <iostream>
38 #include <unordered_map>
39 
40 #include "base/statistics.hh"
42 #include "gpu-compute/misc.hh"
43 #include "mem/request.hh"
46 #include "mem/ruby/protocol/PrefetchBit.hh"
47 #include "mem/ruby/protocol/RubyAccessMode.hh"
48 #include "mem/ruby/protocol/RubyRequestType.hh"
49 #include "mem/ruby/protocol/SequencerRequestType.hh"
51 #include "mem/token_port.hh"
52 
53 class DataBlock;
54 class CacheMsg;
55 class MachineID;
56 class CacheMemory;
57 
58 class RubyGPUCoalescerParams;
59 
60 // List of packets that belong to a specific instruction.
62 
// UncoalescedTable: per-instruction table of request packets. instMap (below)
// maps an instruction's sequence number to the packets that still need
// responses.
// NOTE(review): this listing has lost the `class UncoalescedTable` line, the
// constructor/destructor, the getInstPackets() declaration and the
// `coalescer` back-pointer; the member index at the end of the page still
// records their signatures.
64 {
65  public:
68 
69  void insertPacket(PacketPtr pkt);
70  bool packetAvailable();
71  void printRequestTable(std::stringstream& ss);
72 
73  // Returns a pointer to the list of packets corresponding to an
74  // instruction in the instruction map or nullptr if there are no
75  // instructions at the offset.
     // (This comment documents getInstPackets(int offset); its declaration
     // line is missing from the listing.)
77  void updateResources();
78  bool areRequestsDone(const uint64_t instSeqNum);
79 
80  // Check if a packet hasn't been removed from instMap in too long.
81  // Panics if a deadlock is detected and returns nothing otherwise.
82  void checkDeadlock(Tick threshold);
83 
84  private:
85 
87  // Maps an instruction's unique sequence number to a queue of packets
88  // which need responses. This data structure assumes the sequence number
89  // is monotonically increasing (which is true for CU class) in order to
90  // issue packets in age order.
91  std::map<uint64_t, PerInstPackets> instMap;
92 };
93 
// CoalescedRequest: one coalesced request, carrying a sequence number, an
// issue time, a Ruby request type and the packets folded into it.
// NOTE(review): the `class CoalescedRequest` line, the destructor,
// getPackets() and the `issueTime` / `pkts` member declarations are missing
// from this listing; see the member index at the end of the page.
95 {
96  public:
     // Starts with issue time Cycles(0) and request type RubyRequestType_NULL.
97  CoalescedRequest(uint64_t _seqNum)
98  : seqNum(_seqNum), issueTime(Cycles(0)),
99  rubyType(RubyRequestType_NULL)
100  {}
102 
     // insertPacket appends to pkts; the setters overwrite the stored value.
103  void insertPacket(PacketPtr pkt) { pkts.push_back(pkt); }
104  void setSeqNum(uint64_t _seqNum) { seqNum = _seqNum; }
105  void setIssueTime(Cycles _issueTime) { issueTime = _issueTime; }
106  void setRubyType(RubyRequestType type) { rubyType = type; }
107 
108  uint64_t getSeqNum() const { return seqNum; }
     // NOTE(review): reads pkts[0] without checking that pkts is non-empty;
     // callers must have inserted at least one packet first.
109  PacketPtr getFirstPkt() const { return pkts[0]; }
110  Cycles getIssueTime() const { return issueTime; }
111  RubyRequestType getRubyType() const { return rubyType; }
113 
114  private:
115  uint64_t seqNum;
117  RubyRequestType rubyType;
119 };
120 
121 // PendingWriteInst tracks the number of outstanding Ruby requests
122 // per write instruction. Once all requests associated with one instruction
123 // are completely done in Ruby, we call back the requestor to mark
124 // that this instruction is complete.
// NOTE(review): the class header, the names of several member functions and
// the data-member declarations are missing from this listing; the member
// index at the end of the page records them (addPendingReq,
// receiveWriteCompleteAck, getNumPendingStores, numPendingStores,
// originalPort, gpuDynInstPtr).
126 {
127  public:
     // Default state: no pending stores, no port, no instruction.
129  : numPendingStores(0),
130  originalPort(nullptr),
131  gpuDynInstPtr(nullptr)
132  {}
133 
135  {}
136 
     // addPendingReq: requires a non-null port and stores it as originalPort.
     // The instruction pointer is stored only when the Ruby tester is not in
     // use.
137  void
139  bool usingRubyTester)
140  {
141  assert(port);
142  originalPort = port;
143 
144  if (!usingRubyTester) {
145  gpuDynInstPtr = inst;
146  }
147 
     // NOTE(review): the statement on listing line 148 is missing;
     // presumably it increments numPendingStores -- confirm against the
     // real header.
149  }
150 
151  // return true if no more ack is expected
     // Precondition (asserted): at least one store is still pending.
152  bool
154  {
155  assert(numPendingStores > 0);
     // NOTE(review): listing line 156 is missing; presumably it decrements
     // numPendingStores before the comparison below -- confirm.
157  return (numPendingStores == 0) ? true : false;
158  }
159 
160  // ack the original requestor that this write instruction is complete
     // Precondition (asserted): no stores remain pending.
161  void
162  ackWriteCompletion(bool usingRubyTester)
163  {
164  assert(numPendingStores == 0);
165 
166  // make a response packet
167  PacketPtr pkt = new Packet(std::make_shared<Request>(),
169 
     // Outside the Ruby tester a sender state built from gpuDynInstPtr is
     // attached to the packet (the line constructing `ss` is partly missing).
170  if (!usingRubyTester) {
171  assert(gpuDynInstPtr);
174  (gpuDynInstPtr, 0, nullptr);
175  pkt->senderState = ss;
176  }
177 
178  // send the ack response to the requestor
     // NOTE(review): the send call itself (listing line 179) is missing, so
     // who owns the heap-allocated pkt afterwards cannot be seen from here.
180  }
181 
     // getNumPendingStores: plain accessor for the counter.
182  int
184  return numPendingStores;
185  }
186 
187  private:
188  // the number of stores waiting for writeCompleteCallback
190  // The original port that sent one of the packets associated with this
191  // write instruction. We may have more than one packet per instruction,
192  // which implies multiple ports per instruction. However, we need
193  // only 1 of the ports to call back the CU. Therefore, here we keep
194  // track of the port that sent the first packet of this instruction.
196  // similar to the originalPort, this gpuDynInstPtr is set only for
197  // the first packet of this instruction.
199 };
200 
// GPUCoalescer: a RubyPort that keeps per-instruction request packets in
// uncoalescedTable and coalesced requests in coalescedTable. The class is
// abstract: issueRequest() is pure virtual and must be supplied by a
// protocol-specific derived coalescer.
// NOTE(review): this listing has lost many declaration lines (return types
// of the histogram accessors, most data members, the nested-class header);
// the member index at the end of the page still records their signatures.
201 class GPUCoalescer : public RubyPort
202 {
203  public:
     // GMTokenPort (class header missing): the receive hooks visible below
     // are stubs -- recvAtomic() returns Tick(0), recvTimingReq() returns
     // false, and the last member returns an empty AddrRangeList.
205  {
206  public:
207  GMTokenPort(const std::string& name, ClockedObject *owner,
208  PortID id = InvalidPortID)
210  { }
212 
213  protected:
214  Tick recvAtomic(PacketPtr) { return Tick(0); }
216  bool recvTimingReq(PacketPtr) { return false; }
218  {
219  AddrRangeList ranges;
220  return ranges;
221  }
222  };
223 
224  typedef RubyGPUCoalescerParams Params;
225  GPUCoalescer(const Params *);
226  ~GPUCoalescer();
227 
228  Port &getPort(const std::string &if_name,
229  PortID idx = InvalidPortID) override;
230 
231  // Public Methods
232  void wakeup(); // Used only for deadlock detection
233  void printRequestTable(std::stringstream& ss);
234 
235  void printProgress(std::ostream& out) const;
236  void resetStats() override;
237  void collateStats();
238  void regStats() override;
239 
240  // each store request needs two callbacks:
241  // (1) writeCallback is called when the store is received and processed
242  // by TCP. This writeCallback does not guarantee the store is actually
243  // completed at its destination cache or memory. writeCallback helps
244  // release hardware resources (e.g., its entry in coalescedTable)
245  // allocated for the store so that subsequent requests will not be
246  // blocked unnecessarily due to hardware resource constraints.
247  // (2) writeCompleteCallback is called when the store is fully completed
248  // at its destination cache or memory. writeCompleteCallback
249  // guarantees that the store is fully completed. This callback
250  // will decrement hardware counters in CU
251  void writeCallback(Addr address, DataBlock& data);
252 
253  void writeCallback(Addr address,
254  MachineType mach,
255  DataBlock& data);
256 
257  void writeCallback(Addr address,
258  MachineType mach,
259  DataBlock& data,
260  Cycles initialRequestTime,
261  Cycles forwardRequestTime,
262  Cycles firstResponseTime,
263  bool isRegion);
264 
265  void writeCallback(Addr address,
266  MachineType mach,
267  DataBlock& data,
268  Cycles initialRequestTime,
269  Cycles forwardRequestTime,
270  Cycles firstResponseTime);
271 
272  void writeCompleteCallback(Addr address,
273  uint64_t instSeqNum,
274  MachineType mach);
275 
     // readCallback offers the same overload ladder as writeCallback.
276  void readCallback(Addr address, DataBlock& data);
277 
278  void readCallback(Addr address,
279  MachineType mach,
280  DataBlock& data);
281 
282  void readCallback(Addr address,
283  MachineType mach,
284  DataBlock& data,
285  Cycles initialRequestTime,
286  Cycles forwardRequestTime,
287  Cycles firstResponseTime);
288 
289  void readCallback(Addr address,
290  MachineType mach,
291  DataBlock& data,
292  Cycles initialRequestTime,
293  Cycles forwardRequestTime,
294  Cycles firstResponseTime,
295  bool isRegion);
296 
297  /* atomics need their own callback because the data
298  might be const coming from SLICC */
299  virtual void atomicCallback(Addr address,
300  MachineType mach,
301  const DataBlock& data);
302 
303  RequestStatus makeRequest(PacketPtr pkt) override;
304  int outstandingCount() const override { return m_outstanding_count; }
305 
     // True while deadlockCheckEvent is scheduled.
306  bool
307  isDeadlockEventScheduled() const override
308  {
309  return deadlockCheckEvent.scheduled();
310  }
311 
     // NOTE(review): the name and body of this function are missing from the
     // listing; the index lists descheduleDeadlockEvent() at this position.
312  void
314  {
316  }
317 
318  bool empty() const;
319 
320  void print(std::ostream& out) const;
321 
322  void evictionCallback(Addr address);
323  void completeIssue();
324 
325  void insertKernel(int wavefront_id, PacketPtr pkt);
326 
     // Accessors for the latency/delay histograms. Their return-type lines
     // (and some names and bodies) are missing from the listing; the indexed
     // ones dereference an entry of a vector of histogram pointers.
328 
330 
333  { return *m_typeLatencyHist[t]; }
334 
336  { return m_missLatencyHist; }
338  { return *m_missTypeLatencyHist[t]; }
339 
341  { return *m_missMachLatencyHist[t]; }
342 
344  getMissTypeMachLatencyHist(uint32_t r, uint32_t t) const
345  { return *m_missTypeMachLatencyHist[r][t]; }
346 
348  { return *m_IssueToInitialDelayHist[t]; }
349 
351  getInitialToForwardDelayHist(const MachineType t) const
352  { return *m_InitialToForwardDelayHist[t]; }
353 
355  getForwardRequestToFirstResponseHist(const MachineType t) const
357 
359  getFirstResponseToCompletionDelayHist(const MachineType t) const
361 
362  protected:
363  bool tryCacheAccess(Addr addr, RubyRequestType type,
364  Addr pc, RubyAccessMode access_mode,
365  int size, DataBlock*& data_ptr);
366 
367  // since the two following issue functions are protocol-specific,
368  // they must be implemented in a derived coalescer
     // (issueRequest is pure virtual; issueMemSyncRequest defaults to a no-op.)
369  virtual void issueRequest(CoalescedRequest* crequest) = 0;
370  virtual void issueMemSyncRequest(PacketPtr pkt) {}
371 
372  void kernelCallback(int wavefront_id);
373 
374  void hitCallback(CoalescedRequest* crequest,
375  MachineType mach,
376  DataBlock& data,
377  bool success,
378  Cycles initialRequestTime,
379  Cycles forwardRequestTime,
380  Cycles firstResponseTime,
381  bool isRegion);
382  void recordMissLatency(CoalescedRequest* crequest,
383  MachineType mach,
384  Cycles initialRequestTime,
385  Cycles forwardRequestTime,
386  Cycles firstResponseTime,
387  bool success, bool isRegion);
389 
390  virtual RubyRequestType getRequestType(PacketPtr pkt);
391 
392  // Attempt to remove a packet from the uncoalescedTable and coalesce
393  // with a previous request from the same instruction. If there is no
394  // previous instruction and the max number of outstanding requests has
395  // not been reached, a new coalesced request is created and added to the
396  // "target" list of the coalescedTable.
397  bool coalescePacket(PacketPtr pkt);
398 
400 
401  protected:
404 
407 
408  // coalescingWindow is the maximum number of instructions that are
409  // allowed to be coalesced in a single cycle.
411 
412  // The uncoalescedTable contains several "columns" which hold memory
413  // request packets for an instruction. The maximum size is the number of
414  // columns * the wavefront size.
416 
417  // An MSHR-like struct for holding coalesced requests. The requests in
418  // this table may or may not be outstanding in the memory hierarchy. The
419  // maximum size is equal to the maximum outstanding requests for a CU
420  // (typically the number of blocks in TCP). If there are duplicates of
421  // an address, they are serviced in age order.
422  std::map<Addr, std::deque<CoalescedRequest*>> coalescedTable;
423 
424  // a map between an instruction sequence number and PendingWriteInst
425  // this is used to do a final call back for each write when it is
426  // completely done in the memory system
427  std::unordered_map<uint64_t, PendingWriteInst> pendingWriteInsts;
428 
429  // Global outstanding request count, across all request tables
432  std::unordered_map<int, PacketPtr> kernelEndList;
434 
439 
441 
444 
445 // TODO - Need to update the following stats once the VIPER protocol
446 // is re-integrated.
447 // // m5 style stats for TCP hit/miss counts
448 // Stats::Scalar GPU_TCPLdHits;
449 // Stats::Scalar GPU_TCPLdTransfers;
450 // Stats::Scalar GPU_TCCLdHits;
451 // Stats::Scalar GPU_LdMiss;
452 //
453 // Stats::Scalar GPU_TCPStHits;
454 // Stats::Scalar GPU_TCPStTransfers;
455 // Stats::Scalar GPU_TCCStHits;
456 // Stats::Scalar GPU_StMiss;
457 //
458 // Stats::Scalar CP_TCPLdHits;
459 // Stats::Scalar CP_TCPLdTransfers;
460 // Stats::Scalar CP_TCCLdHits;
461 // Stats::Scalar CP_LdMiss;
462 //
463 // Stats::Scalar CP_TCPStHits;
464 // Stats::Scalar CP_TCPStTransfers;
465 // Stats::Scalar CP_TCCStHits;
466 // Stats::Scalar CP_StMiss;
467 
470 
474 
479 
484 
490 
491 // TODO - Need to update the following stats once the VIPER protocol
492 // is re-integrated.
493 // Stats::Distribution numHopDelays;
494 // Stats::Distribution tcpToTccDelay;
495 // Stats::Distribution tccToSdDelay;
496 // Stats::Distribution sdToSdDelay;
497 // Stats::Distribution sdToTccDelay;
498 // Stats::Distribution tccToTcpDelay;
499 //
500 // Stats::Average avgTcpToTcc;
501 // Stats::Average avgTccToSd;
502 // Stats::Average avgSdToSd;
503 // Stats::Average avgSdToTcc;
504 // Stats::Average avgTccToTcp;
505 
506  private:
507  // Token port is used to send/receive tokens to/from GPU's global memory
508  // pipeline across the port boundary. There is one per <wave size> data
509  // ports in the CU.
511 
512  // Private copy constructor and assignment operator
513  GPUCoalescer(const GPUCoalescer& obj);
514  GPUCoalescer& operator=(const GPUCoalescer& obj);
515 };
516 
517 inline std::ostream&
518 operator<<(std::ostream& out, const GPUCoalescer& obj)
519 {
520  // Let the coalescer format itself, then flush and hand the stream back
521  obj.print(out);
522  return out << std::flush;
523 }
524 
525 #endif // __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
CoalescedRequest::seqNum
uint64_t seqNum
Definition: GPUCoalescer.hh:115
UncoalescedTable::~UncoalescedTable
~UncoalescedTable()
Definition: GPUCoalescer.hh:67
GPUCoalescer::m_deadlock_threshold
Cycles m_deadlock_threshold
Definition: GPUCoalescer.hh:403
Event::scheduled
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:460
GPUCoalescer::issueMemSyncRequest
virtual void issueMemSyncRequest(PacketPtr pkt)
Definition: GPUCoalescer.hh:370
GPUCoalescer::m_load_waiting_on_load_cycles
int m_load_waiting_on_load_cycles
Definition: GPUCoalescer.hh:438
GPUCoalescer::collateStats
void collateStats()
GPUCoalescer::GMTokenPort::recvFunctional
void recvFunctional(PacketPtr)
Receive a functional request packet from the peer.
Definition: GPUCoalescer.hh:215
GPUCoalescer::m_latencyHist
Stats::Histogram m_latencyHist
Histogram for holding latency profile of all requests.
Definition: GPUCoalescer.hh:472
data
const char data[]
Definition: circlebuf.test.cc:42
CoalescedRequest::getRubyType
RubyRequestType getRubyType() const
Definition: GPUCoalescer.hh:111
CoalescedRequest::insertPacket
void insertPacket(PacketPtr pkt)
Definition: GPUCoalescer.hh:103
ResponsePort::sendTimingResp
bool sendTimingResp(PacketPtr pkt)
Attempt to send a timing response to the request port by calling its corresponding receive function.
Definition: port.hh:367
GPUCoalescer::m_missTypeMachLatencyHist
std::vector< std::vector< Stats::Histogram * > > m_missTypeMachLatencyHist
Definition: GPUCoalescer.hh:483
CoalescedRequest::~CoalescedRequest
~CoalescedRequest()
Definition: GPUCoalescer.hh:101
InvalidPortID
const PortID InvalidPortID
Definition: types.hh:238
GPUCoalescer::writeCompleteCallback
void writeCompleteCallback(Addr address, uint64_t instSeqNum, MachineType mach)
Definition: GPUCoalescer.cc:355
CoalescedRequest::pkts
std::vector< PacketPtr > pkts
Definition: GPUCoalescer.hh:118
PendingWriteInst::receiveWriteCompleteAck
bool receiveWriteCompleteAck()
Definition: GPUCoalescer.hh:153
GPUCoalescer::evictionCallback
void evictionCallback(Addr address)
Definition: GPUCoalescer.cc:732
GPUCoalescer::getGMTokenPort
GMTokenPort & getGMTokenPort()
Definition: GPUCoalescer.hh:327
GPUCoalescer::getOutstandReqHist
Stats::Histogram & getOutstandReqHist()
Definition: GPUCoalescer.hh:329
UncoalescedTable::coalescer
GPUCoalescer * coalescer
Definition: GPUCoalescer.hh:85
GPUCoalescer
Definition: GPUCoalescer.hh:201
UncoalescedTable
Definition: GPUCoalescer.hh:63
type
uint8_t type
Definition: inet.hh:421
GPUCoalescer::printRequestTable
void printRequestTable(std::stringstream &ss)
Definition: GPUCoalescer.cc:230
misc.hh
GPUCoalescer::Params
RubyGPUCoalescerParams Params
Definition: GPUCoalescer.hh:224
Tick
uint64_t Tick
Tick count type.
Definition: types.hh:63
GPUCoalescer::printProgress
void printProgress(std::ostream &out) const
Definition: GPUCoalescer.cc:279
GPUCoalescer::getInitialToForwardDelayHist
Stats::Histogram & getInitialToForwardDelayHist(const MachineType t) const
Definition: GPUCoalescer.hh:351
GPUCoalescer::GMTokenPort
Definition: GPUCoalescer.hh:204
PortID
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Definition: types.hh:237
UncoalescedTable::insertPacket
void insertPacket(PacketPtr pkt)
Definition: GPUCoalescer.cc:65
GPUCoalescer::newKernelEnds
std::vector< int > newKernelEnds
Definition: GPUCoalescer.hh:433
GPUCoalescer::gmTokenPort
GMTokenPort gmTokenPort
Definition: GPUCoalescer.hh:510
EventManager::deschedule
void deschedule(Event &event)
Definition: eventq.hh:1014
std::vector
STL vector class.
Definition: stl.hh:37
GPUCoalescer::kernelCallback
void kernelCallback(int wavefront_id)
Definition: GPUCoalescer.cc:738
PendingWriteInst::getNumPendingStores
int getNumPendingStores()
Definition: GPUCoalescer.hh:183
GPUCoalescer::getIssueToInitialDelayHist
Stats::Histogram & getIssueToInitialDelayHist(uint32_t t) const
Definition: GPUCoalescer.hh:347
GPUCoalescer::m_outstanding_count
int m_outstanding_count
Definition: GPUCoalescer.hh:430
GPUCoalescer::getPort
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
Definition: GPUCoalescer.cc:193
PendingWriteInst::addPendingReq
void addPendingReq(RubyPort::MemResponsePort *port, GPUDynInstPtr inst, bool usingRubyTester)
Definition: GPUCoalescer.hh:138
PendingWriteInst
Definition: GPUCoalescer.hh:125
CoalescedRequest::getFirstPkt
PacketPtr getFirstPkt() const
Definition: GPUCoalescer.hh:109
MachineID
Definition: MachineID.hh:38
GPUCoalescer::m_store_waiting_on_load_cycles
int m_store_waiting_on_load_cycles
Definition: GPUCoalescer.hh:435
request.hh
GPUCoalescer::outstandingCount
int outstandingCount() const override
Definition: GPUCoalescer.hh:304
GPUCoalescer::regStats
void regStats() override
Callback to set stat parameters.
Definition: GPUCoalescer.cc:820
ClockedObject
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
Definition: clocked_object.hh:231
DataBlock
Definition: DataBlock.hh:40
EventFunctionWrapper
Definition: eventq.hh:1101
Stats::Histogram
A simple histogram stat.
Definition: statistics.hh:2654
GPUCoalescer::deadlockCheckEvent
EventFunctionWrapper deadlockCheckEvent
Definition: GPUCoalescer.hh:442
PendingWriteInst::PendingWriteInst
PendingWriteInst()
Definition: GPUCoalescer.hh:128
CoalescedRequest::rubyType
RubyRequestType rubyType
Definition: GPUCoalescer.hh:117
GPUCoalescer::m_InitialToForwardDelayHist
std::vector< Stats::Histogram * > m_InitialToForwardDelayHist
Definition: GPUCoalescer.hh:487
MemCmd::WriteCompleteResp
@ WriteCompleteResp
Definition: packet.hh:87
GPUCoalescer::m_outstandReqHist
Stats::Histogram m_outstandReqHist
Histogram for number of outstanding requests per cycle.
Definition: GPUCoalescer.hh:469
GPUCoalescer::completeHitCallback
void completeHitCallback(std::vector< PacketPtr > &mylist)
Definition: GPUCoalescer.cc:777
GPUCoalescer::isDeadlockEventScheduled
bool isDeadlockEventScheduled() const override
Definition: GPUCoalescer.hh:307
RubyPort::MemResponsePort
Definition: RubyPort.hh:75
UncoalescedTable::printRequestTable
void printRequestTable(std::stringstream &ss)
Definition: GPUCoalescer.cc:121
GPUCoalescer::coalescedTable
std::map< Addr, std::deque< CoalescedRequest * > > coalescedTable
Definition: GPUCoalescer.hh:422
ArmISA::ss
Bitfield< 21 > ss
Definition: miscregs_types.hh:56
GPUCoalescer::m_IssueToInitialDelayHist
std::vector< Stats::Histogram * > m_IssueToInitialDelayHist
Histograms for recording the breakdown of miss latency.
Definition: GPUCoalescer.hh:486
UncoalescedTable::packetAvailable
bool packetAvailable()
Definition: GPUCoalescer.cc:75
GPUCoalescer::getForwardRequestToFirstResponseHist
Stats::Histogram & getForwardRequestToFirstResponseHist(const MachineType t) const
Definition: GPUCoalescer.hh:355
GPUCoalescer::~GPUCoalescer
~GPUCoalescer()
Definition: GPUCoalescer.cc:188
GPUCoalescer::m_store_waiting_on_store_cycles
int m_store_waiting_on_store_cycles
Definition: GPUCoalescer.hh:436
GPUCoalescer::atomicCallback
virtual void atomicCallback(Addr address, MachineType mach, const DataBlock &data)
Definition: GPUCoalescer.cc:748
SenderState
RubyTester::SenderState SenderState
Definition: Check.cc:37
GPUCoalescer::print
void print(std::ostream &out) const
Definition: GPUCoalescer.cc:591
GPUCoalescer::m_runningGarnetStandalone
bool m_runningGarnetStandalone
Definition: GPUCoalescer.hh:440
MipsISA::pc
Bitfield< 4 > pc
Definition: pra_constants.hh:240
GPUCoalescer::wakeup
void wakeup()
Definition: GPUCoalescer.cc:204
statistics.hh
GPUCoalescer::getLatencyHist
Stats::Histogram & getLatencyHist()
Definition: GPUCoalescer.hh:331
RubyPort
Definition: RubyPort.hh:58
Port
Ports are used to interface objects to each other.
Definition: port.hh:56
CoalescedRequest::setSeqNum
void setSeqNum(uint64_t _seqNum)
Definition: GPUCoalescer.hh:104
MipsISA::r
r
Definition: pra_constants.hh:95
GPUCoalescer::m_typeLatencyHist
std::vector< Stats::Histogram * > m_typeLatencyHist
Definition: GPUCoalescer.hh:473
GPUCoalescer::getRequestType
virtual RubyRequestType getRequestType(PacketPtr pkt)
Definition: GPUCoalescer.cc:517
UncoalescedTable::UncoalescedTable
UncoalescedTable(GPUCoalescer *gc)
Definition: GPUCoalescer.cc:59
GPUCoalescer::pendingWriteInsts
std::unordered_map< uint64_t, PendingWriteInst > pendingWriteInsts
Definition: GPUCoalescer.hh:427
GPUCoalescer::operator=
GPUCoalescer & operator=(const GPUCoalescer &obj)
gpu_dyn_inst.hh
GPUCoalescer::insertKernel
void insertKernel(int wavefront_id, PacketPtr pkt)
Definition: GPUCoalescer.cc:285
GPUCoalescer::m_deadlock_check_scheduled
bool m_deadlock_check_scheduled
Definition: GPUCoalescer.hh:431
CacheMemory
Definition: CacheMemory.hh:63
GPUCoalescer::issueEvent
EventFunctionWrapper issueEvent
Definition: GPUCoalescer.hh:399
CoalescedRequest::CoalescedRequest
CoalescedRequest(uint64_t _seqNum)
Definition: GPUCoalescer.hh:97
GPUCoalescer::GMTokenPort::getAddrRanges
AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
Definition: GPUCoalescer.hh:217
Port::id
const PortID id
A numeric identifier to distinguish ports in a vector, and set to InvalidPortID in case this port is ...
Definition: port.hh:74
GPUCoalescer::m_ForwardToFirstResponseDelayHist
std::vector< Stats::Histogram * > m_ForwardToFirstResponseDelayHist
Definition: GPUCoalescer.hh:488
ComputeUnit::DataPort::SenderState
Definition: compute_unit.hh:660
GPUCoalescer::resetStats
void resetStats() override
Callback to reset stats.
Definition: GPUCoalescer.cc:256
GPUCoalescer::getFirstResponseToCompletionDelayHist
Stats::Histogram & getFirstResponseToCompletionDelayHist(const MachineType t) const
Definition: GPUCoalescer.hh:359
GPUCoalescer::m_instCache_ptr
CacheMemory * m_instCache_ptr
Definition: GPUCoalescer.hh:406
GPUCoalescer::m_load_waiting_on_store_cycles
int m_load_waiting_on_store_cycles
Definition: GPUCoalescer.hh:437
ProbePoints::Packet
ProbePointArg< PacketInfo > Packet
Packet probe point.
Definition: mem.hh:103
Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:142
Port::name
const std::string name() const
Return port name (for DPRINTF).
Definition: port.hh:106
PendingWriteInst::ackWriteCompletion
void ackWriteCompletion(bool usingRubyTester)
Definition: GPUCoalescer.hh:162
GPUCoalescer::writeCallback
void writeCallback(Addr address, DataBlock &data)
Definition: GPUCoalescer.cc:299
GPUCoalescer::makeRequest
RequestStatus makeRequest(PacketPtr pkt) override
Definition: GPUCoalescer.cc:545
GPUCoalescer::readCallback
void readCallback(Addr address, DataBlock &data)
Definition: GPUCoalescer.cc:385
PendingWriteInst::originalPort
RubyPort::MemResponsePort * originalPort
Definition: GPUCoalescer.hh:195
ResponsePort::owner
SimObject & owner
Definition: port.hh:276
GPUCoalescer::coalescePacket
bool coalescePacket(PacketPtr pkt)
Definition: GPUCoalescer.cc:600
GPUCoalescer::getMissTypeLatencyHist
Stats::Histogram & getMissTypeLatencyHist(uint32_t t)
Definition: GPUCoalescer.hh:337
GPUCoalescer::getMissTypeMachLatencyHist
Stats::Histogram & getMissTypeMachLatencyHist(uint32_t r, uint32_t t) const
Definition: GPUCoalescer.hh:344
GPUCoalescer::m_missTypeLatencyHist
std::vector< Stats::Histogram * > m_missTypeLatencyHist
Definition: GPUCoalescer.hh:478
GPUCoalescer::recordMissLatency
void recordMissLatency(CoalescedRequest *crequest, MachineType mach, Cycles initialRequestTime, Cycles forwardRequestTime, Cycles firstResponseTime, bool success, bool isRegion)
Definition: GPUCoalescer.cc:810
UncoalescedTable::areRequestsDone
bool areRequestsDone(const uint64_t instSeqNum)
Definition: GPUCoalescer.cc:108
PendingWriteInst::numPendingStores
int numPendingStores
Definition: GPUCoalescer.hh:189
PerInstPackets
std::list< PacketPtr > PerInstPackets
Definition: GPUCoalescer.hh:58
CoalescedRequest
Definition: GPUCoalescer.hh:94
Consumer.hh
Address.hh
ArmISA::t
Bitfield< 5 > t
Definition: miscregs_types.hh:67
GPUCoalescer::m_FirstResponseToCompletionDelayHist
std::vector< Stats::Histogram * > m_FirstResponseToCompletionDelayHist
Definition: GPUCoalescer.hh:489
GPUCoalescer::tryCacheAccess
bool tryCacheAccess(Addr addr, RubyRequestType type, Addr pc, RubyAccessMode access_mode, int size, DataBlock *&data_ptr)
Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:257
CoalescedRequest::getSeqNum
uint64_t getSeqNum() const
Definition: GPUCoalescer.hh:108
GPUCoalescer::kernelEndList
std::unordered_map< int, PacketPtr > kernelEndList
Definition: GPUCoalescer.hh:432
GPUCoalescer::m_missMachLatencyHist
std::vector< Stats::Histogram * > m_missMachLatencyHist
Histograms for profiling the latencies for requests that required external messages.
Definition: GPUCoalescer.hh:482
PendingWriteInst::gpuDynInstPtr
GPUDynInstPtr gpuDynInstPtr
Definition: GPUCoalescer.hh:198
UncoalescedTable::instMap
std::map< uint64_t, PerInstPackets > instMap
Definition: GPUCoalescer.hh:91
GPUCoalescer::m_max_outstanding_requests
int m_max_outstanding_requests
Definition: GPUCoalescer.hh:402
token_port.hh
addr
ip6_addr_t addr
Definition: inet.hh:423
PendingWriteInst::~PendingWriteInst
~PendingWriteInst()
Definition: GPUCoalescer.hh:134
GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:48
Cycles
Cycles is a wrapper class for representing cycle counts, i.e.
Definition: types.hh:83
GPUCoalescer::completeIssue
void completeIssue()
Definition: GPUCoalescer.cc:696
UncoalescedTable::checkDeadlock
void checkDeadlock(Tick threshold)
Definition: GPUCoalescer.cc:132
GPUCoalescer::empty
bool empty() const
Definition: GPUCoalescer.cc:511
GPUCoalescer::GMTokenPort::GMTokenPort
GMTokenPort(const std::string &name, ClockedObject *owner, PortID id=InvalidPortID)
Definition: GPUCoalescer.hh:207
CoalescedRequest::issueTime
Cycles issueTime
Definition: GPUCoalescer.hh:116
operator<<
std::ostream & operator<<(std::ostream &out, const GPUCoalescer &obj)
Definition: GPUCoalescer.hh:518
CoalescedRequest::getPackets
std::vector< PacketPtr > & getPackets()
Definition: GPUCoalescer.hh:112
GPUCoalescer::getMissLatencyHist
Stats::Histogram & getMissLatencyHist()
Definition: GPUCoalescer.hh:335
GPUCoalescer::getTypeLatencyHist
Stats::Histogram & getTypeLatencyHist(uint32_t t)
Definition: GPUCoalescer.hh:332
GPUCoalescer::issueRequest
virtual void issueRequest(CoalescedRequest *crequest)=0
GPUCoalescer::coalescingWindow
int coalescingWindow
Definition: GPUCoalescer.hh:410
Packet::senderState
SenderState * senderState
This packet's sender state.
Definition: packet.hh:508
std::list
STL list class.
Definition: stl.hh:51
GPUCoalescer::m_missLatencyHist
Stats::Histogram m_missLatencyHist
Histogram for holding latency profile of all requests that miss in the controller connected to this s...
Definition: GPUCoalescer.hh:477
TokenResponsePort
Definition: token_port.hh:90
GPUCoalescer::GMTokenPort::recvTimingReq
bool recvTimingReq(PacketPtr)
Receive a timing request from the peer.
Definition: GPUCoalescer.hh:216
CoalescedRequest::setIssueTime
void setIssueTime(Cycles _issueTime)
Definition: GPUCoalescer.hh:105
GPUCoalescer::getMissMachLatencyHist
Stats::Histogram & getMissMachLatencyHist(uint32_t t) const
Definition: GPUCoalescer.hh:340
GPUCoalescer::GMTokenPort::recvAtomic
Tick recvAtomic(PacketPtr)
Receive an atomic request packet from the peer.
Definition: GPUCoalescer.hh:214
UncoalescedTable::getInstPackets
PerInstPackets * getInstPackets(int offset)
Definition: GPUCoalescer.cc:81
CoalescedRequest::getIssueTime
Cycles getIssueTime() const
Definition: GPUCoalescer.hh:110
GPUCoalescer::m_dataCache_ptr
CacheMemory * m_dataCache_ptr
Definition: GPUCoalescer.hh:405
GPUCoalescer::assumingRfOCoherence
bool assumingRfOCoherence
Definition: GPUCoalescer.hh:443
GPUCoalescer::GPUCoalescer
GPUCoalescer(const Params *)
Definition: GPUCoalescer.cc:154
GPUCoalescer::GMTokenPort::~GMTokenPort
~GMTokenPort()
Definition: GPUCoalescer.hh:211
CoalescedRequest::setRubyType
void setRubyType(RubyRequestType type)
Definition: GPUCoalescer.hh:106
UncoalescedTable::updateResources
void updateResources()
Definition: GPUCoalescer.cc:94
ArmISA::offset
Bitfield< 23, 0 > offset
Definition: types.hh:153
GPUCoalescer::descheduleDeadlockEvent
void descheduleDeadlockEvent() override
Definition: GPUCoalescer.hh:313
GPUCoalescer::uncoalescedTable
UncoalescedTable uncoalescedTable
Definition: GPUCoalescer.hh:415
GPUCoalescer::hitCallback
void hitCallback(CoalescedRequest *crequest, MachineType mach, DataBlock &data, bool success, Cycles initialRequestTime, Cycles forwardRequestTime, Cycles firstResponseTime, bool isRegion)
Definition: GPUCoalescer.cc:452
Sequencer.hh

Generated on Wed Sep 30 2020 14:02:13 for gem5 by doxygen 1.8.17