gem5  v21.0.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
VIPERCoalescer.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
35 
#include "mem/ruby/system/VIPERCoalescer.hh"

#include "base/logging.hh"
#include "base/str.hh"
#include "config/the_isa.hh"
#include "cpu/testers/rubytest/RubyTester.hh"
#include "debug/GPUCoalescer.hh"
#include "debug/MemoryAccess.hh"
#include "debug/ProtocolTrace.hh"
#include "mem/packet.hh"
#include "mem/ruby/common/SubBlock.hh"
#include "mem/ruby/network/MessageBuffer.hh"
#include "mem/ruby/profiler/Profiler.hh"
#include "mem/ruby/slicc_interface/AbstractController.hh"
#include "mem/ruby/slicc_interface/RubyRequest.hh"
#include "mem/ruby/structures/CacheMemory.hh"
#include "mem/ruby/system/GPUCoalescer.hh"
#include "mem/ruby/system/RubySystem.hh"
#include "params/VIPERCoalescer.hh"
53 
55  : GPUCoalescer(p),
56  m_cache_inv_pkt(nullptr),
57  m_num_pending_invs(0)
58 {
59 }
60 
62 {
63 }
64 
65 // Places an uncoalesced packet in uncoalescedTable. If the packet is a
66 // special type (MemFence, scoping, etc), it is issued immediately.
67 RequestStatus
69 {
70  // VIPER only supports following memory request types
71  // MemSyncReq & INV_L1 : TCP cache invalidation
72  // ReadReq : cache read
73  // WriteReq : cache write
74  // AtomicOp : cache atomic
75  //
76  // VIPER does not expect MemSyncReq & Release since in GCN3, compute unit
77  // does not specify an equivalent type of memory request.
78  assert((pkt->cmd == MemCmd::MemSyncReq && pkt->req->isInvL1()) ||
79  pkt->cmd == MemCmd::ReadReq ||
80  pkt->cmd == MemCmd::WriteReq ||
81  pkt->isAtomicOp());
82 
83  if (pkt->req->isInvL1() && m_cache_inv_pkt) {
84  // In VIPER protocol, the coalescer is not able to handle two or
85  // more cache invalidation requests at a time. Cache invalidation
86  // requests must be serialized to ensure that all stale data in
87  // TCP are invalidated correctly. If there's already a pending
88  // cache invalidation request, we must retry this request later
89  return RequestStatus_Aliased;
90  }
91 
93 
94  if (pkt->req->isInvL1()) {
95  // In VIPER protocol, a compute unit sends a MemSyncReq with INV_L1
96  // flag to invalidate TCP. Upon receiving a request of this type,
97  // VIPERCoalescer starts a cache walk to invalidate all valid entries
98  // in TCP. The request is completed once all entries are invalidated.
99  assert(!m_cache_inv_pkt);
100  m_cache_inv_pkt = pkt;
101  invTCP();
102  }
103 
104  return RequestStatus_Issued;
105 }
106 
107 void
109 {
110  PacketPtr pkt = crequest->getFirstPkt();
111 
112  int proc_id = -1;
113  if (pkt != NULL && pkt->req->hasContextId()) {
114  proc_id = pkt->req->contextId();
115  }
116 
117  // If valid, copy the pc to the ruby request
118  Addr pc = 0;
119  if (pkt->req->hasPC()) {
120  pc = pkt->req->getPC();
121  }
122 
123  Addr line_addr = makeLineAddress(pkt->getAddr());
124 
125  // Creating WriteMask that records written bytes
126  // and atomic operations. This enables partial writes
127  // and partial reads of those writes
128  DataBlock dataBlock;
129  dataBlock.clear();
130  uint32_t blockSize = RubySystem::getBlockSizeBytes();
131  std::vector<bool> accessMask(blockSize,false);
133  uint32_t tableSize = crequest->getPackets().size();
134  for (int i = 0; i < tableSize; i++) {
135  PacketPtr tmpPkt = crequest->getPackets()[i];
136  uint32_t tmpOffset = (tmpPkt->getAddr()) - line_addr;
137  uint32_t tmpSize = tmpPkt->getSize();
138  if (tmpPkt->isAtomicOp()) {
139  std::pair<int,AtomicOpFunctor *> tmpAtomicOp(tmpOffset,
140  tmpPkt->getAtomicOp());
141  atomicOps.push_back(tmpAtomicOp);
142  } else if (tmpPkt->isWrite()) {
143  dataBlock.setData(tmpPkt->getPtr<uint8_t>(),
144  tmpOffset, tmpSize);
145  }
146  for (int j = 0; j < tmpSize; j++) {
147  accessMask[tmpOffset + j] = true;
148  }
149  }
150  std::shared_ptr<RubyRequest> msg;
151  if (pkt->isAtomicOp()) {
152  msg = std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
153  pkt->getSize(), pc, crequest->getRubyType(),
154  RubyAccessMode_Supervisor, pkt,
155  PrefetchBit_No, proc_id, 100,
156  blockSize, accessMask,
157  dataBlock, atomicOps, crequest->getSeqNum());
158  } else {
159  msg = std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
160  pkt->getSize(), pc, crequest->getRubyType(),
161  RubyAccessMode_Supervisor, pkt,
162  PrefetchBit_No, proc_id, 100,
163  blockSize, accessMask,
164  dataBlock, crequest->getSeqNum());
165  }
166 
167  if (pkt->cmd == MemCmd::WriteReq) {
168  makeWriteCompletePkts(crequest);
169  }
170 
171  DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %s\n",
172  curTick(), m_version, "Coal", "Begin", "", "",
173  printAddress(msg->getPhysicalAddress()),
174  RubyRequestType_to_string(crequest->getRubyType()));
175 
176  fatal_if(crequest->getRubyType() == RubyRequestType_IFETCH,
177  "there should not be any I-Fetch requests in the GPU Coalescer");
178 
179  if (!deadlockCheckEvent.scheduled()) {
182  curTick());
183  }
184 
185  assert(m_mandatory_q_ptr);
186  Tick latency = cyclesToTicks(
188  m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
189 }
190 
191 void
193 {
194  // In VIPER protocol, for each write request, down-stream caches
195  // return two responses: writeCallback and writeCompleteCallback.
196  // We need to prepare a writeCompletePkt for each write request so
197  // that when writeCompleteCallback is called, we can respond
198  // requesting wavefront right away.
199  // writeCompletePkt inherits request and senderState of the original
200  // write request packet so that we can find the original requestor
201  // later. This assumes that request and senderState are not deleted
202  // before writeCompleteCallback is called.
203 
204  auto key = crequest->getSeqNum();
205  std::vector<PacketPtr>& req_pkts = crequest->getPackets();
206 
207  for (auto pkt : req_pkts) {
208  DPRINTF(GPUCoalescer, "makeWriteCompletePkts: instSeqNum %d\n",
209  key);
210  assert(pkt->cmd == MemCmd::WriteReq);
211 
212  PacketPtr writeCompletePkt = new Packet(pkt->req,
214  writeCompletePkt->setAddr(pkt->getAddr());
215  writeCompletePkt->senderState = pkt->senderState;
216  m_writeCompletePktMap[key].push_back(writeCompletePkt);
217  }
218 }
219 
220 void
222 {
223  DPRINTF(GPUCoalescer, "writeCompleteCallback: instSeqNum %d addr 0x%x\n",
224  instSeqNum, addr);
225 
226  auto key = instSeqNum;
227  assert(m_writeCompletePktMap.count(key) == 1 &&
228  !m_writeCompletePktMap[key].empty());
229 
230  m_writeCompletePktMap[key].erase(
231  std::remove_if(
232  m_writeCompletePktMap[key].begin(),
233  m_writeCompletePktMap[key].end(),
234  [addr](PacketPtr writeCompletePkt) -> bool {
235  if (makeLineAddress(writeCompletePkt->getAddr()) == addr) {
236  RubyPort::SenderState *ss =
237  safe_cast<RubyPort::SenderState *>
238  (writeCompletePkt->senderState);
239  MemResponsePort *port = ss->port;
240  assert(port != NULL);
241 
242  writeCompletePkt->senderState = ss->predecessor;
243  delete ss;
244  port->hitCallback(writeCompletePkt);
245  return true;
246  }
247  return false;
248  }
249  ),
250  m_writeCompletePktMap[key].end()
251  );
252 
253  trySendRetries();
254 
255  if (m_writeCompletePktMap[key].empty())
256  m_writeCompletePktMap.erase(key);
257 }
258 
259 void
261 {
262  assert(m_cache_inv_pkt && m_num_pending_invs > 0);
263 
265 
266  if (m_num_pending_invs == 0) {
268  m_cache_inv_pkt = nullptr;
269  completeHitCallback(pkt_list);
270  }
271 }
272 
276 void
278 {
279  int size = m_dataCache_ptr->getNumBlocks();
281  "There are %d Invalidations outstanding before Cache Walk\n",
283  // Walk the cache
284  for (int i = 0; i < size; i++) {
286  // Evict Read-only data
287  RubyRequestType request_type = RubyRequestType_REPLACEMENT;
288  std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
289  clockEdge(), addr, 0, 0,
290  request_type, RubyAccessMode_Supervisor,
291  nullptr);
292  DPRINTF(GPUCoalescer, "Evicting addr 0x%x\n", addr);
293  assert(m_mandatory_q_ptr != NULL);
294  Tick latency = cyclesToTicks(
295  m_controller->mandatoryQueueLatency(request_type));
296  m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
298  }
300  "There are %d Invalidatons outstanding after Cache Walk\n",
302 }
GPUCoalescer::m_deadlock_threshold
Cycles m_deadlock_threshold
Definition: GPUCoalescer.hh:412
Event::scheduled
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:462
Profiler.hh
RubySystem::getBlockSizeBytes
static uint32_t getBlockSizeBytes()
Definition: RubySystem.hh:61
CoalescedRequest::getRubyType
RubyRequestType getRubyType() const
Definition: GPUCoalescer.hh:119
Packet::getAddr
Addr getAddr() const
Definition: packet.hh:755
makeLineAddress
Addr makeLineAddress(Addr addr)
Definition: Address.cc:54
ArmISA::i
Bitfield< 7 > i
Definition: miscregs_types.hh:63
VIPERCoalescer::VIPERCoalescer
VIPERCoalescer(const Params &)
Definition: VIPERCoalescer.cc:54
GPUCoalescer
Definition: GPUCoalescer.hh:209
MemCmd::ReadReq
@ ReadReq
Definition: packet.hh:83
GPUCoalescer::Params
RubyGPUCoalescerParams Params
Definition: GPUCoalescer.hh:232
GPUCoalescer.hh
Tick
uint64_t Tick
Tick count type.
Definition: types.hh:59
AbstractController.hh
Packet::isAtomicOp
bool isAtomicOp() const
Definition: packet.hh:794
VIPERCoalescer::issueRequest
void issueRequest(CoalescedRequest *crequest) override
Definition: VIPERCoalescer.cc:108
Packet::req
RequestPtr req
A pointer to the original request.
Definition: packet.hh:341
RubyRequest.hh
DataBlock::setData
void setData(const uint8_t *data, int offset, int len)
Definition: DataBlock.cc:118
std::vector< bool >
Packet::getSize
unsigned getSize() const
Definition: packet.hh:765
DataBlock::clear
void clear()
Definition: DataBlock.cc:62
CoalescedRequest::getFirstPkt
PacketPtr getFirstPkt() const
Definition: GPUCoalescer.hh:117
MessageBuffer::enqueue
void enqueue(MsgPtr message, Tick curTime, Tick delta)
Definition: MessageBuffer.cc:191
VIPERCoalescer.hh
VIPERCoalescer::~VIPERCoalescer
~VIPERCoalescer()
Definition: VIPERCoalescer.cc:61
DataBlock
Definition: DataBlock.hh:54
packet.hh
RubyPort::m_mandatory_q_ptr
MessageBuffer * m_mandatory_q_ptr
Definition: RubyPort.hh:191
GPUCoalescer::deadlockCheckEvent
EventFunctionWrapper deadlockCheckEvent
Definition: GPUCoalescer.hh:455
str.hh
RubyPort::m_controller
AbstractController * m_controller
Definition: RubyPort.hh:190
MemCmd::WriteReq
@ WriteReq
Definition: packet.hh:86
MemCmd::WriteCompleteResp
@ WriteCompleteResp
Definition: packet.hh:88
GPUCoalescer::completeHitCallback
void completeHitCallback(std::vector< PacketPtr > &mylist)
Definition: GPUCoalescer.cc:909
ArmISA::j
Bitfield< 24 > j
Definition: miscregs_types.hh:54
EventManager::schedule
void schedule(Event &event, Tick when)
Definition: eventq.hh:1016
Clocked::cyclesToTicks
Tick cyclesToTicks(Cycles c) const
Definition: clocked_object.hh:224
VIPERCoalescer::writeCompleteCallback
void writeCompleteCallback(Addr address, uint64_t instSeqNum)
Definition: VIPERCoalescer.cc:221
CacheMemory.hh
VIPERCoalescer::m_num_pending_invs
int m_num_pending_invs
Definition: VIPERCoalescer.hh:76
VIPERCoalescer::m_writeCompletePktMap
std::unordered_map< uint64_t, std::vector< PacketPtr > > m_writeCompletePktMap
Definition: VIPERCoalescer.hh:85
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:237
MipsISA::pc
Bitfield< 4 > pc
Definition: pra_constants.hh:240
VIPERCoalescer::m_cache_inv_pkt
PacketPtr m_cache_inv_pkt
Definition: VIPERCoalescer.hh:73
RubyTester.hh
Clocked::clockEdge
Tick clockEdge(Cycles cycles=Cycles(0)) const
Determine the tick when a cycle begins, by default the current one, but the argument also enables the...
Definition: clocked_object.hh:174
Packet::getAtomicOp
AtomicOpFunctor * getAtomicOp() const
Accessor function to atomic op.
Definition: packet.hh:793
DPRINTFR
#define DPRINTFR(...)
Definition: trace.hh:239
std::pair< int, AtomicOpFunctor * >
VIPERCoalescer::makeWriteCompletePkts
void makeWriteCompletePkts(CoalescedRequest *crequest)
Definition: VIPERCoalescer.cc:192
RubySystem.hh
ProbePoints::Packet
ProbePointArg< PacketInfo > Packet
Packet probe point.
Definition: mem.hh:103
Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:148
Packet::setAddr
void setAddr(Addr _addr)
Update the address of this packet mid-transaction.
Definition: packet.hh:763
Clocked::clockPeriod
Tick clockPeriod() const
Definition: clocked_object.hh:214
GPUCoalescer::makeRequest
RequestStatus makeRequest(PacketPtr pkt) override
Definition: GPUCoalescer.cc:617
X86ISA::addr
Bitfield< 3 > addr
Definition: types.hh:80
Packet::cmd
MemCmd cmd
The command field of the packet.
Definition: packet.hh:336
MessageBuffer.hh
VIPERCoalescer::invTCPCallback
void invTCPCallback(Addr address)
Definition: VIPERCoalescer.cc:260
CoalescedRequest
Definition: GPUCoalescer.hh:102
MemCmd::MemSyncReq
@ MemSyncReq
Definition: packet.hh:116
Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:258
CoalescedRequest::getSeqNum
uint64_t getSeqNum() const
Definition: GPUCoalescer.hh:116
CacheMemory::getAddressAtIdx
Addr getAddressAtIdx(int idx) const
Definition: CacheMemory.cc:159
logging.hh
Packet::isWrite
bool isWrite() const
Definition: packet.hh:558
Packet::getPtr
T * getPtr()
get a pointer to the data ptr.
Definition: packet.hh:1158
GPUCoalescer::empty
bool empty() const
Definition: GPUCoalescer.cc:583
CoalescedRequest::getPackets
std::vector< PacketPtr > & getPackets()
Definition: GPUCoalescer.hh:120
curTick
Tick curTick()
The universal simulation clock.
Definition: cur_tick.hh:43
Packet::senderState
SenderState * senderState
This packet's sender state.
Definition: packet.hh:509
MipsISA::p
Bitfield< 0 > p
Definition: pra_constants.hh:323
RubyPort::trySendRetries
void trySendRetries()
Definition: RubyPort.cc:457
VIPERCoalescer::invTCP
void invTCP()
Invalidate TCP.
Definition: VIPERCoalescer.cc:277
fatal_if
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition: logging.hh:219
AbstractController::mandatoryQueueLatency
virtual Cycles mandatoryQueueLatency(const RubyRequestType &param_type)
Definition: AbstractController.hh:113
RubyPort::m_version
uint32_t m_version
Definition: RubyPort.hh:189
VIPERCoalescer::makeRequest
RequestStatus makeRequest(PacketPtr pkt) override
Definition: VIPERCoalescer.cc:68
printAddress
std::string printAddress(Addr addr)
Definition: Address.cc:74
SubBlock.hh
GPUCoalescer::m_dataCache_ptr
CacheMemory * m_dataCache_ptr
Definition: GPUCoalescer.hh:414
CacheMemory::getNumBlocks
int getNumBlocks() const
Definition: CacheMemory.hh:153

Generated on Tue Mar 23 2021 19:41:28 for gem5 by doxygen 1.8.17