gem5  v22.1.0.0
VIPERCoalescer.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
33 
34 #include "base/logging.hh"
35 #include "base/str.hh"
37 #include "debug/GPUCoalescer.hh"
38 #include "debug/MemoryAccess.hh"
39 #include "debug/ProtocolTrace.hh"
40 #include "mem/packet.hh"
49 #include "params/VIPERCoalescer.hh"
50 
51 namespace gem5
52 {
53 
54 namespace ruby
55 {
56 
// Constructor initializer list and (empty) body.
// NOTE(review): the declarator line (listing line 57) is missing from this
// extract; the cross-reference index at the end of the listing gives it as
// "VIPERCoalescer(const Params &)".
58  : GPUCoalescer(p),  // pass the parameter object through to GPUCoalescer
59  m_cache_inv_pkt(nullptr),  // no cache invalidation request is pending yet
60  m_num_pending_invs(0)  // no invalidations are outstanding yet
61 {
62 }
63 
// NOTE(review): empty body whose declarator line (listing line 64) is
// missing from this extract -- presumably the destructor; confirm against
// the real file.
65 {
66 }
67 
68 // Places an uncoalesced packet in uncoalescedTable. If the packet is a
69 // special type (MemFence, scoping, etc.), it is issued immediately.
//
// Returns RequestStatus_Aliased when an INV_L1 request arrives while another
// cache invalidation is still pending (the request must be retried later);
// otherwise returns RequestStatus_Issued.
//
// NOTE(review): the signature (listing line 71) and one statement (listing
// line 95) are missing from this extract; the cross-reference index at the
// end of the listing gives the signature as
// "RequestStatus makeRequest(PacketPtr pkt) override". The statement that
// actually stores the packet is presumably the elided line 95 -- confirm.
70 RequestStatus
72 {
73  // VIPER only supports the following memory request types:
74  // MemSyncReq & INV_L1 : TCP cache invalidation
75  // ReadReq : cache read
76  // WriteReq : cache write
77  // AtomicOp : cache atomic
78  //
79  // VIPER does not expect MemSyncReq & Release since, in GCN3, the compute
80  // unit does not specify an equivalent type of memory request.
81  assert((pkt->cmd == MemCmd::MemSyncReq && pkt->req->isInvL1()) ||
82  pkt->cmd == MemCmd::ReadReq ||
83  pkt->cmd == MemCmd::WriteReq ||
84  pkt->isAtomicOp());
85 
86  if (pkt->req->isInvL1() && m_cache_inv_pkt) {
87  // In the VIPER protocol, the coalescer is not able to handle two or
88  // more cache invalidation requests at a time. Cache invalidation
89  // requests must be serialized to ensure that all stale data in the
90  // TCP are invalidated correctly. If there is already a pending
91  // cache invalidation request, this request must be retried later.
92  return RequestStatus_Aliased;
93  }
94 
96 
97  if (pkt->req->isInvL1()) {
98  // In the VIPER protocol, a compute unit sends a MemSyncReq with the
99  // INV_L1 flag to invalidate the TCP. Upon receiving such a request,
100  // VIPERCoalescer starts a cache walk to invalidate all valid entries
101  // in the TCP. The request completes once all entries are invalidated.
102  assert(!m_cache_inv_pkt);
103  m_cache_inv_pkt = pkt;  // remember the request until the walk finishes
104  invTCP();
105  }
106 
107  return RequestStatus_Issued;
108 }
109 
// Builds a RubyRequest from a coalesced request and enqueues it on the
// mandatory queue.
//
// The data block, per-byte access mask and atomic-op list are assembled from
// every packet in crequest; the request's address, size, PC and context id
// are taken from the first packet. For WriteReq packets the matching
// write-complete packets are prepared before the request is enqueued.
//
// NOTE(review): the signature (listing line 111) and listing lines 135,
// 183-184 and 190 are missing from this extract. The cross-reference index
// at the end of the listing gives the signature as
// "void issueRequest(CoalescedRequest *crequest) override". atomicOps is
// used below but its declaration is not visible (presumably line 135).
110 void
112 {
113  PacketPtr pkt = crequest->getFirstPkt();
114 
115  int proc_id = -1;  // stays -1 when the request carries no context id
116  if (pkt != NULL && pkt->req->hasContextId()) {
117  proc_id = pkt->req->contextId();
118  }
119 
120  // If valid, copy the pc to the ruby request
121  Addr pc = 0;
122  if (pkt->req->hasPC()) {  // NOTE(review): pkt is dereferenced from here on without the NULL check used above
123  pc = pkt->req->getPC();
124  }
125 
126  Addr line_addr = makeLineAddress(pkt->getAddr());
127 
128  // Build the data block and access mask that record written bytes
129  // and atomic operations. This enables partial writes
130  // and partial reads of those writes.
131  DataBlock dataBlock;
132  dataBlock.clear();
133  uint32_t blockSize = RubySystem::getBlockSizeBytes();
134  std::vector<bool> accessMask(blockSize,false);  // one flag per byte of the block
136  uint32_t tableSize = crequest->getPackets().size();
137  for (int i = 0; i < tableSize; i++) {  // NOTE(review): signed index compared against a uint32_t bound
138  PacketPtr tmpPkt = crequest->getPackets()[i];
139  uint32_t tmpOffset = (tmpPkt->getAddr()) - line_addr;  // offset of this packet from the line address
140  uint32_t tmpSize = tmpPkt->getSize();
141  if (tmpPkt->isAtomicOp()) {
142  std::pair<int,AtomicOpFunctor *> tmpAtomicOp(tmpOffset,
143  tmpPkt->getAtomicOp());
144  atomicOps.push_back(tmpAtomicOp);  // record (offset, functor) for this atomic
145  } else if (tmpPkt->isWrite()) {
146  dataBlock.setData(tmpPkt->getPtr<uint8_t>(),
147  tmpOffset, tmpSize);  // copy the written bytes into the block image
148  }
149  for (int j = 0; j < tmpSize; j++) {  // NOTE(review): signed index compared against a uint32_t bound
150  accessMask[tmpOffset + j] = true;  // mark every byte this packet touches
151  }
152  }
153  std::shared_ptr<RubyRequest> msg;
154  if (pkt->isAtomicOp()) {
155  msg = std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
156  pkt->getSize(), pc, crequest->getRubyType(),
157  RubyAccessMode_Supervisor, pkt,
158  PrefetchBit_No, proc_id, 100,
159  blockSize, accessMask,
160  dataBlock, atomicOps, crequest->getSeqNum());  // atomic requests additionally carry atomicOps
161  } else {
162  msg = std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
163  pkt->getSize(), pc, crequest->getRubyType(),
164  RubyAccessMode_Supervisor, pkt,
165  PrefetchBit_No, proc_id, 100,
166  blockSize, accessMask,
167  dataBlock, crequest->getSeqNum());
168  }
169 
170  if (pkt->cmd == MemCmd::WriteReq) {
171  makeWriteCompletePkts(crequest);  // prepare the packets answered later by writeCompleteCallback
172  }
173 
174  DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %s\n",
175  curTick(), m_version, "Coal", "Begin", "", "",
176  printAddress(msg->getPhysicalAddress()),
177  RubyRequestType_to_string(crequest->getRubyType()));
178 
179  fatal_if(crequest->getRubyType() == RubyRequestType_IFETCH,
180  "there should not be any I-Fetch requests in the GPU Coalescer");
181 
182  if (!deadlockCheckEvent.scheduled()) {  // body partly elided (listing lines 183-184)
185  curTick());
186  }
187 
188  assert(m_mandatory_q_ptr);
189  Tick latency = cyclesToTicks(  // argument is on the elided listing line 190
191  m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
192 }
193 
// Creates one write-complete packet per packet of a coalesced write request
// and stores them in m_writeCompletePktMap under the request's sequence
// number.
//
// NOTE(review): the signature (listing line 195) and the second line of the
// Packet constructor call (listing line 216) are missing from this extract;
// the cross-reference index at the end of the listing gives the signature as
// "void makeWriteCompletePkts(CoalescedRequest *crequest)".
194 void
196 {
197  // In the VIPER protocol, for each write request, down-stream caches
198  // return two responses: writeCallback and writeCompleteCallback.
199  // We need to prepare a writeCompletePkt for each write request so
200  // that when writeCompleteCallback is called, we can respond to the
201  // requesting wavefront right away.
202  // writeCompletePkt inherits request and senderState of the original
203  // write request packet so that we can find the original requestor
204  // later. This assumes that request and senderState are not deleted
205  // before writeCompleteCallback is called.
206 
207  auto key = crequest->getSeqNum();  // map key: the request's sequence number
208  std::vector<PacketPtr>& req_pkts = crequest->getPackets();
209 
210  for (auto pkt : req_pkts) {
211  DPRINTF(GPUCoalescer, "makeWriteCompletePkts: instSeqNum %d\n",
212  key);
213  assert(pkt->cmd == MemCmd::WriteReq);  // every packet in the request must be a write
214 
215  PacketPtr writeCompletePkt = new Packet(pkt->req,  // remaining argument(s) on elided listing line 216
217  writeCompletePkt->setAddr(pkt->getAddr());
218  writeCompletePkt->senderState = pkt->senderState;  // shared with the original packet, not copied
219  m_writeCompletePktMap[key].push_back(writeCompletePkt);
220  }
221 }
222 
// Responds to every pending write-complete packet of instruction instSeqNum
// whose line address equals addr, removes those packets from
// m_writeCompletePktMap, and drops the map entry once it is empty.
//
// NOTE(review): the signature (listing line 224) is missing from this
// extract; the cross-reference index at the end of the listing gives it as
// "void writeCompleteCallback(Addr address, uint64_t instSeqNum)", while the
// body refers to the address as addr -- confirm the parameter name.
223 void
225 {
226  DPRINTF(GPUCoalescer, "writeCompleteCallback: instSeqNum %d addr 0x%x\n",
227  instSeqNum, addr);
228 
229  auto key = instSeqNum;
230  assert(m_writeCompletePktMap.count(key) == 1 &&
231  !m_writeCompletePktMap[key].empty());
232 
233  m_writeCompletePktMap[key].erase(
234  std::remove_if(
235  m_writeCompletePktMap[key].begin(),
236  m_writeCompletePktMap[key].end(),
237  [addr](PacketPtr writeCompletePkt) -> bool {  // predicate with side effects: responds to each matching packet
238  if (makeLineAddress(writeCompletePkt->getAddr()) == addr) {
239  RubyPort::SenderState *ss =
240  safe_cast<RubyPort::SenderState *>
241  (writeCompletePkt->senderState);
242  MemResponsePort *port = ss->port;
243  assert(port != NULL);
244 
245  writeCompletePkt->senderState = ss->predecessor;  // restore the previous sender state before responding
246  delete ss;
247  port->hitCallback(writeCompletePkt);
248  return true;  // matched: remove this packet from the vector
249  }
250  return false;  // different line: keep the packet pending
251  }
252  ),
253  m_writeCompletePktMap[key].end()
254  );
255 
256  trySendRetries();
257 
258  if (m_writeCompletePktMap[key].empty())  // all packets for this instruction have been answered
259  m_writeCompletePktMap.erase(key);
260 }
261 
// Handles completion of an invalidation. Once m_num_pending_invs is zero,
// the pending invalidation packet is cleared and completeHitCallback is
// invoked with pkt_list.
//
// NOTE(review): the signature (listing line 263) and listing lines 267 and
// 270 are missing from this extract; the cross-reference index at the end of
// the listing gives the signature as "void invTCPCallback(Addr address)".
// Presumably line 267 decrements m_num_pending_invs and line 270 defines
// pkt_list from m_cache_inv_pkt -- confirm against the real file.
262 void
264 {
265  assert(m_cache_inv_pkt && m_num_pending_invs > 0);  // only valid while an invalidation is in progress
266 
268 
269  if (m_num_pending_invs == 0) {
271  m_cache_inv_pkt = nullptr;  // allow the next INV_L1 request to be accepted
272  completeHitCallback(pkt_list);
273  }
274 }
275 
// Walks the data cache and, for each block index, enqueues a
// RubyRequestType_REPLACEMENT request on the mandatory queue.
//
// NOTE(review): the signature (listing line 280) and listing lines 283, 285,
// 288, 300, 302 and 304 are missing from this extract, so both DPRINTF calls
// appear without their first and last lines and addr is used without a
// visible definition. The cross-reference index at the end of the listing
// gives the signature as "void invTCP()" and mentions getAddressAtIdx --
// presumably addr comes from it; confirm against the real file.
279 void
281 {
282  int size = m_dataCache_ptr->getNumBlocks();
284  "There are %d Invalidations outstanding before Cache Walk\n",
286  // Walk the cache
287  for (int i = 0; i < size; i++) {
289  // Evict Read-only data
290  RubyRequestType request_type = RubyRequestType_REPLACEMENT;
291  std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
292  clockEdge(), addr, 0, 0,
293  request_type, RubyAccessMode_Supervisor,
294  nullptr);  // no packet is attached to the eviction request
295  DPRINTF(GPUCoalescer, "Evicting addr 0x%x\n", addr);
296  assert(m_mandatory_q_ptr != NULL);
297  Tick latency = cyclesToTicks(
298  m_controller->mandatoryQueueLatency(request_type));
299  m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
301  }
303  "There are %d Invalidatons outstanding after Cache Walk\n",  // NOTE(review): "Invalidatons" is a typo in the runtime message; left unchanged here
305 }
306 
307 } // namespace ruby
308 } // namespace gem5
#define DPRINTFR(x,...)
Definition: trace.hh:200
#define DPRINTF(x,...)
Definition: trace.hh:186
Tick clockEdge(Cycles cycles=Cycles(0)) const
Determine the tick when a cycle begins, by default the current one, but the argument also enables the...
Tick cyclesToTicks(Cycles c) const
Tick clockPeriod() const
@ WriteCompleteResp
Definition: packet.hh:91
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:294
T * getPtr()
get a pointer to the data ptr.
Definition: packet.hh:1212
Addr getAddr() const
Definition: packet.hh:805
AtomicOpFunctor * getAtomicOp() const
Accessor function to atomic op.
Definition: packet.hh:843
bool isAtomicOp() const
Definition: packet.hh:844
void setAddr(Addr _addr)
Update the address of this packet mid-transaction.
Definition: packet.hh:813
SenderState * senderState
This packet's sender state.
Definition: packet.hh:544
bool isWrite() const
Definition: packet.hh:593
RequestPtr req
A pointer to the original request.
Definition: packet.hh:376
unsigned getSize() const
Definition: packet.hh:815
MemCmd cmd
The command field of the packet.
Definition: packet.hh:371
virtual Cycles mandatoryQueueLatency(const RubyRequestType &param_type)
Addr getAddressAtIdx(int idx) const
Definition: CacheMemory.cc:166
std::vector< PacketPtr > & getPackets()
PacketPtr getFirstPkt() const
RubyRequestType getRubyType() const
void setData(const uint8_t *data, int offset, int len)
Definition: DataBlock.cc:124
RubyGPUCoalescerParams Params
RequestStatus makeRequest(PacketPtr pkt) override
void completeHitCallback(std::vector< PacketPtr > &mylist)
CacheMemory * m_dataCache_ptr
EventFunctionWrapper deadlockCheckEvent
void enqueue(MsgPtr message, Tick curTime, Tick delta)
AbstractController * m_controller
Definition: RubyPort.hh:199
MessageBuffer * m_mandatory_q_ptr
Definition: RubyPort.hh:200
static uint32_t getBlockSizeBytes()
Definition: RubySystem.hh:72
void makeWriteCompletePkts(CoalescedRequest *crequest)
void invTCP()
Invalidate TCP.
std::unordered_map< uint64_t, std::vector< PacketPtr > > m_writeCompletePktMap
RequestStatus makeRequest(PacketPtr pkt) override
void issueRequest(CoalescedRequest *crequest) override
VIPERCoalescer(const Params &)
void invTCPCallback(Addr address)
void writeCompleteCallback(Addr address, uint64_t instSeqNum)
STL pair class.
Definition: stl.hh:58
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:465
void schedule(Event &event, Tick when)
Definition: eventq.hh:1019
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition: logging.hh:226
Bitfield< 7 > i
Definition: misc_types.hh:67
Bitfield< 24 > j
Definition: misc_types.hh:57
Bitfield< 4 > pc
Bitfield< 54 > p
Definition: pagetable.hh:70
Bitfield< 3 > addr
Definition: types.hh:84
ProbePointArg< PacketInfo > Packet
Packet probe point.
Definition: mem.hh:109
Addr makeLineAddress(Addr addr)
Definition: Address.cc:60
std::string printAddress(Addr addr)
Definition: Address.cc:80
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Tick curTick()
The universal simulation clock.
Definition: cur_tick.hh:46
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:147
uint64_t Tick
Tick count type.
Definition: types.hh:58
Declaration of the Packet class.

Generated on Wed Dec 21 2022 10:22:39 for gem5 by doxygen 1.9.1