gem5  v20.1.0.0
VIPERCoalescer.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
35 
#include "mem/ruby/system/VIPERCoalescer.hh"

#include "base/logging.hh"
#include "base/str.hh"
#include "config/the_isa.hh"
#include "cpu/testers/rubytest/RubyTester.hh"
#include "debug/GPUCoalescer.hh"
#include "debug/MemoryAccess.hh"
#include "debug/ProtocolTrace.hh"
#include "mem/packet.hh"
#include "mem/ruby/common/SubBlock.hh"
#include "mem/ruby/network/MessageBuffer.hh"
#include "mem/ruby/profiler/Profiler.hh"
#include "mem/ruby/slicc_interface/AbstractController.hh"
#include "mem/ruby/slicc_interface/RubyRequest.hh"
#include "mem/ruby/structures/CacheMemory.hh"
#include "mem/ruby/system/GPUCoalescer.hh"
#include "mem/ruby/system/RubySystem.hh"
#include "params/VIPERCoalescer.hh"
53 
54 using namespace std;
55 
57 VIPERCoalescerParams::create()
58 {
59  return new VIPERCoalescer(this);
60 }
61 
63  : GPUCoalescer(p),
64  m_cache_inv_pkt(nullptr),
65  m_num_pending_invs(0)
66 {
67 }
68 
70 {
71 }
72 
73 // Places an uncoalesced packet in uncoalescedTable. If the packet is a
74 // special type (MemFence, scoping, etc), it is issued immediately.
75 RequestStatus
77 {
78  // VIPER only supports following memory request types
79  // MemSyncReq & Acquire: TCP cache invalidation
80  // ReadReq : cache read
81  // WriteReq : cache write
82  // AtomicOp : cache atomic
83  //
84  // VIPER does not expect MemSyncReq & Release since in GCN3, compute unit
85  // does not specify an equivalent type of memory request.
86  // TODO: future patches should rename Acquire and Release
87  assert((pkt->cmd == MemCmd::MemSyncReq && pkt->req->isAcquire()) ||
88  pkt->cmd == MemCmd::ReadReq ||
89  pkt->cmd == MemCmd::WriteReq ||
90  pkt->isAtomicOp());
91 
92  if (pkt->req->isAcquire() && m_cache_inv_pkt) {
93  // In VIPER protocol, the coalescer is not able to handle two or
94  // more cache invalidation requests at a time. Cache invalidation
95  // requests must be serialized to ensure that all stale data in
96  // TCP are invalidated correctly. If there's already a pending
97  // cache invalidation request, we must retry this request later
98  return RequestStatus_Aliased;
99  }
100 
102 
103  if (pkt->req->isAcquire()) {
104  // In VIPER protocol, a compute unit sends a MemSyncReq with Acquire
105  // flag to invalidate TCP. Upon receiving a request of this type,
106  // VIPERCoalescer starts a cache walk to invalidate all valid entries
107  // in TCP. The request is completed once all entries are invalidated.
108  assert(!m_cache_inv_pkt);
109  m_cache_inv_pkt = pkt;
110  invTCP();
111  }
112 
113  return RequestStatus_Issued;
114 }
115 
116 void
118 {
119  PacketPtr pkt = crequest->getFirstPkt();
120 
121  int proc_id = -1;
122  if (pkt != NULL && pkt->req->hasContextId()) {
123  proc_id = pkt->req->contextId();
124  }
125 
126  // If valid, copy the pc to the ruby request
127  Addr pc = 0;
128  if (pkt->req->hasPC()) {
129  pc = pkt->req->getPC();
130  }
131 
132  Addr line_addr = makeLineAddress(pkt->getAddr());
133 
134  // Creating WriteMask that records written bytes
135  // and atomic operations. This enables partial writes
136  // and partial reads of those writes
137  DataBlock dataBlock;
138  dataBlock.clear();
139  uint32_t blockSize = RubySystem::getBlockSizeBytes();
140  std::vector<bool> accessMask(blockSize,false);
142  uint32_t tableSize = crequest->getPackets().size();
143  for (int i = 0; i < tableSize; i++) {
144  PacketPtr tmpPkt = crequest->getPackets()[i];
145  uint32_t tmpOffset = (tmpPkt->getAddr()) - line_addr;
146  uint32_t tmpSize = tmpPkt->getSize();
147  if (tmpPkt->isAtomicOp()) {
148  std::pair<int,AtomicOpFunctor *> tmpAtomicOp(tmpOffset,
149  tmpPkt->getAtomicOp());
150  atomicOps.push_back(tmpAtomicOp);
151  } else if (tmpPkt->isWrite()) {
152  dataBlock.setData(tmpPkt->getPtr<uint8_t>(),
153  tmpOffset, tmpSize);
154  }
155  for (int j = 0; j < tmpSize; j++) {
156  accessMask[tmpOffset + j] = true;
157  }
158  }
159  std::shared_ptr<RubyRequest> msg;
160  if (pkt->isAtomicOp()) {
161  msg = std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
162  pkt->getPtr<uint8_t>(),
163  pkt->getSize(), pc, crequest->getRubyType(),
164  RubyAccessMode_Supervisor, pkt,
165  PrefetchBit_No, proc_id, 100,
166  blockSize, accessMask,
167  dataBlock, atomicOps, crequest->getSeqNum());
168  } else {
169  msg = std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
170  pkt->getPtr<uint8_t>(),
171  pkt->getSize(), pc, crequest->getRubyType(),
172  RubyAccessMode_Supervisor, pkt,
173  PrefetchBit_No, proc_id, 100,
174  blockSize, accessMask,
175  dataBlock, crequest->getSeqNum());
176  }
177 
178  if (pkt->cmd == MemCmd::WriteReq) {
179  makeWriteCompletePkts(crequest);
180  }
181 
182  DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %s\n",
183  curTick(), m_version, "Coal", "Begin", "", "",
184  printAddress(msg->getPhysicalAddress()),
185  RubyRequestType_to_string(crequest->getRubyType()));
186 
187  fatal_if(crequest->getRubyType() == RubyRequestType_IFETCH,
188  "there should not be any I-Fetch requests in the GPU Coalescer");
189 
190  if (!deadlockCheckEvent.scheduled()) {
193  curTick());
194  }
195 
196  assert(m_mandatory_q_ptr);
197  Tick latency = cyclesToTicks(
199  m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
200 }
201 
202 void
204 {
205  // In VIPER protocol, for each write request, down-stream caches
206  // return two responses: writeCallback and writeCompleteCallback.
207  // We need to prepare a writeCompletePkt for each write request so
208  // that when writeCompleteCallback is called, we can respond
209  // requesting wavefront right away.
210  // writeCompletePkt inherits request and senderState of the original
211  // write request packet so that we can find the original requestor
212  // later. This assumes that request and senderState are not deleted
213  // before writeCompleteCallback is called.
214 
215  auto key = crequest->getSeqNum();
216  std::vector<PacketPtr>& req_pkts = crequest->getPackets();
217 
218  for (auto pkt : req_pkts) {
219  DPRINTF(GPUCoalescer, "makeWriteCompletePkts: instSeqNum %d\n",
220  key);
221  assert(pkt->cmd == MemCmd::WriteReq);
222 
223  PacketPtr writeCompletePkt = new Packet(pkt->req,
225  writeCompletePkt->setAddr(pkt->getAddr());
226  writeCompletePkt->senderState = pkt->senderState;
227  m_writeCompletePktMap[key].push_back(writeCompletePkt);
228  }
229 }
230 
231 void
233 {
234  DPRINTF(GPUCoalescer, "writeCompleteCallback: instSeqNum %d addr 0x%x\n",
235  instSeqNum, addr);
236 
237  auto key = instSeqNum;
238  assert(m_writeCompletePktMap.count(key) == 1 &&
239  !m_writeCompletePktMap[key].empty());
240 
241  for (auto writeCompletePkt : m_writeCompletePktMap[key]) {
242  if (makeLineAddress(writeCompletePkt->getAddr()) == addr) {
244  safe_cast<RubyPort::SenderState *>
245  (writeCompletePkt->senderState);
246  MemResponsePort *port = ss->port;
247  assert(port != NULL);
248 
249  writeCompletePkt->senderState = ss->predecessor;
250  delete ss;
251  port->hitCallback(writeCompletePkt);
252  }
253  }
254 
255  trySendRetries();
256 
257  if (m_writeCompletePktMap[key].empty())
258  m_writeCompletePktMap.erase(key);
259 }
260 
261 void
263 {
264  assert(m_cache_inv_pkt && m_num_pending_invs > 0);
265 
267 
268  if (m_num_pending_invs == 0) {
270  completeHitCallback(pkt_list);
271  m_cache_inv_pkt = nullptr;
272  }
273 }
274 
278 void
280 {
281  int size = m_dataCache_ptr->getNumBlocks();
283  "There are %d Invalidations outstanding before Cache Walk\n",
285  // Walk the cache
286  for (int i = 0; i < size; i++) {
288  // Evict Read-only data
289  RubyRequestType request_type = RubyRequestType_REPLACEMENT;
290  std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
291  clockEdge(), addr, (uint8_t*) 0, 0, 0,
292  request_type, RubyAccessMode_Supervisor,
293  nullptr);
294  DPRINTF(GPUCoalescer, "Evicting addr 0x%x\n", addr);
295  assert(m_mandatory_q_ptr != NULL);
296  Tick latency = cyclesToTicks(
297  m_controller->mandatoryQueueLatency(request_type));
298  m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
300  }
302  "There are %d Invalidatons outstanding after Cache Walk\n",
304 }
GPUCoalescer::m_deadlock_threshold
Cycles m_deadlock_threshold
Definition: GPUCoalescer.hh:403
Event::scheduled
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:460
Profiler.hh
RubySystem::getBlockSizeBytes
static uint32_t getBlockSizeBytes()
Definition: RubySystem.hh:62
CoalescedRequest::getRubyType
RubyRequestType getRubyType() const
Definition: GPUCoalescer.hh:111
Packet::getAddr
Addr getAddr() const
Definition: packet.hh:754
makeLineAddress
Addr makeLineAddress(Addr addr)
Definition: Address.cc:54
ArmISA::i
Bitfield< 7 > i
Definition: miscregs_types.hh:63
GPUCoalescer
Definition: GPUCoalescer.hh:201
MemCmd::ReadReq
@ ReadReq
Definition: packet.hh:82
GPUCoalescer::Params
RubyGPUCoalescerParams Params
Definition: GPUCoalescer.hh:224
GPUCoalescer.hh
Tick
uint64_t Tick
Tick count type.
Definition: types.hh:63
AbstractController.hh
Packet::isAtomicOp
bool isAtomicOp() const
Definition: packet.hh:793
VIPERCoalescer::issueRequest
void issueRequest(CoalescedRequest *crequest) override
Definition: VIPERCoalescer.cc:117
Packet::req
RequestPtr req
A pointer to the original request.
Definition: packet.hh:340
RubyRequest.hh
DataBlock::setData
void setData(const uint8_t *data, int offset, int len)
Definition: DataBlock.cc:108
std::vector< bool >
Packet::getSize
unsigned getSize() const
Definition: packet.hh:764
DataBlock::clear
void clear()
Definition: DataBlock.cc:50
CoalescedRequest::getFirstPkt
PacketPtr getFirstPkt() const
Definition: GPUCoalescer.hh:109
MessageBuffer::enqueue
void enqueue(MsgPtr message, Tick curTime, Tick delta)
Definition: MessageBuffer.cc:162
VIPERCoalescer.hh
VIPERCoalescer::~VIPERCoalescer
~VIPERCoalescer()
Definition: VIPERCoalescer.cc:69
DataBlock
Definition: DataBlock.hh:40
packet.hh
RubyPort::m_mandatory_q_ptr
MessageBuffer * m_mandatory_q_ptr
Definition: RubyPort.hh:191
GPUCoalescer::deadlockCheckEvent
EventFunctionWrapper deadlockCheckEvent
Definition: GPUCoalescer.hh:442
str.hh
RubyPort::m_controller
AbstractController * m_controller
Definition: RubyPort.hh:190
MemCmd::WriteReq
@ WriteReq
Definition: packet.hh:85
MemCmd::WriteCompleteResp
@ WriteCompleteResp
Definition: packet.hh:87
GPUCoalescer::completeHitCallback
void completeHitCallback(std::vector< PacketPtr > &mylist)
Definition: GPUCoalescer.cc:777
ArmISA::j
Bitfield< 24 > j
Definition: miscregs_types.hh:54
RubyPort::MemResponsePort
Definition: RubyPort.hh:75
EventManager::schedule
void schedule(Event &event, Tick when)
Definition: eventq.hh:1005
Clocked::cyclesToTicks
Tick cyclesToTicks(Cycles c) const
Definition: clocked_object.hh:224
ArmISA::ss
Bitfield< 21 > ss
Definition: miscregs_types.hh:56
VIPERCoalescer::writeCompleteCallback
void writeCompleteCallback(Addr address, uint64_t instSeqNum)
Definition: VIPERCoalescer.cc:232
CacheMemory.hh
VIPERCoalescer::m_num_pending_invs
int m_num_pending_invs
Definition: VIPERCoalescer.hh:76
VIPERCoalescer::m_writeCompletePktMap
std::unordered_map< uint64_t, std::vector< PacketPtr > > m_writeCompletePktMap
Definition: VIPERCoalescer.hh:85
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:234
MipsISA::pc
Bitfield< 4 > pc
Definition: pra_constants.hh:240
VIPERCoalescer::m_cache_inv_pkt
PacketPtr m_cache_inv_pkt
Definition: VIPERCoalescer.hh:73
RubyTester.hh
Clocked::clockEdge
Tick clockEdge(Cycles cycles=Cycles(0)) const
Determine the tick when a cycle begins, by default the current one, but the argument also enables the...
Definition: clocked_object.hh:174
Packet::getAtomicOp
AtomicOpFunctor * getAtomicOp() const
Accessor function to atomic op.
Definition: packet.hh:792
DPRINTFR
#define DPRINTFR(...)
Definition: trace.hh:236
std::pair< int, AtomicOpFunctor * >
VIPERCoalescer::makeWriteCompletePkts
void makeWriteCompletePkts(CoalescedRequest *crequest)
Definition: VIPERCoalescer.cc:203
RubySystem.hh
ProbePoints::Packet
ProbePointArg< PacketInfo > Packet
Packet probe point.
Definition: mem.hh:103
Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:142
Packet::setAddr
void setAddr(Addr _addr)
Update the address of this packet mid-transaction.
Definition: packet.hh:762
Clocked::clockPeriod
Tick clockPeriod() const
Definition: clocked_object.hh:214
GPUCoalescer::makeRequest
RequestStatus makeRequest(PacketPtr pkt) override
Definition: GPUCoalescer.cc:545
Packet::cmd
MemCmd cmd
The command field of the packet.
Definition: packet.hh:335
MessageBuffer.hh
VIPERCoalescer
Definition: VIPERCoalescer.hh:54
VIPERCoalescer::invTCPCallback
void invTCPCallback(Addr address)
Definition: VIPERCoalescer.cc:262
std
Overload hash function for BasicBlockRange type.
Definition: vec_reg.hh:587
CoalescedRequest
Definition: GPUCoalescer.hh:94
MemCmd::MemSyncReq
@ MemSyncReq
Definition: packet.hh:115
Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:257
CoalescedRequest::getSeqNum
uint64_t getSeqNum() const
Definition: GPUCoalescer.hh:108
VIPERCoalescer::VIPERCoalescer
VIPERCoalescer(const Params *)
Definition: VIPERCoalescer.cc:62
RubyPort::MemResponsePort::hitCallback
void hitCallback(PacketPtr pkt)
Definition: RubyPort.cc:515
addr
ip6_addr_t addr
Definition: inet.hh:423
CacheMemory::getAddressAtIdx
Addr getAddressAtIdx(int idx) const
Definition: CacheMemory.cc:166
logging.hh
Packet::isWrite
bool isWrite() const
Definition: packet.hh:557
Packet::getPtr
T * getPtr()
get a pointer to the data ptr.
Definition: packet.hh:1157
GPUCoalescer::empty
bool empty() const
Definition: GPUCoalescer.cc:511
CoalescedRequest::getPackets
std::vector< PacketPtr > & getPackets()
Definition: GPUCoalescer.hh:112
Packet::senderState
SenderState * senderState
This packet's sender state.
Definition: packet.hh:508
MipsISA::p
Bitfield< 0 > p
Definition: pra_constants.hh:323
RubyPort::trySendRetries
void trySendRetries()
Definition: RubyPort.cc:455
VIPERCoalescer::invTCP
void invTCP()
Invalidate TCP (Acquire)
Definition: VIPERCoalescer.cc:279
fatal_if
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition: logging.hh:219
AbstractController::mandatoryQueueLatency
virtual Cycles mandatoryQueueLatency(const RubyRequestType &param_type)
Definition: AbstractController.hh:112
RubyPort::m_version
uint32_t m_version
Definition: RubyPort.hh:189
VIPERCoalescer::makeRequest
RequestStatus makeRequest(PacketPtr pkt) override
Definition: VIPERCoalescer.cc:76
printAddress
std::string printAddress(Addr addr)
Definition: Address.cc:74
RubyPort::SenderState
Definition: RubyPort.hh:139
SubBlock.hh
GPUCoalescer::m_dataCache_ptr
CacheMemory * m_dataCache_ptr
Definition: GPUCoalescer.hh:405
CacheMemory::getNumBlocks
int getNumBlocks() const
Definition: CacheMemory.hh:178
curTick
Tick curTick()
The current simulated tick.
Definition: core.hh:45

Generated on Wed Sep 30 2020 14:02:14 for gem5 by doxygen 1.8.17