gem5  v21.1.0.2
VIPERCoalescer.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
35 
36 #include "base/logging.hh"
37 #include "base/str.hh"
39 #include "debug/GPUCoalescer.hh"
40 #include "debug/MemoryAccess.hh"
41 #include "debug/ProtocolTrace.hh"
42 #include "mem/packet.hh"
51 #include "params/VIPERCoalescer.hh"
52 
53 namespace gem5
54 {
55 
56 namespace ruby
57 {
58 
60  : GPUCoalescer(p),
61  m_cache_inv_pkt(nullptr),
62  m_num_pending_invs(0)
63 {
64 }
65 
67 {
68 }
69 
70 // Places an uncoalesced packet in uncoalescedTable. If the packet is a
71 // special type (MemFence, scoping, etc.), it is issued immediately.
72 RequestStatus
74 {
75  // VIPER only supports the following memory request types:
76  // MemSyncReq & INV_L1 : TCP cache invalidation
77  // ReadReq : cache read
78  // WriteReq : cache write
79  // AtomicOp : cache atomic
80  //
81  // VIPER does not expect MemSyncReq & Release since, in GCN3, the compute
82  // unit does not specify an equivalent type of memory request.
83  assert((pkt->cmd == MemCmd::MemSyncReq && pkt->req->isInvL1()) ||
84  pkt->cmd == MemCmd::ReadReq ||
85  pkt->cmd == MemCmd::WriteReq ||
86  pkt->isAtomicOp());
87 
88  if (pkt->req->isInvL1() && m_cache_inv_pkt) {
89  // In the VIPER protocol, the coalescer is not able to handle two or
90  // more cache invalidation requests at a time. Cache invalidation
91  // requests must be serialized to ensure that all stale data in the
92  // TCP is invalidated correctly. If there is already a pending
93  // cache invalidation request, this request must be retried later.
94  return RequestStatus_Aliased;
95  }
96 
98 
99  if (pkt->req->isInvL1()) {
100  // In the VIPER protocol, a compute unit sends a MemSyncReq with the
101  // INV_L1 flag to invalidate the TCP. Upon receiving a request of this
102  // type, VIPERCoalescer starts a cache walk to invalidate all valid
103  // entries in the TCP. The request is completed once all entries are invalidated.
104  assert(!m_cache_inv_pkt);
105  m_cache_inv_pkt = pkt;
106  invTCP();
107  }
108 
109  return RequestStatus_Issued;
110 }
111 
112 void
114 {
115  PacketPtr pkt = crequest->getFirstPkt();
116 
117  int proc_id = -1;
118  if (pkt != NULL && pkt->req->hasContextId()) {
119  proc_id = pkt->req->contextId();
120  }
121 
122  // If valid, copy the pc to the ruby request
123  Addr pc = 0;
124  if (pkt->req->hasPC()) {
125  pc = pkt->req->getPC();
126  }
127 
128  Addr line_addr = makeLineAddress(pkt->getAddr());
129 
130  // Create a WriteMask that records written bytes
131  // and atomic operations. This enables partial writes
132  // and partial reads of those writes.
133  DataBlock dataBlock;
134  dataBlock.clear();
135  uint32_t blockSize = RubySystem::getBlockSizeBytes();
136  std::vector<bool> accessMask(blockSize,false);
138  uint32_t tableSize = crequest->getPackets().size();
139  for (int i = 0; i < tableSize; i++) {
140  PacketPtr tmpPkt = crequest->getPackets()[i];
141  uint32_t tmpOffset = (tmpPkt->getAddr()) - line_addr;
142  uint32_t tmpSize = tmpPkt->getSize();
143  if (tmpPkt->isAtomicOp()) {
144  std::pair<int,AtomicOpFunctor *> tmpAtomicOp(tmpOffset,
145  tmpPkt->getAtomicOp());
146  atomicOps.push_back(tmpAtomicOp);
147  } else if (tmpPkt->isWrite()) {
148  dataBlock.setData(tmpPkt->getPtr<uint8_t>(),
149  tmpOffset, tmpSize);
150  }
151  for (int j = 0; j < tmpSize; j++) {
152  accessMask[tmpOffset + j] = true;
153  }
154  }
155  std::shared_ptr<RubyRequest> msg;
156  if (pkt->isAtomicOp()) {
157  msg = std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
158  pkt->getSize(), pc, crequest->getRubyType(),
159  RubyAccessMode_Supervisor, pkt,
160  PrefetchBit_No, proc_id, 100,
161  blockSize, accessMask,
162  dataBlock, atomicOps, crequest->getSeqNum());
163  } else {
164  msg = std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
165  pkt->getSize(), pc, crequest->getRubyType(),
166  RubyAccessMode_Supervisor, pkt,
167  PrefetchBit_No, proc_id, 100,
168  blockSize, accessMask,
169  dataBlock, crequest->getSeqNum());
170  }
171 
172  if (pkt->cmd == MemCmd::WriteReq) {
173  makeWriteCompletePkts(crequest);
174  }
175 
176  DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %s\n",
177  curTick(), m_version, "Coal", "Begin", "", "",
178  printAddress(msg->getPhysicalAddress()),
179  RubyRequestType_to_string(crequest->getRubyType()));
180 
181  fatal_if(crequest->getRubyType() == RubyRequestType_IFETCH,
182  "there should not be any I-Fetch requests in the GPU Coalescer");
183 
184  if (!deadlockCheckEvent.scheduled()) {
187  curTick());
188  }
189 
190  assert(m_mandatory_q_ptr);
191  Tick latency = cyclesToTicks(
193  m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
194 }
195 
196 void
198 {
199  // In the VIPER protocol, for each write request, downstream caches
200  // return two responses: writeCallback and writeCompleteCallback.
201  // We need to prepare a writeCompletePkt for each write request so
202  // that when writeCompleteCallback is called, we can respond to the
203  // requesting wavefront right away.
204  // writeCompletePkt inherits the request and senderState of the original
205  // write request packet so that we can find the original requestor
206  // later. This assumes that the request and senderState are not deleted
207  // before writeCompleteCallback is called.
208 
209  auto key = crequest->getSeqNum();
210  std::vector<PacketPtr>& req_pkts = crequest->getPackets();
211 
212  for (auto pkt : req_pkts) {
213  DPRINTF(GPUCoalescer, "makeWriteCompletePkts: instSeqNum %d\n",
214  key);
215  assert(pkt->cmd == MemCmd::WriteReq);
216 
217  PacketPtr writeCompletePkt = new Packet(pkt->req,
219  writeCompletePkt->setAddr(pkt->getAddr());
220  writeCompletePkt->senderState = pkt->senderState;
221  m_writeCompletePktMap[key].push_back(writeCompletePkt);
222  }
223 }
224 
225 void
227 {
228  DPRINTF(GPUCoalescer, "writeCompleteCallback: instSeqNum %d addr 0x%x\n",
229  instSeqNum, addr);
230 
231  auto key = instSeqNum;
232  assert(m_writeCompletePktMap.count(key) == 1 &&
233  !m_writeCompletePktMap[key].empty());
234 
235  m_writeCompletePktMap[key].erase(
236  std::remove_if(
237  m_writeCompletePktMap[key].begin(),
238  m_writeCompletePktMap[key].end(),
239  [addr](PacketPtr writeCompletePkt) -> bool {
240  if (makeLineAddress(writeCompletePkt->getAddr()) == addr) {
241  RubyPort::SenderState *ss =
242  safe_cast<RubyPort::SenderState *>
243  (writeCompletePkt->senderState);
244  MemResponsePort *port = ss->port;
245  assert(port != NULL);
246 
247  writeCompletePkt->senderState = ss->predecessor;
248  delete ss;
249  port->hitCallback(writeCompletePkt);
250  return true;
251  }
252  return false;
253  }
254  ),
255  m_writeCompletePktMap[key].end()
256  );
257 
258  trySendRetries();
259 
260  if (m_writeCompletePktMap[key].empty())
261  m_writeCompletePktMap.erase(key);
262 }
263 
264 void
266 {
267  assert(m_cache_inv_pkt && m_num_pending_invs > 0);
268 
270 
271  if (m_num_pending_invs == 0) {
273  m_cache_inv_pkt = nullptr;
274  completeHitCallback(pkt_list);
275  }
276 }
277 
281 void
283 {
284  int size = m_dataCache_ptr->getNumBlocks();
286  "There are %d Invalidations outstanding before Cache Walk\n",
288  // Walk the cache
289  for (int i = 0; i < size; i++) {
291  // Evict Read-only data
292  RubyRequestType request_type = RubyRequestType_REPLACEMENT;
293  std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
294  clockEdge(), addr, 0, 0,
295  request_type, RubyAccessMode_Supervisor,
296  nullptr);
297  DPRINTF(GPUCoalescer, "Evicting addr 0x%x\n", addr);
298  assert(m_mandatory_q_ptr != NULL);
299  Tick latency = cyclesToTicks(
300  m_controller->mandatoryQueueLatency(request_type));
301  m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
303  }
305  "There are %d Invalidatons outstanding after Cache Walk\n",
307 }
308 
309 } // namespace ruby
310 } // namespace gem5
gem5::ruby::DataBlock::clear
void clear()
Definition: DataBlock.cc:68
gem5::curTick
Tick curTick()
The universal simulation clock.
Definition: cur_tick.hh:46
gem5::Packet::isAtomicOp
bool isAtomicOp() const
Definition: packet.hh:820
gem5::ruby::RubyPort::m_version
uint32_t m_version
Definition: RubyPort.hh:196
Profiler.hh
DPRINTFR
#define DPRINTFR(x,...)
Definition: trace.hh:200
gem5::ruby::DataBlock::setData
void setData(const uint8_t *data, int offset, int len)
Definition: DataBlock.cc:124
gem5::ruby::printAddress
std::string printAddress(Addr addr)
Definition: Address.cc:80
gem5::ruby::CacheMemory::getAddressAtIdx
Addr getAddressAtIdx(int idx) const
Definition: CacheMemory.cc:166
gem5::Packet::req
RequestPtr req
A pointer to the original request.
Definition: packet.hh:366
GPUCoalescer.hh
AbstractController.hh
gem5::ruby::MessageBuffer::enqueue
void enqueue(MsgPtr message, Tick curTime, Tick delta)
Definition: MessageBuffer.cc:197
gem5::Packet::isWrite
bool isWrite() const
Definition: packet.hh:583
gem5::ruby::RubySystem::getBlockSizeBytes
static uint32_t getBlockSizeBytes()
Definition: RubySystem.hh:72
RubyRequest.hh
gem5::ruby::VIPERCoalescer::m_cache_inv_pkt
PacketPtr m_cache_inv_pkt
Definition: VIPERCoalescer.hh:79
gem5::EventManager::schedule
void schedule(Event &event, Tick when)
Definition: eventq.hh:1019
std::vector< bool >
gem5::ruby::RubyPort::trySendRetries
void trySendRetries()
Definition: RubyPort.cc:464
gem5::ArmISA::i
Bitfield< 7 > i
Definition: misc_types.hh:66
gem5::ruby::makeLineAddress
Addr makeLineAddress(Addr addr)
Definition: Address.cc:60
VIPERCoalescer.hh
gem5::ruby::RubyPort::m_mandatory_q_ptr
MessageBuffer * m_mandatory_q_ptr
Definition: RubyPort.hh:198
packet.hh
gem5::ruby::CoalescedRequest::getFirstPkt
PacketPtr getFirstPkt() const
Definition: GPUCoalescer.hh:123
gem5::ruby::VIPERCoalescer::issueRequest
void issueRequest(CoalescedRequest *crequest) override
Definition: VIPERCoalescer.cc:113
gem5::ruby::VIPERCoalescer::writeCompleteCallback
void writeCompleteCallback(Addr address, uint64_t instSeqNum)
Definition: VIPERCoalescer.cc:226
str.hh
gem5::ruby::VIPERCoalescer::m_writeCompletePktMap
std::unordered_map< uint64_t, std::vector< PacketPtr > > m_writeCompletePktMap
Definition: VIPERCoalescer.hh:91
gem5::Packet::getAtomicOp
AtomicOpFunctor * getAtomicOp() const
Accessor function to atomic op.
Definition: packet.hh:819
gem5::ArmISA::j
Bitfield< 24 > j
Definition: misc_types.hh:57
CacheMemory.hh
gem5::ruby::VIPERCoalescer::makeWriteCompletePkts
void makeWriteCompletePkts(CoalescedRequest *crequest)
Definition: VIPERCoalescer.cc:197
gem5::Clocked::cyclesToTicks
Tick cyclesToTicks(Cycles c) const
Definition: clocked_object.hh:227
gem5::ruby::GPUCoalescer::Params
RubyGPUCoalescerParams Params
Definition: GPUCoalescer.hh:238
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:186
gem5::Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:283
gem5::Packet::setAddr
void setAddr(Addr _addr)
Update the address of this packet mid-transaction.
Definition: packet.hh:789
gem5::probing::Packet
ProbePointArg< PacketInfo > Packet
Packet probe point.
Definition: mem.hh:109
gem5::MipsISA::p
Bitfield< 0 > p
Definition: pra_constants.hh:326
gem5::Tick
uint64_t Tick
Tick count type.
Definition: types.hh:58
RubyTester.hh
gem5::ruby::CoalescedRequest::getPackets
std::vector< PacketPtr > & getPackets()
Definition: GPUCoalescer.hh:126
gem5::MemCmd::ReadReq
@ ReadReq
Definition: packet.hh:86
gem5::MemCmd::MemSyncReq
@ MemSyncReq
Definition: packet.hh:119
gem5::ruby::GPUCoalescer::m_deadlock_threshold
Cycles m_deadlock_threshold
Definition: GPUCoalescer.hh:418
gem5::ruby::VIPERCoalescer::m_num_pending_invs
int m_num_pending_invs
Definition: VIPERCoalescer.hh:82
gem5::ruby::GPUCoalescer::empty
bool empty() const
Definition: GPUCoalescer.cc:593
gem5::ruby::CacheMemory::getNumBlocks
int getNumBlocks() const
Definition: CacheMemory.hh:159
gem5::ruby::GPUCoalescer::completeHitCallback
void completeHitCallback(std::vector< PacketPtr > &mylist)
Definition: GPUCoalescer.cc:922
gem5::Packet::cmd
MemCmd cmd
The command field of the packet.
Definition: packet.hh:361
std::pair< int, AtomicOpFunctor * >
RubySystem.hh
gem5::Addr
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Definition: types.hh:147
gem5::Packet::senderState
SenderState * senderState
This packet's sender state.
Definition: packet.hh:534
gem5::Clocked::clockEdge
Tick clockEdge(Cycles cycles=Cycles(0)) const
Determine the tick when a cycle begins, by default the current one, but the argument also enables the...
Definition: clocked_object.hh:177
gem5::ruby::GPUCoalescer::deadlockCheckEvent
EventFunctionWrapper deadlockCheckEvent
Definition: GPUCoalescer.hh:461
gem5::ruby::AbstractController::mandatoryQueueLatency
virtual Cycles mandatoryQueueLatency(const RubyRequestType &param_type)
Definition: AbstractController.hh:119
gem5::MemCmd::WriteCompleteResp
@ WriteCompleteResp
Definition: packet.hh:91
MessageBuffer.hh
gem5::ruby::GPUCoalescer::m_dataCache_ptr
CacheMemory * m_dataCache_ptr
Definition: GPUCoalescer.hh:420
gem5::ruby::CoalescedRequest::getSeqNum
uint64_t getSeqNum() const
Definition: GPUCoalescer.hh:122
gem5::ruby::VIPERCoalescer::makeRequest
RequestStatus makeRequest(PacketPtr pkt) override
Definition: VIPERCoalescer.cc:73
gem5::ruby::RubyPort::m_controller
AbstractController * m_controller
Definition: RubyPort.hh:197
gem5::ruby::CoalescedRequest
Definition: GPUCoalescer.hh:108
gem5::ruby::GPUCoalescer::makeRequest
RequestStatus makeRequest(PacketPtr pkt) override
Definition: GPUCoalescer.cc:627
gem5::MipsISA::pc
Bitfield< 4 > pc
Definition: pra_constants.hh:243
gem5::MemCmd::WriteReq
@ WriteReq
Definition: packet.hh:89
logging.hh
gem5::ruby::VIPERCoalescer::~VIPERCoalescer
~VIPERCoalescer()
Definition: VIPERCoalescer.cc:66
gem5::ruby::VIPERCoalescer::invTCPCallback
void invTCPCallback(Addr address)
Definition: VIPERCoalescer.cc:265
gem5::ruby::VIPERCoalescer::VIPERCoalescer
VIPERCoalescer(const Params &)
Definition: VIPERCoalescer.cc:59
gem5::ruby::DataBlock
Definition: DataBlock.hh:60
gem5::Packet::getAddr
Addr getAddr() const
Definition: packet.hh:781
gem5::ruby::CoalescedRequest::getRubyType
RubyRequestType getRubyType() const
Definition: GPUCoalescer.hh:125
fatal_if
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition: logging.hh:225
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: decoder.cc:40
gem5::ruby::VIPERCoalescer::invTCP
void invTCP()
Invalidate TCP.
Definition: VIPERCoalescer.cc:282
SubBlock.hh
gem5::ruby::GPUCoalescer
Definition: GPUCoalescer.hh:215
gem5::Packet::getSize
unsigned getSize() const
Definition: packet.hh:791
gem5::Event::scheduled
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:465
gem5::Clocked::clockPeriod
Tick clockPeriod() const
Definition: clocked_object.hh:217
gem5::X86ISA::addr
Bitfield< 3 > addr
Definition: types.hh:84
gem5::Packet::getPtr
T * getPtr()
get a pointer to the data ptr.
Definition: packet.hh:1184

Generated on Tue Sep 21 2021 12:25:43 for gem5 by doxygen 1.8.17