gem5 v24.1.0.1
VIPERCoalescer.cc
/*
 * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "mem/ruby/system/VIPERCoalescer.hh"

#include "base/logging.hh"
#include "base/str.hh"
#include "debug/GPUCoalescer.hh"
#include "debug/MemoryAccess.hh"
#include "debug/ProtocolTrace.hh"
#include "mem/packet.hh"
#include "mem/ruby/network/MessageBuffer.hh"
#include "mem/ruby/slicc_interface/AbstractController.hh"
#include "mem/ruby/slicc_interface/RubyRequest.hh"
#include "mem/ruby/structures/CacheMemory.hh"
#include "mem/ruby/system/GPUCoalescer.hh"
#include "mem/ruby/system/RubySystem.hh"
#include "params/VIPERCoalescer.hh"

namespace gem5
{

namespace ruby
{

VIPERCoalescer::VIPERCoalescer(const Params &p)
    : GPUCoalescer(p),
      m_cache_inv_pkt(nullptr),
      m_num_pending_invs(0)
{
}

VIPERCoalescer::~VIPERCoalescer()
{
}

// Places an uncoalesced packet in uncoalescedTable. If the packet is a
// special type (MemFence, scoping, etc), it is issued immediately.
RequestStatus
VIPERCoalescer::makeRequest(PacketPtr pkt)
{
    // VIPER only supports the following memory request types:
    //    MemSyncReq & INV_L1 : TCP cache invalidation
    //    ReadReq             : cache read
    //    WriteReq            : cache write
    //    AtomicOp            : cache atomic
    //    Flush               : flush and invalidate cache
    //
    // VIPER does not expect MemSyncReq & Release since the compute unit
    // does not specify an equivalent type of memory request.
    assert((pkt->cmd == MemCmd::MemSyncReq && pkt->req->isInvL1()) ||
           (pkt->cmd == MemCmd::MemSyncReq && pkt->req->isInvL2()) ||
           pkt->cmd == MemCmd::ReadReq ||
           pkt->cmd == MemCmd::WriteReq ||
           pkt->cmd == MemCmd::FlushReq ||
           pkt->isAtomicOp());

    if (pkt->req->isInvL1() && m_cache_inv_pkt) {
        // In the VIPER protocol, the coalescer cannot handle two or more
        // cache invalidation requests at a time. Cache invalidation requests
        // must be serialized to ensure that all stale data in the TCP are
        // invalidated correctly. If there is already a pending cache
        // invalidation request, this request must be retried later.
        return RequestStatus_Aliased;
    }

    GPUCoalescer::makeRequest(pkt);

    if (pkt->req->isInvL1()) {
        // In the VIPER protocol, a compute unit sends a MemSyncReq with the
        // INV_L1 flag to invalidate the TCP. Upon receiving a request of this
        // type, VIPERCoalescer starts a cache walk to invalidate all valid
        // entries in the TCP. The request is completed once all entries are
        // invalidated.
        assert(!m_cache_inv_pkt);
        m_cache_inv_pkt = pkt;
        invTCP();
    }

    if (pkt->req->isInvL2()) {
        invTCC(pkt);
    }

    return RequestStatus_Issued;
}
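
The RequestStatus_Aliased path above is what serializes L1 invalidations: a second INV_L1 MemSyncReq is bounced back, and the issuer is expected to hold on to it and retry after the outstanding cache walk finishes, while ordinary reads and writes continue to be accepted. A minimal standalone sketch of that contract (plain C++, not gem5 code; the names Front and retryQueue are made up):

// Standalone sketch, not gem5 code. "Front" and "retryQueue" are hypothetical
// stand-ins for the coalescer and for whatever structure the issuer uses to
// hold a bounced request.
#include <cassert>
#include <deque>

enum class Req { Read, Write, InvL1 };
enum class Status { Issued, Aliased };

struct Front {
    bool inv_busy = false;              // plays the role of m_cache_inv_pkt

    Status makeRequest(Req r) {
        if (r == Req::InvL1) {
            if (inv_busy)
                return Status::Aliased; // an invalidation is already walking
            inv_busy = true;            // accept and start the cache walk
        }
        return Status::Issued;
    }
    void invDone() { inv_busy = false; }  // cache walk finished
};

int main() {
    Front f;
    std::deque<Req> retryQueue;

    assert(f.makeRequest(Req::InvL1) == Status::Issued);
    if (f.makeRequest(Req::InvL1) == Status::Aliased)
        retryQueue.push_back(Req::InvL1);                // hold it, retry later

    assert(f.makeRequest(Req::Read) == Status::Issued);  // other traffic flows

    f.invDone();                                         // walk completes
    assert(f.makeRequest(retryQueue.front()) == Status::Issued);
    retryQueue.pop_front();
}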

void
VIPERCoalescer::issueRequest(CoalescedRequest* crequest)
{
    PacketPtr pkt = crequest->getFirstPkt();

    int proc_id = -1;
    if (pkt != NULL && pkt->req->hasContextId()) {
        proc_id = pkt->req->contextId();
    }

    // If valid, copy the pc to the ruby request
    Addr pc = 0;
    if (pkt->req->hasPC()) {
        pc = pkt->req->getPC();
    }

    Addr line_addr = makeLineAddress(pkt->getAddr());

    // Create a WriteMask that records written bytes and atomic operations.
    // This enables partial writes and partial reads of those writes.
    uint32_t blockSize = m_ruby_system->getBlockSizeBytes();
    DataBlock dataBlock(blockSize);
    dataBlock.clear();
    std::vector<bool> accessMask(blockSize, false);
    std::vector<std::pair<int, AtomicOpFunctor*>> atomicOps;
    uint32_t tableSize = crequest->getPackets().size();
    for (int i = 0; i < tableSize; i++) {
        PacketPtr tmpPkt = crequest->getPackets()[i];
        uint32_t tmpOffset = (tmpPkt->getAddr()) - line_addr;
        uint32_t tmpSize = tmpPkt->getSize();
        if (tmpPkt->isAtomicOp()) {
            std::pair<int, AtomicOpFunctor *> tmpAtomicOp(tmpOffset,
                tmpPkt->getAtomicOp());
            atomicOps.push_back(tmpAtomicOp);
        } else if (tmpPkt->isWrite()) {
            dataBlock.setData(tmpPkt->getPtr<uint8_t>(),
                              tmpOffset, tmpSize);
        }
        for (int j = 0; j < tmpSize; j++) {
            accessMask[tmpOffset + j] = true;
        }
    }
    std::shared_ptr<RubyRequest> msg;
    if (pkt->isAtomicOp()) {
        msg = std::make_shared<RubyRequest>(clockEdge(), blockSize,
                                            m_ruby_system, pkt->getAddr(),
                                            pkt->getSize(), pc,
                                            crequest->getRubyType(),
                                            RubyAccessMode_Supervisor, pkt,
                                            PrefetchBit_No, proc_id, 100,
                                            blockSize, accessMask, dataBlock,
                                            atomicOps, crequest->getSeqNum());
    } else {
        msg = std::make_shared<RubyRequest>(clockEdge(), blockSize,
                                            m_ruby_system, pkt->getAddr(),
                                            pkt->getSize(), pc,
                                            crequest->getRubyType(),
                                            RubyAccessMode_Supervisor, pkt,
                                            PrefetchBit_No, proc_id, 100,
                                            blockSize, accessMask, dataBlock,
                                            crequest->getSeqNum());
    }

    if (pkt->cmd == MemCmd::WriteReq) {
        makeWriteCompletePkts(crequest);
    }

    DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %s\n",
             curTick(), m_version, "Coal", "Begin", "", "",
             printAddress(msg->getPhysicalAddress()),
             RubyRequestType_to_string(crequest->getRubyType()));

    fatal_if(crequest->getRubyType() == RubyRequestType_IFETCH,
             "there should not be any I-Fetch requests in the GPU Coalescer");

    if (!deadlockCheckEvent.scheduled()) {
        schedule(deadlockCheckEvent,
                 m_deadlock_threshold * clockPeriod() + curTick());
    }

    assert(m_mandatory_q_ptr);
    Tick latency = cyclesToTicks(
        m_controller->mandatoryQueueLatency(crequest->getRubyType()));
    m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency,
                               m_ruby_system->getRandomization(),
                               m_ruby_system->getWarmupEnabled());
}
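
The loop above folds every packet of a coalesced request into one line-sized DataBlock plus a byte-granular access mask, so the protocol can perform partial writes and return only the bytes a wavefront actually touched. A standalone sketch of the same merging step, using plain std::vector in place of DataBlock and made-up lane data (not gem5 code):

// Standalone sketch, not gem5 code. LaneWrite and kBlockSize are hypothetical;
// the vectors stand in for the DataBlock and accessMask built above.
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
    constexpr uint32_t kBlockSize = 64;               // bytes per cache line
    std::vector<uint8_t> line(kBlockSize, 0);         // merged write data
    std::vector<bool> accessMask(kBlockSize, false);  // bytes actually written

    struct LaneWrite { uint32_t offset; std::vector<uint8_t> bytes; };
    std::vector<LaneWrite> lanes = {
        {0,  {1, 2, 3, 4}},   // one lane writes bytes [0, 4) of the line
        {16, {5, 6, 7, 8}},   // another lane writes bytes [16, 20)
    };

    for (const auto &w : lanes) {
        for (uint32_t j = 0; j < w.bytes.size(); ++j) {
            line[w.offset + j] = w.bytes[j];   // analogue of dataBlock.setData
            accessMask[w.offset + j] = true;   // record the touched byte
        }
    }

    uint32_t touched = 0;
    for (bool b : accessMask)
        touched += b;
    std::printf("%u of %u bytes of the line are valid in this request\n",
                touched, kBlockSize);
}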

void
VIPERCoalescer::makeWriteCompletePkts(CoalescedRequest* crequest)
{
    // In the VIPER protocol, for each write request the downstream caches
    // return two responses: writeCallback and writeCompleteCallback.
    // We need to prepare a writeCompletePkt for each write request so
    // that when writeCompleteCallback is called, we can respond to the
    // requesting wavefront right away.
    // writeCompletePkt inherits the request and senderState of the original
    // write request packet so that we can find the original requestor
    // later. This assumes that request and senderState are not deleted
    // before writeCompleteCallback is called.

    auto key = crequest->getSeqNum();
    std::vector<PacketPtr>& req_pkts = crequest->getPackets();

    for (auto pkt : req_pkts) {
        DPRINTF(GPUCoalescer, "makeWriteCompletePkts: instSeqNum %d\n",
                key);
        assert(pkt->cmd == MemCmd::WriteReq);

        PacketPtr writeCompletePkt = new Packet(pkt->req,
                                                MemCmd::WriteCompleteResp);
        writeCompletePkt->setAddr(pkt->getAddr());
        writeCompletePkt->senderState = pkt->senderState;
        m_writeCompletePktMap[key].push_back(writeCompletePkt);
    }
}

void
VIPERCoalescer::writeCompleteCallback(Addr addr, uint64_t instSeqNum)
{
    DPRINTF(GPUCoalescer, "writeCompleteCallback: instSeqNum %d addr 0x%x\n",
            instSeqNum, addr);

    auto key = instSeqNum;
    assert(m_writeCompletePktMap.count(key) == 1 &&
           !m_writeCompletePktMap[key].empty());

    m_writeCompletePktMap[key].erase(
        std::remove_if(
            m_writeCompletePktMap[key].begin(),
            m_writeCompletePktMap[key].end(),
            [this, addr](PacketPtr writeCompletePkt) -> bool {
                if (makeLineAddress(writeCompletePkt->getAddr()) == addr) {
                    RubyPort::SenderState *ss =
                        safe_cast<RubyPort::SenderState *>
                            (writeCompletePkt->senderState);
                    MemResponsePort *port = ss->port;
                    assert(port != NULL);

                    writeCompletePkt->senderState = ss->predecessor;
                    delete ss;
                    port->hitCallback(writeCompletePkt);
                    return true;
                }
                return false;
            }
        ),
        m_writeCompletePktMap[key].end()
    );

    trySendRetries();

    if (m_writeCompletePktMap[key].empty())
        m_writeCompletePktMap.erase(key);
}
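
makeWriteCompletePkts() and writeCompleteCallback() together implement the VIPER two-response write protocol: one completion token per written line is parked under the instruction's sequence number, and each writeCompleteCallback retires the tokens for the matching line until the instruction's entry disappears. A standalone sketch of that bookkeeping with hypothetical names (not gem5 code):

// Standalone sketch, not gem5 code. "pending" plays the role of
// m_writeCompletePktMap; line addresses stand in for the parked packets.
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <unordered_map>
#include <vector>

using Addr = uint64_t;

int main() {
    std::unordered_map<uint64_t, std::vector<Addr>> pending;  // seqNum -> lines

    const uint64_t seq = 42;
    pending[seq] = {0x1000, 0x1040};          // the write touched two lines

    // Analogue of writeCompleteCallback(addr, seq): drop the tokens for the
    // completed line and forget the instruction once nothing is left.
    auto complete = [&](Addr line) {
        auto &v = pending[seq];
        v.erase(std::remove_if(v.begin(), v.end(),
                               [line](Addr a) { return a == line; }),
                v.end());
        if (v.empty())
            pending.erase(seq);
    };

    complete(0x1000);
    assert(pending.count(seq) == 1);          // still waiting on one line
    complete(0x1040);
    assert(pending.count(seq) == 0);          // write instruction fully done
}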

void
VIPERCoalescer::invTCPCallback(Addr addr)
{
    assert(m_cache_inv_pkt && m_num_pending_invs > 0);

    m_num_pending_invs--;

    if (m_num_pending_invs == 0) {
        std::vector<PacketPtr> pkt_list { m_cache_inv_pkt };
        m_cache_inv_pkt = nullptr;
        completeHitCallback(pkt_list);
    }
}

/**
 * Invalidate TCP
 */
void
VIPERCoalescer::invTCP()
{
    int size = m_dataCache_ptr->getNumBlocks();
    DPRINTF(GPUCoalescer,
            "There are %d Invalidations outstanding before Cache Walk\n",
            m_num_pending_invs);
    // Walk the cache
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Evict Read-only data
        RubyRequestType request_type = RubyRequestType_REPLACEMENT;
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), m_ruby_system->getBlockSizeBytes(), m_ruby_system,
            addr, 0, 0, request_type, RubyAccessMode_Supervisor, nullptr);
        DPRINTF(GPUCoalescer, "Evicting addr 0x%x\n", addr);
        assert(m_mandatory_q_ptr != NULL);
        Tick latency = cyclesToTicks(
            m_controller->mandatoryQueueLatency(request_type));
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency,
                                   m_ruby_system->getRandomization(),
                                   m_ruby_system->getWarmupEnabled());
        m_num_pending_invs++;
    }
    DPRINTF(GPUCoalescer,
            "There are %d Invalidations outstanding after Cache Walk\n",
            m_num_pending_invs);
}
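
invTCP() and invTCPCallback() cooperate through m_num_pending_invs: the walk issues one REPLACEMENT request per block held in the TCP, and the original MemSyncReq is answered only when the last callback drives the counter back to zero. A standalone model of that counting, with hypothetical names (not gem5 code):

// Standalone sketch, not gem5 code. InvWalk is a made-up stand-in for the
// m_cache_inv_pkt / m_num_pending_invs pair used above.
#include <cassert>

struct InvWalk {
    int pending = 0;                     // m_num_pending_invs
    bool request_outstanding = false;    // m_cache_inv_pkt != nullptr

    void startWalk(int num_blocks) {     // invTCP(): one eviction per block
        request_outstanding = true;
        pending += num_blocks;
    }

    bool blockInvalidated() {            // invTCPCallback(): one block done
        assert(request_outstanding && pending > 0);
        if (--pending == 0) {
            request_outstanding = false; // now answer the MemSyncReq
            return true;
        }
        return false;
    }
};

int main() {
    InvWalk w;
    w.startWalk(3);
    assert(!w.blockInvalidated());
    assert(!w.blockInvalidated());
    assert(w.blockInvalidated());        // last callback completes the request
}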

void
VIPERCoalescer::invTCCCallback(Addr addr)
{
    for (auto& pkt : m_pending_invl2s[addr]) {
        RubyPort::SenderState *ss =
            safe_cast<RubyPort::SenderState *>(pkt->senderState);
        MemResponsePort *port = ss->port;
        assert(port != nullptr);

        // Now convert to MemSyncResp
        pkt->makeResponse();

        pkt->senderState = ss->predecessor;
        delete ss;
        port->hitCallback(pkt);
    }
    m_pending_invl2s.erase(addr);
}

/*
 * Send an invalidate to a specific address in the TCC.
 */
void
VIPERCoalescer::invTCC(PacketPtr pkt)
{
    assert(pkt);
    assert(pkt->req);

    Addr addr = pkt->req->getPaddr();
    RubyRequestType request_type = RubyRequestType_InvL2;

    std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
        clockEdge(), m_ruby_system->getBlockSizeBytes(), m_ruby_system,
        addr, 0, 0, request_type, RubyAccessMode_Supervisor, nullptr);

    DPRINTF(GPUCoalescer, "Sending L2 invalidate to 0x%x\n", addr);

    assert(m_mandatory_q_ptr);
    Tick latency = cyclesToTicks(
        m_controller->mandatoryQueueLatency(request_type));
    m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency,
                               m_ruby_system->getRandomization(),
                               m_ruby_system->getWarmupEnabled());

    m_pending_invl2s[addr].push_back(pkt);
}
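
invTCC() and invTCCCallback() keep a per-address list of waiting MemSyncReq packets in m_pending_invl2s, so every requester of the same line is answered by the single callback for that address. A standalone sketch of that grouping, using std::function callbacks in place of packets and response ports (hypothetical names, not gem5 code):

// Standalone sketch, not gem5 code. The std::function callbacks stand in for
// the parked MemSyncReq packets and their response ports.
#include <cassert>
#include <cstdint>
#include <functional>
#include <unordered_map>
#include <vector>

using Addr = uint64_t;

int main() {
    // addr -> responses waiting for that L2 invalidation to be acknowledged
    std::unordered_map<Addr, std::vector<std::function<void()>>> pending;
    int responded = 0;

    auto invTCC = [&](Addr a) {           // park the requester behind addr a
        pending[a].push_back([&responded] { ++responded; });
    };
    auto invTCCCallback = [&](Addr a) {   // acknowledgement arrives
        for (auto &respond : pending[a])
            respond();                    // hitCallback on every waiter
        pending.erase(a);
    };

    invTCC(0x2000);
    invTCC(0x2000);                       // a second waiter on the same line
    invTCCCallback(0x2000);
    assert(responded == 2 && pending.empty());
}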

} // namespace ruby
} // namespace gem5