gem5  v20.0.0.0
VIPERCoalescer.cc
Go to the documentation of this file.
/*
 * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "base/logging.hh"
#include "base/str.hh"
#include "config/the_isa.hh"

#if THE_ISA == X86_ISA
#include "arch/x86/insts/microldstop.hh"

#endif // X86_ISA

#include "mem/ruby/system/VIPERCoalescer.hh"

#include "cpu/testers/rubytest/RubyTester.hh"
#include "debug/GPUCoalescer.hh"
#include "debug/MemoryAccess.hh"
#include "mem/packet.hh"
#include "mem/ruby/common/SubBlock.hh"
#include "mem/ruby/network/MessageBuffer.hh"
#include "mem/ruby/profiler/Profiler.hh"
#include "mem/ruby/slicc_interface/AbstractController.hh"
#include "mem/ruby/slicc_interface/RubyRequest.hh"
#include "mem/ruby/structures/CacheMemory.hh"
#include "mem/ruby/system/GPUCoalescer.hh"
#include "mem/ruby/system/RubySystem.hh"
#include "params/VIPERCoalescer.hh"

using namespace std;

VIPERCoalescer *
VIPERCoalescerParams::create()
{
    return new VIPERCoalescer(this);
}

VIPERCoalescer::VIPERCoalescer(const Params *p)
    : GPUCoalescer(p)
{
    m_max_wb_per_cycle = p->max_wb_per_cycle;
    m_max_inv_per_cycle = p->max_inv_per_cycle;
    m_outstanding_inv = 0;
    m_outstanding_wb = 0;
}

VIPERCoalescer::~VIPERCoalescer()
{
}

// Analyzes the packet to see if this request can be coalesced.
// If the request can be coalesced, it is added to the reqCoalescer table
// and makeRequest returns RequestStatus_Issued.
// If this is the first request to a cacheline, the request is added to both
// the newRequests queue and the reqCoalescer table; makeRequest
// returns RequestStatus_Issued.
// If there is a pending request to this cacheline and this request
// can't be coalesced, RequestStatus_Aliased is returned and
// the packet needs to be reissued.
RequestStatus
VIPERCoalescer::makeRequest(PacketPtr pkt)
{
    if (m_outstanding_wb || m_outstanding_inv) {
        DPRINTF(GPUCoalescer,
                "There are %d Writebacks and %d Invalidations\n",
                m_outstanding_wb, m_outstanding_inv);
    }
    // Are we in the middle of a release?
    if ((m_outstanding_wb) > 0) {
        if (pkt->req->isKernel()) {
            // Everything is fine:
            // barriers and kernel-end requests can coalesce.
            // If it is a kernel begin, flush the cache.
            if (pkt->req->isAcquire() && (m_outstanding_inv == 0)) {
                invL1();
            }

            if (pkt->req->isRelease()) {
                insertKernel(pkt->req->contextId(), pkt);
            }

            return RequestStatus_Issued;
        }
//        return RequestStatus_Aliased;
    } else if (pkt->req->isKernel() && pkt->req->isRelease()) {
        // Flush dirty data on kernel end
        // (isKernel + isRelease)
        insertKernel(pkt->req->contextId(), pkt);
        wbL1();
        if (m_outstanding_wb == 0) {
            for (auto it = kernelEndList.begin(); it != kernelEndList.end(); it++) {
                newKernelEnds.push_back(it->first);
            }
            completeIssue();
        }
        return RequestStatus_Issued;
    }
    RequestStatus requestStatus = GPUCoalescer::makeRequest(pkt);
    if (requestStatus != RequestStatus_Issued) {
        // Request not issued; enqueue a retry
        DPRINTF(GPUCoalescer, "Request not issued by GPUCoalescer\n");
        return requestStatus;
    } else if (pkt->req->isKernel() && pkt->req->isAcquire()) {
        // Invalidate clean data on kernel begin
        // (isKernel + isAcquire)
        invL1();
    } else if (pkt->req->isAcquire() && pkt->req->isRelease()) {
        // Deschedule the AtomicAcqRel and
        // flush and invalidate the L1 cache
        invwbL1();
        if (m_outstanding_wb > 0 && issueEvent.scheduled()) {
            DPRINTF(GPUCoalescer, "issueEvent Descheduled\n");
            deschedule(issueEvent);
        }
    } else if (pkt->req->isRelease()) {
        // Deschedule the StoreRel and
        // flush the L1 cache
        wbL1();
        if (m_outstanding_wb > 0 && issueEvent.scheduled()) {
            DPRINTF(GPUCoalescer, "issueEvent Descheduled\n");
            deschedule(issueEvent);
        }
    } else if (pkt->req->isAcquire()) {
        // LoadAcq or AtomicAcq:
        // invalidate the L1 cache
        invL1();
    }
    // Request was successful
    if (m_outstanding_wb == 0) {
        if (!issueEvent.scheduled()) {
            DPRINTF(GPUCoalescer, "issueEvent Rescheduled\n");
            schedule(issueEvent, curTick());
        }
    }
    return RequestStatus_Issued;
}

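The acquire/release handling above reduces to a small decision table over the packet's kernel, acquire, and release attributes. The standalone sketch below (the function name viperMemOrderAction is hypothetical, not part of gem5) distills that dispatch for a request that issues successfully, assuming the VIPER convention that acquire operations invalidate the L1 and release operations write dirty L1 data back.

#include <iostream>
#include <string>

// Illustrative only: mirrors the dispatch in VIPERCoalescer::makeRequest().
std::string
viperMemOrderAction(bool isKernel, bool isAcquire, bool isRelease)
{
    if (isKernel && isRelease)
        return "wbL1";      // kernel end: write back dirty L1 data
    if (isKernel && isAcquire)
        return "invL1";     // kernel begin: invalidate clean L1 data
    if (isAcquire && isRelease)
        return "invwbL1";   // atomic acquire-release: write back and invalidate
    if (isRelease)
        return "wbL1";      // store-release: write back
    if (isAcquire)
        return "invL1";     // load-acquire or atomic-acquire: invalidate
    return "coalesce only"; // ordinary access: no cache maintenance
}

int
main()
{
    std::cout << viperMemOrderAction(false, true, false) << std::endl; // invL1
    std::cout << viperMemOrderAction(true, false, true) << std::endl;  // wbL1
    return 0;
}
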
void
VIPERCoalescer::wbCallback(Addr addr)
{
    m_outstanding_wb--;
    // If the L1 flush is complete,
    // attempt to schedule the issueEvent
    assert(((int) m_outstanding_wb) >= 0);
    if (m_outstanding_wb == 0) {
        for (auto it = kernelEndList.begin(); it != kernelEndList.end(); it++) {
            newKernelEnds.push_back(it->first);
        }
        completeIssue();
    }
    trySendRetries();
}

void
VIPERCoalescer::invCallback(Addr addr)
{
    m_outstanding_inv--;
    // If the L1 flush is complete,
    // attempt to schedule the issueEvent.
    // This probably won't happen, since
    // we don't wait on cache invalidations.
    if (m_outstanding_wb == 0) {
        for (auto it = kernelEndList.begin(); it != kernelEndList.end(); it++) {
            newKernelEnds.push_back(it->first);
        }
        completeIssue();
    }
    trySendRetries();
}

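Note the asymmetry between the two callbacks: both test m_outstanding_wb after decrementing their own counter, so completed writebacks are what release deferred kernel-end packets, while a completed invalidation only matters if no writebacks are pending. A minimal standalone model of that bookkeeping follows (the struct name OutstandingCounters is hypothetical, not gem5 code).

#include <cassert>
#include <cstdint>
#include <iostream>

// Illustrative only: mirrors the gating in wbCallback()/invCallback().
struct OutstandingCounters
{
    uint64_t wb = 0;   // outstanding writebacks (m_outstanding_wb)
    uint64_t inv = 0;  // outstanding invalidations (m_outstanding_inv)

    // A writeback completed; returns true when deferred kernel-end
    // work may now be issued.
    bool writebackDone() { assert(wb > 0); --wb; return wb == 0; }

    // An invalidation completed; issue is still gated on the writeback
    // count, not the invalidation count.
    bool invalidateDone() { assert(inv > 0); --inv; return wb == 0; }
};

int
main()
{
    OutstandingCounters c;
    c.wb = 2;
    c.inv = 1;
    std::cout << c.invalidateDone() << std::endl; // 0: writebacks still pending
    std::cout << c.writebackDone() << std::endl;  // 0: one writeback remains
    std::cout << c.writebackDone() << std::endl;  // 1: last writeback completed
    return 0;
}
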
/**
  * Invalidate L1 cache (Acquire)
  */
void
VIPERCoalescer::invL1()
{
    int size = m_dataCache_ptr->getNumBlocks();
    DPRINTF(GPUCoalescer,
            "There are %d Invalidations outstanding before Cache Walk\n",
            m_outstanding_inv);
    // Walk the cache
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Evict read-only data
        RubyRequestType request_type = RubyRequestType_REPLACEMENT;
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, (uint8_t*) 0, 0, 0,
            request_type, RubyAccessMode_Supervisor,
            nullptr);
        assert(m_mandatory_q_ptr != NULL);
        Tick latency = cyclesToTicks(
            m_controller->mandatoryQueueLatency(request_type));
        assert(latency > 0);
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
        m_outstanding_inv++;
    }
    DPRINTF(GPUCoalescer,
            "There are %d Invalidations outstanding after Cache Walk\n",
            m_outstanding_inv);
}

/**
  * Writeback L1 cache (Release)
  */
void
VIPERCoalescer::wbL1()
{
    int size = m_dataCache_ptr->getNumBlocks();
    DPRINTF(GPUCoalescer,
            "There are %d Writebacks outstanding before Cache Walk\n",
            m_outstanding_wb);
    // Walk the cache
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Write dirty data back
        RubyRequestType request_type = RubyRequestType_FLUSH;
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, (uint8_t*) 0, 0, 0,
            request_type, RubyAccessMode_Supervisor,
            nullptr);
        assert(m_mandatory_q_ptr != NULL);
        Tick latency = cyclesToTicks(
            m_controller->mandatoryQueueLatency(request_type));
        assert(latency > 0);
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
        m_outstanding_wb++;
    }
    DPRINTF(GPUCoalescer,
            "There are %d Writebacks outstanding after Cache Walk\n",
            m_outstanding_wb);
}

/**
  * Invalidate and Writeback L1 cache (Acquire&Release)
  */
void
VIPERCoalescer::invwbL1()
{
    int size = m_dataCache_ptr->getNumBlocks();
    // Walk the cache
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Evict read-only data
        RubyRequestType request_type = RubyRequestType_REPLACEMENT;
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, (uint8_t*) 0, 0, 0,
            request_type, RubyAccessMode_Supervisor,
            nullptr);
        assert(m_mandatory_q_ptr != NULL);
        Tick latency = cyclesToTicks(
            m_controller->mandatoryQueueLatency(request_type));
        assert(latency > 0);
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
        m_outstanding_inv++;
    }
    // Walk the cache
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Write dirty data back
        RubyRequestType request_type = RubyRequestType_FLUSH;
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, (uint8_t*) 0, 0, 0,
            request_type, RubyAccessMode_Supervisor,
            nullptr);
        assert(m_mandatory_q_ptr != NULL);
        Tick latency = cyclesToTicks(
            m_controller->mandatoryQueueLatency(request_type));
        assert(latency > 0);
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
        m_outstanding_wb++;
    }
}
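
The three walk routines above repeat the same per-block enqueue and differ only in the RubyRequestType issued and the counter incremented. A possible consolidation is sketched below; enqueueCacheWalk is a hypothetical helper name, it is not part of VIPERCoalescer, and it would also need a declaration in the class header. It relies only on members already exercised above.

// Illustrative refactoring sketch, not gem5 code.
void
VIPERCoalescer::enqueueCacheWalk(RubyRequestType request_type,
                                 uint64_t &outstanding_counter)
{
    int size = m_dataCache_ptr->getNumBlocks();
    // Walk the cache, issuing one request per block
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, (uint8_t*) 0, 0, 0,
            request_type, RubyAccessMode_Supervisor,
            nullptr);
        assert(m_mandatory_q_ptr != NULL);
        Tick latency = cyclesToTicks(
            m_controller->mandatoryQueueLatency(request_type));
        assert(latency > 0);
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
        outstanding_counter++;
    }
}

// With such a helper, invwbL1() would reduce to two calls:
//     enqueueCacheWalk(RubyRequestType_REPLACEMENT, m_outstanding_inv);
//     enqueueCacheWalk(RubyRequestType_FLUSH, m_outstanding_wb);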
