gem5 v19.0.0.0
VIPERCoalescer.cc
/*
 * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Sooraj Puthoor
 */
#include "base/logging.hh"
#include "base/str.hh"
#include "config/the_isa.hh"

#if THE_ISA == X86_ISA
#include "arch/x86/insts/microldstop.hh"

#endif // X86_ISA
#include "mem/ruby/system/VIPERCoalescer.hh"

#include "cpu/testers/rubytest/RubyTester.hh"
#include "debug/GPUCoalescer.hh"
#include "debug/MemoryAccess.hh"
#include "mem/packet.hh"
#include "mem/ruby/common/SubBlock.hh"
#include "mem/ruby/network/MessageBuffer.hh"
#include "mem/ruby/profiler/Profiler.hh"
#include "mem/ruby/slicc_interface/AbstractController.hh"
#include "mem/ruby/slicc_interface/RubyRequest.hh"
#include "mem/ruby/structures/CacheMemory.hh"
#include "mem/ruby/system/GPUCoalescer.hh"
#include "mem/ruby/system/RubySystem.hh"
#include "params/VIPERCoalescer.hh"

using namespace std;
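
// Factory hook called from the generated VIPERCoalescerParams Python-side
// object; this is the standard gem5 params create() pattern.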
VIPERCoalescer *
VIPERCoalescerParams::create()
{
    return new VIPERCoalescer(this);
}

VIPERCoalescer::VIPERCoalescer(const Params *p)
    : GPUCoalescer(p)
{
    // Per-cycle limits on writeback and invalidate requests, taken
    // from the config params.
    m_max_wb_per_cycle = p->max_wb_per_cycle;
    m_max_inv_per_cycle = p->max_inv_per_cycle;
    m_outstanding_inv = 0;
    m_outstanding_wb = 0;
}

VIPERCoalescer::~VIPERCoalescer()
{
}

// Analyzes the packet to see if this request can be coalesced.
// If the request can be coalesced, it is added to the reqCoalescer table
// and makeRequest returns RequestStatus_Issued.
// If this is the first request to a cacheline, the request is added to both
// the newRequests queue and the reqCoalescer table; makeRequest
// returns RequestStatus_Issued.
// If there is a pending request to this cacheline and this request
// can't be coalesced, RequestStatus_Aliased is returned and
// the packet needs to be reissued.
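// Beyond coalescing, makeRequest also implements the VIPER protocol's
// acquire/release semantics at the L1: acquires invalidate the L1
// (invL1()), releases write dirty data back (wbL1()), acquire-releases
// do both (invwbL1()), and kernel begin/end are treated as acquire and
// release respectively.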
RequestStatus
VIPERCoalescer::makeRequest(PacketPtr pkt)
{
    if (m_outstanding_wb | m_outstanding_inv) {
        DPRINTF(GPUCoalescer,
                "There are %d Writebacks and %d Invalidations\n",
                m_outstanding_wb, m_outstanding_inv);
    }
    // Are we in the middle of a release?
    if ((m_outstanding_wb) > 0) {
        if (pkt->req->isKernel()) {
            // Everything is fine.
            // Barriers and Kernel End can coalesce.
            // If it is a Kernel Begin, flush the cache.
            if (pkt->req->isAcquire() && (m_outstanding_inv == 0)) {
                invL1();
            }

            if (pkt->req->isRelease()) {
                insertKernel(pkt->req->contextId(), pkt);
            }

            return RequestStatus_Issued;
        }
//      return RequestStatus_Aliased;
    } else if (pkt->req->isKernel() && pkt->req->isRelease()) {
        // Flush dirty data on Kernel End
        // isKernel + isRelease
        insertKernel(pkt->req->contextId(), pkt);
        wbL1();
        if (m_outstanding_wb == 0) {
            for (auto it = kernelEndList.begin(); it != kernelEndList.end();
                 it++) {
                newKernelEnds.push_back(it->first);
            }
            completeIssue();
        }
        return RequestStatus_Issued;
    }
    RequestStatus requestStatus = GPUCoalescer::makeRequest(pkt);
    if (requestStatus != RequestStatus_Issued) {
        // Request not issued;
        // enqueue a retry.
        DPRINTF(GPUCoalescer, "Request not issued by GPUCoalescer\n");
        return requestStatus;
    } else if (pkt->req->isKernel() && pkt->req->isAcquire()) {
        // Invalidate clean data on Kernel Begin
        // isKernel + isAcquire
        invL1();
    } else if (pkt->req->isAcquire() && pkt->req->isRelease()) {
        // Deschedule the AtomicAcqRel and
        // flush and invalidate the L1 cache.
        invwbL1();
        if (m_outstanding_wb > 0 && issueEvent.scheduled()) {
            DPRINTF(GPUCoalescer, "issueEvent Descheduled\n");
            deschedule(issueEvent);
        }
    } else if (pkt->req->isRelease()) {
        // Deschedule the StoreRel and
        // flush the L1 cache.
        wbL1();
        if (m_outstanding_wb > 0 && issueEvent.scheduled()) {
            DPRINTF(GPUCoalescer, "issueEvent Descheduled\n");
            deschedule(issueEvent);
        }
    } else if (pkt->req->isAcquire()) {
        // LoadAcq or AtomicAcq:
        // invalidate the L1 cache.
        invL1();
    }
    // Request was successful
    if (m_outstanding_wb == 0) {
        if (!issueEvent.scheduled()) {
            DPRINTF(GPUCoalescer, "issueEvent Rescheduled\n");
            schedule(issueEvent, curTick());
        }
    }
    return RequestStatus_Issued;
}
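
// Called as each writeback issued by wbL1() completes. Once the last
// outstanding writeback drains, pending kernel-end packets are released
// via completeIssue() and stalled requests are retried.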
void
VIPERCoalescer::wbCallback(Addr address)
{
    m_outstanding_wb--;
    // If the L1 flush is complete,
    // attempt to schedule the issueEvent.
    assert(((int) m_outstanding_wb) >= 0);
    if (m_outstanding_wb == 0) {
        for (auto it = kernelEndList.begin(); it != kernelEndList.end();
             it++) {
            newKernelEnds.push_back(it->first);
        }
        completeIssue();
    }
    trySendRetries();
}
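
// Called as each invalidation issued by invL1()/invwbL1() completes.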
void
VIPERCoalescer::invCallback(Addr address)
{
    m_outstanding_inv--;
    // If the L1 flush is complete,
    // attempt to schedule the issueEvent.
    // This probably won't happen, since
    // we don't wait on cache invalidations.
    if (m_outstanding_wb == 0) {
        for (auto it = kernelEndList.begin(); it != kernelEndList.end();
             it++) {
            newKernelEnds.push_back(it->first);
        }
        completeIssue();
    }
    trySendRetries();
}

/**
 * Invalidate L1 cache (Acquire)
 */
void
VIPERCoalescer::invL1()
{
    int size = m_dataCache_ptr->getNumBlocks();
    DPRINTF(GPUCoalescer,
            "There are %d Invalidations outstanding before Cache Walk\n",
            m_outstanding_inv);
    // Walk the cache
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Evict read-only data
        RubyRequestType request_type = RubyRequestType_REPLACEMENT;
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, (uint8_t*) 0, 0, 0,
            request_type, RubyAccessMode_Supervisor,
            nullptr);
        assert(m_mandatory_q_ptr != NULL);
        Tick latency = cyclesToTicks(
            m_controller->mandatoryQueueLatency(request_type));
        assert(latency > 0);
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
        m_outstanding_inv++;
    }
    DPRINTF(GPUCoalescer,
            "There are %d Invalidations outstanding after Cache Walk\n",
            m_outstanding_inv);
}

/**
 * Writeback L1 cache (Release)
 */
void
VIPERCoalescer::wbL1()
{
    int size = m_dataCache_ptr->getNumBlocks();
    DPRINTF(GPUCoalescer,
            "There are %d Writebacks outstanding before Cache Walk\n",
            m_outstanding_wb);
    // Walk the cache
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Write dirty data back
        RubyRequestType request_type = RubyRequestType_FLUSH;
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, (uint8_t*) 0, 0, 0,
            request_type, RubyAccessMode_Supervisor,
            nullptr);
        assert(m_mandatory_q_ptr != NULL);
        Tick latency = cyclesToTicks(
            m_controller->mandatoryQueueLatency(request_type));
        assert(latency > 0);
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
        m_outstanding_wb++;
    }
    DPRINTF(GPUCoalescer,
            "There are %d Writebacks outstanding after Cache Walk\n",
            m_outstanding_wb);
}

/**
 * Invalidate and Writeback L1 cache (Acquire&Release)
 */
void
VIPERCoalescer::invwbL1()
{
    int size = m_dataCache_ptr->getNumBlocks();
    // Walk the cache
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Evict read-only data
        RubyRequestType request_type = RubyRequestType_REPLACEMENT;
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, (uint8_t*) 0, 0, 0,
            request_type, RubyAccessMode_Supervisor,
            nullptr);
        assert(m_mandatory_q_ptr != NULL);
        Tick latency = cyclesToTicks(
            m_controller->mandatoryQueueLatency(request_type));
        assert(latency > 0);
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
        m_outstanding_inv++;
    }
    // Walk the cache again
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Write dirty data back
        RubyRequestType request_type = RubyRequestType_FLUSH;
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, (uint8_t*) 0, 0, 0,
            request_type, RubyAccessMode_Supervisor,
            nullptr);
        assert(m_mandatory_q_ptr != NULL);
        Tick latency = cyclesToTicks(
            m_controller->mandatoryQueueLatency(request_type));
        assert(latency > 0);
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
        m_outstanding_wb++;
    }
}