gem5  v20.0.0.0
GPUCoalescer.cc
1 /*
2  * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "base/logging.hh"
35 #include "base/str.hh"
36 #include "config/the_isa.hh"
37 
38 #if THE_ISA == X86_ISA
39 #include "arch/x86/insts/microldstop.hh"
40 
41 #endif // X86_ISA
42 #include "mem/ruby/system/GPUCoalescer.hh"
43 
44 #include "cpu/testers/rubytest/RubyTester.hh"
45 #include "debug/GPUCoalescer.hh"
46 #include "debug/MemoryAccess.hh"
47 #include "debug/ProtocolTrace.hh"
48 #include "debug/RubyPort.hh"
49 #include "debug/RubyStats.hh"
50 #include "gpu-compute/shader.hh"
51 #include "mem/packet.hh"
52 #include "mem/ruby/common/DataBlock.hh"
53 #include "mem/ruby/common/SubBlock.hh"
54 #include "mem/ruby/network/MessageBuffer.hh"
55 #include "mem/ruby/profiler/Profiler.hh"
56 #include "mem/ruby/slicc_interface/AbstractController.hh"
57 #include "mem/ruby/slicc_interface/RubyRequest.hh"
58 #include "mem/ruby/structures/CacheMemory.hh"
59 #include "mem/ruby/system/RubySystem.hh"
60 #include "params/RubyGPUCoalescer.hh"
61 
62 using namespace std;
63 
64 GPUCoalescer *
65 RubyGPUCoalescerParams::create()
66 {
67  return new GPUCoalescer(this);
68 }
69 
70 HSAScope
71 reqScopeToHSAScope(const RequestPtr &req)
72 {
73  HSAScope accessScope = HSAScope_UNSPECIFIED;
74  if (req->isScoped()) {
75  if (req->isWavefrontScope()) {
76  accessScope = HSAScope_WAVEFRONT;
77  } else if (req->isWorkgroupScope()) {
78  accessScope = HSAScope_WORKGROUP;
79  } else if (req->isDeviceScope()) {
80  accessScope = HSAScope_DEVICE;
81  } else if (req->isSystemScope()) {
82  accessScope = HSAScope_SYSTEM;
83  } else {
84  fatal("Bad scope type");
85  }
86  }
87  return accessScope;
88 }
89 
90 HSASegment
91 reqSegmentToHSASegment(const RequestPtr &req)
92 {
93  HSASegment accessSegment = HSASegment_GLOBAL;
94 
95  if (req->isGlobalSegment()) {
96  accessSegment = HSASegment_GLOBAL;
97  } else if (req->isGroupSegment()) {
98  accessSegment = HSASegment_GROUP;
99  } else if (req->isPrivateSegment()) {
100  accessSegment = HSASegment_PRIVATE;
101  } else if (req->isKernargSegment()) {
102  accessSegment = HSASegment_KERNARG;
103  } else if (req->isReadonlySegment()) {
104  accessSegment = HSASegment_READONLY;
105  } else if (req->isSpillSegment()) {
106  accessSegment = HSASegment_SPILL;
107  } else if (req->isArgSegment()) {
108  accessSegment = HSASegment_ARG;
109  } else {
110  fatal("Bad segment type");
111  }
112 
113  return accessSegment;
114 }
115 
116 GPUCoalescer::GPUCoalescer(const Params *p)
117  : RubyPort(p),
118  issueEvent([this]{ completeIssue(); }, "Issue coalesced request",
119  false, Event::Progress_Event_Pri),
120  deadlockCheckEvent([this]{ wakeup(); }, "GPUCoalescer deadlock check")
121 {
122  m_store_waiting_on_load_cycles = 0;
123  m_store_waiting_on_store_cycles = 0;
124  m_load_waiting_on_store_cycles = 0;
125  m_load_waiting_on_load_cycles = 0;
126 
127  m_outstanding_count = 0;
128 
129  m_max_outstanding_requests = 0;
130  m_deadlock_threshold = 0;
131  m_instCache_ptr = nullptr;
132  m_dataCache_ptr = nullptr;
133 
134  m_instCache_ptr = p->icache;
135  m_dataCache_ptr = p->dcache;
136  m_max_outstanding_requests = p->max_outstanding_requests;
137  m_deadlock_threshold = p->deadlock_threshold;
138 
139  assert(m_max_outstanding_requests > 0);
140  assert(m_deadlock_threshold > 0);
141  assert(m_instCache_ptr);
142  assert(m_dataCache_ptr);
143 
144  m_runningGarnetStandalone = p->garnet_standalone;
145  assumingRfOCoherence = p->assume_rfo;
146 }
147 
148 GPUCoalescer::~GPUCoalescer()
149 {
150 }
151 
152 void
153 GPUCoalescer::wakeup()
154 {
155  // Check for deadlock of any of the requests
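// Any request that has been outstanding for m_deadlock_threshold cycles or
// more triggers one of the panics below; otherwise the check is simply
// rescheduled at the end of this function.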
156  Cycles current_time = curCycle();
157 
158  // Check across all outstanding requests
159  int total_outstanding = 0;
160 
161  RequestTable::iterator read = m_readRequestTable.begin();
162  RequestTable::iterator read_end = m_readRequestTable.end();
163  for (; read != read_end; ++read) {
164  GPUCoalescerRequest* request = read->second;
165  if (current_time - request->issue_time < m_deadlock_threshold)
166  continue;
167 
168  panic("Possible Deadlock detected. Aborting!\n"
169  "version: %d request.paddr: 0x%x m_readRequestTable: %d "
170  "current time: %u issue_time: %d difference: %d\n", m_version,
171  request->pkt->getAddr(), m_readRequestTable.size(),
172  current_time * clockPeriod(), request->issue_time * clockPeriod(),
173  (current_time - request->issue_time)*clockPeriod());
174  }
175 
176  RequestTable::iterator write = m_writeRequestTable.begin();
177  RequestTable::iterator write_end = m_writeRequestTable.end();
178  for (; write != write_end; ++write) {
179  GPUCoalescerRequest* request = write->second;
180  if (current_time - request->issue_time < m_deadlock_threshold)
181  continue;
182 
183  panic("Possible Deadlock detected. Aborting!\n"
184  "version: %d request.paddr: 0x%x m_writeRequestTable: %d "
185  "current time: %u issue_time: %d difference: %d\n", m_version,
186  request->pkt->getAddr(), m_writeRequestTable.size(),
187  current_time * clockPeriod(), request->issue_time * clockPeriod(),
188  (current_time - request->issue_time) * clockPeriod());
189  }
190 
191  total_outstanding += m_writeRequestTable.size();
192  total_outstanding += m_readRequestTable.size();
193 
194  assert(m_outstanding_count == total_outstanding);
195 
196  if (m_outstanding_count > 0) {
197  // If there are still outstanding requests, keep checking
198  schedule(deadlockCheckEvent,
199  m_deadlock_threshold * clockPeriod() +
200  curTick());
201  }
202 }
203 
204 void
205 GPUCoalescer::resetStats()
206 {
207  m_latencyHist.reset();
208  m_missLatencyHist.reset();
209  for (int i = 0; i < RubyRequestType_NUM; i++) {
210  m_typeLatencyHist[i]->reset();
211  m_missTypeLatencyHist[i]->reset();
212  for (int j = 0; j < MachineType_NUM; j++) {
213  m_missTypeMachLatencyHist[i][j]->reset();
214  }
215  }
216 
217  for (int i = 0; i < MachineType_NUM; i++) {
218  m_missMachLatencyHist[i]->reset();
219 
220  m_IssueToInitialDelayHist[i]->reset();
221  m_InitialToForwardDelayHist[i]->reset();
222  m_ForwardToFirstResponseDelayHist[i]->reset();
223  m_FirstResponseToCompletionDelayHist[i]->reset();
224  }
225 }
226 
227 void
228 GPUCoalescer::printProgress(ostream& out) const
229 {
230 }
231 
232 RequestStatus
233 GPUCoalescer::getRequestStatus(PacketPtr pkt, RubyRequestType request_type)
234 {
235  Addr line_addr = makeLineAddress(pkt->getAddr());
236 
237  if (!m_mandatory_q_ptr->areNSlotsAvailable(1, clockEdge())) {
238  return RequestStatus_BufferFull;
239  }
240 
241  if (m_controller->isBlocked(line_addr) &&
242  request_type != RubyRequestType_Locked_RMW_Write) {
243  return RequestStatus_Aliased;
244  }
245 
246  if ((request_type == RubyRequestType_ST) ||
247  (request_type == RubyRequestType_ATOMIC) ||
248  (request_type == RubyRequestType_ATOMIC_RETURN) ||
249  (request_type == RubyRequestType_ATOMIC_NO_RETURN) ||
250  (request_type == RubyRequestType_RMW_Read) ||
251  (request_type == RubyRequestType_RMW_Write) ||
252  (request_type == RubyRequestType_Load_Linked) ||
253  (request_type == RubyRequestType_Store_Conditional) ||
254  (request_type == RubyRequestType_Locked_RMW_Read) ||
255  (request_type == RubyRequestType_Locked_RMW_Write) ||
256  (request_type == RubyRequestType_FLUSH)) {
257 
258  // Check if there is any outstanding read request for the same
259  // cache line.
260  if (m_readRequestTable.count(line_addr) > 0) {
261  m_store_waiting_on_load_cycles++;
262  return RequestStatus_Aliased;
263  }
264 
265  if (m_writeRequestTable.count(line_addr) > 0) {
266  // There is an outstanding write request for the cache line
267  m_store_waiting_on_store_cycles++;
268  return RequestStatus_Aliased;
269  }
270  } else {
271  // Check if there is any outstanding write request for the same
272  // cache line.
273  if (m_writeRequestTable.count(line_addr) > 0) {
274  m_load_waiting_on_store_cycles++;
275  return RequestStatus_Aliased;
276  }
277 
278  if (m_readRequestTable.count(line_addr) > 0) {
279  // There is an outstanding read request for the cache line
280  m_load_waiting_on_load_cycles++;
281  return RequestStatus_Aliased;
282  }
283  }
284 
285  return RequestStatus_Ready;
286 
287 }
288 
289 
290 
291 // sets the kernelEndList
292 void
293 GPUCoalescer::insertKernel(int wavefront_id, PacketPtr pkt)
294 {
295  // Don't know if this will happen or is possible,
296  // but I just want to be careful and not have it become
297  // a simulator hang in the future
298  DPRINTF(GPUCoalescer, "inserting wf: %d to kernelEndlist\n", wavefront_id);
299  assert(kernelEndList.count(wavefront_id) == 0);
300 
301  kernelEndList[wavefront_id] = pkt;
302  DPRINTF(GPUCoalescer, "kernelEndList->size() = %d\n",
303  kernelEndList.size());
304 }
305 
306 
307 // Insert the request on the correct request table. Return true if
308 // the entry was already present.
309 bool
310 GPUCoalescer::insertRequest(PacketPtr pkt, RubyRequestType request_type)
311 {
312  assert(getRequestStatus(pkt, request_type) == RequestStatus_Ready ||
313  pkt->req->isLockedRMW() ||
314  !m_mandatory_q_ptr->areNSlotsAvailable(1, clockEdge()));
315 
316  int total_outstanding M5_VAR_USED =
317  m_writeRequestTable.size() + m_readRequestTable.size();
318 
319  assert(m_outstanding_count == total_outstanding);
320 
321  // See if we should schedule a deadlock check
322  if (!deadlockCheckEvent.scheduled()) {
324  }
325 
326  Addr line_addr = makeLineAddress(pkt->getAddr());
327  if ((request_type == RubyRequestType_ST) ||
328  (request_type == RubyRequestType_ATOMIC) ||
329  (request_type == RubyRequestType_ATOMIC_RETURN) ||
330  (request_type == RubyRequestType_ATOMIC_NO_RETURN) ||
331  (request_type == RubyRequestType_RMW_Read) ||
332  (request_type == RubyRequestType_RMW_Write) ||
333  (request_type == RubyRequestType_Load_Linked) ||
334  (request_type == RubyRequestType_Store_Conditional) ||
335  (request_type == RubyRequestType_Locked_RMW_Read) ||
336  (request_type == RubyRequestType_Locked_RMW_Write) ||
337  (request_type == RubyRequestType_FLUSH)) {
338 
339  pair<RequestTable::iterator, bool> r =
340  m_writeRequestTable.insert(RequestTable::value_type(line_addr,
341  (GPUCoalescerRequest*) NULL));
342  if (r.second) {
343  RequestTable::iterator i = r.first;
344  i->second = new GPUCoalescerRequest(pkt, request_type,
345  curCycle());
347  "Inserting write request for paddr %#x for type %d\n",
348  pkt->req->getPaddr(), i->second->m_type);
350  } else {
351  return true;
352  }
353  } else {
354  pair<RequestTable::iterator, bool> r =
355  m_readRequestTable.insert(RequestTable::value_type(line_addr,
356  (GPUCoalescerRequest*) NULL));
357 
358  if (r.second) {
359  RequestTable::iterator i = r.first;
360  i->second = new GPUCoalescerRequest(pkt, request_type,
361  curCycle());
363  "Inserting read request for paddr %#x for type %d\n",
364  pkt->req->getPaddr(), i->second->m_type);
366  } else {
367  return true;
368  }
369  }
370 
372 
373  total_outstanding = m_writeRequestTable.size() + m_readRequestTable.size();
374  assert(m_outstanding_count == total_outstanding);
375 
376  return false;
377 }
378 
379 void
380 GPUCoalescer::markRemoved()
381 {
382  m_outstanding_count--;
383  assert(m_outstanding_count ==
384  m_writeRequestTable.size() + m_readRequestTable.size());
385 }
386 
387 void
388 GPUCoalescer::removeRequest(GPUCoalescerRequest* srequest)
389 {
390  assert(m_outstanding_count ==
391  m_writeRequestTable.size() + m_readRequestTable.size());
392 
393  Addr line_addr = makeLineAddress(srequest->pkt->getAddr());
394  if ((srequest->m_type == RubyRequestType_ST) ||
395  (srequest->m_type == RubyRequestType_RMW_Read) ||
396  (srequest->m_type == RubyRequestType_RMW_Write) ||
397  (srequest->m_type == RubyRequestType_Load_Linked) ||
398  (srequest->m_type == RubyRequestType_Store_Conditional) ||
399  (srequest->m_type == RubyRequestType_Locked_RMW_Read) ||
400  (srequest->m_type == RubyRequestType_Locked_RMW_Write)) {
401  m_writeRequestTable.erase(line_addr);
402  } else {
403  m_readRequestTable.erase(line_addr);
404  }
405 
406  markRemoved();
407 }
408 
409 bool
410 GPUCoalescer::handleLlsc(Addr address, GPUCoalescerRequest* request)
411 {
412  //
413  // The success flag indicates whether the LLSC operation was successful.
414  // LL ops will always succeed, but SC may fail if the cache line is no
415  // longer locked.
416  //
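// Concretely: a Load_Linked locks the line in the coalescer's data cache
// (setLocked below), a later Store_Conditional succeeds only if that lock
// is still held for this version, and a normal write to a locked, present
// line simply clears the lock.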
417  bool success = true;
418  if (request->m_type == RubyRequestType_Store_Conditional) {
419  if (!m_dataCache_ptr->isLocked(address, m_version)) {
420  //
421  // For failed SC requests, indicate the failure to the cpu by
422  // setting the extra data to zero.
423  //
424  request->pkt->req->setExtraData(0);
425  success = false;
426  } else {
427  //
428  // For successful SC requests, indicate the success to the cpu by
429  // setting the extra data to one.
430  //
431  request->pkt->req->setExtraData(1);
432  }
433  //
434  // Independent of success, all SC operations must clear the lock
435  //
436  m_dataCache_ptr->clearLocked(address);
437  } else if (request->m_type == RubyRequestType_Load_Linked) {
438  //
439  // Note: To fully follow Alpha LLSC semantics, should the LL clear any
440  // previously locked cache lines?
441  //
442  m_dataCache_ptr->setLocked(address, m_version);
443  } else if ((m_dataCache_ptr->isTagPresent(address)) &&
444  (m_dataCache_ptr->isLocked(address, m_version))) {
445  //
446  // Normal writes should clear the locked address
447  //
448  m_dataCache_ptr->clearLocked(address);
449  }
450  return success;
451 }
452 
453 void
454 GPUCoalescer::writeCallback(Addr address, DataBlock& data)
455 {
456  writeCallback(address, MachineType_NULL, data);
457 }
458 
459 void
460 GPUCoalescer::writeCallback(Addr address,
461  MachineType mach,
462  DataBlock& data)
463 {
464  writeCallback(address, mach, data, Cycles(0), Cycles(0), Cycles(0));
465 }
466 
467 void
468 GPUCoalescer::writeCallback(Addr address,
469  MachineType mach,
470  DataBlock& data,
471  Cycles initialRequestTime,
472  Cycles forwardRequestTime,
473  Cycles firstResponseTime)
474 {
475  writeCallback(address, mach, data,
476  initialRequestTime, forwardRequestTime, firstResponseTime,
477  false);
478 }
479 
480 void
481 GPUCoalescer::writeCallback(Addr address,
482  MachineType mach,
483  DataBlock& data,
484  Cycles initialRequestTime,
485  Cycles forwardRequestTime,
486  Cycles firstResponseTime,
487  bool isRegion)
488 {
489  assert(address == makeLineAddress(address));
490 
491  DPRINTF(GPUCoalescer, "write callback for address %#x\n", address);
492  assert(m_writeRequestTable.count(makeLineAddress(address)));
493 
494  RequestTable::iterator i = m_writeRequestTable.find(address);
495  assert(i != m_writeRequestTable.end());
496  GPUCoalescerRequest* request = i->second;
497 
498  m_writeRequestTable.erase(i);
499  markRemoved();
500 
501  assert((request->m_type == RubyRequestType_ST) ||
502  (request->m_type == RubyRequestType_ATOMIC) ||
503  (request->m_type == RubyRequestType_ATOMIC_RETURN) ||
504  (request->m_type == RubyRequestType_ATOMIC_NO_RETURN) ||
505  (request->m_type == RubyRequestType_RMW_Read) ||
506  (request->m_type == RubyRequestType_RMW_Write) ||
507  (request->m_type == RubyRequestType_Load_Linked) ||
508  (request->m_type == RubyRequestType_Store_Conditional) ||
509  (request->m_type == RubyRequestType_Locked_RMW_Read) ||
510  (request->m_type == RubyRequestType_Locked_RMW_Write) ||
511  (request->m_type == RubyRequestType_FLUSH));
512 
513 
514  //
515  // For Alpha, properly handle LL, SC, and write requests with respect to
516  // locked cache blocks.
517  //
518  // Not valid for Garnet_standalone protocol
519  //
520  bool success = true;
521  if (!m_runningGarnetStandalone)
522  success = handleLlsc(address, request);
523 
524  if (request->m_type == RubyRequestType_Locked_RMW_Read) {
525  m_controller->blockOnQueue(address, m_mandatory_q_ptr);
526  } else if (request->m_type == RubyRequestType_Locked_RMW_Write) {
527  m_controller->unblock(address);
528  }
529 
530  hitCallback(request, mach, data, success,
531  request->issue_time, forwardRequestTime, firstResponseTime,
532  isRegion);
533 }
534 
535 void
536 GPUCoalescer::readCallback(Addr address, DataBlock& data)
537 {
538  readCallback(address, MachineType_NULL, data);
539 }
540 
541 void
542 GPUCoalescer::readCallback(Addr address,
543  MachineType mach,
544  DataBlock& data)
545 {
546  readCallback(address, mach, data, Cycles(0), Cycles(0), Cycles(0));
547 }
548 
549 void
550 GPUCoalescer::readCallback(Addr address,
551  MachineType mach,
552  DataBlock& data,
553  Cycles initialRequestTime,
554  Cycles forwardRequestTime,
555  Cycles firstResponseTime)
556 {
557 
558  readCallback(address, mach, data,
559  initialRequestTime, forwardRequestTime, firstResponseTime,
560  false);
561 }
562 
563 void
564 GPUCoalescer::readCallback(Addr address,
565  MachineType mach,
566  DataBlock& data,
567  Cycles initialRequestTime,
568  Cycles forwardRequestTime,
569  Cycles firstResponseTime,
570  bool isRegion)
571 {
572  assert(address == makeLineAddress(address));
573  assert(m_readRequestTable.count(makeLineAddress(address)));
574 
575  DPRINTF(GPUCoalescer, "read callback for address %#x\n", address);
576  RequestTable::iterator i = m_readRequestTable.find(address);
577  assert(i != m_readRequestTable.end());
578  GPUCoalescerRequest* request = i->second;
579 
580  m_readRequestTable.erase(i);
581  markRemoved();
582 
583  assert((request->m_type == RubyRequestType_LD) ||
584  (request->m_type == RubyRequestType_IFETCH));
585 
586  hitCallback(request, mach, data, true,
587  request->issue_time, forwardRequestTime, firstResponseTime,
588  isRegion);
589 }
590 
591 void
592 GPUCoalescer::hitCallback(GPUCoalescerRequest* srequest,
593  MachineType mach,
594  DataBlock& data,
595  bool success,
596  Cycles initialRequestTime,
597  Cycles forwardRequestTime,
598  Cycles firstResponseTime,
599  bool isRegion)
600 {
601  PacketPtr pkt = srequest->pkt;
602  Addr request_address = pkt->getAddr();
603  Addr request_line_address = makeLineAddress(request_address);
604 
605  RubyRequestType type = srequest->m_type;
606 
607  // Set this cache entry to the most recently used
608  if (type == RubyRequestType_IFETCH) {
609  if (m_instCache_ptr->isTagPresent(request_line_address))
610  m_instCache_ptr->setMRU(request_line_address);
611  } else {
612  if (m_dataCache_ptr->isTagPresent(request_line_address))
613  m_dataCache_ptr->setMRU(request_line_address);
614  }
615 
616  recordMissLatency(srequest, mach,
617  initialRequestTime,
618  forwardRequestTime,
619  firstResponseTime,
620  success, isRegion);
621  // update the data
622  //
623  // NOTE: this must be done for each request in the coalescer
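// For loads, IFETCHes, atomics-with-return and RMW/locked reads the bytes
// for each coalesced packet are copied out of the Ruby DataBlock at that
// packet's own offset; for stores the packet's bytes are copied into the
// block instead.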
624  int len = reqCoalescer[request_line_address].size();
625  std::vector<PacketPtr> mylist;
626  for (int i = 0; i < len; ++i) {
627  PacketPtr pkt = reqCoalescer[request_line_address][i].pkt;
628  assert(type == reqCoalescer[request_line_address][i].primaryType);
629  request_address = pkt->getAddr();
630  request_line_address = makeLineAddress(pkt->getAddr());
631  if (pkt->getPtr<uint8_t>()) {
632  if ((type == RubyRequestType_LD) ||
633  (type == RubyRequestType_ATOMIC) ||
634  (type == RubyRequestType_ATOMIC_RETURN) ||
635  (type == RubyRequestType_IFETCH) ||
636  (type == RubyRequestType_RMW_Read) ||
637  (type == RubyRequestType_Locked_RMW_Read) ||
638  (type == RubyRequestType_Load_Linked)) {
639  pkt->setData(
640  data.getData(getOffset(request_address), pkt->getSize()));
641  } else {
642  data.setData(pkt->getPtr<uint8_t>(),
643  getOffset(request_address), pkt->getSize());
644  }
645  } else {
646  DPRINTF(MemoryAccess,
647  "WARNING. Data not transfered from Ruby to M5 for type " \
648  "%s\n",
649  RubyRequestType_to_string(type));
650  }
651 
652  // If using the RubyTester, update the RubyTester sender state's
653  // subBlock with the received data. The tester will later access
654  // this state.
655  // Note: RubyPort will access its sender state before the
656  // RubyTester.
657  if (m_usingRubyTester) {
658  RubyPort::SenderState *requestSenderState =
659  safe_cast<RubyPort::SenderState*>(pkt->senderState);
660  RubyTester::SenderState* testerSenderState =
661  safe_cast<RubyTester::SenderState*>(requestSenderState->predecessor);
662  testerSenderState->subBlock.mergeFrom(data);
663  }
664 
665  mylist.push_back(pkt);
666  }
667  delete srequest;
668  reqCoalescer.erase(request_line_address);
669  assert(!reqCoalescer.count(request_line_address));
670 
671 
672 
673  completeHitCallback(mylist, len);
674 }
675 
676 bool
677 GPUCoalescer::empty() const
678 {
679  return m_writeRequestTable.empty() && m_readRequestTable.empty();
680 }
681 
682 // Analyzes the packet to see if this request can be coalesced.
683 // If request can be coalesced, this request is added to the reqCoalescer table
684 // and makeRequest returns RequestStatus_Issued;
685 // If this is the first request to a cacheline, request is added to both
686 // newRequests queue and to the reqCoalescer table; makeRequest
687 // returns RequestStatus_Issued.
688 // If there is a pending request to this cacheline and this request
689 // can't be coalesced, RequestStatus_Aliased is returned and
690 // the packet needs to be reissued.
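// Example (assuming the typical 64-byte Ruby block size): four lane reads
// to 0x1000, 0x1008, 0x1010 and 0x1038 issued in the same cycle coalesce
// into a single reqCoalescer entry and one line request, while a store to
// 0x1020 that shows up while those loads are pending is returned as
// Aliased and must be reissued.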
691 RequestStatus
692 GPUCoalescer::makeRequest(PacketPtr pkt)
693 {
694  // Check for GPU Barrier Kernel End or Kernel Begin
695  // Leave these to be handled by the child class
696  // Kernel End/Barrier = isFlush + isRelease
697  // Kernel Begin = isFlush + isAcquire
698  if (pkt->req->isKernel()) {
699  if (pkt->req->isAcquire()) {
700  // This is a Kernel Begin leave handling to
701  // virtual xCoalescer::makeRequest
702  return RequestStatus_Issued;
703  } else if (pkt->req->isRelease()) {
704  // This is a Kernel End leave handling to
705  // virtual xCoalescer::makeRequest
706  // If we are here then we didn't call
707  // a virtual version of this function
708  // so we will also schedule the callback
709  int wf_id = 0;
710  if (pkt->req->hasContextId()) {
711  wf_id = pkt->req->contextId();
712  }
713  insertKernel(wf_id, pkt);
714  newKernelEnds.push_back(wf_id);
715  if (!issueEvent.scheduled()) {
716  schedule(issueEvent, curTick());
717  }
718  return RequestStatus_Issued;
719  }
720  }
721 
722  // If number of outstanding requests greater than the max allowed,
723  // return RequestStatus_BufferFull. This logic can be extended to
724  // support proper backpressure.
725  if (m_outstanding_count >= m_max_outstanding_requests) {
726  return RequestStatus_BufferFull;
727  }
728 
729  RubyRequestType primary_type = RubyRequestType_NULL;
730  RubyRequestType secondary_type = RubyRequestType_NULL;
731 
732  if (pkt->isLLSC()) {
733  //
734  // Alpha LL/SC instructions need to be handled carefully by the cache
735  // coherence protocol to ensure they follow the proper semantics. In
736  // particular, by identifying the operations as atomic, the protocol
737  // should understand that migratory sharing optimizations should not
738  // be performed (i.e. a load between the LL and SC should not steal
739  // away exclusive permission).
740  //
741  if (pkt->isWrite()) {
742  primary_type = RubyRequestType_Store_Conditional;
743  } else {
744  assert(pkt->isRead());
745  primary_type = RubyRequestType_Load_Linked;
746  }
747  secondary_type = RubyRequestType_ATOMIC;
748  } else if (pkt->req->isLockedRMW()) {
749  //
750  // x86 locked instructions are translated to store cache coherence
751  // requests because these requests should always be treated as read
752  // exclusive operations and should leverage any migratory sharing
753  // optimization built into the protocol.
754  //
755  if (pkt->isWrite()) {
756  primary_type = RubyRequestType_Locked_RMW_Write;
757  } else {
758  assert(pkt->isRead());
759  primary_type = RubyRequestType_Locked_RMW_Read;
760  }
761  secondary_type = RubyRequestType_ST;
762  } else if (pkt->isAtomicOp()) {
763  //
764  // GPU Atomic Operation
765  //
766  primary_type = RubyRequestType_ATOMIC;
767  secondary_type = RubyRequestType_ATOMIC;
768  } else {
769  if (pkt->isRead()) {
770  if (pkt->req->isInstFetch()) {
771  primary_type = secondary_type = RubyRequestType_IFETCH;
772  } else {
773 #if THE_ISA == X86_ISA
774  uint32_t flags = pkt->req->getFlags();
775  bool storeCheck = flags &
776  (TheISA::StoreCheck << TheISA::FlagShift);
777 #else
778  bool storeCheck = false;
779 #endif // X86_ISA
780  if (storeCheck) {
781  primary_type = RubyRequestType_RMW_Read;
782  secondary_type = RubyRequestType_ST;
783  } else {
784  primary_type = secondary_type = RubyRequestType_LD;
785  }
786  }
787  } else if (pkt->isWrite()) {
788  //
789  // Note: M5 packets do not differentiate ST from RMW_Write
790  //
791  primary_type = secondary_type = RubyRequestType_ST;
792  } else if (pkt->isFlush()) {
793  primary_type = secondary_type = RubyRequestType_FLUSH;
794  } else if (pkt->req->isRelease() || pkt->req->isAcquire()) {
795  if (assumingRfOCoherence) {
796  // If we reached here, this request must be a memFence,
797  // and since the protocol implements RfO, the coalescer can
798  // assume sequential consistency and schedule the callback
799  // immediately.
800  // Currently the code implements fence callbacks
801  // by reusing the mechanism for kernel completions.
802  // This should be fixed.
803  int wf_id = 0;
804  if (pkt->req->hasContextId()) {
805  wf_id = pkt->req->contextId();
806  }
807  insertKernel(wf_id, pkt);
808  newKernelEnds.push_back(wf_id);
809  if (!issueEvent.scheduled()) {
810  schedule(issueEvent, curTick());
811  }
812  return RequestStatus_Issued;
813  } else {
814  // If not RfO, return issued here and let the child coalescer
815  // take care of it.
816  return RequestStatus_Issued;
817  }
818  } else {
819  panic("Unsupported ruby packet type\n");
820  }
821  }
822 
823  // Check if there is any pending request to this cache line from
824  // previous cycles.
825  // If there is a pending request, return aliased. Since coalescing
826  // across time is not permitted, aliased requests are not coalesced.
827  // If a request for this address has already been issued, we must block
828  RequestStatus status = getRequestStatus(pkt, primary_type);
829  if (status != RequestStatus_Ready)
830  return status;
831 
832  Addr line_addr = makeLineAddress(pkt->getAddr());
833 
834  // Check if this request can be coalesced with previous
835  // requests from this cycle.
836  if (!reqCoalescer.count(line_addr)) {
837  // This is the first access to this cache line.
838  // A new request to the memory subsystem has to be
839  // made in the next cycle for this cache line, so
840  // add this line addr to the "newRequests" queue
841  newRequests.push_back(line_addr);
842 
843  // There was a request to this cache line in this cycle,
844  // let us see if we can coalesce this request with the previous
845  // requests from this cycle
846  } else if (primary_type !=
847  reqCoalescer[line_addr][0].primaryType) {
848  // can't coalesce loads, stores and atomics!
849  return RequestStatus_Aliased;
850  } else if (pkt->req->isLockedRMW() ||
851  reqCoalescer[line_addr][0].pkt->req->isLockedRMW()) {
852  // can't coalesce locked accesses, but can coalesce atomics!
853  return RequestStatus_Aliased;
854  } else if (pkt->req->hasContextId() && pkt->req->isRelease() &&
855  pkt->req->contextId() !=
856  reqCoalescer[line_addr][0].pkt->req->contextId()) {
857  // can't coalesce releases from different wavefronts
858  return RequestStatus_Aliased;
859  }
860 
861  // in addition to the packet, we need to save both request types
862  reqCoalescer[line_addr].emplace_back(pkt, primary_type, secondary_type);
863  if (!issueEvent.scheduled())
864  schedule(issueEvent, curTick());
865  // TODO: issue hardware prefetches here
866  return RequestStatus_Issued;
867 }
868 
869 void
870 GPUCoalescer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type)
871 {
872 
873  int proc_id = -1;
874  if (pkt != NULL && pkt->req->hasContextId()) {
875  proc_id = pkt->req->contextId();
876  }
877 
878  // If valid, copy the pc to the ruby request
879  Addr pc = 0;
880  if (pkt->req->hasPC()) {
881  pc = pkt->req->getPC();
882  }
883 
884  // At the moment setting scopes only counts
885  // for GPU spill space accesses,
886  // which is pkt->req->isStack().
887  // This scope is REPLACE since it
888  // does not need to be flushed at the end
889  // of a kernel; private and local may need
890  // to be visible at the end of the kernel.
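// reqScopeToHSAScope() and reqSegmentToHSASegment() above translate the
// request's flags into the HSA scope and segment that are attached to the
// RubyRequest below so the protocol can act on them.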
891  HSASegment accessSegment = reqSegmentToHSASegment(pkt->req);
892  HSAScope accessScope = reqScopeToHSAScope(pkt->req);
893 
894  Addr line_addr = makeLineAddress(pkt->getAddr());
895 
896  // Creating WriteMask that records written bytes
897  // and atomic operations. This enables partial writes
898  // and partial reads of those writes
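// For example, two coalesced 4-byte stores at line offsets 0 and 8 set
// accessMask[0..3] and accessMask[8..11] and copy just those bytes into
// dataBlock, while an atomic records its (offset, AtomicOpFunctor*) pair
// in atomicOps instead of writing dataBlock.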
899  DataBlock dataBlock;
900  dataBlock.clear();
901  uint32_t blockSize = RubySystem::getBlockSizeBytes();
902  std::vector<bool> accessMask(blockSize,false);
903  std::vector< std::pair<int,AtomicOpFunctor*> > atomicOps;
904  uint32_t tableSize = reqCoalescer[line_addr].size();
905  for (int i = 0; i < tableSize; i++) {
906  PacketPtr tmpPkt = reqCoalescer[line_addr][i].pkt;
907  uint32_t tmpOffset = (tmpPkt->getAddr()) - line_addr;
908  uint32_t tmpSize = tmpPkt->getSize();
909  if (tmpPkt->isAtomicOp()) {
910  std::pair<int,AtomicOpFunctor *> tmpAtomicOp(tmpOffset,
911  tmpPkt->getAtomicOp());
912  atomicOps.push_back(tmpAtomicOp);
913  } else if (tmpPkt->isWrite()) {
914  dataBlock.setData(tmpPkt->getPtr<uint8_t>(),
915  tmpOffset, tmpSize);
916  }
917  for (int j = 0; j < tmpSize; j++) {
918  accessMask[tmpOffset + j] = true;
919  }
920  }
921  std::shared_ptr<RubyRequest> msg;
922  if (pkt->isAtomicOp()) {
923  msg = std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
924  pkt->getPtr<uint8_t>(),
925  pkt->getSize(), pc, secondary_type,
926  RubyAccessMode_Supervisor, pkt,
927  PrefetchBit_No, proc_id, 100,
928  blockSize, accessMask,
929  dataBlock, atomicOps,
930  accessScope, accessSegment);
931  } else {
932  msg = std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
933  pkt->getPtr<uint8_t>(),
934  pkt->getSize(), pc, secondary_type,
935  RubyAccessMode_Supervisor, pkt,
936  PrefetchBit_No, proc_id, 100,
937  blockSize, accessMask,
938  dataBlock,
939  accessScope, accessSegment);
940  }
941  DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %s\n",
942  curTick(), m_version, "Coal", "Begin", "", "",
943  printAddress(msg->getPhysicalAddress()),
944  RubyRequestType_to_string(secondary_type));
945 
946  fatal_if(secondary_type == RubyRequestType_IFETCH,
947  "there should not be any I-Fetch requests in the GPU Coalescer");
948 
949  Tick latency = cyclesToTicks(
950  m_controller->mandatoryQueueLatency(secondary_type));
951  assert(latency > 0);
952 
953  assert(m_mandatory_q_ptr);
954  m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
955 }
956 
957 template <class KEY, class VALUE>
958 std::ostream &
959 operator<<(ostream &out, const std::unordered_map<KEY, VALUE> &map)
960 {
961  out << "[";
962  for (auto i = map.begin(); i != map.end(); ++i)
963  out << " " << i->first << "=" << i->second;
964  out << " ]";
965 
966  return out;
967 }
968 
969 void
970 GPUCoalescer::print(ostream& out) const
971 {
972  out << "[GPUCoalescer: " << m_version
973  << ", outstanding requests: " << m_outstanding_count
974  << ", read request table: " << m_readRequestTable
975  << ", write request table: " << m_writeRequestTable
976  << "]";
977 }
978 
979 // this can be called from setState whenever coherence permissions are
980 // upgraded when invoked, coherence violations will be checked for the
981 // given block
982 void
983 GPUCoalescer::checkCoherence(Addr addr)
984 {
985 }
986 
987 void
988 GPUCoalescer::recordRequestType(SequencerRequestType requestType) {
989  DPRINTF(RubyStats, "Recorded statistic: %s\n",
990  SequencerRequestType_to_string(requestType));
991 }
992 
993 
994 void
995 GPUCoalescer::completeIssue()
996 {
997  // newRequests has the cacheline addresses of all the
998  // requests which need to be issued to the memory subsystem
999  // in this cycle
1000  int len = newRequests.size();
1001  DPRINTF(GPUCoalescer, "Completing issue for %d new requests.\n", len);
1002  for (int i = 0; i < len; ++i) {
1003  // Get the requests from reqCoalescer table. Get only the
1004  // first request for each cacheline, the remaining requests
1005  // can be coalesced with the first request. So, only
1006  // one request is issued per cacheline.
1007  RequestDesc info = reqCoalescer[newRequests[i]][0];
1008  PacketPtr pkt = info.pkt;
1009  DPRINTF(GPUCoalescer, "Completing for newReq %d: paddr %#x\n",
1010  i, pkt->req->getPaddr());
1011  // Insert this request to the read/writeRequestTables. These tables
1012  // are used to track aliased requests in makeRequest subroutine
1013  bool found = insertRequest(pkt, info.primaryType);
1014 
1015  if (found) {
1016  panic("GPUCoalescer::makeRequest should never be called if the "
1017  "request is already outstanding\n");
1018  }
1019 
1020  // Issue request to ruby subsystem
1021  issueRequest(pkt, info.secondaryType);
1022  }
1023  newRequests.clear();
1024 
1025  // have Kernel End releases been issued this cycle
1026  len = newKernelEnds.size();
1027  for (int i = 0; i < len; i++) {
1028  kernelCallback(newKernelEnds[i]);
1029  }
1030  newKernelEnds.clear();
1031 }
1032 
1033 void
1034 GPUCoalescer::evictionCallback(Addr address)
1035 {
1036  ruby_eviction_callback(address);
1037 }
1038 
1039 void
1040 GPUCoalescer::kernelCallback(int wavefront_id)
1041 {
1042  assert(kernelEndList.count(wavefront_id));
1043 
1044  ruby_hit_callback(kernelEndList[wavefront_id]);
1045 
1046  kernelEndList.erase(wavefront_id);
1047 }
1048 
1049 void
1050 GPUCoalescer::atomicCallback(Addr address,
1051  MachineType mach,
1052  const DataBlock& data)
1053 {
1054  assert(address == makeLineAddress(address));
1055 
1056  DPRINTF(GPUCoalescer, "atomic callback for address %#x\n", address);
1057  assert(m_writeRequestTable.count(makeLineAddress(address)));
1058 
1059  RequestTable::iterator i = m_writeRequestTable.find(address);
1060  assert(i != m_writeRequestTable.end());
1061  GPUCoalescerRequest* srequest = i->second;
1062 
1063  m_writeRequestTable.erase(i);
1064  markRemoved();
1065 
1066  assert((srequest->m_type == RubyRequestType_ATOMIC) ||
1067  (srequest->m_type == RubyRequestType_ATOMIC_RETURN) ||
1068  (srequest->m_type == RubyRequestType_ATOMIC_NO_RETURN));
1069 
1070 
1071  // Atomics don't write to cache, so there is no MRU update...
1072 
1073  recordMissLatency(srequest, mach,
1074  srequest->issue_time, Cycles(0), Cycles(0), true, false);
1075 
1076  PacketPtr pkt = srequest->pkt;
1077  Addr request_address = pkt->getAddr();
1078  Addr request_line_address = makeLineAddress(pkt->getAddr());
1079 
1080  int len = reqCoalescer[request_line_address].size();
1081  std::vector<PacketPtr> mylist;
1082  for (int i = 0; i < len; ++i) {
1083  PacketPtr pkt = reqCoalescer[request_line_address][i].pkt;
1084  assert(srequest->m_type ==
1085  reqCoalescer[request_line_address][i].primaryType);
1086  request_address = (pkt->getAddr());
1087  request_line_address = makeLineAddress(request_address);
1088  if (pkt->getPtr<uint8_t>() &&
1089  srequest->m_type != RubyRequestType_ATOMIC_NO_RETURN) {
1090  /* atomics are done in memory, and return the data *before* the atomic op... */
1091  pkt->setData(
1092  data.getData(getOffset(request_address), pkt->getSize()));
1093  } else {
1094  DPRINTF(MemoryAccess,
1095  "WARNING. Data not transfered from Ruby to M5 for type " \
1096  "%s\n",
1097  RubyRequestType_to_string(srequest->m_type));
1098  }
1099 
1100  // If using the RubyTester, update the RubyTester sender state's
1101  // subBlock with the received data. The tester will later access
1102  // this state.
1103  // Note: RubyPort will access its sender state before the
1104  // RubyTester.
1105  if (m_usingRubyTester) {
1106  RubyPort::SenderState *requestSenderState =
1107  safe_cast<RubyPort::SenderState*>(pkt->senderState);
1108  RubyTester::SenderState* testerSenderState =
1109  safe_cast<RubyTester::SenderState*>(requestSenderState->predecessor);
1110  testerSenderState->subBlock.mergeFrom(data);
1111  }
1112 
1113  mylist.push_back(pkt);
1114  }
1115  delete srequest;
1116  reqCoalescer.erase(request_line_address);
1117  assert(!reqCoalescer.count(request_line_address));
1118 
1119  completeHitCallback(mylist, len);
1120 }
1121 
1122 void
1123 GPUCoalescer::recordCPReadCallBack(MachineID myMachID, MachineID senderMachID)
1124 {
1125  if (myMachID == senderMachID) {
1126  CP_TCPLdHits++;
1127  } else if (machineIDToMachineType(senderMachID) == MachineType_TCP) {
1128  CP_TCPLdTransfers++;
1129  } else if (machineIDToMachineType(senderMachID) == MachineType_TCC) {
1130  CP_TCCLdHits++;
1131  } else {
1132  CP_LdMiss++;
1133  }
1134 }
1135 
1136 void
1137 GPUCoalescer::recordCPWriteCallBack(MachineID myMachID, MachineID senderMachID)
1138 {
1139  if (myMachID == senderMachID) {
1140  CP_TCPStHits++;
1141  } else if (machineIDToMachineType(senderMachID) == MachineType_TCP) {
1142  CP_TCPStTransfers++;
1143  } else if (machineIDToMachineType(senderMachID) == MachineType_TCC) {
1144  CP_TCCStHits++;
1145  } else {
1146  CP_StMiss++;
1147  }
1148 }
1149 
1150 void
1151 GPUCoalescer::completeHitCallback(std::vector<PacketPtr>& mylist, int len)
1152 {
1153  for (int i = 0; i < len; ++i) {
1154  RubyPort::SenderState *ss =
1155  safe_cast<RubyPort::SenderState *>(mylist[i]->senderState);
1156  MemSlavePort *port = ss->port;
1157  assert(port != NULL);
1158 
1159  mylist[i]->senderState = ss->predecessor;
1160  delete ss;
1161  port->hitCallback(mylist[i]);
1162  trySendRetries();
1163  }
1164 
1165  testDrainComplete();
1166 }
1167 
1168 PacketPtr
1169 GPUCoalescer::mapAddrToPkt(Addr address)
1170 {
1171  RequestTable::iterator i = m_readRequestTable.find(address);
1172  assert(i != m_readRequestTable.end());
1173  GPUCoalescerRequest* request = i->second;
1174  return request->pkt;
1175 }
1176 
1177 void
1178 GPUCoalescer::recordMissLatency(GPUCoalescerRequest* srequest,
1179  MachineType mach,
1180  Cycles initialRequestTime,
1181  Cycles forwardRequestTime,
1182  Cycles firstResponseTime,
1183  bool success, bool isRegion)
1184 {
1185  RubyRequestType type = srequest->m_type;
1186  Cycles issued_time = srequest->issue_time;
1187  Cycles completion_time = curCycle();
1188  assert(completion_time >= issued_time);
1189  Cycles total_lat = completion_time - issued_time;
1190 
1191  // cache stats (valid for RfO protocol only)
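// mach identifies where the data came from: TCP means a local L1 hit,
// L1Cache_wCC a transfer from another TCP, TCC an L2 hit, and anything
// else is counted as a miss.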
1192  if (mach == MachineType_TCP) {
1193  if (type == RubyRequestType_LD) {
1194  GPU_TCPLdHits++;
1195  } else {
1196  GPU_TCPStHits++;
1197  }
1198  } else if (mach == MachineType_L1Cache_wCC) {
1199  if (type == RubyRequestType_LD) {
1200  GPU_TCPLdTransfers++;
1201  } else {
1202  GPU_TCPStTransfers++;
1203  }
1204  } else if (mach == MachineType_TCC) {
1205  if (type == RubyRequestType_LD) {
1206  GPU_TCCLdHits++;
1207  } else {
1208  GPU_TCCStHits++;
1209  }
1210  } else {
1211  if (type == RubyRequestType_LD) {
1212  GPU_LdMiss++;
1213  } else {
1214  GPU_StMiss++;
1215  }
1216  }
1217 
1218  // Profile all access latency, even zero latency accesses
1219  m_latencyHist.sample(total_lat);
1220  m_typeLatencyHist[type]->sample(total_lat);
1221 
1222  // Profile the miss latency for all non-zero demand misses
1223  if (total_lat != Cycles(0)) {
1224  m_missLatencyHist.sample(total_lat);
1225  m_missTypeLatencyHist[type]->sample(total_lat);
1226 
1227  if (mach != MachineType_NUM) {
1228  m_missMachLatencyHist[mach]->sample(total_lat);
1229  m_missTypeMachLatencyHist[type][mach]->sample(total_lat);
1230 
1231  if ((issued_time <= initialRequestTime) &&
1232  (initialRequestTime <= forwardRequestTime) &&
1233  (forwardRequestTime <= firstResponseTime) &&
1234  (firstResponseTime <= completion_time)) {
1235 
1236  m_IssueToInitialDelayHist[mach]->sample(
1237  initialRequestTime - issued_time);
1238  m_InitialToForwardDelayHist[mach]->sample(
1239  forwardRequestTime - initialRequestTime);
1240  m_ForwardToFirstResponseDelayHist[mach]->sample(
1241  firstResponseTime - forwardRequestTime);
1242  m_FirstResponseToCompletionDelayHist[mach]->sample(
1243  completion_time - firstResponseTime);
1244  }
1245  }
1246 
1247  }
1248 
1249  DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %d cycles\n",
1250  curTick(), m_version, "Coal",
1251  success ? "Done" : "SC_Failed", "", "",
1252  printAddress(srequest->pkt->getAddr()), total_lat);
1253 }
1254 
1255 void
1256 GPUCoalescer::regStats()
1257 {
1258  RubyPort::regStats();
1259 
1260  // These statistical variables are not for display.
1261  // The profiler will collate these across different
1262  // coalescers and display those collated statistics.
1263  m_outstandReqHist.init(10);
1264  m_latencyHist.init(10);
1265  m_missLatencyHist.init(10);
1266 
1267  for (int i = 0; i < RubyRequestType_NUM; i++) {
1268  m_typeLatencyHist.push_back(new Stats::Histogram());
1269  m_typeLatencyHist[i]->init(10);
1270 
1271  m_missTypeLatencyHist.push_back(new Stats::Histogram());
1272  m_missTypeLatencyHist[i]->init(10);
1273  }
1274 
1275  for (int i = 0; i < MachineType_NUM; i++) {
1276  m_missMachLatencyHist.push_back(new Stats::Histogram());
1277  m_missMachLatencyHist[i]->init(10);
1278 
1280  m_IssueToInitialDelayHist[i]->init(10);
1281 
1283  m_InitialToForwardDelayHist[i]->init(10);
1284 
1287 
1290  }
1291 
1292  for (int i = 0; i < RubyRequestType_NUM; i++) {
1293  m_missTypeMachLatencyHist.push_back(std::vector<Stats::Histogram *>());
1294 
1295  for (int j = 0; j < MachineType_NUM; j++) {
1296  m_missTypeMachLatencyHist[i].push_back(new Stats::Histogram());
1297  m_missTypeMachLatencyHist[i][j]->init(10);
1298  }
1299  }
1300 
1301  // GPU cache stats
1302  GPU_TCPLdHits
1303  .name(name() + ".gpu_tcp_ld_hits")
1304  .desc("loads that hit in the TCP")
1305  ;
1306  GPU_TCPLdTransfers
1307  .name(name() + ".gpu_tcp_ld_transfers")
1308  .desc("TCP to TCP load transfers")
1309  ;
1310  GPU_TCCLdHits
1311  .name(name() + ".gpu_tcc_ld_hits")
1312  .desc("loads that hit in the TCC")
1313  ;
1314  GPU_LdMiss
1315  .name(name() + ".gpu_ld_misses")
1316  .desc("loads that miss in the GPU")
1317  ;
1318 
1319  GPU_TCPStHits
1320  .name(name() + ".gpu_tcp_st_hits")
1321  .desc("stores that hit in the TCP")
1322  ;
1323  GPU_TCPStTransfers
1324  .name(name() + ".gpu_tcp_st_transfers")
1325  .desc("TCP to TCP store transfers")
1326  ;
1327  GPU_TCCStHits
1328  .name(name() + ".gpu_tcc_st_hits")
1329  .desc("stores that hit in the TCC")
1330  ;
1331  GPU_StMiss
1332  .name(name() + ".gpu_st_misses")
1333  .desc("stores that miss in the GPU")
1334  ;
1335 
1336  // CP cache stats
1337  CP_TCPLdHits
1338  .name(name() + ".cp_tcp_ld_hits")
1339  .desc("loads that hit in the TCP")
1340  ;
1341  CP_TCPLdTransfers
1342  .name(name() + ".cp_tcp_ld_transfers")
1343  .desc("TCP to TCP load transfers")
1344  ;
1345  CP_TCCLdHits
1346  .name(name() + ".cp_tcc_ld_hits")
1347  .desc("loads that hit in the TCC")
1348  ;
1349  CP_LdMiss
1350  .name(name() + ".cp_ld_misses")
1351  .desc("loads that miss in the GPU")
1352  ;
1353 
1354  CP_TCPStHits
1355  .name(name() + ".cp_tcp_st_hits")
1356  .desc("stores that hit in the TCP")
1357  ;
1358  CP_TCPStTransfers
1359  .name(name() + ".cp_tcp_st_transfers")
1360  .desc("TCP to TCP store transfers")
1361  ;
1362  CP_TCCStHits
1363  .name(name() + ".cp_tcc_st_hits")
1364  .desc("stores that hit in the TCC")
1365  ;
1366  CP_StMiss
1367  .name(name() + ".cp_st_misses")
1368  .desc("stores that miss in the GPU")
1369  ;
1370 }