gem5  v19.0.0.0
GPUCoalescer.cc
1 /*
2  * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Authors: Sooraj Puthoor
34  */
35 
36 #include "base/logging.hh"
37 #include "base/str.hh"
38 #include "config/the_isa.hh"
39 
40 #if THE_ISA == X86_ISA
42 
43 #endif // X86_ISA
45 
47 #include "debug/GPUCoalescer.hh"
48 #include "debug/MemoryAccess.hh"
49 #include "debug/ProtocolTrace.hh"
50 #include "debug/RubyPort.hh"
51 #include "debug/RubyStats.hh"
52 #include "gpu-compute/shader.hh"
53 #include "mem/packet.hh"
62 #include "params/RubyGPUCoalescer.hh"
63 
64 using namespace std;
65 
66 GPUCoalescer *
67 RubyGPUCoalescerParams::create()
68 {
69  return new GPUCoalescer(this);
70 }
71 
72 HSAScope
73 reqScopeToHSAScope(const RequestPtr &req)
74 {
75  HSAScope accessScope = HSAScope_UNSPECIFIED;
76  if (req->isScoped()) {
77  if (req->isWavefrontScope()) {
78  accessScope = HSAScope_WAVEFRONT;
79  } else if (req->isWorkgroupScope()) {
80  accessScope = HSAScope_WORKGROUP;
81  } else if (req->isDeviceScope()) {
82  accessScope = HSAScope_DEVICE;
83  } else if (req->isSystemScope()) {
84  accessScope = HSAScope_SYSTEM;
85  } else {
86  fatal("Bad scope type");
87  }
88  }
89  return accessScope;
90 }
91 
92 HSASegment
93 reqSegmentToHSASegment(const RequestPtr &req)
94 {
95  HSASegment accessSegment = HSASegment_GLOBAL;
96 
97  if (req->isGlobalSegment()) {
98  accessSegment = HSASegment_GLOBAL;
99  } else if (req->isGroupSegment()) {
100  accessSegment = HSASegment_GROUP;
101  } else if (req->isPrivateSegment()) {
102  accessSegment = HSASegment_PRIVATE;
103  } else if (req->isKernargSegment()) {
104  accessSegment = HSASegment_KERNARG;
105  } else if (req->isReadonlySegment()) {
106  accessSegment = HSASegment_READONLY;
107  } else if (req->isSpillSegment()) {
108  accessSegment = HSASegment_SPILL;
109  } else if (req->isArgSegment()) {
110  accessSegment = HSASegment_ARG;
111  } else {
112  fatal("Bad segment type");
113  }
114 
115  return accessSegment;
116 }
117 
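// The coalescer is built on top of RubyPort. The constructor wires up the two
// events used below (issueEvent runs completeIssue(), deadlockCheckEvent runs
// wakeup()), caches the instruction/data cache pointers from the params, and
// takes the outstanding-request limit and deadlock threshold from the
// RubyGPUCoalescer params.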
118 GPUCoalescer::GPUCoalescer(const Params *p)
119  : RubyPort(p),
120  issueEvent([this]{ completeIssue(); }, "Issue coalesced request",
121  false, Event::Progress_Event_Pri),
122  deadlockCheckEvent([this]{ wakeup(); }, "GPUCoalescer deadlock check")
123 {
128 
130 
133  m_instCache_ptr = nullptr;
134  m_dataCache_ptr = nullptr;
135 
136  m_instCache_ptr = p->icache;
137  m_dataCache_ptr = p->dcache;
138  m_max_outstanding_requests = p->max_outstanding_requests;
139  m_deadlock_threshold = p->deadlock_threshold;
140 
141  assert(m_max_outstanding_requests > 0);
142  assert(m_deadlock_threshold > 0);
143  assert(m_instCache_ptr);
144  assert(m_dataCache_ptr);
145 
146  m_runningGarnetStandalone = p->garnet_standalone;
147  assumingRfOCoherence = p->assume_rfo;
148 }
149 
150 GPUCoalescer::~GPUCoalescer()
151 {
152 }
153 
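// Periodic deadlock check: walk both outstanding-request tables and panic if
// any request has been outstanding for m_deadlock_threshold cycles or more;
// otherwise re-arm the check while requests remain outstanding.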
154 void
155 GPUCoalescer::wakeup()
156 {
157  // Check for deadlock of any of the requests
158  Cycles current_time = curCycle();
159 
160  // Check across all outstanding requests
161  int total_outstanding = 0;
162 
163  RequestTable::iterator read = m_readRequestTable.begin();
164  RequestTable::iterator read_end = m_readRequestTable.end();
165  for (; read != read_end; ++read) {
166  GPUCoalescerRequest* request = read->second;
167  if (current_time - request->issue_time < m_deadlock_threshold)
168  continue;
169 
170  panic("Possible Deadlock detected. Aborting!\n"
171  "version: %d request.paddr: 0x%x m_readRequestTable: %d "
172  "current time: %u issue_time: %d difference: %d\n", m_version,
173  request->pkt->getAddr(), m_readRequestTable.size(),
174  current_time * clockPeriod(), request->issue_time * clockPeriod(),
175  (current_time - request->issue_time)*clockPeriod());
176  }
177 
178  RequestTable::iterator write = m_writeRequestTable.begin();
179  RequestTable::iterator write_end = m_writeRequestTable.end();
180  for (; write != write_end; ++write) {
181  GPUCoalescerRequest* request = write->second;
182  if (current_time - request->issue_time < m_deadlock_threshold)
183  continue;
184 
185  panic("Possible Deadlock detected. Aborting!\n"
186  "version: %d request.paddr: 0x%x m_writeRequestTable: %d "
187  "current time: %u issue_time: %d difference: %d\n", m_version,
188  request->pkt->getAddr(), m_writeRequestTable.size(),
189  current_time * clockPeriod(), request->issue_time * clockPeriod(),
190  (current_time - request->issue_time) * clockPeriod());
191  }
192 
193  total_outstanding += m_writeRequestTable.size();
194  total_outstanding += m_readRequestTable.size();
195 
196  assert(m_outstanding_count == total_outstanding);
197 
198  if (m_outstanding_count > 0) {
199  // If there are still outstanding requests, keep checking
200  schedule(deadlockCheckEvent,
201  m_deadlock_threshold * clockPeriod() +
202  curTick());
203  }
204 }
205 
206 void
207 GPUCoalescer::resetStats()
208 {
211  for (int i = 0; i < RubyRequestType_NUM; i++) {
212  m_typeLatencyHist[i]->reset();
213  m_missTypeLatencyHist[i]->reset();
214  for (int j = 0; j < MachineType_NUM; j++) {
215  m_missTypeMachLatencyHist[i][j]->reset();
216  }
217  }
218 
219  for (int i = 0; i < MachineType_NUM; i++) {
220  m_missMachLatencyHist[i]->reset();
221 
222  m_IssueToInitialDelayHist[i]->reset();
223  m_InitialToForwardDelayHist[i]->reset();
224  m_ForwardToFirstResponseDelayHist[i]->reset();
225  m_FirstResponseToCompletionDelayHist[i]->reset();
226  }
227 }
228 
229 void
230 GPUCoalescer::printProgress(ostream& out) const
231 {
232 }
233 
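// Classify a new request against the outstanding-request tables: Ready if it
// can be issued now, Aliased if the line already has an outstanding access of
// a conflicting class, or BufferFull when it cannot be accepted this cycle.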
234 RequestStatus
235 GPUCoalescer::getRequestStatus(PacketPtr pkt, RubyRequestType request_type)
236 {
237  Addr line_addr = makeLineAddress(pkt->getAddr());
238 
239  if (!m_mandatory_q_ptr->areNSlotsAvailable(1, clockEdge())) {
240  return RequestStatus_BufferFull;
241  }
242 
243  if (m_controller->isBlocked(line_addr) &&
244  request_type != RubyRequestType_Locked_RMW_Write) {
245  return RequestStatus_Aliased;
246  }
247 
248  if ((request_type == RubyRequestType_ST) ||
249  (request_type == RubyRequestType_ATOMIC) ||
250  (request_type == RubyRequestType_ATOMIC_RETURN) ||
251  (request_type == RubyRequestType_ATOMIC_NO_RETURN) ||
252  (request_type == RubyRequestType_RMW_Read) ||
253  (request_type == RubyRequestType_RMW_Write) ||
254  (request_type == RubyRequestType_Load_Linked) ||
255  (request_type == RubyRequestType_Store_Conditional) ||
256  (request_type == RubyRequestType_Locked_RMW_Read) ||
257  (request_type == RubyRequestType_Locked_RMW_Write) ||
258  (request_type == RubyRequestType_FLUSH)) {
259 
260  // Check if there is any outstanding read request for the same
261  // cache line.
262  if (m_readRequestTable.count(line_addr) > 0) {
264  return RequestStatus_Aliased;
265  }
266 
267  if (m_writeRequestTable.count(line_addr) > 0) {
268  // There is an outstanding write request for the cache line
270  return RequestStatus_Aliased;
271  }
272  } else {
273  // Check if there is any outstanding write request for the same
274  // cache line.
275  if (m_writeRequestTable.count(line_addr) > 0) {
277  return RequestStatus_Aliased;
278  }
279 
280  if (m_readRequestTable.count(line_addr) > 0) {
281  // There is an outstanding read request for the cache line
283  return RequestStatus_Aliased;
284  }
285  }
286 
287  return RequestStatus_Ready;
288 
289 }
290 
291 
292 
293 // sets the kernelEndList
294 void
295 GPUCoalescer::insertKernel(int wavefront_id, PacketPtr pkt)
296 {
297  // It is unclear whether this can ever happen, but be
298  // careful here so that a duplicate entry cannot become
299  // a simulator hang in the future.
300  DPRINTF(GPUCoalescer, "inserting wf: %d to kernelEndlist\n", wavefront_id);
301  assert(kernelEndList.count(wavefront_id) == 0);
302 
303  kernelEndList[wavefront_id] = pkt;
304  DPRINTF(GPUCoalescer, "kernelEndList->size() = %d\n",
305  kernelEndList.size());
306 }
307 
308 
309 // Insert the request on the correct request table. Return true if
310 // the entry was already present.
311 bool
312 GPUCoalescer::insertRequest(PacketPtr pkt, RubyRequestType request_type)
313 {
314  assert(getRequestStatus(pkt, request_type) == RequestStatus_Ready ||
315  pkt->req->isLockedRMW() ||
316  !m_mandatory_q_ptr->areNSlotsAvailable(1, clockEdge()));
317 
318  int total_outstanding M5_VAR_USED =
319  m_writeRequestTable.size() + m_readRequestTable.size();
320 
321  assert(m_outstanding_count == total_outstanding);
322 
323  // See if we should schedule a deadlock check
324  if (!deadlockCheckEvent.scheduled()) {
326  }
327 
328  Addr line_addr = makeLineAddress(pkt->getAddr());
329  if ((request_type == RubyRequestType_ST) ||
330  (request_type == RubyRequestType_ATOMIC) ||
331  (request_type == RubyRequestType_ATOMIC_RETURN) ||
332  (request_type == RubyRequestType_ATOMIC_NO_RETURN) ||
333  (request_type == RubyRequestType_RMW_Read) ||
334  (request_type == RubyRequestType_RMW_Write) ||
335  (request_type == RubyRequestType_Load_Linked) ||
336  (request_type == RubyRequestType_Store_Conditional) ||
337  (request_type == RubyRequestType_Locked_RMW_Read) ||
338  (request_type == RubyRequestType_Locked_RMW_Write) ||
339  (request_type == RubyRequestType_FLUSH)) {
340 
341  pair<RequestTable::iterator, bool> r =
342  m_writeRequestTable.insert(RequestTable::value_type(line_addr,
343  (GPUCoalescerRequest*) NULL));
344  if (r.second) {
345  RequestTable::iterator i = r.first;
346  i->second = new GPUCoalescerRequest(pkt, request_type,
347  curCycle());
348  DPRINTF(GPUCoalescer,
349  "Inserting write request for paddr %#x for type %d\n",
350  pkt->req->getPaddr(), i->second->m_type);
351  m_outstanding_count++;
352  } else {
353  return true;
354  }
355  } else {
356  pair<RequestTable::iterator, bool> r =
357  m_readRequestTable.insert(RequestTable::value_type(line_addr,
358  (GPUCoalescerRequest*) NULL));
359 
360  if (r.second) {
361  RequestTable::iterator i = r.first;
362  i->second = new GPUCoalescerRequest(pkt, request_type,
363  curCycle());
364  DPRINTF(GPUCoalescer,
365  "Inserting read request for paddr %#x for type %d\n",
366  pkt->req->getPaddr(), i->second->m_type);
367  m_outstanding_count++;
368  } else {
369  return true;
370  }
371  }
372 
374 
375  total_outstanding = m_writeRequestTable.size() + m_readRequestTable.size();
376  assert(m_outstanding_count == total_outstanding);
377 
378  return false;
379 }
380 
381 void
382 GPUCoalescer::markRemoved()
383 {
384  m_outstanding_count--;
385  assert(m_outstanding_count ==
386  m_writeRequestTable.size() + m_readRequestTable.size());
387 }
388 
389 void
390 GPUCoalescer::removeRequest(GPUCoalescerRequest* srequest)
391 {
392  assert(m_outstanding_count ==
393  m_writeRequestTable.size() + m_readRequestTable.size());
394 
395  Addr line_addr = makeLineAddress(srequest->pkt->getAddr());
396  if ((srequest->m_type == RubyRequestType_ST) ||
397  (srequest->m_type == RubyRequestType_RMW_Read) ||
398  (srequest->m_type == RubyRequestType_RMW_Write) ||
399  (srequest->m_type == RubyRequestType_Load_Linked) ||
400  (srequest->m_type == RubyRequestType_Store_Conditional) ||
401  (srequest->m_type == RubyRequestType_Locked_RMW_Read) ||
402  (srequest->m_type == RubyRequestType_Locked_RMW_Write)) {
403  m_writeRequestTable.erase(line_addr);
404  } else {
405  m_readRequestTable.erase(line_addr);
406  }
407 
408  markRemoved();
409 }
410 
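// Load-linked/store-conditional bookkeeping on the data cache's lock bit.
// For example, an SC whose line lock was cleared by an intervening write
// fails: the packet's extra data is set to 0 and false is returned.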
411 bool
412 GPUCoalescer::handleLlsc(Addr address, GPUCoalescerRequest* request)
413 {
414  //
415  // The success flag indicates whether the LLSC operation was successful.
416  // LL ops will always succeed, but SC may fail if the cache line is no
417  // longer locked.
418  //
419  bool success = true;
420  if (request->m_type == RubyRequestType_Store_Conditional) {
421  if (!m_dataCache_ptr->isLocked(address, m_version)) {
422  //
423  // For failed SC requests, indicate the failure to the cpu by
424  // setting the extra data to zero.
425  //
426  request->pkt->req->setExtraData(0);
427  success = false;
428  } else {
429  //
430  // For successful SC requests, indicate the success to the cpu by
431  // setting the extra data to one.
432  //
433  request->pkt->req->setExtraData(1);
434  }
435  //
436  // Independent of success, all SC operations must clear the lock
437  //
438  m_dataCache_ptr->clearLocked(address);
439  } else if (request->m_type == RubyRequestType_Load_Linked) {
440  //
441  // Note: To fully follow Alpha LLSC semantics, should the LL clear any
442  // previously locked cache lines?
443  //
444  m_dataCache_ptr->setLocked(address, m_version);
445  } else if ((m_dataCache_ptr->isTagPresent(address)) &&
446  (m_dataCache_ptr->isLocked(address, m_version))) {
447  //
448  // Normal writes should clear the locked address
449  //
450  m_dataCache_ptr->clearLocked(address);
451  }
452  return success;
453 }
454 
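// The writeCallback() overloads below all funnel into the full version, which
// removes the request from the write table, applies LL/SC handling (skipped
// when running Garnet standalone), handles Locked_RMW blocking/unblocking on
// the controller, and finally hands the data back through hitCallback().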
455 void
456 GPUCoalescer::writeCallback(Addr address, DataBlock& data)
457 {
458  writeCallback(address, MachineType_NULL, data);
459 }
460 
461 void
462 GPUCoalescer::writeCallback(Addr address,
463  MachineType mach,
464  DataBlock& data)
465 {
466  writeCallback(address, mach, data, Cycles(0), Cycles(0), Cycles(0));
467 }
468 
469 void
470 GPUCoalescer::writeCallback(Addr address,
471  MachineType mach,
472  DataBlock& data,
473  Cycles initialRequestTime,
474  Cycles forwardRequestTime,
475  Cycles firstResponseTime)
476 {
477  writeCallback(address, mach, data,
478  initialRequestTime, forwardRequestTime, firstResponseTime,
479  false);
480 }
481 
482 void
483 GPUCoalescer::writeCallback(Addr address,
484  MachineType mach,
485  DataBlock& data,
486  Cycles initialRequestTime,
487  Cycles forwardRequestTime,
488  Cycles firstResponseTime,
489  bool isRegion)
490 {
491  assert(address == makeLineAddress(address));
492 
493  DPRINTF(GPUCoalescer, "write callback for address %#x\n", address);
494  assert(m_writeRequestTable.count(makeLineAddress(address)));
495 
496  RequestTable::iterator i = m_writeRequestTable.find(address);
497  assert(i != m_writeRequestTable.end());
498  GPUCoalescerRequest* request = i->second;
499 
500  m_writeRequestTable.erase(i);
501  markRemoved();
502 
503  assert((request->m_type == RubyRequestType_ST) ||
504  (request->m_type == RubyRequestType_ATOMIC) ||
505  (request->m_type == RubyRequestType_ATOMIC_RETURN) ||
506  (request->m_type == RubyRequestType_ATOMIC_NO_RETURN) ||
507  (request->m_type == RubyRequestType_RMW_Read) ||
508  (request->m_type == RubyRequestType_RMW_Write) ||
509  (request->m_type == RubyRequestType_Load_Linked) ||
510  (request->m_type == RubyRequestType_Store_Conditional) ||
511  (request->m_type == RubyRequestType_Locked_RMW_Read) ||
512  (request->m_type == RubyRequestType_Locked_RMW_Write) ||
513  (request->m_type == RubyRequestType_FLUSH));
514 
515 
516  //
517  // For Alpha, properly handle LL, SC, and write requests with respect to
518  // locked cache blocks.
519  //
520  // Not valid for Garnet_standalone protocol
521  //
522  bool success = true;
523  if (!m_runningGarnetStandalone)
524  success = handleLlsc(address, request);
525 
526  if (request->m_type == RubyRequestType_Locked_RMW_Read) {
527  m_controller->blockOnQueue(address, m_mandatory_q_ptr);
528  } else if (request->m_type == RubyRequestType_Locked_RMW_Write) {
529  m_controller->unblock(address);
530  }
531 
532  hitCallback(request, mach, data, success,
533  request->issue_time, forwardRequestTime, firstResponseTime,
534  isRegion);
535 }
536 
537 void
538 GPUCoalescer::readCallback(Addr address, DataBlock& data)
539 {
540  readCallback(address, MachineType_NULL, data);
541 }
542 
543 void
544 GPUCoalescer::readCallback(Addr address,
545  MachineType mach,
546  DataBlock& data)
547 {
548  readCallback(address, mach, data, Cycles(0), Cycles(0), Cycles(0));
549 }
550 
551 void
552 GPUCoalescer::readCallback(Addr address,
553  MachineType mach,
554  DataBlock& data,
555  Cycles initialRequestTime,
556  Cycles forwardRequestTime,
557  Cycles firstResponseTime)
558 {
559 
560  readCallback(address, mach, data,
561  initialRequestTime, forwardRequestTime, firstResponseTime,
562  false);
563 }
564 
565 void
566 GPUCoalescer::readCallback(Addr address,
567  MachineType mach,
568  DataBlock& data,
569  Cycles initialRequestTime,
570  Cycles forwardRequestTime,
571  Cycles firstResponseTime,
572  bool isRegion)
573 {
574  assert(address == makeLineAddress(address));
575  assert(m_readRequestTable.count(makeLineAddress(address)));
576 
577  DPRINTF(GPUCoalescer, "read callback for address %#x\n", address);
578  RequestTable::iterator i = m_readRequestTable.find(address);
579  assert(i != m_readRequestTable.end());
580  GPUCoalescerRequest* request = i->second;
581 
582  m_readRequestTable.erase(i);
583  markRemoved();
584 
585  assert((request->m_type == RubyRequestType_LD) ||
586  (request->m_type == RubyRequestType_IFETCH));
587 
588  hitCallback(request, mach, data, true,
589  request->issue_time, forwardRequestTime, firstResponseTime,
590  isRegion);
591 }
592 
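// Common completion path for reads and writes: update the MRU state of the
// line, record the miss latency, copy data between the Ruby DataBlock and
// every packet coalesced on this line (reads/atomics pull data out, writes
// push data in), then return all packets via completeHitCallback().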
593 void
594 GPUCoalescer::hitCallback(GPUCoalescerRequest* srequest,
595  MachineType mach,
596  DataBlock& data,
597  bool success,
598  Cycles initialRequestTime,
599  Cycles forwardRequestTime,
600  Cycles firstResponseTime,
601  bool isRegion)
602 {
603  PacketPtr pkt = srequest->pkt;
604  Addr request_address = pkt->getAddr();
605  Addr request_line_address = makeLineAddress(request_address);
606 
607  RubyRequestType type = srequest->m_type;
608 
609  // Set this cache entry to the most recently used
610  if (type == RubyRequestType_IFETCH) {
611  if (m_instCache_ptr->isTagPresent(request_line_address))
612  m_instCache_ptr->setMRU(request_line_address);
613  } else {
614  if (m_dataCache_ptr->isTagPresent(request_line_address))
615  m_dataCache_ptr->setMRU(request_line_address);
616  }
617 
618  recordMissLatency(srequest, mach,
619  initialRequestTime,
620  forwardRequestTime,
621  firstResponseTime,
622  success, isRegion);
623  // update the data
624  //
625  // This must be done for each request in the coalescer
626  int len = reqCoalescer[request_line_address].size();
627  std::vector<PacketPtr> mylist;
628  for (int i = 0; i < len; ++i) {
629  PacketPtr pkt = reqCoalescer[request_line_address][i].pkt;
630  assert(type == reqCoalescer[request_line_address][i].primaryType);
631  request_address = pkt->getAddr();
632  request_line_address = makeLineAddress(pkt->getAddr());
633  if (pkt->getPtr<uint8_t>()) {
634  if ((type == RubyRequestType_LD) ||
635  (type == RubyRequestType_ATOMIC) ||
636  (type == RubyRequestType_ATOMIC_RETURN) ||
637  (type == RubyRequestType_IFETCH) ||
638  (type == RubyRequestType_RMW_Read) ||
639  (type == RubyRequestType_Locked_RMW_Read) ||
640  (type == RubyRequestType_Load_Linked)) {
641  pkt->setData(
642  data.getData(getOffset(request_address), pkt->getSize()));
643  } else {
644  data.setData(pkt->getPtr<uint8_t>(),
645  getOffset(request_address), pkt->getSize());
646  }
647  } else {
648  DPRINTF(MemoryAccess,
649  "WARNING. Data not transferred from Ruby to M5 for type " \
650  "%s\n",
651  RubyRequestType_to_string(type));
652  }
653 
654  // If using the RubyTester, update the RubyTester sender state's
655  // subBlock with the received data. The tester will later access
656  // this state.
657  // Note: RubyPort will access its sender state before the
658  // RubyTester.
659  if (m_usingRubyTester) {
660  RubyPort::SenderState *requestSenderState =
661  safe_cast<RubyPort::SenderState*>(pkt->senderState);
662  RubyTester::SenderState* testerSenderState =
663  safe_cast<RubyTester::SenderState*>(requestSenderState->predecessor);
664  testerSenderState->subBlock.mergeFrom(data);
665  }
666 
667  mylist.push_back(pkt);
668  }
669  delete srequest;
670  reqCoalescer.erase(request_line_address);
671  assert(!reqCoalescer.count(request_line_address));
672 
673 
674 
675  completeHitCallback(mylist, len);
676 }
677 
678 bool
679 GPUCoalescer::empty() const
680 {
681  return m_writeRequestTable.empty() && m_readRequestTable.empty();
682 }
683 
684 // Analyzes the packet to see if this request can be coalesced.
685 // If request can be coalesced, this request is added to the reqCoalescer table
686 // and makeRequest returns RequestStatus_Issued;
687 // If this is the first request to a cacheline, request is added to both
688 // newRequests queue and to the reqCoalescer table; makeRequest
689 // returns RequestStatus_Issued.
690 // If there is a pending request to this cacheline and this request
691 // can't be coalesced, RequestStatus_Aliased is returned and
692 // the packet needs to be reissued.
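// An illustrative (hypothetical) caller-side sketch of how these return
// values are typically consumed; it is not part of this file:
//
//     RequestStatus s = coalescer->makeRequest(pkt);
//     if (s == RequestStatus_Aliased || s == RequestStatus_BufferFull) {
//         // hold on to pkt and retry it in a later cycle
//     } else {
//         assert(s == RequestStatus_Issued); // coalescer owns completion now
//     }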
693 RequestStatus
694 GPUCoalescer::makeRequest(PacketPtr pkt)
695 {
696  // Check for GPU Barrier Kernel End or Kernel Begin
697  // Leave these to be handled by the child class
698  // Kernel End/Barrier = isFlush + isRelease
699  // Kernel Begin = isFlush + isAcquire
700  if (pkt->req->isKernel()) {
701  if (pkt->req->isAcquire()) {
702  // This is a Kernel Begin leave handling to
703  // virtual xCoalescer::makeRequest
704  return RequestStatus_Issued;
705  } else if (pkt->req->isRelease()) {
706  // This is a Kernel End leave handling to
707  // virtual xCoalescer::makeRequest
708  // If we are here then we didn't call
709  // a virtual version of this function
710  // so we will also schedule the callback
711  int wf_id = 0;
712  if (pkt->req->hasContextId()) {
713  wf_id = pkt->req->contextId();
714  }
715  insertKernel(wf_id, pkt);
716  newKernelEnds.push_back(wf_id);
717  if (!issueEvent.scheduled()) {
718  schedule(issueEvent, curTick());
719  }
720  return RequestStatus_Issued;
721  }
722  }
723 
724  // If number of outstanding requests greater than the max allowed,
725  // return RequestStatus_BufferFull. This logic can be extended to
726  // support proper backpressure.
727  if (m_outstanding_count >= m_max_outstanding_requests) {
728  return RequestStatus_BufferFull;
729  }
730 
731  RubyRequestType primary_type = RubyRequestType_NULL;
732  RubyRequestType secondary_type = RubyRequestType_NULL;
733 
734  if (pkt->isLLSC()) {
735  //
736  // Alpha LL/SC instructions need to be handled carefully by the cache
737  // coherence protocol to ensure they follow the proper semantics. In
738  // particular, by identifying the operations as atomic, the protocol
739  // should understand that migratory sharing optimizations should not
740  // be performed (i.e. a load between the LL and SC should not steal
741  // away exclusive permission).
742  //
743  if (pkt->isWrite()) {
744  primary_type = RubyRequestType_Store_Conditional;
745  } else {
746  assert(pkt->isRead());
747  primary_type = RubyRequestType_Load_Linked;
748  }
749  secondary_type = RubyRequestType_ATOMIC;
750  } else if (pkt->req->isLockedRMW()) {
751  //
752  // x86 locked instructions are translated to store cache coherence
753  // requests because these requests should always be treated as read
754  // exclusive operations and should leverage any migratory sharing
755  // optimization built into the protocol.
756  //
757  if (pkt->isWrite()) {
758  primary_type = RubyRequestType_Locked_RMW_Write;
759  } else {
760  assert(pkt->isRead());
761  primary_type = RubyRequestType_Locked_RMW_Read;
762  }
763  secondary_type = RubyRequestType_ST;
764  } else if (pkt->isAtomicOp()) {
765  //
766  // GPU Atomic Operation
767  //
768  primary_type = RubyRequestType_ATOMIC;
769  secondary_type = RubyRequestType_ATOMIC;
770  } else {
771  if (pkt->isRead()) {
772  if (pkt->req->isInstFetch()) {
773  primary_type = secondary_type = RubyRequestType_IFETCH;
774  } else {
775 #if THE_ISA == X86_ISA
776  uint32_t flags = pkt->req->getFlags();
777  bool storeCheck = flags &
778  (TheISA::StoreCheck << TheISA::FlagShift);
779 #else
780  bool storeCheck = false;
781 #endif // X86_ISA
782  if (storeCheck) {
783  primary_type = RubyRequestType_RMW_Read;
784  secondary_type = RubyRequestType_ST;
785  } else {
786  primary_type = secondary_type = RubyRequestType_LD;
787  }
788  }
789  } else if (pkt->isWrite()) {
790  //
791  // Note: M5 packets do not differentiate ST from RMW_Write
792  //
793  primary_type = secondary_type = RubyRequestType_ST;
794  } else if (pkt->isFlush()) {
795  primary_type = secondary_type = RubyRequestType_FLUSH;
796  } else if (pkt->req->isRelease() || pkt->req->isAcquire()) {
797  if (assumingRfOCoherence) {
798  // If we reached here, this request must be a memFence
799  // and the protocol implements RfO; the coalescer can
800  // assume sequential consistency and schedule the callback
801  // immediately.
802  // Currently the code implements fence callbacks
803  // by reusing the mechanism for kernel completions.
804  // This should be fixed.
805  int wf_id = 0;
806  if (pkt->req->hasContextId()) {
807  wf_id = pkt->req->contextId();
808  }
809  insertKernel(wf_id, pkt);
810  newKernelEnds.push_back(wf_id);
811  if (!issueEvent.scheduled()) {
812  schedule(issueEvent, curTick());
813  }
814  return RequestStatus_Issued;
815  } else {
816  // If not RfO, return issued here and let the child coalescer
817  // take care of it.
818  return RequestStatus_Issued;
819  }
820  } else {
821  panic("Unsupported ruby packet type\n");
822  }
823  }
824 
825  // Check if there is any pending request to this cache line from
826  // previous cycles.
827  // If there is a pending request, return aliased. Since coalescing
828  // across time is not permitted, aliased requests are not coalesced.
829  // If a request for this address has already been issued, we must block
830  RequestStatus status = getRequestStatus(pkt, primary_type);
831  if (status != RequestStatus_Ready)
832  return status;
833 
834  Addr line_addr = makeLineAddress(pkt->getAddr());
835 
836  // Check if this request can be coalesced with previous
837  // requests from this cycle.
838  if (!reqCoalescer.count(line_addr)) {
839  // This is the first access to this cache line.
840  // A new request to the memory subsystem has to be
841  // made in the next cycle for this cache line, so
842  // add this line addr to the "newRequests" queue
843  newRequests.push_back(line_addr);
844 
845  // There was a request to this cache line in this cycle,
846  // let us see if we can coalesce this request with the previous
847  // requests from this cycle
848  } else if (primary_type !=
849  reqCoalescer[line_addr][0].primaryType) {
850  // can't coalesce loads, stores and atomics!
851  return RequestStatus_Aliased;
852  } else if (pkt->req->isLockedRMW() ||
853  reqCoalescer[line_addr][0].pkt->req->isLockedRMW()) {
854  // can't coalesce locked accesses, but can coalesce atomics!
855  return RequestStatus_Aliased;
856  } else if (pkt->req->hasContextId() && pkt->req->isRelease() &&
857  pkt->req->contextId() !=
858  reqCoalescer[line_addr][0].pkt->req->contextId()) {
859  // can't coalesce releases from different wavefronts
860  return RequestStatus_Aliased;
861  }
862 
863  // in addition to the packet, we need to save both request types
864  reqCoalescer[line_addr].emplace_back(pkt, primary_type, secondary_type);
865  if (!issueEvent.scheduled())
866  schedule(issueEvent, curTick());
867  // TODO: issue hardware prefetches here
868  return RequestStatus_Issued;
869 }
870 
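// Build and enqueue a single RubyRequest for a coalesced line. The byte-level
// accessMask and the write data in dataBlock are gathered from every packet
// coalesced on this line, and atomic operations are collected into atomicOps.
// As a worked example (assuming the common 64-byte Ruby block size), two
// coalesced 4-byte writes at line offsets 0 and 16 would set accessMask[0..3]
// and accessMask[16..19] and copy their data into those bytes of dataBlock.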
871 void
872 GPUCoalescer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type)
873 {
874 
875  int proc_id = -1;
876  if (pkt != NULL && pkt->req->hasContextId()) {
877  proc_id = pkt->req->contextId();
878  }
879 
880  // If valid, copy the pc to the ruby request
881  Addr pc = 0;
882  if (pkt->req->hasPC()) {
883  pc = pkt->req->getPC();
884  }
885 
886  // At the moment, setting scopes only matters
887  // for GPU spill space accesses,
888  // i.e., pkt->req->isStack().
889  // That scope is REPLACE since it
890  // does not need to be flushed at the end
891  // of a kernel. Private and local may need
892  // to be visible at the end of the kernel.
893  HSASegment accessSegment = reqSegmentToHSASegment(pkt->req);
894  HSAScope accessScope = reqScopeToHSAScope(pkt->req);
895 
896  Addr line_addr = makeLineAddress(pkt->getAddr());
897 
898  // Creating WriteMask that records written bytes
899  // and atomic operations. This enables partial writes
900  // and partial reads of those writes
901  DataBlock dataBlock;
902  dataBlock.clear();
903  uint32_t blockSize = RubySystem::getBlockSizeBytes();
904  std::vector<bool> accessMask(blockSize,false);
905  std::vector< std::pair<int,AtomicOpFunctor*> > atomicOps;
906  uint32_t tableSize = reqCoalescer[line_addr].size();
907  for (int i = 0; i < tableSize; i++) {
908  PacketPtr tmpPkt = reqCoalescer[line_addr][i].pkt;
909  uint32_t tmpOffset = (tmpPkt->getAddr()) - line_addr;
910  uint32_t tmpSize = tmpPkt->getSize();
911  if (tmpPkt->isAtomicOp()) {
912  std::pair<int,AtomicOpFunctor *> tmpAtomicOp(tmpOffset,
913  tmpPkt->getAtomicOp());
914  atomicOps.push_back(tmpAtomicOp);
915  } else if (tmpPkt->isWrite()) {
916  dataBlock.setData(tmpPkt->getPtr<uint8_t>(),
917  tmpOffset, tmpSize);
918  }
919  for (int j = 0; j < tmpSize; j++) {
920  accessMask[tmpOffset + j] = true;
921  }
922  }
923  std::shared_ptr<RubyRequest> msg;
924  if (pkt->isAtomicOp()) {
925  msg = std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
926  pkt->getPtr<uint8_t>(),
927  pkt->getSize(), pc, secondary_type,
928  RubyAccessMode_Supervisor, pkt,
929  PrefetchBit_No, proc_id, 100,
930  blockSize, accessMask,
931  dataBlock, atomicOps,
932  accessScope, accessSegment);
933  } else {
934  msg = std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
935  pkt->getPtr<uint8_t>(),
936  pkt->getSize(), pc, secondary_type,
937  RubyAccessMode_Supervisor, pkt,
938  PrefetchBit_No, proc_id, 100,
939  blockSize, accessMask,
940  dataBlock,
941  accessScope, accessSegment);
942  }
943  DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %s\n",
944  curTick(), m_version, "Coal", "Begin", "", "",
945  printAddress(msg->getPhysicalAddress()),
946  RubyRequestType_to_string(secondary_type));
947 
948  fatal_if(secondary_type == RubyRequestType_IFETCH,
949  "there should not be any I-Fetch requests in the GPU Coalescer");
950 
951  Tick latency = cyclesToTicks(
952  m_controller->mandatoryQueueLatency(secondary_type));
953  assert(latency > 0);
954 
955  assert(m_mandatory_q_ptr);
956  m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
957 }
958 
959 template <class KEY, class VALUE>
960 std::ostream &
961 operator<<(ostream &out, const std::unordered_map<KEY, VALUE> &map)
962 {
963  out << "[";
964  for (auto i = map.begin(); i != map.end(); ++i)
965  out << " " << i->first << "=" << i->second;
966  out << " ]";
967 
968  return out;
969 }
970 
971 void
972 GPUCoalescer::print(ostream& out) const
973 {
974  out << "[GPUCoalescer: " << m_version
975  << ", outstanding requests: " << m_outstanding_count
976  << ", read request table: " << m_readRequestTable
977  << ", write request table: " << m_writeRequestTable
978  << "]";
979 }
980 
981 // This can be called from setState whenever coherence permissions are
982 // upgraded; when invoked, coherence violations will be checked for the
983 // given block.
984 void
985 GPUCoalescer::checkCoherence(Addr addr)
986 {
987 }
988 
989 void
990 GPUCoalescer::recordRequestType(SequencerRequestType requestType) {
991  DPRINTF(RubyStats, "Recorded statistic: %s\n",
992  SequencerRequestType_to_string(requestType));
993 }
994 
995 
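// Runs from issueEvent: issue exactly one Ruby request per unique line
// address queued in newRequests this cycle (the remaining packets for that
// line ride along via reqCoalescer), then service any kernel-end completions
// queued in newKernelEnds.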
996 void
997 GPUCoalescer::completeIssue()
998 {
999  // newRequests has the cacheline addresses of all the
1000  // requests which need to be issued to the memory subsystem
1001  // in this cycle
1002  int len = newRequests.size();
1003  DPRINTF(GPUCoalescer, "Completing issue for %d new requests.\n", len);
1004  for (int i = 0; i < len; ++i) {
1005  // Get the requests from reqCoalescer table. Get only the
1006  // first request for each cacheline, the remaining requests
1007  // can be coalesced with the first request. So, only
1008  // one request is issued per cacheline.
1009  RequestDesc info = reqCoalescer[newRequests[i]][0];
1010  PacketPtr pkt = info.pkt;
1011  DPRINTF(GPUCoalescer, "Completing for newReq %d: paddr %#x\n",
1012  i, pkt->req->getPaddr());
1013  // Insert this request to the read/writeRequestTables. These tables
1014  // are used to track aliased requests in makeRequest subroutine
1015  bool found = insertRequest(pkt, info.primaryType);
1016 
1017  if (found) {
1018  panic("GPUCoalescer::makeRequest should never be called if the "
1019  "request is already outstanding\n");
1020  }
1021 
1022  // Issue request to ruby subsystem
1023  issueRequest(pkt, info.secondaryType);
1024  }
1025  newRequests.clear();
1026 
1027  // Have any Kernel End releases been issued this cycle?
1028  len = newKernelEnds.size();
1029  for (int i = 0; i < len; i++) {
1030  kernelCallback(newKernelEnds[i]);
1031  }
1032  newKernelEnds.clear();
1033 }
1034 
1035 void
1036 GPUCoalescer::evictionCallback(Addr address)
1037 {
1038  ruby_eviction_callback(address);
1039 }
1040 
1041 void
1042 GPUCoalescer::kernelCallback(int wavefront_id)
1043 {
1044  assert(kernelEndList.count(wavefront_id));
1045 
1046  ruby_hit_callback(kernelEndList[wavefront_id]);
1047 
1048  kernelEndList.erase(wavefront_id);
1049 }
1050 
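// Completion path for atomics. Mirrors writeCallback: the request is removed
// from the write table and the latency recorded, but no MRU update is done,
// and the value returned to each coalesced packet is the memory data from
// before the atomic operation (unless the type is ATOMIC_NO_RETURN).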
1051 void
1052 GPUCoalescer::atomicCallback(Addr address,
1053  MachineType mach,
1054  const DataBlock& data)
1055 {
1056  assert(address == makeLineAddress(address));
1057 
1058  DPRINTF(GPUCoalescer, "atomic callback for address %#x\n", address);
1059  assert(m_writeRequestTable.count(makeLineAddress(address)));
1060 
1061  RequestTable::iterator i = m_writeRequestTable.find(address);
1062  assert(i != m_writeRequestTable.end());
1063  GPUCoalescerRequest* srequest = i->second;
1064 
1065  m_writeRequestTable.erase(i);
1066  markRemoved();
1067 
1068  assert((srequest->m_type == RubyRequestType_ATOMIC) ||
1069  (srequest->m_type == RubyRequestType_ATOMIC_RETURN) ||
1070  (srequest->m_type == RubyRequestType_ATOMIC_NO_RETURN));
1071 
1072 
1073  // Atomics don't write to cache, so there is no MRU update...
1074 
1075  recordMissLatency(srequest, mach,
1076  srequest->issue_time, Cycles(0), Cycles(0), true, false);
1077 
1078  PacketPtr pkt = srequest->pkt;
1079  Addr request_address = pkt->getAddr();
1080  Addr request_line_address = makeLineAddress(pkt->getAddr());
1081 
1082  int len = reqCoalescer[request_line_address].size();
1083  std::vector<PacketPtr> mylist;
1084  for (int i = 0; i < len; ++i) {
1085  PacketPtr pkt = reqCoalescer[request_line_address][i].pkt;
1086  assert(srequest->m_type ==
1087  reqCoalescer[request_line_address][i].primaryType);
1088  request_address = (pkt->getAddr());
1089  request_line_address = makeLineAddress(request_address);
1090  if (pkt->getPtr<uint8_t>() &&
1091  srequest->m_type != RubyRequestType_ATOMIC_NO_RETURN) {
1092  /* atomics are done in memory, and return the data *before* the atomic op... */
1093  pkt->setData(
1094  data.getData(getOffset(request_address), pkt->getSize()));
1095  } else {
1096  DPRINTF(MemoryAccess,
1097  "WARNING. Data not transferred from Ruby to M5 for type " \
1098  "%s\n",
1099  RubyRequestType_to_string(srequest->m_type));
1100  }
1101 
1102  // If using the RubyTester, update the RubyTester sender state's
1103  // subBlock with the received data. The tester will later access
1104  // this state.
1105  // Note: RubyPort will access its sender state before the
1106  // RubyTester.
1107  if (m_usingRubyTester) {
1108  RubyPort::SenderState *requestSenderState =
1109  safe_cast<RubyPort::SenderState*>(pkt->senderState);
1110  RubyTester::SenderState* testerSenderState =
1111  safe_cast<RubyTester::SenderState*>(requestSenderState->predecessor);
1112  testerSenderState->subBlock.mergeFrom(data);
1113  }
1114 
1115  mylist.push_back(pkt);
1116  }
1117  delete srequest;
1118  reqCoalescer.erase(request_line_address);
1119  assert(!reqCoalescer.count(request_line_address));
1120 
1121  completeHitCallback(mylist, len);
1122 }
1123 
1124 void
1125 GPUCoalescer::recordCPReadCallBack(MachineID myMachID, MachineID senderMachID)
1126 {
1127  if (myMachID == senderMachID) {
1128  CP_TCPLdHits++;
1129  } else if (machineIDToMachineType(senderMachID) == MachineType_TCP) {
1130  CP_TCPLdTransfers++;
1131  } else if (machineIDToMachineType(senderMachID) == MachineType_TCC) {
1132  CP_TCCLdHits++;
1133  } else {
1134  CP_LdMiss++;
1135  }
1136 }
1137 
1138 void
1139 GPUCoalescer::recordCPWriteCallBack(MachineID myMachID, MachineID senderMachID)
1140 {
1141  if (myMachID == senderMachID) {
1142  CP_TCPStHits++;
1143  } else if (machineIDToMachineType(senderMachID) == MachineType_TCP) {
1144  CP_TCPStTransfers++;
1145  } else if (machineIDToMachineType(senderMachID) == MachineType_TCC) {
1146  CP_TCCStHits++;
1147  } else {
1148  CP_StMiss++;
1149  }
1150 }
1151 
1152 void
1153 GPUCoalescer::completeHitCallback(std::vector<PacketPtr>& mylist, int len)
1154 {
1155  for (int i = 0; i < len; ++i) {
1156  RubyPort::SenderState *ss =
1157  safe_cast<RubyPort::SenderState *>(mylist[i]->senderState);
1158  MemSlavePort *port = ss->port;
1159  assert(port != NULL);
1160 
1161  mylist[i]->senderState = ss->predecessor;
1162  delete ss;
1163  port->hitCallback(mylist[i]);
1164  trySendRetries();
1165  }
1166 
1167  testDrainComplete();
1168 }
1169 
1170 PacketPtr
1171 GPUCoalescer::mapAddrToPkt(Addr address)
1172 {
1173  RequestTable::iterator i = m_readRequestTable.find(address);
1174  assert(i != m_readRequestTable.end());
1175  GPUCoalescerRequest* request = i->second;
1176  return request->pkt;
1177 }
1178 
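// Bucket the completed request into the hit/transfer/miss scalars according
// to the responding machine (TCP, L1Cache_wCC, TCC, or anything else) and
// sample the latency histograms. The per-stage breakdown histograms are only
// sampled when the four timestamps are monotonically non-decreasing.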
1179 void
1180 GPUCoalescer::recordMissLatency(GPUCoalescerRequest* srequest,
1181  MachineType mach,
1182  Cycles initialRequestTime,
1183  Cycles forwardRequestTime,
1184  Cycles firstResponseTime,
1185  bool success, bool isRegion)
1186 {
1187  RubyRequestType type = srequest->m_type;
1188  Cycles issued_time = srequest->issue_time;
1189  Cycles completion_time = curCycle();
1190  assert(completion_time >= issued_time);
1191  Cycles total_lat = completion_time - issued_time;
1192 
1193  // cache stats (valid for RfO protocol only)
1194  if (mach == MachineType_TCP) {
1195  if (type == RubyRequestType_LD) {
1196  GPU_TCPLdHits++;
1197  } else {
1198  GPU_TCPStHits++;
1199  }
1200  } else if (mach == MachineType_L1Cache_wCC) {
1201  if (type == RubyRequestType_LD) {
1202  GPU_TCPLdTransfers++;
1203  } else {
1204  GPU_TCPStTransfers++;
1205  }
1206  } else if (mach == MachineType_TCC) {
1207  if (type == RubyRequestType_LD) {
1208  GPU_TCCLdHits++;
1209  } else {
1210  GPU_TCCStHits++;
1211  }
1212  } else {
1213  if (type == RubyRequestType_LD) {
1214  GPU_LdMiss++;
1215  } else {
1216  GPU_StMiss++;
1217  }
1218  }
1219 
1220  // Profile all access latency, even zero latency accesses
1221  m_latencyHist.sample(total_lat);
1222  m_typeLatencyHist[type]->sample(total_lat);
1223 
1224  // Profile the miss latency for all non-zero demand misses
1225  if (total_lat != Cycles(0)) {
1226  m_missLatencyHist.sample(total_lat);
1227  m_missTypeLatencyHist[type]->sample(total_lat);
1228 
1229  if (mach != MachineType_NUM) {
1230  m_missMachLatencyHist[mach]->sample(total_lat);
1231  m_missTypeMachLatencyHist[type][mach]->sample(total_lat);
1232 
1233  if ((issued_time <= initialRequestTime) &&
1234  (initialRequestTime <= forwardRequestTime) &&
1235  (forwardRequestTime <= firstResponseTime) &&
1236  (firstResponseTime <= completion_time)) {
1237 
1238  m_IssueToInitialDelayHist[mach]->sample(
1239  initialRequestTime - issued_time);
1240  m_InitialToForwardDelayHist[mach]->sample(
1241  forwardRequestTime - initialRequestTime);
1242  m_ForwardToFirstResponseDelayHist[mach]->sample(
1243  firstResponseTime - forwardRequestTime);
1244  m_FirstResponseToCompletionDelayHist[mach]->sample(
1245  completion_time - firstResponseTime);
1246  }
1247  }
1248 
1249  }
1250 
1251  DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %d cycles\n",
1252  curTick(), m_version, "Coal",
1253  success ? "Done" : "SC_Failed", "", "",
1254  printAddress(srequest->pkt->getAddr()), total_lat);
1255 }
1256 
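// Register statistics: 10-bucket latency histograms per request type and per
// machine type, plus the GPU- and CP-side hit/transfer/miss scalar counters
// named below.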
1257 void
1258 GPUCoalescer::regStats()
1259 {
1260  RubyPort::regStats();
1261 
1262  // These statistical variables are not for display.
1263  // The profiler will collate these across different
1264  // coalescers and display those collated statistics.
1265  m_outstandReqHist.init(10);
1266  m_latencyHist.init(10);
1267  m_missLatencyHist.init(10);
1268 
1269  for (int i = 0; i < RubyRequestType_NUM; i++) {
1270  m_typeLatencyHist.push_back(new Stats::Histogram());
1271  m_typeLatencyHist[i]->init(10);
1272 
1273  m_missTypeLatencyHist.push_back(new Stats::Histogram());
1274  m_missTypeLatencyHist[i]->init(10);
1275  }
1276 
1277  for (int i = 0; i < MachineType_NUM; i++) {
1278  m_missMachLatencyHist.push_back(new Stats::Histogram());
1279  m_missMachLatencyHist[i]->init(10);
1280 
1281  m_IssueToInitialDelayHist.push_back(new Stats::Histogram());
1282  m_IssueToInitialDelayHist[i]->init(10);
1283 
1284  m_InitialToForwardDelayHist.push_back(new Stats::Histogram());
1285  m_InitialToForwardDelayHist[i]->init(10);
1286 
1287  m_ForwardToFirstResponseDelayHist.push_back(new Stats::Histogram());
1288  m_ForwardToFirstResponseDelayHist[i]->init(10);
1289 
1290  m_FirstResponseToCompletionDelayHist.push_back(new Stats::Histogram());
1291  m_FirstResponseToCompletionDelayHist[i]->init(10);
1292  }
1293 
1294  for (int i = 0; i < RubyRequestType_NUM; i++) {
1295  m_missTypeMachLatencyHist.push_back(std::vector<Stats::Histogram *>());
1296 
1297  for (int j = 0; j < MachineType_NUM; j++) {
1298  m_missTypeMachLatencyHist[i].push_back(new Stats::Histogram());
1299  m_missTypeMachLatencyHist[i][j]->init(10);
1300  }
1301  }
1302 
1303  // GPU cache stats
1304  GPU_TCPLdHits
1305  .name(name() + ".gpu_tcp_ld_hits")
1306  .desc("loads that hit in the TCP")
1307  ;
1308  GPU_TCPLdTransfers
1309  .name(name() + ".gpu_tcp_ld_transfers")
1310  .desc("TCP to TCP load transfers")
1311  ;
1312  GPU_TCCLdHits
1313  .name(name() + ".gpu_tcc_ld_hits")
1314  .desc("loads that hit in the TCC")
1315  ;
1316  GPU_LdMiss
1317  .name(name() + ".gpu_ld_misses")
1318  .desc("loads that miss in the GPU")
1319  ;
1320 
1321  GPU_TCPStHits
1322  .name(name() + ".gpu_tcp_st_hits")
1323  .desc("stores that hit in the TCP")
1324  ;
1325  GPU_TCPStTransfers
1326  .name(name() + ".gpu_tcp_st_transfers")
1327  .desc("TCP to TCP store transfers")
1328  ;
1329  GPU_TCCStHits
1330  .name(name() + ".gpu_tcc_st_hits")
1331  .desc("stores that hit in the TCC")
1332  ;
1333  GPU_StMiss
1334  .name(name() + ".gpu_st_misses")
1335  .desc("stores that miss in the GPU")
1336  ;
1337 
1338  // CP cache stats
1339  CP_TCPLdHits
1340  .name(name() + ".cp_tcp_ld_hits")
1341  .desc("loads that hit in the TCP")
1342  ;
1343  CP_TCPLdTransfers
1344  .name(name() + ".cp_tcp_ld_transfers")
1345  .desc("TCP to TCP load transfers")
1346  ;
1347  CP_TCCLdHits
1348  .name(name() + ".cp_tcc_ld_hits")
1349  .desc("loads that hit in the TCC")
1350  ;
1351  CP_LdMiss
1352  .name(name() + ".cp_ld_misses")
1353  .desc("loads that miss in the GPU")
1354  ;
1355 
1356  CP_TCPStHits
1357  .name(name() + ".cp_tcp_st_hits")
1358  .desc("stores that hit in the TCP")
1359  ;
1360  CP_TCPStTransfers
1361  .name(name() + ".cp_tcp_st_transfers")
1362  .desc("TCP to TCP store transfers")
1363  ;
1364  CP_TCCStHits
1365  .name(name() + ".cp_tcc_st_hits")
1366  .desc("stores that hit in the TCC")
1367  ;
1368  CP_StMiss
1369  .name(name() + ".cp_st_misses")
1370  .desc("stores that miss in the GPU")
1371  ;
1372 }