gem5  v20.1.0.0
GPUCoalescer.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
35 
36 #include "base/logging.hh"
37 #include "base/str.hh"
38 #include "config/the_isa.hh"
40 #include "debug/GPUCoalescer.hh"
41 #include "debug/MemoryAccess.hh"
42 #include "debug/ProtocolTrace.hh"
43 #include "debug/RubyPort.hh"
44 #include "debug/RubyStats.hh"
45 #include "gpu-compute/shader.hh"
46 #include "mem/packet.hh"
55 #include "params/RubyGPUCoalescer.hh"
56 
57 using namespace std;
58 
60  : coalescer(gc)
61 {
62 }
63 
64 void
66 {
67  uint64_t seqNum = pkt->req->getReqInstSeqNum();
68 
69  instMap[seqNum].push_back(pkt);
70  DPRINTF(GPUCoalescer, "Adding 0x%X seqNum %d to map. (map %d vec %d)\n",
71  pkt->getAddr(), seqNum, instMap.size(), instMap[seqNum].size());
72 }
73 
74 bool
76 {
77  return !instMap.empty();
78 }
79 
82 {
83  if (offset >= instMap.size()) {
84  return nullptr;
85  }
86 
87  auto instMapIter = instMap.begin();
88  std::advance(instMapIter, offset);
89 
90  return &(instMapIter->second);
91 }
92 
93 void
95 {
96  for (auto iter = instMap.begin(); iter != instMap.end(); ) {
97  if (iter->second.empty()) {
98  DPRINTF(GPUCoalescer, "Returning token seqNum %d\n", iter->first);
99  instMap.erase(iter++);
101  } else {
102  ++iter;
103  }
104  }
105 }
106 
107 bool
108 UncoalescedTable::areRequestsDone(const uint64_t instSeqNum) {
109  // iterate the instructions held in UncoalescedTable to see whether there
110  // are more requests to issue; if yes, not yet done; otherwise, done
111  for (auto& inst : instMap) {
112  DPRINTF(GPUCoalescer, "instSeqNum= %d, pending packets=%d\n"
113  ,inst.first, inst.second.size());
114  if (inst.first == instSeqNum) { return false; }
115  }
116 
117  return true;
118 }
119 
120 void
122 {
123  ss << "Listing pending packets from " << instMap.size() << " instructions";
124 
125  for (auto& inst : instMap) {
126  ss << "\tAddr: " << printAddress(inst.first) << " with "
127  << inst.second.size() << " pending packets" << std::endl;
128  }
129 }
130 
131 void
133 {
134  Tick current_time = curTick();
135 
136  for (auto &it : instMap) {
137  for (auto &pkt : it.second) {
138  if (current_time - pkt->req->time() > threshold) {
139  std::stringstream ss;
141 
142  panic("Possible Deadlock detected. Aborting!\n"
143  "version: %d request.paddr: 0x%x uncoalescedTable: %d "
144  "current time: %u issue_time: %d difference: %d\n"
145  "Request Tables:\n\n%s", coalescer->getId(),
146  pkt->getAddr(), instMap.size(), current_time,
147  pkt->req->time(), current_time - pkt->req->time(),
148  ss.str());
149  }
150  }
151  }
152 }
153 
155  : RubyPort(p),
156  issueEvent([this]{ completeIssue(); }, "Issue coalesced request",
158  uncoalescedTable(this),
159  deadlockCheckEvent([this]{ wakeup(); }, "GPUCoalescer deadlock check"),
160  gmTokenPort(name() + ".gmTokenPort", this)
161 {
162  m_store_waiting_on_load_cycles = 0;
163  m_store_waiting_on_store_cycles = 0;
164  m_load_waiting_on_store_cycles = 0;
165  m_load_waiting_on_load_cycles = 0;
166 
167  m_outstanding_count = 0;
168 
169  coalescingWindow = p->max_coalesces_per_cycle;
170 
171  m_max_outstanding_requests = 0;
172  m_instCache_ptr = nullptr;
173  m_dataCache_ptr = nullptr;
174 
175  m_instCache_ptr = p->icache;
176  m_dataCache_ptr = p->dcache;
177  m_max_outstanding_requests = p->max_outstanding_requests;
178  m_deadlock_threshold = p->deadlock_threshold;
179 
180  assert(m_max_outstanding_requests > 0);
181  assert(m_deadlock_threshold > 0);
182  assert(m_instCache_ptr);
183  assert(m_dataCache_ptr);
184 
185  m_runningGarnetStandalone = p->garnet_standalone;
186 }
187 
189 {
190 }
191 
192 Port &
193 GPUCoalescer::getPort(const std::string &if_name, PortID idx)
194 {
195  if (if_name == "gmTokenPort") {
196  return gmTokenPort;
197  }
198 
199  // delgate to RubyPort otherwise
200  return RubyPort::getPort(if_name, idx);
201 }
202 
203 void
205 {
206  Cycles current_time = curCycle();
207  for (auto& requestList : coalescedTable) {
208  for (auto& req : requestList.second) {
209  if (current_time - req->getIssueTime() > m_deadlock_threshold) {
210  std::stringstream ss;
212  warn("GPUCoalescer %d Possible deadlock detected!\n%s\n",
213  m_version, ss.str());
214  panic("Aborting due to deadlock!\n");
215  }
216  }
217  }
218 
219  Tick tick_threshold = cyclesToTicks(m_deadlock_threshold);
220  uncoalescedTable.checkDeadlock(tick_threshold);
221 
222  if (m_outstanding_count > 0) {
225  curTick());
226  }
227 }
228 
229 void
231 {
232  ss << "Printing out " << coalescedTable.size()
233  << " outstanding requests in the coalesced table\n";
234 
235  for (auto& requestList : coalescedTable) {
236  for (auto& request : requestList.second) {
237  ss << "\tAddr: " << printAddress(requestList.first) << "\n"
238  << "\tInstruction sequence number: "
239  << request->getSeqNum() << "\n"
240  << "\t\tType: "
241  << RubyRequestType_to_string(request->getRubyType()) << "\n"
242  << "\t\tNumber of associated packets: "
243  << request->getPackets().size() << "\n"
244  << "\t\tIssue time: "
245  << request->getIssueTime() * clockPeriod() << "\n"
246  << "\t\tDifference from current tick: "
247  << (curCycle() - request->getIssueTime()) * clockPeriod();
248  }
249  }
250 
251  // print out packets waiting to be issued in uncoalesced table
253 }
254 
255 void
257 {
260  for (int i = 0; i < RubyRequestType_NUM; i++) {
261  m_typeLatencyHist[i]->reset();
262  m_missTypeLatencyHist[i]->reset();
263  for (int j = 0; j < MachineType_NUM; j++) {
264  m_missTypeMachLatencyHist[i][j]->reset();
265  }
266  }
267 
268  for (int i = 0; i < MachineType_NUM; i++) {
269  m_missMachLatencyHist[i]->reset();
270 
271  m_IssueToInitialDelayHist[i]->reset();
272  m_InitialToForwardDelayHist[i]->reset();
275  }
276 }
277 
void
GPUCoalescer::printProgress(ostream& out) const
{
    // Intentionally a no-op: this coalescer does not emit periodic
    // progress output.
}
282 
283 // sets the kernelEndList
284 void
285 GPUCoalescer::insertKernel(int wavefront_id, PacketPtr pkt)
286 {
287  // Don't know if this will happen or is possible
288  // but I just want to be careful and not have it become
289  // simulator hang in the future
290  DPRINTF(GPUCoalescer, "inserting wf: %d to kernelEndlist\n", wavefront_id);
291  assert(kernelEndList.count(wavefront_id) == 0);
292 
293  kernelEndList[wavefront_id] = pkt;
294  DPRINTF(GPUCoalescer, "kernelEndList->size() = %d\n",
295  kernelEndList.size());
296 }
297 
298 void
300 {
301  writeCallback(address, MachineType_NULL, data);
302 }
303 
304 void
306  MachineType mach,
307  DataBlock& data)
308 {
309  writeCallback(address, mach, data, Cycles(0), Cycles(0), Cycles(0));
310 }
311 
312 void
314  MachineType mach,
315  DataBlock& data,
316  Cycles initialRequestTime,
317  Cycles forwardRequestTime,
318  Cycles firstResponseTime)
319 {
320  writeCallback(address, mach, data,
321  initialRequestTime, forwardRequestTime, firstResponseTime,
322  false);
323 }
324 
325 void
327  MachineType mach,
328  DataBlock& data,
329  Cycles initialRequestTime,
330  Cycles forwardRequestTime,
331  Cycles firstResponseTime,
332  bool isRegion)
333 {
334  assert(address == makeLineAddress(address));
335  assert(coalescedTable.count(address));
336 
337  auto crequest = coalescedTable.at(address).front();
338 
339  hitCallback(crequest, mach, data, true, crequest->getIssueTime(),
340  forwardRequestTime, firstResponseTime, isRegion);
341 
342  // remove this crequest in coalescedTable
343  delete crequest;
344  coalescedTable.at(address).pop_front();
345 
346  if (coalescedTable.at(address).empty()) {
347  coalescedTable.erase(address);
348  } else {
349  auto nextRequest = coalescedTable.at(address).front();
350  issueRequest(nextRequest);
351  }
352 }
353 
354 void
356  uint64_t instSeqNum,
357  MachineType mach)
358 {
359  DPRINTF(GPUCoalescer, "writeCompleteCallback for address 0x%x"
360  " instSeqNum = %d\n", address, instSeqNum);
361 
362  assert(pendingWriteInsts.count(instSeqNum) == 1);
363  PendingWriteInst& inst = pendingWriteInsts[instSeqNum];
364 
365  // check the uncoalescedTable to see whether all requests for the inst
366  // have been issued or not
367  bool reqsAllIssued = uncoalescedTable.areRequestsDone(instSeqNum);
368  DPRINTF(GPUCoalescer, "instSeqNum = %d, pendingStores=%d, "
369  "reqsAllIssued=%d\n", reqsAllIssued,
370  inst.getNumPendingStores()-1, reqsAllIssued);
371 
372  if (inst.receiveWriteCompleteAck() && reqsAllIssued ) {
373  // if the pending write instruction has received all write completion
374  // callbacks for its issued Ruby requests, we can now start respond
375  // the requesting CU in one response packet.
377 
378  DPRINTF(GPUCoalescer, "write inst %d completed at coalescer\n",
379  instSeqNum);
380  pendingWriteInsts.erase(instSeqNum);
381  }
382 }
383 
384 void
386 {
387  readCallback(address, MachineType_NULL, data);
388 }
389 
390 void
392  MachineType mach,
393  DataBlock& data)
394 {
395  readCallback(address, mach, data, Cycles(0), Cycles(0), Cycles(0));
396 }
397 
398 void
400  MachineType mach,
401  DataBlock& data,
402  Cycles initialRequestTime,
403  Cycles forwardRequestTime,
404  Cycles firstResponseTime)
405 {
406 
407  readCallback(address, mach, data,
408  initialRequestTime, forwardRequestTime, firstResponseTime,
409  false);
410 }
411 
412 void
414  MachineType mach,
415  DataBlock& data,
416  Cycles initialRequestTime,
417  Cycles forwardRequestTime,
418  Cycles firstResponseTime,
419  bool isRegion)
420 {
421  assert(address == makeLineAddress(address));
422  assert(coalescedTable.count(address));
423 
424  auto crequest = coalescedTable.at(address).front();
425  fatal_if(crequest->getRubyType() != RubyRequestType_LD,
426  "readCallback received non-read type response\n");
427 
428  // Iterate over the coalesced requests to respond to as many loads as
429  // possible until another request type is seen. Models MSHR for TCP.
430  while (crequest->getRubyType() == RubyRequestType_LD) {
431  hitCallback(crequest, mach, data, true, crequest->getIssueTime(),
432  forwardRequestTime, firstResponseTime, isRegion);
433 
434  delete crequest;
435  coalescedTable.at(address).pop_front();
436  if (coalescedTable.at(address).empty()) {
437  break;
438  }
439 
440  crequest = coalescedTable.at(address).front();
441  }
442 
443  if (coalescedTable.at(address).empty()) {
444  coalescedTable.erase(address);
445  } else {
446  auto nextRequest = coalescedTable.at(address).front();
447  issueRequest(nextRequest);
448  }
449 }
450 
451 void
453  MachineType mach,
454  DataBlock& data,
455  bool success,
456  Cycles initialRequestTime,
457  Cycles forwardRequestTime,
458  Cycles firstResponseTime,
459  bool isRegion)
460 {
461  PacketPtr pkt = crequest->getFirstPkt();
462  Addr request_address = pkt->getAddr();
463  Addr request_line_address M5_VAR_USED = makeLineAddress(request_address);
464 
465  RubyRequestType type = crequest->getRubyType();
466 
467  DPRINTF(GPUCoalescer, "Got hitCallback for 0x%X\n", request_line_address);
468 
469  recordMissLatency(crequest, mach,
470  initialRequestTime,
471  forwardRequestTime,
472  firstResponseTime,
473  success, isRegion);
474  // update the data
475  //
476  // MUST DO THIS FOR EACH REQUEST IN COALESCER
477  std::vector<PacketPtr> pktList = crequest->getPackets();
478  DPRINTF(GPUCoalescer, "Responding to %d packets for addr 0x%X\n",
479  pktList.size(), request_line_address);
480  for (auto& pkt : pktList) {
481  request_address = pkt->getAddr();
482  if (pkt->getPtr<uint8_t>()) {
483  if ((type == RubyRequestType_LD) ||
484  (type == RubyRequestType_ATOMIC) ||
485  (type == RubyRequestType_ATOMIC_RETURN) ||
486  (type == RubyRequestType_IFETCH) ||
487  (type == RubyRequestType_RMW_Read) ||
488  (type == RubyRequestType_Locked_RMW_Read) ||
489  (type == RubyRequestType_Load_Linked)) {
490  pkt->setData(
491  data.getData(getOffset(request_address), pkt->getSize()));
492  } else {
493  data.setData(pkt->getPtr<uint8_t>(),
494  getOffset(request_address), pkt->getSize());
495  }
496  } else {
497  DPRINTF(MemoryAccess,
498  "WARNING. Data not transfered from Ruby to M5 for type " \
499  "%s\n",
500  RubyRequestType_to_string(type));
501  }
502  }
503 
505  assert(m_outstanding_count >= 0);
506 
507  completeHitCallback(pktList);
508 }
509 
510 bool
512 {
513  return coalescedTable.empty();
514 }
515 
516 RubyRequestType
518 {
519  RubyRequestType req_type = RubyRequestType_NULL;
520 
521  // These types are not supported or not used in GPU caches.
522  assert(!pkt->req->isLLSC());
523  assert(!pkt->req->isLockedRMW());
524  assert(!pkt->req->isInstFetch());
525  assert(!pkt->isFlush());
526 
527  if (pkt->req->isAtomicReturn()) {
528  req_type = RubyRequestType_ATOMIC_RETURN;
529  } else if (pkt->req->isAtomicNoReturn()) {
530  req_type = RubyRequestType_ATOMIC_NO_RETURN;
531  } else if (pkt->isRead()) {
532  req_type = RubyRequestType_LD;
533  } else if (pkt->isWrite()) {
534  req_type = RubyRequestType_ST;
535  } else {
536  panic("Unsupported ruby packet type\n");
537  }
538 
539  return req_type;
540 }
541 
542 // Places an uncoalesced packet in uncoalescedTable. If the packet is a
543 // special type (MemFence, scoping, etc), it is issued immediately.
544 RequestStatus
546 {
547  // all packets must have valid instruction sequence numbers
548  assert(pkt->req->hasInstSeqNum());
549 
550  if (pkt->cmd == MemCmd::MemSyncReq) {
551  // issue mem_sync requests immediately to the cache system without
552  // going through uncoalescedTable like normal LD/ST/Atomic requests
553  issueMemSyncRequest(pkt);
554  } else {
555  // otherwise, this must be either read or write command
556  assert(pkt->isRead() || pkt->isWrite());
557 
558  // the pkt is temporarily stored in the uncoalesced table until
559  // it's picked for coalescing process later in this cycle or in a
560  // future cycle
562  DPRINTF(GPUCoalescer, "Put pkt with addr 0x%X to uncoalescedTable\n",
563  pkt->getAddr());
564 
565  // we schedule an issue event here to process the uncoalesced table
566  // and try to issue Ruby request to cache system
567  if (!issueEvent.scheduled()) {
569  }
570  }
571 
572  // we always return RequestStatus_Issued in this coalescer
573  // b/c the coalescer's resource was checked earlier and the coalescer is
574  // queueing up aliased requests in its coalesced table
575  return RequestStatus_Issued;
576 }
577 
template <class KEY, class VALUE>
std::ostream &
operator<<(ostream &out, const std::unordered_map<KEY, VALUE> &map)
{
    // Render the map as "[ k1=v1 k2=v2 ... ]". Iteration order is
    // whatever the unordered_map provides.
    out << "[";
    for (const auto &entry : map) {
        out << " " << entry.first << "=" << entry.second;
    }
    out << " ]";

    return out;
}
589 
590 void
591 GPUCoalescer::print(ostream& out) const
592 {
593  out << "[GPUCoalescer: " << m_version
594  << ", outstanding requests: " << m_outstanding_count
595  << "]";
596 }
597 
598 
599 bool
601 {
602  uint64_t seqNum = pkt->req->getReqInstSeqNum();
603  Addr line_addr = makeLineAddress(pkt->getAddr());
604 
605  // If the packet has the same line address as a request already in the
606  // coalescedTable and has the same sequence number, it can be coalesced.
607  if (coalescedTable.count(line_addr)) {
608  // Search for a previous coalesced request with the same seqNum.
609  auto& creqQueue = coalescedTable.at(line_addr);
610  auto citer = std::find_if(creqQueue.begin(), creqQueue.end(),
611  [&](CoalescedRequest* c) { return c->getSeqNum() == seqNum; }
612  );
613  if (citer != creqQueue.end()) {
614  (*citer)->insertPacket(pkt);
615  return true;
616  }
617  }
618 
620  // This is an "aliased" or new request. Create a RubyRequest and
621  // append it to the list of "targets" in the coalescing table.
622  DPRINTF(GPUCoalescer, "Creating new or aliased request for 0x%X\n",
623  line_addr);
624 
625  CoalescedRequest *creq = new CoalescedRequest(seqNum);
626  creq->insertPacket(pkt);
627  creq->setRubyType(getRequestType(pkt));
628  creq->setIssueTime(curCycle());
629 
630  if (!coalescedTable.count(line_addr)) {
631  // If there is no outstanding request for this line address,
632  // create a new coalesced request and issue it immediately.
633  auto reqList = std::deque<CoalescedRequest*> { creq };
634  coalescedTable.insert(std::make_pair(line_addr, reqList));
635 
636  DPRINTF(GPUCoalescer, "Issued req type %s seqNum %d\n",
637  RubyRequestType_to_string(creq->getRubyType()), seqNum);
638  issueRequest(creq);
639  } else {
640  // The request is for a line address that is already outstanding
641  // but for a different instruction. Add it as a new request to be
642  // issued when the current outstanding request is completed.
643  coalescedTable.at(line_addr).push_back(creq);
644  DPRINTF(GPUCoalescer, "found address 0x%X with new seqNum %d\n",
645  line_addr, seqNum);
646  }
647 
648  // In both cases, requests are added to the coalescing table and will
649  // be counted as outstanding requests.
651 
652  // We track all issued or to-be-issued Ruby requests associated with
653  // write instructions. An instruction may have multiple Ruby
654  // requests.
655  if (pkt->cmd == MemCmd::WriteReq) {
656  DPRINTF(GPUCoalescer, "adding write inst %d at line 0x%x to"
657  " the pending write instruction list\n", seqNum,
658  line_addr);
659 
661  safe_cast<RubyPort::SenderState*>(pkt->senderState);
662 
663  // we need to save this port because it will be used to call
664  // back the requesting CU when we receive write
665  // complete callbacks for all issued Ruby requests of this
666  // instruction.
667  RubyPort::MemResponsePort* mem_response_port = ss->port;
668 
669  GPUDynInstPtr gpuDynInst = nullptr;
670 
671  if (!m_usingRubyTester) {
672  // If this coalescer is connected to a real CU, we need
673  // to save the corresponding gpu dynamic instruction.
674  // CU will use that instruction to decrement wait counters
675  // in the issuing wavefront.
676  // For Ruby tester, gpuDynInst == nullptr
678  safe_cast<ComputeUnit::DataPort::SenderState*>
679  (ss->predecessor);
680  gpuDynInst = cu_state->_gpuDynInst;
681  }
682 
683  PendingWriteInst& inst = pendingWriteInsts[seqNum];
684  inst.addPendingReq(mem_response_port, gpuDynInst,
686  }
687 
688  return true;
689  }
690 
691  // The maximum number of outstanding requests have been issued.
692  return false;
693 }
694 
695 void
697 {
698  // Iterate over the maximum number of instructions we can coalesce
699  // per cycle (coalescingWindow).
700  for (int instIdx = 0; instIdx < coalescingWindow; ++instIdx) {
701  PerInstPackets *pktList =
703 
704  // getInstPackets will return nullptr if no instruction
705  // exists at the current offset.
706  if (!pktList) {
707  break;
708  } else {
709  // Since we have a pointer to the list of packets in the inst,
710  // erase them from the list if coalescing is successful and
711  // leave them in the list otherwise. This aggressively attempts
712  // to coalesce as many packets as possible from the current inst.
713  pktList->remove_if(
714  [&](PacketPtr pkt) { return coalescePacket(pkt); }
715  );
716  }
717  }
718 
719  // Clean up any instructions in the uncoalesced table that have had
720  // all of their packets coalesced and return a token for that column.
722 
723  // have Kernel End releases been issued this cycle
724  int len = newKernelEnds.size();
725  for (int i = 0; i < len; i++) {
727  }
728  newKernelEnds.clear();
729 }
730 
731 void
733 {
734  ruby_eviction_callback(address);
735 }
736 
737 void
739 {
740  assert(kernelEndList.count(wavefront_id));
741 
742  ruby_hit_callback(kernelEndList[wavefront_id]);
743 
744  kernelEndList.erase(wavefront_id);
745 }
746 
747 void
749  MachineType mach,
750  const DataBlock& data)
751 {
752  assert(address == makeLineAddress(address));
753  assert(coalescedTable.count(address));
754 
755  auto crequest = coalescedTable.at(address).front();
756 
757  fatal_if((crequest->getRubyType() != RubyRequestType_ATOMIC &&
758  crequest->getRubyType() != RubyRequestType_ATOMIC_RETURN &&
759  crequest->getRubyType() != RubyRequestType_ATOMIC_NO_RETURN),
760  "atomicCallback saw non-atomic type response\n");
761 
762  hitCallback(crequest, mach, (DataBlock&)data, true,
763  crequest->getIssueTime(), Cycles(0), Cycles(0), false);
764 
765  delete crequest;
766  coalescedTable.at(address).pop_front();
767 
768  if (coalescedTable.at(address).empty()) {
769  coalescedTable.erase(address);
770  } else {
771  auto nextRequest = coalescedTable.at(address).front();
772  issueRequest(nextRequest);
773  }
774 }
775 
776 void
778 {
779  for (auto& pkt : mylist) {
781  safe_cast<RubyPort::SenderState *>(pkt->senderState);
782  MemResponsePort *port = ss->port;
783  assert(port != NULL);
784 
785  pkt->senderState = ss->predecessor;
786 
787  if (pkt->cmd != MemCmd::WriteReq) {
788  // for WriteReq, we keep the original senderState until
789  // writeCompleteCallback
790  delete ss;
791  }
792 
793  port->hitCallback(pkt);
794  trySendRetries();
795  }
796 
797  // We schedule an event in the same tick as hitCallback (similar to
798  // makeRequest) rather than calling completeIssue directly to reduce
799  // function calls to complete issue. This can only happen if the max
800  // outstanding requests is less than the number of slots in the
801  // uncoalesced table and makeRequest is not called again.
804  }
805 
807 }
808 
809 void
811  MachineType mach,
812  Cycles initialRequestTime,
813  Cycles forwardRequestTime,
814  Cycles firstResponseTime,
815  bool success, bool isRegion)
816 {
817 }
818 
819 void
821 {
823 
824  // These statistical variables are not for display.
825  // The profiler will collate these across different
826  // coalescers and display those collated statistics.
828  m_latencyHist.init(10);
830 
831  for (int i = 0; i < RubyRequestType_NUM; i++) {
832  m_typeLatencyHist.push_back(new Stats::Histogram());
833  m_typeLatencyHist[i]->init(10);
834 
835  m_missTypeLatencyHist.push_back(new Stats::Histogram());
836  m_missTypeLatencyHist[i]->init(10);
837  }
838 
839  for (int i = 0; i < MachineType_NUM; i++) {
840  m_missMachLatencyHist.push_back(new Stats::Histogram());
841  m_missMachLatencyHist[i]->init(10);
842 
844  m_IssueToInitialDelayHist[i]->init(10);
845 
847  m_InitialToForwardDelayHist[i]->init(10);
848 
851 
854  }
855 
856  for (int i = 0; i < RubyRequestType_NUM; i++) {
858 
859  for (int j = 0; j < MachineType_NUM; j++) {
860  m_missTypeMachLatencyHist[i].push_back(new Stats::Histogram());
861  m_missTypeMachLatencyHist[i][j]->init(10);
862  }
863  }
864 }
Stats::Group::regStats
virtual void regStats()
Callback to set stat parameters.
Definition: group.cc:64
GPUCoalescer::m_deadlock_threshold
Cycles m_deadlock_threshold
Definition: GPUCoalescer.hh:403
Event::scheduled
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:460
GPUCoalescer::issueMemSyncRequest
virtual void issueMemSyncRequest(PacketPtr pkt)
Definition: GPUCoalescer.hh:370
warn
#define warn(...)
Definition: logging.hh:239
RubyPort::m_usingRubyTester
bool m_usingRubyTester
Definition: RubyPort.hh:192
RubyPort::ruby_hit_callback
void ruby_hit_callback(PacketPtr pkt)
Definition: RubyPort.cc:432
GPUCoalescer::m_latencyHist
Stats::Histogram m_latencyHist
Histogram for holding latency profile of all requests.
Definition: GPUCoalescer.hh:472
Profiler.hh
data
const char data[]
Definition: circlebuf.test.cc:42
shader.hh
CoalescedRequest::getRubyType
RubyRequestType getRubyType() const
Definition: GPUCoalescer.hh:111
Packet::getAddr
Addr getAddr() const
Definition: packet.hh:754
makeLineAddress
Addr makeLineAddress(Addr addr)
Definition: Address.cc:54
CoalescedRequest::insertPacket
void insertPacket(PacketPtr pkt)
Definition: GPUCoalescer.hh:103
GPUCoalescer::m_missTypeMachLatencyHist
std::vector< std::vector< Stats::Histogram * > > m_missTypeMachLatencyHist
Definition: GPUCoalescer.hh:483
ArmISA::i
Bitfield< 7 > i
Definition: miscregs_types.hh:63
GPUCoalescer::writeCompleteCallback
void writeCompleteCallback(Addr address, uint64_t instSeqNum, MachineType mach)
Definition: GPUCoalescer.cc:355
PendingWriteInst::receiveWriteCompleteAck
bool receiveWriteCompleteAck()
Definition: GPUCoalescer.hh:153
GPUCoalescer::evictionCallback
void evictionCallback(Addr address)
Definition: GPUCoalescer.cc:732
GPUCoalescer::getGMTokenPort
GMTokenPort & getGMTokenPort()
Definition: GPUCoalescer.hh:327
UncoalescedTable::coalescer
GPUCoalescer * coalescer
Definition: GPUCoalescer.hh:85
GPUCoalescer
Definition: GPUCoalescer.hh:201
RubyPort::testDrainComplete
void testDrainComplete()
Definition: RubyPort.cc:481
Packet::isRead
bool isRead() const
Definition: packet.hh:556
type
uint8_t type
Definition: inet.hh:421
GPUCoalescer::printRequestTable
void printRequestTable(std::stringstream &ss)
Definition: GPUCoalescer.cc:230
GPUCoalescer::Params
RubyGPUCoalescerParams Params
Definition: GPUCoalescer.hh:224
GPUCoalescer.hh
Tick
uint64_t Tick
Tick count type.
Definition: types.hh:63
GPUCoalescer::printProgress
void printProgress(std::ostream &out) const
Definition: GPUCoalescer.cc:279
AbstractController.hh
PortID
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Definition: types.hh:237
UncoalescedTable::insertPacket
void insertPacket(PacketPtr pkt)
Definition: GPUCoalescer.cc:65
GPUCoalescer::newKernelEnds
std::vector< int > newKernelEnds
Definition: GPUCoalescer.hh:433
Packet::req
RequestPtr req
A pointer to the original request.
Definition: packet.hh:340
RubyRequest.hh
GPUCoalescer::gmTokenPort
GMTokenPort gmTokenPort
Definition: GPUCoalescer.hh:510
std::vector
STL vector class.
Definition: stl.hh:37
GPUCoalescer::kernelCallback
void kernelCallback(int wavefront_id)
Definition: GPUCoalescer.cc:738
PendingWriteInst::getNumPendingStores
int getNumPendingStores()
Definition: GPUCoalescer.hh:183
GPUCoalescer::m_outstanding_count
int m_outstanding_count
Definition: GPUCoalescer.hh:430
Packet::getSize
unsigned getSize() const
Definition: packet.hh:764
GPUCoalescer::getPort
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
Definition: GPUCoalescer.cc:193
PendingWriteInst::addPendingReq
void addPendingReq(RubyPort::MemResponsePort *port, GPUDynInstPtr inst, bool usingRubyTester)
Definition: GPUCoalescer.hh:138
RubyPort::getPort
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
Definition: RubyPort.cc:92
PendingWriteInst
Definition: GPUCoalescer.hh:125
CoalescedRequest::getFirstPkt
PacketPtr getFirstPkt() const
Definition: GPUCoalescer.hh:109
RubyPort::getId
uint32_t getId()
Definition: RubyPort.hh:165
GPUCoalescer::regStats
void regStats() override
Callback to set stat parameters.
Definition: GPUCoalescer.cc:820
Packet::setData
void setData(const uint8_t *p)
Copy data into the packet from the provided pointer.
Definition: packet.hh:1225
DataBlock
Definition: DataBlock.hh:40
packet.hh
Stats::Histogram
A simple histogram stat.
Definition: statistics.hh:2654
GPUCoalescer::deadlockCheckEvent
EventFunctionWrapper deadlockCheckEvent
Definition: GPUCoalescer.hh:442
str.hh
MemCmd::WriteReq
@ WriteReq
Definition: packet.hh:85
GPUCoalescer::m_InitialToForwardDelayHist
std::vector< Stats::Histogram * > m_InitialToForwardDelayHist
Definition: GPUCoalescer.hh:487
GPUCoalescer::m_outstandReqHist
Stats::Histogram m_outstandReqHist
Histogram for number of outstanding requests per cycle.
Definition: GPUCoalescer.hh:469
GPUCoalescer::completeHitCallback
void completeHitCallback(std::vector< PacketPtr > &mylist)
Definition: GPUCoalescer.cc:777
ArmISA::j
Bitfield< 24 > j
Definition: miscregs_types.hh:54
RubyPort::MemResponsePort
Definition: RubyPort.hh:75
EventManager::schedule
void schedule(Event &event, Tick when)
Definition: eventq.hh:1005
UncoalescedTable::printRequestTable
void printRequestTable(std::stringstream &ss)
Definition: GPUCoalescer.cc:121
Clocked::cyclesToTicks
Tick cyclesToTicks(Cycles c) const
Definition: clocked_object.hh:224
GPUCoalescer::coalescedTable
std::map< Addr, std::deque< CoalescedRequest * > > coalescedTable
Definition: GPUCoalescer.hh:422
ArmISA::ss
Bitfield< 21 > ss
Definition: miscregs_types.hh:56
CacheMemory.hh
GPUCoalescer::m_IssueToInitialDelayHist
std::vector< Stats::Histogram * > m_IssueToInitialDelayHist
Histograms for recording the breakdown of miss latency.
Definition: GPUCoalescer.hh:486
UncoalescedTable::packetAvailable
bool packetAvailable()
Definition: GPUCoalescer.cc:75
GPUCoalescer::~GPUCoalescer
~GPUCoalescer()
Definition: GPUCoalescer.cc:188
GPUCoalescer::atomicCallback
virtual void atomicCallback(Addr address, MachineType mach, const DataBlock &data)
Definition: GPUCoalescer.cc:748
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:234
GPUCoalescer::print
void print(std::ostream &out) const
Definition: GPUCoalescer.cc:591
DataBlock.hh
GPUCoalescer::wakeup
void wakeup()
Definition: GPUCoalescer.cc:204
RubyPort
Definition: RubyPort.hh:58
Port
Ports are used to interface objects to each other.
Definition: port.hh:56
RubyTester.hh
Clocked::curCycle
Cycles curCycle() const
Determine the current cycle, corresponding to a tick aligned to a clock edge.
Definition: clocked_object.hh:192
GPUCoalescer::m_typeLatencyHist
std::vector< Stats::Histogram * > m_typeLatencyHist
Definition: GPUCoalescer.hh:473
GPUCoalescer::getRequestType
virtual RubyRequestType getRequestType(PacketPtr pkt)
Definition: GPUCoalescer.cc:517
UncoalescedTable::UncoalescedTable
UncoalescedTable(GPUCoalescer *gc)
Definition: GPUCoalescer.cc:59
GPUCoalescer::pendingWriteInsts
std::unordered_map< uint64_t, PendingWriteInst > pendingWriteInsts
Definition: GPUCoalescer.hh:427
GPUCoalescer::insertKernel
void insertKernel(int wavefront_id, PacketPtr pkt)
Definition: GPUCoalescer.cc:285
GPUCoalescer::issueEvent
EventFunctionWrapper issueEvent
Definition: GPUCoalescer.hh:399
GPUCoalescer::m_ForwardToFirstResponseDelayHist
std::vector< Stats::Histogram * > m_ForwardToFirstResponseDelayHist
Definition: GPUCoalescer.hh:488
ComputeUnit::DataPort::SenderState
Definition: compute_unit.hh:660
GPUCoalescer::resetStats
void resetStats() override
Callback to reset stats.
Definition: GPUCoalescer.cc:256
RubySystem.hh
Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:142
PendingWriteInst::ackWriteCompletion
void ackWriteCompletion(bool usingRubyTester)
Definition: GPUCoalescer.hh:162
name
const std::string & name()
Definition: trace.cc:50
GPUCoalescer::writeCallback
void writeCallback(Addr address, DataBlock &data)
Definition: GPUCoalescer.cc:299
Clocked::clockPeriod
Tick clockPeriod() const
Definition: clocked_object.hh:214
GPUCoalescer::makeRequest
RequestStatus makeRequest(PacketPtr pkt) override
Definition: GPUCoalescer.cc:545
GPUCoalescer::readCallback
void readCallback(Addr address, DataBlock &data)
Definition: GPUCoalescer.cc:385
GPUCoalescer::coalescePacket
bool coalescePacket(PacketPtr pkt)
Definition: GPUCoalescer.cc:600
ComputeUnit::DataPort::SenderState::_gpuDynInst
GPUDynInstPtr _gpuDynInst
Definition: compute_unit.hh:662
Packet::cmd
MemCmd cmd
The command field of the packet.
Definition: packet.hh:335
MessageBuffer.hh
GPUCoalescer::m_missTypeLatencyHist
std::vector< Stats::Histogram * > m_missTypeLatencyHist
Definition: GPUCoalescer.hh:478
GPUCoalescer::recordMissLatency
void recordMissLatency(CoalescedRequest *crequest, MachineType mach, Cycles initialRequestTime, Cycles forwardRequestTime, Cycles firstResponseTime, bool success, bool isRegion)
Definition: GPUCoalescer.cc:810
UncoalescedTable::areRequestsDone
bool areRequestsDone(const uint64_t instSeqNum)
Definition: GPUCoalescer.cc:108
std
Overload hash function for BasicBlockRange type.
Definition: vec_reg.hh:587
CoalescedRequest
Definition: GPUCoalescer.hh:94
MemCmd::MemSyncReq
@ MemSyncReq
Definition: packet.hh:115
GPUCoalescer::m_FirstResponseToCompletionDelayHist
std::vector< Stats::Histogram * > m_FirstResponseToCompletionDelayHist
Definition: GPUCoalescer.hh:489
Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e.g., the L1 and L2 cache).
Definition: packet.hh:257
std::deque
STL deque class.
Definition: stl.hh:44
GPUCoalescer::kernelEndList
std::unordered_map< int, PacketPtr > kernelEndList
Definition: GPUCoalescer.hh:432
getOffset
Addr getOffset(Addr addr)
Definition: Address.cc:48
GPUCoalescer::m_missMachLatencyHist
std::vector< Stats::Histogram * > m_missMachLatencyHist
Histograms for profiling the latencies for requests that required external messages.
Definition: GPUCoalescer.hh:482
ArmISA::len
Bitfield< 18, 16 > len
Definition: miscregs_types.hh:439
UncoalescedTable::instMap
std::map< uint64_t, PerInstPackets > instMap
Definition: GPUCoalescer.hh:91
GPUCoalescer::m_max_outstanding_requests
int m_max_outstanding_requests
Definition: GPUCoalescer.hh:402
Stats::DistBase::reset
void reset()
Reset stat value to default.
Definition: statistics.hh:1948
RubyPort::MemResponsePort::hitCallback
void hitCallback(PacketPtr pkt)
Definition: RubyPort.cc:515
GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:48
logging.hh
Cycles
Cycles is a wrapper class for representing cycle counts, i.e.
Definition: types.hh:83
Packet::isWrite
bool isWrite() const
Definition: packet.hh:557
GPUCoalescer::completeIssue
void completeIssue()
Definition: GPUCoalescer.cc:696
Packet::getPtr
T * getPtr()
get a pointer to the data ptr.
Definition: packet.hh:1157
UncoalescedTable::checkDeadlock
void checkDeadlock(Tick threshold)
Definition: GPUCoalescer.cc:132
ArmISA::c
Bitfield< 29 > c
Definition: miscregs_types.hh:50
GPUCoalescer::empty
bool empty() const
Definition: GPUCoalescer.cc:511
CoalescedRequest::getPackets
std::vector< PacketPtr > & getPackets()
Definition: GPUCoalescer.hh:112
GPUCoalescer::issueRequest
virtual void issueRequest(CoalescedRequest *crequest)=0
GPUCoalescer::coalescingWindow
int coalescingWindow
Definition: GPUCoalescer.hh:410
Stats::Histogram::init
Histogram & init(size_type size)
Set the parameters of this histogram.
Definition: statistics.hh:2669
Packet::isFlush
bool isFlush() const
Definition: packet.hh:585
Packet::senderState
SenderState * senderState
This packet's sender state.
Definition: packet.hh:508
MipsISA::p
Bitfield< 0 > p
Definition: pra_constants.hh:323
std::list
STL list class.
Definition: stl.hh:51
GPUCoalescer::m_missLatencyHist
Stats::Histogram m_missLatencyHist
Histogram for holding latency profile of all requests that miss in the controller connected to this sequencer.
Definition: GPUCoalescer.hh:477
EventBase::Progress_Event_Pri
static const Priority Progress_Event_Pri
Progress events come at the end.
Definition: eventq.hh:221
RubyPort::trySendRetries
void trySendRetries()
Definition: RubyPort.cc:455
fatal_if
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condition is true and allows the programmer to specify diagnostic printout.
Definition: logging.hh:219
CoalescedRequest::setIssueTime
void setIssueTime(Cycles _issueTime)
Definition: GPUCoalescer.hh:105
RubyPort::m_version
uint32_t m_version
Definition: RubyPort.hh:189
printAddress
std::string printAddress(Addr addr)
Definition: Address.cc:74
RubyPort::SenderState
Definition: RubyPort.hh:139
UncoalescedTable::getInstPackets
PerInstPackets * getInstPackets(int offset)
Definition: GPUCoalescer.cc:81
RubyPort::ruby_eviction_callback
void ruby_eviction_callback(Addr address)
Definition: RubyPort.cc:614
SubBlock.hh
TokenResponsePort::sendTokens
void sendTokens(int num_tokens)
Return num_tokens tokens back to the request port.
Definition: token_port.cc:79
GPUCoalescer::GPUCoalescer
GPUCoalescer(const Params *)
Definition: GPUCoalescer.cc:154
CoalescedRequest::setRubyType
void setRubyType(RubyRequestType type)
Definition: GPUCoalescer.hh:106
panic
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:171
curTick
Tick curTick()
The current simulated tick.
Definition: core.hh:45
UncoalescedTable::updateResources
void updateResources()
Definition: GPUCoalescer.cc:94
ArmISA::offset
Bitfield< 23, 0 > offset
Definition: types.hh:153
operator<<
std::ostream & operator<<(ostream &out, const std::unordered_map< KEY, VALUE > &map)
Definition: GPUCoalescer.cc:580
GPUCoalescer::uncoalescedTable
UncoalescedTable uncoalescedTable
Definition: GPUCoalescer.hh:415
GPUCoalescer::hitCallback
void hitCallback(CoalescedRequest *crequest, MachineType mach, DataBlock &data, bool success, Cycles initialRequestTime, Cycles forwardRequestTime, Cycles firstResponseTime, bool isRegion)
Definition: GPUCoalescer.cc:452

Generated on Wed Sep 30 2020 14:02:13 for gem5 by doxygen 1.8.17