gem5  v21.2.1.1
lsq_unit.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2010-2014, 2017-2020 ARM Limited
3  * Copyright (c) 2013 Advanced Micro Devices, Inc.
4  * All rights reserved
5  *
6  * The license below extends only to copyright in the software and shall
7  * not be construed as granting a license to any other intellectual
8  * property including but not limited to intellectual property relating
9  * to a hardware implementation of the functionality of the software
10  * licensed hereunder. You may use the software subject to the license
11  * terms below provided that you ensure that this notice is replicated
12  * unmodified and in its entirety in all distributions of the software,
13  * modified or unmodified, in source code or in binary form.
14  *
15  * Copyright (c) 2004-2006 The Regents of The University of Michigan
16  * All rights reserved.
17  *
18  * Redistribution and use in source and binary forms, with or without
19  * modification, are permitted provided that the following conditions are
20  * met: redistributions of source code must retain the above copyright
21  * notice, this list of conditions and the following disclaimer;
22  * redistributions in binary form must reproduce the above copyright
23  * notice, this list of conditions and the following disclaimer in the
24  * documentation and/or other materials provided with the distribution;
25  * neither the name of the copyright holders nor the names of its
26  * contributors may be used to endorse or promote products derived from
27  * this software without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40  */
41 
42 #include "cpu/o3/lsq_unit.hh"
43 
45 #include "base/str.hh"
46 #include "config/the_isa.hh"
47 #include "cpu/checker/cpu.hh"
48 #include "cpu/o3/dyn_inst.hh"
49 #include "cpu/o3/limits.hh"
50 #include "cpu/o3/lsq.hh"
51 #include "debug/Activity.hh"
52 #include "debug/HtmCpu.hh"
53 #include "debug/IEW.hh"
54 #include "debug/LSQUnit.hh"
55 #include "debug/O3PipeView.hh"
56 #include "mem/packet.hh"
57 #include "mem/request.hh"
58 
59 namespace gem5
60 {
61 
62 namespace o3
63 {
64 
// Event constructor (the first line of its signature is not present in this
// listing). Stores the instruction, the packet and the LSQ unit pointer, and
// creates the event with default priority and the AutoDelete flag.
66  PacketPtr _pkt, LSQUnit *lsq_ptr)
67  : Event(Default_Pri, AutoDelete),
68  inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr)
69 {
    // The instruction must already own a saved request; it is told that a
    // writeback has been scheduled for it.
70  assert(_inst->savedRequest);
71  _inst->savedRequest->writebackScheduled();
72 }
73 
// Event handler: performs the deferred writeback through the owning LSQ
// unit, marks the saved request's writeback as done, then frees the packet.
74 void
76 {
    // Must not run while the CPU is switched out.
77  assert(!lsqPtr->cpu->switchedOut());
78 
79  lsqPtr->writeback(inst, pkt);
80 
81  assert(inst->savedRequest);
82  inst->savedRequest->writebackDone();
    // The packet stored in this event is deleted here.
83  delete pkt;
84 }
85 
// Returns a fixed, human-readable description string for this event.
86 const char *
88 {
89  return "Store writeback";
90 }
91 
// Handles a timing response packet: recovers the LSQRequest stored in the
// packet's senderState and forwards the packet to it.
// Returns the request's recvTimingResp() result, or true when the request
// has already been released and the packet is therefore not forwarded.
92 bool
94 {
95  LSQRequest *request = dynamic_cast<LSQRequest*>(pkt->senderState);
    // The sender state is required to be an LSQRequest.
96  assert(request != nullptr);
97  bool ret = true;
98  /* Check that the request is still alive before any further action. */
99  if (!request->isReleased()) {
100  ret = request->recvTimingResp(pkt);
101  }
102  return ret;
103 }
104 
// Finishes a data access for the request attached to pkt:
//  - checks HTM transaction uid consistency for non-squashed instructions,
//  - turns an HTM failure reported by the cache into a fault on the
//    instruction (non-write packets only),
//  - notifies the ppDataAccessComplete probe,
//  - for non-squashed instructions, writes back the result and/or completes
//    the store.
105 void
107 {
108  LSQRequest *request = dynamic_cast<LSQRequest *>(pkt->senderState);
109  DynInstPtr inst = request->instruction();
110 
111  // hardware transactional memory
112  // sanity check
113  if (pkt->isHtmTransactional() && !inst->isSquashed()) {
114  assert(inst->getHtmTransactionUid() == pkt->getHtmTransactionUid());
115  }
116 
117  // if in a HTM transaction, it's possible
118  // to abort within the cache hierarchy.
119  // This is signalled back to the processor
120  // through responses to memory requests.
121  if (pkt->htmTransactionFailedInCache()) {
122  // cannot do this for write requests because
123  // they cannot tolerate faults
124  const HtmCacheFailure htm_rc =
126  if (pkt->isWrite()) {
127  DPRINTF(HtmCpu,
128  "store notification (ignored) of HTM transaction failure "
129  "in cache - addr=0x%lx - rc=%s - htmUid=%d\n",
130  pkt->getAddr(), htmFailureToStr(htm_rc),
131  pkt->getHtmTransactionUid());
132  } else {
133  HtmFailureFaultCause fail_reason =
135 
        // Map the cache's failure code onto the fault cause that is
        // attached to the instruction below.
136  if (htm_rc == HtmCacheFailure::FAIL_SELF) {
137  fail_reason = HtmFailureFaultCause::SIZE;
138  } else if (htm_rc == HtmCacheFailure::FAIL_REMOTE) {
139  fail_reason = HtmFailureFaultCause::MEMORY;
140  } else if (htm_rc == HtmCacheFailure::FAIL_OTHER) {
141  // these are likely loads that were issued out of order
142  // they are faulted here, but it's unlikely that these will
143  // ever reach the commit head.
144  fail_reason = HtmFailureFaultCause::OTHER;
145  } else {
146  panic("HTM error - unhandled return code from cache (%s)",
147  htmFailureToStr(htm_rc));
148  }
149 
150  inst->fault =
151  std::make_shared<GenericHtmFailureFault>(
152  inst->getHtmTransactionUid(),
153  fail_reason);
154 
        // NOTE(review): this message formats htmFailureToStr(htm_rc) with
        // "%u" while the store branch above uses "%s" for the same value;
        // confirm which specifier is intended.
155  DPRINTF(HtmCpu,
156  "load notification of HTM transaction failure "
157  "in cache - pc=%s - addr=0x%lx - "
158  "rc=%u - htmUid=%d\n",
159  inst->pcState(), pkt->getAddr(),
160  htmFailureToStr(htm_rc), pkt->getHtmTransactionUid());
161  }
162  }
163 
    // The probe is notified for every completed access, squashed or not.
164  cpu->ppDataAccessComplete->notify(std::make_pair(inst, pkt));
165 
166  assert(!cpu->switchedOut());
167  if (!inst->isSquashed()) {
168  if (request->needWBToRegister()) {
169  // Only loads, store conditionals and atomics perform the writeback
170  // after receiving the response from the memory
171  assert(inst->isLoad() || inst->isStoreConditional() ||
172  inst->isAtomic());
173 
174  // hardware transactional memory
175  if (pkt->htmTransactionFailedInCache()) {
178  }
179 
180  writeback(inst, request->mainPacket());
181  if (inst->isStore() || inst->isAtomic()) {
182  request->writebackDone();
183  completeStore(request->instruction()->sqIt);
184  }
185  } else if (inst->isStore()) {
186  // This is a regular store (i.e., not store conditionals and
187  // atomics), so it can complete without writing back
188  completeStore(request->instruction()->sqIt);
189  }
190  }
191 }
192 
// Constructs an LSQ unit whose load and store queues are sized by lqEntries
// and sqEntries. The unit id starts at -1 (init() assigns the real id), all
// counters start at zero and all status flags start cleared.
193 LSQUnit::LSQUnit(uint32_t lqEntries, uint32_t sqEntries)
194  : lsqID(-1), storeQueue(sqEntries), loadQueue(lqEntries),
195  storesToWB(0),
196  htmStarts(0), htmStops(0),
198  cacheBlockMask(0), stalled(false),
199  isStoreBlocked(false), storeInFlight(false), stats(nullptr)
200 {
201 }
202 
// Binds this unit to its CPU, IEW stage and parent LSQ, registers its
// statistics group with the CPU, copies the dependency-check parameters
// from params, and resets the dynamic state.
//   cpu_ptr  CPU this unit belongs to
//   iew_ptr  IEW stage this unit reports to
//   params   source of LSQDepCheckShift, LSQCheckLoads and needsTSO
//   lsq_ptr  parent LSQ
//   id       id of this unit; also used in the stat group name "lsq<id>"
203 void
204 LSQUnit::init(CPU *cpu_ptr, IEW *iew_ptr, const O3CPUParams &params,
205  LSQ *lsq_ptr, unsigned id)
206 {
207  lsqID = id;
208 
209  cpu = cpu_ptr;
210  iewStage = iew_ptr;
211 
212  lsq = lsq_ptr;
213 
214  cpu->addStatGroup(csprintf("lsq%i", lsqID).c_str(), &stats);
215 
216  DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",lsqID);
217 
218  depCheckShift = params.LSQDepCheckShift;
219  checkLoads = params.LSQCheckLoads;
220  needsTSO = params.needsTSO;
221 
    // resetState() reads cpu->cacheLineSize(), so cpu must be set first.
222  resetState();
223 }
224 
225 
// Resets the unit's dynamic state: pending-writeback counter, HTM nesting
// counters, retry packet, memory-dependence violator and stall flag, and
// recomputes the cache block mask from the CPU's cache line size.
226 void
228 {
229  storesToWB = 0;
230 
231  // hardware transactional memory
232  // nesting depth
233  htmStarts = htmStops = 0;
234 
236 
237  retryPkt = NULL;
238  memDepViolator = NULL;
239 
240  stalled = false;
241 
    // Clears the low (line size - 1) bits; presumably relies on the cache
    // line size being a power of two -- TODO confirm.
242  cacheBlockMask = ~(cpu->cacheLineSize() - 1);
243 }
244 
// Builds this unit's name from the IEW stage name: "<iew>.lsq" when
// MaxThreads is 1, otherwise "<iew>.lsq.thread<lsqID>".
245 std::string
247 {
248  if (MaxThreads == 1) {
249  return iewStage->name() + ".lsq";
250  } else {
251  return iewStage->name() + ".lsq.thread" + std::to_string(lsqID);
252  }
253 }
254 
// Statistics group constructor (its signature line is not present in this
// listing): registers every LSQ unit statistic under the parent group with
// its unit and description.
256  : statistics::Group(parent),
257  ADD_STAT(forwLoads, statistics::units::Count::get(),
258  "Number of loads that had data forwarded from stores"),
259  ADD_STAT(squashedLoads, statistics::units::Count::get(),
260  "Number of loads squashed"),
261  ADD_STAT(ignoredResponses, statistics::units::Count::get(),
262  "Number of memory responses ignored because the instruction is "
263  "squashed"),
264  ADD_STAT(memOrderViolation, statistics::units::Count::get(),
265  "Number of memory ordering violations"),
266  ADD_STAT(squashedStores, statistics::units::Count::get(),
267  "Number of stores squashed"),
268  ADD_STAT(rescheduledLoads, statistics::units::Count::get(),
269  "Number of loads that were rescheduled"),
270  ADD_STAT(blockedByCache, statistics::units::Count::get(),
271  "Number of times an access to memory failed due to the cache "
272  "being blocked"),
273  ADD_STAT(loadToUse, "Distribution of cycle latency between the "
274  "first time a load is issued and its completion")
275 {
    // presumably (min, max, bucket size) of the distribution -- TODO confirm
    // against the statistics API.
276  loadToUse
277  .init(0, 299, 10)
279 }
280 
// Stores the given data cache port in dcachePort.
281 void
283 {
284  dcachePort = dcache_port;
285 }
286 
// Asserts that the unit holds no work: every load queue slot is invalid,
// no stores are waiting to write back, and no packet is awaiting retry.
287 void
289 {
290  for (int i = 0; i < loadQueue.capacity(); ++i)
291  assert(!loadQueue[i].valid());
292 
293  assert(storesToWB == 0);
294  assert(!retryPkt);
295 }
296 
// Only resets this unit's dynamic state via resetState().
297 void
299 {
300  resetState();
301 }
302 
// Inserts a memory instruction into the unit: loads go to insertLoad(),
// everything else (stores and atomics, per the assertion) to insertStore().
// The instruction is then flagged as being in the LSQ.
303 void
305 {
306  assert(inst->isMemRef());
307 
308  assert(inst->isLoad() || inst->isStore() || inst->isAtomic());
309 
310  if (inst->isLoad()) {
311  insertLoad(inst);
312  } else {
313  insertStore(inst);
314  }
315 
316  inst->setInLSQ();
317 }
318 
// Places a load at the tail of the load queue, records its queue index and
// iterators on the instruction, and updates the HTM start/stop nesting
// counters when the load is an HTM start or stop.
319 void
321 {
    // The caller must guarantee there is a free load queue entry.
322  assert(!loadQueue.full());
323  assert(loadQueue.size() < loadQueue.capacity());
324 
325  DPRINTF(LSQUnit, "Inserting load PC %s, idx:%i [sn:%lli]\n",
326  load_inst->pcState(), loadQueue.tail(), load_inst->seqNum);
327 
328  /* Grow the queue. */
330 
    // A load has no store queue entry of its own, so its store iterator is
    // set to the end of the store queue.
331  load_inst->sqIt = storeQueue.end();
332 
333  assert(!loadQueue.back().valid());
334  loadQueue.back().set(load_inst);
335  load_inst->lqIdx = loadQueue.tail();
336  assert(load_inst->lqIdx > 0);
337  load_inst->lqIt = loadQueue.getIterator(load_inst->lqIdx);
338 
339  // hardware transactional memory
340  // transactional state and nesting depth must be tracked
341  // in the in-order part of the core.
342  if (load_inst->isHtmStart()) {
343  htmStarts++;
344  DPRINTF(HtmCpu, ">> htmStarts++ (%d) : htmStops (%d)\n",
346 
347  const int htm_depth = htmStarts - htmStops;
        // NOTE(review): htm_cpt is dereferenced here without a null check,
        // while squash() guards the same pointer with if (htm_cpt);
        // confirm it can never be null on this path.
348  const auto& htm_cpt = cpu->tcBase(lsqID)->getHtmCheckpointPtr();
349  auto htm_uid = htm_cpt->getHtmUid();
350 
351  // for debugging purposes
352  if (!load_inst->inHtmTransactionalState()) {
353  htm_uid = htm_cpt->newHtmUid();
354  DPRINTF(HtmCpu, "generating new htmUid=%u\n", htm_uid);
355  if (htm_depth != 1) {
356  DPRINTF(HtmCpu,
357  "unusual HTM transactional depth (%d)"
358  " possibly caused by mispeculation - htmUid=%u\n",
359  htm_depth, htm_uid);
360  }
361  }
362  load_inst->setHtmTransactionalState(htm_uid, htm_depth);
363  }
364 
365  if (load_inst->isHtmStop()) {
366  htmStops++;
367  DPRINTF(HtmCpu, ">> htmStarts (%d) : htmStops++ (%d)\n",
369 
370  if (htmStops==1 && htmStarts==0) {
371  DPRINTF(HtmCpu,
372  "htmStops==1 && htmStarts==0. "
373  "This generally shouldn't happen "
374  "(unless due to misspeculation)\n");
375  }
376  }
377 }
378 
// Places a store at the tail of the store queue and records its store queue
// index and iterator, plus the load queue position it was inserted at, on
// the instruction.
379 void
381 {
382  // Make sure it is not full before inserting an instruction.
383  assert(!storeQueue.full());
384  assert(storeQueue.size() < storeQueue.capacity());
385 
386  DPRINTF(LSQUnit, "Inserting store PC %s, idx:%i [sn:%lli]\n",
387  store_inst->pcState(), storeQueue.tail(), store_inst->seqNum);
389 
390  store_inst->sqIdx = storeQueue.tail();
391  store_inst->sqIt = storeQueue.getIterator(store_inst->sqIdx);
392 
    // The store remembers the load queue slot just past the current tail
    // and the current end of the load queue.
393  store_inst->lqIdx = loadQueue.tail() + 1;
394  assert(store_inst->lqIdx > 0);
395  store_inst->lqIt = loadQueue.end();
396 
397  storeQueue.back().set(store_inst);
398 }
399 
// Body of the accessor for the recorded memory-dependence violator (its
// signature lines are not present in this listing). Returns the current
// violator and clears the stored one, so a second call returns null.
402 {
403  DynInstPtr temp = memDepViolator;
404 
405  memDepViolator = NULL;
406 
407  return temp;
408 }
409 
// Returns the number of unused load queue entries (capacity minus size).
410 unsigned
412 {
413  DPRINTF(LSQUnit, "LQ size: %d, #loads occupied: %d\n",
415  return loadQueue.capacity() - loadQueue.size();
416 }
417 
// Returns the number of unused store queue entries (capacity minus size).
418 unsigned
420 {
421  DPRINTF(LSQUnit, "SQ size: %d, #stores occupied: %d\n",
423  return storeQueue.capacity() - storeQueue.size();
424 
425  }
426 
// Processes an invalidating snoop against the load queue. The oldest load
// is only checked for a locked (LLSC) hit. Each younger load with a valid,
// non-strictly-ordered address that hits the invalidated block is either
// marked for re-execution (ReExec fault) or flagged as having seen an
// external snoop. With needsTSO set, once one load hits, all younger loads
// are forced to re-execute as well.
427 void
429 {
430  // Should only ever get invalidations in here
431  assert(pkt->isInvalidate());
432 
433  DPRINTF(LSQUnit, "Got snoop for address %#x\n", pkt->getAddr());
434 
    // noSquashFromTC is forced to true for each context around the
    // per-context work and restored to its previous value afterwards.
435  for (int x = 0; x < cpu->numContexts(); x++) {
436  gem5::ThreadContext *tc = cpu->getContext(x);
437  bool no_squash = cpu->thread[x]->noSquashFromTC;
438  cpu->thread[x]->noSquashFromTC = true;
440  cpu->thread[x]->noSquashFromTC = no_squash;
441  }
442 
443  if (loadQueue.empty())
444  return;
445 
446  auto iter = loadQueue.begin();
447 
    // Block-aligned address of the invalidation.
448  Addr invalidate_addr = pkt->getAddr() & cacheBlockMask;
449 
450  DynInstPtr ld_inst = iter->instruction();
451  assert(ld_inst);
452  LSQRequest *request = iter->request();
453 
454  // Check that this snoop didn't just invalidate our lock flag
455  if (ld_inst->effAddrValid() &&
456  request->isCacheBlockHit(invalidate_addr, cacheBlockMask)
457  && ld_inst->memReqFlags & Request::LLSC) {
458  ld_inst->tcBase()->getIsaPtr()->handleLockedSnoopHit(ld_inst.get());
459  }
460 
461  bool force_squash = false;
462 
    // Walk the remaining (younger) loads; the head was handled above.
463  while (++iter != loadQueue.end()) {
464  ld_inst = iter->instruction();
465  assert(ld_inst);
466  request = iter->request();
467  if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered())
468  continue;
469 
470  DPRINTF(LSQUnit, "-- inst [sn:%lli] to pktAddr:%#x\n",
471  ld_inst->seqNum, invalidate_addr);
472 
473  if (force_squash ||
474  request->isCacheBlockHit(invalidate_addr, cacheBlockMask)) {
475  if (needsTSO) {
476  // If we have a TSO system, as all loads must be ordered with
477  // all other loads, this load as well as *all* subsequent loads
478  // need to be squashed to prevent possible load reordering.
479  force_squash = true;
480  }
481  if (ld_inst->possibleLoadViolation() || force_squash) {
482  DPRINTF(LSQUnit, "Conflicting load at addr %#x [sn:%lli]\n",
483  pkt->getAddr(), ld_inst->seqNum);
484 
485  // Mark the load for re-execution
486  ld_inst->fault = std::make_shared<ReExec>();
487  request->setStateToFault();
488  } else {
489  DPRINTF(LSQUnit, "HitExternal Snoop for addr %#x [sn:%lli]\n",
490  pkt->getAddr(), ld_inst->seqNum);
491 
492  // Make sure that we don't lose a snoop hitting a LOCKED
493  // address since the LOCK* flags don't get updated until
494  // commit.
495  if (ld_inst->memReqFlags & Request::LLSC) {
496  ld_inst->tcBase()->getIsaPtr()->
497  handleLockedSnoopHit(ld_inst.get());
498  }
499 
500  // If an older load checks this and it's true
501  // then we might have missed the snoop
502  // in which case we need to invalidate to be sure
503  ld_inst->hitExternalSnoop(true);
504  }
505  }
506  }
507  return;
508 }
509 
// Scans the load queue from loadIt to the end for loads whose address range
// (at depCheckShift granularity) overlaps that of inst.
//   loadIt  starting position; advanced in place as the scan proceeds
//   inst    the load or store that has just executed
// Returns an M5PanicFault describing the conflict when a violation is
// detected (memDepViolator is set to the offending load), else NoFault.
510 Fault
511 LSQUnit::checkViolations(typename LoadQueue::iterator& loadIt,
512  const DynInstPtr& inst)
513 {
    // First and last address covered by inst, shifted down to the
    // dependency-check granularity.
514  Addr inst_eff_addr1 = inst->effAddr >> depCheckShift;
515  Addr inst_eff_addr2 = (inst->effAddr + inst->effSize - 1) >> depCheckShift;
516 
522  while (loadIt != loadQueue.end()) {
523  DynInstPtr ld_inst = loadIt->instruction();
        // Loads without a valid address and strictly ordered loads are
        // skipped.
524  if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()) {
525  ++loadIt;
526  continue;
527  }
528 
529  Addr ld_eff_addr1 = ld_inst->effAddr >> depCheckShift;
530  Addr ld_eff_addr2 =
531  (ld_inst->effAddr + ld_inst->effSize - 1) >> depCheckShift;
532 
        // Closed-interval overlap test between the two address ranges.
533  if (inst_eff_addr2 >= ld_eff_addr1 && inst_eff_addr1 <= ld_eff_addr2) {
534  if (inst->isLoad()) {
535  // If this load is to the same block as an external snoop
536  // invalidate that we've observed then the load needs to be
537  // squashed as it could have newer data
538  if (ld_inst->hitExternalSnoop()) {
539  if (!memDepViolator ||
540  ld_inst->seqNum < memDepViolator->seqNum) {
541  DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] "
542  "and [sn:%lli] at address %#x\n",
543  inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
544  memDepViolator = ld_inst;
545 
547 
548  return std::make_shared<GenericISA::M5PanicFault>(
549  "Detected fault with inst [sn:%lli] and "
550  "[sn:%lli] at address %#x\n",
551  inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
552  }
553  }
554 
555  // Otherwise, mark the load as a possible load violation and
556  // if we see a snoop before it's committed, we need to squash
557  ld_inst->possibleLoadViolation(true);
558  DPRINTF(LSQUnit, "Found possible load violation at addr: %#x"
559  " between instructions [sn:%lli] and [sn:%lli]\n",
560  inst_eff_addr1, inst->seqNum, ld_inst->seqNum);
561  } else {
562  // A load/store incorrectly passed this store.
563  // Check if we already have a violator, or if it's newer
564  // squash and refetch.
565  if (memDepViolator && ld_inst->seqNum > memDepViolator->seqNum)
566  break;
567 
568  DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] and "
569  "[sn:%lli] at address %#x\n",
570  inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
571  memDepViolator = ld_inst;
572 
574 
575  return std::make_shared<GenericISA::M5PanicFault>(
576  "Detected fault with "
577  "inst [sn:%lli] and [sn:%lli] at address %#x\n",
578  inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
579  }
580  }
581 
582  ++loadIt;
583  }
584  return NoFault;
585 }
586 
587 
588 
589 
// Executes a load: initiates the access, handles predicated-off, delayed
// translation, partial-fault and faulting cases, and otherwise (when
// checkLoads is set) checks younger loads for ordering violations.
// Returns the fault from initiateAcc(), NoFault for the deferred and
// predicated-off cases, or the result of checkViolations().
590 Fault
592 {
593  // Execute a specific load.
594  Fault load_fault = NoFault;
595 
596  DPRINTF(LSQUnit, "Executing load PC %s, [sn:%lli]\n",
597  inst->pcState(), inst->seqNum);
598 
599  assert(!inst->isSquashed());
600 
601  load_fault = inst->initiateAcc();
602 
    // Memory-access predicate is false: no memory access is performed, the
    // instruction is completed with a null packet and sent to commit.
603  if (load_fault == NoFault && !inst->readMemAccPredicate()) {
604  assert(inst->readPredicate());
605  inst->setExecuted();
606  inst->completeAcc(nullptr);
607  iewStage->instToCommit(inst);
609  return NoFault;
610  }
611 
    // Translation has not finished yet; nothing more to do for now.
612  if (inst->isTranslationDelayed() && load_fault == NoFault)
613  return load_fault;
614 
615  if (load_fault != NoFault && inst->translationCompleted() &&
616  inst->savedRequest->isPartialFault()
617  && !inst->savedRequest->isComplete()) {
618  assert(inst->savedRequest->isSplit());
619  // If we have a partial fault where the mem access is not complete yet
620  // then the cache must have been blocked. This load will be re-executed
621  // when the cache gets unblocked. We will handle the fault when the
622  // mem access is complete.
623  return NoFault;
624  }
625 
626  // If the instruction faulted or predicated false, then we need to send it
627  // along to commit without the instruction completing.
628  if (load_fault != NoFault || !inst->readPredicate()) {
629  // Send this instruction to commit, also make sure iew stage
630  // realizes there is activity. Mark it as executed unless it
631  // is a strictly ordered load that needs to hit the head of
632  // commit.
633  if (!inst->readPredicate())
634  inst->forwardOldRegs();
635  DPRINTF(LSQUnit, "Load [sn:%lli] not executed from %s\n",
636  inst->seqNum,
637  (load_fault != NoFault ? "fault" : "predication"));
638  if (!(inst->hasRequest() && inst->strictlyOrdered()) ||
639  inst->isAtCommit()) {
640  inst->setExecuted();
641  }
642  iewStage->instToCommit(inst);
644  } else {
645  if (inst->effAddrValid()) {
            // Start the violation check at the load right after this one.
646  auto it = inst->lqIt;
647  ++it;
648 
649  if (checkLoads)
650  return checkViolations(it, inst);
651  }
652  }
653 
654  return load_fault;
655 }
656 
// Executes a store: initiates the access, handles delayed translation,
// false predicates and zero-size entries, marks store conditionals and
// atomics as able to write back, and finally checks the load queue for
// loads that violated ordering with this store.
// Returns the fault from initiateAcc() on the early-exit paths, otherwise
// the result of checkViolations().
657 Fault
659 {
660  // Make sure that a store exists.
661  assert(storeQueue.size() != 0);
662 
663  int store_idx = store_inst->sqIdx;
664 
665  DPRINTF(LSQUnit, "Executing store PC %s [sn:%lli]\n",
666  store_inst->pcState(), store_inst->seqNum);
667 
668  assert(!store_inst->isSquashed());
669 
670  // Check the recently completed loads to see if any match this store's
671  // address. If so, then we have a memory ordering violation.
672  typename LoadQueue::iterator loadIt = store_inst->lqIt;
673 
674  Fault store_fault = store_inst->initiateAcc();
675 
    // Translation has not finished yet; nothing more to do for now.
676  if (store_inst->isTranslationDelayed() &&
677  store_fault == NoFault)
678  return store_fault;
679 
680  if (!store_inst->readPredicate()) {
681  DPRINTF(LSQUnit, "Store [sn:%lli] not executed from predication\n",
682  store_inst->seqNum);
683  store_inst->forwardOldRegs();
684  return store_fault;
685  }
686 
    // A zero-size store queue entry is treated as the faulting case here.
687  if (storeQueue[store_idx].size() == 0) {
688  DPRINTF(LSQUnit,"Fault on Store PC %s, [sn:%lli], Size = 0\n",
689  store_inst->pcState(), store_inst->seqNum);
690 
691  if (store_inst->isAtomic()) {
692  // If the instruction faulted, then we need to send it along
693  // to commit without the instruction completing.
694  if (!(store_inst->hasRequest() && store_inst->strictlyOrdered()) ||
695  store_inst->isAtCommit()) {
696  store_inst->setExecuted();
697  }
698  iewStage->instToCommit(store_inst);
700  }
701 
702  return store_fault;
703  }
704 
705  assert(store_fault == NoFault);
706 
707  if (store_inst->isStoreConditional() || store_inst->isAtomic()) {
708  // Store conditionals and Atomics need to set themselves as able to
709  // writeback if we haven't had a fault by here.
710  storeQueue[store_idx].canWB() = true;
711 
712  ++storesToWB;
713  }
714 
715  return checkViolations(loadIt, store_inst);
716 
717 }
718 
// Commits the load at the head of the load queue: samples its issue-to-wake
// latency into the loadToUse statistic when applicable and clears the head
// entry.
719 void
721 {
722  assert(loadQueue.front().valid());
723 
724  DynInstPtr inst = loadQueue.front().instruction();
725 
726  DPRINTF(LSQUnit, "Committing head load instruction, PC %s\n",
727  inst->pcState());
728 
729  // Update histogram with memory latency from load
730  // Only take latency from load demand that were issued and did not fault
    // (-1 in firstIssue / lastWakeDependents is treated as "not recorded".)
731  if (!inst->isInstPrefetch() && !inst->isDataPrefetch()
732  && inst->firstIssue != -1
733  && inst->lastWakeDependents != -1) {
734  stats.loadToUse.sample(cpu->ticksToCycles(
735  inst->lastWakeDependents - inst->firstIssue));
736  }
737 
738  loadQueue.front().clear();
740 }
741 
// Commits loads from the head of the load queue for as long as the head's
// sequence number is not greater than youngest_inst.
742 void
744 {
745  assert(loadQueue.size() == 0 || loadQueue.front().valid());
746 
747  while (loadQueue.size() != 0 && loadQueue.front().instruction()->seqNum
748  <= youngest_inst) {
749  commitLoad();
750  }
751 }
752 
// Marks stores with a sequence number up to youngest_inst as able to write
// back and counts each newly marked store in storesToWB. The scan stops at
// the first not-yet-marked store that is younger than youngest_inst.
753 void
755 {
756  assert(storeQueue.size() == 0 || storeQueue.front().valid());
757 
758  /* Forward iterate the store queue (age order). */
759  for (auto& x : storeQueue) {
760  assert(x.valid());
761  // Mark any stores that are now committed and have not yet
762  // been marked as able to write back.
763  if (!x.canWB()) {
764  if (x.instruction()->seqNum > youngest_inst) {
765  break;
766  }
767  DPRINTF(LSQUnit, "Marking store as able to write back, PC "
768  "%s [sn:%lli]\n",
769  x.instruction()->pcState(),
770  x.instruction()->seqNum);
771 
772  x.canWB() = true;
773 
774  ++storesToWB;
775  }
776  }
777 }
778 
// Retries the store at storeWBIt that was previously blocked: resends its
// packet to the cache and, if it is now sent, performs the post-send steps.
779 void
781 {
782  assert(isStoreBlocked);
783  storeWBIt->request()->sendPacketToCache();
784  if (storeWBIt->request()->isSent()){
785  storePostSend();
786  }
787 }
788 
// Writes back stores marked canWB, starting at storeWBIt, for as long as
// stores are pending, a cache port is available and (under TSO) no store is
// already in flight. Failed store conditionals are completed through a
// scheduled WritebackEvent, local accesses are performed directly, and all
// other stores are sent to the cache.
789 void
791 {
792  if (isStoreBlocked) {
793  DPRINTF(LSQUnit, "Writing back blocked store\n");
795  }
796 
797  while (storesToWB > 0 &&
798  storeWBIt.dereferenceable() &&
799  storeWBIt->valid() &&
800  storeWBIt->canWB() &&
801  ((!needsTSO) || (!storeInFlight)) &&
802  lsq->cachePortAvailable(false)) {
803 
804  if (isStoreBlocked) {
805  DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
806  " is blocked!\n");
807  break;
808  }
809 
810  // Store didn't write any data so no need to write it back to
811  // memory.
812  if (storeWBIt->size() == 0) {
813  /* It is important that the preincrement happens at (or before)
814  * the call, as the code of completeStore checks
815  * storeWBIt. */
817  continue;
818  }
819 
        // Data prefetches are skipped without being written back.
820  if (storeWBIt->instruction()->isDataPrefetch()) {
821  storeWBIt++;
822  continue;
823  }
824 
825  assert(storeWBIt->hasRequest());
826  assert(!storeWBIt->committed());
827 
828  DynInstPtr inst = storeWBIt->instruction();
829  LSQRequest* request = storeWBIt->request();
830 
831  // Process store conditionals or store release after all previous
832  // stores are completed
833  if ((request->mainReq()->isLLSC() ||
834  request->mainReq()->isRelease()) &&
835  (storeWBIt.idx() != storeQueue.head())) {
836  DPRINTF(LSQUnit, "Store idx:%i PC:%s to Addr:%#x "
837  "[sn:%lli] is %s%s and not head of the queue\n",
838  storeWBIt.idx(), inst->pcState(),
839  request->mainReq()->getPaddr(), inst->seqNum,
840  request->mainReq()->isLLSC() ? "SC" : "",
841  request->mainReq()->isRelease() ? "/Release" : "");
842  break;
843  }
844 
845  storeWBIt->committed() = true;
846 
        // Copy the store's data (or zeros) into a buffer allocated here and
        // attached to the instruction.
847  assert(!inst->memData);
848  inst->memData = new uint8_t[request->_size];
849 
850  if (storeWBIt->isAllZeros())
851  memset(inst->memData, 0, request->_size);
852  else
853  memcpy(inst->memData, storeWBIt->data(), request->_size);
854 
855  request->buildPackets();
856 
857  DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%s "
858  "to Addr:%#x, data:%#x [sn:%lli]\n",
859  storeWBIt.idx(), inst->pcState(),
860  request->mainReq()->getPaddr(), (int)*(inst->memData),
861  inst->seqNum);
862 
863  // @todo: Remove this SC hack once the memory system handles it.
864  if (inst->isStoreConditional()) {
865  // Disable recording the result temporarily. Writing to
866  // misc regs normally updates the result, but this is not
867  // the desired behavior when handling store conditionals.
868  inst->recordResult(false);
869  bool success = inst->tcBase()->getIsaPtr()->handleLockedWrite(
870  inst.get(), request->mainReq(), cacheBlockMask);
871  inst->recordResult(true);
872  request->packetSent();
873 
874  if (!success) {
875  request->complete();
876  // Instantly complete this store.
877  DPRINTF(LSQUnit, "Store conditional [sn:%lli] failed. "
878  "Instantly completing it.\n",
879  inst->seqNum);
                // The event gets its own copy of the packet and is scheduled
                // one tick from now.
880  PacketPtr new_pkt = new Packet(*request->packet());
881  WritebackEvent *wb = new WritebackEvent(inst,
882  new_pkt, this);
883  cpu->schedule(wb, curTick() + 1);
885  if (!storeQueue.empty())
886  storeWBIt++;
887  else
889  continue;
890  }
891  }
892 
        // Local accesses are handled through the request's localAccessor
        // with a temporary packet instead of being sent to the cache.
893  if (request->mainReq()->isLocalAccess()) {
894  assert(!inst->isStoreConditional());
895  assert(!inst->inHtmTransactionalState());
896  gem5::ThreadContext *thread = cpu->tcBase(lsqID);
897  PacketPtr main_pkt = new Packet(request->mainReq(),
899  main_pkt->dataStatic(inst->memData);
900  request->mainReq()->localAccessor(thread, main_pkt);
901  delete main_pkt;
903  storeWBIt++;
904  continue;
905  }
906  /* Send to cache */
907  request->sendPacketToCache();
908 
909  /* If successful, do the post send */
910  if (request->isSent()) {
911  storePostSend();
912  } else {
913  DPRINTF(LSQUnit, "D-Cache became blocked when writing [sn:%lli], "
914  "will retry later\n",
915  inst->seqNum);
916  }
917  }
918  assert(storesToWB >= 0);
919 }
920 
// Squashes every load and store younger than squashed_num, walking each
// queue from its tail. Also rolls back the HTM nesting counters, restores
// the HTM uid in the thread's checkpoint, and drops a recorded
// memory-dependence violator that is younger than squashed_num. Stores
// already marked canWB are kept.
921 void
922 LSQUnit::squash(const InstSeqNum &squashed_num)
923 {
924  DPRINTF(LSQUnit, "Squashing until [sn:%lli]!"
925  "(Loads:%i Stores:%i)\n", squashed_num, loadQueue.size(),
926  storeQueue.size());
927 
928  while (loadQueue.size() != 0 &&
929  loadQueue.back().instruction()->seqNum > squashed_num) {
930  DPRINTF(LSQUnit,"Load Instruction PC %s squashed, "
931  "[sn:%lli]\n",
932  loadQueue.back().instruction()->pcState(),
933  loadQueue.back().instruction()->seqNum);
934 
        // The load that caused the stall is being squashed, so the stall
        // state is cleared.
935  if (isStalled() && loadQueue.tail() == stallingLoadIdx) {
936  stalled = false;
937  stallingStoreIsn = 0;
938  stallingLoadIdx = 0;
939  }
940 
941  // hardware transactional memory
942  // Squashing instructions can alter the transaction nesting depth
943  // and must be corrected before fetching resumes.
944  if (loadQueue.back().instruction()->isHtmStart())
945  {
            // Decrement, clamping at zero.
946  htmStarts = (--htmStarts < 0) ? 0 : htmStarts;
947  DPRINTF(HtmCpu, ">> htmStarts-- (%d) : htmStops (%d)\n",
949  }
950  if (loadQueue.back().instruction()->isHtmStop())
951  {
            // Decrement, clamping at zero.
952  htmStops = (--htmStops < 0) ? 0 : htmStops;
953  DPRINTF(HtmCpu, ">> htmStarts (%d) : htmStops-- (%d)\n",
955  }
956  // Clear the smart pointer to make sure it is decremented.
957  loadQueue.back().instruction()->setSquashed();
958  loadQueue.back().clear();
959 
962  }
963 
964  // hardware transactional memory
965  // scan load queue (from oldest to youngest) for most recent valid htmUid
966  auto scan_it = loadQueue.begin();
967  uint64_t in_flight_uid = 0;
968  while (scan_it != loadQueue.end()) {
969  if (scan_it->instruction()->isHtmStart() &&
970  !scan_it->instruction()->isSquashed()) {
971  in_flight_uid = scan_it->instruction()->getHtmTransactionUid();
972  DPRINTF(HtmCpu, "loadQueue[%d]: found valid HtmStart htmUid=%u\n",
973  scan_it._idx, in_flight_uid);
974  }
975  scan_it++;
976  }
977  // If there's a HtmStart in the pipeline then use its htmUid,
978  // otherwise use the most recently committed uid
979  const auto& htm_cpt = cpu->tcBase(lsqID)->getHtmCheckpointPtr();
980  if (htm_cpt) {
981  const uint64_t old_local_htm_uid = htm_cpt->getHtmUid();
982  uint64_t new_local_htm_uid;
        // An in-flight uid of 0 is treated as "none found".
983  if (in_flight_uid > 0)
984  new_local_htm_uid = in_flight_uid;
985  else
986  new_local_htm_uid = lastRetiredHtmUid;
987 
988  if (old_local_htm_uid != new_local_htm_uid) {
989  DPRINTF(HtmCpu, "flush: lastRetiredHtmUid=%u\n",
991  DPRINTF(HtmCpu, "flush: resetting localHtmUid=%u\n",
992  new_local_htm_uid);
993 
994  htm_cpt->setHtmUid(new_local_htm_uid);
995  }
996  }
997 
998  if (memDepViolator && squashed_num < memDepViolator->seqNum) {
999  memDepViolator = NULL;
1000  }
1001 
1002  while (storeQueue.size() != 0 &&
1003  storeQueue.back().instruction()->seqNum > squashed_num) {
1004  // Instructions marked as can WB are already committed.
1005  if (storeQueue.back().canWB()) {
1006  break;
1007  }
1008 
1009  DPRINTF(LSQUnit,"Store Instruction PC %s squashed, "
1010  "idx:%i [sn:%lli]\n",
1011  storeQueue.back().instruction()->pcState(),
1012  storeQueue.tail(), storeQueue.back().instruction()->seqNum);
1013 
1014  // I don't think this can happen. It should have been cleared
1015  // by the stalling load.
        // NOTE(review): the two assignments below follow panic(); they are
        // presumably unreachable if panic() does not return -- confirm.
1016  if (isStalled() &&
1017  storeQueue.back().instruction()->seqNum == stallingStoreIsn) {
1018  panic("Is stalled should have been cleared by stalling load!\n");
1019  stalled = false;
1020  stallingStoreIsn = 0;
1021  }
1022 
1023  // Clear the smart pointer to make sure it is decremented.
1024  storeQueue.back().instruction()->setSquashed();
1025 
1026  // Must delete request now that it wasn't handed off to
1027  // memory. This is quite ugly. @todo: Figure out the proper
1028  // place to really handle request deletes.
1029  storeQueue.back().clear();
1030 
1031  storeQueue.pop_back();
1033  }
1034 }
1035 
// Returns the HTM uid currently held by this thread's HTM checkpoint.
// NOTE(review): the checkpoint pointer is dereferenced without a null
// check, unlike the guarded use in squash() -- confirm it cannot be null.
1036 uint64_t
1038 {
1039  const auto& htm_cpt = cpu->tcBase(lsqID)->getHtmCheckpointPtr();
1040  return htm_cpt->getHtmUid();
1041 }
1042 
// Bookkeeping after the store at storeWBIt has been sent: clears a stall
// caused by this store, marks non-conditional stores as completed (and
// verifies them with the checker when one is attached), records a store in
// flight under TSO, and advances storeWBIt.
1043 void
1045 {
1046  if (isStalled() &&
1047  storeWBIt->instruction()->seqNum == stallingStoreIsn) {
1048  DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
1049  "load idx:%i\n",
1051  stalled = false;
1052  stallingStoreIsn = 0;
1054  }
1055 
1056  if (!storeWBIt->instruction()->isStoreConditional()) {
1057  // The store is basically completed at this time. This
1058  // only works so long as the checker doesn't try to
1059  // verify the value in memory for stores.
1060  storeWBIt->instruction()->setCompleted();
1061 
1062  if (cpu->checker) {
1063  cpu->checker->verify(storeWBIt->instruction());
1064  }
1065  }
1066 
1067  if (needsTSO) {
1068  storeInFlight = true;
1069  }
1070 
1071  storeWBIt++;
1072 }
1073 
// Writes back a completed memory instruction: wakes the CPU via the IEW
// stage, returns early for squashed instructions, completes the access when
// the instruction carries no fault, and forwards the instruction to commit.
1074 void
1076 {
1077  iewStage->wakeCPU();
1078 
1079  // Squashed instructions do not need to complete their access.
1080  if (inst->isSquashed()) {
1081  assert (!inst->isStore() || inst->isStoreConditional());
1083  return;
1084  }
1085 
1086  if (!inst->isExecuted()) {
1087  inst->setExecuted();
1088 
1089  if (inst->fault == NoFault) {
1090  // Complete access to copy data to proper place.
1091  inst->completeAcc(pkt);
1092  } else {
1093  // If the instruction has an outstanding fault, we cannot complete
1094  // the access as this discards the current fault.
1095 
1096  // If we have an outstanding fault, the fault should only be of
1097  // type ReExec or - in case of a SplitRequest - a partial
1098  // translation fault
1099 
1100  // Unless it's a hardware transactional memory fault
1101  auto htm_fault = std::dynamic_pointer_cast<
1102  GenericHtmFailureFault>(inst->fault);
1103 
1104  if (!htm_fault) {
1105  assert(dynamic_cast<ReExec*>(inst->fault.get()) != nullptr ||
1106  inst->savedRequest->isPartialFault());
1107 
1108  } else if (!pkt->htmTransactionFailedInCache()) {
1109  // Situation in which the instruction has a hardware
1110  // transactional memory fault but not the packet itself. This
1111  // can occur with ldp_uop microops since access is spread over
1112  // multiple packets.
1113  DPRINTF(HtmCpu,
1114  "%s writeback with HTM failure fault, "
1115  "however, completing packet is not aware of "
1116  "transaction failure. cause=%s htmUid=%u\n",
1117  inst->staticInst->getName(),
1118  htmFailureToStr(htm_fault->getHtmFailureFaultCause()),
1119  htm_fault->getHtmUid());
1120  }
1121 
1122  DPRINTF(LSQUnit, "Not completing instruction [sn:%lli] access "
1123  "due to pending fault.\n", inst->seqNum);
1124  }
1125  }
1126 
1127  // Need to insert instruction into queue to commit
1128  iewStage->instToCommit(inst);
1129 
1131 
1132  // see if this load changed the PC
1134 }
1135 
1136 void
1137 LSQUnit::completeStore(typename StoreQueue::iterator store_idx)
1138 {
1139  assert(store_idx->valid());
1140  store_idx->completed() = true;
1141  --storesToWB;
1142  // A bit conservative because a store completion may not free up entries,
1143  // but hopefully avoids two store completions in one cycle from making
1144  // the CPU tick twice.
1145  cpu->wakeCPU();
1147 
1148  /* We 'need' a copy here because we may clear the entry from the
1149  * store queue. */
1150  DynInstPtr store_inst = store_idx->instruction();
1151  if (store_idx == storeQueue.begin()) {
1152  do {
1153  storeQueue.front().clear();
1155  } while (storeQueue.front().completed() &&
1156  !storeQueue.empty());
1157 
1158  iewStage->updateLSQNextCycle = true;
1159  }
1160 
1161  DPRINTF(LSQUnit, "Completing store [sn:%lli], idx:%i, store head "
1162  "idx:%i\n",
1163  store_inst->seqNum, store_idx.idx() - 1, storeQueue.head() - 1);
1164 
1165 #if TRACING_ON
1166  if (debug::O3PipeView) {
1167  store_inst->storeTick =
1168  curTick() - store_inst->fetchTick;
1169  }
1170 #endif
1171 
1172  if (isStalled() &&
1173  store_inst->seqNum == stallingStoreIsn) {
1174  DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
1175  "load idx:%i\n",
1177  stalled = false;
1178  stallingStoreIsn = 0;
1180  }
1181 
1182  store_inst->setCompleted();
1183 
1184  if (needsTSO) {
1185  storeInFlight = false;
1186  }
1187 
1188  // Tell the checker we've completed this instruction. Some stores
1189  // may get reported twice to the checker, but the checker can
1190  // handle that case.
1191  // Store conditionals cannot be sent to the checker yet, they have
1192  // to update the misc registers first which should take place
1193  // when they commit
1194  if (cpu->checker && !store_inst->isStoreConditional()) {
1195  cpu->checker->verify(store_inst);
1196  }
1197 }
1198 
// Attempts to hand data_pkt to the data cache port.
//
// isLoad   - true when the packet belongs to a load, false for a store.
// data_pkt - packet to send; its senderState is expected to be the
//            LSQRequest that owns the packet.
// Returns true when the packet was accepted by the port. Returns false when
// the cache is already blocked, no cache port is available for this kind of
// access, or sendTimingReq() rejected the packet (in which case the cache is
// marked blocked). The owning request is notified through packetSent() /
// packetNotSent(), and isStoreBlocked tracks the outcome for stores.
// NOTE(review): the dynamic_cast result is used without a null check --
// confirm senderState is always an LSQRequest here.
// NOTE(review): listing line 1226 is not present in this listing.
1199 bool
1200 LSQUnit::trySendPacket(bool isLoad, PacketPtr data_pkt)
1201 {
1202  bool ret = true;
1203  bool cache_got_blocked = false;
1204 
1205  LSQRequest *request = dynamic_cast<LSQRequest*>(data_pkt->senderState);
1206 
1207  if (!lsq->cacheBlocked() &&
1208  lsq->cachePortAvailable(isLoad)) {
1209  if (!dcachePort->sendTimingReq(data_pkt)) {
1210  ret = false;
1211  cache_got_blocked = true;
1212  }
1213  } else {
1214  ret = false;
1215  }
1216 
1217  if (ret) {
1218  if (!isLoad) {
1219  isStoreBlocked = false;
1220  }
1221  lsq->cachePortBusy(isLoad);
1222  request->packetSent();
1223  } else {
1224  if (cache_got_blocked) {
1225  lsq->cacheBlocked(true);
1227  }
1228  if (!isLoad) {
      // A store that could not be sent must be the one at storeWBIt.
1229  assert(request == storeWBIt->request());
1230  isStoreBlocked = true;
1231  }
1232  request->packetNotSent();
1233  }
1234  DPRINTF(LSQUnit, "Memory request (pkt: %s) from inst [sn:%llu] was"
1235  " %ssent (cache is blocked: %d, cache_got_blocked: %d)\n",
1236  data_pkt->print(), request->instruction()->seqNum,
1237  ret ? "": "not ", lsq->cacheBlocked(), cache_got_blocked);
1238  return ret;
1239 }
1240 
// Retry notification handler: acts only when a store is currently recorded
// as blocked (isStoreBlocked).
// NOTE(review): the signature line (listing line 1242) and the statement
// that follows the DPRINTF (1246) are not present in this listing, so the
// actual retry action cannot be seen here.
1241 void
1243 {
1244  if (isStoreBlocked) {
1245  DPRINTF(LSQUnit, "Receiving retry: blocked store\n");
1247  }
1248 }
1249 
// Debug helper: prints the size of the load queue and of the store queue,
// followed by the PC state and sequence number of every entry in each
// queue, using cprintf.
// NOTE(review): the signature line (listing line 1251) is not present in
// this listing.
1250 void
1252 {
1253  cprintf("Load store queue: Dumping instructions.\n");
1254  cprintf("Load queue size: %i\n", loadQueue.size());
1255  cprintf("Load queue: ");
1256 
1257  for (const auto& e: loadQueue) {
1258  const DynInstPtr &inst(e.instruction());
1259  cprintf("%s.[sn:%llu] ", inst->pcState(), inst->seqNum);
1260  }
1261  cprintf("\n");
1262 
1263  cprintf("Store queue size: %i\n", storeQueue.size());
1264  cprintf("Store queue: ");
1265 
1266  for (const auto& e: storeQueue) {
1267  const DynInstPtr &inst(e.instruction());
1268  cprintf("%s.[sn:%llu] ", inst->pcState(), inst->seqNum);
1269  }
1270 
1271  cprintf("\n");
1272 }
1273 
1274 void LSQUnit::schedule(Event& ev, Tick when) { cpu->schedule(ev, when); }
1275 
1277 
// Returns the cache line size as reported by the owning CPU.
// NOTE(review): the signature line (listing line 1279) is not present in
// this listing.
1278 unsigned int
1280 {
1281  return cpu->cacheLineSize();
1282 }
1283 
// Executes the load held in load queue slot load_idx using `request`.
//
// The function tries, in order:
//  1. strictly ordered loads that are not at the LQ head / not at commit are
//     rescheduled and the request is discarded;
//  2. LLSC loads notify the ISA through handleLockedRead();
//  3. local accesses are served by the request's localAccessor and written
//     back after the returned delay;
//  4. HTM start/commit requests flagged NO_ACCESS are answered locally;
//  5. the store queue is scanned for a store whose data covers the load
//     (forwarding) or partially overlaps it (load is stalled/rescheduled);
//  6. otherwise packets are built and sent to the cache.
//
// request  - LSQ request describing the access.
// load_idx - index of the load's entry in loadQueue.
// Returns NoFault on every path except the deferred strictly ordered load,
// which returns an M5PanicFault.
// NOTE(review): listing lines 1307, 1416, 1428-1429, 1448, 1528 and 1552 are
// not present in this listing (among them the declaration of `coverage` and
// the condition of the partial-coverage branch).
1284 Fault
1285 LSQUnit::read(LSQRequest *request, int load_idx)
1286 {
1287  LQEntry& load_entry = loadQueue[load_idx];
1288  const DynInstPtr& load_inst = load_entry.instruction();
1289 
1290  load_entry.setRequest(request);
1291  assert(load_inst);
1292 
1293  assert(!load_inst->isExecuted());
1294 
1295  // Make sure this isn't a strictly ordered load
1296  // A bit of a hackish way to get strictly ordered accesses to work
1297  // only if they're at the head of the LSQ and are ready to commit
1298  // (at the head of the ROB too).
1299 
1300  if (request->mainReq()->isStrictlyOrdered() &&
1301  (load_idx != loadQueue.head() || !load_inst->isAtCommit())) {
1302  // Tell IQ/mem dep unit that this instruction will need to be
1303  // rescheduled eventually
1304  iewStage->rescheduleMemInst(load_inst);
1305  load_inst->clearIssued();
1306  load_inst->effAddrValid(false);
1308  DPRINTF(LSQUnit, "Strictly ordered load [sn:%lli] PC %s\n",
1309  load_inst->seqNum, load_inst->pcState());
1310 
1311  // Must delete request now that it wasn't handed off to
1312  // memory. This is quite ugly. @todo: Figure out the proper
1313  // place to really handle request deletes.
1314  load_entry.setRequest(nullptr);
1315  request->discard();
1316  return std::make_shared<GenericISA::M5PanicFault>(
1317  "Strictly ordered load [sn:%llx] PC %s\n",
1318  load_inst->seqNum, load_inst->pcState());
1319  }
1320 
1321  DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, "
1322  "storeHead: %i addr: %#x%s\n",
1323  load_idx - 1, load_inst->sqIt._idx, storeQueue.head() - 1,
1324  request->mainReq()->getPaddr(), request->isSplit() ? " split" :
1325  "");
1326 
1327  if (request->mainReq()->isLLSC()) {
1328  // Disable recording the result temporarily. Writing to misc
1329  // regs normally updates the result, but this is not the
1330  // desired behavior when handling store conditionals.
1331  load_inst->recordResult(false);
1332  load_inst->tcBase()->getIsaPtr()->handleLockedRead(load_inst.get(),
1333  request->mainReq());
1334  load_inst->recordResult(true);
1335  }
1336 
1337  if (request->mainReq()->isLocalAccess()) {
1338  assert(!load_inst->memData);
1339  assert(!load_inst->inHtmTransactionalState());
1340  load_inst->memData = new uint8_t[MaxDataBytes];
1341 
1342  gem5::ThreadContext *thread = cpu->tcBase(lsqID);
1343  PacketPtr main_pkt = new Packet(request->mainReq(), MemCmd::ReadReq);
1344 
1345  main_pkt->dataStatic(load_inst->memData);
1346 
1347  Cycles delay = request->mainReq()->localAccessor(thread, main_pkt);
1348 
1349  WritebackEvent *wb = new WritebackEvent(load_inst, main_pkt, this);
1350  cpu->schedule(wb, cpu->clockEdge(delay));
1351  return NoFault;
1352  }
1353 
1354  // hardware transactional memory
1355  if (request->mainReq()->isHTMStart() || request->mainReq()->isHTMCommit())
1356  {
1357  // don't want to send nested transactionStarts and
1358  // transactionStops outside of core, e.g. to Ruby
1359  if (request->mainReq()->getFlags().isSet(Request::NO_ACCESS)) {
1360  Cycles delay(0);
1361  PacketPtr data_pkt =
1362  new Packet(request->mainReq(), MemCmd::ReadReq);
1363 
1364  // Allocate memory if this is the first time a load is issued.
1365  if (!load_inst->memData) {
1366  load_inst->memData =
1367  new uint8_t[request->mainReq()->getSize()];
1368  // sanity checks expect zero in request's data
1369  memset(load_inst->memData, 0, request->mainReq()->getSize());
1370  }
1371 
1372  data_pkt->dataStatic(load_inst->memData);
1373  if (load_inst->inHtmTransactionalState()) {
1374  data_pkt->setHtmTransactional(
1375  load_inst->getHtmTransactionUid());
1376  }
1377  data_pkt->makeResponse();
1378 
1379  WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this);
1380  cpu->schedule(wb, cpu->clockEdge(delay));
1381  return NoFault;
1382  }
1383  }
1384 
1385  // Check the SQ for any previous stores that might lead to forwarding
1386  auto store_it = load_inst->sqIt;
1387  assert (store_it >= storeWBIt);
1388  // End once we've reached the top of the LSQ
1389  while (store_it != storeWBIt && !load_inst->isDataPrefetch()) {
1390  // Step the iterator back one entry (towards storeWBIt)
1391  store_it--;
1392  assert(store_it->valid());
1393  assert(store_it->instruction()->seqNum < load_inst->seqNum);
1394  int store_size = store_it->size();
1395 
1396  // Cache maintenance instructions go down via the store
1397  // path but they carry no data and they shouldn't be
1398  // considered for forwarding
1399  if (store_size != 0 && !store_it->instruction()->strictlyOrdered() &&
1400  !(store_it->request()->mainReq() &&
1401  store_it->request()->mainReq()->isCacheMaintenance())) {
1402  assert(store_it->instruction()->effAddrValid());
1403 
1404  // Check if the store data is within the lower and upper bounds of
1405  // addresses that the request needs.
      // Ranges are half-open: [req_s, req_e) for the load and
      // [st_s, st_e) for the store.
1406  auto req_s = request->mainReq()->getVaddr();
1407  auto req_e = req_s + request->mainReq()->getSize();
1408  auto st_s = store_it->instruction()->effAddr;
1409  auto st_e = st_s + store_size;
1410 
1411  bool store_has_lower_limit = req_s >= st_s;
1412  bool store_has_upper_limit = req_e <= st_e;
1413  bool lower_load_has_store_part = req_s < st_e;
1414  bool upper_load_has_store_part = req_e > st_s;
1415 
1417 
1418  // If the store entry is not atomic (atomic does not have valid
1419  // data), the store has all of the data needed, and
1420  // the load is not LLSC, then
1421  // we can forward data from the store to the load
1422  if (!store_it->instruction()->isAtomic() &&
1423  store_has_lower_limit && store_has_upper_limit &&
1424  !request->mainReq()->isLLSC()) {
1425 
1426  const auto& store_req = store_it->request()->mainReq();
1427  coverage = store_req->isMasked() ?
1430  } else if (
1431  // This is the partial store-load forwarding case where a store
1432  // has only part of the load's data and the load isn't LLSC
1433  (!request->mainReq()->isLLSC() &&
1434  ((store_has_lower_limit && lower_load_has_store_part) ||
1435  (store_has_upper_limit && upper_load_has_store_part) ||
1436  (lower_load_has_store_part && upper_load_has_store_part))) ||
1437  // The load is LLSC, and the store has all or part of the
1438  // load's data
1439  (request->mainReq()->isLLSC() &&
1440  ((store_has_lower_limit || upper_load_has_store_part) &&
1441  (store_has_upper_limit || lower_load_has_store_part))) ||
1442  // The store entry is atomic and has all or part of the load's
1443  // data
1444  (store_it->instruction()->isAtomic() &&
1445  ((store_has_lower_limit || upper_load_has_store_part) &&
1446  (store_has_upper_limit || lower_load_has_store_part)))) {
1447 
1449  }
1450 
1451  if (coverage == AddrRangeCoverage::FullAddrRangeCoverage) {
1452  // Get shift amount for offset into the store's data.
1453  int shift_amt = request->mainReq()->getVaddr() -
1454  store_it->instruction()->effAddr;
1455 
1456  // Allocate memory if this is the first time a load is issued.
1457  if (!load_inst->memData) {
1458  load_inst->memData =
1459  new uint8_t[request->mainReq()->getSize()];
1460  }
1461  if (store_it->isAllZeros())
1462  memset(load_inst->memData, 0,
1463  request->mainReq()->getSize());
1464  else
1465  memcpy(load_inst->memData,
1466  store_it->data() + shift_amt,
1467  request->mainReq()->getSize());
1468 
1469  DPRINTF(LSQUnit, "Forwarding from store idx %i to load to "
1470  "addr %#x\n", store_it._idx,
1471  request->mainReq()->getVaddr());
1472 
1473  PacketPtr data_pkt = new Packet(request->mainReq(),
1474  MemCmd::ReadReq);
1475  data_pkt->dataStatic(load_inst->memData);
1476 
1477  // hardware transactional memory
1478  // Store to load forwarding within a transaction
1479  // This should be okay because the store will be sent to
1480  // the memory subsystem and subsequently get added to the
1481  // write set of the transaction. The write set has a stronger
1482  // property than the read set, so the load doesn't necessarily
1483  // have to be there.
1484  assert(!request->mainReq()->isHTMCmd());
1485  if (load_inst->inHtmTransactionalState()) {
1486  assert (!storeQueue[store_it._idx].completed());
1487  assert (
1488  storeQueue[store_it._idx].instruction()->
1489  inHtmTransactionalState());
1490  assert (
1491  load_inst->getHtmTransactionUid() ==
1492  storeQueue[store_it._idx].instruction()->
1493  getHtmTransactionUid());
1494  data_pkt->setHtmTransactional(
1495  load_inst->getHtmTransactionUid());
1496  DPRINTF(HtmCpu, "HTM LD (ST2LDF) "
1497  "pc=0x%lx - vaddr=0x%lx - "
1498  "paddr=0x%lx - htmUid=%u\n",
1499  load_inst->pcState().instAddr(),
1500  data_pkt->req->hasVaddr() ?
1501  data_pkt->req->getVaddr() : 0lu,
1502  data_pkt->getAddr(),
1503  load_inst->getHtmTransactionUid());
1504  }
1505 
1506  if (request->isAnyOutstandingRequest()) {
1507  assert(request->_numOutstandingPackets > 0);
1508  // There are memory requests packets in flight already.
1509  // This may happen if the store was not complete the
1510  // first time this load got executed. Signal the senderState
1511  // that response packets should be discarded.
1512  request->discard();
1513  }
1514 
1515  WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt,
1516  this);
1517 
1518  // We'll say this has a 1 cycle load-store forwarding latency
1519  // for now.
1520  // @todo: Need to make this a parameter.
      // NOTE(review): the event is scheduled at curTick(), i.e. with no
      // added delay, which does not match the "1 cycle" wording above --
      // confirm the intended latency.
1521  cpu->schedule(wb, curTick());
1522 
1523  // Don't need to do anything special for split loads.
1524  ++stats.forwLoads;
1525 
1526  return NoFault;
1527  } else if (
1529  // If it's already been written back, then don't worry about
1530  // stalling on it.
1531  if (store_it->completed()) {
      // NOTE(review): the `continue` below is presumably unreachable if
      // panic() does not return -- confirm.
1532  panic("Should not check one of these");
1533  continue;
1534  }
1535 
1536  // Must stall load and force it to retry, so long as it's the
1537  // oldest load that needs to do so.
1538  if (!stalled ||
1539  (stalled &&
1540  load_inst->seqNum <
1541  loadQueue[stallingLoadIdx].instruction()->seqNum)) {
1542  stalled = true;
1543  stallingStoreIsn = store_it->instruction()->seqNum;
1544  stallingLoadIdx = load_idx;
1545  }
1546 
1547  // Tell IQ/mem dep unit that this instruction will need to be
1548  // rescheduled eventually
1549  iewStage->rescheduleMemInst(load_inst);
1550  load_inst->clearIssued();
1551  load_inst->effAddrValid(false);
1553 
1554  // Do not generate a writeback event as this instruction is not
1555  // complete.
1556  DPRINTF(LSQUnit, "Load-store forwarding mis-match. "
1557  "Store idx %i to load addr %#x\n",
1558  store_it._idx, request->mainReq()->getVaddr());
1559 
1560  // Must discard the request.
1561  request->discard();
1562  load_entry.setRequest(nullptr);
1563  return NoFault;
1564  }
1565  }
1566  }
1567 
1568  // If there's no forwarding case, then go access memory
1569  DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %s\n",
1570  load_inst->seqNum, load_inst->pcState());
1571 
1572  // Allocate memory if this is the first time a load is issued.
1573  if (!load_inst->memData) {
1574  load_inst->memData = new uint8_t[request->mainReq()->getSize()];
1575  }
1576 
1577 
1578  // hardware transactional memory
1579  if (request->mainReq()->isHTMCmd()) {
1580  // this is a simple sanity check
1581  // the Ruby cache controller will set
1582  // memData to 0x0ul if successful.
      // NOTE(review): memData is assigned from new uint8_t[...] above, so
      // this store presumably writes only the first byte of the buffer --
      // confirm that is sufficient for the sanity check.
1583  *load_inst->memData = (uint64_t) 0x1ull;
1584  }
1585 
1586  // For now, load throughput is constrained by the number of
1587  // load FUs only, and loads do not consume a cache port (only
1588  // stores do).
1589  // @todo We should account for cache port contention
1590  // and arbitrate between loads and stores.
1591 
1592  // if the cache is not blocked, do cache access
1593  request->buildPackets();
1594  request->sendPacketToCache();
1595  if (!request->isSent())
1596  iewStage->blockMemInst(load_inst);
1597 
1598  return NoFault;
1599 }
1600 
1601 Fault
1602 LSQUnit::write(LSQRequest *request, uint8_t *data, int store_idx)
1603 {
1604  assert(storeQueue[store_idx].valid());
1605 
1606  DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x | storeHead:%i "
1607  "[sn:%llu]\n",
1608  store_idx - 1, request->req()->getPaddr(), storeQueue.head() - 1,
1609  storeQueue[store_idx].instruction()->seqNum);
1610 
1611  storeQueue[store_idx].setRequest(request);
1612  unsigned size = request->_size;
1613  storeQueue[store_idx].size() = size;
1614  bool store_no_data =
1615  request->mainReq()->getFlags() & Request::STORE_NO_DATA;
1616  storeQueue[store_idx].isAllZeros() = store_no_data;
1617  assert(size <= SQEntry::DataSize || store_no_data);
1618 
1619  // copy data into the storeQueue only if the store request has valid data
1620  if (!(request->req()->getFlags() & Request::CACHE_BLOCK_ZERO) &&
1621  !request->req()->isCacheMaintenance() &&
1622  !request->req()->isAtomic())
1623  memcpy(storeQueue[store_idx].data(), data, size);
1624 
1625  // This function only writes the data to the store queue, so no fault
1626  // can happen here.
1627  return NoFault;
1628 }
1629 
// Returns the sequence number of the instruction in the front entry of the
// load queue, or 0 when that entry is not valid.
// NOTE(review): the signature line (listing line 1631) is not present in
// this listing.
1630 InstSeqNum
1632 {
1633  if (loadQueue.front().valid())
1634  return loadQueue.front().instruction()->seqNum;
1635  else
1636  return 0;
1637 }
1638 
// Returns the sequence number of the instruction in the front entry of the
// store queue, or 0 when that entry is not valid.
// NOTE(review): the signature line (listing line 1640) is not present in
// this listing.
1639 InstSeqNum
1641 {
1642  if (storeQueue.front().valid())
1643  return storeQueue.front().instruction()->seqNum;
1644  else
1645  return 0;
1646 }
1647 
1648 } // namespace o3
1649 } // namespace gem5
gem5::o3::LSQUnit::storePostSend
void storePostSend()
Handles completing the send of a store to memory.
Definition: lsq_unit.cc:1044
gem5::o3::LSQUnit::LSQUnitStats::squashedLoads
statistics::Scalar squashedLoads
Total number of squashed loads.
Definition: lsq_unit.hh:517
gem5::curTick
Tick curTick()
The universal simulation clock.
Definition: cur_tick.hh:46
gem5::CircularQueue::back
reference back()
Definition: circular_queue.hh:446
gem5::HtmCacheFailure::FAIL_OTHER
@ FAIL_OTHER
gem5::o3::LSQUnit::WritebackEvent::process
void process()
Processes the writeback event.
Definition: lsq_unit.cc:75
gem5::o3::CPU::ppDataAccessComplete
ProbePointArg< std::pair< DynInstPtr, PacketPtr > > * ppDataAccessComplete
Definition: cpu.hh:175
gem5::o3::LSQUnit::lastRetiredHtmUid
uint64_t lastRetiredHtmUid
Definition: lsq_unit.hh:468
gem5::o3::LSQ::LSQRequest
Memory operation metadata.
Definition: lsq.hh:189
gem5::NoFault
constexpr decltype(nullptr) NoFault
Definition: types.hh:260
gem5::o3::LSQUnit::stallingStoreIsn
InstSeqNum stallingStoreIsn
The store that causes the stall due to partial store to load forwarding.
Definition: lsq_unit.hh:486
gem5::RequestPort::sendTimingReq
bool sendTimingReq(PacketPtr pkt)
Attempt to send a timing request to the responder port by calling its corresponding receive function.
Definition: port.hh:495
gem5::o3::LSQUnit::insertStore
void insertStore(const DynInstPtr &store_inst)
Inserts a store instruction.
Definition: lsq_unit.cc:380
gem5::o3::LSQUnit::storeInFlight
bool storeInFlight
Whether or not a store is in flight.
Definition: lsq_unit.hh:497
gem5::o3::LSQ::cacheBlocked
bool cacheBlocked() const
Is D-cache blocked?
Definition: lsq.cc:184
gem5::o3::LSQUnit::WritebackEvent::description
const char * description() const
Returns the description of this event.
Definition: lsq_unit.cc:87
gem5::o3::LSQUnit::isStalled
bool isStalled()
Returns whether or not the LSQ unit is stalled.
Definition: lsq_unit.hh:559
gem5::o3::LSQUnit::checkLoads
bool checkLoads
Should loads be checked for dependency issues.
Definition: lsq_unit.hh:458
gem5::cprintf
void cprintf(const char *format, const Args &...args)
Definition: cprintf.hh:155
gem5::CircularQueue::advance_tail
void advance_tail()
Increases the tail by one.
Definition: circular_queue.hh:515
gem5::o3::CPU::mmu
BaseMMU * mmu
Definition: cpu.hh:111
gem5::o3::LSQUnit::LSQUnitStats::memOrderViolation
statistics::Scalar memOrderViolation
Total number of memory ordering violations.
Definition: lsq_unit.hh:524
gem5::GenericHtmFailureFault
Definition: faults.hh:140
data
const char data[]
Definition: circlebuf.test.cc:48
gem5::o3::LSQUnit::recvTimingResp
bool recvTimingResp(PacketPtr pkt)
Handles writing back and completing the load or store that has returned from memory.
Definition: lsq_unit.cc:93
gem5::HtmFailureFaultCause
HtmFailureFaultCause
Definition: htm.hh:47
gem5::o3::LSQUnit::LSQEntry::setRequest
void setRequest(LSQRequest *r)
Definition: lsq_unit.hh:139
gem5::o3::LSQUnit::iewStage
IEW * iewStage
Pointer to the IEW stage.
Definition: lsq_unit.hh:399
gem5::o3::LSQUnit::memDepViolator
DynInstPtr memDepViolator
The oldest load that caused a memory ordering violation.
Definition: lsq_unit.hh:500
gem5::o3::LSQUnit::stallingLoadIdx
int stallingLoadIdx
The index of the load that is stalled on the above store.
Definition: lsq_unit.hh:488
gem5::HtmFailureFaultCause::MEMORY
@ MEMORY
gem5::o3::IEW::replayMemInst
void replayMemInst(const DynInstPtr &inst)
Re-executes all rescheduled memory instructions.
Definition: iew.cc:550
gem5::o3::LSQUnit::LSQUnit
LSQUnit(uint32_t lqEntries, uint32_t sqEntries)
Constructs an LSQ unit.
Definition: lsq_unit.cc:193
gem5::Packet::req
RequestPtr req
A pointer to the original request.
Definition: packet.hh:366
gem5::o3::LSQUnit::numFreeStoreEntries
unsigned numFreeStoreEntries()
Returns the number of free SQ entries.
Definition: lsq_unit.cc:419
gem5::o3::LSQUnit::init
void init(CPU *cpu_ptr, IEW *iew_ptr, const O3CPUParams &params, LSQ *lsq_ptr, unsigned id)
Initializes the LSQ unit with the specified number of entries.
Definition: lsq_unit.cc:204
gem5::o3::LSQUnit::LSQUnitStats::ignoredResponses
statistics::Scalar ignoredResponses
Total number of responses from the memory system that are ignored due to the instruction already bein...
Definition: lsq_unit.hh:521
gem5::o3::LSQ::LSQRequest::isCacheBlockHit
virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask)=0
Test if the request accesses a particular cache line.
gem5::statistics::nozero
const FlagsType nozero
Don't print if this is zero.
Definition: info.hh:68
gem5::o3::IEW::checkMisprediction
void checkMisprediction(const DynInstPtr &inst)
Check misprediction
Definition: iew.cc:1592
sc_dt::to_string
const std::string to_string(sc_enc enc)
Definition: sc_fxdefs.cc:91
gem5::o3::LSQ::LSQRequest::buildPackets
virtual void buildPackets()=0
gem5::HtmCacheFailure
HtmCacheFailure
Definition: htm.hh:59
gem5::o3::LSQUnit::schedule
void schedule(Event &ev, Tick when)
Schedule event for the cpu.
Definition: lsq_unit.cc:1274
gem5::o3::LSQ::cachePortAvailable
bool cachePortAvailable(bool is_load) const
Is any store port available to use?
Definition: lsq.cc:196
gem5::ArmISA::e
Bitfield< 9 > e
Definition: misc_types.hh:65
gem5::Packet::setHtmTransactional
void setHtmTransactional(uint64_t val)
Stipulates that this packet/request originates in the CPU executing in transactional mode,...
Definition: packet.cc:521
gem5::o3::LSQUnit::htmStops
int htmStops
Definition: lsq_unit.hh:466
gem5::o3::LSQUnit::MaxDataBytes
static constexpr auto MaxDataBytes
Definition: lsq_unit.hh:92
gem5::o3::LSQ
Definition: lsq.hh:75
gem5::Packet::isWrite
bool isWrite() const
Definition: packet.hh:583
gem5::o3::LSQUnit::storesToWB
int storesToWB
The number of store instructions in the SQ waiting to writeback.
Definition: lsq_unit.hh:461
gem5::o3::LSQ::LSQRequest::instruction
const DynInstPtr & instruction()
Definition: lsq.hh:332
gem5::o3::LSQUnit::cacheBlockMask
Addr cacheBlockMask
Address Mask for a cache block (e.g.
Definition: lsq_unit.hh:476
gem5::o3::LSQ::LSQRequest::sendPacketToCache
virtual void sendPacketToCache()=0
gem5::o3::LSQUnit::setDcachePort
void setDcachePort(RequestPort *dcache_port)
Sets the pointer to the dcache port.
Definition: lsq_unit.cc:282
gem5::csprintf
std::string csprintf(const char *format, const Args &...args)
Definition: cprintf.hh:161
dyn_inst.hh
gem5::o3::LSQ::LSQRequest::mainPacket
virtual PacketPtr mainPacket()
Definition: lsq.hh:365
gem5::RefCountingPtr::get
T * get() const
Directly access the pointer itself without taking a reference.
Definition: refcnt.hh:227
gem5::CircularQueue::full
bool full() const
Is the queue full? A queue is full if the head is the 0^{th} element and the tail is the (size-1)^{th...
Definition: circular_queue.hh:558
gem5::ArmISA::i
Bitfield< 7 > i
Definition: misc_types.hh:67
gem5::o3::LSQ::LSQRequest::needWBToRegister
bool needWBToRegister() const
Definition: lsq.hh:409
gem5::o3::LSQUnit::LSQUnitStats::loadToUse
statistics::Distribution loadToUse
Distribution of cycle latency between the first time a load is issued and its completion.
Definition: lsq_unit.hh:537
gem5::o3::LSQUnit::depCheckShift
unsigned depCheckShift
The number of places to shift addresses in the LSQ before checking for dependency violations.
Definition: lsq_unit.hh:455
request.hh
gem5::HtmFailureFaultCause::OTHER
@ OTHER
gem5::o3::LSQUnit::AddrRangeCoverage::PartialAddrRangeCoverage
@ PartialAddrRangeCoverage
gem5::statistics::DistBase::sample
void sample(const U &v, int n=1)
Add a value to the distribution n times.
Definition: statistics.hh:1328
gem5::o3::LSQUnit::numFreeLoadEntries
unsigned numFreeLoadEntries()
Returns the number of free LQ entries.
Definition: lsq_unit.cc:411
gem5::o3::IEW::wakeCPU
void wakeCPU()
Tells the CPU to wakeup if it has descheduled itself due to no activity.
Definition: iew.cc:804
gem5::HtmCacheFailure::FAIL_SELF
@ FAIL_SELF
gem5::RefCountingPtr< DynInst >
gem5::BaseMMU
Definition: mmu.hh:53
gem5::o3::IEW::rescheduleMemInst
void rescheduleMemInst(const DynInstPtr &inst)
Tells memory dependence unit that a memory instruction needs to be rescheduled.
Definition: iew.cc:544
packet.hh
gem5::o3::LSQ::LSQRequest::req
RequestPtr req(int idx=0)
Definition: lsq.hh:356
gem5::o3::CPU::checker
gem5::Checker< DynInstPtr > * checker
Pointer to the checker, which can dynamically verify instruction results at run time.
Definition: cpu.hh:578
gem5::RequestPort
A RequestPort is a specialisation of a Port, which implements the default protocol for the three diff...
Definition: port.hh:77
gem5::Cycles
Cycles is a wrapper class for representing cycle counts, i.e.
Definition: types.hh:78
gem5::o3::LSQUnit::getLatestHtmUid
uint64_t getLatestHtmUid() const
Definition: lsq_unit.cc:1037
str.hh
gem5::o3::LSQUnit::completeStore
void completeStore(typename StoreQueue::iterator store_idx)
Completes the store at the specified index.
Definition: lsq_unit.cc:1137
gem5::o3::LSQUnit::needsTSO
bool needsTSO
Flag for memory model.
Definition: lsq_unit.hh:503
gem5::o3::LSQUnit::LSQUnitStats::forwLoads
statistics::Scalar forwLoads
Total number of loads forwarded from LSQ stores.
Definition: lsq_unit.hh:514
gem5::Packet::dataStatic
void dataStatic(T *p)
Set the data pointer to the following value that should not be freed.
Definition: packet.hh:1134
gem5::o3::LSQUnit::LSQUnitStats::blockedByCache
statistics::Scalar blockedByCache
Number of times the LSQ is blocked due to the cache.
Definition: lsq_unit.hh:533
gem5::o3::LSQUnit::WritebackEvent
Writeback event, specifically for when stores forward data to loads.
Definition: lsq_unit.hh:408
gem5::o3::LSQ::LSQRequest::_size
const uint32_t _size
Definition: lsq.hh:255
gem5::BaseISA::handleLockedSnoop
virtual void handleLockedSnoop(PacketPtr pkt, Addr cacheBlockMask)
Definition: isa.hh:99
gem5::o3::LSQ::LSQRequest::recvTimingResp
virtual bool recvTimingResp(PacketPtr pkt)=0
gem5::CircularQueue::tail
size_t tail() const
Definition: circular_queue.hh:456
gem5::o3::LSQUnit
Class that implements the actual LQ and SQ for each specific thread.
Definition: lsq_unit.hh:89
gem5::HtmCacheFailure::FAIL_REMOTE
@ FAIL_REMOTE
gem5::CircularQueue::size
size_t size() const
Definition: circular_queue.hh:466
gem5::o3::LSQUnit::storeQueue
StoreQueue storeQueue
The store queue.
Definition: lsq_unit.hh:446
gem5::ThreadContext::getIsaPtr
virtual BaseISA * getIsaPtr()=0
gem5::o3::CPU
O3CPU class, has each of the stages (fetch through commit) within it, as well as all of the time buff...
Definition: cpu.hh:94
gem5::o3::LSQ::LSQRequest::packetSent
void packetSent()
Update the status to reflect that a packet was sent.
Definition: lsq.hh:431
gem5::o3::LSQ::LSQRequest::setStateToFault
void setStateToFault()
Definition: lsq.hh:494
gem5::o3::LSQUnit::stats
gem5::o3::LSQUnit::LSQUnitStats stats
gem5::o3::LSQUnit::commitLoad
void commitLoad()
Commits the head load.
Definition: lsq_unit.cc:720
gem5::statistics::Distribution::init
Distribution & init(Counter min, Counter max, Counter bkt)
Set the parameters of this distribution.
Definition: statistics.hh:2113
gem5::ThreadContext
ThreadContext is the external interface to all thread state for anything outside of the CPU.
Definition: thread_context.hh:94
gem5::o3::LSQUnit::LSQEntry
Definition: lsq_unit.hh:96
gem5::o3::LSQUnit::LSQUnitStats::rescheduledLoads
statistics::Scalar rescheduledLoads
Number of loads that were rescheduled.
Definition: lsq_unit.hh:530
gem5::o3::LSQ::LSQRequest::discard
void discard()
The request is discarded (e.g.
Definition: lsq.hh:512
gem5::o3::LSQUnit::trySendPacket
bool trySendPacket(bool isLoad, PacketPtr data_pkt)
Attempts to send a packet to the cache.
Definition: lsq_unit.cc:1200
gem5::Fault
std::shared_ptr< FaultBase > Fault
Definition: types.hh:255
gem5::Packet::print
void print(std::ostream &o, int verbosity=0, const std::string &prefix="") const
Definition: packet.cc:373
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:186
gem5::Event
Definition: eventq.hh:251
ADD_STAT
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition: group.hh:75
gem5::Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:283
gem5::o3::LSQUnit::isStoreBlocked
bool isStoreBlocked
Whether or not a store is blocked due to the memory system.
Definition: lsq_unit.hh:494
gem5::Packet::htmTransactionFailedInCache
bool htmTransactionFailedInCache() const
Returns whether or not this packet/request has returned from the cache hierarchy in a failed transact...
Definition: packet.cc:508
gem5::probing::Packet
ProbePointArg< PacketInfo > Packet
Packet probe point.
Definition: mem.hh:109
gem5::o3::LSQ::LSQRequest::packetNotSent
void packetNotSent()
Update the status to reflect that a packet was not sent.
Definition: lsq.hh:440
gem5::Tick
uint64_t Tick
Tick count type.
Definition: types.hh:58
gem5::o3::LSQUnit::WritebackEvent::WritebackEvent
WritebackEvent(const DynInstPtr &_inst, PacketPtr pkt, LSQUnit *lsq_ptr)
Constructs a writeback event.
Definition: lsq_unit.cc:65
gem5::o3::IEW
IEW handles both single threaded and SMT IEW (issue/execute/writeback).
Definition: iew.hh:87
cpu.hh
gem5::o3::LSQUnit::htmStarts
int htmStarts
Definition: lsq_unit.hh:465
gem5::Request::NO_ACCESS
@ NO_ACCESS
The request should not cause a memory access.
Definition: request.hh:146
gem5::o3::LSQUnit::cpu
CPU * cpu
Pointer to the CPU.
Definition: lsq_unit.hh:396
gem5::o3::LSQUnit::storeWBIt
StoreQueue::iterator storeWBIt
The index of the first instruction that may be ready to be written back, and has not yet been written...
Definition: lsq_unit.hh:473
gem5::o3::LSQUnit::lsqID
ThreadID lsqID
The LSQUnit thread id.
Definition: lsq_unit.hh:443
gem5::MemCmd::ReadReq
@ ReadReq
Definition: packet.hh:86
gem5::o3::LSQ::LSQRequest::packet
PacketPtr packet(int idx=0)
Definition: lsq.hh:362
gem5::o3::LSQ::LSQRequest::_numOutstandingPackets
uint32_t _numOutstandingPackets
Definition: lsq.hh:258
gem5::ReExec
Definition: faults.hh:92
gem5::o3::CPU::tcBase
gem5::ThreadContext * tcBase(ThreadID tid)
Returns a pointer to a thread context.
Definition: cpu.hh:566
gem5::Packet::getHtmTransactionUid
uint64_t getHtmTransactionUid() const
If a packet/request originates in a CPU executing in transactional mode, i.e.
Definition: packet.cc:534
gem5::o3::CPU::thread
std::vector< ThreadState * > thread
Pointers to all of the threads in the CPU.
Definition: cpu.hh:584
gem5::o3::LSQUnit::commitLoads
void commitLoads(InstSeqNum &youngest_inst)
Commits loads older than a specific sequence number.
Definition: lsq_unit.cc:743
gem5::o3::IEW::activityThisCycle
void activityThisCycle()
Reports to the CPU that there is activity this cycle.
Definition: iew.cc:810
gem5::CircularQueue::begin
iterator begin()
Iterators.
Definition: circular_queue.hh:565
gem5::o3::LSQUnit::completeDataAccess
void completeDataAccess(PacketPtr pkt)
Completes the data access that has been returned from the memory system.
Definition: lsq_unit.cc:106
gem5::o3::LSQ::cachePortBusy
void cachePortBusy(bool is_load)
Another store port is in use.
Definition: lsq.cc:208
gem5::o3::CPU::wakeCPU
void wakeCPU()
Wakes the CPU, rescheduling the CPU if it's not already active.
Definition: cpu.cc:1513
gem5::HtmFailureFaultCause::INVALID
@ INVALID
gem5::CircularQueue::front
reference front()
Definition: circular_queue.hh:441
gem5::Packet::getHtmTransactionFailedInCacheRC
HtmCacheFailure getHtmTransactionFailedInCacheRC() const
If a packet/request has returned from the cache hierarchy in a failed transaction,...
Definition: packet.cc:514
gem5::o3::LSQUnit::takeOverFrom
void takeOverFrom()
Takes over from another CPU's thread.
Definition: lsq_unit.cc:298
gem5::o3::LSQ::LSQRequest::isReleased
bool isReleased()
Test if the LSQRequest has been released, i.e. self-owned.
Definition: lsq.hh:396
gem5::o3::LSQUnit::retryPkt
PacketPtr retryPkt
The packet that needs to be retried.
Definition: lsq_unit.hh:491
gem5::o3::LSQUnit::checkViolations
Fault checkViolations(typename LoadQueue::iterator &loadIt, const DynInstPtr &inst)
Check for ordering violations in the LSQ.
Definition: lsq_unit.cc:511
gem5::ThreadContext::getHtmCheckpointPtr
virtual BaseHTMCheckpointPtr & getHtmCheckpointPtr()=0
gem5::o3::LSQUnit::writeback
void writeback(const DynInstPtr &inst, PacketPtr pkt)
Writes back the instruction, sending it to IEW.
Definition: lsq_unit.cc:1075
gem5::Addr
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Definition: types.hh:147
gem5::o3::LSQUnit::AddrRangeCoverage::FullAddrRangeCoverage
@ FullAddrRangeCoverage
gem5::o3::LSQUnit::squash
void squash(const InstSeqNum &squashed_num)
Squashes all instructions younger than a specific sequence number.
Definition: lsq_unit.cc:922
gem5::Packet::senderState
SenderState * senderState
This packet's sender state.
Definition: packet.hh:534
gem5::o3::LSQUnit::LSQUnitStats::squashedStores
statistics::Scalar squashedStores
Total number of squashed stores.
Definition: lsq_unit.hh:527
gem5::o3::LSQ::LSQRequest::complete
void complete()
Definition: lsq.hh:555
gem5::Request::CACHE_BLOCK_ZERO
@ CACHE_BLOCK_ZERO
This is a write that is targeted and zeroing an entire cache block.
Definition: request.hh:143
gem5::o3::LSQUnit::LSQEntry::instruction
const DynInstPtr & instruction() const
Definition: lsq_unit.hh:146
gem5::o3::LSQ::LSQRequest::isSent
bool isSent()
Definition: lsq.hh:475
gem5::o3::LSQUnit::executeStore
Fault executeStore(const DynInstPtr &inst)
Executes a store instruction.
Definition: lsq_unit.cc:658
gem5::Request::STORE_NO_DATA
static const FlagsType STORE_NO_DATA
Definition: request.hh:246
gem5::Request::LLSC
@ LLSC
The request is a Load locked/store conditional.
Definition: request.hh:156
gem5::o3::MaxThreads
static constexpr int MaxThreads
Definition: limits.hh:38
gem5::o3::LSQUnit::writebackBlockedStore
void writebackBlockedStore()
Try to finish a previously blocked write back attempt.
Definition: lsq_unit.cc:780
gem5::o3::CPU::activityThisCycle
void activityThisCycle()
Records that there was time buffer activity this cycle.
Definition: cpu.hh:539
gem5::HtmFailureFaultCause::SIZE
@ SIZE
gem5::CircularQueue::pop_front
void pop_front(size_t num_elem=1)
Circularly increase the head pointer.
Definition: circular_queue.hh:477
gem5::o3::IEW::instToCommit
void instToCommit(const DynInstPtr &inst)
Sends an instruction to commit through the time buffer.
Definition: iew.cc:568
gem5::RiscvISA::x
Bitfield< 3 > x
Definition: pagetable.hh:73
gem5::o3::LSQUnit::insert
void insert(const DynInstPtr &inst)
Inserts an instruction.
Definition: lsq_unit.cc:304
gem5::o3::LSQUnit::dcachePort
RequestPort * dcachePort
Pointer to the dcache port.
Definition: lsq_unit.hh:405
gem5::CircularQueue::pop_back
void pop_back()
Circularly decrease the tail pointer.
Definition: circular_queue.hh:490
gem5::CircularQueue::empty
bool empty() const
Is the queue empty?
Definition: circular_queue.hh:548
gem5::o3::LSQUnit::stalled
bool stalled
Whether or not the LSQ is stalled.
Definition: lsq_unit.hh:482
gem5::htmFailureToStr
std::string htmFailureToStr(HtmFailureFaultCause cause)
Convert enum into string to be used for debug purposes.
Definition: htm.cc:44
gem5::o3::IEW::blockMemInst
void blockMemInst(const DynInstPtr &inst)
Moves memory instruction onto the list of cache blocked instructions.
Definition: iew.cc:556
gem5::RefCountingPtr::data
T * data
The stored pointer.
Definition: refcnt.hh:146
gem5::CircularQueue::head
size_t head() const
Definition: circular_queue.hh:451
gem5::o3::LSQUnit::SQEntry::DataSize
static constexpr size_t DataSize
Definition: lsq_unit.hh:168
gem5::o3::LSQUnit::getMMUPtr
BaseMMU * getMMUPtr()
Definition: lsq_unit.cc:1276
gem5::o3::LSQUnit::lsq
LSQ * lsq
Pointer to the LSQ.
Definition: lsq_unit.hh:402
gem5::CircularQueue::end
iterator end()
Definition: circular_queue.hh:580
gem5::Packet::makeResponse
void makeResponse()
Take a request packet and modify it in place to be suitable for returning as a response to that reque...
Definition: packet.hh:1031
gem5::o3::LSQUnit::read
Fault read(LSQRequest *request, int load_idx)
Executes the load at the given index.
Definition: lsq_unit.cc:1285
gem5::o3::LSQUnit::executeLoad
Fault executeLoad(const DynInstPtr &inst)
Executes a load instruction.
Definition: lsq_unit.cc:591
gem5::o3::LSQUnit::recvRetry
void recvRetry()
Handles doing the retry.
Definition: lsq_unit.cc:1242
gem5::MemCmd::WriteReq
@ WriteReq
Definition: packet.hh:89
lsq_unit.hh
gem5::o3::LSQ::LSQRequest::isAnyOutstandingRequest
bool isAnyOutstandingRequest()
Test if there is any in-flight translation or mem access request.
Definition: lsq.hh:382
gem5::statistics::Group
Statistics container.
Definition: group.hh:93
gem5::ArmISA::id
Bitfield< 33 > id
Definition: misc_types.hh:251
gem5::o3::IEW::updateLSQNextCycle
bool updateLSQNextCycle
Records if the LSQ needs to be updated on the next cycle, so that IEW knows if there will be activity...
Definition: iew.hh:365
gem5::InstSeqNum
uint64_t InstSeqNum
Definition: inst_seq.hh:40
gem5::CircularQueue::capacity
size_t capacity() const
Definition: circular_queue.hh:461
gem5::o3::LSQUnit::drainSanityCheck
void drainSanityCheck() const
Perform sanity checks after a drain.
Definition: lsq_unit.cc:288
gem5::o3::LSQUnit::commitStores
void commitStores(InstSeqNum &youngest_inst)
Commits stores older than a specific sequence number.
Definition: lsq_unit.cc:754
gem5::o3::LSQUnit::resetState
void resetState()
Reset the LSQ state.
Definition: lsq_unit.cc:227
gem5::o3::LSQ::LSQRequest::writebackDone
void writebackDone()
Definition: lsq.hh:534
gem5::Packet::setHtmTransactionFailedInCache
void setHtmTransactionFailedInCache(const HtmCacheFailure ret_code)
Stipulates that this packet/request has returned from the cache hierarchy in a failed transaction.
Definition: packet.cc:498
gem5::statistics::DataWrap::flags
Derived & flags(Flags _flags)
Set the flags and marks this stat to print at the end of simulation.
Definition: statistics.hh:358
debugfaults.hh
gem5::CircularQueue::getIterator
iterator getIterator(size_t idx)
Return an iterator to an index in the queue.
Definition: circular_queue.hh:592
gem5::Packet::isHtmTransactional
bool isHtmTransactional() const
Returns whether or not this packet/request originates in the CPU executing in transactional mode,...
Definition: packet.cc:528
gem5::o3::LSQUnit::getMemDepViolator
DynInstPtr getMemDepViolator()
Returns the memory ordering violator.
Definition: lsq_unit.cc:401
gem5::Packet::getAddr
Addr getAddr() const
Definition: packet.hh:781
gem5::o3::LSQUnit::getStoreHeadSeqNum
InstSeqNum getStoreHeadSeqNum()
Returns the sequence number of the head store instruction.
Definition: lsq_unit.cc:1640
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: tlb.cc:60
gem5::o3::LSQUnit::loadQueue
LoadQueue loadQueue
The load queue.
Definition: lsq_unit.hh:449
gem5::o3::LSQUnit::name
std::string name() const
Returns the name of the LSQ unit.
Definition: lsq_unit.cc:246
lsq.hh
gem5::o3::LSQUnit::AddrRangeCoverage::NoAddrRangeCoverage
@ NoAddrRangeCoverage
limits.hh
gem5::o3::LSQUnit::dumpInsts
void dumpInsts() const
Debugging function to dump instructions in the LSQ.
Definition: lsq_unit.cc:1251
gem5::o3::LSQUnit::insertLoad
void insertLoad(const DynInstPtr &load_inst)
Inserts a load instruction.
Definition: lsq_unit.cc:320
gem5::o3::LSQUnit::cacheLineSize
unsigned int cacheLineSize()
Definition: lsq_unit.cc:1279
gem5::o3::LSQ::LSQRequest::mainReq
virtual RequestPtr mainReq()
Definition: lsq.hh:372
gem5::o3::LSQUnit::getLoadHeadSeqNum
InstSeqNum getLoadHeadSeqNum()
Returns the sequence number of the head load instruction.
Definition: lsq_unit.cc:1631
gem5::o3::LSQUnit::writebackStores
void writebackStores()
Writes back stores.
Definition: lsq_unit.cc:790
gem5::o3::LSQ::LSQRequest::isSplit
bool isSplit() const
Definition: lsq.hh:403
gem5::o3::LSQUnit::write
Fault write(LSQRequest *request, uint8_t *data, int store_idx)
Executes the store at the given index.
Definition: lsq_unit.cc:1602
gem5::o3::LSQUnit::checkSnoop
void checkSnoop(PacketPtr pkt)
Check if an incoming invalidate hits in the lsq on a load that might have issued out of order wrt ano...
Definition: lsq_unit.cc:428
gem5::o3::IEW::name
std::string name() const
Returns the name of the IEW stage.
Definition: iew.cc:119
gem5::Packet::isInvalidate
bool isInvalidate() const
Definition: packet.hh:598
panic
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:178
gem5::o3::LSQUnit::LSQUnitStats::LSQUnitStats
LSQUnitStats(statistics::Group *parent)
Definition: lsq_unit.cc:255

Generated on Wed May 4 2022 12:13:53 for gem5 by doxygen 1.8.17