gem5  v21.1.0.2
lsq_unit.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2010-2014, 2017-2020 ARM Limited
3  * Copyright (c) 2013 Advanced Micro Devices, Inc.
4  * All rights reserved
5  *
6  * The license below extends only to copyright in the software and shall
7  * not be construed as granting a license to any other intellectual
8  * property including but not limited to intellectual property relating
9  * to a hardware implementation of the functionality of the software
10  * licensed hereunder. You may use the software subject to the license
11  * terms below provided that you ensure that this notice is replicated
12  * unmodified and in its entirety in all distributions of the software,
13  * modified or unmodified, in source code or in binary form.
14  *
15  * Copyright (c) 2004-2006 The Regents of The University of Michigan
16  * All rights reserved.
17  *
18  * Redistribution and use in source and binary forms, with or without
19  * modification, are permitted provided that the following conditions are
20  * met: redistributions of source code must retain the above copyright
21  * notice, this list of conditions and the following disclaimer;
22  * redistributions in binary form must reproduce the above copyright
23  * notice, this list of conditions and the following disclaimer in the
24  * documentation and/or other materials provided with the distribution;
25  * neither the name of the copyright holders nor the names of its
26  * contributors may be used to endorse or promote products derived from
27  * this software without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40  */
41 
42 #include "cpu/o3/lsq_unit.hh"
43 
45 #include "arch/locked_mem.hh"
46 #include "base/str.hh"
47 #include "config/the_isa.hh"
48 #include "cpu/checker/cpu.hh"
49 #include "cpu/o3/dyn_inst.hh"
50 #include "cpu/o3/limits.hh"
51 #include "cpu/o3/lsq.hh"
52 #include "debug/Activity.hh"
53 #include "debug/HtmCpu.hh"
54 #include "debug/IEW.hh"
55 #include "debug/LSQUnit.hh"
56 #include "debug/O3PipeView.hh"
57 #include "mem/packet.hh"
58 #include "mem/request.hh"
59 
60 namespace gem5
61 {
62 
63 namespace o3
64 {
65 
67  PacketPtr _pkt, LSQUnit *lsq_ptr)
68  : Event(Default_Pri, AutoDelete),
69  inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr)
70 {
71  assert(_inst->savedReq);
72  _inst->savedReq->writebackScheduled();
73 }
74 
75 void
77 {
78  assert(!lsqPtr->cpu->switchedOut());
79 
80  lsqPtr->writeback(inst, pkt);
81 
82  assert(inst->savedReq);
83  inst->savedReq->writebackDone();
84  delete pkt;
85 }
86 
87 const char *
89 {
90  return "Store writeback";
91 }
92 
93 bool
95 {
96  auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
97  LSQRequest* req = senderState->request();
98  assert(req != nullptr);
99  bool ret = true;
100  /* Check that the request is still alive before any further action. */
101  if (senderState->alive()) {
102  ret = req->recvTimingResp(pkt);
103  } else {
104  senderState->outstanding--;
105  }
106  return ret;
107 
108 }
109 
110 void
112 {
113  LSQSenderState *state = dynamic_cast<LSQSenderState *>(pkt->senderState);
114  DynInstPtr inst = state->inst;
115 
116  // hardware transactional memory
117  // sanity check
118  if (pkt->isHtmTransactional() && !inst->isSquashed()) {
119  assert(inst->getHtmTransactionUid() == pkt->getHtmTransactionUid());
120  }
121 
122  // if in a HTM transaction, it's possible
123  // to abort within the cache hierarchy.
124  // This is signalled back to the processor
125  // through responses to memory requests.
126  if (pkt->htmTransactionFailedInCache()) {
127  // cannot do this for write requests because
128  // they cannot tolerate faults
129  const HtmCacheFailure htm_rc =
131  if (pkt->isWrite()) {
132  DPRINTF(HtmCpu,
133  "store notification (ignored) of HTM transaction failure "
134  "in cache - addr=0x%lx - rc=%s - htmUid=%d\n",
135  pkt->getAddr(), htmFailureToStr(htm_rc),
136  pkt->getHtmTransactionUid());
137  } else {
138  HtmFailureFaultCause fail_reason =
140 
141  if (htm_rc == HtmCacheFailure::FAIL_SELF) {
142  fail_reason = HtmFailureFaultCause::SIZE;
143  } else if (htm_rc == HtmCacheFailure::FAIL_REMOTE) {
144  fail_reason = HtmFailureFaultCause::MEMORY;
145  } else if (htm_rc == HtmCacheFailure::FAIL_OTHER) {
146  // these are likely loads that were issued out of order
147  // they are faulted here, but it's unlikely that these will
148  // ever reach the commit head.
149  fail_reason = HtmFailureFaultCause::OTHER;
150  } else {
151  panic("HTM error - unhandled return code from cache (%s)",
152  htmFailureToStr(htm_rc));
153  }
154 
155  inst->fault =
156  std::make_shared<GenericHtmFailureFault>(
157  inst->getHtmTransactionUid(),
158  fail_reason);
159 
160  DPRINTF(HtmCpu,
161  "load notification of HTM transaction failure "
162  "in cache - pc=%s - addr=0x%lx - "
163  "rc=%u - htmUid=%d\n",
164  inst->pcState(), pkt->getAddr(),
165  htmFailureToStr(htm_rc), pkt->getHtmTransactionUid());
166  }
167  }
168 
169  cpu->ppDataAccessComplete->notify(std::make_pair(inst, pkt));
170 
171  /* Notify the sender state that the access is complete (for ownership
172  * tracking). */
173  state->complete();
174 
175  assert(!cpu->switchedOut());
176  if (!inst->isSquashed()) {
177  if (state->needWB) {
178  // Only loads, store conditionals and atomics perform the writeback
179  // after receving the response from the memory
180  assert(inst->isLoad() || inst->isStoreConditional() ||
181  inst->isAtomic());
182 
183  // hardware transactional memory
184  if (pkt->htmTransactionFailedInCache()) {
187  }
188 
189  writeback(inst, state->request()->mainPacket());
190  if (inst->isStore() || inst->isAtomic()) {
191  auto ss = dynamic_cast<SQSenderState*>(state);
192  ss->writebackDone();
193  completeStore(ss->idx);
194  }
195  } else if (inst->isStore()) {
196  // This is a regular store (i.e., not store conditionals and
197  // atomics), so it can complete without writing back
198  completeStore(dynamic_cast<SQSenderState*>(state)->idx);
199  }
200  }
201 }
202 
203 LSQUnit::LSQUnit(uint32_t lqEntries, uint32_t sqEntries)
204  : lsqID(-1), storeQueue(sqEntries+1), loadQueue(lqEntries+1),
205  loads(0), stores(0), storesToWB(0),
206  htmStarts(0), htmStops(0),
208  cacheBlockMask(0), stalled(false),
209  isStoreBlocked(false), storeInFlight(false), stats(nullptr)
210 {
211 }
212 
213 void
214 LSQUnit::init(CPU *cpu_ptr, IEW *iew_ptr, const O3CPUParams &params,
215  LSQ *lsq_ptr, unsigned id)
216 {
217  lsqID = id;
218 
219  cpu = cpu_ptr;
220  iewStage = iew_ptr;
221 
222  lsq = lsq_ptr;
223 
224  cpu->addStatGroup(csprintf("lsq%i", lsqID).c_str(), &stats);
225 
226  DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",lsqID);
227 
228  depCheckShift = params.LSQDepCheckShift;
229  checkLoads = params.LSQCheckLoads;
230  needsTSO = params.needsTSO;
231 
232  resetState();
233 }
234 
235 
236 void
238 {
239  loads = stores = storesToWB = 0;
240 
241  // hardware transactional memory
242  // nesting depth
243  htmStarts = htmStops = 0;
244 
246 
247  retryPkt = NULL;
248  memDepViolator = NULL;
249 
250  stalled = false;
251 
252  cacheBlockMask = ~(cpu->cacheLineSize() - 1);
253 }
254 
255 std::string
257 {
258  if (MaxThreads == 1) {
259  return iewStage->name() + ".lsq";
260  } else {
261  return iewStage->name() + ".lsq.thread" + std::to_string(lsqID);
262  }
263 }
264 
266  : statistics::Group(parent),
267  ADD_STAT(forwLoads, statistics::units::Count::get(),
268  "Number of loads that had data forwarded from stores"),
269  ADD_STAT(squashedLoads, statistics::units::Count::get(),
270  "Number of loads squashed"),
271  ADD_STAT(ignoredResponses, statistics::units::Count::get(),
272  "Number of memory responses ignored because the instruction is "
273  "squashed"),
274  ADD_STAT(memOrderViolation, statistics::units::Count::get(),
275  "Number of memory ordering violations"),
276  ADD_STAT(squashedStores, statistics::units::Count::get(),
277  "Number of stores squashed"),
278  ADD_STAT(rescheduledLoads, statistics::units::Count::get(),
279  "Number of loads that were rescheduled"),
280  ADD_STAT(blockedByCache, statistics::units::Count::get(),
281  "Number of times an access to memory failed due to the cache "
282  "being blocked"),
283  ADD_STAT(loadToUse, "Distribution of cycle latency between the "
284  "first time a load is issued and its completion")
285 {
286  loadToUse
287  .init(0, 299, 10)
289 }
290 
291 void
293 {
294  dcachePort = dcache_port;
295 }
296 
297 void
299 {
300  for (int i = 0; i < loadQueue.capacity(); ++i)
301  assert(!loadQueue[i].valid());
302 
303  assert(storesToWB == 0);
304  assert(!retryPkt);
305 }
306 
307 void
309 {
310  resetState();
311 }
312 
313 void
315 {
316  assert(inst->isMemRef());
317 
318  assert(inst->isLoad() || inst->isStore() || inst->isAtomic());
319 
320  if (inst->isLoad()) {
321  insertLoad(inst);
322  } else {
323  insertStore(inst);
324  }
325 
326  inst->setInLSQ();
327 }
328 
329 void
331 {
332  assert(!loadQueue.full());
333  assert(loads < loadQueue.capacity());
334 
335  DPRINTF(LSQUnit, "Inserting load PC %s, idx:%i [sn:%lli]\n",
336  load_inst->pcState(), loadQueue.tail(), load_inst->seqNum);
337 
338  /* Grow the queue. */
340 
341  load_inst->sqIt = storeQueue.end();
342 
343  assert(!loadQueue.back().valid());
344  loadQueue.back().set(load_inst);
345  load_inst->lqIdx = loadQueue.tail();
346  assert(load_inst->lqIdx > 0);
347  load_inst->lqIt = loadQueue.getIterator(load_inst->lqIdx);
348 
349  ++loads;
350 
351  // hardware transactional memory
352  // transactional state and nesting depth must be tracked
353  // in the in-order part of the core.
354  if (load_inst->isHtmStart()) {
355  htmStarts++;
356  DPRINTF(HtmCpu, ">> htmStarts++ (%d) : htmStops (%d)\n",
358 
359  const int htm_depth = htmStarts - htmStops;
360  const auto& htm_cpt = cpu->tcBase(lsqID)->getHtmCheckpointPtr();
361  auto htm_uid = htm_cpt->getHtmUid();
362 
363  // for debugging purposes
364  if (!load_inst->inHtmTransactionalState()) {
365  htm_uid = htm_cpt->newHtmUid();
366  DPRINTF(HtmCpu, "generating new htmUid=%u\n", htm_uid);
367  if (htm_depth != 1) {
368  DPRINTF(HtmCpu,
369  "unusual HTM transactional depth (%d)"
370  " possibly caused by mispeculation - htmUid=%u\n",
371  htm_depth, htm_uid);
372  }
373  }
374  load_inst->setHtmTransactionalState(htm_uid, htm_depth);
375  }
376 
377  if (load_inst->isHtmStop()) {
378  htmStops++;
379  DPRINTF(HtmCpu, ">> htmStarts (%d) : htmStops++ (%d)\n",
381 
382  if (htmStops==1 && htmStarts==0) {
383  DPRINTF(HtmCpu,
384  "htmStops==1 && htmStarts==0. "
385  "This generally shouldn't happen "
386  "(unless due to misspeculation)\n");
387  }
388  }
389 }
390 
391 void
393 {
394  // Make sure it is not full before inserting an instruction.
395  assert(!storeQueue.full());
396  assert(stores < storeQueue.capacity());
397 
398  DPRINTF(LSQUnit, "Inserting store PC %s, idx:%i [sn:%lli]\n",
399  store_inst->pcState(), storeQueue.tail(), store_inst->seqNum);
401 
402  store_inst->sqIdx = storeQueue.tail();
403  store_inst->lqIdx = loadQueue.tail() + 1;
404  assert(store_inst->lqIdx > 0);
405  store_inst->lqIt = loadQueue.end();
406 
407  storeQueue.back().set(store_inst);
408 
409  ++stores;
410 }
411 
414 {
415  DynInstPtr temp = memDepViolator;
416 
417  memDepViolator = NULL;
418 
419  return temp;
420 }
421 
422 unsigned
424 {
425  //LQ has an extra dummy entry to differentiate
426  //empty/full conditions. Subtract 1 from the free entries.
427  DPRINTF(LSQUnit, "LQ size: %d, #loads occupied: %d\n",
428  1 + loadQueue.capacity(), loads);
429  return loadQueue.capacity() - loads;
430 }
431 
432 unsigned
434 {
435  //SQ has an extra dummy entry to differentiate
436  //empty/full conditions. Subtract 1 from the free entries.
437  DPRINTF(LSQUnit, "SQ size: %d, #stores occupied: %d\n",
438  1 + storeQueue.capacity(), stores);
439  return storeQueue.capacity() - stores;
440 
441  }
442 
443 void
445 {
446  // Should only ever get invalidations in here
447  assert(pkt->isInvalidate());
448 
449  DPRINTF(LSQUnit, "Got snoop for address %#x\n", pkt->getAddr());
450 
451  for (int x = 0; x < cpu->numContexts(); x++) {
453  bool no_squash = cpu->thread[x]->noSquashFromTC;
454  cpu->thread[x]->noSquashFromTC = true;
456  cpu->thread[x]->noSquashFromTC = no_squash;
457  }
458 
459  if (loadQueue.empty())
460  return;
461 
462  auto iter = loadQueue.begin();
463 
464  Addr invalidate_addr = pkt->getAddr() & cacheBlockMask;
465 
466  DynInstPtr ld_inst = iter->instruction();
467  assert(ld_inst);
468  LSQRequest *req = iter->request();
469 
470  // Check that this snoop didn't just invalidate our lock flag
471  if (ld_inst->effAddrValid() &&
472  req->isCacheBlockHit(invalidate_addr, cacheBlockMask)
473  && ld_inst->memReqFlags & Request::LLSC)
475 
476  bool force_squash = false;
477 
478  while (++iter != loadQueue.end()) {
479  ld_inst = iter->instruction();
480  assert(ld_inst);
481  req = iter->request();
482  if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered())
483  continue;
484 
485  DPRINTF(LSQUnit, "-- inst [sn:%lli] to pktAddr:%#x\n",
486  ld_inst->seqNum, invalidate_addr);
487 
488  if (force_squash ||
489  req->isCacheBlockHit(invalidate_addr, cacheBlockMask)) {
490  if (needsTSO) {
491  // If we have a TSO system, as all loads must be ordered with
492  // all other loads, this load as well as *all* subsequent loads
493  // need to be squashed to prevent possible load reordering.
494  force_squash = true;
495  }
496  if (ld_inst->possibleLoadViolation() || force_squash) {
497  DPRINTF(LSQUnit, "Conflicting load at addr %#x [sn:%lli]\n",
498  pkt->getAddr(), ld_inst->seqNum);
499 
500  // Mark the load for re-execution
501  ld_inst->fault = std::make_shared<ReExec>();
502  req->setStateToFault();
503  } else {
504  DPRINTF(LSQUnit, "HitExternal Snoop for addr %#x [sn:%lli]\n",
505  pkt->getAddr(), ld_inst->seqNum);
506 
507  // Make sure that we don't lose a snoop hitting a LOCKED
508  // address since the LOCK* flags don't get updated until
509  // commit.
510  if (ld_inst->memReqFlags & Request::LLSC)
512 
513  // If a older load checks this and it's true
514  // then we might have missed the snoop
515  // in which case we need to invalidate to be sure
516  ld_inst->hitExternalSnoop(true);
517  }
518  }
519  }
520  return;
521 }
522 
523 Fault
524 LSQUnit::checkViolations(typename LoadQueue::iterator& loadIt,
525  const DynInstPtr& inst)
526 {
527  Addr inst_eff_addr1 = inst->effAddr >> depCheckShift;
528  Addr inst_eff_addr2 = (inst->effAddr + inst->effSize - 1) >> depCheckShift;
529 
535  while (loadIt != loadQueue.end()) {
536  DynInstPtr ld_inst = loadIt->instruction();
537  if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()) {
538  ++loadIt;
539  continue;
540  }
541 
542  Addr ld_eff_addr1 = ld_inst->effAddr >> depCheckShift;
543  Addr ld_eff_addr2 =
544  (ld_inst->effAddr + ld_inst->effSize - 1) >> depCheckShift;
545 
546  if (inst_eff_addr2 >= ld_eff_addr1 && inst_eff_addr1 <= ld_eff_addr2) {
547  if (inst->isLoad()) {
548  // If this load is to the same block as an external snoop
549  // invalidate that we've observed then the load needs to be
550  // squashed as it could have newer data
551  if (ld_inst->hitExternalSnoop()) {
552  if (!memDepViolator ||
553  ld_inst->seqNum < memDepViolator->seqNum) {
554  DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] "
555  "and [sn:%lli] at address %#x\n",
556  inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
557  memDepViolator = ld_inst;
558 
560 
561  return std::make_shared<GenericISA::M5PanicFault>(
562  "Detected fault with inst [sn:%lli] and "
563  "[sn:%lli] at address %#x\n",
564  inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
565  }
566  }
567 
568  // Otherwise, mark the load has a possible load violation and
569  // if we see a snoop before it's commited, we need to squash
570  ld_inst->possibleLoadViolation(true);
571  DPRINTF(LSQUnit, "Found possible load violation at addr: %#x"
572  " between instructions [sn:%lli] and [sn:%lli]\n",
573  inst_eff_addr1, inst->seqNum, ld_inst->seqNum);
574  } else {
575  // A load/store incorrectly passed this store.
576  // Check if we already have a violator, or if it's newer
577  // squash and refetch.
578  if (memDepViolator && ld_inst->seqNum > memDepViolator->seqNum)
579  break;
580 
581  DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] and "
582  "[sn:%lli] at address %#x\n",
583  inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
584  memDepViolator = ld_inst;
585 
587 
588  return std::make_shared<GenericISA::M5PanicFault>(
589  "Detected fault with "
590  "inst [sn:%lli] and [sn:%lli] at address %#x\n",
591  inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
592  }
593  }
594 
595  ++loadIt;
596  }
597  return NoFault;
598 }
599 
600 
601 
602 
603 Fault
605 {
606  // Execute a specific load.
607  Fault load_fault = NoFault;
608 
609  DPRINTF(LSQUnit, "Executing load PC %s, [sn:%lli]\n",
610  inst->pcState(), inst->seqNum);
611 
612  assert(!inst->isSquashed());
613 
614  load_fault = inst->initiateAcc();
615 
616  if (load_fault == NoFault && !inst->readMemAccPredicate()) {
617  assert(inst->readPredicate());
618  inst->setExecuted();
619  inst->completeAcc(nullptr);
620  iewStage->instToCommit(inst);
622  return NoFault;
623  }
624 
625  if (inst->isTranslationDelayed() && load_fault == NoFault)
626  return load_fault;
627 
628  if (load_fault != NoFault && inst->translationCompleted() &&
629  inst->savedReq->isPartialFault() && !inst->savedReq->isComplete()) {
630  assert(inst->savedReq->isSplit());
631  // If we have a partial fault where the mem access is not complete yet
632  // then the cache must have been blocked. This load will be re-executed
633  // when the cache gets unblocked. We will handle the fault when the
634  // mem access is complete.
635  return NoFault;
636  }
637 
638  // If the instruction faulted or predicated false, then we need to send it
639  // along to commit without the instruction completing.
640  if (load_fault != NoFault || !inst->readPredicate()) {
641  // Send this instruction to commit, also make sure iew stage
642  // realizes there is activity. Mark it as executed unless it
643  // is a strictly ordered load that needs to hit the head of
644  // commit.
645  if (!inst->readPredicate())
646  inst->forwardOldRegs();
647  DPRINTF(LSQUnit, "Load [sn:%lli] not executed from %s\n",
648  inst->seqNum,
649  (load_fault != NoFault ? "fault" : "predication"));
650  if (!(inst->hasRequest() && inst->strictlyOrdered()) ||
651  inst->isAtCommit()) {
652  inst->setExecuted();
653  }
654  iewStage->instToCommit(inst);
656  } else {
657  if (inst->effAddrValid()) {
658  auto it = inst->lqIt;
659  ++it;
660 
661  if (checkLoads)
662  return checkViolations(it, inst);
663  }
664  }
665 
666  return load_fault;
667 }
668 
669 Fault
671 {
672  // Make sure that a store exists.
673  assert(stores != 0);
674 
675  int store_idx = store_inst->sqIdx;
676 
677  DPRINTF(LSQUnit, "Executing store PC %s [sn:%lli]\n",
678  store_inst->pcState(), store_inst->seqNum);
679 
680  assert(!store_inst->isSquashed());
681 
682  // Check the recently completed loads to see if any match this store's
683  // address. If so, then we have a memory ordering violation.
684  typename LoadQueue::iterator loadIt = store_inst->lqIt;
685 
686  Fault store_fault = store_inst->initiateAcc();
687 
688  if (store_inst->isTranslationDelayed() &&
689  store_fault == NoFault)
690  return store_fault;
691 
692  if (!store_inst->readPredicate()) {
693  DPRINTF(LSQUnit, "Store [sn:%lli] not executed from predication\n",
694  store_inst->seqNum);
695  store_inst->forwardOldRegs();
696  return store_fault;
697  }
698 
699  if (storeQueue[store_idx].size() == 0) {
700  DPRINTF(LSQUnit,"Fault on Store PC %s, [sn:%lli], Size = 0\n",
701  store_inst->pcState(), store_inst->seqNum);
702 
703  return store_fault;
704  }
705 
706  assert(store_fault == NoFault);
707 
708  if (store_inst->isStoreConditional() || store_inst->isAtomic()) {
709  // Store conditionals and Atomics need to set themselves as able to
710  // writeback if we haven't had a fault by here.
711  storeQueue[store_idx].canWB() = true;
712 
713  ++storesToWB;
714  }
715 
716  return checkViolations(loadIt, store_inst);
717 
718 }
719 
720 void
722 {
723  assert(loadQueue.front().valid());
724 
725  DynInstPtr inst = loadQueue.front().instruction();
726 
727  DPRINTF(LSQUnit, "Committing head load instruction, PC %s\n",
728  inst->pcState());
729 
730  // Update histogram with memory latency from load
731  // Only take latency from load demand that where issued and did not fault
732  if (!inst->isInstPrefetch() && !inst->isDataPrefetch()
733  && inst->firstIssue != -1
734  && inst->lastWakeDependents != -1) {
736  inst->lastWakeDependents - inst->firstIssue));
737  }
738 
739  loadQueue.front().clear();
741 
742  --loads;
743 }
744 
745 void
747 {
748  assert(loads == 0 || loadQueue.front().valid());
749 
750  while (loads != 0 && loadQueue.front().instruction()->seqNum
751  <= youngest_inst) {
752  commitLoad();
753  }
754 }
755 
756 void
758 {
759  assert(stores == 0 || storeQueue.front().valid());
760 
761  /* Forward iterate the store queue (age order). */
762  for (auto& x : storeQueue) {
763  assert(x.valid());
764  // Mark any stores that are now committed and have not yet
765  // been marked as able to write back.
766  if (!x.canWB()) {
767  if (x.instruction()->seqNum > youngest_inst) {
768  break;
769  }
770  DPRINTF(LSQUnit, "Marking store as able to write back, PC "
771  "%s [sn:%lli]\n",
772  x.instruction()->pcState(),
773  x.instruction()->seqNum);
774 
775  x.canWB() = true;
776 
777  ++storesToWB;
778  }
779  }
780 }
781 
782 void
784 {
785  assert(isStoreBlocked);
786  storeWBIt->request()->sendPacketToCache();
787  if (storeWBIt->request()->isSent()){
788  storePostSend();
789  }
790 }
791 
792 void
794 {
795  if (isStoreBlocked) {
796  DPRINTF(LSQUnit, "Writing back blocked store\n");
798  }
799 
800  while (storesToWB > 0 &&
801  storeWBIt.dereferenceable() &&
802  storeWBIt->valid() &&
803  storeWBIt->canWB() &&
804  ((!needsTSO) || (!storeInFlight)) &&
805  lsq->cachePortAvailable(false)) {
806 
807  if (isStoreBlocked) {
808  DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
809  " is blocked!\n");
810  break;
811  }
812 
813  // Store didn't write any data so no need to write it back to
814  // memory.
815  if (storeWBIt->size() == 0) {
816  /* It is important that the preincrement happens at (or before)
817  * the call, as the the code of completeStore checks
818  * storeWBIt. */
820  continue;
821  }
822 
823  if (storeWBIt->instruction()->isDataPrefetch()) {
824  storeWBIt++;
825  continue;
826  }
827 
828  assert(storeWBIt->hasRequest());
829  assert(!storeWBIt->committed());
830 
831  DynInstPtr inst = storeWBIt->instruction();
832  LSQRequest* req = storeWBIt->request();
833 
834  // Process store conditionals or store release after all previous
835  // stores are completed
836  if ((req->mainRequest()->isLLSC() ||
837  req->mainRequest()->isRelease()) &&
838  (storeWBIt.idx() != storeQueue.head())) {
839  DPRINTF(LSQUnit, "Store idx:%i PC:%s to Addr:%#x "
840  "[sn:%lli] is %s%s and not head of the queue\n",
841  storeWBIt.idx(), inst->pcState(),
842  req->request()->getPaddr(), inst->seqNum,
843  req->mainRequest()->isLLSC() ? "SC" : "",
844  req->mainRequest()->isRelease() ? "/Release" : "");
845  break;
846  }
847 
848  storeWBIt->committed() = true;
849 
850  assert(!inst->memData);
851  inst->memData = new uint8_t[req->_size];
852 
853  if (storeWBIt->isAllZeros())
854  memset(inst->memData, 0, req->_size);
855  else
856  memcpy(inst->memData, storeWBIt->data(), req->_size);
857 
858 
859  if (req->senderState() == nullptr) {
860  SQSenderState *state = new SQSenderState(storeWBIt);
861  state->isLoad = false;
862  state->needWB = false;
863  state->inst = inst;
864 
865  req->senderState(state);
866  if (inst->isStoreConditional() || inst->isAtomic()) {
867  /* Only store conditionals and atomics need a writeback. */
868  state->needWB = true;
869  }
870  }
871  req->buildPackets();
872 
873  DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%s "
874  "to Addr:%#x, data:%#x [sn:%lli]\n",
875  storeWBIt.idx(), inst->pcState(),
876  req->request()->getPaddr(), (int)*(inst->memData),
877  inst->seqNum);
878 
879  // @todo: Remove this SC hack once the memory system handles it.
880  if (inst->isStoreConditional()) {
881  // Disable recording the result temporarily. Writing to
882  // misc regs normally updates the result, but this is not
883  // the desired behavior when handling store conditionals.
884  inst->recordResult(false);
885  bool success = TheISA::handleLockedWrite(inst.get(),
886  req->request(), cacheBlockMask);
887  inst->recordResult(true);
888  req->packetSent();
889 
890  if (!success) {
891  req->complete();
892  // Instantly complete this store.
893  DPRINTF(LSQUnit, "Store conditional [sn:%lli] failed. "
894  "Instantly completing it.\n",
895  inst->seqNum);
896  PacketPtr new_pkt = new Packet(*req->packet());
897  WritebackEvent *wb = new WritebackEvent(inst,
898  new_pkt, this);
899  cpu->schedule(wb, curTick() + 1);
901  if (!storeQueue.empty())
902  storeWBIt++;
903  else
905  continue;
906  }
907  }
908 
909  if (req->request()->isLocalAccess()) {
910  assert(!inst->isStoreConditional());
911  assert(!inst->inHtmTransactionalState());
912  gem5::ThreadContext *thread = cpu->tcBase(lsqID);
913  PacketPtr main_pkt = new Packet(req->mainRequest(),
915  main_pkt->dataStatic(inst->memData);
916  req->request()->localAccessor(thread, main_pkt);
917  delete main_pkt;
919  storeWBIt++;
920  continue;
921  }
922  /* Send to cache */
923  req->sendPacketToCache();
924 
925  /* If successful, do the post send */
926  if (req->isSent()) {
927  storePostSend();
928  } else {
929  DPRINTF(LSQUnit, "D-Cache became blocked when writing [sn:%lli], "
930  "will retry later\n",
931  inst->seqNum);
932  }
933  }
934  assert(stores >= 0 && storesToWB >= 0);
935 }
936 
937 void
938 LSQUnit::squash(const InstSeqNum &squashed_num)
939 {
940  DPRINTF(LSQUnit, "Squashing until [sn:%lli]!"
941  "(Loads:%i Stores:%i)\n", squashed_num, loads, stores);
942 
943  while (loads != 0 &&
944  loadQueue.back().instruction()->seqNum > squashed_num) {
945  DPRINTF(LSQUnit,"Load Instruction PC %s squashed, "
946  "[sn:%lli]\n",
947  loadQueue.back().instruction()->pcState(),
948  loadQueue.back().instruction()->seqNum);
949 
950  if (isStalled() && loadQueue.tail() == stallingLoadIdx) {
951  stalled = false;
952  stallingStoreIsn = 0;
953  stallingLoadIdx = 0;
954  }
955 
956  // hardware transactional memory
957  // Squashing instructions can alter the transaction nesting depth
958  // and must be corrected before fetching resumes.
959  if (loadQueue.back().instruction()->isHtmStart())
960  {
961  htmStarts = (--htmStarts < 0) ? 0 : htmStarts;
962  DPRINTF(HtmCpu, ">> htmStarts-- (%d) : htmStops (%d)\n",
964  }
965  if (loadQueue.back().instruction()->isHtmStop())
966  {
967  htmStops = (--htmStops < 0) ? 0 : htmStops;
968  DPRINTF(HtmCpu, ">> htmStarts (%d) : htmStops-- (%d)\n",
970  }
971  // Clear the smart pointer to make sure it is decremented.
972  loadQueue.back().instruction()->setSquashed();
973  loadQueue.back().clear();
974 
975  --loads;
976 
979  }
980 
981  // hardware transactional memory
982  // scan load queue (from oldest to youngest) for most recent valid htmUid
983  auto scan_it = loadQueue.begin();
984  uint64_t in_flight_uid = 0;
985  while (scan_it != loadQueue.end()) {
986  if (scan_it->instruction()->isHtmStart() &&
987  !scan_it->instruction()->isSquashed()) {
988  in_flight_uid = scan_it->instruction()->getHtmTransactionUid();
989  DPRINTF(HtmCpu, "loadQueue[%d]: found valid HtmStart htmUid=%u\n",
990  scan_it._idx, in_flight_uid);
991  }
992  scan_it++;
993  }
994  // If there's a HtmStart in the pipeline then use its htmUid,
995  // otherwise use the most recently committed uid
996  const auto& htm_cpt = cpu->tcBase(lsqID)->getHtmCheckpointPtr();
997  if (htm_cpt) {
998  const uint64_t old_local_htm_uid = htm_cpt->getHtmUid();
999  uint64_t new_local_htm_uid;
1000  if (in_flight_uid > 0)
1001  new_local_htm_uid = in_flight_uid;
1002  else
1003  new_local_htm_uid = lastRetiredHtmUid;
1004 
1005  if (old_local_htm_uid != new_local_htm_uid) {
1006  DPRINTF(HtmCpu, "flush: lastRetiredHtmUid=%u\n",
1008  DPRINTF(HtmCpu, "flush: resetting localHtmUid=%u\n",
1009  new_local_htm_uid);
1010 
1011  htm_cpt->setHtmUid(new_local_htm_uid);
1012  }
1013  }
1014 
1015  if (memDepViolator && squashed_num < memDepViolator->seqNum) {
1016  memDepViolator = NULL;
1017  }
1018 
1019  while (stores != 0 &&
1020  storeQueue.back().instruction()->seqNum > squashed_num) {
1021  // Instructions marked as can WB are already committed.
1022  if (storeQueue.back().canWB()) {
1023  break;
1024  }
1025 
1026  DPRINTF(LSQUnit,"Store Instruction PC %s squashed, "
1027  "idx:%i [sn:%lli]\n",
1028  storeQueue.back().instruction()->pcState(),
1029  storeQueue.tail(), storeQueue.back().instruction()->seqNum);
1030 
1031  // I don't think this can happen. It should have been cleared
1032  // by the stalling load.
1033  if (isStalled() &&
1034  storeQueue.back().instruction()->seqNum == stallingStoreIsn) {
1035  panic("Is stalled should have been cleared by stalling load!\n");
1036  stalled = false;
1037  stallingStoreIsn = 0;
1038  }
1039 
1040  // Clear the smart pointer to make sure it is decremented.
1041  storeQueue.back().instruction()->setSquashed();
1042 
1043  // Must delete request now that it wasn't handed off to
1044  // memory. This is quite ugly. @todo: Figure out the proper
1045  // place to really handle request deletes.
1046  storeQueue.back().clear();
1047  --stores;
1048 
1049  storeQueue.pop_back();
1051  }
1052 }
1053 
1054 uint64_t
1056 {
1057  const auto& htm_cpt = cpu->tcBase(lsqID)->getHtmCheckpointPtr();
1058  return htm_cpt->getHtmUid();
1059 }
1060 
1061 void
1063 {
1064  if (isStalled() &&
1065  storeWBIt->instruction()->seqNum == stallingStoreIsn) {
1066  DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
1067  "load idx:%i\n",
1069  stalled = false;
1070  stallingStoreIsn = 0;
1072  }
1073 
1074  if (!storeWBIt->instruction()->isStoreConditional()) {
1075  // The store is basically completed at this time. This
1076  // only works so long as the checker doesn't try to
1077  // verify the value in memory for stores.
1078  storeWBIt->instruction()->setCompleted();
1079 
1080  if (cpu->checker) {
1081  cpu->checker->verify(storeWBIt->instruction());
1082  }
1083  }
1084 
1085  if (needsTSO) {
1086  storeInFlight = true;
1087  }
1088 
1089  storeWBIt++;
1090 }
1091 
1092 void
1094 {
1095  iewStage->wakeCPU();
1096 
1097  // Squashed instructions do not need to complete their access.
1098  if (inst->isSquashed()) {
1099  assert (!inst->isStore() || inst->isStoreConditional());
1101  return;
1102  }
1103 
1104  if (!inst->isExecuted()) {
1105  inst->setExecuted();
1106 
1107  if (inst->fault == NoFault) {
1108  // Complete access to copy data to proper place.
1109  inst->completeAcc(pkt);
1110  } else {
1111  // If the instruction has an outstanding fault, we cannot complete
1112  // the access as this discards the current fault.
1113 
1114  // If we have an outstanding fault, the fault should only be of
1115  // type ReExec or - in case of a SplitRequest - a partial
1116  // translation fault
1117 
1118  // Unless it's a hardware transactional memory fault
1119  auto htm_fault = std::dynamic_pointer_cast<
1120  GenericHtmFailureFault>(inst->fault);
1121 
1122  if (!htm_fault) {
1123  assert(dynamic_cast<ReExec*>(inst->fault.get()) != nullptr ||
1124  inst->savedReq->isPartialFault());
1125 
1126  } else if (!pkt->htmTransactionFailedInCache()) {
1127  // Situation in which the instruction has a hardware
1128  // transactional memory fault but not the packet itself. This
1129  // can occur with ldp_uop microops since access is spread over
1130  // multiple packets.
1131  DPRINTF(HtmCpu,
1132  "%s writeback with HTM failure fault, "
1133  "however, completing packet is not aware of "
1134  "transaction failure. cause=%s htmUid=%u\n",
1135  inst->staticInst->getName(),
1136  htmFailureToStr(htm_fault->getHtmFailureFaultCause()),
1137  htm_fault->getHtmUid());
1138  }
1139 
1140  DPRINTF(LSQUnit, "Not completing instruction [sn:%lli] access "
1141  "due to pending fault.\n", inst->seqNum);
1142  }
1143  }
1144 
1145  // Need to insert instruction into queue to commit
1146  iewStage->instToCommit(inst);
1147 
1149 
1150  // see if this load changed the PC
1152 }
1153 
// Marks the SQ entry at store_idx as complete and retires completed
// stores from the head of the store queue; also clears any
// partial-forwarding stall keyed to this store and reports the store
// to the checker (store conditionals excepted).
// NOTE(review): this doxygen extract is missing the hyperlinked source
// lines (orig. 1164, 1172, 1195, 1198) -- among them, presumably, the
// pop of the queue head inside the do/while below -- consult the
// original lsq_unit.cc before editing.
1154 void
1155 LSQUnit::completeStore(typename StoreQueue::iterator store_idx)
1156 {
1157  assert(store_idx->valid());
1158  store_idx->completed() = true;
1159  --storesToWB;
1160  // A bit conservative because a store completion may not free up entries,
1161  // but hopefully avoids two store completions in one cycle from making
1162  // the CPU tick twice.
1163  cpu->wakeCPU();
1165 
1166  /* We 'need' a copy here because we may clear the entry from the
1167  * store queue. */
1168  DynInstPtr store_inst = store_idx->instruction();
1169  if (store_idx == storeQueue.begin()) {
// Retire every already-completed store at the head of the queue.
// NOTE(review): the loop condition reads storeQueue.front() before
// testing empty(); confirm the queue cannot be empty at that point
// after the (elided) removal of the head entry.
1170  do {
1171  storeQueue.front().clear();
1173  --stores;
1174  } while (storeQueue.front().completed() &&
1175  !storeQueue.empty());
1176 
1177  iewStage->updateLSQNextCycle = true;
1178  }
1179 
1180  DPRINTF(LSQUnit, "Completing store [sn:%lli], idx:%i, store head "
1181  "idx:%i\n",
1182  store_inst->seqNum, store_idx.idx() - 1, storeQueue.head() - 1);
1183 
1184 #if TRACING_ON
1185  if (debug::O3PipeView) {
1186  store_inst->storeTick =
1187  curTick() - store_inst->fetchTick;
1188  }
1189 #endif
1190 
// If a load was stalled waiting on this store (partial store-to-load
// forwarding), release the stall now that the store has completed.
1191  if (isStalled() &&
1192  store_inst->seqNum == stallingStoreIsn) {
1193  DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
1194  "load idx:%i\n",
1196  stalled = false;
1197  stallingStoreIsn = 0;
1199  }
1200 
1201  store_inst->setCompleted();
1202 
// Under TSO only one store may be in flight at a time; this one is done.
1203  if (needsTSO) {
1204  storeInFlight = false;
1205  }
1206 
1207  // Tell the checker we've completed this instruction. Some stores
1208  // may get reported twice to the checker, but the checker can
1209  // handle that case.
1210  // Store conditionals cannot be sent to the checker yet, they have
1211  // to update the misc registers first which should take place
1212  // when they commit
1213  if (cpu->checker && !store_inst->isStoreConditional()) {
1214  cpu->checker->verify(store_inst);
1215  }
1216 }
1217 
// Attempts to hand data_pkt to the D-cache port. Returns true if the
// packet was accepted. On success the cache port is charged for this
// cycle (lsq->cachePortBusy) and the owning request is told the packet
// went out; on failure the request is told to retry, and a store
// additionally sets isStoreBlocked so the writeback is replayed when
// the port sends a retry.
// NOTE(review): orig. line 1246 (inside the cache_got_blocked branch,
// presumably a blocked-by-cache statistics bump) is missing from this
// doxygen extract.
1218 bool
1219 LSQUnit::trySendPacket(bool isLoad, PacketPtr data_pkt)
1220 {
1221  bool ret = true;
1222  bool cache_got_blocked = false;
1223 
1224  auto state = dynamic_cast<LSQSenderState*>(data_pkt->senderState);
1225 
// Only try the port when the cache is not already blocked and a
// load/store port slot is available this cycle.
1226  if (!lsq->cacheBlocked() &&
1227  lsq->cachePortAvailable(isLoad)) {
1228  if (!dcachePort->sendTimingReq(data_pkt)) {
1229  ret = false;
1230  cache_got_blocked = true;
1231  }
1232  } else {
1233  ret = false;
1234  }
1235 
1236  if (ret) {
1237  if (!isLoad) {
1238  isStoreBlocked = false;
1239  }
1240  lsq->cachePortBusy(isLoad);
1241  state->outstanding++;
1242  state->request()->packetSent();
1243  } else {
// Distinguish "cache rejected the packet" (mark the whole LSQ blocked)
// from "no port available this cycle" (just retry later).
1244  if (cache_got_blocked) {
1245  lsq->cacheBlocked(true);
1247  }
1248  if (!isLoad) {
1249  assert(state->request() == storeWBIt->request());
1250  isStoreBlocked = true;
1251  }
1252  state->request()->packetNotSent();
1253  }
1254  DPRINTF(LSQUnit, "Memory request (pkt: %s) from inst [sn:%llu] was"
1255  " %ssent (cache is blocked: %d, cache_got_blocked: %d)\n",
1256  data_pkt->print(), state->inst->seqNum,
1257  ret ? "": "not ", lsq->cacheBlocked(), cache_got_blocked);
1258  return ret;
1259 }
1260 
1261 void
1263 {
1264  if (isStoreBlocked) {
1265  DPRINTF(LSQUnit, "Receiving retry: blocked store\n");
1267  }
1268 }
1269 
1270 void
1272 {
1273  cprintf("Load store queue: Dumping instructions.\n");
1274  cprintf("Load queue size: %i\n", loads);
1275  cprintf("Load queue: ");
1276 
1277  for (const auto& e: loadQueue) {
1278  const DynInstPtr &inst(e.instruction());
1279  cprintf("%s.[sn:%llu] ", inst->pcState(), inst->seqNum);
1280  }
1281  cprintf("\n");
1282 
1283  cprintf("Store queue size: %i\n", stores);
1284  cprintf("Store queue: ");
1285 
1286  for (const auto& e: storeQueue) {
1287  const DynInstPtr &inst(e.instruction());
1288  cprintf("%s.[sn:%llu] ", inst->pcState(), inst->seqNum);
1289  }
1290 
1291  cprintf("\n");
1292 }
1293 
1294 void LSQUnit::schedule(Event& ev, Tick when) { cpu->schedule(ev, when); }
1295 
1297 
1298 unsigned int
1300 {
1301  return cpu->cacheLineSize();
1302 }
1303 
// Executes the load at load_idx using the already-translated request
// req. Handles, in order: strictly-ordered loads not yet at commit
// (rescheduled, request discarded, panic fault returned), LLSC
// bookkeeping, local (device) accesses completed via a writeback
// event, HTM start/commit commands that must not leave the core,
// store-to-load forwarding from older stores still in the SQ, and
// finally a normal timing access to the D-cache.
// NOTE(review): several doxygen-hyperlinked lines are absent from this
// extract (orig. 1327, 1434, 1446-1447, 1466, 1546, 1570), among them
// the declaration and updates of `coverage` -- consult the original
// lsq_unit.cc before editing.
1304 Fault
1305 LSQUnit::read(LSQRequest *req, int load_idx)
1306 {
1307  LQEntry& load_req = loadQueue[load_idx];
1308  const DynInstPtr& load_inst = load_req.instruction();
1309 
1310  load_req.setRequest(req);
1311  assert(load_inst);
1312 
1313  assert(!load_inst->isExecuted());
1314 
1315  // Make sure this isn't a strictly ordered load
1316  // A bit of a hackish way to get strictly ordered accesses to work
1317  // only if they're at the head of the LSQ and are ready to commit
1318  // (at the head of the ROB too).
1319 
1320  if (req->mainRequest()->isStrictlyOrdered() &&
1321  (load_idx != loadQueue.head() || !load_inst->isAtCommit())) {
1322  // Tell IQ/mem dep unit that this instruction will need to be
1323  // rescheduled eventually
1324  iewStage->rescheduleMemInst(load_inst);
1325  load_inst->clearIssued();
1326  load_inst->effAddrValid(false);
// NOTE(review): orig. line 1327 (likely a rescheduled-loads stat bump)
// is missing from this extract.
1328  DPRINTF(LSQUnit, "Strictly ordered load [sn:%lli] PC %s\n",
1329  load_inst->seqNum, load_inst->pcState());
1330 
1331  // Must delete request now that it wasn't handed off to
1332  // memory. This is quite ugly. @todo: Figure out the proper
1333  // place to really handle request deletes.
1334  load_req.setRequest(nullptr);
1335  req->discard();
1336  return std::make_shared<GenericISA::M5PanicFault>(
1337  "Strictly ordered load [sn:%llx] PC %s\n",
1338  load_inst->seqNum, load_inst->pcState());
1339  }
1340 
1341  DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, "
1342  "storeHead: %i addr: %#x%s\n",
1343  load_idx - 1, load_inst->sqIt._idx, storeQueue.head() - 1,
1344  req->mainRequest()->getPaddr(), req->isSplit() ? " split" : "");
1345 
1346  if (req->mainRequest()->isLLSC()) {
1347  // Disable recording the result temporarily. Writing to misc
1348  // regs normally updates the result, but this is not the
1349  // desired behavior when handling store conditionals.
1350  load_inst->recordResult(false);
1351  TheISA::handleLockedRead(load_inst.get(), req->mainRequest());
1352  load_inst->recordResult(true);
1353  }
1354 
// Local accesses (e.g. device registers) are completed directly via
// the request's accessor and a scheduled writeback event; they never
// go out to the cache.
1355  if (req->mainRequest()->isLocalAccess()) {
1356  assert(!load_inst->memData);
1357  assert(!load_inst->inHtmTransactionalState());
1358  load_inst->memData = new uint8_t[MaxDataBytes];
1359 
1360  gem5::ThreadContext *thread = cpu->tcBase(lsqID);
1361  PacketPtr main_pkt = new Packet(req->mainRequest(), MemCmd::ReadReq);
1362 
1363  main_pkt->dataStatic(load_inst->memData);
1364 
1365  Cycles delay = req->mainRequest()->localAccessor(thread, main_pkt);
1366 
1367  WritebackEvent *wb = new WritebackEvent(load_inst, main_pkt, this);
1368  cpu->schedule(wb, cpu->clockEdge(delay));
1369  return NoFault;
1370  }
1371 
1372  // hardware transactional memory
1373  if (req->mainRequest()->isHTMStart() || req->mainRequest()->isHTMCommit())
1374  {
1375  // don't want to send nested transactionStarts and
1376  // transactionStops outside of core, e.g. to Ruby
1377  if (req->mainRequest()->getFlags().isSet(Request::NO_ACCESS)) {
1378  Cycles delay(0);
1379  PacketPtr data_pkt =
1380  new Packet(req->mainRequest(), MemCmd::ReadReq);
1381 
1382  // Allocate memory if this is the first time a load is issued.
1383  if (!load_inst->memData) {
1384  load_inst->memData =
1385  new uint8_t[req->mainRequest()->getSize()];
1386  // sanity checks expect zero in request's data
1387  memset(load_inst->memData, 0, req->mainRequest()->getSize());
1388  }
1389 
1390  data_pkt->dataStatic(load_inst->memData);
1391  if (load_inst->inHtmTransactionalState()) {
1392  data_pkt->setHtmTransactional(
1393  load_inst->getHtmTransactionUid());
1394  }
1395  data_pkt->makeResponse();
1396 
1397  WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this);
1398  cpu->schedule(wb, cpu->clockEdge(delay));
1399  return NoFault;
1400  }
1401  }
1402 
1403  // Check the SQ for any previous stores that might lead to forwarding
1404  auto store_it = load_inst->sqIt;
1405  assert (store_it >= storeWBIt);
1406  // End once we've reached the top of the LSQ
// Walk from this load's SQ position back toward the oldest
// not-yet-written-back store, looking for an older store that overlaps
// this load's address range.
1407  while (store_it != storeWBIt && !load_inst->isDataPrefetch()) {
1408  // Move the index to one younger
1409  store_it--;
1410  assert(store_it->valid());
1411  assert(store_it->instruction()->seqNum < load_inst->seqNum);
1412  int store_size = store_it->size();
1413 
1414  // Cache maintenance instructions go down via the store
1415  // path but they carry no data and they shouldn't be
1416  // considered for forwarding
1417  if (store_size != 0 && !store_it->instruction()->strictlyOrdered() &&
1418  !(store_it->request()->mainRequest() &&
1419  store_it->request()->mainRequest()->isCacheMaintenance())) {
1420  assert(store_it->instruction()->effAddrValid());
1421 
1422  // Check if the store data is within the lower and upper bounds of
1423  // addresses that the request needs.
1424  auto req_s = req->mainRequest()->getVaddr();
1425  auto req_e = req_s + req->mainRequest()->getSize();
1426  auto st_s = store_it->instruction()->effAddr;
1427  auto st_e = st_s + store_size;
1428 
1429  bool store_has_lower_limit = req_s >= st_s;
1430  bool store_has_upper_limit = req_e <= st_e;
1431  bool lower_load_has_store_part = req_s < st_e;
1432  bool upper_load_has_store_part = req_e > st_s;
1433 
// NOTE(review): orig. line 1434, which declares and initializes
// `coverage` (an AddrRangeCoverage value), is missing from this
// extract.
1435 
1436  // If the store entry is not atomic (atomic does not have valid
1437  // data), the store has all of the data needed, and
1438  // the load is not LLSC, then
1439  // we can forward data from the store to the load
1440  if (!store_it->instruction()->isAtomic() &&
1441  store_has_lower_limit && store_has_upper_limit &&
1442  !req->mainRequest()->isLLSC()) {
1443 
1444  const auto& store_req = store_it->request()->mainRequest();
1445  coverage = store_req->isMasked() ?
// NOTE(review): orig. lines 1446-1447 (the partial-vs-full coverage
// choice for masked stores) are missing from this extract.
1448  } else if (
1449  // This is the partial store-load forwarding case where a store
1450  // has only part of the load's data and the load isn't LLSC
1451  (!req->mainRequest()->isLLSC() &&
1452  ((store_has_lower_limit && lower_load_has_store_part) ||
1453  (store_has_upper_limit && upper_load_has_store_part) ||
1454  (lower_load_has_store_part && upper_load_has_store_part))) ||
1455  // The load is LLSC, and the store has all or part of the
1456  // load's data
1457  (req->mainRequest()->isLLSC() &&
1458  ((store_has_lower_limit || upper_load_has_store_part) &&
1459  (store_has_upper_limit || lower_load_has_store_part))) ||
1460  // The store entry is atomic and has all or part of the load's
1461  // data
1462  (store_it->instruction()->isAtomic() &&
1463  ((store_has_lower_limit || upper_load_has_store_part) &&
1464  (store_has_upper_limit || lower_load_has_store_part)))) {
1465 
// NOTE(review): orig. line 1466 (presumably assigning partial
// coverage here) is missing from this extract -- confirm against the
// original source.
1467  }
1468 
// Full coverage: satisfy the load entirely from the SQ entry's data
// and complete it via a writeback event (no cache access).
1469  if (coverage == AddrRangeCoverage::FullAddrRangeCoverage) {
1470  // Get shift amount for offset into the store's data.
1471  int shift_amt = req->mainRequest()->getVaddr() -
1472  store_it->instruction()->effAddr;
1473 
1474  // Allocate memory if this is the first time a load is issued.
1475  if (!load_inst->memData) {
1476  load_inst->memData =
1477  new uint8_t[req->mainRequest()->getSize()];
1478  }
1479  if (store_it->isAllZeros())
1480  memset(load_inst->memData, 0,
1481  req->mainRequest()->getSize());
1482  else
1483  memcpy(load_inst->memData,
1484  store_it->data() + shift_amt,
1485  req->mainRequest()->getSize());
1486 
1487  DPRINTF(LSQUnit, "Forwarding from store idx %i to load to "
1488  "addr %#x\n", store_it._idx,
1489  req->mainRequest()->getVaddr());
1490 
1491  PacketPtr data_pkt = new Packet(req->mainRequest(),
1492  MemCmd::ReadReq);
1493  data_pkt->dataStatic(load_inst->memData);
1494 
1495  // hardware transactional memory
1496  // Store to load forwarding within a transaction
1497  // This should be okay because the store will be sent to
1498  // the memory subsystem and subsequently get added to the
1499  // write set of the transaction. The write set has a stronger
1500  // property than the read set, so the load doesn't necessarily
1501  // have to be there.
1502  assert(!req->mainRequest()->isHTMCmd());
1503  if (load_inst->inHtmTransactionalState()) {
1504  assert (!storeQueue[store_it._idx].completed());
1505  assert (
1506  storeQueue[store_it._idx].instruction()->
1507  inHtmTransactionalState());
1508  assert (
1509  load_inst->getHtmTransactionUid() ==
1510  storeQueue[store_it._idx].instruction()->
1511  getHtmTransactionUid());
1512  data_pkt->setHtmTransactional(
1513  load_inst->getHtmTransactionUid());
1514  DPRINTF(HtmCpu, "HTM LD (ST2LDF) "
1515  "pc=0x%lx - vaddr=0x%lx - "
1516  "paddr=0x%lx - htmUid=%u\n",
1517  load_inst->instAddr(),
1518  data_pkt->req->hasVaddr() ?
1519  data_pkt->req->getVaddr() : 0lu,
1520  data_pkt->getAddr(),
1521  load_inst->getHtmTransactionUid());
1522  }
1523 
1524  if (req->isAnyOutstandingRequest()) {
1525  assert(req->_numOutstandingPackets > 0);
1526  // There are memory requests packets in flight already.
1527  // This may happen if the store was not complete the
1528  // first time this load got executed. Signal the senderState
1529  // that response packets should be discarded.
1530  req->discardSenderState();
1531  }
1532 
1533  WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt,
1534  this);
1535 
1536  // We'll say this has a 1 cycle load-store forwarding latency
1537  // for now.
1538  // @todo: Need to make this a parameter.
1539  cpu->schedule(wb, curTick());
1540 
1541  // Don't need to do anything special for split loads.
1542  ++stats.forwLoads;
1543 
1544  return NoFault;
1545  } else if (
// NOTE(review): orig. line 1546 (the condition of this else-if,
// presumably testing for partial coverage) is missing from this
// extract.
1547 
1548  // If it's already been written back, then don't worry about
1549  // stalling on it.
1549  if (store_it->completed()) {
1550  panic("Should not check one of these");
1551  continue;
1552  }
1553 
1554  // Must stall load and force it to retry, so long as it's the
1555  // oldest load that needs to do so.
1556  if (!stalled ||
1557  (stalled &&
1558  load_inst->seqNum <
1559  loadQueue[stallingLoadIdx].instruction()->seqNum)) {
1560  stalled = true;
1561  stallingStoreIsn = store_it->instruction()->seqNum;
1562  stallingLoadIdx = load_idx;
1563  }
1564 
1565  // Tell IQ/mem dep unit that this instruction will need to be
1566  // rescheduled eventually
1567  iewStage->rescheduleMemInst(load_inst);
1568  load_inst->clearIssued();
1569  load_inst->effAddrValid(false);
// NOTE(review): orig. line 1570 (likely a rescheduled-loads stat
// bump) is missing from this extract.
1571 
1572  // Do not generate a writeback event as this instruction is not
1573  // complete.
1574  DPRINTF(LSQUnit, "Load-store forwarding mis-match. "
1575  "Store idx %i to load addr %#x\n",
1576  store_it._idx, req->mainRequest()->getVaddr());
1577 
1578  // Must discard the request.
1579  req->discard();
1580  load_req.setRequest(nullptr);
1581  return NoFault;
1582  }
1583  }
1584  }
1585 
1586  // If there's no forwarding case, then go access memory
1587  DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %s\n",
1588  load_inst->seqNum, load_inst->pcState());
1589 
1590  // Allocate memory if this is the first time a load is issued.
1591  if (!load_inst->memData) {
1592  load_inst->memData = new uint8_t[req->mainRequest()->getSize()];
1593  }
1594 
1595 
1596  // hardware transactional memory
1597  if (req->mainRequest()->isHTMCmd()) {
1598  // this is a simple sanity check
1599  // the Ruby cache controller will set
1600  // memData to 0x0ul if successful.
1601  *load_inst->memData = (uint64_t) 0x1ull;
1602  }
1603 
1604  // For now, load throughput is constrained by the number of
1605  // load FUs only, and loads do not consume a cache port (only
1606  // stores do).
1607  // @todo We should account for cache port contention
1608  // and arbitrate between loads and stores.
1609 
1610  // If the cache is not blocked, do the cache access
1611  if (req->senderState() == nullptr) {
1612  LQSenderState *state = new LQSenderState(
1613  loadQueue.getIterator(load_idx));
1614  state->isLoad = true;
1615  state->inst = load_inst;
1616  state->isSplit = req->isSplit();
1617  req->senderState(state);
1618  }
1619  req->buildPackets();
1620  req->sendPacketToCache();
1621  if (!req->isSent())
1622  iewStage->blockMemInst(load_inst);
1623 
1624  return NoFault;
1625 }
1626 
1627 Fault
1628 LSQUnit::write(LSQRequest *req, uint8_t *data, int store_idx)
1629 {
1630  assert(storeQueue[store_idx].valid());
1631 
1632  DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x | storeHead:%i "
1633  "[sn:%llu]\n",
1634  store_idx - 1, req->request()->getPaddr(), storeQueue.head() - 1,
1635  storeQueue[store_idx].instruction()->seqNum);
1636 
1637  storeQueue[store_idx].setRequest(req);
1638  unsigned size = req->_size;
1639  storeQueue[store_idx].size() = size;
1640  bool store_no_data =
1641  req->mainRequest()->getFlags() & Request::STORE_NO_DATA;
1642  storeQueue[store_idx].isAllZeros() = store_no_data;
1643  assert(size <= SQEntry::DataSize || store_no_data);
1644 
1645  // copy data into the storeQueue only if the store request has valid data
1646  if (!(req->request()->getFlags() & Request::CACHE_BLOCK_ZERO) &&
1647  !req->request()->isCacheMaintenance() &&
1648  !req->request()->isAtomic())
1649  memcpy(storeQueue[store_idx].data(), data, size);
1650 
1651  // This function only writes the data to the store queue, so no fault
1652  // can happen here.
1653  return NoFault;
1654 }
1655 
1656 InstSeqNum
1658 {
1659  if (loadQueue.front().valid())
1660  return loadQueue.front().instruction()->seqNum;
1661  else
1662  return 0;
1663 }
1664 
1665 InstSeqNum
1667 {
1668  if (storeQueue.front().valid())
1669  return storeQueue.front().instruction()->seqNum;
1670  else
1671  return 0;
1672 }
1673 
1674 } // namespace o3
1675 } // namespace gem5
gem5::o3::LSQUnit::storePostSend
void storePostSend()
Handles completing the send of a store to memory.
Definition: lsq_unit.cc:1062
gem5::o3::LSQUnit::LSQUnitStats::squashedLoads
statistics::Scalar squashedLoads
Total number of squashed loads.
Definition: lsq_unit.hh:560
gem5::o3::LSQ::LSQSenderState::inst
DynInstPtr inst
Instruction which initiated the access to memory.
Definition: lsq.hh:91
gem5::curTick
Tick curTick()
The universal simulation clock.
Definition: cur_tick.hh:46
gem5::CircularQueue::back
reference back()
Definition: circular_queue.hh:446
gem5::HtmCacheFailure::FAIL_OTHER
@ FAIL_OTHER
gem5::o3::LSQUnit::WritebackEvent::process
void process()
Processes the writeback event.
Definition: lsq_unit.cc:76
gem5::o3::CPU::ppDataAccessComplete
ProbePointArg< std::pair< DynInstPtr, PacketPtr > > * ppDataAccessComplete
Definition: cpu.hh:174
gem5::o3::LSQUnit::lastRetiredHtmUid
uint64_t lastRetiredHtmUid
Definition: lsq_unit.hh:511
gem5::o3::LSQ::LSQRequest
Memory operation metadata.
Definition: lsq.hh:231
gem5::NoFault
constexpr decltype(nullptr) NoFault
Definition: types.hh:260
gem5::BaseCPU::switchedOut
bool switchedOut() const
Determine if the CPU is switched out.
Definition: base.hh:357
gem5::o3::LSQUnit::stallingStoreIsn
InstSeqNum stallingStoreIsn
The store that causes the stall due to partial store to load forwarding.
Definition: lsq_unit.hh:529
gem5::RequestPort::sendTimingReq
bool sendTimingReq(PacketPtr pkt)
Attempt to send a timing request to the responder port by calling its corresponding receive function.
Definition: port.hh:495
gem5::o3::LSQ::LSQSenderState::complete
virtual void complete()=0
gem5::o3::LSQUnit::insertStore
void insertStore(const DynInstPtr &store_inst)
Inserts a store instruction.
Definition: lsq_unit.cc:392
gem5::o3::LSQUnit::storeInFlight
bool storeInFlight
Whether or not a store is in flight.
Definition: lsq_unit.hh:540
gem5::o3::LSQ::cacheBlocked
bool cacheBlocked() const
Is D-cache blocked?
Definition: lsq.cc:194
gem5::o3::LSQUnit::WritebackEvent::description
const char * description() const
Returns the description of this event.
Definition: lsq_unit.cc:88
gem5::o3::LSQUnit::isStalled
bool isStalled()
Returns whether or not the LSQ unit is stalled.
Definition: lsq_unit.hh:602
gem5::o3::LSQUnit::checkLoads
bool checkLoads
Should loads be checked for dependency issues.
Definition: lsq_unit.hh:497
gem5::cprintf
void cprintf(const char *format, const Args &...args)
Definition: cprintf.hh:155
gem5::CircularQueue::advance_tail
void advance_tail()
Increases the tail by one.
Definition: circular_queue.hh:515
gem5::o3::CPU::mmu
BaseMMU * mmu
Definition: cpu.hh:112
gem5::o3::LSQUnit::LSQUnitStats::memOrderViolation
statistics::Scalar memOrderViolation
Total number of memory ordering violations.
Definition: lsq_unit.hh:567
gem5::GenericHtmFailureFault
Definition: faults.hh:140
data
const char data[]
Definition: circlebuf.test.cc:48
gem5::o3::LSQUnit::recvTimingResp
bool recvTimingResp(PacketPtr pkt)
Handles writing back and completing the load or store that has returned from memory.
Definition: lsq_unit.cc:94
gem5::HtmFailureFaultCause
HtmFailureFaultCause
Definition: htm.hh:47
gem5::o3::LSQUnit::LSQEntry::setRequest
void setRequest(LSQRequest *r)
Definition: lsq_unit.hh:141
gem5::o3::LSQUnit::iewStage
IEW * iewStage
Pointer to the IEW stage.
Definition: lsq_unit.hh:401
gem5::o3::LSQUnit::memDepViolator
DynInstPtr memDepViolator
The oldest load that caused a memory ordering violation.
Definition: lsq_unit.hh:543
gem5::o3::LSQUnit::stallingLoadIdx
int stallingLoadIdx
The index of the above store.
Definition: lsq_unit.hh:531
gem5::HtmFailureFaultCause::MEMORY
@ MEMORY
gem5::BaseCPU::numContexts
unsigned numContexts()
Get the number of thread contexts available.
Definition: base.hh:293
gem5::o3::IEW::replayMemInst
void replayMemInst(const DynInstPtr &inst)
Re-executes all rescheduled memory instructions.
Definition: iew.cc:551
gem5::o3::LSQUnit::LSQUnit
LSQUnit(uint32_t lqEntries, uint32_t sqEntries)
Constructs an LSQ unit.
Definition: lsq_unit.cc:203
gem5::o3::LSQUnit::read
Fault read(LSQRequest *req, int load_idx)
Executes the load at the given index.
Definition: lsq_unit.cc:1305
gem5::Packet::req
RequestPtr req
A pointer to the original request.
Definition: packet.hh:366
gem5::o3::LSQ::LSQSenderState::needWB
bool needWB
Whether or not the instruction will need to writeback.
Definition: lsq.hh:101
gem5::BaseCPU::cacheLineSize
unsigned int cacheLineSize() const
Get the cache line size of the system.
Definition: base.hh:381
gem5::o3::LSQUnit::numFreeStoreEntries
unsigned numFreeStoreEntries()
Returns the number of free SQ entries.
Definition: lsq_unit.cc:433
gem5::o3::LSQUnit::init
void init(CPU *cpu_ptr, IEW *iew_ptr, const O3CPUParams &params, LSQ *lsq_ptr, unsigned id)
Initializes the LSQ unit with the specified number of entries.
Definition: lsq_unit.cc:214
gem5::Request::CACHE_BLOCK_ZERO
@ CACHE_BLOCK_ZERO
This is a write that is targeted and zeroing an entire cache block.
Definition: request.hh:143
gem5::o3::LSQUnit::LSQUnitStats::ignoredResponses
statistics::Scalar ignoredResponses
Total number of responses from the memory system that are ignored due to the instruction already bein...
Definition: lsq_unit.hh:564
gem5::o3::LSQ::LSQRequest::isCacheBlockHit
virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask)=0
Test if the request accesses a particular cache line.
gem5::statistics::nozero
const FlagsType nozero
Don't print if this is zero.
Definition: info.hh:68
gem5::o3::IEW::checkMisprediction
void checkMisprediction(const DynInstPtr &inst)
Check misprediction
Definition: iew.cc:1593
sc_dt::to_string
const std::string to_string(sc_enc enc)
Definition: sc_fxdefs.cc:91
gem5::o3::LSQ::LSQRequest::buildPackets
virtual void buildPackets()=0
gem5::HtmCacheFailure
HtmCacheFailure
Definition: htm.hh:59
gem5::o3::LSQUnit::schedule
void schedule(Event &ev, Tick when)
Schedule event for the cpu.
Definition: lsq_unit.cc:1294
gem5::o3::LSQ::cachePortAvailable
bool cachePortAvailable(bool is_load) const
Is any store port available to use?
Definition: lsq.cc:206
gem5::ArmISA::e
Bitfield< 9 > e
Definition: misc_types.hh:64
gem5::Packet::setHtmTransactional
void setHtmTransactional(uint64_t val)
Stipulates that this packet/request originates in the CPU executing in transactional mode,...
Definition: packet.cc:521
gem5::o3::LSQUnit::htmStops
int htmStops
Definition: lsq_unit.hh:509
gem5::o3::LSQUnit::MaxDataBytes
static constexpr auto MaxDataBytes
Definition: lsq_unit.hh:93
gem5::o3::LSQ
Definition: lsq.hh:75
gem5::Packet::isWrite
bool isWrite() const
Definition: packet.hh:583
gem5::o3::LSQUnit::storesToWB
int storesToWB
The number of store instructions in the SQ waiting to writeback.
Definition: lsq_unit.hh:504
gem5::o3::LSQUnit::cacheBlockMask
Addr cacheBlockMask
Address Mask for a cache block (e.g.
Definition: lsq_unit.hh:519
gem5::o3::LSQ::LSQRequest::sendPacketToCache
virtual void sendPacketToCache()=0
gem5::o3::LSQUnit::setDcachePort
void setDcachePort(RequestPort *dcache_port)
Sets the pointer to the dcache port.
Definition: lsq_unit.cc:292
gem5::EventManager::schedule
void schedule(Event &event, Tick when)
Definition: eventq.hh:1019
gem5::csprintf
std::string csprintf(const char *format, const Args &...args)
Definition: cprintf.hh:161
dyn_inst.hh
gem5::o3::LSQ::LSQRequest::mainPacket
virtual PacketPtr mainPacket()
Definition: lsq.hh:422
gem5::RefCountingPtr::get
T * get() const
Directly access the pointer itself without taking a reference.
Definition: refcnt.hh:227
gem5::CircularQueue::full
bool full() const
Is the queue full? A queue is full if the head is the 0^{th} element and the tail is the (size-1)^{th...
Definition: circular_queue.hh:558
gem5::ArmISA::i
Bitfield< 7 > i
Definition: misc_types.hh:66
gem5::o3::LSQUnit::LSQUnitStats::loadToUse
statistics::Distribution loadToUse
Distribution of cycle latency between the first time a load is issued and its completion.
Definition: lsq_unit.hh:580
gem5::o3::LSQUnit::depCheckShift
unsigned depCheckShift
The number of places to shift addresses in the LSQ before checking for dependency violations.
Definition: lsq_unit.hh:494
request.hh
gem5::HtmFailureFaultCause::OTHER
@ OTHER
gem5::o3::LSQUnit::AddrRangeCoverage::PartialAddrRangeCoverage
@ PartialAddrRangeCoverage
gem5::statistics::DistBase::sample
void sample(const U &v, int n=1)
Add a value to the distribtion n times.
Definition: statistics.hh:1325
gem5::o3::LSQUnit::numFreeLoadEntries
unsigned numFreeLoadEntries()
Returns the number of free LQ entries.
Definition: lsq_unit.cc:423
gem5::o3::IEW::wakeCPU
void wakeCPU()
Tells the CPU to wakeup if it has descheduled itself due to no activity.
Definition: iew.cc:805
gem5::HtmCacheFailure::FAIL_SELF
@ FAIL_SELF
gem5::RefCountingPtr< DynInst >
gem5::BaseMMU
Definition: mmu.hh:50
gem5::o3::IEW::rescheduleMemInst
void rescheduleMemInst(const DynInstPtr &inst)
Tells memory dependence unit that a memory instruction needs to be rescheduled.
Definition: iew.cc:545
packet.hh
gem5::o3::CPU::checker
gem5::Checker< DynInstPtr > * checker
Pointer to the checker, which can dynamically verify instruction results at run time.
Definition: cpu.hh:602
gem5::RequestPort
A RequestPort is a specialisation of a Port, which implements the default protocol for the three diff...
Definition: port.hh:77
gem5::Cycles
Cycles is a wrapper class for representing cycle counts, i.e.
Definition: types.hh:78
gem5::o3::LSQUnit::getLatestHtmUid
uint64_t getLatestHtmUid() const
Definition: lsq_unit.cc:1055
gem5::ArmISA::handleLockedWrite
bool handleLockedWrite(XC *xc, const RequestPtr &req, Addr cacheBlockMask)
Definition: locked_mem.hh:113
str.hh
gem5::o3::LSQUnit::completeStore
void completeStore(typename StoreQueue::iterator store_idx)
Completes the store at the specified index.
Definition: lsq_unit.cc:1155
gem5::o3::LSQUnit::needsTSO
bool needsTSO
Flag for memory model.
Definition: lsq_unit.hh:546
gem5::o3::LSQUnit::LSQUnitStats::forwLoads
statistics::Scalar forwLoads
Total number of loads forwaded from LSQ stores.
Definition: lsq_unit.hh:557
gem5::Packet::dataStatic
void dataStatic(T *p)
Set the data pointer to the following value that should not be freed.
Definition: packet.hh:1134
gem5::o3::LSQUnit::LSQUnitStats::blockedByCache
statistics::Scalar blockedByCache
Number of times the LSQ is blocked due to the cache.
Definition: lsq_unit.hh:576
gem5::o3::LSQUnit::WritebackEvent
Writeback event, specifically for when stores forward data to loads.
Definition: lsq_unit.hh:447
gem5::o3::LSQ::LSQRequest::_size
const uint32_t _size
Definition: lsq.hh:297
gem5::o3::LSQ::LSQRequest::recvTimingResp
virtual bool recvTimingResp(PacketPtr pkt)=0
gem5::CircularQueue::tail
size_t tail() const
Definition: circular_queue.hh:456
gem5::o3::LSQUnit
Class that implements the actual LQ and SQ for each specific thread.
Definition: lsq_unit.hh:90
gem5::HtmCacheFailure::FAIL_REMOTE
@ FAIL_REMOTE
gem5::CircularQueue::size
size_t size() const
Definition: circular_queue.hh:466
gem5::o3::CPU
O3CPU class, has each of the stages (fetch through commit) within it, as well as all of the time buff...
Definition: cpu.hh:95
gem5::o3::LSQ::LSQRequest::packetSent
void packetSent()
Update the status to reflect that a packet was sent.
Definition: lsq.hh:497
gem5::o3::LSQ::LSQRequest::setStateToFault
void setStateToFault()
Definition: lsq.hh:560
gem5::o3::LSQUnit::stats
gem5::o3::LSQUnit::LSQUnitStats stats
gem5::o3::LSQUnit::commitLoad
void commitLoad()
Commits the head load.
Definition: lsq_unit.cc:721
gem5::statistics::Distribution::init
Distribution & init(Counter min, Counter max, Counter bkt)
Set the parameters of this distribution.
Definition: statistics.hh:2110
gem5::ThreadContext
ThreadContext is the external interface to all thread state for anything outside of the CPU.
Definition: thread_context.hh:93
gem5::o3::LSQUnit::LSQEntry
Definition: lsq_unit.hh:98
gem5::o3::LSQUnit::LSQUnitStats::rescheduledLoads
statistics::Scalar rescheduledLoads
Number of loads that were rescheduled.
Definition: lsq_unit.hh:573
gem5::o3::LSQ::LSQRequest::discard
void discard()
The request is discarded (e.g.
Definition: lsq.hh:578
gem5::o3::LSQUnit::trySendPacket
bool trySendPacket(bool isLoad, PacketPtr data_pkt)
Attempts to send a packet to the cache.
Definition: lsq_unit.cc:1219
gem5::Fault
std::shared_ptr< FaultBase > Fault
Definition: types.hh:255
gem5::Packet::print
void print(std::ostream &o, int verbosity=0, const std::string &prefix="") const
Definition: packet.cc:373
gem5::o3::LSQ::LSQSenderState::isSplit
bool isSplit
Whether or not this access is split in two.
Definition: lsq.hh:103
gem5::o3::LSQ::LSQRequest::discardSenderState
void discardSenderState()
Mark senderState as discarded.
Definition: lsq.hh:456
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:186
gem5::Event
Definition: eventq.hh:251
ADD_STAT
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition: group.hh:75
gem5::o3::LSQUnit::LQSenderState
Particularisation of the LSQSenderState to the LQ.
Definition: lsq_unit.hh:410
gem5::Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:283
gem5::o3::LSQUnit::loads
int loads
The number of load instructions in the LQ.
Definition: lsq_unit.hh:500
gem5::o3::LSQUnit::isStoreBlocked
bool isStoreBlocked
Whether or not a store is blocked due to the memory system.
Definition: lsq_unit.hh:537
gem5::Packet::htmTransactionFailedInCache
bool htmTransactionFailedInCache() const
Returns whether or not this packet/request has returned from the cache hierarchy in a failed transact...
Definition: packet.cc:508
gem5::probing::Packet
ProbePointArg< PacketInfo > Packet
Packet probe point.
Definition: mem.hh:109
gem5::Tick
uint64_t Tick
Tick count type.
Definition: types.hh:58
gem5::o3::LSQUnit::WritebackEvent::WritebackEvent
WritebackEvent(const DynInstPtr &_inst, PacketPtr pkt, LSQUnit *lsq_ptr)
Constructs a writeback event.
Definition: lsq_unit.cc:66
gem5::o3::IEW
IEW handles both single threaded and SMT IEW (issue/execute/writeback).
Definition: iew.hh:87
cpu.hh
gem5::o3::LSQUnit::htmStarts
int htmStarts
Definition: lsq_unit.hh:508
gem5::o3::LSQUnit::cpu
CPU * cpu
Pointer to the CPU.
Definition: lsq_unit.hh:398
gem5::o3::LSQUnit::storeWBIt
StoreQueue::iterator storeWBIt
The index of the first instruction that may be ready to be written back, and has not yet been written...
Definition: lsq_unit.hh:516
gem5::o3::LSQUnit::lsqID
ThreadID lsqID
The LSQUnit thread id.
Definition: lsq_unit.hh:482
gem5::MemCmd::ReadReq
@ ReadReq
Definition: packet.hh:86
gem5::o3::LSQ::LSQRequest::packet
PacketPtr packet(int idx=0)
Definition: lsq.hh:419
gem5::o3::LSQ::LSQRequest::_numOutstandingPackets
uint32_t _numOutstandingPackets
Definition: lsq.hh:300
gem5::ReExec
Definition: faults.hh:92
gem5::o3::CPU::tcBase
gem5::ThreadContext * tcBase(ThreadID tid)
Returns a pointer to a thread context.
Definition: cpu.hh:590
gem5::o3::LSQUnit::write
Fault write(LSQRequest *req, uint8_t *data, int store_idx)
Executes the store at the given index.
Definition: lsq_unit.cc:1628
gem5::Packet::getHtmTransactionUid
uint64_t getHtmTransactionUid() const
If a packet/request originates in a CPU executing in transactional mode, i.e.
Definition: packet.cc:534
gem5::o3::CPU::thread
std::vector< ThreadState * > thread
Pointers to all of the threads in the CPU.
Definition: cpu.hh:608
gem5::o3::LSQUnit::commitLoads
void commitLoads(InstSeqNum &youngest_inst)
Commits loads older than a specific sequence number.
Definition: lsq_unit.cc:746
gem5::o3::IEW::activityThisCycle
void activityThisCycle()
Reports to the CPU that there is activity this cycle.
Definition: iew.cc:811
gem5::CircularQueue::begin
iterator begin()
Iterators.
Definition: circular_queue.hh:565
gem5::ArmISA::handleLockedSnoopHit
void handleLockedSnoopHit(XC *xc)
Definition: locked_mem.hh:103
gem5::o3::LSQUnit::completeDataAccess
void completeDataAccess(PacketPtr pkt)
Completes the data access that has been returned from the memory system.
Definition: lsq_unit.cc:111
gem5::o3::LSQ::cachePortBusy
void cachePortBusy(bool is_load)
Another store port is in use.
Definition: lsq.cc:218
gem5::o3::CPU::wakeCPU
void wakeCPU()
Wakes the CPU, rescheduling the CPU if it's not already active.
Definition: cpu.cc:1594
gem5::HtmFailureFaultCause::INVALID
@ INVALID
ss
std::stringstream ss
Definition: trace.test.cc:45
gem5::statistics::Group::addStatGroup
void addStatGroup(const char *name, Group *block)
Add a stat block as a child of this block.
Definition: group.cc:117
gem5::CircularQueue::front
reference front()
Definition: circular_queue.hh:441
gem5::Packet::getHtmTransactionFailedInCacheRC
HtmCacheFailure getHtmTransactionFailedInCacheRC() const
If a packet/request has returned from the cache hierarchy in a failed transaction,...
Definition: packet.cc:514
gem5::o3::LSQUnit::takeOverFrom
void takeOverFrom()
Takes over from another CPU's thread.
Definition: lsq_unit.cc:308
gem5::o3::LSQ::LSQRequest::request
RequestPtr request(int idx=0)
Definition: lsq.hh:408
gem5::o3::LSQUnit::retryPkt
PacketPtr retryPkt
The packet that needs to be retried.
Definition: lsq_unit.hh:534
gem5::o3::LSQUnit::checkViolations
Fault checkViolations(typename LoadQueue::iterator &loadIt, const DynInstPtr &inst)
Check for ordering violations in the LSQ.
Definition: lsq_unit.cc:524
gem5::ThreadContext::getHtmCheckpointPtr
virtual BaseHTMCheckpointPtr & getHtmCheckpointPtr()=0
gem5::o3::LSQ::LSQRequest::senderState
void senderState(LSQSenderState *st)
Definition: lsq.hh:436
gem5::BaseCPU::getContext
virtual ThreadContext * getContext(int tn)
Given a thread num get the thread context for it.
Definition: base.hh:290
gem5::o3::LSQUnit::stores
int stores
The number of store instructions in the SQ.
Definition: lsq_unit.hh:502
gem5::o3::LSQUnit::writeback
void writeback(const DynInstPtr &inst, PacketPtr pkt)
Writes back the instruction, sending it to IEW.
Definition: lsq_unit.cc:1093
gem5::Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:147
gem5::o3::LSQUnit::AddrRangeCoverage::FullAddrRangeCoverage
@ FullAddrRangeCoverage
gem5::o3::LSQUnit::squash
void squash(const InstSeqNum &squashed_num)
Squashes all instructions younger than a specific sequence number.
Definition: lsq_unit.cc:938
gem5::Packet::senderState
SenderState * senderState
This packet's sender state.
Definition: packet.hh:534
gem5::o3::LSQUnit::LSQUnitStats::squashedStores
statistics::Scalar squashedStores
Total number of squashed stores.
Definition: lsq_unit.hh:570
gem5::o3::LSQ::LSQRequest::complete
void complete()
Definition: lsq.hh:621
gem5::Clocked::clockEdge
Tick clockEdge(Cycles cycles=Cycles(0)) const
Determine the tick when a cycle begins, by default the current one, but the argument also enables the...
Definition: clocked_object.hh:177
gem5::o3::LSQUnit::LSQEntry::instruction
const DynInstPtr & instruction() const
Definition: lsq_unit.hh:148
gem5::o3::LSQ::LSQRequest::isSent
bool isSent()
Definition: lsq.hh:541
gem5::o3::LSQUnit::executeStore
Fault executeStore(const DynInstPtr &inst)
Executes a store instruction.
Definition: lsq_unit.cc:670
gem5::Request::STORE_NO_DATA
static const FlagsType STORE_NO_DATA
Definition: request.hh:244
gem5::o3::MaxThreads
static constexpr int MaxThreads
Definition: limits.hh:38
gem5::o3::LSQUnit::writebackBlockedStore
void writebackBlockedStore()
Try to finish a previously blocked write back attempt.
Definition: lsq_unit.cc:783
gem5::o3::CPU::activityThisCycle
void activityThisCycle()
Records that there was time buffer activity this cycle.
Definition: cpu.hh:569
gem5::HtmFailureFaultCause::SIZE
@ SIZE
gem5::o3::LSQ::LSQSenderState::isLoad
bool isLoad
Whether or not it is a load.
Definition: lsq.hh:99
gem5::CircularQueue::pop_front
void pop_front(size_t num_elem=1)
Circularly increase the head pointer.
Definition: circular_queue.hh:477
gem5::o3::IEW::instToCommit
void instToCommit(const DynInstPtr &inst)
Sends an instruction to commit through the time buffer.
Definition: iew.cc:569
gem5::RiscvISA::x
Bitfield< 3 > x
Definition: pagetable.hh:73
gem5::o3::LSQUnit::insert
void insert(const DynInstPtr &inst)
Inserts an instruction.
Definition: lsq_unit.cc:314
gem5::o3::LSQUnit::dcachePort
RequestPort * dcachePort
Pointer to the dcache port.
Definition: lsq_unit.hh:407
gem5::o3::LSQ::LSQSenderState
Derived class to hold any sender state the LSQ needs.
Definition: lsq.hh:80
gem5::CircularQueue::pop_back
void pop_back()
Circularly decrease the tail pointer.
Definition: circular_queue.hh:490
gem5::CircularQueue::empty
bool empty() const
Is the queue empty?
Definition: circular_queue.hh:548
gem5::o3::LSQUnit::stalled
bool stalled
Whether or not the LSQ is stalled.
Definition: lsq_unit.hh:525
gem5::htmFailureToStr
std::string htmFailureToStr(HtmFailureFaultCause cause)
Convert enum into string to be used for debug purposes.
Definition: htm.cc:44
gem5::Request::LLSC
@ LLSC
The request is a Load locked/store conditional.
Definition: request.hh:156
gem5::o3::IEW::blockMemInst
void blockMemInst(const DynInstPtr &inst)
Moves memory instruction onto the list of cache blocked instructions.
Definition: iew.cc:557
gem5::o3::LSQ::LSQRequest::mainRequest
virtual RequestPtr mainRequest()
Definition: lsq.hh:429
gem5::RefCountingPtr::data
T * data
The stored pointer.
Definition: refcnt.hh:146
gem5::CircularQueue::head
size_t head() const
Definition: circular_queue.hh:451
gem5::o3::LSQUnit::SQEntry::DataSize
static constexpr size_t DataSize
Definition: lsq_unit.hh:170
gem5::o3::LSQUnit::getMMUPtr
BaseMMU * getMMUPtr()
Definition: lsq_unit.cc:1296
gem5::o3::LSQUnit::lsq
LSQ * lsq
Pointer to the LSQ.
Definition: lsq_unit.hh:404
gem5::CircularQueue::end
iterator end()
Definition: circular_queue.hh:580
gem5::Packet::makeResponse
void makeResponse()
Take a request packet and modify it in place to be suitable for returning as a response to that reque...
Definition: packet.hh:1031
gem5::o3::LSQUnit::executeLoad
Fault executeLoad(const DynInstPtr &inst)
Executes a load instruction.
Definition: lsq_unit.cc:604
gem5::o3::LSQUnit::recvRetry
void recvRetry()
Handles doing the retry.
Definition: lsq_unit.cc:1262
gem5::MemCmd::WriteReq
@ WriteReq
Definition: packet.hh:89
lsq_unit.hh
gem5::o3::LSQ::LSQRequest::isAnyOutstandingRequest
bool isAnyOutstandingRequest()
Test if there is any in-flight translation or mem access request.
Definition: lsq.hh:466
gem5::ArmISA::handleLockedRead
void handleLockedRead(XC *xc, const RequestPtr &req)
Definition: locked_mem.hh:93
gem5::statistics::Group
Statistics container.
Definition: group.hh:93
gem5::Request::NO_ACCESS
@ NO_ACCESS
The request should not cause a memory access.
Definition: request.hh:146
gem5::ArmISA::id
Bitfield< 33 > id
Definition: misc_types.hh:250
gem5::o3::IEW::updateLSQNextCycle
bool updateLSQNextCycle
Records if the LSQ needs to be updated on the next cycle, so that IEW knows if there will be activity...
Definition: iew.hh:365
gem5::InstSeqNum
uint64_t InstSeqNum
Definition: inst_seq.hh:40
gem5::CircularQueue::capacity
size_t capacity() const
Definition: circular_queue.hh:461
gem5::o3::LSQUnit::drainSanityCheck
void drainSanityCheck() const
Perform sanity checks after a drain.
Definition: lsq_unit.cc:298
gem5::o3::LSQUnit::commitStores
void commitStores(InstSeqNum &youngest_inst)
Commits stores older than a specific sequence number.
Definition: lsq_unit.cc:757
gem5::o3::LSQUnit::resetState
void resetState()
Reset the LSQ state.
Definition: lsq_unit.cc:237
gem5::o3::LSQUnit::SQSenderState
Particularisation of the LSQSenderState to the SQ.
Definition: lsq_unit.hh:429
gem5::Packet::setHtmTransactionFailedInCache
void setHtmTransactionFailedInCache(const HtmCacheFailure ret_code)
Stipulates that this packet/request has returned from the cache hierarchy in a failed transaction.
Definition: packet.cc:498
gem5::ArmISA::handleLockedSnoop
void handleLockedSnoop(XC *xc, PacketPtr pkt, Addr cacheBlockMask)
Definition: locked_mem.hh:64
gem5::statistics::DataWrap::flags
Derived & flags(Flags _flags)
Set the flags and marks this stat to print at the end of simulation.
Definition: statistics.hh:355
debugfaults.hh
gem5::Clocked::ticksToCycles
Cycles ticksToCycles(Tick t) const
Definition: clocked_object.hh:222
gem5::CircularQueue::getIterator
iterator getIterator(size_t idx)
Return an iterator to an index in the queue.
Definition: circular_queue.hh:592
gem5::Packet::isHtmTransactional
bool isHtmTransactional() const
Returns whether or not this packet/request originates in the CPU executing in transactional mode,...
Definition: packet.cc:528
gem5::o3::LSQUnit::getMemDepViolator
DynInstPtr getMemDepViolator()
Returns the memory ordering violator.
Definition: lsq_unit.cc:413
gem5::Packet::getAddr
Addr getAddr() const
Definition: packet.hh:781
gem5::o3::LSQUnit::getStoreHeadSeqNum
InstSeqNum getStoreHeadSeqNum()
Returns the sequence number of the head store instruction.
Definition: lsq_unit.cc:1666
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: decoder.cc:40
gem5::o3::LSQUnit::loadQueue
LoadQueue loadQueue
The load queue.
Definition: lsq_unit.hh:488
gem5::o3::LSQUnit::name
std::string name() const
Returns the name of the LSQ unit.
Definition: lsq_unit.cc:256
lsq.hh
gem5::o3::LSQUnit::AddrRangeCoverage::NoAddrRangeCoverage
@ NoAddrRangeCoverage
limits.hh
gem5::o3::LSQUnit::dumpInsts
void dumpInsts() const
Debugging function to dump instructions in the LSQ.
Definition: lsq_unit.cc:1271
gem5::o3::LSQUnit::insertLoad
void insertLoad(const DynInstPtr &load_inst)
Inserts a load instruction.
Definition: lsq_unit.cc:330
gem5::o3::LSQUnit::cacheLineSize
unsigned int cacheLineSize()
Definition: lsq_unit.cc:1299
gem5::o3::LSQUnit::storeQueue
CircularQueue< SQEntry > storeQueue
The store queue.
Definition: lsq_unit.hh:485
gem5::o3::LSQ::LSQSenderState::request
LSQRequest * request()
Definition: lsq.hh:117
gem5::o3::LSQUnit::getLoadHeadSeqNum
InstSeqNum getLoadHeadSeqNum()
Returns the sequence number of the head load instruction.
Definition: lsq_unit.cc:1657
gem5::o3::LSQUnit::writebackStores
void writebackStores()
Writes back stores.
Definition: lsq_unit.cc:793
gem5::o3::LSQ::LSQRequest::isSplit
bool isSplit() const
Definition: lsq.hh:475
gem5::o3::LSQUnit::checkSnoop
void checkSnoop(PacketPtr pkt)
Check if an incoming invalidate hits in the lsq on a load that might have issued out of order wrt ano...
Definition: lsq_unit.cc:444
gem5::o3::IEW::name
std::string name() const
Returns the name of the IEW stage.
Definition: iew.cc:119
gem5::Packet::isInvalidate
bool isInvalidate() const
Definition: packet.hh:598
panic
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:177
gem5::o3::LSQUnit::LSQUnitStats::LSQUnitStats
LSQUnitStats(statistics::Group *parent)
Definition: lsq_unit.cc:265

Generated on Tue Sep 21 2021 12:25:05 for gem5 by doxygen 1.8.17