gem5  v21.1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
lsq_unit.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2010-2014, 2017-2020 ARM Limited
3  * Copyright (c) 2013 Advanced Micro Devices, Inc.
4  * All rights reserved
5  *
6  * The license below extends only to copyright in the software and shall
7  * not be construed as granting a license to any other intellectual
8  * property including but not limited to intellectual property relating
9  * to a hardware implementation of the functionality of the software
10  * licensed hereunder. You may use the software subject to the license
11  * terms below provided that you ensure that this notice is replicated
12  * unmodified and in its entirety in all distributions of the software,
13  * modified or unmodified, in source code or in binary form.
14  *
15  * Copyright (c) 2004-2006 The Regents of The University of Michigan
16  * All rights reserved.
17  *
18  * Redistribution and use in source and binary forms, with or without
19  * modification, are permitted provided that the following conditions are
20  * met: redistributions of source code must retain the above copyright
21  * notice, this list of conditions and the following disclaimer;
22  * redistributions in binary form must reproduce the above copyright
23  * notice, this list of conditions and the following disclaimer in the
24  * documentation and/or other materials provided with the distribution;
25  * neither the name of the copyright holders nor the names of its
26  * contributors may be used to endorse or promote products derived from
27  * this software without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40  */
41 
42 #include "cpu/o3/lsq_unit.hh"
43 
45 #include "arch/locked_mem.hh"
46 #include "base/str.hh"
47 #include "config/the_isa.hh"
48 #include "cpu/checker/cpu.hh"
49 #include "cpu/o3/limits.hh"
50 #include "cpu/o3/lsq.hh"
51 #include "debug/Activity.hh"
52 #include "debug/HtmCpu.hh"
53 #include "debug/IEW.hh"
54 #include "debug/LSQUnit.hh"
55 #include "debug/O3PipeView.hh"
56 #include "mem/packet.hh"
57 #include "mem/request.hh"
58 
59 namespace gem5
60 {
61 
62 namespace o3
63 {
64 
66  PacketPtr _pkt, LSQUnit *lsq_ptr)
67  : Event(Default_Pri, AutoDelete),
68  inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr)
69 {
70  assert(_inst->savedReq);
71  _inst->savedReq->writebackScheduled();
72 }
73 
74 void
76 {
77  assert(!lsqPtr->cpu->switchedOut());
78 
79  lsqPtr->writeback(inst, pkt);
80 
81  assert(inst->savedReq);
82  inst->savedReq->writebackDone();
83  delete pkt;
84 }
85 
86 const char *
88 {
89  return "Store writeback";
90 }
91 
92 bool
94 {
95  auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
96  LSQRequest* req = senderState->request();
97  assert(req != nullptr);
98  bool ret = true;
99  /* Check that the request is still alive before any further action. */
100  if (senderState->alive()) {
101  ret = req->recvTimingResp(pkt);
102  } else {
103  senderState->outstanding--;
104  }
105  return ret;
106 
107 }
108 
109 void
111 {
112  LSQSenderState *state = dynamic_cast<LSQSenderState *>(pkt->senderState);
113  DynInstPtr inst = state->inst;
114 
115  // hardware transactional memory
116  // sanity check
117  if (pkt->isHtmTransactional() && !inst->isSquashed()) {
118  assert(inst->getHtmTransactionUid() == pkt->getHtmTransactionUid());
119  }
120 
121  // if in a HTM transaction, it's possible
122  // to abort within the cache hierarchy.
123  // This is signalled back to the processor
124  // through responses to memory requests.
125  if (pkt->htmTransactionFailedInCache()) {
126  // cannot do this for write requests because
127  // they cannot tolerate faults
128  const HtmCacheFailure htm_rc =
130  if (pkt->isWrite()) {
131  DPRINTF(HtmCpu,
132  "store notification (ignored) of HTM transaction failure "
133  "in cache - addr=0x%lx - rc=%s - htmUid=%d\n",
134  pkt->getAddr(), htmFailureToStr(htm_rc),
135  pkt->getHtmTransactionUid());
136  } else {
137  HtmFailureFaultCause fail_reason =
139 
140  if (htm_rc == HtmCacheFailure::FAIL_SELF) {
141  fail_reason = HtmFailureFaultCause::SIZE;
142  } else if (htm_rc == HtmCacheFailure::FAIL_REMOTE) {
143  fail_reason = HtmFailureFaultCause::MEMORY;
144  } else if (htm_rc == HtmCacheFailure::FAIL_OTHER) {
145  // these are likely loads that were issued out of order
146  // they are faulted here, but it's unlikely that these will
147  // ever reach the commit head.
148  fail_reason = HtmFailureFaultCause::OTHER;
149  } else {
150  panic("HTM error - unhandled return code from cache (%s)",
151  htmFailureToStr(htm_rc));
152  }
153 
154  inst->fault =
155  std::make_shared<GenericHtmFailureFault>(
156  inst->getHtmTransactionUid(),
157  fail_reason);
158 
159  DPRINTF(HtmCpu,
160  "load notification of HTM transaction failure "
161  "in cache - pc=%s - addr=0x%lx - "
162  "rc=%u - htmUid=%d\n",
163  inst->pcState(), pkt->getAddr(),
164  htmFailureToStr(htm_rc), pkt->getHtmTransactionUid());
165  }
166  }
167 
168  cpu->ppDataAccessComplete->notify(std::make_pair(inst, pkt));
169 
170  /* Notify the sender state that the access is complete (for ownership
171  * tracking). */
172  state->complete();
173 
174  assert(!cpu->switchedOut());
175  if (!inst->isSquashed()) {
176  if (state->needWB) {
177  // Only loads, store conditionals and atomics perform the writeback
178  // after receving the response from the memory
179  assert(inst->isLoad() || inst->isStoreConditional() ||
180  inst->isAtomic());
181 
182  // hardware transactional memory
183  if (pkt->htmTransactionFailedInCache()) {
186  }
187 
188  writeback(inst, state->request()->mainPacket());
189  if (inst->isStore() || inst->isAtomic()) {
190  auto ss = dynamic_cast<SQSenderState*>(state);
191  ss->writebackDone();
192  completeStore(ss->idx);
193  }
194  } else if (inst->isStore()) {
195  // This is a regular store (i.e., not store conditionals and
196  // atomics), so it can complete without writing back
197  completeStore(dynamic_cast<SQSenderState*>(state)->idx);
198  }
199  }
200 }
201 
202 LSQUnit::LSQUnit(uint32_t lqEntries, uint32_t sqEntries)
203  : lsqID(-1), storeQueue(sqEntries+1), loadQueue(lqEntries+1),
204  loads(0), stores(0), storesToWB(0),
205  htmStarts(0), htmStops(0),
207  cacheBlockMask(0), stalled(false),
208  isStoreBlocked(false), storeInFlight(false), stats(nullptr)
209 {
210 }
211 
212 void
213 LSQUnit::init(CPU *cpu_ptr, IEW *iew_ptr, const O3CPUParams &params,
214  LSQ *lsq_ptr, unsigned id)
215 {
216  lsqID = id;
217 
218  cpu = cpu_ptr;
219  iewStage = iew_ptr;
220 
221  lsq = lsq_ptr;
222 
223  cpu->addStatGroup(csprintf("lsq%i", lsqID).c_str(), &stats);
224 
225  DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",lsqID);
226 
227  depCheckShift = params.LSQDepCheckShift;
228  checkLoads = params.LSQCheckLoads;
229  needsTSO = params.needsTSO;
230 
231  resetState();
232 }
233 
234 
235 void
237 {
238  loads = stores = storesToWB = 0;
239 
240  // hardware transactional memory
241  // nesting depth
242  htmStarts = htmStops = 0;
243 
245 
246  retryPkt = NULL;
247  memDepViolator = NULL;
248 
249  stalled = false;
250 
251  cacheBlockMask = ~(cpu->cacheLineSize() - 1);
252 }
253 
254 std::string
256 {
257  if (MaxThreads == 1) {
258  return iewStage->name() + ".lsq";
259  } else {
260  return iewStage->name() + ".lsq.thread" + std::to_string(lsqID);
261  }
262 }
263 
265  : statistics::Group(parent),
266  ADD_STAT(forwLoads, statistics::units::Count::get(),
267  "Number of loads that had data forwarded from stores"),
268  ADD_STAT(squashedLoads, statistics::units::Count::get(),
269  "Number of loads squashed"),
270  ADD_STAT(ignoredResponses, statistics::units::Count::get(),
271  "Number of memory responses ignored because the instruction is "
272  "squashed"),
273  ADD_STAT(memOrderViolation, statistics::units::Count::get(),
274  "Number of memory ordering violations"),
275  ADD_STAT(squashedStores, statistics::units::Count::get(),
276  "Number of stores squashed"),
277  ADD_STAT(rescheduledLoads, statistics::units::Count::get(),
278  "Number of loads that were rescheduled"),
279  ADD_STAT(blockedByCache, statistics::units::Count::get(),
280  "Number of times an access to memory failed due to the cache "
281  "being blocked"),
282  ADD_STAT(loadToUse, "Distribution of cycle latency between the "
283  "first time a load is issued and its completion")
284 {
285  loadToUse
286  .init(0, 299, 10)
288 }
289 
290 void
292 {
293  dcachePort = dcache_port;
294 }
295 
296 void
298 {
299  for (int i = 0; i < loadQueue.capacity(); ++i)
300  assert(!loadQueue[i].valid());
301 
302  assert(storesToWB == 0);
303  assert(!retryPkt);
304 }
305 
306 void
308 {
309  resetState();
310 }
311 
312 void
314 {
315  assert(inst->isMemRef());
316 
317  assert(inst->isLoad() || inst->isStore() || inst->isAtomic());
318 
319  if (inst->isLoad()) {
320  insertLoad(inst);
321  } else {
322  insertStore(inst);
323  }
324 
325  inst->setInLSQ();
326 }
327 
328 void
330 {
331  assert(!loadQueue.full());
332  assert(loads < loadQueue.capacity());
333 
334  DPRINTF(LSQUnit, "Inserting load PC %s, idx:%i [sn:%lli]\n",
335  load_inst->pcState(), loadQueue.tail(), load_inst->seqNum);
336 
337  /* Grow the queue. */
339 
340  load_inst->sqIt = storeQueue.end();
341 
342  assert(!loadQueue.back().valid());
343  loadQueue.back().set(load_inst);
344  load_inst->lqIdx = loadQueue.tail();
345  assert(load_inst->lqIdx > 0);
346  load_inst->lqIt = loadQueue.getIterator(load_inst->lqIdx);
347 
348  ++loads;
349 
350  // hardware transactional memory
351  // transactional state and nesting depth must be tracked
352  // in the in-order part of the core.
353  if (load_inst->isHtmStart()) {
354  htmStarts++;
355  DPRINTF(HtmCpu, ">> htmStarts++ (%d) : htmStops (%d)\n",
357 
358  const int htm_depth = htmStarts - htmStops;
359  const auto& htm_cpt = cpu->tcBase(lsqID)->getHtmCheckpointPtr();
360  auto htm_uid = htm_cpt->getHtmUid();
361 
362  // for debugging purposes
363  if (!load_inst->inHtmTransactionalState()) {
364  htm_uid = htm_cpt->newHtmUid();
365  DPRINTF(HtmCpu, "generating new htmUid=%u\n", htm_uid);
366  if (htm_depth != 1) {
367  DPRINTF(HtmCpu,
368  "unusual HTM transactional depth (%d)"
369  " possibly caused by mispeculation - htmUid=%u\n",
370  htm_depth, htm_uid);
371  }
372  }
373  load_inst->setHtmTransactionalState(htm_uid, htm_depth);
374  }
375 
376  if (load_inst->isHtmStop()) {
377  htmStops++;
378  DPRINTF(HtmCpu, ">> htmStarts (%d) : htmStops++ (%d)\n",
380 
381  if (htmStops==1 && htmStarts==0) {
382  DPRINTF(HtmCpu,
383  "htmStops==1 && htmStarts==0. "
384  "This generally shouldn't happen "
385  "(unless due to misspeculation)\n");
386  }
387  }
388 }
389 
390 void
392 {
393  // Make sure it is not full before inserting an instruction.
394  assert(!storeQueue.full());
395  assert(stores < storeQueue.capacity());
396 
397  DPRINTF(LSQUnit, "Inserting store PC %s, idx:%i [sn:%lli]\n",
398  store_inst->pcState(), storeQueue.tail(), store_inst->seqNum);
400 
401  store_inst->sqIdx = storeQueue.tail();
402  store_inst->lqIdx = loadQueue.tail() + 1;
403  assert(store_inst->lqIdx > 0);
404  store_inst->lqIt = loadQueue.end();
405 
406  storeQueue.back().set(store_inst);
407 
408  ++stores;
409 }
410 
413 {
414  DynInstPtr temp = memDepViolator;
415 
416  memDepViolator = NULL;
417 
418  return temp;
419 }
420 
421 unsigned
423 {
424  //LQ has an extra dummy entry to differentiate
425  //empty/full conditions. Subtract 1 from the free entries.
426  DPRINTF(LSQUnit, "LQ size: %d, #loads occupied: %d\n",
427  1 + loadQueue.capacity(), loads);
428  return loadQueue.capacity() - loads;
429 }
430 
431 unsigned
433 {
434  //SQ has an extra dummy entry to differentiate
435  //empty/full conditions. Subtract 1 from the free entries.
436  DPRINTF(LSQUnit, "SQ size: %d, #stores occupied: %d\n",
437  1 + storeQueue.capacity(), stores);
438  return storeQueue.capacity() - stores;
439 
440  }
441 
442 void
444 {
445  // Should only ever get invalidations in here
446  assert(pkt->isInvalidate());
447 
448  DPRINTF(LSQUnit, "Got snoop for address %#x\n", pkt->getAddr());
449 
450  for (int x = 0; x < cpu->numContexts(); x++) {
452  bool no_squash = cpu->thread[x]->noSquashFromTC;
453  cpu->thread[x]->noSquashFromTC = true;
455  cpu->thread[x]->noSquashFromTC = no_squash;
456  }
457 
458  if (loadQueue.empty())
459  return;
460 
461  auto iter = loadQueue.begin();
462 
463  Addr invalidate_addr = pkt->getAddr() & cacheBlockMask;
464 
465  DynInstPtr ld_inst = iter->instruction();
466  assert(ld_inst);
467  LSQRequest *req = iter->request();
468 
469  // Check that this snoop didn't just invalidate our lock flag
470  if (ld_inst->effAddrValid() &&
471  req->isCacheBlockHit(invalidate_addr, cacheBlockMask)
472  && ld_inst->memReqFlags & Request::LLSC)
474 
475  bool force_squash = false;
476 
477  while (++iter != loadQueue.end()) {
478  ld_inst = iter->instruction();
479  assert(ld_inst);
480  req = iter->request();
481  if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered())
482  continue;
483 
484  DPRINTF(LSQUnit, "-- inst [sn:%lli] to pktAddr:%#x\n",
485  ld_inst->seqNum, invalidate_addr);
486 
487  if (force_squash ||
488  req->isCacheBlockHit(invalidate_addr, cacheBlockMask)) {
489  if (needsTSO) {
490  // If we have a TSO system, as all loads must be ordered with
491  // all other loads, this load as well as *all* subsequent loads
492  // need to be squashed to prevent possible load reordering.
493  force_squash = true;
494  }
495  if (ld_inst->possibleLoadViolation() || force_squash) {
496  DPRINTF(LSQUnit, "Conflicting load at addr %#x [sn:%lli]\n",
497  pkt->getAddr(), ld_inst->seqNum);
498 
499  // Mark the load for re-execution
500  ld_inst->fault = std::make_shared<ReExec>();
501  req->setStateToFault();
502  } else {
503  DPRINTF(LSQUnit, "HitExternal Snoop for addr %#x [sn:%lli]\n",
504  pkt->getAddr(), ld_inst->seqNum);
505 
506  // Make sure that we don't lose a snoop hitting a LOCKED
507  // address since the LOCK* flags don't get updated until
508  // commit.
509  if (ld_inst->memReqFlags & Request::LLSC)
511 
512  // If a older load checks this and it's true
513  // then we might have missed the snoop
514  // in which case we need to invalidate to be sure
515  ld_inst->hitExternalSnoop(true);
516  }
517  }
518  }
519  return;
520 }
521 
522 Fault
523 LSQUnit::checkViolations(typename LoadQueue::iterator& loadIt,
524  const DynInstPtr& inst)
525 {
526  Addr inst_eff_addr1 = inst->effAddr >> depCheckShift;
527  Addr inst_eff_addr2 = (inst->effAddr + inst->effSize - 1) >> depCheckShift;
528 
534  while (loadIt != loadQueue.end()) {
535  DynInstPtr ld_inst = loadIt->instruction();
536  if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()) {
537  ++loadIt;
538  continue;
539  }
540 
541  Addr ld_eff_addr1 = ld_inst->effAddr >> depCheckShift;
542  Addr ld_eff_addr2 =
543  (ld_inst->effAddr + ld_inst->effSize - 1) >> depCheckShift;
544 
545  if (inst_eff_addr2 >= ld_eff_addr1 && inst_eff_addr1 <= ld_eff_addr2) {
546  if (inst->isLoad()) {
547  // If this load is to the same block as an external snoop
548  // invalidate that we've observed then the load needs to be
549  // squashed as it could have newer data
550  if (ld_inst->hitExternalSnoop()) {
551  if (!memDepViolator ||
552  ld_inst->seqNum < memDepViolator->seqNum) {
553  DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] "
554  "and [sn:%lli] at address %#x\n",
555  inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
556  memDepViolator = ld_inst;
557 
559 
560  return std::make_shared<GenericISA::M5PanicFault>(
561  "Detected fault with inst [sn:%lli] and "
562  "[sn:%lli] at address %#x\n",
563  inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
564  }
565  }
566 
567  // Otherwise, mark the load has a possible load violation and
568  // if we see a snoop before it's commited, we need to squash
569  ld_inst->possibleLoadViolation(true);
570  DPRINTF(LSQUnit, "Found possible load violation at addr: %#x"
571  " between instructions [sn:%lli] and [sn:%lli]\n",
572  inst_eff_addr1, inst->seqNum, ld_inst->seqNum);
573  } else {
574  // A load/store incorrectly passed this store.
575  // Check if we already have a violator, or if it's newer
576  // squash and refetch.
577  if (memDepViolator && ld_inst->seqNum > memDepViolator->seqNum)
578  break;
579 
580  DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] and "
581  "[sn:%lli] at address %#x\n",
582  inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
583  memDepViolator = ld_inst;
584 
586 
587  return std::make_shared<GenericISA::M5PanicFault>(
588  "Detected fault with "
589  "inst [sn:%lli] and [sn:%lli] at address %#x\n",
590  inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
591  }
592  }
593 
594  ++loadIt;
595  }
596  return NoFault;
597 }
598 
599 
600 
601 
602 Fault
604 {
605  // Execute a specific load.
606  Fault load_fault = NoFault;
607 
608  DPRINTF(LSQUnit, "Executing load PC %s, [sn:%lli]\n",
609  inst->pcState(), inst->seqNum);
610 
611  assert(!inst->isSquashed());
612 
613  load_fault = inst->initiateAcc();
614 
615  if (load_fault == NoFault && !inst->readMemAccPredicate()) {
616  assert(inst->readPredicate());
617  inst->setExecuted();
618  inst->completeAcc(nullptr);
619  iewStage->instToCommit(inst);
621  return NoFault;
622  }
623 
624  if (inst->isTranslationDelayed() && load_fault == NoFault)
625  return load_fault;
626 
627  if (load_fault != NoFault && inst->translationCompleted() &&
628  inst->savedReq->isPartialFault() && !inst->savedReq->isComplete()) {
629  assert(inst->savedReq->isSplit());
630  // If we have a partial fault where the mem access is not complete yet
631  // then the cache must have been blocked. This load will be re-executed
632  // when the cache gets unblocked. We will handle the fault when the
633  // mem access is complete.
634  return NoFault;
635  }
636 
637  // If the instruction faulted or predicated false, then we need to send it
638  // along to commit without the instruction completing.
639  if (load_fault != NoFault || !inst->readPredicate()) {
640  // Send this instruction to commit, also make sure iew stage
641  // realizes there is activity. Mark it as executed unless it
642  // is a strictly ordered load that needs to hit the head of
643  // commit.
644  if (!inst->readPredicate())
645  inst->forwardOldRegs();
646  DPRINTF(LSQUnit, "Load [sn:%lli] not executed from %s\n",
647  inst->seqNum,
648  (load_fault != NoFault ? "fault" : "predication"));
649  if (!(inst->hasRequest() && inst->strictlyOrdered()) ||
650  inst->isAtCommit()) {
651  inst->setExecuted();
652  }
653  iewStage->instToCommit(inst);
655  } else {
656  if (inst->effAddrValid()) {
657  auto it = inst->lqIt;
658  ++it;
659 
660  if (checkLoads)
661  return checkViolations(it, inst);
662  }
663  }
664 
665  return load_fault;
666 }
667 
668 Fault
670 {
671  // Make sure that a store exists.
672  assert(stores != 0);
673 
674  int store_idx = store_inst->sqIdx;
675 
676  DPRINTF(LSQUnit, "Executing store PC %s [sn:%lli]\n",
677  store_inst->pcState(), store_inst->seqNum);
678 
679  assert(!store_inst->isSquashed());
680 
681  // Check the recently completed loads to see if any match this store's
682  // address. If so, then we have a memory ordering violation.
683  typename LoadQueue::iterator loadIt = store_inst->lqIt;
684 
685  Fault store_fault = store_inst->initiateAcc();
686 
687  if (store_inst->isTranslationDelayed() &&
688  store_fault == NoFault)
689  return store_fault;
690 
691  if (!store_inst->readPredicate()) {
692  DPRINTF(LSQUnit, "Store [sn:%lli] not executed from predication\n",
693  store_inst->seqNum);
694  store_inst->forwardOldRegs();
695  return store_fault;
696  }
697 
698  if (storeQueue[store_idx].size() == 0) {
699  DPRINTF(LSQUnit,"Fault on Store PC %s, [sn:%lli], Size = 0\n",
700  store_inst->pcState(), store_inst->seqNum);
701 
702  return store_fault;
703  }
704 
705  assert(store_fault == NoFault);
706 
707  if (store_inst->isStoreConditional() || store_inst->isAtomic()) {
708  // Store conditionals and Atomics need to set themselves as able to
709  // writeback if we haven't had a fault by here.
710  storeQueue[store_idx].canWB() = true;
711 
712  ++storesToWB;
713  }
714 
715  return checkViolations(loadIt, store_inst);
716 
717 }
718 
719 void
721 {
722  assert(loadQueue.front().valid());
723 
724  DynInstPtr inst = loadQueue.front().instruction();
725 
726  DPRINTF(LSQUnit, "Committing head load instruction, PC %s\n",
727  inst->pcState());
728 
729  // Update histogram with memory latency from load
730  // Only take latency from load demand that where issued and did not fault
731  if (!inst->isInstPrefetch() && !inst->isDataPrefetch()
732  && inst->firstIssue != -1
733  && inst->lastWakeDependents != -1) {
735  inst->lastWakeDependents - inst->firstIssue));
736  }
737 
738  loadQueue.front().clear();
740 
741  --loads;
742 }
743 
744 void
746 {
747  assert(loads == 0 || loadQueue.front().valid());
748 
749  while (loads != 0 && loadQueue.front().instruction()->seqNum
750  <= youngest_inst) {
751  commitLoad();
752  }
753 }
754 
755 void
757 {
758  assert(stores == 0 || storeQueue.front().valid());
759 
760  /* Forward iterate the store queue (age order). */
761  for (auto& x : storeQueue) {
762  assert(x.valid());
763  // Mark any stores that are now committed and have not yet
764  // been marked as able to write back.
765  if (!x.canWB()) {
766  if (x.instruction()->seqNum > youngest_inst) {
767  break;
768  }
769  DPRINTF(LSQUnit, "Marking store as able to write back, PC "
770  "%s [sn:%lli]\n",
771  x.instruction()->pcState(),
772  x.instruction()->seqNum);
773 
774  x.canWB() = true;
775 
776  ++storesToWB;
777  }
778  }
779 }
780 
781 void
783 {
784  assert(isStoreBlocked);
785  storeWBIt->request()->sendPacketToCache();
786  if (storeWBIt->request()->isSent()){
787  storePostSend();
788  }
789 }
790 
791 void
793 {
794  if (isStoreBlocked) {
795  DPRINTF(LSQUnit, "Writing back blocked store\n");
797  }
798 
799  while (storesToWB > 0 &&
800  storeWBIt.dereferenceable() &&
801  storeWBIt->valid() &&
802  storeWBIt->canWB() &&
803  ((!needsTSO) || (!storeInFlight)) &&
804  lsq->cachePortAvailable(false)) {
805 
806  if (isStoreBlocked) {
807  DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
808  " is blocked!\n");
809  break;
810  }
811 
812  // Store didn't write any data so no need to write it back to
813  // memory.
814  if (storeWBIt->size() == 0) {
815  /* It is important that the preincrement happens at (or before)
816  * the call, as the the code of completeStore checks
817  * storeWBIt. */
819  continue;
820  }
821 
822  if (storeWBIt->instruction()->isDataPrefetch()) {
823  storeWBIt++;
824  continue;
825  }
826 
827  assert(storeWBIt->hasRequest());
828  assert(!storeWBIt->committed());
829 
830  DynInstPtr inst = storeWBIt->instruction();
831  LSQRequest* req = storeWBIt->request();
832 
833  // Process store conditionals or store release after all previous
834  // stores are completed
835  if ((req->mainRequest()->isLLSC() ||
836  req->mainRequest()->isRelease()) &&
837  (storeWBIt.idx() != storeQueue.head())) {
838  DPRINTF(LSQUnit, "Store idx:%i PC:%s to Addr:%#x "
839  "[sn:%lli] is %s%s and not head of the queue\n",
840  storeWBIt.idx(), inst->pcState(),
841  req->request()->getPaddr(), inst->seqNum,
842  req->mainRequest()->isLLSC() ? "SC" : "",
843  req->mainRequest()->isRelease() ? "/Release" : "");
844  break;
845  }
846 
847  storeWBIt->committed() = true;
848 
849  assert(!inst->memData);
850  inst->memData = new uint8_t[req->_size];
851 
852  if (storeWBIt->isAllZeros())
853  memset(inst->memData, 0, req->_size);
854  else
855  memcpy(inst->memData, storeWBIt->data(), req->_size);
856 
857 
858  if (req->senderState() == nullptr) {
859  SQSenderState *state = new SQSenderState(storeWBIt);
860  state->isLoad = false;
861  state->needWB = false;
862  state->inst = inst;
863 
864  req->senderState(state);
865  if (inst->isStoreConditional() || inst->isAtomic()) {
866  /* Only store conditionals and atomics need a writeback. */
867  state->needWB = true;
868  }
869  }
870  req->buildPackets();
871 
872  DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%s "
873  "to Addr:%#x, data:%#x [sn:%lli]\n",
874  storeWBIt.idx(), inst->pcState(),
875  req->request()->getPaddr(), (int)*(inst->memData),
876  inst->seqNum);
877 
878  // @todo: Remove this SC hack once the memory system handles it.
879  if (inst->isStoreConditional()) {
880  // Disable recording the result temporarily. Writing to
881  // misc regs normally updates the result, but this is not
882  // the desired behavior when handling store conditionals.
883  inst->recordResult(false);
884  bool success = TheISA::handleLockedWrite(inst.get(),
885  req->request(), cacheBlockMask);
886  inst->recordResult(true);
887  req->packetSent();
888 
889  if (!success) {
890  req->complete();
891  // Instantly complete this store.
892  DPRINTF(LSQUnit, "Store conditional [sn:%lli] failed. "
893  "Instantly completing it.\n",
894  inst->seqNum);
895  PacketPtr new_pkt = new Packet(*req->packet());
896  WritebackEvent *wb = new WritebackEvent(inst,
897  new_pkt, this);
898  cpu->schedule(wb, curTick() + 1);
900  if (!storeQueue.empty())
901  storeWBIt++;
902  else
904  continue;
905  }
906  }
907 
908  if (req->request()->isLocalAccess()) {
909  assert(!inst->isStoreConditional());
910  assert(!inst->inHtmTransactionalState());
911  gem5::ThreadContext *thread = cpu->tcBase(lsqID);
912  PacketPtr main_pkt = new Packet(req->mainRequest(),
914  main_pkt->dataStatic(inst->memData);
915  req->request()->localAccessor(thread, main_pkt);
916  delete main_pkt;
918  storeWBIt++;
919  continue;
920  }
921  /* Send to cache */
922  req->sendPacketToCache();
923 
924  /* If successful, do the post send */
925  if (req->isSent()) {
926  storePostSend();
927  } else {
928  DPRINTF(LSQUnit, "D-Cache became blocked when writing [sn:%lli], "
929  "will retry later\n",
930  inst->seqNum);
931  }
932  }
933  assert(stores >= 0 && storesToWB >= 0);
934 }
935 
936 void
937 LSQUnit::squash(const InstSeqNum &squashed_num)
938 {
939  DPRINTF(LSQUnit, "Squashing until [sn:%lli]!"
940  "(Loads:%i Stores:%i)\n", squashed_num, loads, stores);
941 
942  while (loads != 0 &&
943  loadQueue.back().instruction()->seqNum > squashed_num) {
944  DPRINTF(LSQUnit,"Load Instruction PC %s squashed, "
945  "[sn:%lli]\n",
946  loadQueue.back().instruction()->pcState(),
947  loadQueue.back().instruction()->seqNum);
948 
949  if (isStalled() && loadQueue.tail() == stallingLoadIdx) {
950  stalled = false;
951  stallingStoreIsn = 0;
952  stallingLoadIdx = 0;
953  }
954 
955  // hardware transactional memory
956  // Squashing instructions can alter the transaction nesting depth
957  // and must be corrected before fetching resumes.
958  if (loadQueue.back().instruction()->isHtmStart())
959  {
960  htmStarts = (--htmStarts < 0) ? 0 : htmStarts;
961  DPRINTF(HtmCpu, ">> htmStarts-- (%d) : htmStops (%d)\n",
963  }
964  if (loadQueue.back().instruction()->isHtmStop())
965  {
966  htmStops = (--htmStops < 0) ? 0 : htmStops;
967  DPRINTF(HtmCpu, ">> htmStarts (%d) : htmStops-- (%d)\n",
969  }
970  // Clear the smart pointer to make sure it is decremented.
971  loadQueue.back().instruction()->setSquashed();
972  loadQueue.back().clear();
973 
974  --loads;
975 
978  }
979 
980  // hardware transactional memory
981  // scan load queue (from oldest to youngest) for most recent valid htmUid
982  auto scan_it = loadQueue.begin();
983  uint64_t in_flight_uid = 0;
984  while (scan_it != loadQueue.end()) {
985  if (scan_it->instruction()->isHtmStart() &&
986  !scan_it->instruction()->isSquashed()) {
987  in_flight_uid = scan_it->instruction()->getHtmTransactionUid();
988  DPRINTF(HtmCpu, "loadQueue[%d]: found valid HtmStart htmUid=%u\n",
989  scan_it._idx, in_flight_uid);
990  }
991  scan_it++;
992  }
993  // If there's a HtmStart in the pipeline then use its htmUid,
994  // otherwise use the most recently committed uid
995  const auto& htm_cpt = cpu->tcBase(lsqID)->getHtmCheckpointPtr();
996  if (htm_cpt) {
997  const uint64_t old_local_htm_uid = htm_cpt->getHtmUid();
998  uint64_t new_local_htm_uid;
999  if (in_flight_uid > 0)
1000  new_local_htm_uid = in_flight_uid;
1001  else
1002  new_local_htm_uid = lastRetiredHtmUid;
1003 
1004  if (old_local_htm_uid != new_local_htm_uid) {
1005  DPRINTF(HtmCpu, "flush: lastRetiredHtmUid=%u\n",
1007  DPRINTF(HtmCpu, "flush: resetting localHtmUid=%u\n",
1008  new_local_htm_uid);
1009 
1010  htm_cpt->setHtmUid(new_local_htm_uid);
1011  }
1012  }
1013 
1014  if (memDepViolator && squashed_num < memDepViolator->seqNum) {
1015  memDepViolator = NULL;
1016  }
1017 
1018  while (stores != 0 &&
1019  storeQueue.back().instruction()->seqNum > squashed_num) {
1020  // Instructions marked as can WB are already committed.
1021  if (storeQueue.back().canWB()) {
1022  break;
1023  }
1024 
1025  DPRINTF(LSQUnit,"Store Instruction PC %s squashed, "
1026  "idx:%i [sn:%lli]\n",
1027  storeQueue.back().instruction()->pcState(),
1028  storeQueue.tail(), storeQueue.back().instruction()->seqNum);
1029 
1030  // I don't think this can happen. It should have been cleared
1031  // by the stalling load.
1032  if (isStalled() &&
1033  storeQueue.back().instruction()->seqNum == stallingStoreIsn) {
1034  panic("Is stalled should have been cleared by stalling load!\n");
1035  stalled = false;
1036  stallingStoreIsn = 0;
1037  }
1038 
1039  // Clear the smart pointer to make sure it is decremented.
1040  storeQueue.back().instruction()->setSquashed();
1041 
1042  // Must delete request now that it wasn't handed off to
1043  // memory. This is quite ugly. @todo: Figure out the proper
1044  // place to really handle request deletes.
1045  storeQueue.back().clear();
1046  --stores;
1047 
1048  storeQueue.pop_back();
1050  }
1051 }
1052 
1053 uint64_t
1055 {
1056  const auto& htm_cpt = cpu->tcBase(lsqID)->getHtmCheckpointPtr();
1057  return htm_cpt->getHtmUid();
1058 }
1059 
1060 void
1062 {
1063  if (isStalled() &&
1064  storeWBIt->instruction()->seqNum == stallingStoreIsn) {
1065  DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
1066  "load idx:%i\n",
1068  stalled = false;
1069  stallingStoreIsn = 0;
1071  }
1072 
1073  if (!storeWBIt->instruction()->isStoreConditional()) {
1074  // The store is basically completed at this time. This
1075  // only works so long as the checker doesn't try to
1076  // verify the value in memory for stores.
1077  storeWBIt->instruction()->setCompleted();
1078 
1079  if (cpu->checker) {
1080  cpu->checker->verify(storeWBIt->instruction());
1081  }
1082  }
1083 
1084  if (needsTSO) {
1085  storeInFlight = true;
1086  }
1087 
1088  storeWBIt++;
1089 }
1090 
// LSQUnit::writeback(inst, pkt) -- presumed signature; the declarator
// (original line 1092) was a hyperlink dropped by the page extraction.
// Writes back an instruction whose memory access has returned: squashed
// instructions are ignored; otherwise the access is completed via
// completeAcc() unless a fault is pending (in which case completing would
// discard the fault), and the instruction is handed to commit.
1091 void
1093 {
1094  iewStage->wakeCPU();
1095 
1096  // Squashed instructions do not need to complete their access.
1097  if (inst->isSquashed()) {
1098  assert (!inst->isStore() || inst->isStoreConditional());
// NOTE(review): line 1099 was dropped by the extraction -- presumably
// ++stats.ignoredResponses (see the index entry for ignoredResponses).
1100  return;
1101  }
1102 
1103  if (!inst->isExecuted()) {
1104  inst->setExecuted();
1105 
1106  if (inst->fault == NoFault) {
1107  // Complete access to copy data to proper place.
1108  inst->completeAcc(pkt);
1109  } else {
1110  // If the instruction has an outstanding fault, we cannot complete
1111  // the access as this discards the current fault.
1112 
1113  // If we have an outstanding fault, the fault should only be of
1114  // type ReExec or - in case of a SplitRequest - a partial
1115  // translation fault
1116 
1117  // Unless it's a hardware transactional memory fault
1118  auto htm_fault = std::dynamic_pointer_cast<
1119  GenericHtmFailureFault>(inst->fault);
1120 
1121  if (!htm_fault) {
1122  assert(dynamic_cast<ReExec*>(inst->fault.get()) != nullptr ||
1123  inst->savedReq->isPartialFault());
1124 
1125  } else if (!pkt->htmTransactionFailedInCache()) {
1126  // Situation in which the instruction has a hardware
1127  // transactional memory fault but not the packet itself. This
1128  // can occur with ldp_uop microops since access is spread over
1129  // multiple packets.
1130  DPRINTF(HtmCpu,
1131  "%s writeback with HTM failure fault, "
1132  "however, completing packet is not aware of "
1133  "transaction failure. cause=%s htmUid=%u\n",
1134  inst->staticInst->getName(),
1135  htmFailureToStr(htm_fault->getHtmFailureFaultCause()),
1136  htm_fault->getHtmUid());
1137  }
1138 
1139  DPRINTF(LSQUnit, "Not completing instruction [sn:%lli] access "
1140  "due to pending fault.\n", inst->seqNum);
1141  }
1142  }
1143 
1144  // Need to insert instruction into queue to commit
1145  iewStage->instToCommit(inst);
1146 
// NOTE(review): lines 1147 and 1150 were dropped by the extraction --
// likely the activity/probe notification and, per the comment below,
// a checkMisprediction(inst) call -- confirm against upstream gem5.
1148 
1149  // see if this load changed the PC
1151 }
1152 
// LSQUnit::completeStore()
// Marks the SQ entry at store_idx completed, retires any completed entries
// from the head of the store queue, clears a forwarding stall caused by
// this store, and reports the store to the checker (except for store
// conditionals, which must update misc registers at commit first).
// NOTE(review): lines 1163, 1171, 1194 and 1197 were dropped by the page
// extraction -- 1171 is presumably storeQueue.pop_front() inside the
// retire loop; 1194/1197 the DPRINTF arguments and stalled-load replay.
1153 void
1154 LSQUnit::completeStore(typename StoreQueue::iterator store_idx)
1155 {
1156  assert(store_idx->valid());
1157  store_idx->completed() = true;
1158  --storesToWB;
1159  // A bit conservative because a store completion may not free up entries,
1160  // but hopefully avoids two store completions in one cycle from making
1161  // the CPU tick twice.
1162  cpu->wakeCPU();
1164 
1165  /* We 'need' a copy here because we may clear the entry from the
1166  * store queue. */
1167  DynInstPtr store_inst = store_idx->instruction();
1168  if (store_idx == storeQueue.begin()) {
1169  do {
1170  storeQueue.front().clear();
1172  --stores;
// NOTE(review): the condition evaluates front().completed() BEFORE
// !empty(); if the retire loop drains the queue, front() is read on an
// empty queue -- flag for upstream confirmation (short-circuit order).
1173  } while (storeQueue.front().completed() &&
1174  !storeQueue.empty());
1175 
1176  iewStage->updateLSQNextCycle = true;
1177  }
1178 
1179  DPRINTF(LSQUnit, "Completing store [sn:%lli], idx:%i, store head "
1180  "idx:%i\n",
1181  store_inst->seqNum, store_idx.idx() - 1, storeQueue.head() - 1);
1182 
1183 #if TRACING_ON
1184  if (debug::O3PipeView) {
// Record time from fetch to store completion for pipeline viewing.
1185  store_inst->storeTick =
1186  curTick() - store_inst->fetchTick;
1187  }
1188 #endif
1189 
1190  if (isStalled() &&
1191  store_inst->seqNum == stallingStoreIsn) {
1192  DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
1193  "load idx:%i\n",
1195  stalled = false;
1196  stallingStoreIsn = 0;
1198  }
1199 
1200  store_inst->setCompleted();
1201 
1202  if (needsTSO) {
// Under TSO the single outstanding store has now drained.
1203  storeInFlight = false;
1204  }
1205 
1206  // Tell the checker we've completed this instruction. Some stores
1207  // may get reported twice to the checker, but the checker can
1208  // handle that case.
1209  // Store conditionals cannot be sent to the checker yet, they have
1210  // to update the misc registers first which should take place
1211  // when they commit
1212  if (cpu->checker && !store_inst->isStoreConditional()) {
1213  cpu->checker->verify(store_inst);
1214  }
1215 }
1216 
// LSQUnit::trySendPacket()
// Attempts to issue data_pkt to the D-cache. Fails (returns false) when
// the LSQ already sees the cache as blocked, no cache port is available
// for this access type, or sendTimingReq() is rejected by the port. On
// success the port is marked busy and the request's packet accounting is
// updated; on failure the request is told the packet was not sent, and a
// store additionally marks isStoreBlocked for the retry path.
// NOTE(review): line 1245 was dropped by the page extraction -- likely
// ++stats.blockedByCache (see the index entry for blockedByCache).
1217 bool
1218 LSQUnit::trySendPacket(bool isLoad, PacketPtr data_pkt)
1219 {
1220  bool ret = true;
1221  bool cache_got_blocked = false;
1222 
1223  auto state = dynamic_cast<LSQSenderState*>(data_pkt->senderState);
1224 
1225  if (!lsq->cacheBlocked() &&
1226  lsq->cachePortAvailable(isLoad)) {
1227  if (!dcachePort->sendTimingReq(data_pkt)) {
1228  ret = false;
1229  cache_got_blocked = true;
1230  }
1231  } else {
1232  ret = false;
1233  }
1234 
1235  if (ret) {
1236  if (!isLoad) {
1237  isStoreBlocked = false;
1238  }
1239  lsq->cachePortBusy(isLoad);
1240  state->outstanding++;
1241  state->request()->packetSent();
1242  } else {
1243  if (cache_got_blocked) {
1244  lsq->cacheBlocked(true);
1246  }
1247  if (!isLoad) {
// Only the store at the writeback iterator can be in flight here.
1248  assert(state->request() == storeWBIt->request());
1249  isStoreBlocked = true;
1250  }
1251  state->request()->packetNotSent();
1252  }
1253  DPRINTF(LSQUnit, "Memory request (pkt: %s) from inst [sn:%llu] was"
1254  " %ssent (cache is blocked: %d, cache_got_blocked: %d)\n",
1255  data_pkt->print(), state->inst->seqNum,
1256  ret ? "": "not ", lsq->cacheBlocked(), cache_got_blocked);
1257  return ret;
1258 }
1259 
// LSQUnit::recvRetry() -- presumed name; the declarator (line 1261) was
// dropped by the page extraction. Handles a retry notification from the
// D-cache port when a previously blocked store can be resent.
// NOTE(review): line 1265 was also dropped -- presumably the call that
// re-issues the blocked store (writebackBlockedStore) -- confirm upstream.
1260 void
1262 {
1263  if (isStoreBlocked) {
1264  DPRINTF(LSQUnit, "Receiving retry: blocked store\n");
1266  }
1267 }
1268 
// LSQUnit::dumpInsts() const -- declarator (line 1270) was dropped by the
// page extraction; the index confirms the name. Debug helper: prints the
// PC and sequence number of every instruction currently in the load queue
// and in the store queue.
1269 void
1271 {
1272  cprintf("Load store queue: Dumping instructions.\n");
1273  cprintf("Load queue size: %i\n", loads);
1274  cprintf("Load queue: ");
1275 
1276  for (const auto& e: loadQueue) {
1277  const DynInstPtr &inst(e.instruction());
1278  cprintf("%s.[sn:%llu] ", inst->pcState(), inst->seqNum);
1279  }
1280  cprintf("\n");
1281 
1282  cprintf("Store queue size: %i\n", stores);
1283  cprintf("Store queue: ");
1284 
1285  for (const auto& e: storeQueue) {
1286  const DynInstPtr &inst(e.instruction());
1287  cprintf("%s.[sn:%llu] ", inst->pcState(), inst->seqNum);
1288  }
1289 
1290  cprintf("\n");
1291 }
1292 
1293 void LSQUnit::schedule(Event& ev, Tick when) { cpu->schedule(ev, when); }
1294 
1296 
// LSQUnit::cacheLineSize() -- presumed name; the declarator (line 1298)
// was dropped by the page extraction. Forwards the system cache line size
// from the CPU (see BaseCPU::cacheLineSize in the index).
1297 unsigned int
1299 {
1300  return cpu->cacheLineSize();
1301 }
1302 
// LSQUnit::read()
// Executes the load at LQ index load_idx using the translated request req.
// Handles, in order: strictly-ordered loads not yet at commit (reschedule
// and fault), LLSC lock tracking, local (device) accesses, HTM
// start/commit commands that must not leave the core, store-to-load
// forwarding against older SQ entries (full forward, or stall on partial
// overlap), and finally a normal timing access to the D-cache.
// Returns NoFault except for the strictly-ordered panic-fault case.
// NOTE(review): several hyperlinked lines were dropped by the page
// extraction (1326, 1433, 1445-1446, 1465, 1545, 1569) -- flagged inline
// below; confirm each against upstream gem5.
1303 Fault
1304 LSQUnit::read(LSQRequest *req, int load_idx)
1305 {
1306  LQEntry& load_req = loadQueue[load_idx];
1307  const DynInstPtr& load_inst = load_req.instruction();
1308 
1309  load_req.setRequest(req);
1310  assert(load_inst);
1311 
1312  assert(!load_inst->isExecuted());
1313 
1314  // Make sure this isn't a strictly ordered load
1315  // A bit of a hackish way to get strictly ordered accesses to work
1316  // only if they're at the head of the LSQ and are ready to commit
1317  // (at the head of the ROB too).
1318 
1319  if (req->mainRequest()->isStrictlyOrdered() &&
1320  (load_idx != loadQueue.head() || !load_inst->isAtCommit())) {
1321  // Tell IQ/mem dep unit that this instruction will need to be
1322  // rescheduled eventually
1323  iewStage->rescheduleMemInst(load_inst);
1324  load_inst->clearIssued();
1325  load_inst->effAddrValid(false);
// NOTE(review): line 1326 was dropped -- presumably
// ++stats.rescheduledLoads (see the rescheduledLoads index entry).
1327  DPRINTF(LSQUnit, "Strictly ordered load [sn:%lli] PC %s\n",
1328  load_inst->seqNum, load_inst->pcState());
1329 
1330  // Must delete request now that it wasn't handed off to
1331  // memory. This is quite ugly. @todo: Figure out the proper
1332  // place to really handle request deletes.
1333  load_req.setRequest(nullptr);
1334  req->discard();
1335  return std::make_shared<GenericISA::M5PanicFault>(
1336  "Strictly ordered load [sn:%llx] PC %s\n",
1337  load_inst->seqNum, load_inst->pcState());
1338  }
1339 
1340  DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, "
1341  "storeHead: %i addr: %#x%s\n",
1342  load_idx - 1, load_inst->sqIt._idx, storeQueue.head() - 1,
1343  req->mainRequest()->getPaddr(), req->isSplit() ? " split" : "");
1344 
1345  if (req->mainRequest()->isLLSC()) {
1346  // Disable recording the result temporarily. Writing to misc
1347  // regs normally updates the result, but this is not the
1348  // desired behavior when handling store conditionals.
1349  load_inst->recordResult(false);
1350  TheISA::handleLockedRead(load_inst.get(), req->mainRequest());
1351  load_inst->recordResult(true);
1352  }
1353 
1354  if (req->mainRequest()->isLocalAccess()) {
1355  assert(!load_inst->memData);
1356  assert(!load_inst->inHtmTransactionalState());
1357  load_inst->memData = new uint8_t[MaxDataBytes];
1358 
1359  gem5::ThreadContext *thread = cpu->tcBase(lsqID);
1360  PacketPtr main_pkt = new Packet(req->mainRequest(), MemCmd::ReadReq);
1361 
1362  main_pkt->dataStatic(load_inst->memData);
1363 
1364  Cycles delay = req->mainRequest()->localAccessor(thread, main_pkt);
1365 
1366  WritebackEvent *wb = new WritebackEvent(load_inst, main_pkt, this);
1367  cpu->schedule(wb, cpu->clockEdge(delay));
1368  return NoFault;
1369  }
1370 
1371  // hardware transactional memory
1372  if (req->mainRequest()->isHTMStart() || req->mainRequest()->isHTMCommit())
1373  {
1374  // don't want to send nested transactionStarts and
1375  // transactionStops outside of core, e.g. to Ruby
1376  if (req->mainRequest()->getFlags().isSet(Request::NO_ACCESS)) {
1377  Cycles delay(0);
1378  PacketPtr data_pkt =
1379  new Packet(req->mainRequest(), MemCmd::ReadReq);
1380 
1381  // Allocate memory if this is the first time a load is issued.
1382  if (!load_inst->memData) {
1383  load_inst->memData =
1384  new uint8_t[req->mainRequest()->getSize()];
1385  // sanity checks expect zero in request's data
1386  memset(load_inst->memData, 0, req->mainRequest()->getSize());
1387  }
1388 
1389  data_pkt->dataStatic(load_inst->memData);
1390  if (load_inst->inHtmTransactionalState()) {
1391  data_pkt->setHtmTransactional(
1392  load_inst->getHtmTransactionUid());
1393  }
1394  data_pkt->makeResponse();
1395 
1396  WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this);
1397  cpu->schedule(wb, cpu->clockEdge(delay));
1398  return NoFault;
1399  }
1400  }
1401 
1402  // Check the SQ for any previous stores that might lead to forwarding
1403  auto store_it = load_inst->sqIt;
1404  assert (store_it >= storeWBIt);
1405  // End once we've reached the top of the LSQ
1406  while (store_it != storeWBIt && !load_inst->isDataPrefetch()) {
1407  // Move the index to one younger
1408  store_it--;
1409  assert(store_it->valid());
1410  assert(store_it->instruction()->seqNum < load_inst->seqNum);
1411  int store_size = store_it->size();
1412 
1413  // Cache maintenance instructions go down via the store
1414  // path but they carry no data and they shouldn't be
1415  // considered for forwarding
1416  if (store_size != 0 && !store_it->instruction()->strictlyOrdered() &&
1417  !(store_it->request()->mainRequest() &&
1418  store_it->request()->mainRequest()->isCacheMaintenance())) {
1419  assert(store_it->instruction()->effAddrValid());
1420 
1421  // Check if the store data is within the lower and upper bounds of
1422  // addresses that the request needs.
1423  auto req_s = req->mainRequest()->getVaddr();
1424  auto req_e = req_s + req->mainRequest()->getSize();
1425  auto st_s = store_it->instruction()->effAddr;
1426  auto st_e = st_s + store_size;
1427 
1428  bool store_has_lower_limit = req_s >= st_s;
1429  bool store_has_upper_limit = req_e <= st_e;
1430  bool lower_load_has_store_part = req_s < st_e;
1431  bool upper_load_has_store_part = req_e > st_s;
1432 
// NOTE(review): line 1433 was dropped -- presumably the declaration of
// `coverage` (AddrRangeCoverage) used below -- confirm upstream.
1434 
1435  // If the store entry is not atomic (atomic does not have valid
1436  // data), the store has all of the data needed, and
1437  // the load is not LLSC, then
1438  // we can forward data from the store to the load
1439  if (!store_it->instruction()->isAtomic() &&
1440  store_has_lower_limit && store_has_upper_limit &&
1441  !req->mainRequest()->isLLSC()) {
1442 
1443  const auto& store_req = store_it->request()->mainRequest();
// NOTE(review): lines 1445-1446 were dropped -- the ternary's arms
// (Partial vs Full AddrRangeCoverage for masked stores) -- confirm.
1444  coverage = store_req->isMasked() ?
1447  } else if (
1448  // This is the partial store-load forwarding case where a store
1449  // has only part of the load's data and the load isn't LLSC
1450  (!req->mainRequest()->isLLSC() &&
1451  ((store_has_lower_limit && lower_load_has_store_part) ||
1452  (store_has_upper_limit && upper_load_has_store_part) ||
1453  (lower_load_has_store_part && upper_load_has_store_part))) ||
1454  // The load is LLSC, and the store has all or part of the
1455  // load's data
1456  (req->mainRequest()->isLLSC() &&
1457  ((store_has_lower_limit || upper_load_has_store_part) &&
1458  (store_has_upper_limit || lower_load_has_store_part))) ||
1459  // The store entry is atomic and has all or part of the load's
1460  // data
1461  (store_it->instruction()->isAtomic() &&
1462  ((store_has_lower_limit || upper_load_has_store_part) &&
1463  (store_has_upper_limit || lower_load_has_store_part)))) {
1464 
// NOTE(review): line 1465 was dropped -- presumably
// coverage = AddrRangeCoverage::PartialAddrRangeCoverage; -- confirm.
1466  }
1467 
1468  if (coverage == AddrRangeCoverage::FullAddrRangeCoverage) {
1469  // Get shift amount for offset into the store's data.
1470  int shift_amt = req->mainRequest()->getVaddr() -
1471  store_it->instruction()->effAddr;
1472 
1473  // Allocate memory if this is the first time a load is issued.
1474  if (!load_inst->memData) {
1475  load_inst->memData =
1476  new uint8_t[req->mainRequest()->getSize()];
1477  }
1478  if (store_it->isAllZeros())
1479  memset(load_inst->memData, 0,
1480  req->mainRequest()->getSize());
1481  else
1482  memcpy(load_inst->memData,
1483  store_it->data() + shift_amt,
1484  req->mainRequest()->getSize());
1485 
1486  DPRINTF(LSQUnit, "Forwarding from store idx %i to load to "
1487  "addr %#x\n", store_it._idx,
1488  req->mainRequest()->getVaddr());
1489 
1490  PacketPtr data_pkt = new Packet(req->mainRequest(),
1491  MemCmd::ReadReq);
1492  data_pkt->dataStatic(load_inst->memData);
1493 
1494  // hardware transactional memory
1495  // Store to load forwarding within a transaction
1496  // This should be okay because the store will be sent to
1497  // the memory subsystem and subsequently get added to the
1498  // write set of the transaction. The write set has a stronger
1499  // property than the read set, so the load doesn't necessarily
1500  // have to be there.
1501  assert(!req->mainRequest()->isHTMCmd());
1502  if (load_inst->inHtmTransactionalState()) {
1503  assert (!storeQueue[store_it._idx].completed());
1504  assert (
1505  storeQueue[store_it._idx].instruction()->
1506  inHtmTransactionalState());
1507  assert (
1508  load_inst->getHtmTransactionUid() ==
1509  storeQueue[store_it._idx].instruction()->
1510  getHtmTransactionUid());
1511  data_pkt->setHtmTransactional(
1512  load_inst->getHtmTransactionUid());
1513  DPRINTF(HtmCpu, "HTM LD (ST2LDF) "
1514  "pc=0x%lx - vaddr=0x%lx - "
1515  "paddr=0x%lx - htmUid=%u\n",
1516  load_inst->instAddr(),
1517  data_pkt->req->hasVaddr() ?
1518  data_pkt->req->getVaddr() : 0lu,
1519  data_pkt->getAddr(),
1520  load_inst->getHtmTransactionUid());
1521  }
1522 
1523  if (req->isAnyOutstandingRequest()) {
1524  assert(req->_numOutstandingPackets > 0);
1525  // There are memory request packets in flight already.
1526  // This may happen if the store was not complete the
1527  // first time this load got executed. Signal the senderState
1528  // that response packets should be discarded.
1529  req->discardSenderState();
1530  }
1531 
1532  WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt,
1533  this);
1534 
1535  // We'll say this has a 1 cycle load-store forwarding latency
1536  // for now.
1537  // @todo: Need to make this a parameter.
1538  cpu->schedule(wb, curTick());
1539 
1540  // Don't need to do anything special for split loads.
1541  ++stats.forwLoads;
1542 
1543  return NoFault;
1544  } else if (
// NOTE(review): line 1545 was dropped -- presumably the test
// coverage == AddrRangeCoverage::PartialAddrRangeCoverage) { -- confirm.
1546  // If it's already been written back, then don't worry about
1547  // stalling on it.
1548  if (store_it->completed()) {
1549  panic("Should not check one of these");
1550  continue;
1551  }
1552 
1553  // Must stall load and force it to retry, so long as it's the
1554  // oldest load that needs to do so.
1555  if (!stalled ||
1556  (stalled &&
1557  load_inst->seqNum <
1558  loadQueue[stallingLoadIdx].instruction()->seqNum)) {
1559  stalled = true;
1560  stallingStoreIsn = store_it->instruction()->seqNum;
1561  stallingLoadIdx = load_idx;
1562  }
1563 
1564  // Tell IQ/mem dep unit that this instruction will need to be
1565  // rescheduled eventually
1566  iewStage->rescheduleMemInst(load_inst);
1567  load_inst->clearIssued();
1568  load_inst->effAddrValid(false);
// NOTE(review): line 1569 was dropped -- presumably
// ++stats.rescheduledLoads -- confirm against upstream.
1570 
1571  // Do not generate a writeback event as this instruction is not
1572  // complete.
1573  DPRINTF(LSQUnit, "Load-store forwarding mis-match. "
1574  "Store idx %i to load addr %#x\n",
1575  store_it._idx, req->mainRequest()->getVaddr());
1576 
1577  // Must discard the request.
1578  req->discard();
1579  load_req.setRequest(nullptr);
1580  return NoFault;
1581  }
1582  }
1583  }
1584 
1585  // If there's no forwarding case, then go access memory
1586  DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %s\n",
1587  load_inst->seqNum, load_inst->pcState());
1588 
1589  // Allocate memory if this is the first time a load is issued.
1590  if (!load_inst->memData) {
1591  load_inst->memData = new uint8_t[req->mainRequest()->getSize()];
1592  }
1593 
1594 
1595  // hardware transactional memory
1596  if (req->mainRequest()->isHTMCmd()) {
1597  // this is a simple sanity check
1598  // the Ruby cache controller will set
1599  // memData to 0x0ul if successful.
1600  *load_inst->memData = (uint64_t) 0x1ull;
1601  }
1602 
1603  // For now, load throughput is constrained by the number of
1604  // load FUs only, and loads do not consume a cache port (only
1605  // stores do).
1606  // @todo We should account for cache port contention
1607  // and arbitrate between loads and stores.
1608 
1609  // if the cache is not blocked, do cache access
1610  if (req->senderState() == nullptr) {
1611  LQSenderState *state = new LQSenderState(
1612  loadQueue.getIterator(load_idx));
1613  state->isLoad = true;
1614  state->inst = load_inst;
1615  state->isSplit = req->isSplit();
1616  req->senderState(state);
1617  }
1618  req->buildPackets();
1619  req->sendPacketToCache();
1620  if (!req->isSent())
1621  iewStage->blockMemInst(load_inst);
1622 
1623  return NoFault;
1624 }
1625 
1626 Fault
1627 LSQUnit::write(LSQRequest *req, uint8_t *data, int store_idx)
1628 {
1629  assert(storeQueue[store_idx].valid());
1630 
1631  DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x | storeHead:%i "
1632  "[sn:%llu]\n",
1633  store_idx - 1, req->request()->getPaddr(), storeQueue.head() - 1,
1634  storeQueue[store_idx].instruction()->seqNum);
1635 
1636  storeQueue[store_idx].setRequest(req);
1637  unsigned size = req->_size;
1638  storeQueue[store_idx].size() = size;
1639  bool store_no_data =
1640  req->mainRequest()->getFlags() & Request::STORE_NO_DATA;
1641  storeQueue[store_idx].isAllZeros() = store_no_data;
1642  assert(size <= SQEntry::DataSize || store_no_data);
1643 
1644  // copy data into the storeQueue only if the store request has valid data
1645  if (!(req->request()->getFlags() & Request::CACHE_BLOCK_ZERO) &&
1646  !req->request()->isCacheMaintenance() &&
1647  !req->request()->isAtomic())
1648  memcpy(storeQueue[store_idx].data(), data, size);
1649 
1650  // This function only writes the data to the store queue, so no fault
1651  // can happen here.
1652  return NoFault;
1653 }
1654 
// LSQUnit::getLoadHeadSeqNum() -- presumed name; the declarator (line
// 1656) was dropped by the page extraction. Returns the sequence number
// of the load at the LQ head, or 0 if the head entry is invalid.
1655 InstSeqNum
1657 {
1658  if (loadQueue.front().valid())
1659  return loadQueue.front().instruction()->seqNum;
1660  else
1661  return 0;
1662 }
1663 
// LSQUnit::getStoreHeadSeqNum() -- presumed name; the declarator (line
// 1665) was dropped by the page extraction. Returns the sequence number
// of the store at the SQ head, or 0 if the head entry is invalid.
1664 InstSeqNum
1666 {
1667  if (storeQueue.front().valid())
1668  return storeQueue.front().instruction()->seqNum;
1669  else
1670  return 0;
1671 }
1672 
1673 } // namespace o3
1674 } // namespace gem5
gem5::o3::LSQUnit::storePostSend
void storePostSend()
Handles completing the send of a store to memory.
Definition: lsq_unit.cc:1061
gem5::o3::LSQUnit::LSQUnitStats::squashedLoads
statistics::Scalar squashedLoads
Total number of squashed loads.
Definition: lsq_unit.hh:560
gem5::o3::LSQ::LSQSenderState::inst
DynInstPtr inst
Instruction which initiated the access to memory.
Definition: lsq.hh:91
gem5::curTick
Tick curTick()
The universal simulation clock.
Definition: cur_tick.hh:46
gem5::CircularQueue::back
reference back()
Definition: circular_queue.hh:446
gem5::HtmCacheFailure::FAIL_OTHER
@ FAIL_OTHER
gem5::o3::LSQUnit::WritebackEvent::process
void process()
Processes the writeback event.
Definition: lsq_unit.cc:75
gem5::o3::CPU::ppDataAccessComplete
ProbePointArg< std::pair< DynInstPtr, PacketPtr > > * ppDataAccessComplete
Definition: cpu.hh:174
gem5::o3::LSQUnit::lastRetiredHtmUid
uint64_t lastRetiredHtmUid
Definition: lsq_unit.hh:511
gem5::o3::LSQ::LSQRequest
Memory operation metadata.
Definition: lsq.hh:231
gem5::NoFault
constexpr decltype(nullptr) NoFault
Definition: types.hh:260
gem5::BaseCPU::switchedOut
bool switchedOut() const
Determine if the CPU is switched out.
Definition: base.hh:357
gem5::o3::LSQUnit::stallingStoreIsn
InstSeqNum stallingStoreIsn
The store that causes the stall due to partial store to load forwarding.
Definition: lsq_unit.hh:529
gem5::RequestPort::sendTimingReq
bool sendTimingReq(PacketPtr pkt)
Attempt to send a timing request to the responder port by calling its corresponding receive function.
Definition: port.hh:495
gem5::o3::LSQ::LSQSenderState::complete
virtual void complete()=0
gem5::o3::LSQUnit::insertStore
void insertStore(const DynInstPtr &store_inst)
Inserts a store instruction.
Definition: lsq_unit.cc:391
gem5::o3::LSQUnit::storeInFlight
bool storeInFlight
Whether or not a store is in flight.
Definition: lsq_unit.hh:540
gem5::o3::LSQ::cacheBlocked
bool cacheBlocked() const
Is D-cache blocked?
Definition: lsq.cc:194
gem5::o3::LSQUnit::WritebackEvent::description
const char * description() const
Returns the description of this event.
Definition: lsq_unit.cc:87
gem5::o3::LSQUnit::isStalled
bool isStalled()
Returns whether or not the LSQ unit is stalled.
Definition: lsq_unit.hh:602
gem5::o3::LSQUnit::checkLoads
bool checkLoads
Should loads be checked for dependency issues.
Definition: lsq_unit.hh:497
gem5::cprintf
void cprintf(const char *format, const Args &...args)
Definition: cprintf.hh:155
gem5::CircularQueue::advance_tail
void advance_tail()
Increases the tail by one.
Definition: circular_queue.hh:515
gem5::o3::CPU::mmu
BaseMMU * mmu
Definition: cpu.hh:112
gem5::o3::LSQUnit::LSQUnitStats::memOrderViolation
statistics::Scalar memOrderViolation
Total number of memory ordering violations.
Definition: lsq_unit.hh:567
gem5::GenericHtmFailureFault
Definition: faults.hh:140
data
const char data[]
Definition: circlebuf.test.cc:48
gem5::o3::LSQUnit::recvTimingResp
bool recvTimingResp(PacketPtr pkt)
Handles writing back and completing the load or store that has returned from memory.
Definition: lsq_unit.cc:93
gem5::HtmFailureFaultCause
HtmFailureFaultCause
Definition: htm.hh:47
gem5::o3::LSQUnit::LSQEntry::setRequest
void setRequest(LSQRequest *r)
Definition: lsq_unit.hh:141
gem5::o3::LSQUnit::iewStage
IEW * iewStage
Pointer to the IEW stage.
Definition: lsq_unit.hh:401
gem5::o3::LSQUnit::memDepViolator
DynInstPtr memDepViolator
The oldest load that caused a memory ordering violation.
Definition: lsq_unit.hh:543
gem5::o3::LSQUnit::stallingLoadIdx
int stallingLoadIdx
The index of the above store.
Definition: lsq_unit.hh:531
gem5::HtmFailureFaultCause::MEMORY
@ MEMORY
gem5::BaseCPU::numContexts
unsigned numContexts()
Get the number of thread contexts available.
Definition: base.hh:293
gem5::o3::IEW::replayMemInst
void replayMemInst(const DynInstPtr &inst)
Re-executes all rescheduled memory instructions.
Definition: iew.cc:551
gem5::o3::LSQUnit::LSQUnit
LSQUnit(uint32_t lqEntries, uint32_t sqEntries)
Constructs an LSQ unit.
Definition: lsq_unit.cc:202
gem5::o3::LSQUnit::read
Fault read(LSQRequest *req, int load_idx)
Executes the load at the given index.
Definition: lsq_unit.cc:1304
gem5::Packet::req
RequestPtr req
A pointer to the original request.
Definition: packet.hh:366
gem5::o3::LSQ::LSQSenderState::needWB
bool needWB
Whether or not the instruction will need to writeback.
Definition: lsq.hh:101
gem5::BaseCPU::cacheLineSize
unsigned int cacheLineSize() const
Get the cache line size of the system.
Definition: base.hh:381
gem5::o3::LSQUnit::numFreeStoreEntries
unsigned numFreeStoreEntries()
Returns the number of free SQ entries.
Definition: lsq_unit.cc:432
gem5::o3::LSQUnit::init
void init(CPU *cpu_ptr, IEW *iew_ptr, const O3CPUParams &params, LSQ *lsq_ptr, unsigned id)
Initializes the LSQ unit with the specified number of entries.
Definition: lsq_unit.cc:213
gem5::Request::CACHE_BLOCK_ZERO
@ CACHE_BLOCK_ZERO
This is a write that is targeted and zeroing an entire cache block.
Definition: request.hh:143
gem5::o3::LSQUnit::LSQUnitStats::ignoredResponses
statistics::Scalar ignoredResponses
Total number of responses from the memory system that are ignored due to the instruction already being...
Definition: lsq_unit.hh:564
gem5::o3::LSQ::LSQRequest::isCacheBlockHit
virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask)=0
Test if the request accesses a particular cache line.
gem5::statistics::nozero
const FlagsType nozero
Don't print if this is zero.
Definition: info.hh:68
gem5::o3::IEW::checkMisprediction
void checkMisprediction(const DynInstPtr &inst)
Check misprediction
Definition: iew.cc:1593
sc_dt::to_string
const std::string to_string(sc_enc enc)
Definition: sc_fxdefs.cc:91
gem5::o3::LSQ::LSQRequest::buildPackets
virtual void buildPackets()=0
gem5::HtmCacheFailure
HtmCacheFailure
Definition: htm.hh:59
gem5::o3::LSQUnit::schedule
void schedule(Event &ev, Tick when)
Schedule event for the cpu.
Definition: lsq_unit.cc:1293
gem5::o3::LSQ::cachePortAvailable
bool cachePortAvailable(bool is_load) const
Is any store port available to use?
Definition: lsq.cc:206
gem5::ArmISA::e
Bitfield< 9 > e
Definition: misc_types.hh:64
gem5::Packet::setHtmTransactional
void setHtmTransactional(uint64_t val)
Stipulates that this packet/request originates in the CPU executing in transactional mode,...
Definition: packet.cc:521
gem5::o3::LSQUnit::htmStops
int htmStops
Definition: lsq_unit.hh:509
gem5::o3::LSQUnit::MaxDataBytes
static constexpr auto MaxDataBytes
Definition: lsq_unit.hh:93
gem5::o3::LSQ
Definition: lsq.hh:75
gem5::Packet::isWrite
bool isWrite() const
Definition: packet.hh:583
gem5::o3::LSQUnit::storesToWB
int storesToWB
The number of store instructions in the SQ waiting to writeback.
Definition: lsq_unit.hh:504
gem5::o3::LSQUnit::cacheBlockMask
Addr cacheBlockMask
Address Mask for a cache block (e.g.
Definition: lsq_unit.hh:519
gem5::o3::LSQ::LSQRequest::sendPacketToCache
virtual void sendPacketToCache()=0
gem5::o3::LSQUnit::setDcachePort
void setDcachePort(RequestPort *dcache_port)
Sets the pointer to the dcache port.
Definition: lsq_unit.cc:291
gem5::EventManager::schedule
void schedule(Event &event, Tick when)
Definition: eventq.hh:1019
gem5::csprintf
std::string csprintf(const char *format, const Args &...args)
Definition: cprintf.hh:161
gem5::o3::LSQ::LSQRequest::mainPacket
virtual PacketPtr mainPacket()
Definition: lsq.hh:422
gem5::RefCountingPtr::get
T * get() const
Directly access the pointer itself without taking a reference.
Definition: refcnt.hh:227
gem5::CircularQueue::full
bool full() const
Is the queue full? A queue is full if the head is the 0^{th} element and the tail is the (size-1)^{th...
Definition: circular_queue.hh:558
gem5::ArmISA::i
Bitfield< 7 > i
Definition: misc_types.hh:66
gem5::o3::LSQUnit::LSQUnitStats::loadToUse
statistics::Distribution loadToUse
Distribution of cycle latency between the first time a load is issued and its completion.
Definition: lsq_unit.hh:580
gem5::o3::LSQUnit::depCheckShift
unsigned depCheckShift
The number of places to shift addresses in the LSQ before checking for dependency violations.
Definition: lsq_unit.hh:494
request.hh
gem5::HtmFailureFaultCause::OTHER
@ OTHER
gem5::o3::LSQUnit::AddrRangeCoverage::PartialAddrRangeCoverage
@ PartialAddrRangeCoverage
gem5::statistics::DistBase::sample
void sample(const U &v, int n=1)
Add a value to the distribution n times.
Definition: statistics.hh:1319
gem5::o3::LSQUnit::numFreeLoadEntries
unsigned numFreeLoadEntries()
Returns the number of free LQ entries.
Definition: lsq_unit.cc:422
gem5::o3::IEW::wakeCPU
void wakeCPU()
Tells the CPU to wakeup if it has descheduled itself due to no activity.
Definition: iew.cc:805
gem5::HtmCacheFailure::FAIL_SELF
@ FAIL_SELF
gem5::RefCountingPtr< DynInst >
gem5::BaseMMU
Definition: mmu.hh:50
gem5::o3::IEW::rescheduleMemInst
void rescheduleMemInst(const DynInstPtr &inst)
Tells memory dependence unit that a memory instruction needs to be rescheduled.
Definition: iew.cc:545
packet.hh
gem5::o3::CPU::checker
gem5::Checker< DynInstPtr > * checker
Pointer to the checker, which can dynamically verify instruction results at run time.
Definition: cpu.hh:602
gem5::RequestPort
A RequestPort is a specialisation of a Port, which implements the default protocol for the three diff...
Definition: port.hh:77
gem5::Cycles
Cycles is a wrapper class for representing cycle counts, i.e.
Definition: types.hh:78
gem5::o3::LSQUnit::getLatestHtmUid
uint64_t getLatestHtmUid() const
Definition: lsq_unit.cc:1054
gem5::ArmISA::handleLockedWrite
bool handleLockedWrite(XC *xc, const RequestPtr &req, Addr cacheBlockMask)
Definition: locked_mem.hh:113
str.hh
gem5::o3::LSQUnit::completeStore
void completeStore(typename StoreQueue::iterator store_idx)
Completes the store at the specified index.
Definition: lsq_unit.cc:1154
gem5::o3::LSQUnit::needsTSO
bool needsTSO
Flag for memory model.
Definition: lsq_unit.hh:546
gem5::o3::LSQUnit::LSQUnitStats::forwLoads
statistics::Scalar forwLoads
Total number of loads forwarded from LSQ stores.
Definition: lsq_unit.hh:557
gem5::Packet::dataStatic
void dataStatic(T *p)
Set the data pointer to the following value that should not be freed.
Definition: packet.hh:1134
gem5::o3::LSQUnit::LSQUnitStats::blockedByCache
statistics::Scalar blockedByCache
Number of times the LSQ is blocked due to the cache.
Definition: lsq_unit.hh:576
gem5::o3::LSQUnit::WritebackEvent
Writeback event, specifically for when stores forward data to loads.
Definition: lsq_unit.hh:447
gem5::o3::LSQ::LSQRequest::_size
const uint32_t _size
Definition: lsq.hh:297
gem5::o3::LSQ::LSQRequest::recvTimingResp
virtual bool recvTimingResp(PacketPtr pkt)=0
gem5::CircularQueue::tail
size_t tail() const
Definition: circular_queue.hh:456
gem5::o3::LSQUnit
Class that implements the actual LQ and SQ for each specific thread.
Definition: lsq_unit.hh:90
gem5::HtmCacheFailure::FAIL_REMOTE
@ FAIL_REMOTE
gem5::CircularQueue::size
size_t size() const
Definition: circular_queue.hh:466
gem5::o3::CPU
O3CPU class, has each of the stages (fetch through commit) within it, as well as all of the time buff...
Definition: cpu.hh:95
gem5::o3::LSQ::LSQRequest::packetSent
void packetSent()
Update the status to reflect that a packet was sent.
Definition: lsq.hh:497
gem5::o3::LSQ::LSQRequest::setStateToFault
void setStateToFault()
Definition: lsq.hh:560
gem5::o3::LSQUnit::stats
gem5::o3::LSQUnit::LSQUnitStats stats
gem5::o3::LSQUnit::commitLoad
void commitLoad()
Commits the head load.
Definition: lsq_unit.cc:720
gem5::statistics::Distribution::init
Distribution & init(Counter min, Counter max, Counter bkt)
Set the parameters of this distribution.
Definition: statistics.hh:2101
gem5::ThreadContext
ThreadContext is the external interface to all thread state for anything outside of the CPU.
Definition: thread_context.hh:93
gem5::o3::LSQUnit::LSQEntry
Definition: lsq_unit.hh:98
gem5::o3::LSQUnit::LSQUnitStats::rescheduledLoads
statistics::Scalar rescheduledLoads
Number of loads that were rescheduled.
Definition: lsq_unit.hh:573
gem5::o3::LSQ::LSQRequest::discard
void discard()
The request is discarded (e.g.
Definition: lsq.hh:578
gem5::o3::LSQUnit::trySendPacket
bool trySendPacket(bool isLoad, PacketPtr data_pkt)
Attempts to send a packet to the cache.
Definition: lsq_unit.cc:1218
gem5::Fault
std::shared_ptr< FaultBase > Fault
Definition: types.hh:255
gem5::Packet::print
void print(std::ostream &o, int verbosity=0, const std::string &prefix="") const
Definition: packet.cc:373
gem5::o3::LSQ::LSQSenderState::isSplit
bool isSplit
Whether or not this access is split in two.
Definition: lsq.hh:103
gem5::o3::LSQ::LSQRequest::discardSenderState
void discardSenderState()
Mark senderState as discarded.
Definition: lsq.hh:456
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:186
gem5::Event
Definition: eventq.hh:251
ADD_STAT
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition: group.hh:75
gem5::o3::LSQUnit::LQSenderState
Particularisation of the LSQSenderState to the LQ.
Definition: lsq_unit.hh:410
gem5::Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:283
gem5::o3::LSQUnit::loads
int loads
The number of load instructions in the LQ.
Definition: lsq_unit.hh:500
gem5::o3::LSQUnit::isStoreBlocked
bool isStoreBlocked
Whether or not a store is blocked due to the memory system.
Definition: lsq_unit.hh:537
gem5::Packet::htmTransactionFailedInCache
bool htmTransactionFailedInCache() const
Returns whether or not this packet/request has returned from the cache hierarchy in a failed transact...
Definition: packet.cc:508
gem5::probing::Packet
ProbePointArg< PacketInfo > Packet
Packet probe point.
Definition: mem.hh:109
gem5::Tick
uint64_t Tick
Tick count type.
Definition: types.hh:58
gem5::o3::LSQUnit::WritebackEvent::WritebackEvent
WritebackEvent(const DynInstPtr &_inst, PacketPtr pkt, LSQUnit *lsq_ptr)
Constructs a writeback event.
Definition: lsq_unit.cc:65
gem5::o3::IEW
IEW handles both single threaded and SMT IEW (issue/execute/writeback).
Definition: iew.hh:87
cpu.hh
gem5::o3::LSQUnit::htmStarts
int htmStarts
Definition: lsq_unit.hh:508
gem5::o3::LSQUnit::cpu
CPU * cpu
Pointer to the CPU.
Definition: lsq_unit.hh:398
gem5::o3::LSQUnit::storeWBIt
StoreQueue::iterator storeWBIt
The index of the first instruction that may be ready to be written back, and has not yet been written...
Definition: lsq_unit.hh:516
gem5::o3::LSQUnit::lsqID
ThreadID lsqID
The LSQUnit thread id.
Definition: lsq_unit.hh:482
gem5::MemCmd::ReadReq
@ ReadReq
Definition: packet.hh:86
gem5::o3::LSQ::LSQRequest::packet
PacketPtr packet(int idx=0)
Definition: lsq.hh:419
gem5::o3::LSQ::LSQRequest::_numOutstandingPackets
uint32_t _numOutstandingPackets
Definition: lsq.hh:300
gem5::ReExec
Definition: faults.hh:92
gem5::o3::CPU::tcBase
gem5::ThreadContext * tcBase(ThreadID tid)
Returns a pointer to a thread context.
Definition: cpu.hh:590
gem5::o3::LSQUnit::write
Fault write(LSQRequest *req, uint8_t *data, int store_idx)
Executes the store at the given index.
Definition: lsq_unit.cc:1627
gem5::Packet::getHtmTransactionUid
uint64_t getHtmTransactionUid() const
If a packet/request originates in a CPU executing in transactional mode, i.e.
Definition: packet.cc:534
gem5::o3::CPU::thread
std::vector< ThreadState * > thread
Pointers to all of the threads in the CPU.
Definition: cpu.hh:608
gem5::o3::LSQUnit::commitLoads
void commitLoads(InstSeqNum &youngest_inst)
Commits loads older than a specific sequence number.
Definition: lsq_unit.cc:745
gem5::o3::IEW::activityThisCycle
void activityThisCycle()
Reports to the CPU that there is activity this cycle.
Definition: iew.cc:811
gem5::CircularQueue::begin
iterator begin()
Iterators.
Definition: circular_queue.hh:565
gem5::ArmISA::handleLockedSnoopHit
void handleLockedSnoopHit(XC *xc)
Definition: locked_mem.hh:103
gem5::o3::LSQUnit::completeDataAccess
void completeDataAccess(PacketPtr pkt)
Completes the data access that has been returned from the memory system.
Definition: lsq_unit.cc:110
gem5::o3::LSQ::cachePortBusy
void cachePortBusy(bool is_load)
Another store port is in use.
Definition: lsq.cc:218
gem5::o3::CPU::wakeCPU
void wakeCPU()
Wakes the CPU, rescheduling the CPU if it's not already active.
Definition: cpu.cc:1593
gem5::HtmFailureFaultCause::INVALID
@ INVALID
ss
std::stringstream ss
Definition: trace.test.cc:45
gem5::statistics::Group::addStatGroup
void addStatGroup(const char *name, Group *block)
Add a stat block as a child of this block.
Definition: group.cc:117
gem5::CircularQueue::front
reference front()
Definition: circular_queue.hh:441
gem5::Packet::getHtmTransactionFailedInCacheRC
HtmCacheFailure getHtmTransactionFailedInCacheRC() const
If a packet/request has returned from the cache hierarchy in a failed transaction,...
Definition: packet.cc:514
gem5::o3::LSQUnit::takeOverFrom
void takeOverFrom()
Takes over from another CPU's thread.
Definition: lsq_unit.cc:307
gem5::o3::LSQ::LSQRequest::request
RequestPtr request(int idx=0)
Definition: lsq.hh:408
gem5::o3::LSQUnit::retryPkt
PacketPtr retryPkt
The packet that needs to be retried.
Definition: lsq_unit.hh:534
gem5::o3::LSQUnit::checkViolations
Fault checkViolations(typename LoadQueue::iterator &loadIt, const DynInstPtr &inst)
Check for ordering violations in the LSQ.
Definition: lsq_unit.cc:523
gem5::ThreadContext::getHtmCheckpointPtr
virtual BaseHTMCheckpointPtr & getHtmCheckpointPtr()=0
gem5::o3::LSQ::LSQRequest::senderState
void senderState(LSQSenderState *st)
Definition: lsq.hh:436
gem5::BaseCPU::getContext
virtual ThreadContext * getContext(int tn)
Given a thread num get the thread context for it.
Definition: base.hh:290
gem5::o3::LSQUnit::stores
int stores
The number of store instructions in the SQ.
Definition: lsq_unit.hh:502
gem5::o3::LSQUnit::writeback
void writeback(const DynInstPtr &inst, PacketPtr pkt)
Writes back the instruction, sending it to IEW.
Definition: lsq_unit.cc:1092
gem5::Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:147
gem5::o3::LSQUnit::AddrRangeCoverage::FullAddrRangeCoverage
@ FullAddrRangeCoverage
gem5::o3::LSQUnit::squash
void squash(const InstSeqNum &squashed_num)
Squashes all instructions younger than a specific sequence number.
Definition: lsq_unit.cc:937
gem5::Packet::senderState
SenderState * senderState
This packet's sender state.
Definition: packet.hh:534
gem5::o3::LSQUnit::LSQUnitStats::squashedStores
statistics::Scalar squashedStores
Total number of squashed stores.
Definition: lsq_unit.hh:570
gem5::o3::LSQ::LSQRequest::complete
void complete()
Definition: lsq.hh:621
gem5::Clocked::clockEdge
Tick clockEdge(Cycles cycles=Cycles(0)) const
Determine the tick when a cycle begins, by default the current one, but the argument also enables the...
Definition: clocked_object.hh:177
gem5::o3::LSQUnit::LSQEntry::instruction
const DynInstPtr & instruction() const
Definition: lsq_unit.hh:148
gem5::o3::LSQ::LSQRequest::isSent
bool isSent()
Definition: lsq.hh:541
gem5::o3::LSQUnit::executeStore
Fault executeStore(const DynInstPtr &inst)
Executes a store instruction.
Definition: lsq_unit.cc:669
gem5::Request::STORE_NO_DATA
static const FlagsType STORE_NO_DATA
Definition: request.hh:244
gem5::o3::MaxThreads
static constexpr int MaxThreads
Definition: limits.hh:38
gem5::o3::LSQUnit::writebackBlockedStore
void writebackBlockedStore()
Try to finish a previously blocked write back attempt.
Definition: lsq_unit.cc:782
gem5::o3::CPU::activityThisCycle
void activityThisCycle()
Records that there was time buffer activity this cycle.
Definition: cpu.hh:569
gem5::HtmFailureFaultCause::SIZE
@ SIZE
gem5::o3::LSQ::LSQSenderState::isLoad
bool isLoad
Whether or not it is a load.
Definition: lsq.hh:99
gem5::CircularQueue::pop_front
void pop_front(size_t num_elem=1)
Circularly increase the head pointer.
Definition: circular_queue.hh:477
gem5::o3::IEW::instToCommit
void instToCommit(const DynInstPtr &inst)
Sends an instruction to commit through the time buffer.
Definition: iew.cc:569
gem5::RiscvISA::x
Bitfield< 3 > x
Definition: pagetable.hh:73
gem5::o3::LSQUnit::insert
void insert(const DynInstPtr &inst)
Inserts an instruction.
Definition: lsq_unit.cc:313
gem5::o3::LSQUnit::dcachePort
RequestPort * dcachePort
Pointer to the dcache port.
Definition: lsq_unit.hh:407
gem5::o3::LSQ::LSQSenderState
Derived class to hold any sender state the LSQ needs.
Definition: lsq.hh:80
gem5::CircularQueue::pop_back
void pop_back()
Circularly decrease the tail pointer.
Definition: circular_queue.hh:490
gem5::CircularQueue::empty
bool empty() const
Is the queue empty?
Definition: circular_queue.hh:548
gem5::o3::LSQUnit::stalled
bool stalled
Whether or not the LSQ is stalled.
Definition: lsq_unit.hh:525
gem5::htmFailureToStr
std::string htmFailureToStr(HtmFailureFaultCause cause)
Convert enum into string to be used for debug purposes.
Definition: htm.cc:44
gem5::Request::LLSC
@ LLSC
The request is a Load locked/store conditional.
Definition: request.hh:156
gem5::o3::IEW::blockMemInst
void blockMemInst(const DynInstPtr &inst)
Moves memory instruction onto the list of cache blocked instructions.
Definition: iew.cc:557
gem5::o3::LSQ::LSQRequest::mainRequest
virtual RequestPtr mainRequest()
Definition: lsq.hh:429
gem5::RefCountingPtr::data
T * data
The stored pointer.
Definition: refcnt.hh:146
gem5::CircularQueue::head
size_t head() const
Definition: circular_queue.hh:451
gem5::o3::LSQUnit::SQEntry::DataSize
static constexpr size_t DataSize
Definition: lsq_unit.hh:170
gem5::o3::LSQUnit::getMMUPtr
BaseMMU * getMMUPtr()
Definition: lsq_unit.cc:1295
gem5::o3::LSQUnit::lsq
LSQ * lsq
Pointer to the LSQ.
Definition: lsq_unit.hh:404
gem5::CircularQueue::end
iterator end()
Definition: circular_queue.hh:580
gem5::Packet::makeResponse
void makeResponse()
Take a request packet and modify it in place to be suitable for returning as a response to that reque...
Definition: packet.hh:1031
gem5::o3::LSQUnit::executeLoad
Fault executeLoad(const DynInstPtr &inst)
Executes a load instruction.
Definition: lsq_unit.cc:603
gem5::o3::LSQUnit::recvRetry
void recvRetry()
Handles doing the retry.
Definition: lsq_unit.cc:1261
gem5::MemCmd::WriteReq
@ WriteReq
Definition: packet.hh:89
lsq_unit.hh
gem5::o3::LSQ::LSQRequest::isAnyOutstandingRequest
bool isAnyOutstandingRequest()
Test if there is any in-flight translation or mem access request.
Definition: lsq.hh:466
gem5::ArmISA::handleLockedRead
void handleLockedRead(XC *xc, const RequestPtr &req)
Definition: locked_mem.hh:93
gem5::statistics::Group
Statistics container.
Definition: group.hh:93
gem5::Request::NO_ACCESS
@ NO_ACCESS
The request should not cause a memory access.
Definition: request.hh:146
gem5::ArmISA::id
Bitfield< 33 > id
Definition: misc_types.hh:250
gem5::o3::IEW::updateLSQNextCycle
bool updateLSQNextCycle
Records if the LSQ needs to be updated on the next cycle, so that IEW knows if there will be activity...
Definition: iew.hh:365
gem5::InstSeqNum
uint64_t InstSeqNum
Definition: inst_seq.hh:40
gem5::CircularQueue::capacity
size_t capacity() const
Definition: circular_queue.hh:461
gem5::o3::LSQUnit::drainSanityCheck
void drainSanityCheck() const
Perform sanity checks after a drain.
Definition: lsq_unit.cc:297
gem5::o3::LSQUnit::commitStores
void commitStores(InstSeqNum &youngest_inst)
Commits stores older than a specific sequence number.
Definition: lsq_unit.cc:756
gem5::o3::LSQUnit::resetState
void resetState()
Reset the LSQ state.
Definition: lsq_unit.cc:236
gem5::o3::LSQUnit::SQSenderState
Particularisation of the LSQSenderState to the SQ.
Definition: lsq_unit.hh:429
gem5::Packet::setHtmTransactionFailedInCache
void setHtmTransactionFailedInCache(const HtmCacheFailure ret_code)
Stipulates that this packet/request has returned from the cache hierarchy in a failed transaction.
Definition: packet.cc:498
gem5::ArmISA::handleLockedSnoop
void handleLockedSnoop(XC *xc, PacketPtr pkt, Addr cacheBlockMask)
Definition: locked_mem.hh:64
gem5::statistics::DataWrap::flags
Derived & flags(Flags _flags)
Set the flags and marks this stat to print at the end of simulation.
Definition: statistics.hh:355
debugfaults.hh
gem5::Clocked::ticksToCycles
Cycles ticksToCycles(Tick t) const
Definition: clocked_object.hh:222
gem5::CircularQueue::getIterator
iterator getIterator(size_t idx)
Return an iterator to an index in the queue.
Definition: circular_queue.hh:592
gem5::Packet::isHtmTransactional
bool isHtmTransactional() const
Returns whether or not this packet/request originates in the CPU executing in transactional mode,...
Definition: packet.cc:528
gem5::o3::LSQUnit::getMemDepViolator
DynInstPtr getMemDepViolator()
Returns the memory ordering violator.
Definition: lsq_unit.cc:412
gem5::Packet::getAddr
Addr getAddr() const
Definition: packet.hh:781
gem5::o3::LSQUnit::getStoreHeadSeqNum
InstSeqNum getStoreHeadSeqNum()
Returns the sequence number of the head store instruction.
Definition: lsq_unit.cc:1665
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: decoder.cc:40
gem5::o3::LSQUnit::loadQueue
LoadQueue loadQueue
The load queue.
Definition: lsq_unit.hh:488
gem5::o3::LSQUnit::name
std::string name() const
Returns the name of the LSQ unit.
Definition: lsq_unit.cc:255
lsq.hh
gem5::o3::LSQUnit::AddrRangeCoverage::NoAddrRangeCoverage
@ NoAddrRangeCoverage
limits.hh
gem5::o3::LSQUnit::dumpInsts
void dumpInsts() const
Debugging function to dump instructions in the LSQ.
Definition: lsq_unit.cc:1270
gem5::o3::LSQUnit::insertLoad
void insertLoad(const DynInstPtr &load_inst)
Inserts a load instruction.
Definition: lsq_unit.cc:329
gem5::o3::LSQUnit::cacheLineSize
unsigned int cacheLineSize()
Definition: lsq_unit.cc:1298
gem5::o3::LSQUnit::storeQueue
CircularQueue< SQEntry > storeQueue
The store queue.
Definition: lsq_unit.hh:485
gem5::o3::LSQ::LSQSenderState::request
LSQRequest * request()
Definition: lsq.hh:117
gem5::o3::LSQUnit::getLoadHeadSeqNum
InstSeqNum getLoadHeadSeqNum()
Returns the sequence number of the head load instruction.
Definition: lsq_unit.cc:1656
gem5::o3::LSQUnit::writebackStores
void writebackStores()
Writes back stores.
Definition: lsq_unit.cc:792
gem5::o3::LSQ::LSQRequest::isSplit
bool isSplit() const
Definition: lsq.hh:475
gem5::o3::LSQUnit::checkSnoop
void checkSnoop(PacketPtr pkt)
Check if an incoming invalidate hits in the lsq on a load that might have issued out of order wrt ano...
Definition: lsq_unit.cc:443
gem5::o3::IEW::name
std::string name() const
Returns the name of the IEW stage.
Definition: iew.cc:119
gem5::Packet::isInvalidate
bool isInvalidate() const
Definition: packet.hh:598
panic
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:177
gem5::o3::LSQUnit::LSQUnitStats::LSQUnitStats
LSQUnitStats(statistics::Group *parent)
Definition: lsq_unit.cc:264

Generated on Wed Jul 28 2021 12:10:24 for gem5 by doxygen 1.8.17