gem5  v19.0.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
lsq_unit_impl.hh
Go to the documentation of this file.
1 
2 /*
3  * Copyright (c) 2010-2014, 2017-2019 ARM Limited
4  * Copyright (c) 2013 Advanced Micro Devices, Inc.
5  * All rights reserved
6  *
7  * The license below extends only to copyright in the software and shall
8  * not be construed as granting a license to any other intellectual
9  * property including but not limited to intellectual property relating
10  * to a hardware implementation of the functionality of the software
11  * licensed hereunder. You may use the software subject to the license
12  * terms below provided that you ensure that this notice is replicated
13  * unmodified and in its entirety in all distributions of the software,
14  * modified or unmodified, in source code or in binary form.
15  *
16  * Copyright (c) 2004-2005 The Regents of The University of Michigan
17  * All rights reserved.
18  *
19  * Redistribution and use in source and binary forms, with or without
20  * modification, are permitted provided that the following conditions are
21  * met: redistributions of source code must retain the above copyright
22  * notice, this list of conditions and the following disclaimer;
23  * redistributions in binary form must reproduce the above copyright
24  * notice, this list of conditions and the following disclaimer in the
25  * documentation and/or other materials provided with the distribution;
26  * neither the name of the copyright holders nor the names of its
27  * contributors may be used to endorse or promote products derived from
28  * this software without specific prior written permission.
29  *
30  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
31  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
32  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
33  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
34  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
36  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
37  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
38  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
39  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
40  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41  *
42  * Authors: Kevin Lim
43  * Korey Sewell
44  */
45 
#ifndef __CPU_O3_LSQ_UNIT_IMPL_HH__
#define __CPU_O3_LSQ_UNIT_IMPL_HH__

#include "arch/generic/debugfaults.hh"
#include "arch/locked_mem.hh"
#include "base/str.hh"
#include "config/the_isa.hh"
#include "cpu/checker/cpu.hh"
#include "cpu/o3/lsq.hh"
#include "cpu/o3/lsq_unit.hh"
#include "debug/Activity.hh"
#include "debug/IEW.hh"
#include "debug/LSQUnit.hh"
#include "debug/O3PipeView.hh"
#include "mem/packet.hh"
#include "mem/request.hh"
62 
63 template<class Impl>
65  PacketPtr _pkt, LSQUnit *lsq_ptr)
66  : Event(Default_Pri, AutoDelete),
67  inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr)
68 {
69  assert(_inst->savedReq);
70  _inst->savedReq->writebackScheduled();
71 }
72 
73 template<class Impl>
74 void
76 {
77  assert(!lsqPtr->cpu->switchedOut());
78 
79  lsqPtr->writeback(inst, pkt);
80 
81  assert(inst->savedReq);
82  inst->savedReq->writebackDone();
83  delete pkt;
84 }
85 
86 template<class Impl>
87 const char *
89 {
90  return "Store writeback";
91 }
92 
93 template <class Impl>
94 bool
96 {
97  auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
98  LSQRequest* req = senderState->request();
99  assert(req != nullptr);
100  bool ret = true;
101  /* Check that the request is still alive before any further action. */
102  if (senderState->alive()) {
103  ret = req->recvTimingResp(pkt);
104  } else {
105  senderState->outstanding--;
106  }
107  return ret;
108 
109 }
110 
111 template<class Impl>
112 void
114 {
115  LSQSenderState *state = dynamic_cast<LSQSenderState *>(pkt->senderState);
116  DynInstPtr inst = state->inst;
117 
118  cpu->ppDataAccessComplete->notify(std::make_pair(inst, pkt));
119 
120  /* Notify the sender state that the access is complete (for ownership
121  * tracking). */
122  state->complete();
123 
124  assert(!cpu->switchedOut());
125  if (!inst->isSquashed()) {
126  if (state->needWB) {
127  // Only loads, store conditionals and atomics perform the writeback
128  // after receving the response from the memory
129  assert(inst->isLoad() || inst->isStoreConditional() ||
130  inst->isAtomic());
131  writeback(inst, state->request()->mainPacket());
132  if (inst->isStore() || inst->isAtomic()) {
133  auto ss = dynamic_cast<SQSenderState*>(state);
134  ss->writebackDone();
135  completeStore(ss->idx);
136  }
137  } else if (inst->isStore()) {
138  // This is a regular store (i.e., not store conditionals and
139  // atomics), so it can complete without writing back
140  completeStore(dynamic_cast<SQSenderState*>(state)->idx);
141  }
142  }
143 }
144 
145 template <class Impl>
146 LSQUnit<Impl>::LSQUnit(uint32_t lqEntries, uint32_t sqEntries)
147  : lsqID(-1), storeQueue(sqEntries+1), loadQueue(lqEntries+1),
148  loads(0), stores(0), storesToWB(0), cacheBlockMask(0), stalled(false),
149  isStoreBlocked(false), storeInFlight(false), hasPendingRequest(false),
150  pendingRequest(nullptr)
151 {
152 }
153 
154 template<class Impl>
155 void
156 LSQUnit<Impl>::init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params,
157  LSQ *lsq_ptr, unsigned id)
158 {
159  lsqID = id;
160 
161  cpu = cpu_ptr;
162  iewStage = iew_ptr;
163 
164  lsq = lsq_ptr;
165 
166  DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",lsqID);
167 
168  depCheckShift = params->LSQDepCheckShift;
169  checkLoads = params->LSQCheckLoads;
170  needsTSO = params->needsTSO;
171 
172  resetState();
173 }
174 
175 
176 template<class Impl>
177 void
179 {
180  loads = stores = storesToWB = 0;
181 
182 
183  storeWBIt = storeQueue.begin();
184 
185  retryPkt = NULL;
186  memDepViolator = NULL;
187 
188  stalled = false;
189 
190  cacheBlockMask = ~(cpu->cacheLineSize() - 1);
191 }
192 
193 template<class Impl>
194 std::string
196 {
197  if (Impl::MaxThreads == 1) {
198  return iewStage->name() + ".lsq";
199  } else {
200  return iewStage->name() + ".lsq.thread" + std::to_string(lsqID);
201  }
202 }
203 
204 template<class Impl>
205 void
207 {
209  .name(name() + ".forwLoads")
210  .desc("Number of loads that had data forwarded from stores");
211 
213  .name(name() + ".invAddrLoads")
214  .desc("Number of loads ignored due to an invalid address");
215 
217  .name(name() + ".squashedLoads")
218  .desc("Number of loads squashed");
219 
221  .name(name() + ".ignoredResponses")
222  .desc("Number of memory responses ignored because the instruction is squashed");
223 
225  .name(name() + ".memOrderViolation")
226  .desc("Number of memory ordering violations");
227 
229  .name(name() + ".squashedStores")
230  .desc("Number of stores squashed");
231 
233  .name(name() + ".invAddrSwpfs")
234  .desc("Number of software prefetches ignored due to an invalid address");
235 
237  .name(name() + ".blockedLoads")
238  .desc("Number of blocked loads due to partial load-store forwarding");
239 
241  .name(name() + ".rescheduledLoads")
242  .desc("Number of loads that were rescheduled");
243 
245  .name(name() + ".cacheBlocked")
246  .desc("Number of times an access to memory failed due to the cache being blocked");
247 }
248 
249 template<class Impl>
250 void
252 {
253  dcachePort = dcache_port;
254 }
255 
256 template<class Impl>
257 void
259 {
260  for (int i = 0; i < loadQueue.capacity(); ++i)
261  assert(!loadQueue[i].valid());
262 
263  assert(storesToWB == 0);
264  assert(!retryPkt);
265 }
266 
267 template<class Impl>
268 void
270 {
271  resetState();
272 }
273 
274 template <class Impl>
275 void
277 {
278  assert(inst->isMemRef());
279 
280  assert(inst->isLoad() || inst->isStore() || inst->isAtomic());
281 
282  if (inst->isLoad()) {
283  insertLoad(inst);
284  } else {
285  insertStore(inst);
286  }
287 
288  inst->setInLSQ();
289 }
290 
291 template <class Impl>
292 void
294 {
295  assert(!loadQueue.full());
296  assert(loads < loadQueue.capacity());
297 
298  DPRINTF(LSQUnit, "Inserting load PC %s, idx:%i [sn:%lli]\n",
299  load_inst->pcState(), loadQueue.tail(), load_inst->seqNum);
300 
301  /* Grow the queue. */
303 
304  load_inst->sqIt = storeQueue.end();
305 
306  assert(!loadQueue.back().valid());
307  loadQueue.back().set(load_inst);
308  load_inst->lqIdx = loadQueue.tail();
309  load_inst->lqIt = loadQueue.getIterator(load_inst->lqIdx);
310 
311  ++loads;
312 }
313 
314 template <class Impl>
315 void
317 {
318  // Make sure it is not full before inserting an instruction.
319  assert(!storeQueue.full());
320  assert(stores < storeQueue.capacity());
321 
322  DPRINTF(LSQUnit, "Inserting store PC %s, idx:%i [sn:%lli]\n",
323  store_inst->pcState(), storeQueue.tail(), store_inst->seqNum);
324  storeQueue.advance_tail();
325 
326  store_inst->sqIdx = storeQueue.tail();
327  store_inst->lqIdx = loadQueue.moduloAdd(loadQueue.tail(), 1);
328  store_inst->lqIt = loadQueue.end();
329 
330  storeQueue.back().set(store_inst);
331 
332  ++stores;
333 }
334 
335 template <class Impl>
336 typename Impl::DynInstPtr
338 {
339  DynInstPtr temp = memDepViolator;
340 
341  memDepViolator = NULL;
342 
343  return temp;
344 }
345 
346 template <class Impl>
347 unsigned
349 {
350  //LQ has an extra dummy entry to differentiate
351  //empty/full conditions. Subtract 1 from the free entries.
352  DPRINTF(LSQUnit, "LQ size: %d, #loads occupied: %d\n",
353  1 + loadQueue.capacity(), loads);
354  return loadQueue.capacity() - loads;
355 }
356 
357 template <class Impl>
358 unsigned
360 {
361  //SQ has an extra dummy entry to differentiate
362  //empty/full conditions. Subtract 1 from the free entries.
363  DPRINTF(LSQUnit, "SQ size: %d, #stores occupied: %d\n",
364  1 + storeQueue.capacity(), stores);
365  return storeQueue.capacity() - stores;
366 
367  }
368 
369 template <class Impl>
370 void
372 {
373  // Should only ever get invalidations in here
374  assert(pkt->isInvalidate());
375 
376  DPRINTF(LSQUnit, "Got snoop for address %#x\n", pkt->getAddr());
377 
378  for (int x = 0; x < cpu->numContexts(); x++) {
379  ThreadContext *tc = cpu->getContext(x);
380  bool no_squash = cpu->thread[x]->noSquashFromTC;
381  cpu->thread[x]->noSquashFromTC = true;
383  cpu->thread[x]->noSquashFromTC = no_squash;
384  }
385 
386  if (loadQueue.empty())
387  return;
388 
389  auto iter = loadQueue.begin();
390 
391  Addr invalidate_addr = pkt->getAddr() & cacheBlockMask;
392 
393  DynInstPtr ld_inst = iter->instruction();
394  assert(ld_inst);
395  LSQRequest *req = iter->request();
396 
397  // Check that this snoop didn't just invalidate our lock flag
398  if (ld_inst->effAddrValid() &&
399  req->isCacheBlockHit(invalidate_addr, cacheBlockMask)
400  && ld_inst->memReqFlags & Request::LLSC)
401  TheISA::handleLockedSnoopHit(ld_inst.get());
402 
403  bool force_squash = false;
404 
405  while (++iter != loadQueue.end()) {
406  ld_inst = iter->instruction();
407  assert(ld_inst);
408  req = iter->request();
409  if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered())
410  continue;
411 
412  DPRINTF(LSQUnit, "-- inst [sn:%lli] to pktAddr:%#x\n",
413  ld_inst->seqNum, invalidate_addr);
414 
415  if (force_squash ||
416  req->isCacheBlockHit(invalidate_addr, cacheBlockMask)) {
417  if (needsTSO) {
418  // If we have a TSO system, as all loads must be ordered with
419  // all other loads, this load as well as *all* subsequent loads
420  // need to be squashed to prevent possible load reordering.
421  force_squash = true;
422  }
423  if (ld_inst->possibleLoadViolation() || force_squash) {
424  DPRINTF(LSQUnit, "Conflicting load at addr %#x [sn:%lli]\n",
425  pkt->getAddr(), ld_inst->seqNum);
426 
427  // Mark the load for re-execution
428  ld_inst->fault = std::make_shared<ReExec>();
429  req->setStateToFault();
430  } else {
431  DPRINTF(LSQUnit, "HitExternal Snoop for addr %#x [sn:%lli]\n",
432  pkt->getAddr(), ld_inst->seqNum);
433 
434  // Make sure that we don't lose a snoop hitting a LOCKED
435  // address since the LOCK* flags don't get updated until
436  // commit.
437  if (ld_inst->memReqFlags & Request::LLSC)
438  TheISA::handleLockedSnoopHit(ld_inst.get());
439 
440  // If a older load checks this and it's true
441  // then we might have missed the snoop
442  // in which case we need to invalidate to be sure
443  ld_inst->hitExternalSnoop(true);
444  }
445  }
446  }
447  return;
448 }
449 
450 template <class Impl>
451 Fault
452 LSQUnit<Impl>::checkViolations(typename LoadQueue::iterator& loadIt,
453  const DynInstPtr& inst)
454 {
455  Addr inst_eff_addr1 = inst->effAddr >> depCheckShift;
456  Addr inst_eff_addr2 = (inst->effAddr + inst->effSize - 1) >> depCheckShift;
457 
463  while (loadIt != loadQueue.end()) {
464  DynInstPtr ld_inst = loadIt->instruction();
465  if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()) {
466  ++loadIt;
467  continue;
468  }
469 
470  Addr ld_eff_addr1 = ld_inst->effAddr >> depCheckShift;
471  Addr ld_eff_addr2 =
472  (ld_inst->effAddr + ld_inst->effSize - 1) >> depCheckShift;
473 
474  if (inst_eff_addr2 >= ld_eff_addr1 && inst_eff_addr1 <= ld_eff_addr2) {
475  if (inst->isLoad()) {
476  // If this load is to the same block as an external snoop
477  // invalidate that we've observed then the load needs to be
478  // squashed as it could have newer data
479  if (ld_inst->hitExternalSnoop()) {
480  if (!memDepViolator ||
481  ld_inst->seqNum < memDepViolator->seqNum) {
482  DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] "
483  "and [sn:%lli] at address %#x\n",
484  inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
485  memDepViolator = ld_inst;
486 
488 
489  return std::make_shared<GenericISA::M5PanicFault>(
490  "Detected fault with inst [sn:%lli] and "
491  "[sn:%lli] at address %#x\n",
492  inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
493  }
494  }
495 
496  // Otherwise, mark the load has a possible load violation
497  // and if we see a snoop before it's commited, we need to squash
498  ld_inst->possibleLoadViolation(true);
499  DPRINTF(LSQUnit, "Found possible load violation at addr: %#x"
500  " between instructions [sn:%lli] and [sn:%lli]\n",
501  inst_eff_addr1, inst->seqNum, ld_inst->seqNum);
502  } else {
503  // A load/store incorrectly passed this store.
504  // Check if we already have a violator, or if it's newer
505  // squash and refetch.
506  if (memDepViolator && ld_inst->seqNum > memDepViolator->seqNum)
507  break;
508 
509  DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] and "
510  "[sn:%lli] at address %#x\n",
511  inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
512  memDepViolator = ld_inst;
513 
515 
516  return std::make_shared<GenericISA::M5PanicFault>(
517  "Detected fault with "
518  "inst [sn:%lli] and [sn:%lli] at address %#x\n",
519  inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
520  }
521  }
522 
523  ++loadIt;
524  }
525  return NoFault;
526 }
527 
528 
529 
530 
531 template <class Impl>
532 Fault
534 {
535  using namespace TheISA;
536  // Execute a specific load.
537  Fault load_fault = NoFault;
538 
539  DPRINTF(LSQUnit, "Executing load PC %s, [sn:%lli]\n",
540  inst->pcState(), inst->seqNum);
541 
542  assert(!inst->isSquashed());
543 
544  load_fault = inst->initiateAcc();
545 
546  if (load_fault == NoFault && !inst->readMemAccPredicate()) {
547  assert(inst->readPredicate());
548  inst->setExecuted();
549  inst->completeAcc(nullptr);
550  iewStage->instToCommit(inst);
551  iewStage->activityThisCycle();
552  return NoFault;
553  }
554 
555  if (inst->isTranslationDelayed() && load_fault == NoFault)
556  return load_fault;
557 
558  if (load_fault != NoFault && inst->translationCompleted() &&
559  inst->savedReq->isPartialFault() && !inst->savedReq->isComplete()) {
560  assert(inst->savedReq->isSplit());
561  // If we have a partial fault where the mem access is not complete yet
562  // then the cache must have been blocked. This load will be re-executed
563  // when the cache gets unblocked. We will handle the fault when the
564  // mem access is complete.
565  return NoFault;
566  }
567 
568  // If the instruction faulted or predicated false, then we need to send it
569  // along to commit without the instruction completing.
570  if (load_fault != NoFault || !inst->readPredicate()) {
571  // Send this instruction to commit, also make sure iew stage
572  // realizes there is activity. Mark it as executed unless it
573  // is a strictly ordered load that needs to hit the head of
574  // commit.
575  if (!inst->readPredicate())
576  inst->forwardOldRegs();
577  DPRINTF(LSQUnit, "Load [sn:%lli] not executed from %s\n",
578  inst->seqNum,
579  (load_fault != NoFault ? "fault" : "predication"));
580  if (!(inst->hasRequest() && inst->strictlyOrdered()) ||
581  inst->isAtCommit()) {
582  inst->setExecuted();
583  }
584  iewStage->instToCommit(inst);
585  iewStage->activityThisCycle();
586  } else {
587  if (inst->effAddrValid()) {
588  auto it = inst->lqIt;
589  ++it;
590 
591  if (checkLoads)
592  return checkViolations(it, inst);
593  }
594  }
595 
596  return load_fault;
597 }
598 
599 template <class Impl>
600 Fault
602 {
603  using namespace TheISA;
604  // Make sure that a store exists.
605  assert(stores != 0);
606 
607  int store_idx = store_inst->sqIdx;
608 
609  DPRINTF(LSQUnit, "Executing store PC %s [sn:%lli]\n",
610  store_inst->pcState(), store_inst->seqNum);
611 
612  assert(!store_inst->isSquashed());
613 
614  // Check the recently completed loads to see if any match this store's
615  // address. If so, then we have a memory ordering violation.
616  typename LoadQueue::iterator loadIt = store_inst->lqIt;
617 
618  Fault store_fault = store_inst->initiateAcc();
619 
620  if (store_inst->isTranslationDelayed() &&
621  store_fault == NoFault)
622  return store_fault;
623 
624  if (!store_inst->readPredicate()) {
625  DPRINTF(LSQUnit, "Store [sn:%lli] not executed from predication\n",
626  store_inst->seqNum);
627  store_inst->forwardOldRegs();
628  return store_fault;
629  }
630 
631  if (storeQueue[store_idx].size() == 0) {
632  DPRINTF(LSQUnit,"Fault on Store PC %s, [sn:%lli], Size = 0\n",
633  store_inst->pcState(), store_inst->seqNum);
634 
635  return store_fault;
636  }
637 
638  assert(store_fault == NoFault);
639 
640  if (store_inst->isStoreConditional() || store_inst->isAtomic()) {
641  // Store conditionals and Atomics need to set themselves as able to
642  // writeback if we haven't had a fault by here.
643  storeQueue[store_idx].canWB() = true;
644 
645  ++storesToWB;
646  }
647 
648  return checkViolations(loadIt, store_inst);
649 
650 }
651 
652 template <class Impl>
653 void
655 {
656  assert(loadQueue.front().valid());
657 
658  DPRINTF(LSQUnit, "Committing head load instruction, PC %s\n",
659  loadQueue.front().instruction()->pcState());
660 
661  loadQueue.front().clear();
663 
664  --loads;
665 }
666 
667 template <class Impl>
668 void
670 {
671  assert(loads == 0 || loadQueue.front().valid());
672 
673  while (loads != 0 && loadQueue.front().instruction()->seqNum
674  <= youngest_inst) {
675  commitLoad();
676  }
677 }
678 
679 template <class Impl>
680 void
682 {
683  assert(stores == 0 || storeQueue.front().valid());
684 
685  /* Forward iterate the store queue (age order). */
686  for (auto& x : storeQueue) {
687  assert(x.valid());
688  // Mark any stores that are now committed and have not yet
689  // been marked as able to write back.
690  if (!x.canWB()) {
691  if (x.instruction()->seqNum > youngest_inst) {
692  break;
693  }
694  DPRINTF(LSQUnit, "Marking store as able to write back, PC "
695  "%s [sn:%lli]\n",
696  x.instruction()->pcState(),
697  x.instruction()->seqNum);
698 
699  x.canWB() = true;
700 
701  ++storesToWB;
702  }
703  }
704 }
705 
706 template <class Impl>
707 void
709 {
710  assert(isStoreBlocked);
711  storeWBIt->request()->sendPacketToCache();
712  if (storeWBIt->request()->isSent()){
713  storePostSend();
714  }
715 }
716 
717 template <class Impl>
718 void
720 {
721  if (isStoreBlocked) {
722  DPRINTF(LSQUnit, "Writing back blocked store\n");
724  }
725 
726  while (storesToWB > 0 &&
728  storeWBIt->valid() &&
729  storeWBIt->canWB() &&
730  ((!needsTSO) || (!storeInFlight)) &&
731  lsq->cachePortAvailable(false)) {
732 
733  if (isStoreBlocked) {
734  DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
735  " is blocked!\n");
736  break;
737  }
738 
739  // Store didn't write any data so no need to write it back to
740  // memory.
741  if (storeWBIt->size() == 0) {
742  /* It is important that the preincrement happens at (or before)
743  * the call, as the the code of completeStore checks
744  * storeWBIt. */
746  continue;
747  }
748 
749  if (storeWBIt->instruction()->isDataPrefetch()) {
750  storeWBIt++;
751  continue;
752  }
753 
754  assert(storeWBIt->hasRequest());
755  assert(!storeWBIt->committed());
756 
757  DynInstPtr inst = storeWBIt->instruction();
758  LSQRequest* req = storeWBIt->request();
759  storeWBIt->committed() = true;
760 
761  assert(!inst->memData);
762  inst->memData = new uint8_t[req->_size];
763 
764  if (storeWBIt->isAllZeros())
765  memset(inst->memData, 0, req->_size);
766  else
767  memcpy(inst->memData, storeWBIt->data(), req->_size);
768 
769 
770  if (req->senderState() == nullptr) {
771  SQSenderState *state = new SQSenderState(storeWBIt);
772  state->isLoad = false;
773  state->needWB = false;
774  state->inst = inst;
775 
776  req->senderState(state);
777  if (inst->isStoreConditional() || inst->isAtomic()) {
778  /* Only store conditionals and atomics need a writeback. */
779  state->needWB = true;
780  }
781  }
782  req->buildPackets();
783 
784  DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%s "
785  "to Addr:%#x, data:%#x [sn:%lli]\n",
786  storeWBIt.idx(), inst->pcState(),
787  req->request()->getPaddr(), (int)*(inst->memData),
788  inst->seqNum);
789 
790  // @todo: Remove this SC hack once the memory system handles it.
791  if (inst->isStoreConditional()) {
792  // Disable recording the result temporarily. Writing to
793  // misc regs normally updates the result, but this is not
794  // the desired behavior when handling store conditionals.
795  inst->recordResult(false);
796  bool success = TheISA::handleLockedWrite(inst.get(),
797  req->request(), cacheBlockMask);
798  inst->recordResult(true);
799  req->packetSent();
800 
801  if (!success) {
802  req->complete();
803  // Instantly complete this store.
804  DPRINTF(LSQUnit, "Store conditional [sn:%lli] failed. "
805  "Instantly completing it.\n",
806  inst->seqNum);
807  PacketPtr new_pkt = new Packet(*req->packet());
808  WritebackEvent *wb = new WritebackEvent(inst,
809  new_pkt, this);
810  cpu->schedule(wb, curTick() + 1);
812  if (!storeQueue.empty())
813  storeWBIt++;
814  else
815  storeWBIt = storeQueue.end();
816  continue;
817  }
818  }
819 
820  if (req->request()->isMmappedIpr()) {
821  assert(!inst->isStoreConditional());
822  ThreadContext *thread = cpu->tcBase(lsqID);
823  PacketPtr main_pkt = new Packet(req->mainRequest(),
825  main_pkt->dataStatic(inst->memData);
826  req->handleIprWrite(thread, main_pkt);
827  delete main_pkt;
829  storeWBIt++;
830  continue;
831  }
832  /* Send to cache */
833  req->sendPacketToCache();
834 
835  /* If successful, do the post send */
836  if (req->isSent()) {
837  storePostSend();
838  } else {
839  DPRINTF(LSQUnit, "D-Cache became blocked when writing [sn:%lli], "
840  "will retry later\n",
841  inst->seqNum);
842  }
843  }
844  assert(stores >= 0 && storesToWB >= 0);
845 }
846 
847 template <class Impl>
848 void
849 LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
850 {
851  DPRINTF(LSQUnit, "Squashing until [sn:%lli]!"
852  "(Loads:%i Stores:%i)\n", squashed_num, loads, stores);
853 
854  while (loads != 0 &&
855  loadQueue.back().instruction()->seqNum > squashed_num) {
856  DPRINTF(LSQUnit,"Load Instruction PC %s squashed, "
857  "[sn:%lli]\n",
858  loadQueue.back().instruction()->pcState(),
859  loadQueue.back().instruction()->seqNum);
860 
861  if (isStalled() && loadQueue.tail() == stallingLoadIdx) {
862  stalled = false;
863  stallingStoreIsn = 0;
864  stallingLoadIdx = 0;
865  }
866 
867  // Clear the smart pointer to make sure it is decremented.
868  loadQueue.back().instruction()->setSquashed();
869  loadQueue.back().clear();
870 
871  --loads;
872 
875  }
876 
877  if (memDepViolator && squashed_num < memDepViolator->seqNum) {
878  memDepViolator = NULL;
879  }
880 
881  while (stores != 0 &&
882  storeQueue.back().instruction()->seqNum > squashed_num) {
883  // Instructions marked as can WB are already committed.
884  if (storeQueue.back().canWB()) {
885  break;
886  }
887 
888  DPRINTF(LSQUnit,"Store Instruction PC %s squashed, "
889  "idx:%i [sn:%lli]\n",
890  storeQueue.back().instruction()->pcState(),
891  storeQueue.tail(), storeQueue.back().instruction()->seqNum);
892 
893  // I don't think this can happen. It should have been cleared
894  // by the stalling load.
895  if (isStalled() &&
896  storeQueue.back().instruction()->seqNum == stallingStoreIsn) {
897  panic("Is stalled should have been cleared by stalling load!\n");
898  stalled = false;
899  stallingStoreIsn = 0;
900  }
901 
902  // Clear the smart pointer to make sure it is decremented.
903  storeQueue.back().instruction()->setSquashed();
904 
905  // Must delete request now that it wasn't handed off to
906  // memory. This is quite ugly. @todo: Figure out the proper
907  // place to really handle request deletes.
908  storeQueue.back().clear();
909  --stores;
910 
911  storeQueue.pop_back();
913  }
914 }
915 
916 template <class Impl>
917 void
919 {
920  if (isStalled() &&
921  storeWBIt->instruction()->seqNum == stallingStoreIsn) {
922  DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
923  "load idx:%i\n",
925  stalled = false;
926  stallingStoreIsn = 0;
927  iewStage->replayMemInst(loadQueue[stallingLoadIdx].instruction());
928  }
929 
930  if (!storeWBIt->instruction()->isStoreConditional()) {
931  // The store is basically completed at this time. This
932  // only works so long as the checker doesn't try to
933  // verify the value in memory for stores.
934  storeWBIt->instruction()->setCompleted();
935 
936  if (cpu->checker) {
937  cpu->checker->verify(storeWBIt->instruction());
938  }
939  }
940 
941  if (needsTSO) {
942  storeInFlight = true;
943  }
944 
945  storeWBIt++;
946 }
947 
948 template <class Impl>
949 void
951 {
952  iewStage->wakeCPU();
953 
954  // Squashed instructions do not need to complete their access.
955  if (inst->isSquashed()) {
956  assert(!inst->isStore());
958  return;
959  }
960 
961  if (!inst->isExecuted()) {
962  inst->setExecuted();
963 
964  if (inst->fault == NoFault) {
965  // Complete access to copy data to proper place.
966  inst->completeAcc(pkt);
967  } else {
968  // If the instruction has an outstanding fault, we cannot complete
969  // the access as this discards the current fault.
970 
971  // If we have an outstanding fault, the fault should only be of
972  // type ReExec or - in case of a SplitRequest - a partial
973  // translation fault
974  assert(dynamic_cast<ReExec*>(inst->fault.get()) != nullptr ||
975  inst->savedReq->isPartialFault());
976 
977  DPRINTF(LSQUnit, "Not completing instruction [sn:%lli] access "
978  "due to pending fault.\n", inst->seqNum);
979  }
980  }
981 
982  // Need to insert instruction into queue to commit
983  iewStage->instToCommit(inst);
984 
985  iewStage->activityThisCycle();
986 
987  // see if this load changed the PC
988  iewStage->checkMisprediction(inst);
989 }
990 
991 template <class Impl>
992 void
994 {
995  assert(store_idx->valid());
996  store_idx->completed() = true;
997  --storesToWB;
998  // A bit conservative because a store completion may not free up entries,
999  // but hopefully avoids two store completions in one cycle from making
1000  // the CPU tick twice.
1001  cpu->wakeCPU();
1002  cpu->activityThisCycle();
1003 
1004  /* We 'need' a copy here because we may clear the entry from the
1005  * store queue. */
1006  DynInstPtr store_inst = store_idx->instruction();
1007  if (store_idx == storeQueue.begin()) {
1008  do {
1009  storeQueue.front().clear();
1010  storeQueue.pop_front();
1011  --stores;
1012  } while (storeQueue.front().completed() &&
1013  !storeQueue.empty());
1014 
1015  iewStage->updateLSQNextCycle = true;
1016  }
1017 
1018  DPRINTF(LSQUnit, "Completing store [sn:%lli], idx:%i, store head "
1019  "idx:%i\n",
1020  store_inst->seqNum, store_idx.idx() - 1, storeQueue.head() - 1);
1021 
1022 #if TRACING_ON
1023  if (DTRACE(O3PipeView)) {
1024  store_inst->storeTick =
1025  curTick() - store_inst->fetchTick;
1026  }
1027 #endif
1028 
1029  if (isStalled() &&
1030  store_inst->seqNum == stallingStoreIsn) {
1031  DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
1032  "load idx:%i\n",
1034  stalled = false;
1035  stallingStoreIsn = 0;
1036  iewStage->replayMemInst(loadQueue[stallingLoadIdx].instruction());
1037  }
1038 
1039  store_inst->setCompleted();
1040 
1041  if (needsTSO) {
1042  storeInFlight = false;
1043  }
1044 
1045  // Tell the checker we've completed this instruction. Some stores
1046  // may get reported twice to the checker, but the checker can
1047  // handle that case.
1048  // Store conditionals cannot be sent to the checker yet, they have
1049  // to update the misc registers first which should take place
1050  // when they commit
1051  if (cpu->checker && !store_inst->isStoreConditional()) {
1052  cpu->checker->verify(store_inst);
1053  }
1054 }
1055 
1056 template <class Impl>
1057 bool
1059 {
1060  bool ret = true;
1061  bool cache_got_blocked = false;
1062 
1063  auto state = dynamic_cast<LSQSenderState*>(data_pkt->senderState);
1064 
1065  if (!lsq->cacheBlocked() &&
1066  lsq->cachePortAvailable(isLoad)) {
1067  if (!dcachePort->sendTimingReq(data_pkt)) {
1068  ret = false;
1069  cache_got_blocked = true;
1070  }
1071  } else {
1072  ret = false;
1073  }
1074 
1075  if (ret) {
1076  if (!isLoad) {
1077  isStoreBlocked = false;
1078  }
1079  lsq->cachePortBusy(isLoad);
1080  state->outstanding++;
1081  state->request()->packetSent();
1082  } else {
1083  if (cache_got_blocked) {
1084  lsq->cacheBlocked(true);
1085  ++lsqCacheBlocked;
1086  }
1087  if (!isLoad) {
1088  assert(state->request() == storeWBIt->request());
1089  isStoreBlocked = true;
1090  }
1091  state->request()->packetNotSent();
1092  }
1093  return ret;
1094 }
1095 
1096 template <class Impl>
1097 void
1099 {
1100  if (isStoreBlocked) {
1101  DPRINTF(LSQUnit, "Receiving retry: blocked store\n");
1103  }
1104 }
1105 
1106 template <class Impl>
1107 void
1109 {
1110  cprintf("Load store queue: Dumping instructions.\n");
1111  cprintf("Load queue size: %i\n", loads);
1112  cprintf("Load queue: ");
1113 
1114  for (const auto& e: loadQueue) {
1115  const DynInstPtr &inst(e.instruction());
1116  cprintf("%s.[sn:%llu] ", inst->pcState(), inst->seqNum);
1117  }
1118  cprintf("\n");
1119 
1120  cprintf("Store queue size: %i\n", stores);
1121  cprintf("Store queue: ");
1122 
1123  for (const auto& e: storeQueue) {
1124  const DynInstPtr &inst(e.instruction());
1125  cprintf("%s.[sn:%llu] ", inst->pcState(), inst->seqNum);
1126  }
1127 
1128  cprintf("\n");
1129 }
1130 
1131 template <class Impl>
1132 unsigned int
1134 {
1135  return cpu->cacheLineSize();
1136 }
1137 
1138 #endif//__CPU_O3_LSQ_UNIT_IMPL_HH__
A MasterPort is a specialisation of a BaseMasterPort, which implements the default protocol for the t...
Definition: port.hh:75
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:167
#define DPRINTF(x,...)
Definition: trace.hh:229
MasterPort * dcachePort
Pointer to the dcache port.
Definition: lsq_unit.hh:405
unsigned numFreeLoadEntries()
Returns the number of free LQ entries.
void squash(const InstSeqNum &squashed_num)
Squashes all instructions younger than a specific sequence number.
Impl::DynInstPtr DynInstPtr
Definition: lsq_unit.hh:87
decltype(nullptr) constexpr NoFault
Definition: types.hh:245
Stats::Scalar invAddrSwpfs
Total number of software prefetches ignored due to invalid addresses.
Definition: lsq_unit.hh:569
Iterator to the circular queue.
iterator begin()
Iterators.
DynInstPtr memDepViolator
The oldest load that caused a memory ordering violation.
Definition: lsq_unit.hh:534
Bitfield< 7 > i
void completeStore(typename StoreQueue::iterator store_idx)
Completes the store at the specified index.
Stats::Scalar lsqForwLoads
Total number of loads forwaded from LSQ stores.
Definition: lsq_unit.hh:550
bool isStoreBlocked
Whehter or not a store is blocked due to the memory system.
Definition: lsq_unit.hh:528
static uint32_t moduloAdd(uint32_t op1, uint32_t op2, uint32_t size)
General modular addition.
uint32_t tail() const
LSQRequest * pendingRequest
The packet that is pending free cache ports.
Definition: lsq_unit.hh:541
DynInstPtr getMemDepViolator()
Returns the memory ordering violator.
The request is a Load locked/store conditional.
Definition: request.hh:148
Writeback event, specifically for when stores forward data to loads.
Definition: lsq_unit.hh:445
Declaration of a request, the overall memory request consisting of the parts of the request that are ...
void insertStore(const DynInstPtr &store_inst)
Inserts a store instruction.
bool trySendPacket(bool isLoad, PacketPtr data_pkt)
Attempts to send a packet to the cache.
bool full() const
Is the queue full? A queue is full if the head is the 0^{th} element and the tail is the (size-1)^{th...
reference back()
bool isStalled()
Returns whether or not the LSQ unit is stalled.
Definition: lsq_unit.hh:611
Stats::Scalar lsqRescheduledLoads
Number of loads that were rescheduled.
Definition: lsq_unit.hh:575
void resetState()
Reset the LSQ state.
bool handleLockedWrite(XC *xc, const RequestPtr &req, Addr cacheBlockMask)
Definition: locked_mem.hh:79
iterator getIterator(size_t idx)
Return an iterator to an index in the vector.
StoreQueue::iterator storeWBIt
The index of the first instruction that may be ready to be written back, and has not yet been written...
Definition: lsq_unit.hh:507
bool sendTimingReq(PacketPtr pkt)
Attempt to send a timing request to the slave port by calling its corresponding receive function...
Definition: port.hh:445
void regStats()
Registers statistics.
typename Impl::CPUPol::LSQ::LSQRequest LSQRequest
Definition: lsq_unit.hh:93
typename LSQ::LSQSenderState LSQSenderState
Definition: lsq_unit.hh:92
void insertLoad(const DynInstPtr &load_inst)
Inserts a load instruction.
Definition: lsq.hh:64
void storePostSend()
Handles completing the send of a store to memory.
size_t capacity() const
Stats::Scalar invAddrLoads
Total number of loads ignored due to invalid addresses.
Definition: lsq_unit.hh:553
bool isInvalidate() const
Definition: packet.hh:543
ThreadContext is the external interface to all thread state for anything outside of the CPU...
Stats::Scalar lsqIgnoredResponses
Total number of responses from the memory system that are ignored due to the instruction already bein...
Definition: lsq_unit.hh:560
Stats::Scalar lsqSquashedLoads
Total number of squashed loads.
Definition: lsq_unit.hh:556
void recvRetry()
Handles doing the retry.
size_t idx() const
OutputIterator has no extra requirements.
DynInstPtr inst
Instruction whose results are being written back.
Definition: lsq_unit.hh:460
Bitfield< 33 > id
void dataStatic(T *p)
Set the data pointer to the following value that should not be freed.
Definition: packet.hh:1040
void drainSanityCheck() const
Perform sanity checks after a drain.
void takeOverFrom()
Takes over from another CPU&#39;s thread.
void commitStores(InstSeqNum &youngest_inst)
Commits stores older than a specific sequence number.
void writeback(const DynInstPtr &inst, PacketPtr pkt)
Writes back the instruction, sending it to IEW.
ThreadID lsqID
The LSQUnit thread id.
Definition: lsq_unit.hh:480
Fault executeStore(const DynInstPtr &inst)
Executes a store instruction.
void dumpInsts() const
Debugging function to dump instructions in the LSQ.
bool storeInFlight
Whether or not a store is in flight.
Definition: lsq_unit.hh:531
Tick curTick()
The current simulated tick.
Definition: core.hh:47
void advance_tail()
Increases the tail by one.
iterator end()
void pop_back()
Circularly decrease the tail pointer.
void checkSnoop(PacketPtr pkt)
Check if an incoming invalidate hits in the lsq on a load that might have issued out of order wrt ano...
#define DTRACE(x)
Definition: trace.hh:227
IEW * iewStage
Pointer to the IEW stage.
Definition: lsq_unit.hh:399
void cachePortBusy(bool is_load)
Another store port is in use.
Definition: lsq_impl.hh:220
Fault executeLoad(const DynInstPtr &inst)
Executes a load instruction.
bool stalled
Whether or not the LSQ is stalled.
Definition: lsq_unit.hh:516
void commitLoads(InstSeqNum &youngest_inst)
Commits loads older than a specific sequence number.
bool recvTimingResp(PacketPtr pkt)
Handles writing back and completing the load or store that has returned from memory.
void commitLoad()
Commits the head load.
int stallingLoadIdx
The index of the above store.
Definition: lsq_unit.hh:522
void completeDataAccess(PacketPtr pkt)
Completes the data access that has been returned from the memory system.
Addr getAddr() const
Definition: packet.hh:726
uint64_t InstSeqNum
Definition: inst_seq.hh:40
Bitfield< 21 > ss
void setDcachePort(MasterPort *dcache_port)
Sets the pointer to the dcache port.
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:142
LSQUnit< Impl > * lsqPtr
The pointer to the LSQ unit that issued the store.
Definition: lsq_unit.hh:466
bool empty() const
Is the queue empty?
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
Definition: packet.hh:255
bool hasPendingRequest
Whether or not there is a packet that couldn&#39;t be sent because of a lack of cache ports...
Definition: lsq_unit.hh:538
void pop_front(size_t num_elem=1)
Circularly increase the head pointer.
PacketPtr retryPkt
The packet that needs to be retried.
Definition: lsq_unit.hh:525
InstSeqNum stallingStoreIsn
The store that causes the stall due to partial store to load forwarding.
Definition: lsq_unit.hh:520
Particularisation of the LSQSenderState to the SQ.
Definition: lsq_unit.hh:427
Stats::Scalar lsqSquashedStores
Total number of squashed stores.
Definition: lsq_unit.hh:566
void writebackBlockedStore()
Try to finish a previously blocked write back attempt.
Bitfield< 9 > e
LSQ * lsq
Pointer to the LSQ.
Definition: lsq_unit.hh:402
Derived & name(const std::string &name)
Set the name and marks this stat to print at the end of simulation.
Definition: statistics.hh:279
const char * description() const
Returns the description of this event.
int stores
The number of store instructions in the SQ.
Definition: lsq_unit.hh:500
std::string name() const
Returns the name of the LSQ unit.
bool checkLoads
Should loads be checked for dependency issues.
Definition: lsq_unit.hh:495
Declaration of the Packet class.
PacketPtr pkt
The packet that would have been sent to memory.
Definition: lsq_unit.hh:463
unsigned numFreeStoreEntries()
Returns the number of free SQ entries.
SenderState * senderState
This packet&#39;s sender state.
Definition: packet.hh:480
Definition: eventq.hh:189
void process()
Processes the writeback event.
unsigned int cacheLineSize()
bool dereferenceable() const
Test dereferenceability.
void init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params, LSQ *lsq_ptr, unsigned id)
Initializes the LSQ unit with the specified number of entries.
int storesToWB
The number of store instructions in the SQ waiting to writeback.
Definition: lsq_unit.hh:502
void writebackStores()
Writes back stores.
bool needsTSO
Flag for memory model.
Definition: lsq_unit.hh:544
bool cachePortAvailable(bool is_load) const
Is any store port available to use?
Definition: lsq_impl.hh:207
int loads
The number of load instructions in the LQ.
Definition: lsq_unit.hh:498
LSQUnit(uint32_t lqEntries, uint32_t sqEntries)
Constructs an LSQ unit.
O3CPU * cpu
Pointer to the CPU.
Definition: lsq_unit.hh:396
void handleLockedSnoop(XC *xc, PacketPtr pkt, Addr cacheBlockMask)
Definition: locked_mem.hh:60
Stats::Scalar lsqBlockedLoads
Ready loads blocked due to partial store-forwarding.
Definition: lsq_unit.hh:572
Derived & desc(const std::string &_desc)
Set the description and marks this stat to print at the end of simulation.
Definition: statistics.hh:312
Stats::Scalar lsqCacheBlocked
Number of times the LSQ is blocked due to the cache.
Definition: lsq_unit.hh:578
LoadQueue loadQueue
The load queue.
Definition: lsq_unit.hh:486
unsigned depCheckShift
The number of places to shift addresses in the LSQ before checking for dependency violations...
Definition: lsq_unit.hh:492
CircularQueue< SQEntry > storeQueue
The store queue.
Definition: lsq_unit.hh:483
bool cacheBlocked() const
Is D-cache blocked?
Definition: lsq_impl.hh:193
void insert(const DynInstPtr &inst)
Inserts an instruction.
Class that implements the actual LQ and SQ for each specific thread.
Definition: lsq_unit.hh:81
Fault checkViolations(typename LoadQueue::iterator &loadIt, const DynInstPtr &inst)
Check for ordering violations in the LSQ.
WritebackEvent(const DynInstPtr &_inst, PacketPtr pkt, LSQUnit *lsq_ptr)
Constructs a writeback event.
Bitfield< 1 > x
Definition: types.hh:105
Impl::O3CPU O3CPU
Definition: lsq_unit.hh:86
std::shared_ptr< FaultBase > Fault
Definition: types.hh:240
const std::string to_string(sc_enc enc)
Definition: sc_fxdefs.cc:60
Impl::CPUPol::IEW IEW
Definition: lsq_unit.hh:88
void handleLockedSnoopHit(XC *xc)
Definition: locked_mem.hh:72
Stats::Scalar lsqMemOrderViolation
Tota number of memory ordering violations.
Definition: lsq_unit.hh:563
void cprintf(const char *format, const Args &...args)
Definition: cprintf.hh:156
ProbePointArg< PacketInfo > Packet
Packet probe point.
Definition: mem.hh:104
reference front()
Addr cacheBlockMask
Address Mask for a cache block (e.g.
Definition: lsq_unit.hh:510

Generated on Fri Feb 28 2020 16:26:59 for gem5 by doxygen 1.8.13