gem5  v20.0.0.2
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
lsq_unit_impl.hh
Go to the documentation of this file.
1 
2 /*
3  * Copyright (c) 2010-2014, 2017-2020 ARM Limited
4  * Copyright (c) 2013 Advanced Micro Devices, Inc.
5  * All rights reserved
6  *
7  * The license below extends only to copyright in the software and shall
8  * not be construed as granting a license to any other intellectual
9  * property including but not limited to intellectual property relating
10  * to a hardware implementation of the functionality of the software
11  * licensed hereunder. You may use the software subject to the license
12  * terms below provided that you ensure that this notice is replicated
13  * unmodified and in its entirety in all distributions of the software,
14  * modified or unmodified, in source code or in binary form.
15  *
16  * Copyright (c) 2004-2005 The Regents of The University of Michigan
17  * All rights reserved.
18  *
19  * Redistribution and use in source and binary forms, with or without
20  * modification, are permitted provided that the following conditions are
21  * met: redistributions of source code must retain the above copyright
22  * notice, this list of conditions and the following disclaimer;
23  * redistributions in binary form must reproduce the above copyright
24  * notice, this list of conditions and the following disclaimer in the
25  * documentation and/or other materials provided with the distribution;
26  * neither the name of the copyright holders nor the names of its
27  * contributors may be used to endorse or promote products derived from
28  * this software without specific prior written permission.
29  *
30  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
31  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
32  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
33  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
34  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
36  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
37  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
38  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
39  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
40  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41  */
42 
43 #ifndef __CPU_O3_LSQ_UNIT_IMPL_HH__
44 #define __CPU_O3_LSQ_UNIT_IMPL_HH__
45 
47 #include "arch/locked_mem.hh"
48 #include "base/str.hh"
49 #include "config/the_isa.hh"
50 #include "cpu/checker/cpu.hh"
51 #include "cpu/o3/lsq.hh"
52 #include "cpu/o3/lsq_unit.hh"
53 #include "debug/Activity.hh"
54 #include "debug/IEW.hh"
55 #include "debug/LSQUnit.hh"
56 #include "debug/O3PipeView.hh"
57 #include "mem/packet.hh"
58 #include "mem/request.hh"
59 
// Constructor of an auto-deleting event (Event base is initialised with
// Default_Pri and AutoDelete) that remembers the instruction, the packet and
// the owning LSQ unit. The instruction must already carry a savedReq, which
// is told that a writeback has been scheduled.
// NOTE(review): listing line 61 (the declarator) is missing from this
// extract; presumably LSQUnit<Impl>::WritebackEvent::WritebackEvent(
// const DynInstPtr &_inst, ... -- confirm against the original header.
60 template<class Impl>
62  PacketPtr _pkt, LSQUnit *lsq_ptr)
63  : Event(Default_Pri, AutoDelete),
64  inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr)
65 {
66  assert(_inst->savedReq);
67  _inst->savedReq->writebackScheduled();
68 }
69 
// Event body: performs the deferred writeback of `inst` with `pkt` through
// the owning LSQ unit, notifies the instruction's savedReq that the
// writeback is done, and then deletes the packet held by this event.
// NOTE(review): listing line 72 (the declarator) is missing from this
// extract; presumably LSQUnit<Impl>::WritebackEvent::process() -- confirm.
70 template<class Impl>
71 void
73 {
    // The writeback must never run while the CPU is switched out.
74  assert(!lsqPtr->cpu->switchedOut());
75 
76  lsqPtr->writeback(inst, pkt);
77 
78  assert(inst->savedReq);
79  inst->savedReq->writebackDone();
    // The packet is released here, after the writeback has consumed it.
80  delete pkt;
81 }
82 
// Returns the fixed, human-readable label "Store writeback".
// NOTE(review): listing line 85 (the declarator) is missing from this
// extract; presumably LSQUnit<Impl>::WritebackEvent::description() const
// -- confirm.
83 template<class Impl>
84 const char *
86 {
87  return "Store writeback";
88 }
89 
// Handles a timing response packet. If the sender state attached to the
// packet is still alive the response is forwarded to its LSQRequest and that
// result is returned; otherwise only the sender state's `outstanding`
// counter is decremented and true is returned.
// NOTE(review): listing line 92 (the declarator) is missing from this
// extract; presumably LSQUnit<Impl>::recvTimingResp(PacketPtr pkt) -- confirm.
// NOTE(review): the dynamic_cast result is dereferenced without a null
// check, and request() is called before alive() is consulted.
90 template <class Impl>
91 bool
93 {
94  auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
95  LSQRequest* req = senderState->request();
96  assert(req != nullptr);
97  bool ret = true;
98  /* Check that the request is still alive before any further action. */
99  if (senderState->alive()) {
100  ret = req->recvTimingResp(pkt);
101  } else {
102  senderState->outstanding--;
103  }
104  return ret;
105 
106 }
107 
// Finishes a memory access whose response packet has arrived: notifies the
// ppDataAccessComplete probe, marks the sender state complete, and -- unless
// the instruction was squashed -- performs the writeback and/or completes
// the store-queue entry, depending on state->needWB and the instruction kind.
// NOTE(review): listing line 110 (the declarator) is missing from this
// extract; presumably LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
// -- confirm.
// NOTE(review): none of the dynamic_cast results below are null-checked.
108 template<class Impl>
109 void
111 {
112  LSQSenderState *state = dynamic_cast<LSQSenderState *>(pkt->senderState);
113  DynInstPtr inst = state->inst;
114 
115  cpu->ppDataAccessComplete->notify(std::make_pair(inst, pkt));
116 
117  /* Notify the sender state that the access is complete (for ownership
118  * tracking). */
119  state->complete();
120 
121  assert(!cpu->switchedOut());
122  if (!inst->isSquashed()) {
123  if (state->needWB) {
124  // Only loads, store conditionals and atomics perform the writeback
125  // after receiving the response from the memory
126  assert(inst->isLoad() || inst->isStoreConditional() ||
127  inst->isAtomic());
     // The writeback uses the request's main packet, not `pkt` itself.
128  writeback(inst, state->request()->mainPacket());
129  if (inst->isStore() || inst->isAtomic()) {
130  auto ss = dynamic_cast<SQSenderState*>(state);
131  ss->writebackDone();
132  completeStore(ss->idx);
133  }
134  } else if (inst->isStore()) {
135  // This is a regular store (i.e., not store conditionals and
136  // atomics), so it can complete without writing back
137  completeStore(dynamic_cast<SQSenderState*>(state)->idx);
138  }
139  }
140 }
141 
// Constructs an LSQ unit.
// @param lqEntries requested load-queue size; the queue is built with
//                  lqEntries+1 slots.
// @param sqEntries requested store-queue size; the queue is built with
//                  sqEntries+1 slots.
// All counters start at 0, all flags at false, pendingRequest at nullptr and
// lsqID at -1 (init() later assigns the real id).
142 template <class Impl>
143 LSQUnit<Impl>::LSQUnit(uint32_t lqEntries, uint32_t sqEntries)
144  : lsqID(-1), storeQueue(sqEntries+1), loadQueue(lqEntries+1),
145  loads(0), stores(0), storesToWB(0), cacheBlockMask(0), stalled(false),
146  isStoreBlocked(false), storeInFlight(false), hasPendingRequest(false),
147  pendingRequest(nullptr)
148 {
149 }
150 
// Second-phase initialisation: wires the unit to its CPU, IEW stage and
// parent LSQ, records its id, copies the dependency-check configuration from
// the parameters, and finally resets the dynamic state via resetState().
// @param cpu_ptr  CPU this unit belongs to.
// @param iew_ptr  IEW stage this unit reports to.
// @param params   source of LSQDepCheckShift, LSQCheckLoads and needsTSO.
// @param lsq_ptr  parent LSQ.
// @param id       value stored in lsqID.
151 template<class Impl>
152 void
153 LSQUnit<Impl>::init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params,
154  LSQ *lsq_ptr, unsigned id)
155 {
156  lsqID = id;
157 
158  cpu = cpu_ptr;
159  iewStage = iew_ptr;
160 
161  lsq = lsq_ptr;
162 
163  DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",lsqID);
164 
165  depCheckShift = params->LSQDepCheckShift;
166  checkLoads = params->LSQCheckLoads;
167  needsTSO = params->needsTSO;
168 
    // Must run after `cpu` is set: resetState() reads cpu->cacheLineSize().
169  resetState();
170 }
171 
172 
// Resets the unit's dynamic state: zeroes the load/store/writeback counters,
// rewinds the store-writeback iterator to the start of the store queue,
// clears the retry packet and the recorded memory-dependence violator,
// clears the stalled flag and recomputes the cache-block mask.
// NOTE(review): listing line 175 (the declarator) is missing from this
// extract; presumably LSQUnit<Impl>::resetState() -- confirm.
173 template<class Impl>
174 void
176 {
177  loads = stores = storesToWB = 0;
178 
179 
180  storeWBIt = storeQueue.begin();
181 
182  retryPkt = NULL;
183  memDepViolator = NULL;
184 
185  stalled = false;
186 
    // NOTE(review): yields a block-address mask only if cacheLineSize() is a
    // power of two -- presumably guaranteed elsewhere; confirm.
187  cacheBlockMask = ~(cpu->cacheLineSize() - 1);
188 }
189 
// Builds this unit's name from the IEW stage's name: "<iew>.lsq" when
// Impl::MaxThreads is 1, otherwise "<iew>.lsq.thread<lsqID>".
// NOTE(review): listing line 192 (the declarator) is missing from this
// extract; presumably LSQUnit<Impl>::name() const -- confirm.
190 template<class Impl>
191 std::string
193 {
194  if (Impl::MaxThreads == 1) {
195  return iewStage->name() + ".lsq";
196  } else {
197  return iewStage->name() + ".lsq.thread" + std::to_string(lsqID);
198  }
199 }
200 
// Assigns a name (prefixed with name()) and a description to ten statistics.
// NOTE(review): the declarator (listing line 203) and the statistic object
// that starts each `.name(...)` chain (listing lines 205, 209, 213, ...) are
// missing from this extract; presumably this is LSQUnit<Impl>::regStats()
// -- confirm against the original file before editing.
201 template<class Impl>
202 void
204 {
206  .name(name() + ".forwLoads")
207  .desc("Number of loads that had data forwarded from stores");
208 
210  .name(name() + ".invAddrLoads")
211  .desc("Number of loads ignored due to an invalid address");
212 
214  .name(name() + ".squashedLoads")
215  .desc("Number of loads squashed");
216 
218  .name(name() + ".ignoredResponses")
219  .desc("Number of memory responses ignored because the instruction is squashed");
220 
222  .name(name() + ".memOrderViolation")
223  .desc("Number of memory ordering violations");
224 
226  .name(name() + ".squashedStores")
227  .desc("Number of stores squashed");
228 
230  .name(name() + ".invAddrSwpfs")
231  .desc("Number of software prefetches ignored due to an invalid address");
232 
234  .name(name() + ".blockedLoads")
235  .desc("Number of blocked loads due to partial load-store forwarding");
236 
238  .name(name() + ".rescheduledLoads")
239  .desc("Number of loads that were rescheduled");
240 
242  .name(name() + ".cacheBlocked")
243  .desc("Number of times an access to memory failed due to the cache being blocked");
244 }
245 
// Stores the given data-cache port pointer in dcachePort.
// NOTE(review): listing line 248 (the declarator, which introduces
// `dcache_port`) is missing from this extract; presumably
// LSQUnit<Impl>::setDcachePort(MasterPort *dcache_port) -- confirm.
246 template<class Impl>
247 void
249 {
250  dcachePort = dcache_port;
251 }
252 
// Sanity check that asserts the unit is idle: no load-queue slot is valid,
// no stores are waiting to write back, and no retry packet is pending.
// NOTE(review): listing line 255 (the declarator) is missing from this
// extract; presumably LSQUnit<Impl>::drainSanityCheck() const -- confirm.
253 template<class Impl>
254 void
256 {
    // NOTE(review): `int i` is compared against capacity(), whose return
    // type is not visible here -- possible signed/unsigned comparison.
257  for (int i = 0; i < loadQueue.capacity(); ++i)
258  assert(!loadQueue[i].valid());
259 
260  assert(storesToWB == 0);
261  assert(!retryPkt);
262 }
263 
// Simply resets the unit's dynamic state by delegating to resetState().
// NOTE(review): listing line 266 (the declarator) is missing from this
// extract; presumably LSQUnit<Impl>::takeOverFrom() -- confirm.
264 template<class Impl>
265 void
267 {
268  resetState();
269 }
270 
// Inserts a memory instruction into the unit: loads go to insertLoad(),
// everything else (stores and atomics, per the assertion) to insertStore().
// The instruction is then flagged as being in the LSQ.
// NOTE(review): listing line 273 (the declarator, which introduces `inst`)
// is missing from this extract; presumably
// LSQUnit<Impl>::insert(const DynInstPtr &inst) -- confirm.
271 template <class Impl>
272 void
274 {
275  assert(inst->isMemRef());
276 
277  assert(inst->isLoad() || inst->isStore() || inst->isAtomic());
278 
279  if (inst->isLoad()) {
280  insertLoad(inst);
281  } else {
282  insertStore(inst);
283  }
284 
285  inst->setInLSQ();
286 }
287 
// Appends a load to the load queue: the new back entry is bound to the
// instruction, the instruction records its load-queue index/iterator and a
// store-queue iterator of storeQueue.end(), and the load count is bumped.
// NOTE(review): listing line 290 (the declarator, which introduces
// `load_inst`) and listing line 299 (the statement following the
// "Grow the queue" comment) are missing from this extract; presumably
// LSQUnit<Impl>::insertLoad(const DynInstPtr &load_inst) and a tail-advance
// call on loadQueue -- confirm.
288 template <class Impl>
289 void
291 {
292  assert(!loadQueue.full());
293  assert(loads < loadQueue.capacity());
294 
295  DPRINTF(LSQUnit, "Inserting load PC %s, idx:%i [sn:%lli]\n",
296  load_inst->pcState(), loadQueue.tail(), load_inst->seqNum);
297 
298  /* Grow the queue. */
300 
301  load_inst->sqIt = storeQueue.end();
302 
    // The freshly exposed back slot must not already hold an instruction.
303  assert(!loadQueue.back().valid());
304  loadQueue.back().set(load_inst);
305  load_inst->lqIdx = loadQueue.tail();
306  load_inst->lqIt = loadQueue.getIterator(load_inst->lqIdx);
307 
308  ++loads;
309 }
310 
// Appends a store to the store queue: advances the tail, records the new
// tail as the instruction's sqIdx, records the load-queue slot one past the
// current load tail as its lqIdx (with lqIt = loadQueue.end()), binds the
// new back entry to the instruction and bumps the store count.
// NOTE(review): listing line 313 (the declarator, which introduces
// `store_inst`) is missing from this extract; presumably
// LSQUnit<Impl>::insertStore(const DynInstPtr &store_inst) -- confirm.
311 template <class Impl>
312 void
314 {
315  // Make sure it is not full before inserting an instruction.
316  assert(!storeQueue.full());
317  assert(stores < storeQueue.capacity());
318 
    // Note: the idx logged here is the tail *before* advance_tail() runs.
319  DPRINTF(LSQUnit, "Inserting store PC %s, idx:%i [sn:%lli]\n",
320  store_inst->pcState(), storeQueue.tail(), store_inst->seqNum);
321  storeQueue.advance_tail();
322 
323  store_inst->sqIdx = storeQueue.tail();
324  store_inst->lqIdx = loadQueue.moduloAdd(loadQueue.tail(), 1);
325  store_inst->lqIt = loadQueue.end();
326 
327  storeQueue.back().set(store_inst);
328 
329  ++stores;
330 }
331 
// Returns the currently recorded memory-dependence violator and clears the
// record, so a second call returns NULL until a new violator is stored.
// NOTE(review): listing line 334 (the declarator) is missing from this
// extract; presumably LSQUnit<Impl>::getMemDepViolator() -- confirm.
332 template <class Impl>
333 typename Impl::DynInstPtr
335 {
336  DynInstPtr temp = memDepViolator;
337 
338  memDepViolator = NULL;
339 
340  return temp;
341 }
342 
// Returns loadQueue.capacity() - loads and logs the queue size.
// NOTE(review): listing line 345 (the declarator) is missing from this
// extract; presumably LSQUnit<Impl>::numFreeLoadEntries() -- confirm.
// NOTE(review): the pre-existing comment below says 1 is subtracted from the
// free entries, but the code subtracts nothing from the result and prints
// 1 + capacity(); whether capacity() already excludes the dummy entry is not
// visible here -- confirm and update the comment accordingly.
343 template <class Impl>
344 unsigned
346 {
347  //LQ has an extra dummy entry to differentiate
348  //empty/full conditions. Subtract 1 from the free entries.
349  DPRINTF(LSQUnit, "LQ size: %d, #loads occupied: %d\n",
350  1 + loadQueue.capacity(), loads);
351  return loadQueue.capacity() - loads;
352 }
353 
// Returns storeQueue.capacity() - stores and logs the queue size.
// NOTE(review): listing line 356 (the declarator) is missing from this
// extract; presumably LSQUnit<Impl>::numFreeStoreEntries() -- confirm.
// NOTE(review): the pre-existing comment below says 1 is subtracted from the
// free entries, but the code subtracts nothing from the result and prints
// 1 + capacity(); whether capacity() already excludes the dummy entry is not
// visible here -- confirm and update the comment accordingly.
354 template <class Impl>
355 unsigned
357 {
358  //SQ has an extra dummy entry to differentiate
359  //empty/full conditions. Subtract 1 from the free entries.
360  DPRINTF(LSQUnit, "SQ size: %d, #stores occupied: %d\n",
361  1 + storeQueue.capacity(), stores);
362  return storeQueue.capacity() - stores;
363 
364  }
365 
// Reacts to an invalidating snoop packet. For every thread context some
// per-context action is taken with noSquashFromTC temporarily forced to
// true. Then, if the load queue is not empty, the head load is checked for
// a hit on a load-locked address, and every younger load whose request hits
// the invalidated cache block is either marked for re-execution (ReExec
// fault) or flagged as having been hit by an external snoop.
// NOTE(review): listing line 368 (the declarator, which introduces `pkt`)
// and listing line 379 (the call made inside the per-context loop, the only
// use of `tc`) are missing from this extract; presumably
// LSQUnit<Impl>::checkSnoop(PacketPtr pkt) -- confirm.
366 template <class Impl>
367 void
369 {
370  // Should only ever get invalidations in here
371  assert(pkt->isInvalidate());
372 
373  DPRINTF(LSQUnit, "Got snoop for address %#x\n", pkt->getAddr());
374 
375  for (int x = 0; x < cpu->numContexts(); x++) {
376  ThreadContext *tc = cpu->getContext(x);
    // Save the flag, force it on for the (missing) call, then restore it.
377  bool no_squash = cpu->thread[x]->noSquashFromTC;
378  cpu->thread[x]->noSquashFromTC = true;
380  cpu->thread[x]->noSquashFromTC = no_squash;
381  }
382 
383  if (loadQueue.empty())
384  return;
385 
386  auto iter = loadQueue.begin();
387 
388  Addr invalidate_addr = pkt->getAddr() & cacheBlockMask;
389 
390  DynInstPtr ld_inst = iter->instruction();
391  assert(ld_inst);
392  LSQRequest *req = iter->request();
393 
394  // Check that this snoop didn't just invalidate our lock flag
395  if (ld_inst->effAddrValid() &&
396  req->isCacheBlockHit(invalidate_addr, cacheBlockMask)
397  && ld_inst->memReqFlags & Request::LLSC)
398  TheISA::handleLockedSnoopHit(ld_inst.get());
399 
    // Once set (TSO only), every remaining younger load is squashed too.
400  bool force_squash = false;
401 
    // The pre-increment skips the head load, which was handled above.
402  while (++iter != loadQueue.end()) {
403  ld_inst = iter->instruction();
404  assert(ld_inst);
405  req = iter->request();
406  if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered())
407  continue;
408 
409  DPRINTF(LSQUnit, "-- inst [sn:%lli] to pktAddr:%#x\n",
410  ld_inst->seqNum, invalidate_addr);
411 
412  if (force_squash ||
413  req->isCacheBlockHit(invalidate_addr, cacheBlockMask)) {
414  if (needsTSO) {
415  // If we have a TSO system, as all loads must be ordered with
416  // all other loads, this load as well as *all* subsequent loads
417  // need to be squashed to prevent possible load reordering.
418  force_squash = true;
419  }
420  if (ld_inst->possibleLoadViolation() || force_squash) {
421  DPRINTF(LSQUnit, "Conflicting load at addr %#x [sn:%lli]\n",
422  pkt->getAddr(), ld_inst->seqNum);
423 
424  // Mark the load for re-execution
425  ld_inst->fault = std::make_shared<ReExec>();
426  req->setStateToFault();
427  } else {
428  DPRINTF(LSQUnit, "HitExternal Snoop for addr %#x [sn:%lli]\n",
429  pkt->getAddr(), ld_inst->seqNum);
430 
431  // Make sure that we don't lose a snoop hitting a LOCKED
432  // address since the LOCK* flags don't get updated until
433  // commit.
434  if (ld_inst->memReqFlags & Request::LLSC)
435  TheISA::handleLockedSnoopHit(ld_inst.get());
436 
437  // If an older load checks this and it's true
438  // then we might have missed the snoop
439  // in which case we need to invalidate to be sure
440  ld_inst->hitExternalSnoop(true);
441  }
442  }
443  }
444  return;
445 }
446 
// Scans the load queue from `loadIt` to the end for loads whose address
// range (compared at depCheckShift granularity) overlaps that of `inst`.
// @param loadIt iterator to start from; advanced in place as the scan runs.
// @param inst   the load or store being checked against the queued loads.
// @return an M5PanicFault describing the first violation recorded in
//         memDepViolator, or NoFault if the scan finishes (or stops early
//         because an older violator is already recorded).
// NOTE(review): listing lines 455-459, 484 and 511 are missing from this
// extract (the latter two sit right after `memDepViolator = ld_inst;`,
// presumably a statistics increment) -- confirm against the original file.
447 template <class Impl>
448 Fault
449 LSQUnit<Impl>::checkViolations(typename LoadQueue::iterator& loadIt,
450  const DynInstPtr& inst)
451 {
    // First and last bytes touched by `inst`, reduced to check granularity.
452  Addr inst_eff_addr1 = inst->effAddr >> depCheckShift;
453  Addr inst_eff_addr2 = (inst->effAddr + inst->effSize - 1) >> depCheckShift;
454 
460  while (loadIt != loadQueue.end()) {
461  DynInstPtr ld_inst = loadIt->instruction();
462  if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()) {
463  ++loadIt;
464  continue;
465  }
466 
467  Addr ld_eff_addr1 = ld_inst->effAddr >> depCheckShift;
468  Addr ld_eff_addr2 =
469  (ld_inst->effAddr + ld_inst->effSize - 1) >> depCheckShift;
470 
     // Closed-interval overlap test between the two address ranges.
471  if (inst_eff_addr2 >= ld_eff_addr1 && inst_eff_addr1 <= ld_eff_addr2) {
472  if (inst->isLoad()) {
473  // If this load is to the same block as an external snoop
474  // invalidate that we've observed then the load needs to be
475  // squashed as it could have newer data
476  if (ld_inst->hitExternalSnoop()) {
477  if (!memDepViolator ||
478  ld_inst->seqNum < memDepViolator->seqNum) {
479  DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] "
480  "and [sn:%lli] at address %#x\n",
481  inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
482  memDepViolator = ld_inst;
483 
485 
486  return std::make_shared<GenericISA::M5PanicFault>(
487  "Detected fault with inst [sn:%lli] and "
488  "[sn:%lli] at address %#x\n",
489  inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
490  }
491  }
492 
493  // Otherwise, mark the load as having a possible load violation
494  // and if we see a snoop before it's committed, we need to squash
495  ld_inst->possibleLoadViolation(true);
496  DPRINTF(LSQUnit, "Found possible load violation at addr: %#x"
497  " between instructions [sn:%lli] and [sn:%lli]\n",
498  inst_eff_addr1, inst->seqNum, ld_inst->seqNum);
499  } else {
500  // A load/store incorrectly passed this store.
501  // Check if we already have a violator, or if it's newer
502  // squash and refetch.
503  if (memDepViolator && ld_inst->seqNum > memDepViolator->seqNum)
504  break;
505 
506  DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] and "
507  "[sn:%lli] at address %#x\n",
508  inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
509  memDepViolator = ld_inst;
510 
512 
513  return std::make_shared<GenericISA::M5PanicFault>(
514  "Detected fault with "
515  "inst [sn:%lli] and [sn:%lli] at address %#x\n",
516  inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
517  }
518  }
519 
520  ++loadIt;
521  }
522  return NoFault;
523 }
524 
525 
526 
527 
// Executes a load: initiates the access and then, depending on the outcome,
// (a) completes it immediately when the memory-access predicate is false,
// (b) returns early while translation is delayed or a partial fault is still
// pending, (c) sends a faulting or predicated-false load to commit, or
// (d) checks younger loads for ordering violations when checkLoads is set.
// @return the fault from initiateAcc(), the result of checkViolations(), or
//         NoFault on the early-out paths.
// NOTE(review): listing line 530 (the declarator, which introduces `inst`)
// is missing from this extract; presumably
// LSQUnit<Impl>::executeLoad(const DynInstPtr &inst) -- confirm.
528 template <class Impl>
529 Fault
531 {
532  using namespace TheISA;
533  // Execute a specific load.
534  Fault load_fault = NoFault;
535 
536  DPRINTF(LSQUnit, "Executing load PC %s, [sn:%lli]\n",
537  inst->pcState(), inst->seqNum);
538 
539  assert(!inst->isSquashed());
540 
541  load_fault = inst->initiateAcc();
542 
    // No memory access needed: finish the instruction without a packet.
543  if (load_fault == NoFault && !inst->readMemAccPredicate()) {
544  assert(inst->readPredicate());
545  inst->setExecuted();
546  inst->completeAcc(nullptr);
547  iewStage->instToCommit(inst);
548  iewStage->activityThisCycle();
549  return NoFault;
550  }
551 
552  if (inst->isTranslationDelayed() && load_fault == NoFault)
553  return load_fault;
554 
555  if (load_fault != NoFault && inst->translationCompleted() &&
556  inst->savedReq->isPartialFault() && !inst->savedReq->isComplete()) {
557  assert(inst->savedReq->isSplit());
558  // If we have a partial fault where the mem access is not complete yet
559  // then the cache must have been blocked. This load will be re-executed
560  // when the cache gets unblocked. We will handle the fault when the
561  // mem access is complete.
562  return NoFault;
563  }
564 
565  // If the instruction faulted or predicated false, then we need to send it
566  // along to commit without the instruction completing.
567  if (load_fault != NoFault || !inst->readPredicate()) {
568  // Send this instruction to commit, also make sure iew stage
569  // realizes there is activity. Mark it as executed unless it
570  // is a strictly ordered load that needs to hit the head of
571  // commit.
572  if (!inst->readPredicate())
573  inst->forwardOldRegs();
574  DPRINTF(LSQUnit, "Load [sn:%lli] not executed from %s\n",
575  inst->seqNum,
576  (load_fault != NoFault ? "fault" : "predication"));
577  if (!(inst->hasRequest() && inst->strictlyOrdered()) ||
578  inst->isAtCommit()) {
579  inst->setExecuted();
580  }
581  iewStage->instToCommit(inst);
582  iewStage->activityThisCycle();
583  } else {
584  if (inst->effAddrValid()) {
     // Start the violation scan at the entry right after this load.
585  auto it = inst->lqIt;
586  ++it;
587 
588  if (checkLoads)
589  return checkViolations(it, inst);
590  }
591  }
592 
593  return load_fault;
594 }
595 
// Executes a store: initiates the access, returns early when translation is
// delayed, the store is predicated false, or its store-queue entry has size
// 0; marks store conditionals and atomics as ready to write back; and
// finally checks the loads starting at the store's lqIt for ordering
// violations.
// @return the fault from initiateAcc() on the early-out paths, otherwise the
//         result of checkViolations().
// NOTE(review): listing line 598 (the declarator, which introduces
// `store_inst`) is missing from this extract; presumably
// LSQUnit<Impl>::executeStore(const DynInstPtr &store_inst) -- confirm.
596 template <class Impl>
597 Fault
599 {
600  using namespace TheISA;
601  // Make sure that a store exists.
602  assert(stores != 0);
603 
604  int store_idx = store_inst->sqIdx;
605 
606  DPRINTF(LSQUnit, "Executing store PC %s [sn:%lli]\n",
607  store_inst->pcState(), store_inst->seqNum);
608 
609  assert(!store_inst->isSquashed());
610 
611  // Check the recently completed loads to see if any match this store's
612  // address. If so, then we have a memory ordering violation.
613  typename LoadQueue::iterator loadIt = store_inst->lqIt;
614 
615  Fault store_fault = store_inst->initiateAcc();
616 
617  if (store_inst->isTranslationDelayed() &&
618  store_fault == NoFault)
619  return store_fault;
620 
621  if (!store_inst->readPredicate()) {
622  DPRINTF(LSQUnit, "Store [sn:%lli] not executed from predication\n",
623  store_inst->seqNum);
624  store_inst->forwardOldRegs();
625  return store_fault;
626  }
627 
628  if (storeQueue[store_idx].size() == 0) {
629  DPRINTF(LSQUnit,"Fault on Store PC %s, [sn:%lli], Size = 0\n",
630  store_inst->pcState(), store_inst->seqNum);
631 
632  return store_fault;
633  }
634 
    // Any fault must have been returned through the size == 0 path above.
635  assert(store_fault == NoFault);
636 
637  if (store_inst->isStoreConditional() || store_inst->isAtomic()) {
638  // Store conditionals and Atomics need to set themselves as able to
639  // writeback if we haven't had a fault by here.
640  storeQueue[store_idx].canWB() = true;
641 
642  ++storesToWB;
643  }
644 
645  return checkViolations(loadIt, store_inst);
646 
647 }
648 
// Commits the load at the head of the load queue: clears the head entry and
// decrements the load count.
// NOTE(review): listing line 651 (the declarator) and listing line 659 (the
// statement between clear() and --loads) are missing from this extract;
// presumably LSQUnit<Impl>::commitLoad() and a pop of the queue head
// -- confirm.
649 template <class Impl>
650 void
652 {
653  assert(loadQueue.front().valid());
654 
655  DPRINTF(LSQUnit, "Committing head load instruction, PC %s\n",
656  loadQueue.front().instruction()->pcState());
657 
658  loadQueue.front().clear();
660 
661  --loads;
662 }
663 
// Commits head loads, one commitLoad() call at a time, for as long as loads
// remain and the head load's sequence number is <= youngest_inst.
// NOTE(review): listing line 666 (the declarator, which introduces
// `youngest_inst`) is missing from this extract; presumably
// LSQUnit<Impl>::commitLoads(InstSeqNum &youngest_inst) -- confirm.
664 template <class Impl>
665 void
667 {
668  assert(loads == 0 || loadQueue.front().valid());
669 
670  while (loads != 0 && loadQueue.front().instruction()->seqNum
671  <= youngest_inst) {
672  commitLoad();
673  }
674 }
675 
// Walks the store queue from oldest to youngest and marks every store that
// is not yet writeback-capable and whose sequence number is <= youngest_inst
// as able to write back, incrementing storesToWB for each. The walk stops at
// the first not-yet-marked store that is younger than youngest_inst.
// NOTE(review): listing line 678 (the declarator, which introduces
// `youngest_inst`) is missing from this extract; presumably
// LSQUnit<Impl>::commitStores(InstSeqNum &youngest_inst) -- confirm.
676 template <class Impl>
677 void
679 {
680  assert(stores == 0 || storeQueue.front().valid());
681 
682  /* Forward iterate the store queue (age order). */
683  for (auto& x : storeQueue) {
684  assert(x.valid());
685  // Mark any stores that are now committed and have not yet
686  // been marked as able to write back.
687  if (!x.canWB()) {
688  if (x.instruction()->seqNum > youngest_inst) {
689  break;
690  }
691  DPRINTF(LSQUnit, "Marking store as able to write back, PC "
692  "%s [sn:%lli]\n",
693  x.instruction()->pcState(),
694  x.instruction()->seqNum);
695 
696  x.canWB() = true;
697 
698  ++storesToWB;
699  }
700  }
701 }
702 
// Retries the store that storeWBIt points at: re-sends its request to the
// cache and, if the request reports itself as sent, runs storePostSend().
// Must only be called while isStoreBlocked is set.
// NOTE(review): listing line 705 (the declarator) is missing from this
// extract; presumably LSQUnit<Impl>::writebackBlockedStore() -- confirm.
703 template <class Impl>
704 void
706 {
707  assert(isStoreBlocked);
708  storeWBIt->request()->sendPacketToCache();
709  if (storeWBIt->request()->isSent()){
710  storePostSend();
711  }
712 }
713 
// Drains writeback-capable stores, starting at storeWBIt, for as long as the
// loop condition holds (stores pending, entry valid and canWB, TSO allows
// it, and a cache port is available). For each store it copies the data
// into inst->memData, attaches an SQSenderState if the request has none,
// builds the packets and then either (a) completes a failed store
// conditional via a WritebackEvent scheduled for the next tick, (b) performs
// a local access directly, or (c) sends the request to the cache.
// NOTE(review): the declarator (listing line 716) and listing lines 720,
// 724, 742, 823, 836 and 840 are missing from this extract -- they include
// the body of the `if (isStoreBlocked)` at the top, one loop condition, the
// statement after the "preincrement" comment, and the second argument line
// of the local-access Packet constructor. Presumably
// LSQUnit<Impl>::writebackStores(); do not edit this block without the
// original file.
714 template <class Impl>
715 void
717 {
718  if (isStoreBlocked) {
719  DPRINTF(LSQUnit, "Writing back blocked store\n");
721  }
722 
723  while (storesToWB > 0 &&
725  storeWBIt->valid() &&
726  storeWBIt->canWB() &&
727  ((!needsTSO) || (!storeInFlight)) &&
728  lsq->cachePortAvailable(false)) {
729 
730  if (isStoreBlocked) {
731  DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
732  " is blocked!\n");
733  break;
734  }
735 
736  // Store didn't write any data so no need to write it back to
737  // memory.
738  if (storeWBIt->size() == 0) {
739  /* It is important that the preincrement happens at (or before)
740  * the call, as the code of completeStore checks
741  * storeWBIt. */
743  continue;
744  }
745 
746  if (storeWBIt->instruction()->isDataPrefetch()) {
747  storeWBIt++;
748  continue;
749  }
750 
751  assert(storeWBIt->hasRequest());
752  assert(!storeWBIt->committed());
753 
754  DynInstPtr inst = storeWBIt->instruction();
755  LSQRequest* req = storeWBIt->request();
756 
757  // Process store conditionals or store release after all previous
758  // stores are completed
759  if ((req->mainRequest()->isLLSC() ||
760  req->mainRequest()->isRelease()) &&
761  (storeWBIt.idx() != storeQueue.head())) {
762  DPRINTF(LSQUnit, "Store idx:%i PC:%s to Addr:%#x "
763  "[sn:%lli] is %s%s and not head of the queue\n",
764  storeWBIt.idx(), inst->pcState(),
765  req->request()->getPaddr(), inst->seqNum,
766  req->mainRequest()->isLLSC() ? "SC" : "",
767  req->mainRequest()->isRelease() ? "/Release" : "");
768  break;
769  }
770 
771  storeWBIt->committed() = true;
772 
     // Raw buffer handed to the instruction; it is not freed in this block.
773  assert(!inst->memData);
774  inst->memData = new uint8_t[req->_size];
775 
776  if (storeWBIt->isAllZeros())
777  memset(inst->memData, 0, req->_size);
778  else
779  memcpy(inst->memData, storeWBIt->data(), req->_size);
780 
781 
782  if (req->senderState() == nullptr) {
783  SQSenderState *state = new SQSenderState(storeWBIt);
784  state->isLoad = false;
785  state->needWB = false;
786  state->inst = inst;
787 
788  req->senderState(state);
789  if (inst->isStoreConditional() || inst->isAtomic()) {
790  /* Only store conditionals and atomics need a writeback. */
791  state->needWB = true;
792  }
793  }
794  req->buildPackets();
795 
     // Only the first byte of memData is printed as "data".
796  DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%s "
797  "to Addr:%#x, data:%#x [sn:%lli]\n",
798  storeWBIt.idx(), inst->pcState(),
799  req->request()->getPaddr(), (int)*(inst->memData),
800  inst->seqNum);
801 
802  // @todo: Remove this SC hack once the memory system handles it.
803  if (inst->isStoreConditional()) {
804  // Disable recording the result temporarily. Writing to
805  // misc regs normally updates the result, but this is not
806  // the desired behavior when handling store conditionals.
807  inst->recordResult(false);
808  bool success = TheISA::handleLockedWrite(inst.get(),
809  req->request(), cacheBlockMask);
810  inst->recordResult(true);
811  req->packetSent();
812 
813  if (!success) {
814  req->complete();
815  // Instantly complete this store.
816  DPRINTF(LSQUnit, "Store conditional [sn:%lli] failed. "
817  "Instantly completing it.\n",
818  inst->seqNum);
      // The copy is deleted by the WritebackEvent when it is processed.
819  PacketPtr new_pkt = new Packet(*req->packet());
820  WritebackEvent *wb = new WritebackEvent(inst,
821  new_pkt, this);
822  cpu->schedule(wb, curTick() + 1);
824  if (!storeQueue.empty())
825  storeWBIt++;
826  else
827  storeWBIt = storeQueue.end();
828  continue;
829  }
830  }
831 
832  if (req->request()->isLocalAccess()) {
833  assert(!inst->isStoreConditional());
834  ThreadContext *thread = cpu->tcBase(lsqID);
835  PacketPtr main_pkt = new Packet(req->mainRequest(),
837  main_pkt->dataStatic(inst->memData);
838  req->request()->localAccessor(thread, main_pkt);
839  delete main_pkt;
841  storeWBIt++;
842  continue;
843  }
844  /* Send to cache */
845  req->sendPacketToCache();
846 
847  /* If successful, do the post send */
848  if (req->isSent()) {
849  storePostSend();
850  } else {
851  DPRINTF(LSQUnit, "D-Cache became blocked when writing [sn:%lli], "
852  "will retry later\n",
853  inst->seqNum);
854  }
855  }
    // NOTE(review): if stores/storesToWB are unsigned this assertion can
    // never fail -- their declarations are not visible here; confirm.
856  assert(stores >= 0 && storesToWB >= 0);
857 }
858 
// Squashes every load, and every not-yet-writeback-capable store, whose
// sequence number is greater than `squashed_num`, working from the young end
// of each queue. Also drops a recorded memory-dependence violator that is
// younger than `squashed_num`, and clears the stall state when the stalling
// load itself is squashed.
// @param squashed_num instructions with a larger sequence number are removed.
// NOTE(review): listing lines 885-886 (after `--loads;`) and 924 (after
// `storeQueue.pop_back();`) are missing from this extract. The store loop
// visibly pops the queue tail; the load loop's equivalent statement is
// presumably on one of the missing lines -- confirm against the original.
859 template <class Impl>
860 void
861 LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
862 {
863  DPRINTF(LSQUnit, "Squashing until [sn:%lli]!"
864  "(Loads:%i Stores:%i)\n", squashed_num, loads, stores);
865 
866  while (loads != 0 &&
867  loadQueue.back().instruction()->seqNum > squashed_num) {
868  DPRINTF(LSQUnit,"Load Instruction PC %s squashed, "
869  "[sn:%lli]\n",
870  loadQueue.back().instruction()->pcState(),
871  loadQueue.back().instruction()->seqNum);
872 
     // The load that caused the stall is going away, so lift the stall.
873  if (isStalled() && loadQueue.tail() == stallingLoadIdx) {
874  stalled = false;
875  stallingStoreIsn = 0;
876  stallingLoadIdx = 0;
877  }
878 
879  // Clear the smart pointer to make sure it is decremented.
880  loadQueue.back().instruction()->setSquashed();
881  loadQueue.back().clear();
882 
883  --loads;
884 
887  }
888 
889  if (memDepViolator && squashed_num < memDepViolator->seqNum) {
890  memDepViolator = NULL;
891  }
892 
893  while (stores != 0 &&
894  storeQueue.back().instruction()->seqNum > squashed_num) {
895  // Instructions marked as can WB are already committed.
896  if (storeQueue.back().canWB()) {
897  break;
898  }
899 
900  DPRINTF(LSQUnit,"Store Instruction PC %s squashed, "
901  "idx:%i [sn:%lli]\n",
902  storeQueue.back().instruction()->pcState(),
903  storeQueue.tail(), storeQueue.back().instruction()->seqNum);
904 
905  // I don't think this can happen. It should have been cleared
906  // by the stalling load.
     // NOTE(review): the two assignments after panic() are presumably
     // unreachable (assuming panic() does not return) -- confirm.
907  if (isStalled() &&
908  storeQueue.back().instruction()->seqNum == stallingStoreIsn) {
909  panic("Is stalled should have been cleared by stalling load!\n");
910  stalled = false;
911  stallingStoreIsn = 0;
912  }
913 
914  // Clear the smart pointer to make sure it is decremented.
915  storeQueue.back().instruction()->setSquashed();
916 
917  // Must delete request now that it wasn't handed off to
918  // memory. This is quite ugly. @todo: Figure out the proper
919  // place to really handle request deletes.
920  storeQueue.back().clear();
921  --stores;
922 
923  storeQueue.pop_back();
925  }
926 }
927 
// Bookkeeping after the store at storeWBIt has been sent: lifts a stall
// caused by this store (and replays the stalled load), marks non-conditional
// stores as completed and reports them to the checker if one exists, records
// a store in flight under TSO, and advances storeWBIt to the next entry.
// NOTE(review): listing line 930 (the declarator) and listing line 936 (the
// DPRINTF argument line) are missing from this extract; presumably
// LSQUnit<Impl>::storePostSend() -- confirm.
928 template <class Impl>
929 void
931 {
932  if (isStalled() &&
933  storeWBIt->instruction()->seqNum == stallingStoreIsn) {
934  DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
935  "load idx:%i\n",
937  stalled = false;
938  stallingStoreIsn = 0;
939  iewStage->replayMemInst(loadQueue[stallingLoadIdx].instruction());
940  }
941 
942  if (!storeWBIt->instruction()->isStoreConditional()) {
943  // The store is basically completed at this time. This
944  // only works so long as the checker doesn't try to
945  // verify the value in memory for stores.
946  storeWBIt->instruction()->setCompleted();
947 
948  if (cpu->checker) {
949  cpu->checker->verify(storeWBIt->instruction());
950  }
951  }
952 
953  if (needsTSO) {
954  storeInFlight = true;
955  }
956 
957  storeWBIt++;
958 }
959 
// Writes back an instruction whose memory access has finished: wakes the
// CPU through the IEW stage, returns early for squashed instructions,
// completes the access from `pkt` if the instruction has not executed yet
// and carries no fault, then hands the instruction to commit and checks it
// for a misprediction.
// NOTE(review): listing line 962 (the declarator, which introduces `inst`
// and `pkt`) and listing line 969 (a statement inside the squashed branch)
// are missing from this extract; presumably
// LSQUnit<Impl>::writeback(const DynInstPtr &inst, PacketPtr pkt) -- confirm.
960 template <class Impl>
961 void
963 {
964  iewStage->wakeCPU();
965 
966  // Squashed instructions do not need to complete their access.
967  if (inst->isSquashed()) {
968  assert(!inst->isStore());
970  return;
971  }
972 
973  if (!inst->isExecuted()) {
974  inst->setExecuted();
975 
976  if (inst->fault == NoFault) {
977  // Complete access to copy data to proper place.
978  inst->completeAcc(pkt);
979  } else {
980  // If the instruction has an outstanding fault, we cannot complete
981  // the access as this discards the current fault.
982 
983  // If we have an outstanding fault, the fault should only be of
984  // type ReExec or - in case of a SplitRequest - a partial
985  // translation fault
986  assert(dynamic_cast<ReExec*>(inst->fault.get()) != nullptr ||
987  inst->savedReq->isPartialFault());
988 
989  DPRINTF(LSQUnit, "Not completing instruction [sn:%lli] access "
990  "due to pending fault.\n", inst->seqNum);
991  }
992  }
993 
994  // Need to insert instruction into queue to commit
995  iewStage->instToCommit(inst);
996 
997  iewStage->activityThisCycle();
998 
999  // see if this load changed the PC
1000  iewStage->checkMisprediction(inst);
1001 }
1002 
// Marks the store at `store_idx` as completed, decrements storesToWB and
// wakes the CPU. If that store is the oldest one, it and every completed
// store directly behind it are cleared and popped from the queue. Afterwards
// a stall caused by this store is lifted (replaying the stalled load), the
// instruction is marked completed, the TSO in-flight flag is cleared, and
// non-conditional stores are reported to the checker if one exists.
// NOTE(review): listing line 1005 (the declarator, which introduces
// `store_idx`) and listing line 1045 (the DPRINTF argument line) are missing
// from this extract; presumably LSQUnit<Impl>::completeStore(
// typename StoreQueue::iterator store_idx) -- confirm.
1003 template <class Impl>
1004 void
1006 {
1007  assert(store_idx->valid());
1008  store_idx->completed() = true;
1009  --storesToWB;
1010  // A bit conservative because a store completion may not free up entries,
1011  // but hopefully avoids two store completions in one cycle from making
1012  // the CPU tick twice.
1013  cpu->wakeCPU();
1014  cpu->activityThisCycle();
1015 
1016  /* We 'need' a copy here because we may clear the entry from the
1017  * store queue. */
1018  DynInstPtr store_inst = store_idx->instruction();
1019  if (store_idx == storeQueue.begin()) {
     // NOTE(review): the loop condition reads front().completed() before
     // testing empty(), so front() is evaluated on a queue that may have
     // just become empty. Whether that is safe depends on the queue's
     // front() semantics, which are not visible here -- confirm, or swap
     // the two operands.
1020  do {
1021  storeQueue.front().clear();
1022  storeQueue.pop_front();
1023  --stores;
1024  } while (storeQueue.front().completed() &&
1025  !storeQueue.empty());
1026 
1027  iewStage->updateLSQNextCycle = true;
1028  }
1029 
1030  DPRINTF(LSQUnit, "Completing store [sn:%lli], idx:%i, store head "
1031  "idx:%i\n",
1032  store_inst->seqNum, store_idx.idx() - 1, storeQueue.head() - 1);
1033 
1034 #if TRACING_ON
1035  if (DTRACE(O3PipeView)) {
1036  store_inst->storeTick =
1037  curTick() - store_inst->fetchTick;
1038  }
1039 #endif
1040 
1041  if (isStalled() &&
1042  store_inst->seqNum == stallingStoreIsn) {
1043  DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
1044  "load idx:%i\n",
1046  stalled = false;
1047  stallingStoreIsn = 0;
1048  iewStage->replayMemInst(loadQueue[stallingLoadIdx].instruction());
1049  }
1050 
1051  store_inst->setCompleted();
1052 
1053  if (needsTSO) {
1054  storeInFlight = false;
1055  }
1056 
1057  // Tell the checker we've completed this instruction. Some stores
1058  // may get reported twice to the checker, but the checker can
1059  // handle that case.
1060  // Store conditionals cannot be sent to the checker yet, they have
1061  // to update the misc registers first which should take place
1062  // when they commit
1063  if (cpu->checker && !store_inst->isStoreConditional()) {
1064  cpu->checker->verify(store_inst);
1065  }
1066 }
1067 
// Attempts to send `data_pkt` to the data cache.
// The packet is only offered to the port when the LSQ reports the cache as
// not blocked and a port of the right kind (load/store) as available.
// On success the port is marked busy, the sender state's outstanding count
// is incremented and the request is told the packet was sent; for stores the
// blocked flag is cleared. On failure the request is told the packet was not
// sent; for stores the blocked flag is set; and if the port itself refused
// the packet the cache is marked blocked and lsqCacheBlocked is incremented.
// @return true if the packet was accepted by the port, false otherwise.
// NOTE(review): listing line 1070 (the declarator, which introduces `isLoad`
// and `data_pkt`) is missing from this extract; presumably
// LSQUnit<Impl>::trySendPacket(bool isLoad, PacketPtr data_pkt) -- confirm.
// NOTE(review): the dynamic_cast result `state` is used without a null check.
1068 template <class Impl>
1069 bool
1071 {
1072  bool ret = true;
1073  bool cache_got_blocked = false;
1074 
1075  auto state = dynamic_cast<LSQSenderState*>(data_pkt->senderState);
1076 
1077  if (!lsq->cacheBlocked() &&
1078  lsq->cachePortAvailable(isLoad)) {
1079  if (!dcachePort->sendTimingReq(data_pkt)) {
1080  ret = false;
1081  cache_got_blocked = true;
1082  }
1083  } else {
1084  ret = false;
1085  }
1086 
1087  if (ret) {
1088  if (!isLoad) {
1089  isStoreBlocked = false;
1090  }
1091  lsq->cachePortBusy(isLoad);
1092  state->outstanding++;
1093  state->request()->packetSent();
1094  } else {
1095  if (cache_got_blocked) {
1096  lsq->cacheBlocked(true);
1097  ++lsqCacheBlocked;
1098  }
1099  if (!isLoad) {
     // A failing store must be the one storeWBIt currently points at.
1100  assert(state->request() == storeWBIt->request());
1101  isStoreBlocked = true;
1102  }
1103  state->request()->packetNotSent();
1104  }
1105  return ret;
1106 }
1107 
// Retry notification: only acts when a store is currently blocked.
// NOTE(review): listing line 1110 (the declarator) and listing line 1114
// (the statement following the DPRINTF) are missing from this extract;
// presumably LSQUnit<Impl>::recvRetry() calling writebackBlockedStore()
// -- confirm.
1108 template <class Impl>
1109 void
1111 {
1112  if (isStoreBlocked) {
1113  DPRINTF(LSQUnit, "Receiving retry: blocked store\n");
1115  }
1116 }
1117 
// Debug dump via cprintf: prints the load count followed by the PC state and
// sequence number of every load-queue entry, then the same for the stores.
// NOTE(review): listing line 1120 (the declarator) is missing from this
// extract; presumably LSQUnit<Impl>::dumpInsts() const -- confirm.
1118 template <class Impl>
1119 void
1121 {
1122  cprintf("Load store queue: Dumping instructions.\n");
1123  cprintf("Load queue size: %i\n", loads);
1124  cprintf("Load queue: ");
1125 
1126  for (const auto& e: loadQueue) {
1127  const DynInstPtr &inst(e.instruction());
1128  cprintf("%s.[sn:%llu] ", inst->pcState(), inst->seqNum);
1129  }
1130  cprintf("\n");
1131 
1132  cprintf("Store queue size: %i\n", stores);
1133  cprintf("Store queue: ");
1134 
1135  for (const auto& e: storeQueue) {
1136  const DynInstPtr &inst(e.instruction());
1137  cprintf("%s.[sn:%llu] ", inst->pcState(), inst->seqNum);
1138  }
1139 
1140  cprintf("\n");
1141 }
1142 
// Returns the cache line size reported by the CPU.
// NOTE(review): listing line 1145 (the declarator) is missing from this
// extract; presumably LSQUnit<Impl>::cacheLineSize() -- confirm.
1143 template <class Impl>
1144 unsigned int
1146 {
1147  return cpu->cacheLineSize();
1148 }
1149 
1150 #endif//__CPU_O3_LSQ_UNIT_IMPL_HH__
A MasterPort is a specialisation of a BaseMasterPort, which implements the default protocol for the t...
Definition: port.hh:71
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:163
#define DPRINTF(x,...)
Definition: trace.hh:222
MasterPort * dcachePort
Pointer to the dcache port.
Definition: lsq_unit.hh:401
unsigned numFreeLoadEntries()
Returns the number of free LQ entries.
void squash(const InstSeqNum &squashed_num)
Squashes all instructions younger than a specific sequence number.
Impl::DynInstPtr DynInstPtr
Definition: lsq_unit.hh:83
The request is a Load locked/store conditional.
Definition: request.hh:144
decltype(nullptr) constexpr NoFault
Definition: types.hh:243
Stats::Scalar invAddrSwpfs
Total number of software prefetches ignored due to invalid addresses.
Definition: lsq_unit.hh:565
Iterator to the circular queue.
iterator begin()
Iterators.
DynInstPtr memDepViolator
The oldest load that caused a memory ordering violation.
Definition: lsq_unit.hh:530
Bitfield< 7 > i
void completeStore(typename StoreQueue::iterator store_idx)
Completes the store at the specified index.
Stats::Scalar lsqForwLoads
Total number of loads forwarded from LSQ stores.
Definition: lsq_unit.hh:546
bool isStoreBlocked
Whether or not a store is blocked due to the memory system.
Definition: lsq_unit.hh:524
static uint32_t moduloAdd(uint32_t op1, uint32_t op2, uint32_t size)
General modular addition.
uint32_t tail() const
LSQRequest * pendingRequest
The packet that is pending free cache ports.
Definition: lsq_unit.hh:537
DynInstPtr getMemDepViolator()
Returns the memory ordering violator.
Writeback event, specifically for when stores forward data to loads.
Definition: lsq_unit.hh:441
Declaration of a request, the overall memory request consisting of the parts of the request that are ...
void insertStore(const DynInstPtr &store_inst)
Inserts a store instruction.
bool trySendPacket(bool isLoad, PacketPtr data_pkt)
Attempts to send a packet to the cache.
bool full() const
Is the queue full? A queue is full if the head is the 0^{th} element and the tail is the (size-1)^{th...
reference back()
bool isStalled()
Returns whether or not the LSQ unit is stalled.
Definition: lsq_unit.hh:607
Stats::Scalar lsqRescheduledLoads
Number of loads that were rescheduled.
Definition: lsq_unit.hh:571
void resetState()
Reset the LSQ state.
bool handleLockedWrite(XC *xc, const RequestPtr &req, Addr cacheBlockMask)
Definition: locked_mem.hh:77
iterator getIterator(size_t idx)
Return an iterator to an index in the vector.
StoreQueue::iterator storeWBIt
The index of the first instruction that may be ready to be written back, and has not yet been written...
Definition: lsq_unit.hh:503
bool sendTimingReq(PacketPtr pkt)
Attempt to send a timing request to the slave port by calling its corresponding receive function...
Definition: port.hh:441
void regStats()
Registers statistics.
typename Impl::CPUPol::LSQ::LSQRequest LSQRequest
Definition: lsq_unit.hh:89
typename LSQ::LSQSenderState LSQSenderState
Definition: lsq_unit.hh:88
#define DTRACE(x)
Definition: debug.hh:143
void insertLoad(const DynInstPtr &load_inst)
Inserts a load instruction.
Definition: lsq.hh:62
void storePostSend()
Handles completing the send of a store to memory.
size_t capacity() const
Stats::Scalar invAddrLoads
Total number of loads ignored due to invalid addresses.
Definition: lsq_unit.hh:549
bool isInvalidate() const
Definition: packet.hh:537
ThreadContext is the external interface to all thread state for anything outside of the CPU...
Stats::Scalar lsqIgnoredResponses
Total number of responses from the memory system that are ignored due to the instruction already bein...
Definition: lsq_unit.hh:556
Stats::Scalar lsqSquashedLoads
Total number of squashed loads.
Definition: lsq_unit.hh:552
void recvRetry()
Handles doing the retry.
size_t idx() const
OutputIterator has no extra requirements.
DynInstPtr inst
Instruction whose results are being written back.
Definition: lsq_unit.hh:456
Bitfield< 33 > id
void dataStatic(T *p)
Set the data pointer to the following value that should not be freed.
Definition: packet.hh:1034
void drainSanityCheck() const
Perform sanity checks after a drain.
void takeOverFrom()
Takes over from another CPU's thread.
void commitStores(InstSeqNum &youngest_inst)
Commits stores older than a specific sequence number.
void writeback(const DynInstPtr &inst, PacketPtr pkt)
Writes back the instruction, sending it to IEW.
Bitfield< 3 > x
Definition: pagetable.hh:69
ThreadID lsqID
The LSQUnit thread id.
Definition: lsq_unit.hh:476
Fault executeStore(const DynInstPtr &inst)
Executes a store instruction.
void dumpInsts() const
Debugging function to dump instructions in the LSQ.
bool storeInFlight
Whether or not a store is in flight.
Definition: lsq_unit.hh:527
Tick curTick()
The current simulated tick.
Definition: core.hh:44
void advance_tail()
Increases the tail by one.
iterator end()
void pop_back()
Circularly decrease the tail pointer.
void checkSnoop(PacketPtr pkt)
Check if an incoming invalidate hits in the lsq on a load that might have issued out of order wrt ano...
IEW * iewStage
Pointer to the IEW stage.
Definition: lsq_unit.hh:395
void cachePortBusy(bool is_load)
Another store port is in use.
Definition: lsq_impl.hh:218
Fault executeLoad(const DynInstPtr &inst)
Executes a load instruction.
bool stalled
Whether or not the LSQ is stalled.
Definition: lsq_unit.hh:512
void commitLoads(InstSeqNum &youngest_inst)
Commits loads older than a specific sequence number.
bool recvTimingResp(PacketPtr pkt)
Handles writing back and completing the load or store that has returned from memory.
void commitLoad()
Commits the head load.
int stallingLoadIdx
The index of the above store.
Definition: lsq_unit.hh:518
void completeDataAccess(PacketPtr pkt)
Completes the data access that has been returned from the memory system.
Addr getAddr() const
Definition: packet.hh:720
uint64_t InstSeqNum
Definition: inst_seq.hh:37
Bitfield< 21 > ss
void setDcachePort(MasterPort *dcache_port)
Sets the pointer to the dcache port.
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:140
LSQUnit< Impl > * lsqPtr
The pointer to the LSQ unit that issued the store.
Definition: lsq_unit.hh:462
bool empty() const
Is the queue empty?
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
Definition: packet.hh:249
bool hasPendingRequest
Whether or not there is a packet that couldn't be sent because of a lack of cache ports...
Definition: lsq_unit.hh:534
void pop_front(size_t num_elem=1)
Circularly increase the head pointer.
PacketPtr retryPkt
The packet that needs to be retried.
Definition: lsq_unit.hh:521
InstSeqNum stallingStoreIsn
The store that causes the stall due to partial store to load forwarding.
Definition: lsq_unit.hh:516
Particularisation of the LSQSenderState to the SQ.
Definition: lsq_unit.hh:423
Stats::Scalar lsqSquashedStores
Total number of squashed stores.
Definition: lsq_unit.hh:562
void writebackBlockedStore()
Try to finish a previously blocked write back attempt.
Bitfield< 9 > e
LSQ * lsq
Pointer to the LSQ.
Definition: lsq_unit.hh:398
Derived & name(const std::string &name)
Set the name and marks this stat to print at the end of simulation.
Definition: statistics.hh:276
const char * description() const
Returns the description of this event.
int stores
The number of store instructions in the SQ.
Definition: lsq_unit.hh:496
std::string name() const
Returns the name of the LSQ unit.
bool checkLoads
Should loads be checked for dependency issues.
Definition: lsq_unit.hh:491
Declaration of the Packet class.
PacketPtr pkt
The packet that would have been sent to memory.
Definition: lsq_unit.hh:459
unsigned numFreeStoreEntries()
Returns the number of free SQ entries.
SenderState * senderState
This packet's sender state.
Definition: packet.hh:474
Definition: eventq.hh:246
void process()
Processes the writeback event.
unsigned int cacheLineSize()
bool dereferenceable() const
Test dereferenceability.
void init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params, LSQ *lsq_ptr, unsigned id)
Initializes the LSQ unit with the specified number of entries.
int storesToWB
The number of store instructions in the SQ waiting to writeback.
Definition: lsq_unit.hh:498
void writebackStores()
Writes back stores.
bool needsTSO
Flag for memory model.
Definition: lsq_unit.hh:540
bool cachePortAvailable(bool is_load) const
Is any store port available to use?
Definition: lsq_impl.hh:205
int loads
The number of load instructions in the LQ.
Definition: lsq_unit.hh:494
LSQUnit(uint32_t lqEntries, uint32_t sqEntries)
Constructs an LSQ unit.
O3CPU * cpu
Pointer to the CPU.
Definition: lsq_unit.hh:392
void handleLockedSnoop(XC *xc, PacketPtr pkt, Addr cacheBlockMask)
Definition: locked_mem.hh:58
Stats::Scalar lsqBlockedLoads
Ready loads blocked due to partial store-forwarding.
Definition: lsq_unit.hh:568
Derived & desc(const std::string &_desc)
Set the description and marks this stat to print at the end of simulation.
Definition: statistics.hh:309
Stats::Scalar lsqCacheBlocked
Number of times the LSQ is blocked due to the cache.
Definition: lsq_unit.hh:574
LoadQueue loadQueue
The load queue.
Definition: lsq_unit.hh:482
unsigned depCheckShift
The number of places to shift addresses in the LSQ before checking for dependency violations...
Definition: lsq_unit.hh:488
CircularQueue< SQEntry > storeQueue
The store queue.
Definition: lsq_unit.hh:479
bool cacheBlocked() const
Is D-cache blocked?
Definition: lsq_impl.hh:191
void insert(const DynInstPtr &inst)
Inserts an instruction.
Class that implements the actual LQ and SQ for each specific thread.
Definition: lsq_unit.hh:77
Fault checkViolations(typename LoadQueue::iterator &loadIt, const DynInstPtr &inst)
Check for ordering violations in the LSQ.
WritebackEvent(const DynInstPtr &_inst, PacketPtr pkt, LSQUnit *lsq_ptr)
Constructs a writeback event.
Impl::O3CPU O3CPU
Definition: lsq_unit.hh:82
std::shared_ptr< FaultBase > Fault
Definition: types.hh:238
const std::string to_string(sc_enc enc)
Definition: sc_fxdefs.cc:60
Impl::CPUPol::IEW IEW
Definition: lsq_unit.hh:84
void handleLockedSnoopHit(XC *xc)
Definition: locked_mem.hh:70
Stats::Scalar lsqMemOrderViolation
Total number of memory ordering violations.
Definition: lsq_unit.hh:559
void cprintf(const char *format, const Args &...args)
Definition: cprintf.hh:152
ProbePointArg< PacketInfo > Packet
Packet probe point.
Definition: mem.hh:103
reference front()
Addr cacheBlockMask
Address Mask for a cache block (e.g.
Definition: lsq_unit.hh:506

Generated on Mon Jun 8 2020 15:45:08 for gem5 by doxygen 1.8.13