43 #ifndef __CPU_O3_LSQ_UNIT_IMPL_HH__
44 #define __CPU_O3_LSQ_UNIT_IMPL_HH__
47 #include "arch/locked_mem.hh"
49 #include "config/the_isa.hh"
53 #include "debug/Activity.hh"
54 #include "debug/HtmCpu.hh"
55 #include "debug/IEW.hh"
56 #include "debug/LSQUnit.hh"
57 #include "debug/O3PipeView.hh"
// Constructor fragment (the signature line is not part of this excerpt).
// Constructs the Event base with Default_Pri and AutoDelete, and stores the
// instruction, packet and LSQ pointer that were passed in.
64 :
Event(Default_Pri, AutoDelete),
65 inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr)
// The instruction must already carry a saved request; that request is told
// that a writeback has been scheduled.
67 assert(_inst->savedReq);
68 _inst->savedReq->writebackScheduled();
// Fragment that carries out the writeback through lsqPtr (signature not
// part of this excerpt). The CPU must not be switched out at this point.
75 assert(!lsqPtr->cpu->switchedOut());
// Hand the stored instruction and packet to the LSQ unit's writeback().
77 lsqPtr->writeback(inst, pkt);
// The saved request must still exist; mark its writeback as done.
79 assert(inst->savedReq);
80 inst->savedReq->writebackDone();
// Returns a fixed descriptive string (enclosing signature not shown here).
88 return "Store writeback";
// Timing-response fragment (signature and some lines are not part of this
// excerpt). A request object must be available before going further.
97 assert(req !=
nullptr);
// While the sender state reports alive(), the request processes the packet
// and its result is kept in `ret`.
100 if (senderState->alive()) {
101 ret = req->recvTimingResp(pkt);
// Decrement the sender state's outstanding counter.
// NOTE(review): listing line 102 is missing, so it cannot be confirmed from
// here whether this statement sits in an else branch — verify.
103 senderState->outstanding--;
// Data-access completion fragment (signature and many lines are not part of
// this excerpt).
// Message text for a store: per the string itself, the notification of an
// HTM transaction failure in the cache is ignored.
133 "store notification (ignored) of HTM transaction failure "
134 "in cache - addr=0x%lx - rc=%s - htmUid=%d\n",
// Panic reporting an unhandled return code from the cache.
151 panic(
"HTM error - unhandled return code from cache (%s)",
// Build a GenericHtmFailureFault; its first argument is the instruction's
// HTM transaction uid.
156 std::make_shared<GenericHtmFailureFault>(
157 inst->getHtmTransactionUid(),
// Message text for the load case.
// NOTE(review): this format prints rc with %u while the store message above
// uses %s; the argument line is not visible here — confirm the specifier
// matches what is passed.
161 "load notification of HTM transaction failure "
162 "in cache - pc=%s - addr=0x%lx - "
163 "rc=%u - htmUid=%d\n",
164 inst->pcState(), pkt->
getAddr(),
// Notify cpu->ppDataAccessComplete with the (inst, pkt) pair.
169 cpu->ppDataAccessComplete->notify(std::make_pair(inst, pkt));
// The CPU must not be switched out; the following work only applies to
// instructions that have not been squashed.
175 assert(!
cpu->switchedOut());
176 if (!inst->isSquashed()) {
// Expected instruction kinds here include loads and store-conditionals
// (the rest of the condition is cut off in this excerpt).
180 assert(inst->isLoad() || inst->isStoreConditional() ||
// Sets the HTM failed-in-cache state on the request's main packet
// (the argument is not shown in this excerpt).
185 state->request()->mainPacket()->setHtmTransactionFailedInCache(
// Write back using the request's main packet.
189 writeback(inst, state->request()->mainPacket());
// Stores and atomics get additional handling (body not shown here).
190 if (inst->isStore() || inst->isAtomic()) {
195 }
else if (inst->isStore()) {
203 template <
class Impl>
// Parameter-list and body fragment of an initialisation routine (the line
// carrying the function name is not part of this excerpt).
// `params` is taken by const reference and supplies the configuration values
// copied below; `lsq_ptr` and `id` are the remaining trailing parameters.
218 const DerivO3CPUParams &params,
LSQ *lsq_ptr,
unsigned id)
// Register `stats` with the CPU under the name "lsq<lsqID>".
227 cpu->addStatGroup(
csprintf(
"lsq%i", lsqID).c_str(), &stats);
// Copy the dependency-check shift, load-checking flag and TSO flag out of
// the CPU parameters.
231 depCheckShift = params.LSQDepCheckShift;
232 checkLoads = params.LSQCheckLoads;
233 needsTSO = params.needsTSO;
// State-reset fragment (signature not part of this excerpt): zero the
// load/store/stores-to-write-back counters and the HTM start/stop counters.
243 loads = stores = storesToWB = 0;
247 htmStarts = htmStops = 0;
// Point the store write-back iterator at the beginning of the store queue.
249 storeWBIt = storeQueue.begin();
// Clear the recorded memory-dependence violator.
252 memDepViolator = NULL;
// Mask derived from the CPU's cache line size.
// Assumes cacheLineSize() is a power of two so the mask clears only the
// in-line offset bits — TODO confirm.
256 cacheBlockMask = ~(cpu->cacheLineSize() - 1);
// Naming fragment (signature and the other branch are not part of this
// excerpt): when Impl::MaxThreads is 1 the result is the IEW stage's name
// followed by ".lsq".
263 if (Impl::MaxThreads == 1) {
264 return iewStage->name() +
".lsq";
// Statistics-group constructor fragment: forwards `parent` to Stats::Group
// and supplies the description strings below. The lines that name each
// statistic are not part of this excerpt, and two of the descriptions are
// cut off mid-sentence.
270 template <
class Impl>
272 :
Stats::Group(parent),
274 "Number of loads that had data forwarded from stores"),
276 "Number of loads squashed"),
278 "Number of memory responses ignored because the instruction is "
281 "Number of memory ordering violations"),
284 "Number of loads that were rescheduled"),
286 "Number of times an access to memory failed due to the cache "
// Insertion fragment (signature not part of this excerpt): the instruction
// must be a memory reference and one of load, store or atomic; the code then
// branches on whether it is a load.
316 template <
class Impl>
320 assert(inst->isMemRef());
322 assert(inst->isLoad() || inst->isStore() || inst->isAtomic());
324 if (inst->isLoad()) {
// Load-insertion fragment (signature and several lines are not part of this
// excerpt).
333 template <
class Impl>
// The load's load-queue index must be positive.
351 assert(load_inst->lqIdx > 0);
// HTM start: log the start/stop counters, then fetch the HTM checkpoint of
// thread context `lsqID` and read its current uid.
359 if (load_inst->isHtmStart()) {
361 DPRINTF(HtmCpu,
">> htmStarts++ (%d) : htmStops (%d)\n",
365 const auto& htm_cpt =
cpu->tcBase(
lsqID)->getHtmCheckpointPtr();
366 auto htm_uid = htm_cpt->getHtmUid();
// A load that is not already in HTM transactional state gets a newly
// generated uid from the checkpoint.
369 if (!load_inst->inHtmTransactionalState()) {
370 htm_uid = htm_cpt->newHtmUid();
371 DPRINTF(HtmCpu,
"generating new htmUid=%u\n", htm_uid);
// A depth other than 1 is reported as unusual.
// NOTE(review): this message spells "mispeculation" while the HTM-stop
// message further down spells "misspeculation".
372 if (htm_depth != 1) {
374 "unusual HTM transactional depth (%d)"
375 " possibly caused by mispeculation - htmUid=%u\n",
// Record the uid and depth on the load instruction.
379 load_inst->setHtmTransactionalState(htm_uid, htm_depth);
// HTM stop: log the counters; the message below covers the case
// htmStops==1 && htmStarts==0.
382 if (load_inst->isHtmStop()) {
384 DPRINTF(HtmCpu,
">> htmStarts (%d) : htmStops++ (%d)\n",
389 "htmStops==1 && htmStarts==0. "
390 "This generally shouldn't happen "
391 "(unless due to misspeculation)\n");
// Store-insertion fragment (signature and several lines are not part of this
// excerpt).
396 template <
class Impl>
// Trailing arguments of a call whose start is not shown: the store's PC
// state, the store queue's tail() and the store's sequence number.
405 store_inst->pcState(),
storeQueue.tail(), store_inst->seqNum);
// NOTE(review): the check is on the store's lqIdx rather than sqIdx; the
// assignment preceding it is not visible here — confirm this is intended.
410 assert(store_inst->lqIdx > 0);
418 template <
class Impl>
419 typename Impl::DynInstPtr
429 template <
class Impl>
440 template <
class Impl>
// Snoop-handling fragment (signature and many lines are not part of this
// excerpt).
452 template <
class Impl>
// For every context of the CPU: save the thread's noSquashFromTC flag, force
// it to true, and afterwards restore the saved value. The work done while
// the flag is forced is on a listing line that is not shown.
461 for (
int x = 0;
x <
cpu->numContexts();
x++) {
463 bool no_squash =
cpu->thread[
x]->noSquashFromTC;
464 cpu->thread[
x]->noSquashFromTC =
true;
466 cpu->thread[
x]->noSquashFromTC = no_squash;
// Start of a condition on a load with a valid effective address (the rest
// of the condition is not shown).
481 if (ld_inst->effAddrValid() &&
// Flag that starts out false and is OR-ed into the squash decision below.
486 bool force_squash =
false;
// Take the instruction and request from the current iterator position.
// Loads without a valid effective address, and strictly ordered loads, are
// tested for here (the statement guarded by the test is not shown).
489 ld_inst = iter->instruction();
491 req = iter->request();
492 if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered())
496 ld_inst->seqNum, invalidate_addr);
// A load already marked as a possible violation, or any load once
// force_squash is set, is given a ReExec fault and its request is moved to
// the fault state.
506 if (ld_inst->possibleLoadViolation() || force_squash) {
508 pkt->
getAddr(), ld_inst->seqNum);
511 ld_inst->fault = std::make_shared<ReExec>();
512 req->setStateToFault();
515 pkt->
getAddr(), ld_inst->seqNum);
// Record on the load that an external snoop hit it.
526 ld_inst->hitExternalSnoop(
true);
// Violation-check fragment (signature and many lines are not part of this
// excerpt).
533 template <
class Impl>
// Test for loads without a valid effective address or that are strictly
// ordered (the guarded body is not shown).
548 if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()) {
// Inclusive range-overlap test between [inst_eff_addr1, inst_eff_addr2] and
// [ld_eff_addr1, ld_eff_addr2].
557 if (inst_eff_addr2 >= ld_eff_addr1 && inst_eff_addr1 <= ld_eff_addr2) {
// Case where the checking instruction is itself a load.
558 if (inst->isLoad()) {
// The younger load was previously hit by an external snoop.
562 if (ld_inst->hitExternalSnoop()) {
566 "and [sn:%lli] at address %#x\n",
567 inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
// Return an M5PanicFault naming both sequence numbers and the address.
572 return std::make_shared<GenericISA::M5PanicFault>(
573 "Detected fault with inst [sn:%lli] and "
574 "[sn:%lli] at address %#x\n",
575 inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
// Mark the load as a possible load violation.
581 ld_inst->possibleLoadViolation(
true);
583 " between instructions [sn:%lli] and [sn:%lli]\n",
584 inst_eff_addr1, inst->seqNum, ld_inst->seqNum);
593 "[sn:%lli] at address %#x\n",
594 inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
// Second place where an M5PanicFault with the same information is returned.
599 return std::make_shared<GenericISA::M5PanicFault>(
600 "Detected fault with "
601 "inst [sn:%lli] and [sn:%lli] at address %#x\n",
602 inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
// Load-execution fragment (signature and several lines are not part of this
// excerpt).
614 template <
class Impl>
622 inst->pcState(), inst->seqNum);
// A squashed instruction must never get here.
624 assert(!inst->isSquashed());
// Start the access; the resulting fault (or NoFault) drives what follows.
626 load_fault = inst->initiateAcc();
// No fault but the memory-access predicate is false: the ordinary predicate
// must be true, and the access is completed with a null packet.
628 if (load_fault ==
NoFault && !inst->readMemAccPredicate()) {
629 assert(inst->readPredicate());
631 inst->completeAcc(
nullptr);
// Translation delayed without a fault (the guarded statement is not shown).
637 if (inst->isTranslationDelayed() && load_fault ==
NoFault)
// Faulting load whose translation completed and whose saved request is a
// partial fault that is not yet complete: the request must be split.
640 if (load_fault !=
NoFault && inst->translationCompleted() &&
641 inst->savedReq->isPartialFault() && !inst->savedReq->isComplete()) {
642 assert(inst->savedReq->isSplit());
// Either a fault occurred or the predicate is false.
652 if (load_fault !=
NoFault || !inst->readPredicate()) {
// Predicated-off loads forward their old register values.
657 if (!inst->readPredicate())
658 inst->forwardOldRegs();
// Chooses the word "fault" or "predication" for a message whose start is
// not shown.
661 (load_fault !=
NoFault ?
"fault" :
"predication"));
// True unless the load both has a request and is strictly ordered, or when
// it is already at commit (the guarded body is not shown).
662 if (!(inst->hasRequest() && inst->strictlyOrdered()) ||
663 inst->isAtCommit()) {
// With a valid effective address, take the load's load-queue iterator.
669 if (inst->effAddrValid()) {
670 auto it = inst->lqIt;
// Store-execution fragment (signature and several lines are not part of this
// excerpt).
681 template <
class Impl>
// Store-queue index of the store being executed.
688 int store_idx = store_inst->sqIdx;
691 store_inst->pcState(), store_inst->seqNum);
// A squashed store must never get here.
693 assert(!store_inst->isSquashed());
// Load-queue iterator recorded on the store.
697 typename LoadQueue::iterator loadIt = store_inst->lqIt;
// Start the access and keep the resulting fault.
699 Fault store_fault = store_inst->initiateAcc();
// Translation-delayed case (the rest of the condition is not shown).
701 if (store_inst->isTranslationDelayed() &&
// Predicated-off stores log that they were not executed and forward their
// old register values.
705 if (!store_inst->readPredicate()) {
706 DPRINTF(
LSQUnit,
"Store [sn:%lli] not executed from predication\n",
708 store_inst->forwardOldRegs();
714 store_inst->pcState(), store_inst->seqNum);
// From here on the store must be fault-free.
719 assert(store_fault ==
NoFault);
// Store-conditionals and atomics take a separate path (body not shown).
721 if (store_inst->isStoreConditional() || store_inst->isAtomic()) {
733 template <
class Impl>
748 template <
class Impl>
// Fragment that walks entries `x` and compares each entry's sequence number
// against `youngest_inst` (signature, loop header and guarded body are not
// part of this excerpt).
760 template <
class Impl>
772 if (
x.instruction()->seqNum > youngest_inst) {
// Trailing arguments of a call whose start is not shown: the entry's PC
// state and sequence number.
777 x.instruction()->pcState(),
778 x.instruction()->seqNum);
// Fragment (signature not part of this excerpt): sends the packet of the
// request at the store write-back iterator to the cache.
787 template <
class Impl>
792 storeWBIt->request()->sendPacketToCache();
// Store write-back fragment (signature, loop structure and many lines are
// not part of this excerpt).
798 template <
class Impl>
// Data-prefetch instructions are tested for (guarded body not shown).
830 if (
storeWBIt->instruction()->isDataPrefetch()) {
// LLSC or release requests are tested together with a further condition
// that is not shown; the message text says the store is "not head of the
// queue" and tags it "SC" and/or "/Release".
843 if ((req->mainRequest()->isLLSC() ||
844 req->mainRequest()->isRelease()) &&
847 "[sn:%lli] is %s%s and not head of the queue\n",
849 req->request()->getPaddr(), inst->seqNum,
850 req->mainRequest()->isLLSC() ?
"SC" :
"",
851 req->mainRequest()->isRelease() ?
"/Release" :
"");
// The instruction must not own a data buffer yet; allocate one of the
// request's size.
857 assert(!inst->memData);
858 inst->memData =
new uint8_t[req->_size];
// The buffer is either zero-filled or filled from the store-queue entry's
// data; the condition choosing between the two is not shown.
861 memset(inst->memData, 0, req->_size);
863 memcpy(inst->memData,
storeWBIt->data(), req->_size);
// A request without a sender state gets one that is marked as not a load
// and as not needing a writeback, and it is attached to the request.
866 if (req->senderState() ==
nullptr) {
868 state->isLoad =
false;
869 state->needWB =
false;
872 req->senderState(state);
// Store-conditionals and atomics do need a writeback.
873 if (inst->isStoreConditional() || inst->isAtomic()) {
875 state->needWB =
true;
// Message arguments: the physical address and the first byte of the data
// buffer converted to int.
881 "to Addr:%#x, data:%#x [sn:%lli]\n",
883 req->request()->getPaddr(), (
int)*(inst->memData),
// Store-conditional handling: result recording is switched off and later
// on again; the code between the two calls is not shown.
887 if (inst->isStoreConditional()) {
891 inst->recordResult(
false);
894 inst->recordResult(
true);
901 "Instantly completing it.\n",
// Local accesses must be neither store-conditionals nor inside an HTM
// transaction; they are served through the request's local accessor.
916 if (req->request()->isLocalAccess()) {
917 assert(!inst->isStoreConditional());
918 assert(!inst->inHtmTransactionalState());
923 req->request()->localAccessor(thread, main_pkt);
// Sends the request's packet to the cache.
930 req->sendPacketToCache();
// Logged when the D-cache became blocked during the write.
936 DPRINTF(
LSQUnit,
"D-Cache became blocked when writing [sn:%lli], "
937 "will retry later\n",
// Squash fragment (signature, loop structure and many lines are not part of
// this excerpt).
944 template <
class Impl>
// Tail of a message reporting squashed_num and the load/store counts.
949 "(Loads:%i Stores:%i)\n", squashed_num,
loads,
stores);
// Log lines accompanying a decrement of htmStarts and of htmStops (the
// decrements themselves are not shown).
970 DPRINTF(HtmCpu,
">> htmStarts-- (%d) : htmStops (%d)\n",
976 DPRINTF(HtmCpu,
">> htmStarts (%d) : htmStops-- (%d)\n",
// Uid of an HTM start still in flight; 0 is used as "none found" by the
// `> 0` test below.
992 uint64_t in_flight_uid = 0;
// A scanned HtmStart instruction that is not squashed supplies the uid.
994 if (scan_it->instruction()->isHtmStart() &&
995 !scan_it->instruction()->isSquashed()) {
996 in_flight_uid = scan_it->instruction()->getHtmTransactionUid();
997 DPRINTF(HtmCpu,
"loadQueue[%d]: found valid HtmStart htmUid=%u\n",
998 scan_it._idx, in_flight_uid);
// Fetch the HTM checkpoint of thread context `lsqID` and its current uid.
1004 const auto& htm_cpt =
cpu->tcBase(
lsqID)->getHtmCheckpointPtr();
1006 const uint64_t old_local_htm_uid = htm_cpt->getHtmUid();
// Declared without an initialiser: set to the in-flight uid when one was
// found; the alternative assignment is on a listing line not shown here.
1007 uint64_t new_local_htm_uid;
1008 if (in_flight_uid > 0)
1009 new_local_htm_uid = in_flight_uid;
// Only when the uid changes is the checkpoint updated (and logged).
1013 if (old_local_htm_uid != new_local_htm_uid) {
1014 DPRINTF(HtmCpu,
"flush: lastRetiredHtmUid=%u\n",
1016 DPRINTF(HtmCpu,
"flush: resetting localHtmUid=%u\n",
1019 htm_cpt->setHtmUid(new_local_htm_uid);
// Part of a condition comparing the sequence number of the store at the
// back of the store queue with squashed_num.
1028 storeQueue.back().instruction()->seqNum > squashed_num) {
1035 "idx:%i [sn:%lli]\n",
// Panic with the message below (the condition guarding it is not shown).
1043 panic(
"Is stalled should have been cleared by stalling load!\n");
// Mark the store at the back of the store queue as squashed.
1049 storeQueue.back().instruction()->setSquashed();
// Fragment (signature and several lines are not part of this excerpt): a
// store at the write-back iterator that is not a store-conditional is
// tested for, and the instruction is marked completed. Whether the
// setCompleted() call sits directly inside that branch cannot be confirmed
// from the lines shown.
1062 template <
class Impl>
1076 if (!
storeWBIt->instruction()->isStoreConditional()) {
1080 storeWBIt->instruction()->setCompleted();
// Writeback fragment (signature and several lines are not part of this
// excerpt).
1094 template <
class Impl>
// A squashed instruction reaching this point must not be a plain store:
// it is either not a store at all or a store-conditional.
1101 if (inst->isSquashed()) {
1102 assert (!inst->isStore() || inst->isStoreConditional());
// An instruction not yet executed is marked executed; completeAcc() is
// called with the packet (the condition guarding it is not shown).
1107 if (!inst->isExecuted()) {
1108 inst->setExecuted();
1112 inst->completeAcc(pkt);
// Attempts to view the fault as an HTM fault type via dynamic_pointer_cast
// (the target type is on a line not shown).
1122 auto htm_fault = std::dynamic_pointer_cast<
// The fault must be a ReExec, or the saved request must be a partial fault.
1126 assert(
dynamic_cast<ReExec*
>(inst->fault.get()) !=
nullptr ||
1127 inst->savedReq->isPartialFault());
// Message for a writeback that has an HTM failure fault although the
// completing packet is not aware of the failure.
1134 "%s writeback with HTM failure fault, "
1135 "however, completing packet is not aware of "
1136 "transaction failure. cause=%s htmUid=%u\n",
1137 inst->staticInst->getName(),
1139 htm_fault->getHtmUid());
1143 "due to pending fault.\n", inst->seqNum);
// Ask the IEW stage to check the instruction for a misprediction.
1153 iewStage->checkMisprediction(inst);
// Store-completion fragment (signature and several lines are not part of
// this excerpt).
1156 template <
class Impl>
// The store-queue entry must be valid; flag it completed.
1160 assert(store_idx->valid());
1161 store_idx->completed() =
true;
// Report activity to the CPU for this cycle.
1167 cpu->activityThisCycle();
// Instruction held by the store-queue entry.
1171 DynInstPtr store_inst = store_idx->instruction();
// Request an LSQ update from the IEW stage on the next cycle.
1180 iewStage->updateLSQNextCycle =
true;
// Logs the sequence number plus the entry index and store-queue head, each
// minus one.
1183 DPRINTF(
LSQUnit,
"Completing store [sn:%lli], idx:%i, store head "
1185 store_inst->seqNum, store_idx.
idx() - 1,
storeQueue.head() - 1);
// With O3PipeView tracing enabled, record the store tick relative to the
// instruction's fetch tick.
1188 if (
DTRACE(O3PipeView)) {
1189 store_inst->storeTick =
1190 curTick() - store_inst->fetchTick;
// Mark the instruction completed.
1204 store_inst->setCompleted();
// When a checker is attached, stores other than store-conditionals are
// verified by it.
1216 if (
cpu->checker && !store_inst->isStoreConditional()) {
1217 cpu->checker->verify(store_inst);
// Packet-send fragment (signature and several lines are not part of this
// excerpt).
1221 template <
class Impl>
// Tracks whether the cache became blocked; set to true on a path whose
// condition is not shown.
1226 bool cache_got_blocked =
false;
1234 cache_got_blocked =
true;
// Count one more outstanding packet on the sender state and tell the
// request that a packet was sent.
1245 state->outstanding++;
1246 state->request()->packetSent();
// Blocked-cache handling: the request must be the one at the store
// write-back iterator (an enclosing condition, if any, is not shown), and
// the request is told the packet was not sent.
1248 if (cache_got_blocked) {
1253 assert(state->request() ==
storeWBIt->request());
1256 state->request()->packetNotSent();
1261 template <
class Impl>
// Dump fragment (signature and loop headers are not part of this excerpt):
// prints a header line, then prints instructions as "<pc>.[sn:<seqNum>] "
// from two separate places in the code.
1271 template <
class Impl>
1275 cprintf(
"Load store queue: Dumping instructions.\n");
1281 cprintf(
"%s.[sn:%llu] ", inst->pcState(), inst->seqNum);
1290 cprintf(
"%s.[sn:%llu] ", inst->pcState(), inst->seqNum);
// Fragment (signature not part of this excerpt): returns the cache line
// size reported by the CPU.
1296 template <
class Impl>
1300 return cpu->cacheLineSize();
1303 #endif//__CPU_O3_LSQ_UNIT_IMPL_HH__