46 #include "config/the_isa.hh"
51 #include "debug/Activity.hh"
52 #include "debug/HtmCpu.hh"
53 #include "debug/IEW.hh"
54 #include "debug/LSQUnit.hh"
55 #include "debug/O3PipeView.hh"
67 :
Event(Default_Pri, AutoDelete),
68 inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr)
70 assert(_inst->savedRequest);
71 _inst->savedRequest->writebackScheduled();
77 assert(!lsqPtr->cpu->switchedOut());
79 lsqPtr->writeback(inst, pkt);
81 assert(inst->savedRequest);
82 inst->savedRequest->writebackDone();
89 return "Store writeback";
96 assert(request !=
nullptr);
128 "store notification (ignored) of HTM transaction failure "
129 "in cache - addr=0x%lx - rc=%s - htmUid=%d\n",
146 panic(
"HTM error - unhandled return code from cache (%s)",
151 std::make_shared<GenericHtmFailureFault>(
152 inst->getHtmTransactionUid(),
156 "load notification of HTM transaction failure "
157 "in cache - pc=%s - addr=0x%lx - "
158 "rc=%u - htmUid=%d\n",
159 inst->pcState(), pkt->
getAddr(),
166 assert(!
cpu->switchedOut());
167 if (!inst->isSquashed()) {
171 assert(inst->isLoad() || inst->isStoreConditional() ||
181 if (inst->isStore() || inst->isAtomic()) {
185 }
else if (inst->isStore()) {
205 LSQ *lsq_ptr,
unsigned id)
256 : statistics::
Group(parent),
257 ADD_STAT(forwLoads, statistics::units::Count::get(),
258 "Number of loads that had data forwarded from stores"),
259 ADD_STAT(squashedLoads, statistics::units::Count::get(),
260 "Number of loads squashed"),
261 ADD_STAT(ignoredResponses, statistics::units::Count::get(),
262 "Number of memory responses ignored because the instruction is "
264 ADD_STAT(memOrderViolation, statistics::units::Count::get(),
265 "Number of memory ordering violations"),
266 ADD_STAT(squashedStores, statistics::units::Count::get(),
267 "Number of stores squashed"),
268 ADD_STAT(rescheduledLoads, statistics::units::Count::get(),
269 "Number of loads that were rescheduled"),
270 ADD_STAT(blockedByCache, statistics::units::Count::get(),
271 "Number of times an access to memory failed due to the cache "
273 ADD_STAT(loadToUse,
"Distribution of cycle latency between the "
274 "first time a load is issued and its completion")
306 assert(inst->isMemRef());
308 assert(inst->isLoad() || inst->isStore() || inst->isAtomic());
310 if (inst->isLoad()) {
336 assert(load_inst->lqIdx > 0);
342 if (load_inst->isHtmStart()) {
344 DPRINTF(HtmCpu,
">> htmStarts++ (%d) : htmStops (%d)\n",
349 auto htm_uid = htm_cpt->getHtmUid();
352 if (!load_inst->inHtmTransactionalState()) {
353 htm_uid = htm_cpt->newHtmUid();
354 DPRINTF(HtmCpu,
"generating new htmUid=%u\n", htm_uid);
355 if (htm_depth != 1) {
357 "unusual HTM transactional depth (%d)"
358 " possibly caused by mispeculation - htmUid=%u\n",
362 load_inst->setHtmTransactionalState(htm_uid, htm_depth);
365 if (load_inst->isHtmStop()) {
367 DPRINTF(HtmCpu,
">> htmStarts (%d) : htmStops++ (%d)\n",
372 "htmStops==1 && htmStarts==0. "
373 "This generally shouldn't happen "
374 "(unless due to misspeculation)\n");
394 assert(store_inst->lqIdx > 0);
435 for (
int x = 0;
x <
cpu->numContexts();
x++) {
437 bool no_squash =
cpu->
thread[
x]->noSquashFromTC;
455 if (ld_inst->effAddrValid() &&
458 ld_inst->tcBase()->getIsaPtr()->handleLockedSnoopHit(ld_inst.
get());
461 bool force_squash =
false;
464 ld_inst = iter->instruction();
466 request = iter->request();
467 if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered())
471 ld_inst->seqNum, invalidate_addr);
481 if (ld_inst->possibleLoadViolation() || force_squash) {
483 pkt->
getAddr(), ld_inst->seqNum);
486 ld_inst->fault = std::make_shared<ReExec>();
490 pkt->
getAddr(), ld_inst->seqNum);
496 ld_inst->tcBase()->getIsaPtr()->
497 handleLockedSnoopHit(ld_inst.
get());
503 ld_inst->hitExternalSnoop(
true);
524 if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()) {
533 if (inst_eff_addr2 >= ld_eff_addr1 && inst_eff_addr1 <= ld_eff_addr2) {
534 if (inst->isLoad()) {
538 if (ld_inst->hitExternalSnoop()) {
542 "and [sn:%lli] at address %#x\n",
543 inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
548 return std::make_shared<GenericISA::M5PanicFault>(
549 "Detected fault with inst [sn:%lli] and "
550 "[sn:%lli] at address %#x\n",
551 inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
557 ld_inst->possibleLoadViolation(
true);
559 " between instructions [sn:%lli] and [sn:%lli]\n",
560 inst_eff_addr1, inst->seqNum, ld_inst->seqNum);
569 "[sn:%lli] at address %#x\n",
570 inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
575 return std::make_shared<GenericISA::M5PanicFault>(
576 "Detected fault with "
577 "inst [sn:%lli] and [sn:%lli] at address %#x\n",
578 inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
597 inst->pcState(), inst->seqNum);
599 assert(!inst->isSquashed());
601 load_fault = inst->initiateAcc();
603 if (load_fault ==
NoFault && !inst->readMemAccPredicate()) {
604 assert(inst->readPredicate());
606 inst->completeAcc(
nullptr);
612 if (inst->isTranslationDelayed() && load_fault ==
NoFault)
615 if (load_fault !=
NoFault && inst->translationCompleted() &&
616 inst->savedRequest->isPartialFault()
617 && !inst->savedRequest->isComplete()) {
618 assert(inst->savedRequest->isSplit());
628 if (load_fault !=
NoFault || !inst->readPredicate()) {
633 if (!inst->readPredicate())
634 inst->forwardOldRegs();
637 (load_fault !=
NoFault ?
"fault" :
"predication"));
638 if (!(inst->hasRequest() && inst->strictlyOrdered()) ||
639 inst->isAtCommit()) {
645 if (inst->effAddrValid()) {
646 auto it = inst->lqIt;
663 int store_idx = store_inst->sqIdx;
666 store_inst->pcState(), store_inst->seqNum);
668 assert(!store_inst->isSquashed());
672 typename LoadQueue::iterator loadIt = store_inst->lqIt;
674 Fault store_fault = store_inst->initiateAcc();
676 if (store_inst->isTranslationDelayed() &&
680 if (!store_inst->readPredicate()) {
681 DPRINTF(
LSQUnit,
"Store [sn:%lli] not executed from predication\n",
683 store_inst->forwardOldRegs();
689 store_inst->pcState(), store_inst->seqNum);
691 if (store_inst->isAtomic()) {
694 if (!(store_inst->hasRequest() && store_inst->strictlyOrdered()) ||
695 store_inst->isAtCommit()) {
696 store_inst->setExecuted();
705 assert(store_fault ==
NoFault);
707 if (store_inst->isStoreConditional() || store_inst->isAtomic()) {
731 if (!inst->isInstPrefetch() && !inst->isDataPrefetch()
732 && inst->firstIssue != -1
733 && inst->lastWakeDependents != -1) {
735 inst->lastWakeDependents - inst->firstIssue));
764 if (
x.instruction()->seqNum > youngest_inst) {
769 x.instruction()->pcState(),
770 x.instruction()->seqNum);
783 storeWBIt->request()->sendPacketToCache();
820 if (
storeWBIt->instruction()->isDataPrefetch()) {
833 if ((request->
mainReq()->isLLSC() ||
834 request->
mainReq()->isRelease()) &&
837 "[sn:%lli] is %s%s and not head of the queue\n",
839 request->
mainReq()->getPaddr(), inst->seqNum,
840 request->
mainReq()->isLLSC() ?
"SC" :
"",
841 request->
mainReq()->isRelease() ?
"/Release" :
"");
847 assert(!inst->memData);
848 inst->memData =
new uint8_t[request->
_size];
851 memset(inst->memData, 0, request->
_size);
858 "to Addr:%#x, data:%#x [sn:%lli]\n",
860 request->
mainReq()->getPaddr(), (
int)*(inst->memData),
864 if (inst->isStoreConditional()) {
868 inst->recordResult(
false);
869 bool success = inst->tcBase()->getIsaPtr()->handleLockedWrite(
871 inst->recordResult(
true);
878 "Instantly completing it.\n",
893 if (request->
mainReq()->isLocalAccess()) {
894 assert(!inst->isStoreConditional());
895 assert(!inst->inHtmTransactionalState());
900 request->
mainReq()->localAccessor(thread, main_pkt);
913 DPRINTF(
LSQUnit,
"D-Cache became blocked when writing [sn:%lli], "
914 "will retry later\n",
947 DPRINTF(HtmCpu,
">> htmStarts-- (%d) : htmStops (%d)\n",
953 DPRINTF(HtmCpu,
">> htmStarts (%d) : htmStops-- (%d)\n",
967 uint64_t in_flight_uid = 0;
969 if (scan_it->instruction()->isHtmStart() &&
970 !scan_it->instruction()->isSquashed()) {
971 in_flight_uid = scan_it->instruction()->getHtmTransactionUid();
972 DPRINTF(HtmCpu,
"loadQueue[%d]: found valid HtmStart htmUid=%u\n",
973 scan_it._idx, in_flight_uid);
981 const uint64_t old_local_htm_uid = htm_cpt->getHtmUid();
982 uint64_t new_local_htm_uid;
983 if (in_flight_uid > 0)
984 new_local_htm_uid = in_flight_uid;
988 if (old_local_htm_uid != new_local_htm_uid) {
989 DPRINTF(HtmCpu,
"flush: lastRetiredHtmUid=%u\n",
991 DPRINTF(HtmCpu,
"flush: resetting localHtmUid=%u\n",
994 htm_cpt->setHtmUid(new_local_htm_uid);
1010 "idx:%i [sn:%lli]\n",
1018 panic(
"Is stalled should have been cleared by stalling load!\n");
1040 return htm_cpt->getHtmUid();
1056 if (!
storeWBIt->instruction()->isStoreConditional()) {
1060 storeWBIt->instruction()->setCompleted();
1080 if (inst->isSquashed()) {
1081 assert (!inst->isStore() || inst->isStoreConditional());
1086 if (!inst->isExecuted()) {
1087 inst->setExecuted();
1091 inst->completeAcc(pkt);
1101 auto htm_fault = std::dynamic_pointer_cast<
1105 assert(
dynamic_cast<ReExec*
>(inst->fault.
get()) !=
nullptr ||
1106 inst->savedRequest->isPartialFault());
1114 "%s writeback with HTM failure fault, "
1115 "however, completing packet is not aware of "
1116 "transaction failure. cause=%s htmUid=%u\n",
1117 inst->staticInst->getName(),
1119 htm_fault->getHtmUid());
1123 "due to pending fault.\n", inst->seqNum);
1139 assert(store_idx->valid());
1140 store_idx->completed() =
true;
1150 DynInstPtr store_inst = store_idx->instruction();
1161 DPRINTF(
LSQUnit,
"Completing store [sn:%lli], idx:%i, store head "
1166 if (debug::O3PipeView) {
1167 store_inst->storeTick =
1168 curTick() - store_inst->fetchTick;
1182 store_inst->setCompleted();
1194 if (
cpu->
checker && !store_inst->isStoreConditional()) {
1203 bool cache_got_blocked =
false;
1211 cache_got_blocked =
true;
1224 if (cache_got_blocked) {
1229 assert(request ==
storeWBIt->request());
1234 DPRINTF(
LSQUnit,
"Memory request (pkt: %s) from inst [sn:%llu] was"
1235 " %ssent (cache is blocked: %d, cache_got_blocked: %d)\n",
1253 cprintf(
"Load store queue: Dumping instructions.\n");
1259 cprintf(
"%s.[sn:%llu] ", inst->pcState(), inst->seqNum);
1268 cprintf(
"%s.[sn:%llu] ", inst->pcState(), inst->seqNum);
1281 return cpu->cacheLineSize();
1293 assert(!load_inst->isExecuted());
1300 if (request->
mainReq()->isStrictlyOrdered() &&
1301 (load_idx !=
loadQueue.
head() || !load_inst->isAtCommit())) {
1305 load_inst->clearIssued();
1306 load_inst->effAddrValid(
false);
1309 load_inst->seqNum, load_inst->pcState());
1316 return std::make_shared<GenericISA::M5PanicFault>(
1317 "Strictly ordered load [sn:%llx] PC %s\n",
1318 load_inst->seqNum, load_inst->pcState());
1322 "storeHead: %i addr: %#x%s\n",
1327 if (request->
mainReq()->isLLSC()) {
1331 load_inst->recordResult(
false);
1332 load_inst->tcBase()->getIsaPtr()->handleLockedRead(load_inst.
get(),
1334 load_inst->recordResult(
true);
1337 if (request->
mainReq()->isLocalAccess()) {
1338 assert(!load_inst->memData);
1339 assert(!load_inst->inHtmTransactionalState());
1347 Cycles delay = request->
mainReq()->localAccessor(thread, main_pkt);
1350 cpu->schedule(wb,
cpu->clockEdge(delay));
1355 if (request->
mainReq()->isHTMStart() || request->
mainReq()->isHTMCommit())
1365 if (!load_inst->memData) {
1366 load_inst->memData =
1367 new uint8_t[request->
mainReq()->getSize()];
1369 memset(load_inst->memData, 0, request->
mainReq()->getSize());
1373 if (load_inst->inHtmTransactionalState()) {
1375 load_inst->getHtmTransactionUid());
1380 cpu->schedule(wb,
cpu->clockEdge(delay));
1386 auto store_it = load_inst->sqIt;
1389 while (store_it !=
storeWBIt && !load_inst->isDataPrefetch()) {
1392 assert(store_it->valid());
1393 assert(store_it->instruction()->seqNum < load_inst->seqNum);
1394 int store_size = store_it->size();
1399 if (store_size != 0 && !store_it->instruction()->strictlyOrdered() &&
1400 !(store_it->request()->mainReq() &&
1401 store_it->request()->mainReq()->isCacheMaintenance())) {
1402 assert(store_it->instruction()->effAddrValid());
1406 auto req_s = request->
mainReq()->getVaddr();
1407 auto req_e = req_s + request->
mainReq()->getSize();
1408 auto st_s = store_it->instruction()->effAddr;
1409 auto st_e = st_s + store_size;
1411 bool store_has_lower_limit = req_s >= st_s;
1412 bool store_has_upper_limit = req_e <= st_e;
1413 bool lower_load_has_store_part = req_s < st_e;
1414 bool upper_load_has_store_part = req_e > st_s;
1422 if (!store_it->instruction()->isAtomic() &&
1423 store_has_lower_limit && store_has_upper_limit &&
1424 !request->
mainReq()->isLLSC()) {
1426 const auto& store_req = store_it->request()->mainReq();
1427 coverage = store_req->isMasked() ?
1433 (!request->
mainReq()->isLLSC() &&
1434 ((store_has_lower_limit && lower_load_has_store_part) ||
1435 (store_has_upper_limit && upper_load_has_store_part) ||
1436 (lower_load_has_store_part && upper_load_has_store_part))) ||
1439 (request->
mainReq()->isLLSC() &&
1440 ((store_has_lower_limit || upper_load_has_store_part) &&
1441 (store_has_upper_limit || lower_load_has_store_part))) ||
1444 (store_it->instruction()->isAtomic() &&
1445 ((store_has_lower_limit || upper_load_has_store_part) &&
1446 (store_has_upper_limit || lower_load_has_store_part)))) {
1453 int shift_amt = request->
mainReq()->getVaddr() -
1454 store_it->instruction()->effAddr;
1457 if (!load_inst->memData) {
1458 load_inst->memData =
1459 new uint8_t[request->
mainReq()->getSize()];
1461 if (store_it->isAllZeros())
1462 memset(load_inst->memData, 0,
1463 request->
mainReq()->getSize());
1465 memcpy(load_inst->memData,
1466 store_it->
data() + shift_amt,
1467 request->
mainReq()->getSize());
1470 "addr %#x\n", store_it._idx,
1471 request->
mainReq()->getVaddr());
1484 assert(!request->
mainReq()->isHTMCmd());
1485 if (load_inst->inHtmTransactionalState()) {
1486 assert (!
storeQueue[store_it._idx].completed());
1489 inHtmTransactionalState());
1491 load_inst->getHtmTransactionUid() ==
1493 getHtmTransactionUid());
1495 load_inst->getHtmTransactionUid());
1496 DPRINTF(HtmCpu,
"HTM LD (ST2LDF) "
1497 "pc=0x%lx - vaddr=0x%lx - "
1498 "paddr=0x%lx - htmUid=%u\n",
1499 load_inst->pcState().instAddr(),
1500 data_pkt->
req->hasVaddr() ?
1501 data_pkt->
req->getVaddr() : 0lu,
1503 load_inst->getHtmTransactionUid());
1531 if (store_it->completed()) {
1532 panic(
"Should not check one of these");
1550 load_inst->clearIssued();
1551 load_inst->effAddrValid(
false);
1557 "Store idx %i to load addr %#x\n",
1558 store_it._idx, request->
mainReq()->getVaddr());
1569 DPRINTF(
LSQUnit,
"Doing memory access for inst [sn:%lli] PC %s\n",
1570 load_inst->seqNum, load_inst->pcState());
1573 if (!load_inst->memData) {
1574 load_inst->memData =
new uint8_t[request->
mainReq()->getSize()];
1579 if (request->
mainReq()->isHTMCmd()) {
1583 *load_inst->memData = (uint64_t) 0x1ull;
1606 DPRINTF(
LSQUnit,
"Doing write to store idx %i, addr %#x | storeHead:%i "
1609 storeQueue[store_idx].instruction()->seqNum);
1612 unsigned size = request->
_size;
1614 bool store_no_data =
1616 storeQueue[store_idx].isAllZeros() = store_no_data;
1621 !request->
req()->isCacheMaintenance() &&
1622 !request->
req()->isAtomic())