gem5 [DEVELOP-FOR-25.0]
lsq_unit.cc
/*
 * Copyright (c) 2010-2014, 2017-2021 ARM Limited
 * Copyright (c) 2013 Advanced Micro Devices, Inc.
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2004-2006 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "cpu/o3/lsq_unit.hh"

#include "arch/generic/debugfaults.hh"
#include "base/str.hh"
#include "cpu/checker/cpu.hh"
#include "cpu/o3/dyn_inst.hh"
#include "cpu/o3/limits.hh"
#include "cpu/o3/lsq.hh"
#include "debug/Activity.hh"
#include "debug/HtmCpu.hh"
#include "debug/IEW.hh"
#include "debug/LSQUnit.hh"
#include "debug/O3PipeView.hh"
#include "mem/packet.hh"
#include "mem/request.hh"

namespace gem5
{

namespace o3
{

LSQUnit::WritebackEvent::WritebackEvent(const DynInstPtr &_inst,
        PacketPtr _pkt, LSQUnit *lsq_ptr)
    : Event(Default_Pri, AutoDelete),
      inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr)
{
    assert(_inst->savedRequest);
    _inst->savedRequest->writebackScheduled();
}

void
LSQUnit::WritebackEvent::process()
{
    assert(!lsqPtr->cpu->switchedOut());

    lsqPtr->writeback(inst, pkt);

    assert(inst->savedRequest);
    inst->savedRequest->writebackDone();
    delete pkt;
}

const char *
LSQUnit::WritebackEvent::description() const
{
    return "Store writeback";
}

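// A response may arrive for a request that was squashed and released in
// the meantime; the isReleased() check below discards it rather than
// writing back into a recycled LSQ entry.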
bool
LSQUnit::recvTimingResp(PacketPtr pkt)
{
    LSQRequest *request = dynamic_cast<LSQRequest*>(pkt->senderState);
    assert(request != nullptr);
    bool ret = true;
    /* Check that the request is still alive before any further action. */
    if (!request->isReleased()) {
        ret = request->recvTimingResp(pkt);
    }
    return ret;
}

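// Called when a memory response arrives. Besides routing the normal
// writeback, this is where an HTM abort reported by the cache hierarchy
// is turned into a GenericHtmFailureFault on the instruction before it
// is handed on to commit.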
void
LSQUnit::completeDataAccess(PacketPtr pkt)
{
    LSQRequest *request = dynamic_cast<LSQRequest *>(pkt->senderState);
    DynInstPtr inst = request->instruction();

    // hardware transactional memory
    // sanity check
    if (pkt->isHtmTransactional() && !inst->isSquashed()) {
        assert(inst->getHtmTransactionUid() == pkt->getHtmTransactionUid());
    }

    // if in a HTM transaction, it's possible
    // to abort within the cache hierarchy.
    // This is signalled back to the processor
    // through responses to memory requests.
    if (pkt->htmTransactionFailedInCache()) {
        // cannot do this for write requests because
        // they cannot tolerate faults
        const HtmCacheFailure htm_rc =
            pkt->getHtmTransactionFailedInCacheRC();
        if (pkt->isWrite()) {
            DPRINTF(HtmCpu,
                    "store notification (ignored) of HTM transaction failure "
                    "in cache - addr=0x%lx - rc=%s - htmUid=%d\n",
                    pkt->getAddr(), htmFailureToStr(htm_rc),
                    pkt->getHtmTransactionUid());
        } else {
            HtmFailureFaultCause fail_reason =
                HtmFailureFaultCause::INVALID;

            if (htm_rc == HtmCacheFailure::FAIL_SELF) {
                fail_reason = HtmFailureFaultCause::SIZE;
            } else if (htm_rc == HtmCacheFailure::FAIL_REMOTE) {
                fail_reason = HtmFailureFaultCause::MEMORY;
            } else if (htm_rc == HtmCacheFailure::FAIL_OTHER) {
                // these are likely loads that were issued out of order
                // they are faulted here, but it's unlikely that these will
                // ever reach the commit head.
                fail_reason = HtmFailureFaultCause::OTHER;
            } else {
                panic("HTM error - unhandled return code from cache (%s)",
                      htmFailureToStr(htm_rc));
            }

            inst->fault =
                std::make_shared<GenericHtmFailureFault>(
                    inst->getHtmTransactionUid(),
                    fail_reason);

            DPRINTF(HtmCpu,
                    "load notification of HTM transaction failure "
                    "in cache - pc=%s - addr=0x%lx - "
                    "rc=%u - htmUid=%d\n",
                    inst->pcState(), pkt->getAddr(),
                    htmFailureToStr(htm_rc), pkt->getHtmTransactionUid());
        }
    }

    cpu->ppDataAccessComplete->notify(std::make_pair(inst, pkt));

    assert(!cpu->switchedOut());
    if (!inst->isSquashed()) {
        if (request->needWBToRegister()) {
            // Only loads, store conditionals and atomics perform the writeback
            // after receiving the response from the memory
            assert(inst->isLoad() || inst->isStoreConditional() ||
                   inst->isAtomic());

            // hardware transactional memory
            if (pkt->htmTransactionFailedInCache()) {
                request->mainPacket()->setHtmTransactionFailedInCache(
                    pkt->getHtmTransactionFailedInCacheRC() );
            }

            writeback(inst, request->mainPacket());
            if (inst->isStore() || inst->isAtomic()) {
                request->writebackDone();
                completeStore(request->instruction()->sqIt);
            }
        } else if (inst->isStore()) {
            // This is a regular store (i.e., not store conditionals and
            // atomics), so it can complete without writing back
            completeStore(request->instruction()->sqIt);
        }
    }
}

LSQUnit::LSQUnit(uint32_t lqEntries, uint32_t sqEntries)
    : lsqID(-1), storeQueue(sqEntries), loadQueue(lqEntries),
      storesToWB(0),
      htmStarts(0), htmStops(0),
      lastRetiredHtmUid(0),
      cacheBlockMask(0), stalled(false),
      isStoreBlocked(false), storeInFlight(false), stats(nullptr)
{
}

void
LSQUnit::init(CPU *cpu_ptr, IEW *iew_ptr, const BaseO3CPUParams &params,
        LSQ *lsq_ptr, unsigned id)
{
    lsqID = id;

    cpu = cpu_ptr;
    iewStage = iew_ptr;

    lsq = lsq_ptr;

    cpu->addStatGroup(csprintf("lsq%i", lsqID).c_str(), &stats);

    DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n", lsqID);

    depCheckShift = params.LSQDepCheckShift;
    checkLoads = params.LSQCheckLoads;
    needsTSO = params.needsTSO;

    resetState();
}


void
LSQUnit::resetState()
{
    storesToWB = 0;

    // hardware transactional memory
    // nesting depth
    htmStarts = htmStops = 0;

    storeWBIt = storeQueue.begin();

    retryPkt = NULL;
    memDepViolator = NULL;

    stalled = false;

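    // Assumes the cache line size is a power of two: the complement of
    // (line size - 1) keeps only the block-number bits, e.g. for 64-byte
    // lines cacheBlockMask == ~0x3f.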
    cacheBlockMask = ~(cpu->cacheLineSize() - 1);
}

std::string
LSQUnit::name() const
{
    if (MaxThreads == 1) {
        return iewStage->name() + ".lsq";
    } else {
        return iewStage->name() + ".lsq.thread" + std::to_string(lsqID);
    }
}

LSQUnit::LSQUnitStats::LSQUnitStats(statistics::Group *parent)
    : statistics::Group(parent),
      ADD_STAT(forwLoads, statistics::units::Count::get(),
               "Number of loads that had data forwarded from stores"),
      ADD_STAT(squashedLoads, statistics::units::Count::get(),
               "Number of loads squashed"),
      ADD_STAT(ignoredResponses, statistics::units::Count::get(),
               "Number of memory responses ignored because the instruction is "
               "squashed"),
      ADD_STAT(memOrderViolation, statistics::units::Count::get(),
               "Number of memory ordering violations"),
      ADD_STAT(squashedStores, statistics::units::Count::get(),
               "Number of stores squashed"),
      ADD_STAT(rescheduledLoads, statistics::units::Count::get(),
               "Number of loads that were rescheduled"),
      ADD_STAT(blockedByCache, statistics::units::Count::get(),
               "Number of times an access to memory failed due to the cache "
               "being blocked"),
      ADD_STAT(loadToUse, "Distribution of cycle latency between the "
               "first time a load is issued and its completion"),
      ADD_STAT(addedLoadsAndStores, statistics::units::Count::get(),
               "Number of loads and stores written to the Load Store Queue")
{
    loadToUse
        .init(0, 299, 10)
        .flags(statistics::nozero);
}

void
LSQUnit::setDcachePort(RequestPort *dcache_port)
{
    dcachePort = dcache_port;
}

void
LSQUnit::drainSanityCheck() const
{
    for (int i = 0; i < loadQueue.capacity(); ++i)
        assert(!loadQueue[i].valid());

    assert(storesToWB == 0);
    assert(!retryPkt);
}

void
LSQUnit::takeOverFrom()
{
    resetState();
}

void
LSQUnit::insert(const DynInstPtr &inst)
{
    assert(inst->isMemRef());

    assert(inst->isLoad() || inst->isStore() || inst->isAtomic());

    if (inst->isLoad()) {
        insertLoad(inst);
    } else {
        insertStore(inst);
    }

    inst->setInLSQ();
}

void
LSQUnit::insertLoad(const DynInstPtr &load_inst)
{
    assert(!loadQueue.full());
    assert(loadQueue.size() < loadQueue.capacity());
    ++stats.addedLoadsAndStores;

    DPRINTF(LSQUnit, "Inserting load PC %s, idx:%i [sn:%lli]\n",
            load_inst->pcState(), loadQueue.tail(), load_inst->seqNum);

    /* Grow the queue. */
    loadQueue.advance_tail();

    load_inst->sqIt = storeQueue.end();

    assert(!loadQueue.back().valid());
    loadQueue.back().set(load_inst);
    load_inst->lqIdx = loadQueue.tail();
    assert(load_inst->lqIdx > 0);
    load_inst->lqIt = loadQueue.getIterator(load_inst->lqIdx);

    // hardware transactional memory
    // transactional state and nesting depth must be tracked
    // in the in-order part of the core.
    if (load_inst->isHtmStart()) {
        htmStarts++;
        DPRINTF(HtmCpu, ">> htmStarts++ (%d) : htmStops (%d)\n",
                htmStarts, htmStops);

        const int htm_depth = htmStarts - htmStops;
        const auto& htm_cpt = cpu->tcBase(lsqID)->getHtmCheckpointPtr();
        auto htm_uid = htm_cpt->getHtmUid();

        // for debugging purposes
        if (!load_inst->inHtmTransactionalState()) {
            htm_uid = htm_cpt->newHtmUid();
            DPRINTF(HtmCpu, "generating new htmUid=%u\n", htm_uid);
            if (htm_depth != 1) {
                DPRINTF(HtmCpu,
                    "unusual HTM transactional depth (%d)"
                    " possibly caused by mispeculation - htmUid=%u\n",
                    htm_depth, htm_uid);
            }
        }
        load_inst->setHtmTransactionalState(htm_uid, htm_depth);
    }

    if (load_inst->isHtmStop()) {
        htmStops++;
        DPRINTF(HtmCpu, ">> htmStarts (%d) : htmStops++ (%d)\n",
                htmStarts, htmStops);

        if (htmStops==1 && htmStarts==0) {
            DPRINTF(HtmCpu,
                "htmStops==1 && htmStarts==0. "
                "This generally shouldn't happen "
                "(unless due to misspeculation)\n");
        }
    }
}

void
LSQUnit::insertStore(const DynInstPtr& store_inst)
{
    // Make sure it is not full before inserting an instruction.
    assert(!storeQueue.full());
    assert(storeQueue.size() < storeQueue.capacity());
    ++stats.addedLoadsAndStores;

    DPRINTF(LSQUnit, "Inserting store PC %s, idx:%i [sn:%lli]\n",
            store_inst->pcState(), storeQueue.tail(), store_inst->seqNum);
    storeQueue.advance_tail();

    store_inst->sqIdx = storeQueue.tail();
    store_inst->sqIt = storeQueue.getIterator(store_inst->sqIdx);

    store_inst->lqIdx = loadQueue.tail() + 1;
    assert(store_inst->lqIdx > 0);
    store_inst->lqIt = loadQueue.end();

    storeQueue.back().set(store_inst);
}

DynInstPtr
LSQUnit::getMemDepViolator()
{
    DynInstPtr temp = memDepViolator;

    memDepViolator = NULL;

    return temp;
}

unsigned
LSQUnit::numFreeLoadEntries()
{
    DPRINTF(LSQUnit, "LQ size: %d, #loads occupied: %d\n",
            loadQueue.capacity(), loadQueue.size());
    return loadQueue.capacity() - loadQueue.size();
}

unsigned
LSQUnit::numFreeStoreEntries()
{
    DPRINTF(LSQUnit, "SQ size: %d, #stores occupied: %d\n",
            storeQueue.capacity(), storeQueue.size());
    return storeQueue.capacity() - storeQueue.size();
}

void
LSQUnit::checkSnoop(PacketPtr pkt)
{
    // Should only ever get invalidations in here
    assert(pkt->isInvalidate());

    DPRINTF(LSQUnit, "Got snoop for address %#x\n", pkt->getAddr());

    for (int x = 0; x < cpu->numContexts(); x++) {
        gem5::ThreadContext *tc = cpu->getContext(x);
        bool no_squash = cpu->thread[x]->noSquashFromTC;
        cpu->thread[x]->noSquashFromTC = true;
        tc->getIsaPtr()->handleLockedSnoop(pkt, cacheBlockMask);
        cpu->thread[x]->noSquashFromTC = no_squash;
    }

    if (loadQueue.empty())
        return;

    auto iter = loadQueue.begin();

    Addr invalidate_addr = pkt->getAddr() & cacheBlockMask;

    DynInstPtr ld_inst = iter->instruction();
    assert(ld_inst);
    LSQRequest *request = iter->request();

    // Check that this snoop didn't just invalidate our lock flag
    if (ld_inst->effAddrValid() && request &&
        request->isCacheBlockHit(invalidate_addr, cacheBlockMask)
        && ld_inst->memReqFlags & Request::LLSC) {
        ld_inst->tcBase()->getIsaPtr()->handleLockedSnoopHit(ld_inst.get());
    }

    bool force_squash = false;

    while (++iter != loadQueue.end()) {
        ld_inst = iter->instruction();
        assert(ld_inst);
        request = iter->request();
        if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered() || !request)
            continue;

        DPRINTF(LSQUnit, "-- inst [sn:%lli] to pktAddr:%#x\n",
                ld_inst->seqNum, invalidate_addr);

        if (force_squash ||
            request->isCacheBlockHit(invalidate_addr, cacheBlockMask)) {
            if (needsTSO) {
                // If we have a TSO system, as all loads must be ordered with
                // all other loads, this load as well as *all* subsequent loads
                // need to be squashed to prevent possible load reordering.
                force_squash = true;
            }
            if (ld_inst->possibleLoadViolation() || force_squash) {
                DPRINTF(LSQUnit, "Conflicting load at addr %#x [sn:%lli]\n",
                        pkt->getAddr(), ld_inst->seqNum);

                // Mark the load for re-execution
                ld_inst->fault = std::make_shared<ReExec>();
                request->setStateToFault();
            } else {
                DPRINTF(LSQUnit, "HitExternal Snoop for addr %#x [sn:%lli]\n",
                        pkt->getAddr(), ld_inst->seqNum);

                // Make sure that we don't lose a snoop hitting a LOCKED
                // address since the LOCK* flags don't get updated until
                // commit.
                if (ld_inst->memReqFlags & Request::LLSC) {
                    ld_inst->tcBase()->getIsaPtr()->
                        handleLockedSnoopHit(ld_inst.get());
                }

                // If an older load checks this and it's true
                // then we might have missed the snoop
                // in which case we need to invalidate to be sure
                ld_inst->hitExternalSnoop(true);
            }
        }
    }
    return;
}

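// Addresses below are compared at a granularity of (1 << depCheckShift)
// bytes (LSQDepCheckShift, by default 4, i.e. 16-byte windows): e.g. a
// store to 0x100 and a load from 0x10c land in the same window and are
// treated as overlapping. Coarser windows can give false positives,
// which cost an unnecessary squash but never affect correctness.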
Fault
LSQUnit::checkViolations(typename LoadQueue::iterator& loadIt,
        const DynInstPtr& inst)
{
    Addr inst_eff_addr1 = inst->effAddr >> depCheckShift;
    Addr inst_eff_addr2 = (inst->effAddr + inst->effSize - 1) >> depCheckShift;

    /** @todo in theory you only need to check an instruction that has executed
     * however, there isn't a good way in the pipeline at the moment to check
     * all instructions that will execute before the store writes back. Thus,
     * like the implementation that came before it, we're overly conservative.
     */
    while (loadIt != loadQueue.end()) {
        DynInstPtr ld_inst = loadIt->instruction();
        if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()) {
            ++loadIt;
            continue;
        }

        Addr ld_eff_addr1 = ld_inst->effAddr >> depCheckShift;
        Addr ld_eff_addr2 =
            (ld_inst->effAddr + ld_inst->effSize - 1) >> depCheckShift;

        if (inst_eff_addr2 >= ld_eff_addr1 && inst_eff_addr1 <= ld_eff_addr2) {
            if (inst->isLoad()) {
                // If this load is to the same block as an external snoop
                // invalidate that we've observed then the load needs to be
                // squashed as it could have newer data
                if (ld_inst->hitExternalSnoop()) {
                    if (!memDepViolator ||
                        ld_inst->seqNum < memDepViolator->seqNum) {
                        DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] "
                                "and [sn:%lli] at address %#x\n",
                                inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
                        memDepViolator = ld_inst;

                        ++stats.memOrderViolation;

                        return std::make_shared<GenericISA::M5PanicFault>(
                            "Detected fault with inst [sn:%lli] and "
                            "[sn:%lli] at address %#x\n",
                            inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
                    }
                }

                // Otherwise, mark the load as a possible load violation and
                // if we see a snoop before it's committed, we need to squash
                ld_inst->possibleLoadViolation(true);
                DPRINTF(LSQUnit, "Found possible load violation at addr: %#x"
                        " between instructions [sn:%lli] and [sn:%lli]\n",
                        inst_eff_addr1, inst->seqNum, ld_inst->seqNum);
            } else {
                // A load/store incorrectly passed this store.
                // Check if we already have a violator, or if it's newer
                // squash and refetch.
                if (memDepViolator && ld_inst->seqNum > memDepViolator->seqNum)
                    break;

                DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] and "
                        "[sn:%lli] at address %#x\n",
                        inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
                memDepViolator = ld_inst;

                ++stats.memOrderViolation;

                return std::make_shared<GenericISA::M5PanicFault>(
                    "Detected fault with "
                    "inst [sn:%lli] and [sn:%lli] at address %#x\n",
                    inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
            }
        }

        ++loadIt;
    }
    return NoFault;
}


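// Roughly: initiateAcc() starts translation and issues the memory
// request; the matching completeAcc() runs later from writeback() when
// the response arrives. A translation-delayed load just returns NoFault
// here and is re-executed once its translation completes.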
Fault
LSQUnit::executeLoad(const DynInstPtr &inst)
{
    // Execute a specific load.
    Fault load_fault = NoFault;

    DPRINTF(LSQUnit, "Executing load PC %s, [sn:%lli]\n",
            inst->pcState(), inst->seqNum);

    assert(!inst->isSquashed());

    if (inst->isExecuted()) {
        DPRINTF(LSQUnit, "Load [sn:%lli] already executed\n", inst->seqNum);
        return NoFault;
    }

    load_fault = inst->initiateAcc();

    if (load_fault == NoFault && !inst->readMemAccPredicate()) {
        assert(inst->readPredicate());
        inst->setExecuted();
        inst->completeAcc(nullptr);
        iewStage->instToCommit(inst);
        iewStage->activityThisCycle();
        return NoFault;
    }

    if (inst->isTranslationDelayed() && load_fault == NoFault)
        return load_fault;

    if (load_fault != NoFault && inst->translationCompleted() &&
        inst->savedRequest->isPartialFault()
        && !inst->savedRequest->isComplete()) {
        assert(inst->savedRequest->isSplit());
        // If we have a partial fault where the mem access is not complete yet
        // then the cache must have been blocked. This load will be re-executed
        // when the cache gets unblocked. We will handle the fault when the
        // mem access is complete.
        return NoFault;
    }

    // If the instruction faulted or predicated false, then we need to send it
    // along to commit without the instruction completing.
    if (load_fault != NoFault || !inst->readPredicate()) {
        // Send this instruction to commit, also make sure iew stage
        // realizes there is activity.  Mark it as executed unless it
        // is a strictly ordered load that needs to hit the head of
        // commit.
        if (!inst->readPredicate())
            inst->forwardOldRegs();
        DPRINTF(LSQUnit, "Load [sn:%lli] not executed from %s\n",
                inst->seqNum,
                (load_fault != NoFault ? "fault" : "predication"));
        if (!(inst->hasRequest() && inst->strictlyOrdered()) ||
            inst->isAtCommit()) {
            inst->setExecuted();
        }
        iewStage->instToCommit(inst);
        iewStage->activityThisCycle();
    } else {
        if (inst->effAddrValid()) {
            auto it = inst->lqIt;
            ++it;

            if (checkLoads)
                return checkViolations(it, inst);
        }
    }

    return load_fault;
}

Fault
LSQUnit::executeStore(const DynInstPtr &store_inst)
{
    // Make sure that a store exists.
    assert(storeQueue.size() != 0);

    ssize_t store_idx = store_inst->sqIdx;

    DPRINTF(LSQUnit, "Executing store PC %s [sn:%lli]\n",
            store_inst->pcState(), store_inst->seqNum);

    assert(!store_inst->isSquashed());

    // Check the recently completed loads to see if any match this store's
    // address.  If so, then we have a memory ordering violation.
    typename LoadQueue::iterator loadIt = store_inst->lqIt;

    Fault store_fault = store_inst->initiateAcc();

    if (store_inst->isTranslationDelayed() &&
        store_fault == NoFault)
        return store_fault;

    if (!store_inst->readPredicate()) {
        DPRINTF(LSQUnit, "Store [sn:%lli] not executed from predication\n",
                store_inst->seqNum);
        store_inst->forwardOldRegs();
        return store_fault;
    }

    if (storeQueue[store_idx].size() == 0) {
        DPRINTF(LSQUnit,"Fault on Store PC %s, [sn:%lli], Size = 0\n",
                store_inst->pcState(), store_inst->seqNum);

        if (store_inst->isAtomic()) {
            // If the instruction faulted, then we need to send it along
            // to commit without the instruction completing.
            if (!(store_inst->hasRequest() && store_inst->strictlyOrdered()) ||
                store_inst->isAtCommit()) {
                store_inst->setExecuted();
            }
            iewStage->instToCommit(store_inst);
            iewStage->activityThisCycle();
        }

        return store_fault;
    }

    assert(store_fault == NoFault);

    if (store_inst->isStoreConditional() || store_inst->isAtomic()) {
        // Store conditionals and Atomics need to set themselves as able to
        // writeback if we haven't had a fault by here.
        storeQueue[store_idx].canWB() = true;

        ++storesToWB;
    }

    return checkViolations(loadIt, store_inst);
}

void
LSQUnit::commitLoad()
{
    assert(loadQueue.front().valid());

    DynInstPtr inst = loadQueue.front().instruction();

    DPRINTF(LSQUnit, "Committing head load instruction, PC %s\n",
            inst->pcState());

    // Update histogram with memory latency from load
    // Only take latency from demand loads that were issued and did not fault
    if (!inst->isInstPrefetch() && !inst->isDataPrefetch()
            && inst->firstIssue != -1
            && inst->lastWakeDependents != -1) {
        stats.loadToUse.sample(cpu->ticksToCycles(
                    inst->lastWakeDependents - inst->firstIssue));
    }

    loadQueue.front().clear();
    loadQueue.pop_front();
}

void
LSQUnit::commitLoads(InstSeqNum &youngest_inst)
{
    assert(loadQueue.size() == 0 || loadQueue.front().valid());

    while (loadQueue.size() != 0 && loadQueue.front().instruction()->seqNum
            <= youngest_inst) {
        commitLoad();
    }
}

void
LSQUnit::commitStores(InstSeqNum &youngest_inst)
{
    assert(storeQueue.size() == 0 || storeQueue.front().valid());

    /* Forward iterate the store queue (age order). */
    for (auto& x : storeQueue) {
        assert(x.valid());
        // Mark any stores that are now committed and have not yet
        // been marked as able to write back.
        if (!x.canWB()) {
            if (x.instruction()->seqNum > youngest_inst) {
                break;
            }
            DPRINTF(LSQUnit, "Marking store as able to write back, PC "
                    "%s [sn:%lli]\n",
                    x.instruction()->pcState(),
                    x.instruction()->seqNum);

            x.canWB() = true;

            ++storesToWB;
        }
    }
}

void
LSQUnit::writebackBlockedStore()
{
    assert(isStoreBlocked);
    storeWBIt->request()->sendPacketToCache();
    if (storeWBIt->request()->isSent()){
        storePostSend();
    }
}

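// Stores leave the queue strictly in program order via storeWBIt. The
// loop below gives up early when the cache is blocked, when no cache
// port is available this cycle, or (with needsTSO) while another store
// is still in flight, which limits TSO mode to one outstanding store.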
void
LSQUnit::writebackStores()
{
    if (isStoreBlocked) {
        DPRINTF(LSQUnit, "Writing back blocked store\n");
        writebackBlockedStore();
    }

    while (storesToWB > 0 &&
           storeWBIt.dereferenceable() &&
           storeWBIt->valid() &&
           storeWBIt->canWB() &&
           ((!needsTSO) || (!storeInFlight)) &&
           lsq->cachePortAvailable(false)) {

        if (isStoreBlocked) {
            DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
                    " is blocked!\n");
            break;
        }

        // Store didn't write any data so no need to write it back to
        // memory.
        if (storeWBIt->size() == 0) {
            /* It is important that the preincrement happens at (or before)
             * the call, as the code of completeStore checks
             * storeWBIt. */
            completeStore(storeWBIt++);
            continue;
        }

        if (storeWBIt->instruction()->isDataPrefetch()) {
            storeWBIt++;
            continue;
        }

        assert(storeWBIt->hasRequest());
        assert(!storeWBIt->committed());

        DynInstPtr inst = storeWBIt->instruction();
        LSQRequest* request = storeWBIt->request();

        // Process store conditionals or store release after all previous
        // stores are completed
        if ((request->mainReq()->isLLSC() ||
             request->mainReq()->isRelease()) &&
             (storeWBIt.idx() != storeQueue.head())) {
            DPRINTF(LSQUnit, "Store idx:%i PC:%s to Addr:%#x "
                    "[sn:%lli] is %s%s and not head of the queue\n",
                    storeWBIt.idx(), inst->pcState(),
                    request->mainReq()->getPaddr(), inst->seqNum,
                    request->mainReq()->isLLSC() ? "SC" : "",
                    request->mainReq()->isRelease() ? "/Release" : "");
            break;
        }

        storeWBIt->committed() = true;

        assert(!inst->memData);
        inst->memData = new uint8_t[request->_size];

        if (storeWBIt->isAllZeros())
            memset(inst->memData, 0, request->_size);
        else
            memcpy(inst->memData, storeWBIt->data(), request->_size);

        request->buildPackets();

        DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%s "
                "to Addr:%#x, data:%#x [sn:%lli]\n",
                storeWBIt.idx(), inst->pcState(),
                request->mainReq()->getPaddr(), (int)*(inst->memData),
                inst->seqNum);

        // @todo: Remove this SC hack once the memory system handles it.
        if (inst->isStoreConditional()) {
            // Disable recording the result temporarily.  Writing to
            // misc regs normally updates the result, but this is not
            // the desired behavior when handling store conditionals.
            inst->recordResult(false);
            bool success = inst->tcBase()->getIsaPtr()->handleLockedWrite(
                    inst.get(), request->mainReq(), cacheBlockMask);
            inst->recordResult(true);
            request->packetSent();

            if (!success) {
                request->complete();
                // Instantly complete this store.
                DPRINTF(LSQUnit, "Store conditional [sn:%lli] failed.  "
                        "Instantly completing it.\n",
                        inst->seqNum);
                PacketPtr new_pkt = new Packet(*request->packet());
                WritebackEvent *wb = new WritebackEvent(inst,
                        new_pkt, this);
                cpu->schedule(wb, curTick() + 1);
                completeStore(storeWBIt);
                if (!storeQueue.empty())
                    storeWBIt++;
                else
                    storeWBIt = storeQueue.end();
                continue;
            }
        }

        if (request->mainReq()->isLocalAccess()) {
            assert(!inst->isStoreConditional());
            assert(!inst->inHtmTransactionalState());
            gem5::ThreadContext *thread = cpu->tcBase(lsqID);
            PacketPtr main_pkt = new Packet(request->mainReq(),
                                            MemCmd::WriteReq);
            main_pkt->dataStatic(inst->memData);
            request->mainReq()->localAccessor(thread, main_pkt);
            delete main_pkt;
            completeStore(storeWBIt);
            storeWBIt++;
            continue;
        }
        /* Send to cache */
        request->sendPacketToCache();

        /* If successful, do the post send */
        if (request->isSent()) {
            storePostSend();
        } else {
            DPRINTF(LSQUnit, "D-Cache became blocked when writing [sn:%lli], "
                    "will retry later\n",
                    inst->seqNum);
        }
    }
    assert(storesToWB >= 0);
}

void
LSQUnit::squash(const InstSeqNum &squashed_num)
{
    DPRINTF(LSQUnit, "Squashing until [sn:%lli]!"
            "(Loads:%i Stores:%i)\n", squashed_num, loadQueue.size(),
            storeQueue.size());

    while (loadQueue.size() != 0 &&
            loadQueue.back().instruction()->seqNum > squashed_num) {
        DPRINTF(LSQUnit,"Load Instruction PC %s squashed, "
                "[sn:%lli]\n",
                loadQueue.back().instruction()->pcState(),
                loadQueue.back().instruction()->seqNum);

        if (isStalled() && loadQueue.tail() == stallingLoadIdx) {
            stalled = false;
            stallingStoreIsn = 0;
            stallingLoadIdx = 0;
        }

        // hardware transactional memory
        // Squashing instructions can alter the transaction nesting depth
        // and must be corrected before fetching resumes.
        if (loadQueue.back().instruction()->isHtmStart())
        {
            htmStarts = (--htmStarts < 0) ? 0 : htmStarts;
            DPRINTF(HtmCpu, ">> htmStarts-- (%d) : htmStops (%d)\n",
                    htmStarts, htmStops);
        }
        if (loadQueue.back().instruction()->isHtmStop())
        {
            htmStops = (--htmStops < 0) ? 0 : htmStops;
            DPRINTF(HtmCpu, ">> htmStarts (%d) : htmStops-- (%d)\n",
                    htmStarts, htmStops);
        }
        // Clear the smart pointer to make sure it is decremented.
        loadQueue.back().instruction()->setSquashed();
        loadQueue.back().clear();

        loadQueue.pop_back();
        ++stats.squashedLoads;
    }

    // hardware transactional memory
    // scan load queue (from oldest to youngest) for most recent valid htmUid
    auto scan_it = loadQueue.begin();
    uint64_t in_flight_uid = 0;
    while (scan_it != loadQueue.end()) {
        if (scan_it->instruction()->isHtmStart() &&
            !scan_it->instruction()->isSquashed()) {
            in_flight_uid = scan_it->instruction()->getHtmTransactionUid();
            DPRINTF(HtmCpu, "loadQueue[%d]: found valid HtmStart htmUid=%u\n",
                scan_it._idx, in_flight_uid);
        }
        scan_it++;
    }
    // If there's a HtmStart in the pipeline then use its htmUid,
    // otherwise use the most recently committed uid
    const auto& htm_cpt = cpu->tcBase(lsqID)->getHtmCheckpointPtr();
    if (htm_cpt) {
        const uint64_t old_local_htm_uid = htm_cpt->getHtmUid();
        uint64_t new_local_htm_uid;
        if (in_flight_uid > 0)
            new_local_htm_uid = in_flight_uid;
        else
            new_local_htm_uid = lastRetiredHtmUid;

        if (old_local_htm_uid != new_local_htm_uid) {
            DPRINTF(HtmCpu, "flush: lastRetiredHtmUid=%u\n",
                lastRetiredHtmUid);
            DPRINTF(HtmCpu, "flush: resetting localHtmUid=%u\n",
                new_local_htm_uid);

            htm_cpt->setHtmUid(new_local_htm_uid);
        }
    }

    if (memDepViolator && squashed_num < memDepViolator->seqNum) {
        memDepViolator = NULL;
    }

    while (storeQueue.size() != 0 &&
           storeQueue.back().instruction()->seqNum > squashed_num) {
        // Instructions marked as can WB are already committed.
        if (storeQueue.back().canWB()) {
            break;
        }

        DPRINTF(LSQUnit,"Store Instruction PC %s squashed, "
                "idx:%i [sn:%lli]\n",
                storeQueue.back().instruction()->pcState(),
                storeQueue.tail(), storeQueue.back().instruction()->seqNum);

        // I don't think this can happen.  It should have been cleared
        // by the stalling load.
        if (isStalled() &&
            storeQueue.back().instruction()->seqNum == stallingStoreIsn) {
            panic("Is stalled should have been cleared by stalling load!\n");
            stalled = false;
            stallingStoreIsn = 0;
        }

        // Clear the smart pointer to make sure it is decremented.
        storeQueue.back().instruction()->setSquashed();

        // Must delete request now that it wasn't handed off to
        // memory.  This is quite ugly.  @todo: Figure out the proper
        // place to really handle request deletes.
        storeQueue.back().clear();

        storeQueue.pop_back();
        ++stats.squashedStores;
    }
}

uint64_t
LSQUnit::getLatestHtmUid() const
{
    const auto& htm_cpt = cpu->tcBase(lsqID)->getHtmCheckpointPtr();
    return htm_cpt->getHtmUid();
}

void
LSQUnit::storePostSend()
{
    if (isStalled() &&
        storeWBIt->instruction()->seqNum == stallingStoreIsn) {
        DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
                "load idx:%li\n",
                stallingStoreIsn, stallingLoadIdx);
        stalled = false;
        stallingStoreIsn = 0;
        iewStage->replayMemInst(loadQueue[stallingLoadIdx].instruction());
    }

    if (!storeWBIt->instruction()->isStoreConditional()) {
        // The store is basically completed at this time. This
        // only works so long as the checker doesn't try to
        // verify the value in memory for stores.
        storeWBIt->instruction()->setCompleted();

        if (cpu->checker) {
            cpu->checker->verify(storeWBIt->instruction());
        }
    }

    if (needsTSO) {
        storeInFlight = true;
    }

    storeWBIt++;
}

void
LSQUnit::writeback(const DynInstPtr &inst, PacketPtr pkt)
{
    iewStage->wakeCPU();

    // Squashed instructions do not need to complete their access.
    if (inst->isSquashed()) {
        assert (!inst->isStore() || inst->isStoreConditional());
        ++stats.ignoredResponses;
        return;
    }

    if (!inst->isExecuted()) {
        inst->setExecuted();

        if (inst->fault == NoFault) {
            // Complete access to copy data to proper place.
            inst->completeAcc(pkt);
        } else {
            // If the instruction has an outstanding fault, we cannot complete
            // the access as this discards the current fault.

            // If we have an outstanding fault, the fault should only be of
            // type ReExec or - in case of a SplitRequest - a partial
            // translation fault

            // Unless it's a hardware transactional memory fault
            auto htm_fault = std::dynamic_pointer_cast<
                GenericHtmFailureFault>(inst->fault);

            if (!htm_fault) {
                assert(dynamic_cast<ReExec*>(inst->fault.get()) != nullptr ||
                       inst->savedRequest->isPartialFault());

            } else if (!pkt->htmTransactionFailedInCache()) {
                // Situation in which the instruction has a hardware
                // transactional memory fault but not the packet itself. This
                // can occur with ldp_uop microops since access is spread over
                // multiple packets.
                DPRINTF(HtmCpu,
                        "%s writeback with HTM failure fault, "
                        "however, completing packet is not aware of "
                        "transaction failure. cause=%s htmUid=%u\n",
                        inst->staticInst->getName(),
                        htmFailureToStr(htm_fault->getHtmFailureFaultCause()),
                        htm_fault->getHtmUid());
            }

            DPRINTF(LSQUnit, "Not completing instruction [sn:%lli] access "
                    "due to pending fault.\n", inst->seqNum);
        }
    }

    // Need to insert instruction into queue to commit
    iewStage->instToCommit(inst);

    iewStage->activityThisCycle();

    // see if this load changed the PC
    iewStage->checkMisprediction(inst);
}

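// completeStore() marks the entry done and, when the completed entry is
// the queue head, pops every contiguous completed entry so SQ space is
// reclaimed in program order.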
void
LSQUnit::completeStore(typename StoreQueue::iterator store_idx)
{
    assert(store_idx->valid());
    store_idx->completed() = true;
    --storesToWB;
    // A bit conservative because a store completion may not free up entries,
    // but hopefully avoids two store completions in one cycle from making
    // the CPU tick twice.
    cpu->wakeCPU();
    cpu->activityThisCycle();

    /* We 'need' a copy here because we may clear the entry from the
     * store queue. */
    DynInstPtr store_inst = store_idx->instruction();
    if (store_idx == storeQueue.begin()) {
        do {
            storeQueue.front().clear();
            storeQueue.pop_front();
        } while (storeQueue.front().completed() &&
                 !storeQueue.empty());

        iewStage->updateLSQNextCycle = true;
    }

    DPRINTF(LSQUnit, "Completing store [sn:%lli], idx:%i, store head "
            "idx:%i\n",
            store_inst->seqNum, store_idx.idx() - 1, storeQueue.head() - 1);

#if TRACING_ON
    if (debug::O3PipeView) {
        store_inst->storeTick =
            curTick() - store_inst->fetchTick;
    }
#endif

    if (isStalled() &&
        store_inst->seqNum == stallingStoreIsn) {
        DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
                "load idx:%li\n",
                stallingStoreIsn, stallingLoadIdx);
        stalled = false;
        stallingStoreIsn = 0;
        iewStage->replayMemInst(loadQueue[stallingLoadIdx].instruction());
    }

    store_inst->setCompleted();

    if (needsTSO) {
        storeInFlight = false;
    }

    // Tell the checker we've completed this instruction.  Some stores
    // may get reported twice to the checker, but the checker can
    // handle that case.
    // Store conditionals cannot be sent to the checker yet, they have
    // to update the misc registers first which should take place
    // when they commit
    if (cpu->checker && !store_inst->isStoreConditional()) {
        cpu->checker->verify(store_inst);
    }
}

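// Two distinct failure modes: the dcache rejecting the packet
// (cache_got_blocked, which blocks the LSQ until recvRetry()), and the
// per-cycle cache-port budget being exhausted (cachePortAvailable()),
// which merely defers the access to a later cycle.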
bool
LSQUnit::trySendPacket(bool isLoad, PacketPtr data_pkt)
{
    bool ret = true;
    bool cache_got_blocked = false;

    LSQRequest *request = dynamic_cast<LSQRequest*>(data_pkt->senderState);

    if (!lsq->cacheBlocked() &&
        lsq->cachePortAvailable(isLoad)) {
        if (!dcachePort->sendTimingReq(data_pkt)) {
            ret = false;
            cache_got_blocked = true;
        }
    } else {
        ret = false;
    }

    if (ret) {
        if (!isLoad) {
            isStoreBlocked = false;
        }
        lsq->cachePortBusy(isLoad);
        request->packetSent();
    } else {
        if (cache_got_blocked) {
            lsq->cacheBlocked(true);
            ++stats.blockedByCache;
        }
        if (!isLoad) {
            assert(request == storeWBIt->request());
            isStoreBlocked = true;
        }
        request->packetNotSent();
    }
    DPRINTF(LSQUnit, "Memory request (pkt: %s) from inst [sn:%llu] was"
            " %ssent (cache is blocked: %d, cache_got_blocked: %d)\n",
            data_pkt->print(), request->instruction()->seqNum,
            ret ? "": "not ", lsq->cacheBlocked(), cache_got_blocked);
    return ret;
}

void
LSQUnit::startStaleTranslationFlush()
{
    DPRINTF(LSQUnit, "Unit %p marking stale translations %d %d\n", this,
        storeQueue.size(), loadQueue.size());
    for (auto& entry : storeQueue) {
        if (entry.valid() && entry.hasRequest())
            entry.request()->markAsStaleTranslation();
    }
    for (auto& entry : loadQueue) {
        if (entry.valid() && entry.hasRequest())
            entry.request()->markAsStaleTranslation();
    }
}

bool
LSQUnit::checkStaleTranslations() const
{
    DPRINTF(LSQUnit, "Unit %p checking stale translations\n", this);
    for (auto& entry : storeQueue) {
        if (entry.valid() && entry.hasRequest()
            && entry.request()->hasStaleTranslation())
            return true;
    }
    for (auto& entry : loadQueue) {
        if (entry.valid() && entry.hasRequest()
            && entry.request()->hasStaleTranslation())
            return true;
    }
    DPRINTF(LSQUnit, "Unit %p found no stale translations\n", this);
    return false;
}

void
LSQUnit::recvRetry()
{
    if (isStoreBlocked) {
        DPRINTF(LSQUnit, "Receiving retry: blocked store\n");
        writebackBlockedStore();
    }
}

void
LSQUnit::dumpInsts() const
{
    cprintf("Load store queue: Dumping instructions.\n");
    cprintf("Load queue size: %i\n", loadQueue.size());
    cprintf("Load queue: ");

    for (const auto& e: loadQueue) {
        const DynInstPtr &inst(e.instruction());
        cprintf("%s.[sn:%llu] ", inst->pcState(), inst->seqNum);
    }
    cprintf("\n");

    cprintf("Store queue size: %i\n", storeQueue.size());
    cprintf("Store queue: ");

    for (const auto& e: storeQueue) {
        const DynInstPtr &inst(e.instruction());
        cprintf("%s.[sn:%llu] ", inst->pcState(), inst->seqNum);
    }

    cprintf("\n");
}

void LSQUnit::schedule(Event& ev, Tick when) { cpu->schedule(ev, when); }

BaseMMU *LSQUnit::getMMUPtr() { return cpu->mmu; }

unsigned int
LSQUnit::cacheLineSize()
{
    return cpu->cacheLineSize();
}

Fault
LSQUnit::read(LSQRequest *request, ssize_t load_idx)
{
    LQEntry& load_entry = loadQueue[load_idx];
    const DynInstPtr& load_inst = load_entry.instruction();

    load_entry.setRequest(request);
    assert(load_inst);

    assert(!load_inst->isExecuted());

    // Make sure this isn't a strictly ordered load
    // A bit of a hackish way to get strictly ordered accesses to work
    // only if they're at the head of the LSQ and are ready to commit
    // (at the head of the ROB too).

    if (request->mainReq()->isStrictlyOrdered() &&
        (load_idx != loadQueue.head() || !load_inst->isAtCommit())) {
        // Tell IQ/mem dep unit that this instruction will need to be
        // rescheduled eventually
        iewStage->rescheduleMemInst(load_inst);
        load_inst->clearIssued();
        load_inst->effAddrValid(false);
        ++stats.rescheduledLoads;
        DPRINTF(LSQUnit, "Strictly ordered load [sn:%lli] PC %s\n",
                load_inst->seqNum, load_inst->pcState());

        // Must delete request now that it wasn't handed off to
        // memory.  This is quite ugly.  @todo: Figure out the proper
        // place to really handle request deletes.
        load_entry.setRequest(nullptr);
        request->discard();
        return std::make_shared<GenericISA::M5PanicFault>(
            "Strictly ordered load [sn:%llx] PC %s\n",
            load_inst->seqNum, load_inst->pcState());
    }

    DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, "
            "storeHead: %i addr: %#x%s\n",
            load_idx - 1, load_inst->sqIt._idx, storeQueue.head() - 1,
            request->mainReq()->getPaddr(), request->isSplit() ? " split" :
            "");

    if (request->mainReq()->isLLSC()) {
        // Disable recording the result temporarily.  Writing to misc
        // regs normally updates the result, but this is not the
        // desired behavior when handling store conditionals.
        load_inst->recordResult(false);
        load_inst->tcBase()->getIsaPtr()->handleLockedRead(load_inst.get(),
                request->mainReq());
        load_inst->recordResult(true);
    }

    if (request->mainReq()->isLocalAccess()) {
        assert(!load_inst->memData);
        load_inst->memData = new uint8_t[MaxDataBytes];

        gem5::ThreadContext *thread = cpu->tcBase(lsqID);
        PacketPtr main_pkt = new Packet(request->mainReq(), MemCmd::ReadReq);

        main_pkt->dataStatic(load_inst->memData);

        Cycles delay = request->mainReq()->localAccessor(thread, main_pkt);

        WritebackEvent *wb = new WritebackEvent(load_inst, main_pkt, this);
        cpu->schedule(wb, cpu->clockEdge(delay));
        return NoFault;
    }

    // Check the SQ for any previous stores that might lead to forwarding
    auto store_it = load_inst->sqIt;
    assert (store_it >= storeWBIt);
    // End once we've reached the top of the LSQ
    while (store_it != storeWBIt && !load_inst->isDataPrefetch()) {
        // Move the index to one younger
        store_it--;
        assert(store_it->valid());
        assert(store_it->instruction()->seqNum < load_inst->seqNum);
        int store_size = store_it->size();

        // Cache maintenance instructions go down via the store
        // path but they carry no data and they shouldn't be
        // considered for forwarding
        if (store_size != 0 && !store_it->instruction()->strictlyOrdered() &&
            !(store_it->request()->mainReq() &&
              store_it->request()->mainReq()->isCacheMaintenance())) {
            assert(store_it->instruction()->effAddrValid());

            // Check if the store data is within the lower and upper bounds of
            // addresses that the request needs.
            auto req_s = request->mainReq()->getVaddr();
            auto req_e = req_s + request->mainReq()->getSize();
            auto st_s = store_it->instruction()->effAddr;
            auto st_e = st_s + store_size;

            bool store_has_lower_limit = req_s >= st_s;
            bool store_has_upper_limit = req_e <= st_e;
            bool lower_load_has_store_part = req_s < st_e;
            bool upper_load_has_store_part = req_e > st_s;
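            // Example: a 4-byte load at 0x104 against an 8-byte store at
            // 0x100 has both store_has_lower_limit and
            // store_has_upper_limit true (full coverage); if the store
            // were only 4 bytes at 0x102, only the partial-overlap flags
            // would hold and forwarding could not satisfy the whole load.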

            auto coverage = AddrRangeCoverage::NoAddrRangeCoverage;

            // If the store entry is not atomic (atomic does not have valid
            // data), the store has all of the data needed, and
            // the load is not LLSC, then
            // we can forward data from the store to the load
            if (!store_it->instruction()->isAtomic() &&
                store_has_lower_limit && store_has_upper_limit &&
                !request->mainReq()->isLLSC()) {

                const auto& store_req = store_it->request()->mainReq();
                coverage = store_req->isMasked() ?
                    AddrRangeCoverage::PartialAddrRangeCoverage :
                    AddrRangeCoverage::FullAddrRangeCoverage;
            } else if (
                // This is the partial store-load forwarding case where a store
                // has only part of the load's data and the load isn't LLSC
                (!request->mainReq()->isLLSC() &&
                 ((store_has_lower_limit && lower_load_has_store_part) ||
                  (store_has_upper_limit && upper_load_has_store_part) ||
                  (lower_load_has_store_part && upper_load_has_store_part))) ||
                // The load is LLSC, and the store has all or part of the
                // load's data
                (request->mainReq()->isLLSC() &&
                 ((store_has_lower_limit || upper_load_has_store_part) &&
                  (store_has_upper_limit || lower_load_has_store_part))) ||
                // The store entry is atomic and has all or part of the load's
                // data
                (store_it->instruction()->isAtomic() &&
                 ((store_has_lower_limit || upper_load_has_store_part) &&
                  (store_has_upper_limit || lower_load_has_store_part)))) {

                coverage = AddrRangeCoverage::PartialAddrRangeCoverage;
            }

            if (coverage == AddrRangeCoverage::FullAddrRangeCoverage) {
                // Get shift amount for offset into the store's data.
                int shift_amt = request->mainReq()->getVaddr() -
                    store_it->instruction()->effAddr;

                // Allocate memory if this is the first time a load is issued.
                if (!load_inst->memData) {
                    load_inst->memData =
                        new uint8_t[request->mainReq()->getSize()];
                }
                if (store_it->isAllZeros())
                    memset(load_inst->memData, 0,
                            request->mainReq()->getSize());
                else
                    memcpy(load_inst->memData,
                        store_it->data() + shift_amt,
                        request->mainReq()->getSize());

                DPRINTF(LSQUnit, "Forwarding from store idx %i to load to "
                        "addr %#x\n", store_it._idx,
                        request->mainReq()->getVaddr());

                PacketPtr data_pkt = new Packet(request->mainReq(),
                        MemCmd::ReadReq);
                data_pkt->dataStatic(load_inst->memData);

                // hardware transactional memory
                // Store to load forwarding within a transaction
                // This should be okay because the store will be sent to
                // the memory subsystem and subsequently get added to the
                // write set of the transaction. The write set has a stronger
                // property than the read set, so the load doesn't necessarily
                // have to be there.
                assert(!request->mainReq()->isHTMCmd());
                if (load_inst->inHtmTransactionalState()) {
                    assert (!storeQueue[store_it._idx].completed());
                    assert (
                        storeQueue[store_it._idx].instruction()->
                        inHtmTransactionalState());
                    assert (
                        load_inst->getHtmTransactionUid() ==
                        storeQueue[store_it._idx].instruction()->
                        getHtmTransactionUid());
                    data_pkt->setHtmTransactional(
                        load_inst->getHtmTransactionUid());
                    DPRINTF(HtmCpu, "HTM LD (ST2LDF) "
                        "pc=0x%lx - vaddr=0x%lx - "
                        "paddr=0x%lx - htmUid=%u\n",
                        load_inst->pcState().instAddr(),
                        data_pkt->req->hasVaddr() ?
                        data_pkt->req->getVaddr() : 0lu,
                        data_pkt->getAddr(),
                        load_inst->getHtmTransactionUid());
                }

                if (request->isAnyOutstandingRequest()) {
                    assert(request->_numOutstandingPackets > 0);
                    // There are memory requests packets in flight already.
                    // This may happen if the store was not complete the
                    // first time this load got executed. Signal the
                    // senderState that response packets should be discarded.
                    request->discard();
                    // Avoid checking snoops on this discarded request.
                    load_entry.setRequest(nullptr);
                }

                WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt,
                        this);

                // We'll say this has a 1 cycle load-store forwarding latency
                // for now.
                // @todo: Need to make this a parameter.
                cpu->schedule(wb, curTick());

                // Don't need to do anything special for split loads.
                ++stats.forwLoads;

                return NoFault;
            } else if (
                coverage == AddrRangeCoverage::PartialAddrRangeCoverage) {
                // If it's already been written back, then don't worry about
                // stalling on it.
                if (store_it->completed()) {
                    panic("Should not check one of these");
                    continue;
                }

                // Must stall load and force it to retry, so long as it's the
                // oldest load that needs to do so.
                if (!stalled ||
                    (stalled &&
                     load_inst->seqNum <
                     loadQueue[stallingLoadIdx].instruction()->seqNum)) {
                    stalled = true;
                    stallingStoreIsn = store_it->instruction()->seqNum;
                    stallingLoadIdx = load_idx;
                }

                // Tell IQ/mem dep unit that this instruction will need to be
                // rescheduled eventually
                iewStage->rescheduleMemInst(load_inst);
                load_inst->clearIssued();
                load_inst->effAddrValid(false);
                ++stats.rescheduledLoads;

                // Do not generate a writeback event as this instruction is not
                // complete.
                DPRINTF(LSQUnit, "Load-store forwarding mis-match. "
                        "Store idx %i to load addr %#x\n",
                        store_it._idx, request->mainReq()->getVaddr());

                // Must discard the request.
                request->discard();
                load_entry.setRequest(nullptr);
                return NoFault;
            }
        }
    }

    // If there's no forwarding case, then go access memory
    DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %s\n",
            load_inst->seqNum, load_inst->pcState());

    // Allocate memory if this is the first time a load is issued.
    if (!load_inst->memData) {
        load_inst->memData = new uint8_t[request->mainReq()->getSize()];
    }


    // hardware transactional memory
    if (request->mainReq()->isHTMCmd()) {
        // this is a simple sanity check
        // the Ruby cache controller will set
        // memData to 0x0ul if successful.
        *load_inst->memData = (uint64_t) 0x1ull;
    }

    // For now, load throughput is constrained by the number of
    // load FUs only, and loads do not consume a cache port (only
    // stores do).
    // @todo We should account for cache port contention
    // and arbitrate between loads and stores.

    // If the cache is not blocked, do the cache access and send the
    // request to memory. If the request is not sent even though the
    // cache is unblocked, put the instruction into the retry queue so
    // we do not need an extra cycle to re-issue and execute it.
    request->buildPackets();
    request->sendPacketToCache();
    if (!request->isSent()) {
        if (!lsq->cacheBlocked()) {
            iewStage->retryMemInst(load_inst);
        } else {
            iewStage->blockMemInst(load_inst);
        }
    }

    return NoFault;
}

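// write() only buffers the store's data into its SQ entry; nothing goes
// to memory until the store commits and writebackStores() picks the
// entry up, hence no fault can be generated here.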
Fault
LSQUnit::write(LSQRequest *request, uint8_t *data, ssize_t store_idx)
{
    assert(storeQueue[store_idx].valid());

    DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x | storeHead:%i "
            "[sn:%llu]\n",
            store_idx - 1, request->req()->getPaddr(), storeQueue.head() - 1,
            storeQueue[store_idx].instruction()->seqNum);

    storeQueue[store_idx].setRequest(request);
    unsigned size = request->_size;
    storeQueue[store_idx].size() = size;
    bool store_no_data =
        request->mainReq()->getFlags() & Request::STORE_NO_DATA;
    storeQueue[store_idx].isAllZeros() = store_no_data;
    assert(size <= SQEntry::DataSize || store_no_data);

    // copy data into the storeQueue only if the store request has valid data
    if (!(request->req()->getFlags() & Request::CACHE_BLOCK_ZERO) &&
        !request->req()->isCacheMaintenance() &&
        !request->req()->isAtomic())
        memcpy(storeQueue[store_idx].data(), data, size);

    // This function only writes the data to the store queue, so no fault
    // can happen here.
    return NoFault;
}

InstSeqNum
LSQUnit::getLoadHeadSeqNum()
{
    if (loadQueue.front().valid())
        return loadQueue.front().instruction()->seqNum;
    else
        return 0;
}

InstSeqNum
LSQUnit::getStoreHeadSeqNum()
{
    if (storeQueue.front().valid())
        return storeQueue.front().instruction()->seqNum;
    else
        return 0;
}

} // namespace o3
} // namespace gem5
#define DPRINTF(x,...)
Definition trace.hh:209
const char data[]
virtual void handleLockedSnoop(PacketPtr pkt, Addr cacheBlockMask)
Definition isa.hh:115
Cycles is a wrapper class for representing cycle counts, i.e.
Definition types.hh:79
static const FlagsType AutoDelete
Definition eventq.hh:110
Event(Priority p=Default_Pri, Flags f=0)
Definition eventq.hh:407
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition packet.hh:295
Addr getAddr() const
Definition packet.hh:807
void setHtmTransactionFailedInCache(const HtmCacheFailure ret_code)
Stipulates that this packet/request has returned from the cache hierarchy in a failed transaction.
Definition packet.cc:493
void print(std::ostream &o, int verbosity=0, const std::string &prefix="") const
Definition packet.cc:368
void dataStatic(T *p)
Set the data pointer to the following value that should not be freed.
Definition packet.hh:1175
SenderState * senderState
This packet's sender state.
Definition packet.hh:545
HtmCacheFailure getHtmTransactionFailedInCacheRC() const
If a packet/request has returned from the cache hierarchy in a failed transaction,...
Definition packet.cc:509
bool isHtmTransactional() const
Returns whether or not this packet/request originates in the CPU executing in transactional mode,...
Definition packet.cc:523
bool isWrite() const
Definition packet.hh:594
uint64_t getHtmTransactionUid() const
If a packet/request originates in a CPU executing in transactional mode, i.e.
Definition packet.cc:529
RequestPtr req
A pointer to the original request.
Definition packet.hh:377
void setHtmTransactional(uint64_t val)
Stipulates that this packet/request originates in the CPU executing in transactional mode,...
Definition packet.cc:516
bool isInvalidate() const
Definition packet.hh:609
bool htmTransactionFailedInCache() const
Returns whether or not this packet/request has returned from the cache hierarchy in a failed transact...
Definition packet.cc:503
T * data
The stored pointer.
Definition refcnt.hh:146
T * get() const
Directly access the pointer itself without taking a reference.
Definition refcnt.hh:227
A RequestPort is a specialisation of a Port, which implements the default protocol for the three diff...
Definition port.hh:136
@ LLSC
The request is a Load locked/store conditional.
Definition request.hh:156
@ CACHE_BLOCK_ZERO
This is a write that is targeted and zeroing an entire cache block.
Definition request.hh:143
static const FlagsType STORE_NO_DATA
Definition request.hh:263
ThreadContext is the external interface to all thread state for anything outside of the CPU.
virtual BaseISA * getIsaPtr() const =0
O3CPU class, has each of the stages (fetch through commit) within it, as well as all of the time buff...
Definition cpu.hh:94
IEW handles both single threaded and SMT IEW (issue/execute/writeback).
Definition iew.hh:88
void setRequest(LSQRequest *r)
Definition lsq_unit.hh:138
const DynInstPtr & instruction() const
Definition lsq_unit.hh:145
static constexpr size_t DataSize
Definition lsq_unit.hh:167
Writeback event, specifically for when stores forward data to loads.
Definition lsq_unit.hh:412
WritebackEvent(const DynInstPtr &_inst, PacketPtr pkt, LSQUnit *lsq_ptr)
Constructs a writeback event.
Definition lsq_unit.cc:64
PacketPtr pkt
The packet that would have been sent to memory.
Definition lsq_unit.hh:429
DynInstPtr inst
Instruction whose results are being written back.
Definition lsq_unit.hh:426
const char * description() const
Returns the description of this event.
Definition lsq_unit.cc:86
LSQUnit * lsqPtr
The pointer to the LSQ unit that issued the store.
Definition lsq_unit.hh:432
void process()
Processes the writeback event.
Definition lsq_unit.cc:74
void insertStore(const DynInstPtr &store_inst)
Inserts a store instruction.
Definition lsq_unit.cc:382
Fault write(LSQRequest *requst, uint8_t *data, ssize_t store_idx)
Executes the store at the given index.
Definition lsq_unit.cc:1621
Addr cacheBlockMask
Address Mask for a cache block (e.g.
Definition lsq_unit.hh:479
IEW * iewStage
Pointer to the IEW stage.
Definition lsq_unit.hh:402
bool isStoreBlocked
Whehter or not a store is blocked due to the memory system.
Definition lsq_unit.hh:497
void takeOverFrom()
Takes over from another CPU's thread.
Definition lsq_unit.cc:299
bool checkLoads
Should loads be checked for dependency issues.
Definition lsq_unit.hh:461
Fault read(LSQRequest *request, ssize_t load_idx)
Executes the load at the given index.
Definition lsq_unit.cc:1326
bool storeInFlight
Whether or not a store is in flight.
Definition lsq_unit.hh:500
CPU * cpu
Pointer to the CPU.
Definition lsq_unit.hh:399
InstSeqNum getStoreHeadSeqNum()
Returns the sequence number of the head store instruction.
Definition lsq_unit.cc:1659
InstSeqNum getLoadHeadSeqNum()
Returns the sequence number of the head load instruction.
Definition lsq_unit.cc:1650
unsigned depCheckShift
The number of places to shift addresses in the LSQ before checking for dependency violations.
Definition lsq_unit.hh:458
void storePostSend()
Handles completing the send of a store to memory.
Definition lsq_unit.cc:1052
RequestPort * dcachePort
Pointer to the dcache port.
Definition lsq_unit.hh:408
InstSeqNum stallingStoreIsn
The store that causes the stall due to partial store to load forwarding.
Definition lsq_unit.hh:489
bool isStalled()
Returns whether or not the LSQ unit is stalled.
Definition lsq_unit.hh:565
void insertLoad(const DynInstPtr &load_inst)
Inserts a load instruction.
Definition lsq_unit.cc:321
Fault executeStore(const DynInstPtr &inst)
Executes a store instruction.
Definition lsq_unit.cc:666
void insert(const DynInstPtr &inst)
Inserts an instruction.
Definition lsq_unit.cc:305
bool checkStaleTranslations() const
Definition lsq_unit.cc:1265
uint64_t getLatestHtmUid() const
Definition lsq_unit.cc:1045
void schedule(Event &ev, Tick when)
Schedule event for the cpu.
Definition lsq_unit.cc:1315
void writebackStores()
Writes back stores.
Definition lsq_unit.cc:798
Fault checkViolations(typename LoadQueue::iterator &loadIt, const DynInstPtr &inst)
Check for ordering violations in the LSQ.
Definition lsq_unit.cc:514
StoreQueue storeQueue
The store queue.
Definition lsq_unit.hh:449
Fault executeLoad(const DynInstPtr &inst)
Executes a load instruction.
Definition lsq_unit.cc:594
void commitLoad()
Commits the head load.
Definition lsq_unit.cc:728
LSQUnit(uint32_t lqEntries, uint32_t sqEntries)
Constructs an LSQ unit.
Definition lsq_unit.cc:192
void completeStore(typename StoreQueue::iterator store_idx)
Completes the store at the specified index.
Definition lsq_unit.cc:1145
void drainSanityCheck() const
Perform sanity checks after a drain.
Definition lsq_unit.cc:289
void setDcachePort(RequestPort *dcache_port)
Sets the pointer to the dcache port.
Definition lsq_unit.cc:283
unsigned int cacheLineSize()
Definition lsq_unit.cc:1320
void resetState()
Reset the LSQ state.
Definition lsq_unit.cc:226
unsigned numFreeStoreEntries()
Returns the number of free SQ entries.
Definition lsq_unit.cc:422
uint64_t lastRetiredHtmUid
Definition lsq_unit.hh:471
LSQ::LSQRequest LSQRequest
Definition lsq_unit.hh:93
void dumpInsts() const
Debugging function to dump instructions in the LSQ.
Definition lsq_unit.cc:1292
bool stalled
Whether or not the LSQ is stalled.
Definition lsq_unit.hh:485
ssize_t stallingLoadIdx
The load queue index of the load stalled on the above store.
Definition lsq_unit.hh:491
LoadQueue loadQueue
The load queue.
Definition lsq_unit.hh:452
PacketPtr retryPkt
The packet that needs to be retried.
Definition lsq_unit.hh:494
void init(CPU *cpu_ptr, IEW *iew_ptr, const BaseO3CPUParams &params, LSQ *lsq_ptr, unsigned id)
Initializes the LSQ unit with the specified number of entries.
Definition lsq_unit.cc:203
DynInstPtr getMemDepViolator()
Returns the memory ordering violator.
Definition lsq_unit.cc:404
DynInstPtr memDepViolator
The oldest load that caused a memory ordering violation.
Definition lsq_unit.hh:503
void squash(const InstSeqNum &squashed_num)
Squashes all instructions younger than a specific sequence number.
Definition lsq_unit.cc:930
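A minimal sketch of that squash walk over one queue, with std::deque standing in for gem5's circular LSQ buffer:

#include <cassert>
#include <cstdint>
#include <deque>

using InstSeqNum = std::uint64_t;

// Youngest entries live at the tail; drop everything newer than squashedNum.
void squashYoungerThan(std::deque<InstSeqNum> &queue, InstSeqNum squashedNum)
{
    while (!queue.empty() && queue.back() > squashedNum)
        queue.pop_back();   // drop entries younger than the squash point
}

int main()
{
    std::deque<InstSeqNum> lq{3, 5, 8, 9};
    squashYoungerThan(lq, 5);                  // e.g. a mispredict resolved at 5
    assert(lq.size() == 2 && lq.back() == 5);  // 8 and 9 are gone
    return 0;
}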
std::string name() const
Returns the name of the LSQ unit.
Definition lsq_unit.cc:245
void checkSnoop(PacketPtr pkt)
Check if an incoming invalidate hits in the lsq on a load that might have issued out of order wrt another load.
Definition lsq_unit.cc:431
static constexpr auto MaxDataBytes
Definition lsq_unit.hh:91
void recvRetry()
Handles doing the retry.
Definition lsq_unit.cc:1283
int storesToWB
The number of store instructions in the SQ waiting to writeback.
Definition lsq_unit.hh:464
unsigned numFreeLoadEntries()
Returns the number of free LQ entries.
Definition lsq_unit.cc:414
bool trySendPacket(bool isLoad, PacketPtr data_pkt)
Attempts to send a packet to the cache.
Definition lsq_unit.cc:1208
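trySendPacket, isStoreBlocked, retryPkt, and recvRetry together form a blocked-store/retry handshake. Below is a self-contained imitation of that handshake; FlakyPort and Sender are mock types, not gem5's RequestPort or LSQUnit.

#include <cstdio>

// Mock port that may reject a packet when busy, like a blocked cache port.
struct FlakyPort
{
    bool busy = true;
    bool send(int pktId)
    {
        if (busy) return false;        // refused; caller must hold the packet
        std::printf("sent packet %d\n", pktId);
        return true;
    }
};

struct Sender
{
    FlakyPort &port;
    int retryPkt = -1;                 // packet waiting for a retry, if any
    bool isStoreBlocked = false;

    bool trySend(int pktId)
    {
        if (port.send(pktId)) return true;
        retryPkt = pktId;              // stash the packet until recvRetry
        isStoreBlocked = true;
        return false;
    }

    void recvRetry()
    {
        if (isStoreBlocked && trySend(retryPkt))
            isStoreBlocked = false;    // blocked store finally left the LSQ
    }
};

int main()
{
    FlakyPort port;
    Sender s{port};
    s.trySend(1);                      // fails: port busy, store blocks
    port.busy = false;
    s.recvRetry();                     // cache signals retry; packet goes out
    return 0;
}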
void writeback(const DynInstPtr &inst, PacketPtr pkt)
Writes back the instruction, sending it to IEW.
Definition lsq_unit.cc:1083
StoreQueue::iterator storeWBIt
The index of the first instruction that may be ready to be written back, and has not yet been written back.
Definition lsq_unit.hh:476
BaseMMU * getMMUPtr()
Definition lsq_unit.cc:1317
void writebackBlockedStore()
Try to finish a previously blocked write back attempt.
Definition lsq_unit.cc:788
void commitLoads(InstSeqNum &youngest_inst)
Commits loads older than a specific sequence number.
Definition lsq_unit.cc:751
gem5::o3::LSQUnit::LSQUnitStats stats
void completeDataAccess(PacketPtr pkt)
Completes the data access that has been returned from the memory system.
Definition lsq_unit.cc:105
void startStaleTranslationFlush()
Definition lsq_unit.cc:1250
LSQ * lsq
Pointer to the LSQ.
Definition lsq_unit.hh:405
bool needsTSO
Flag for memory model.
Definition lsq_unit.hh:506
void commitStores(InstSeqNum &youngest_inst)
Commits stores older than a specific sequence number.
Definition lsq_unit.cc:762
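commitLoads and commitStores share the same head-first walk; a compact standalone model, assuming sequence numbers are stored in age order:

#include <cassert>
#include <cstdint>
#include <deque>

using InstSeqNum = std::uint64_t;

// Retire entries from the head while they are no younger than the youngest
// committed instruction. Illustrative only.
unsigned commitOlderThan(std::deque<InstSeqNum> &queue, InstSeqNum youngest)
{
    unsigned committed = 0;
    while (!queue.empty() && queue.front() <= youngest) {
        queue.pop_front();   // oldest entries live at the head
        ++committed;
    }
    return committed;
}

int main()
{
    std::deque<InstSeqNum> sq{3, 5, 8};
    assert(commitOlderThan(sq, 5) == 2);   // 3 and 5 retire, 8 remains
    return 0;
}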
ThreadID lsqID
The LSQUnit thread id.
Definition lsq_unit.hh:446
bool recvTimingResp(PacketPtr pkt)
Handles writing back and completing the load or store that has returned from memory.
Definition lsq_unit.cc:92
virtual bool recvTimingResp(PacketPtr pkt)=0
virtual void buildPackets()=0
virtual RequestPtr mainReq()
Definition lsq.hh:408
virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask)=0
Test if the request accesses a particular cache line.
void discard()
The request is discarded (e.g. partial store-load forwarding).
Definition lsq.hh:548
PacketPtr packet(int idx=0)
Definition lsq.hh:398
uint32_t _numOutstandingPackets
Definition lsq.hh:288
void packetNotSent()
Update the status to reflect that a packet was not sent.
Definition lsq.hh:476
const uint32_t _size
Definition lsq.hh:285
bool isReleased()
Test if the LSQRequest has been released, i.e. is self-owned.
Definition lsq.hh:432
bool isAnyOutstandingRequest()
Test if there is any in-flight translation or mem access request.
Definition lsq.hh:418
virtual void sendPacketToCache()=0
bool isSplit() const
Definition lsq.hh:439
void packetSent()
Update the status to reflect that a packet was sent.
Definition lsq.hh:467
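A hedged sketch of the outstanding-packet bookkeeping that packetSent, packetNotSent, and _numOutstandingPackets suggest for (possibly split) requests; the RequestState type is illustrative, and the real methods also update request status flags.

#include <cassert>
#include <cstdint>

// Illustrative only -- not gem5's LSQRequest.
struct RequestState
{
    std::uint32_t numOutstandingPackets = 0;   // cf. _numOutstandingPackets

    void packetSent()    { ++numOutstandingPackets; }
    void packetNotSent() { /* count unchanged; the packet will be retried */ }

    bool isAnyOutstandingRequest() const { return numOutstandingPackets > 0; }
};

int main()
{
    RequestState r;
    r.packetSent();       // first half of a split access leaves the LSQ
    r.packetNotSent();    // second half is rejected and must be retried
    assert(r.isAnyOutstandingRequest());
    return 0;
}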
bool needWBToRegister() const
Definition lsq.hh:445
RequestPtr req(int idx=0)
Definition lsq.hh:392
virtual PacketPtr mainPacket()
Definition lsq.hh:401
const DynInstPtr & instruction()
Definition lsq.hh:364
Statistics container.
Definition group.hh:93
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition group.hh:75
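A standalone imitation of the ADD_STAT idea: stringizing the member name so a single token provides both the member initializer and the stat's reported name. The Scalar type here is a mock; gem5's real macro forwards its extra arguments to the statistics::Group machinery instead.

#include <cstdio>
#include <string>

// Mock stat type: records a name and a description.
struct Scalar
{
    std::string name, desc;
    Scalar(const char *n, const char *d) : name(n), desc(d) {}
};

// Imitation of the pattern: #n stringizes the member name.
#define ADD_STAT(n, ...) n(#n, __VA_ARGS__)

struct LSQUnitStats
{
    Scalar forwLoads;
    LSQUnitStats()
        : ADD_STAT(forwLoads, "Number of loads forwarded from SQ stores")
    {}
};

int main()
{
    LSQUnitStats stats;
    std::printf("%s: %s\n", stats.forwLoads.name.c_str(),
                stats.forwLoads.desc.c_str());
    return 0;
}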
static const Priority Default_Pri
Default is zero for historical reasons.
Definition eventq.hh:182
#define panic(...)
This implements a cprintf based panic() function.
Definition logging.hh:220
Bitfield< 21 > writeback
Definition types.hh:126
Bitfield< 7 > i
Definition misc_types.hh:67
Bitfield< 9 > e
Definition misc_types.hh:65
Bitfield< 33 > id
Bitfield< 3 > x
Definition pagetable.hh:76
static constexpr int MaxThreads
Definition limits.hh:38
RefCountingPtr< DynInst > DynInstPtr
Units for Stats.
Definition units.hh:113
const FlagsType nozero
Don't print if this is zero.
Definition info.hh:67
Copyright (c) 2024 Arm Limited. All rights reserved.
Definition binary32.hh:36
std::shared_ptr< FaultBase > Fault
Definition types.hh:249
void cprintf(const char *format, const Args &...args)
Definition cprintf.hh:155
Tick curTick()
The universal simulation clock.
Definition cur_tick.hh:46
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Definition types.hh:147
std::string htmFailureToStr(HtmFailureFaultCause cause)
Convert enum into string to be used for debug purposes.
Definition htm.cc:44
uint64_t Tick
Tick count type.
Definition types.hh:58
Packet * PacketPtr
HtmCacheFailure
Definition htm.hh:60
std::string csprintf(const char *format, const Args &...args)
Definition cprintf.hh:161
constexpr decltype(nullptr) NoFault
Definition types.hh:253
HtmFailureFaultCause
Definition htm.hh:48
uint64_t InstSeqNum
Definition inst_seq.hh:40
Declaration of the Packet class.
Declaration of a request, the overall memory request consisting of the parts of the request that are ...
statistics::Scalar blockedByCache
Number of times the LSQ is blocked due to the cache.
Definition lsq_unit.hh:536
statistics::Scalar forwLoads
Total number of loads forwarded from LSQ stores.
Definition lsq_unit.hh:517
LSQUnitStats(statistics::Group *parent)
Definition lsq_unit.cc:254
statistics::Scalar addedLoadsAndStores
Total number of loads and stores written to the load store queue.
Definition lsq_unit.hh:543
statistics::Scalar ignoredResponses
Total number of responses from the memory system that are ignored due to the instruction already being squashed.
Definition lsq_unit.hh:524
statistics::Distribution loadToUse
Distribution of cycle latency between the first time a load is issued and its completion.
Definition lsq_unit.hh:540
statistics::Scalar rescheduledLoads
Number of loads that were rescheduled.
Definition lsq_unit.hh:533
statistics::Scalar squashedStores
Total number of squashed stores.
Definition lsq_unit.hh:530
statistics::Scalar squashedLoads
Total number of squashed loads.
Definition lsq_unit.hh:520
statistics::Scalar memOrderViolation
Total number of memory ordering violations.
Definition lsq_unit.hh:527

Generated on Mon May 26 2025 09:19:08 for gem5 by doxygen 1.13.2