43 #ifndef __CPU_O3_LSQ_UNIT_IMPL_HH__ 44 #define __CPU_O3_LSQ_UNIT_IMPL_HH__ 47 #include "arch/locked_mem.hh" 49 #include "config/the_isa.hh" 53 #include "debug/Activity.hh" 54 #include "debug/IEW.hh" 55 #include "debug/LSQUnit.hh" 56 #include "debug/O3PipeView.hh" 63 :
Event(Default_Pri, AutoDelete),
64 inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr)
66 assert(_inst->savedReq);
67 _inst->savedReq->writebackScheduled();
74 assert(!
lsqPtr->cpu->switchedOut());
78 assert(
inst->savedReq);
79 inst->savedReq->writebackDone();
87 return "Store writeback";
96 assert(req !=
nullptr);
99 if (senderState->alive()) {
100 ret = req->recvTimingResp(pkt);
102 senderState->outstanding--;
115 cpu->ppDataAccessComplete->notify(std::make_pair(inst, pkt));
121 assert(!
cpu->switchedOut());
122 if (!inst->isSquashed()) {
126 assert(inst->isLoad() || inst->isStoreConditional() ||
128 writeback(inst, state->request()->mainPacket());
129 if (inst->isStore() || inst->isAtomic()) {
134 }
else if (inst->isStore()) {
142 template <
class Impl>
154 LSQ *lsq_ptr,
unsigned id)
194 if (Impl::MaxThreads == 1) {
207 .
desc(
"Number of loads that had data forwarded from stores");
211 .
desc(
"Number of loads ignored due to an invalid address");
215 .
desc(
"Number of loads squashed");
219 .
desc(
"Number of memory responses ignored because the instruction is squashed");
222 .
name(
name() +
".memOrderViolation")
223 .
desc(
"Number of memory ordering violations");
227 .
desc(
"Number of stores squashed");
231 .
desc(
"Number of software prefetches ignored due to an invalid address");
235 .
desc(
"Number of blocked loads due to partial load-store forwarding");
239 .
desc(
"Number of loads that were rescheduled");
243 .
desc(
"Number of times an access to memory failed due to the cache being blocked");
271 template <
class Impl>
275 assert(inst->isMemRef());
277 assert(inst->isLoad() || inst->isStore() || inst->isAtomic());
279 if (inst->isLoad()) {
288 template <
class Impl>
311 template <
class Impl>
320 store_inst->pcState(),
storeQueue.tail(), store_inst->seqNum);
332 template <
class Impl>
333 typename Impl::DynInstPtr
343 template <
class Impl>
354 template <
class Impl>
366 template <
class Impl>
375 for (
int x = 0;
x <
cpu->numContexts();
x++) {
377 bool no_squash =
cpu->thread[
x]->noSquashFromTC;
378 cpu->thread[
x]->noSquashFromTC =
true;
380 cpu->thread[
x]->noSquashFromTC = no_squash;
395 if (ld_inst->effAddrValid() &&
400 bool force_squash =
false;
403 ld_inst = iter->instruction();
405 req = iter->request();
406 if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered())
410 ld_inst->seqNum, invalidate_addr);
420 if (ld_inst->possibleLoadViolation() || force_squash) {
422 pkt->
getAddr(), ld_inst->seqNum);
425 ld_inst->fault = std::make_shared<ReExec>();
426 req->setStateToFault();
429 pkt->
getAddr(), ld_inst->seqNum);
440 ld_inst->hitExternalSnoop(
true);
447 template <
class Impl>
462 if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()) {
471 if (inst_eff_addr2 >= ld_eff_addr1 && inst_eff_addr1 <= ld_eff_addr2) {
472 if (inst->isLoad()) {
476 if (ld_inst->hitExternalSnoop()) {
480 "and [sn:%lli] at address %#x\n",
481 inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
486 return std::make_shared<GenericISA::M5PanicFault>(
487 "Detected fault with inst [sn:%lli] and " 488 "[sn:%lli] at address %#x\n",
489 inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
495 ld_inst->possibleLoadViolation(
true);
497 " between instructions [sn:%lli] and [sn:%lli]\n",
498 inst_eff_addr1, inst->seqNum, ld_inst->seqNum);
507 "[sn:%lli] at address %#x\n",
508 inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
513 return std::make_shared<GenericISA::M5PanicFault>(
514 "Detected fault with " 515 "inst [sn:%lli] and [sn:%lli] at address %#x\n",
516 inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
528 template <
class Impl>
537 inst->pcState(), inst->seqNum);
539 assert(!inst->isSquashed());
541 load_fault = inst->initiateAcc();
543 if (load_fault ==
NoFault && !inst->readMemAccPredicate()) {
544 assert(inst->readPredicate());
546 inst->completeAcc(
nullptr);
552 if (inst->isTranslationDelayed() && load_fault ==
NoFault)
555 if (load_fault !=
NoFault && inst->translationCompleted() &&
556 inst->savedReq->isPartialFault() && !inst->savedReq->isComplete()) {
557 assert(inst->savedReq->isSplit());
567 if (load_fault !=
NoFault || !inst->readPredicate()) {
572 if (!inst->readPredicate())
573 inst->forwardOldRegs();
576 (load_fault !=
NoFault ?
"fault" :
"predication"));
577 if (!(inst->hasRequest() && inst->strictlyOrdered()) ||
578 inst->isAtCommit()) {
584 if (inst->effAddrValid()) {
585 auto it = inst->lqIt;
596 template <
class Impl>
604 int store_idx = store_inst->sqIdx;
607 store_inst->pcState(), store_inst->seqNum);
609 assert(!store_inst->isSquashed());
613 typename LoadQueue::iterator loadIt = store_inst->lqIt;
615 Fault store_fault = store_inst->initiateAcc();
617 if (store_inst->isTranslationDelayed() &&
621 if (!store_inst->readPredicate()) {
622 DPRINTF(
LSQUnit,
"Store [sn:%lli] not executed from predication\n",
624 store_inst->forwardOldRegs();
630 store_inst->pcState(), store_inst->seqNum);
635 assert(store_fault ==
NoFault);
637 if (store_inst->isStoreConditional() || store_inst->isAtomic()) {
649 template <
class Impl>
664 template <
class Impl>
676 template <
class Impl>
688 if (
x.instruction()->seqNum > youngest_inst) {
693 x.instruction()->pcState(),
694 x.instruction()->seqNum);
703 template <
class Impl>
708 storeWBIt->request()->sendPacketToCache();
714 template <
class Impl>
746 if (
storeWBIt->instruction()->isDataPrefetch()) {
759 if ((req->mainRequest()->isLLSC() ||
760 req->mainRequest()->isRelease()) &&
763 "[sn:%lli] is %s%s and not head of the queue\n",
765 req->request()->getPaddr(), inst->seqNum,
766 req->mainRequest()->isLLSC() ?
"SC" :
"",
767 req->mainRequest()->isRelease() ?
"/Release" :
"");
773 assert(!inst->memData);
774 inst->memData =
new uint8_t[req->_size];
777 memset(inst->memData, 0, req->_size);
779 memcpy(inst->memData,
storeWBIt->data(), req->_size);
782 if (req->senderState() ==
nullptr) {
784 state->isLoad =
false;
785 state->needWB =
false;
788 req->senderState(state);
789 if (inst->isStoreConditional() || inst->isAtomic()) {
791 state->needWB =
true;
797 "to Addr:%#x, data:%#x [sn:%lli]\n",
799 req->request()->getPaddr(), (int)*(inst->memData),
803 if (inst->isStoreConditional()) {
807 inst->recordResult(
false);
810 inst->recordResult(
true);
817 "Instantly completing it.\n",
832 if (req->request()->isLocalAccess()) {
833 assert(!inst->isStoreConditional());
838 req->request()->localAccessor(thread, main_pkt);
845 req->sendPacketToCache();
851 DPRINTF(
LSQUnit,
"D-Cache became blocked when writing [sn:%lli], " 852 "will retry later\n",
859 template <
class Impl>
864 "(Loads:%i Stores:%i)\n", squashed_num,
loads,
stores);
894 storeQueue.back().instruction()->seqNum > squashed_num) {
901 "idx:%i [sn:%lli]\n",
909 panic(
"Is stalled should have been cleared by stalling load!\n");
915 storeQueue.back().instruction()->setSquashed();
928 template <
class Impl>
942 if (!
storeWBIt->instruction()->isStoreConditional()) {
946 storeWBIt->instruction()->setCompleted();
960 template <
class Impl>
967 if (inst->isSquashed()) {
968 assert(!inst->isStore());
973 if (!inst->isExecuted()) {
978 inst->completeAcc(pkt);
986 assert(dynamic_cast<ReExec*>(inst->fault.get()) !=
nullptr ||
987 inst->savedReq->isPartialFault());
990 "due to pending fault.\n", inst->seqNum);
1000 iewStage->checkMisprediction(inst);
1003 template <
class Impl>
1007 assert(store_idx->valid());
1008 store_idx->completed() =
true;
1014 cpu->activityThisCycle();
1018 DynInstPtr store_inst = store_idx->instruction();
1027 iewStage->updateLSQNextCycle =
true;
1030 DPRINTF(
LSQUnit,
"Completing store [sn:%lli], idx:%i, store head " 1032 store_inst->seqNum, store_idx.
idx() - 1,
storeQueue.head() - 1);
1035 if (
DTRACE(O3PipeView)) {
1036 store_inst->storeTick =
1037 curTick() - store_inst->fetchTick;
1051 store_inst->setCompleted();
1063 if (
cpu->checker && !store_inst->isStoreConditional()) {
1064 cpu->checker->verify(store_inst);
1068 template <
class Impl>
1073 bool cache_got_blocked =
false;
1081 cache_got_blocked =
true;
1092 state->outstanding++;
1093 state->request()->packetSent();
1095 if (cache_got_blocked) {
1100 assert(state->request() ==
storeWBIt->request());
1103 state->request()->packetNotSent();
1108 template <
class Impl>
1118 template <
class Impl>
1122 cprintf(
"Load store queue: Dumping instructions.\n");
1128 cprintf(
"%s.[sn:%llu] ", inst->pcState(), inst->seqNum);
1137 cprintf(
"%s.[sn:%llu] ", inst->pcState(), inst->seqNum);
1143 template <
class Impl>
1147 return cpu->cacheLineSize();
1150 #endif//__CPU_O3_LSQ_UNIT_IMPL_HH__ A MasterPort is a specialisation of a BaseMasterPort, which implements the default protocol for the t...
#define panic(...)
This implements a cprintf based panic() function.
MasterPort * dcachePort
Pointer to the dcache port.
unsigned numFreeLoadEntries()
Returns the number of free LQ entries.
void squash(const InstSeqNum &squashed_num)
Squashes all instructions younger than a specific sequence number.
Impl::DynInstPtr DynInstPtr
The request is a Load locked/store conditional.
decltype(nullptr) constexpr NoFault
Stats::Scalar invAddrSwpfs
Total number of software prefetches ignored due to invalid addresses.
Iterator to the circular queue.
iterator begin()
Iterators.
DynInstPtr memDepViolator
The oldest load that caused a memory ordering violation.
void completeStore(typename StoreQueue::iterator store_idx)
Completes the store at the specified index.
Stats::Scalar lsqForwLoads
Total number of loads forwarded from LSQ stores.
bool isStoreBlocked
Whether or not a store is blocked due to the memory system.
static uint32_t moduloAdd(uint32_t op1, uint32_t op2, uint32_t size)
General modular addition.
LSQRequest * pendingRequest
The packet that is pending free cache ports.
DynInstPtr getMemDepViolator()
Returns the memory ordering violator.
Writeback event, specifically for when stores forward data to loads.
Declaration of a request, the overall memory request consisting of the parts of the request that are ...
void insertStore(const DynInstPtr &store_inst)
Inserts a store instruction.
bool trySendPacket(bool isLoad, PacketPtr data_pkt)
Attempts to send a packet to the cache.
bool full() const
Is the queue full? A queue is full if the head is the 0^{th} element and the tail is the (size-1)^{th...
bool isStalled()
Returns whether or not the LSQ unit is stalled.
Stats::Scalar lsqRescheduledLoads
Number of loads that were rescheduled.
void resetState()
Reset the LSQ state.
bool handleLockedWrite(XC *xc, const RequestPtr &req, Addr cacheBlockMask)
iterator getIterator(size_t idx)
Return an iterator to an index in the vector.
StoreQueue::iterator storeWBIt
The index of the first instruction that may be ready to be written back, and has not yet been written...
bool sendTimingReq(PacketPtr pkt)
Attempt to send a timing request to the slave port by calling its corresponding receive function...
void regStats()
Registers statistics.
typename Impl::CPUPol::LSQ::LSQRequest LSQRequest
typename LSQ::LSQSenderState LSQSenderState
void insertLoad(const DynInstPtr &load_inst)
Inserts a load instruction.
void storePostSend()
Handles completing the send of a store to memory.
Stats::Scalar invAddrLoads
Total number of loads ignored due to invalid addresses.
bool isInvalidate() const
ThreadContext is the external interface to all thread state for anything outside of the CPU...
Stats::Scalar lsqIgnoredResponses
Total number of responses from the memory system that are ignored due to the instruction already bein...
Stats::Scalar lsqSquashedLoads
Total number of squashed loads.
void recvRetry()
Handles doing the retry.
size_t idx() const
OutputIterator has no extra requirements.
DynInstPtr inst
Instruction whose results are being written back.
void dataStatic(T *p)
Set the data pointer to the following value that should not be freed.
void drainSanityCheck() const
Perform sanity checks after a drain.
void takeOverFrom()
Takes over from another CPU's thread.
void commitStores(InstSeqNum &youngest_inst)
Commits stores older than a specific sequence number.
void writeback(const DynInstPtr &inst, PacketPtr pkt)
Writes back the instruction, sending it to IEW.
ThreadID lsqID
The LSQUnit thread id.
Fault executeStore(const DynInstPtr &inst)
Executes a store instruction.
void dumpInsts() const
Debugging function to dump instructions in the LSQ.
bool storeInFlight
Whether or not a store is in flight.
Tick curTick()
The current simulated tick.
void advance_tail()
Increases the tail by one.
void pop_back()
Circularly decrease the tail pointer.
void checkSnoop(PacketPtr pkt)
Check if an incoming invalidate hits in the lsq on a load that might have issued out of order wrt ano...
IEW * iewStage
Pointer to the IEW stage.
void cachePortBusy(bool is_load)
Another store port is in use.
Fault executeLoad(const DynInstPtr &inst)
Executes a load instruction.
bool stalled
Whether or not the LSQ is stalled.
void commitLoads(InstSeqNum &youngest_inst)
Commits loads older than a specific sequence number.
bool recvTimingResp(PacketPtr pkt)
Handles writing back and completing the load or store that has returned from memory.
void commitLoad()
Commits the head load.
int stallingLoadIdx
The index of the above store.
void completeDataAccess(PacketPtr pkt)
Completes the data access that has been returned from the memory system.
void setDcachePort(MasterPort *dcache_port)
Sets the pointer to the dcache port.
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
LSQUnit< Impl > * lsqPtr
The pointer to the LSQ unit that issued the store.
bool empty() const
Is the queue empty?
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
bool hasPendingRequest
Whether or not there is a packet that couldn't be sent because of a lack of cache ports...
void pop_front(size_t num_elem=1)
Circularly increase the head pointer.
PacketPtr retryPkt
The packet that needs to be retried.
InstSeqNum stallingStoreIsn
The store that causes the stall due to partial store to load forwarding.
Particularisation of the LSQSenderState to the SQ.
Stats::Scalar lsqSquashedStores
Total number of squashed stores.
void writebackBlockedStore()
Try to finish a previously blocked write back attempt.
LSQ * lsq
Pointer to the LSQ.
Derived & name(const std::string &name)
Set the name and marks this stat to print at the end of simulation.
const char * description() const
Returns the description of this event.
int stores
The number of store instructions in the SQ.
std::string name() const
Returns the name of the LSQ unit.
bool checkLoads
Should loads be checked for dependency issues.
Declaration of the Packet class.
PacketPtr pkt
The packet that would have been sent to memory.
unsigned numFreeStoreEntries()
Returns the number of free SQ entries.
SenderState * senderState
This packet's sender state.
void process()
Processes the writeback event.
unsigned int cacheLineSize()
bool dereferenceable() const
Test dereferenceability.
void init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params, LSQ *lsq_ptr, unsigned id)
Initializes the LSQ unit with the specified number of entries.
int storesToWB
The number of store instructions in the SQ waiting to writeback.
void writebackStores()
Writes back stores.
bool needsTSO
Flag for memory model.
bool cachePortAvailable(bool is_load) const
Is any store port available to use?
int loads
The number of load instructions in the LQ.
LSQUnit(uint32_t lqEntries, uint32_t sqEntries)
Constructs an LSQ unit.
O3CPU * cpu
Pointer to the CPU.
void handleLockedSnoop(XC *xc, PacketPtr pkt, Addr cacheBlockMask)
Stats::Scalar lsqBlockedLoads
Ready loads blocked due to partial store-forwarding.
Derived & desc(const std::string &_desc)
Set the description and marks this stat to print at the end of simulation.
Stats::Scalar lsqCacheBlocked
Number of times the LSQ is blocked due to the cache.
LoadQueue loadQueue
The load queue.
unsigned depCheckShift
The number of places to shift addresses in the LSQ before checking for dependency violations...
CircularQueue< SQEntry > storeQueue
The store queue.
bool cacheBlocked() const
Is D-cache blocked?
void insert(const DynInstPtr &inst)
Inserts an instruction.
Class that implements the actual LQ and SQ for each specific thread.
Fault checkViolations(typename LoadQueue::iterator &loadIt, const DynInstPtr &inst)
Check for ordering violations in the LSQ.
WritebackEvent(const DynInstPtr &_inst, PacketPtr pkt, LSQUnit *lsq_ptr)
Constructs a writeback event.
std::shared_ptr< FaultBase > Fault
const std::string to_string(sc_enc enc)
void handleLockedSnoopHit(XC *xc)
Stats::Scalar lsqMemOrderViolation
Total number of memory ordering violations.
void cprintf(const char *format, const Args &...args)
ProbePointArg< PacketInfo > Packet
Packet probe point.
Addr cacheBlockMask
Address Mask for a cache block (e.g.