46 #ifndef __CPU_O3_LSQ_UNIT_IMPL_HH__ 47 #define __CPU_O3_LSQ_UNIT_IMPL_HH__ 50 #include "arch/locked_mem.hh" 52 #include "config/the_isa.hh" 56 #include "debug/Activity.hh" 57 #include "debug/IEW.hh" 58 #include "debug/LSQUnit.hh" 59 #include "debug/O3PipeView.hh" 66 :
Event(Default_Pri, AutoDelete),
67 inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr)
69 assert(_inst->savedReq);
70 _inst->savedReq->writebackScheduled();
77 assert(!
lsqPtr->cpu->switchedOut());
81 assert(
inst->savedReq);
82 inst->savedReq->writebackDone();
90 return "Store writeback";
99 assert(req !=
nullptr);
102 if (senderState->alive()) {
103 ret = req->recvTimingResp(pkt);
105 senderState->outstanding--;
118 cpu->ppDataAccessComplete->notify(std::make_pair(inst, pkt));
124 assert(!
cpu->switchedOut());
125 if (!inst->isSquashed()) {
129 assert(inst->isLoad() || inst->isStoreConditional() ||
131 writeback(inst, state->request()->mainPacket());
132 if (inst->isStore() || inst->isAtomic()) {
137 }
else if (inst->isStore()) {
145 template <
class Impl>
157 LSQ *lsq_ptr,
unsigned id)
197 if (Impl::MaxThreads == 1) {
210 .
desc(
"Number of loads that had data forwarded from stores");
214 .
desc(
"Number of loads ignored due to an invalid address");
218 .
desc(
"Number of loads squashed");
222 .
desc(
"Number of memory responses ignored because the instruction is squashed");
225 .
name(
name() +
".memOrderViolation")
226 .
desc(
"Number of memory ordering violations");
230 .
desc(
"Number of stores squashed");
234 .
desc(
"Number of software prefetches ignored due to an invalid address");
238 .
desc(
"Number of blocked loads due to partial load-store forwarding");
242 .
desc(
"Number of loads that were rescheduled");
246 .
desc(
"Number of times an access to memory failed due to the cache being blocked");
274 template <
class Impl>
278 assert(inst->isMemRef());
280 assert(inst->isLoad() || inst->isStore() || inst->isAtomic());
282 if (inst->isLoad()) {
291 template <
class Impl>
314 template <
class Impl>
323 store_inst->pcState(),
storeQueue.tail(), store_inst->seqNum);
335 template <
class Impl>
336 typename Impl::DynInstPtr
346 template <
class Impl>
357 template <
class Impl>
369 template <
class Impl>
378 for (
int x = 0;
x <
cpu->numContexts();
x++) {
380 bool no_squash =
cpu->thread[
x]->noSquashFromTC;
381 cpu->thread[
x]->noSquashFromTC =
true;
383 cpu->thread[
x]->noSquashFromTC = no_squash;
398 if (ld_inst->effAddrValid() &&
403 bool force_squash =
false;
406 ld_inst = iter->instruction();
408 req = iter->request();
409 if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered())
413 ld_inst->seqNum, invalidate_addr);
423 if (ld_inst->possibleLoadViolation() || force_squash) {
425 pkt->
getAddr(), ld_inst->seqNum);
428 ld_inst->fault = std::make_shared<ReExec>();
429 req->setStateToFault();
432 pkt->
getAddr(), ld_inst->seqNum);
443 ld_inst->hitExternalSnoop(
true);
450 template <
class Impl>
465 if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()) {
474 if (inst_eff_addr2 >= ld_eff_addr1 && inst_eff_addr1 <= ld_eff_addr2) {
475 if (inst->isLoad()) {
479 if (ld_inst->hitExternalSnoop()) {
483 "and [sn:%lli] at address %#x\n",
484 inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
489 return std::make_shared<GenericISA::M5PanicFault>(
490 "Detected fault with inst [sn:%lli] and " 491 "[sn:%lli] at address %#x\n",
492 inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
498 ld_inst->possibleLoadViolation(
true);
500 " between instructions [sn:%lli] and [sn:%lli]\n",
501 inst_eff_addr1, inst->seqNum, ld_inst->seqNum);
510 "[sn:%lli] at address %#x\n",
511 inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
516 return std::make_shared<GenericISA::M5PanicFault>(
517 "Detected fault with " 518 "inst [sn:%lli] and [sn:%lli] at address %#x\n",
519 inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
531 template <
class Impl>
540 inst->pcState(), inst->seqNum);
542 assert(!inst->isSquashed());
544 load_fault = inst->initiateAcc();
546 if (load_fault ==
NoFault && !inst->readMemAccPredicate()) {
547 assert(inst->readPredicate());
549 inst->completeAcc(
nullptr);
555 if (inst->isTranslationDelayed() && load_fault ==
NoFault)
558 if (load_fault !=
NoFault && inst->translationCompleted() &&
559 inst->savedReq->isPartialFault() && !inst->savedReq->isComplete()) {
560 assert(inst->savedReq->isSplit());
570 if (load_fault !=
NoFault || !inst->readPredicate()) {
575 if (!inst->readPredicate())
576 inst->forwardOldRegs();
579 (load_fault !=
NoFault ?
"fault" :
"predication"));
580 if (!(inst->hasRequest() && inst->strictlyOrdered()) ||
581 inst->isAtCommit()) {
587 if (inst->effAddrValid()) {
588 auto it = inst->lqIt;
599 template <
class Impl>
607 int store_idx = store_inst->sqIdx;
610 store_inst->pcState(), store_inst->seqNum);
612 assert(!store_inst->isSquashed());
616 typename LoadQueue::iterator loadIt = store_inst->lqIt;
618 Fault store_fault = store_inst->initiateAcc();
620 if (store_inst->isTranslationDelayed() &&
624 if (!store_inst->readPredicate()) {
625 DPRINTF(
LSQUnit,
"Store [sn:%lli] not executed from predication\n",
627 store_inst->forwardOldRegs();
633 store_inst->pcState(), store_inst->seqNum);
638 assert(store_fault ==
NoFault);
640 if (store_inst->isStoreConditional() || store_inst->isAtomic()) {
652 template <
class Impl>
667 template <
class Impl>
679 template <
class Impl>
691 if (
x.instruction()->seqNum > youngest_inst) {
696 x.instruction()->pcState(),
697 x.instruction()->seqNum);
706 template <
class Impl>
711 storeWBIt->request()->sendPacketToCache();
717 template <
class Impl>
749 if (
storeWBIt->instruction()->isDataPrefetch()) {
761 assert(!inst->memData);
762 inst->memData =
new uint8_t[req->_size];
765 memset(inst->memData, 0, req->_size);
767 memcpy(inst->memData,
storeWBIt->data(), req->_size);
770 if (req->senderState() ==
nullptr) {
772 state->isLoad =
false;
773 state->needWB =
false;
776 req->senderState(state);
777 if (inst->isStoreConditional() || inst->isAtomic()) {
779 state->needWB =
true;
785 "to Addr:%#x, data:%#x [sn:%lli]\n",
787 req->request()->getPaddr(), (int)*(inst->memData),
791 if (inst->isStoreConditional()) {
795 inst->recordResult(
false);
798 inst->recordResult(
true);
805 "Instantly completing it.\n",
820 if (req->request()->isMmappedIpr()) {
821 assert(!inst->isStoreConditional());
826 req->handleIprWrite(thread, main_pkt);
833 req->sendPacketToCache();
839 DPRINTF(
LSQUnit,
"D-Cache became blocked when writing [sn:%lli], " 840 "will retry later\n",
847 template <
class Impl>
852 "(Loads:%i Stores:%i)\n", squashed_num,
loads,
stores);
882 storeQueue.back().instruction()->seqNum > squashed_num) {
889 "idx:%i [sn:%lli]\n",
897 panic(
"Is stalled should have been cleared by stalling load!\n");
903 storeQueue.back().instruction()->setSquashed();
916 template <
class Impl>
930 if (!
storeWBIt->instruction()->isStoreConditional()) {
934 storeWBIt->instruction()->setCompleted();
948 template <
class Impl>
955 if (inst->isSquashed()) {
956 assert(!inst->isStore());
961 if (!inst->isExecuted()) {
966 inst->completeAcc(pkt);
974 assert(dynamic_cast<ReExec*>(inst->fault.get()) !=
nullptr ||
975 inst->savedReq->isPartialFault());
978 "due to pending fault.\n", inst->seqNum);
991 template <
class Impl>
995 assert(store_idx->valid());
996 store_idx->completed() =
true;
1002 cpu->activityThisCycle();
1006 DynInstPtr store_inst = store_idx->instruction();
1015 iewStage->updateLSQNextCycle =
true;
1018 DPRINTF(
LSQUnit,
"Completing store [sn:%lli], idx:%i, store head " 1020 store_inst->seqNum, store_idx.
idx() - 1,
storeQueue.head() - 1);
1023 if (
DTRACE(O3PipeView)) {
1024 store_inst->storeTick =
1025 curTick() - store_inst->fetchTick;
1039 store_inst->setCompleted();
1051 if (
cpu->checker && !store_inst->isStoreConditional()) {
1052 cpu->checker->verify(store_inst);
1056 template <
class Impl>
1061 bool cache_got_blocked =
false;
1069 cache_got_blocked =
true;
1080 state->outstanding++;
1081 state->request()->packetSent();
1083 if (cache_got_blocked) {
1088 assert(state->request() ==
storeWBIt->request());
1091 state->request()->packetNotSent();
1096 template <
class Impl>
1106 template <
class Impl>
1110 cprintf(
"Load store queue: Dumping instructions.\n");
1116 cprintf(
"%s.[sn:%llu] ", inst->pcState(), inst->seqNum);
1125 cprintf(
"%s.[sn:%llu] ", inst->pcState(), inst->seqNum);
1131 template <
class Impl>
1135 return cpu->cacheLineSize();
1138 #endif//__CPU_O3_LSQ_UNIT_IMPL_HH__ A MasterPort is a specialisation of a BaseMasterPort, which implements the default protocol for the t...
#define panic(...)
This implements a cprintf based panic() function.
MasterPort * dcachePort
Pointer to the dcache port.
unsigned numFreeLoadEntries()
Returns the number of free LQ entries.
void squash(const InstSeqNum &squashed_num)
Squashes all instructions younger than a specific sequence number.
Impl::DynInstPtr DynInstPtr
decltype(nullptr) constexpr NoFault
Stats::Scalar invAddrSwpfs
Total number of software prefetches ignored due to invalid addresses.
Iterator to the circular queue.
iterator begin()
Iterators.
DynInstPtr memDepViolator
The oldest load that caused a memory ordering violation.
void completeStore(typename StoreQueue::iterator store_idx)
Completes the store at the specified index.
Stats::Scalar lsqForwLoads
Total number of loads forwarded from LSQ stores.
bool isStoreBlocked
Whether or not a store is blocked due to the memory system.
static uint32_t moduloAdd(uint32_t op1, uint32_t op2, uint32_t size)
General modular addition.
LSQRequest * pendingRequest
The packet that is pending free cache ports.
DynInstPtr getMemDepViolator()
Returns the memory ordering violator.
The request is a Load locked/store conditional.
Writeback event, specifically for when stores forward data to loads.
Declaration of a request, the overall memory request consisting of the parts of the request that are ...
void insertStore(const DynInstPtr &store_inst)
Inserts a store instruction.
bool trySendPacket(bool isLoad, PacketPtr data_pkt)
Attempts to send a packet to the cache.
bool full() const
Is the queue full? A queue is full if the head is the 0^{th} element and the tail is the (size-1)^{th...
bool isStalled()
Returns whether or not the LSQ unit is stalled.
Stats::Scalar lsqRescheduledLoads
Number of loads that were rescheduled.
void resetState()
Reset the LSQ state.
bool handleLockedWrite(XC *xc, const RequestPtr &req, Addr cacheBlockMask)
iterator getIterator(size_t idx)
Return an iterator to an index in the vector.
StoreQueue::iterator storeWBIt
The index of the first instruction that may be ready to be written back, and has not yet been written...
bool sendTimingReq(PacketPtr pkt)
Attempt to send a timing request to the slave port by calling its corresponding receive function...
void regStats()
Registers statistics.
typename Impl::CPUPol::LSQ::LSQRequest LSQRequest
typename LSQ::LSQSenderState LSQSenderState
void insertLoad(const DynInstPtr &load_inst)
Inserts a load instruction.
void storePostSend()
Handles completing the send of a store to memory.
Stats::Scalar invAddrLoads
Total number of loads ignored due to invalid addresses.
bool isInvalidate() const
ThreadContext is the external interface to all thread state for anything outside of the CPU...
Stats::Scalar lsqIgnoredResponses
Total number of responses from the memory system that are ignored due to the instruction already bein...
Stats::Scalar lsqSquashedLoads
Total number of squashed loads.
void recvRetry()
Handles doing the retry.
size_t idx() const
OutputIterator has no extra requirements.
DynInstPtr inst
Instruction whose results are being written back.
void dataStatic(T *p)
Set the data pointer to the following value that should not be freed.
void drainSanityCheck() const
Perform sanity checks after a drain.
void takeOverFrom()
Takes over from another CPU's thread.
void commitStores(InstSeqNum &youngest_inst)
Commits stores older than a specific sequence number.
void writeback(const DynInstPtr &inst, PacketPtr pkt)
Writes back the instruction, sending it to IEW.
ThreadID lsqID
The LSQUnit thread id.
Fault executeStore(const DynInstPtr &inst)
Executes a store instruction.
void dumpInsts() const
Debugging function to dump instructions in the LSQ.
bool storeInFlight
Whether or not a store is in flight.
Tick curTick()
The current simulated tick.
void advance_tail()
Increases the tail by one.
void pop_back()
Circularly decrease the tail pointer.
void checkSnoop(PacketPtr pkt)
Check if an incoming invalidate hits in the lsq on a load that might have issued out of order wrt ano...
IEW * iewStage
Pointer to the IEW stage.
void cachePortBusy(bool is_load)
Another store port is in use.
Fault executeLoad(const DynInstPtr &inst)
Executes a load instruction.
bool stalled
Whether or not the LSQ is stalled.
void commitLoads(InstSeqNum &youngest_inst)
Commits loads older than a specific sequence number.
bool recvTimingResp(PacketPtr pkt)
Handles writing back and completing the load or store that has returned from memory.
void commitLoad()
Commits the head load.
int stallingLoadIdx
The index of the above store.
void completeDataAccess(PacketPtr pkt)
Completes the data access that has been returned from the memory system.
void setDcachePort(MasterPort *dcache_port)
Sets the pointer to the dcache port.
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
LSQUnit< Impl > * lsqPtr
The pointer to the LSQ unit that issued the store.
bool empty() const
Is the queue empty?
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
bool hasPendingRequest
Whether or not there is a packet that couldn't be sent because of a lack of cache ports...
void pop_front(size_t num_elem=1)
Circularly increase the head pointer.
PacketPtr retryPkt
The packet that needs to be retried.
InstSeqNum stallingStoreIsn
The store that causes the stall due to partial store to load forwarding.
Particularisation of the LSQSenderState to the SQ.
Stats::Scalar lsqSquashedStores
Total number of squashed stores.
void writebackBlockedStore()
Try to finish a previously blocked write back attempt.
LSQ * lsq
Pointer to the LSQ.
Derived & name(const std::string &name)
Set the name and marks this stat to print at the end of simulation.
const char * description() const
Returns the description of this event.
int stores
The number of store instructions in the SQ.
std::string name() const
Returns the name of the LSQ unit.
bool checkLoads
Should loads be checked for dependency issues.
Declaration of the Packet class.
PacketPtr pkt
The packet that would have been sent to memory.
unsigned numFreeStoreEntries()
Returns the number of free SQ entries.
SenderState * senderState
This packet's sender state.
void process()
Processes the writeback event.
unsigned int cacheLineSize()
bool dereferenceable() const
Test dereferenceability.
void init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params, LSQ *lsq_ptr, unsigned id)
Initializes the LSQ unit with the specified number of entries.
int storesToWB
The number of store instructions in the SQ waiting to writeback.
void writebackStores()
Writes back stores.
bool needsTSO
Flag for memory model.
bool cachePortAvailable(bool is_load) const
Is any store port available to use?
int loads
The number of load instructions in the LQ.
LSQUnit(uint32_t lqEntries, uint32_t sqEntries)
Constructs an LSQ unit.
O3CPU * cpu
Pointer to the CPU.
void handleLockedSnoop(XC *xc, PacketPtr pkt, Addr cacheBlockMask)
Stats::Scalar lsqBlockedLoads
Ready loads blocked due to partial store-forwarding.
Derived & desc(const std::string &_desc)
Set the description and marks this stat to print at the end of simulation.
Stats::Scalar lsqCacheBlocked
Number of times the LSQ is blocked due to the cache.
LoadQueue loadQueue
The load queue.
unsigned depCheckShift
The number of places to shift addresses in the LSQ before checking for dependency violations...
CircularQueue< SQEntry > storeQueue
The store queue.
bool cacheBlocked() const
Is D-cache blocked?
void insert(const DynInstPtr &inst)
Inserts an instruction.
Class that implements the actual LQ and SQ for each specific thread.
Fault checkViolations(typename LoadQueue::iterator &loadIt, const DynInstPtr &inst)
Check for ordering violations in the LSQ.
WritebackEvent(const DynInstPtr &_inst, PacketPtr pkt, LSQUnit *lsq_ptr)
Constructs a writeback event.
std::shared_ptr< FaultBase > Fault
const std::string to_string(sc_enc enc)
void handleLockedSnoopHit(XC *xc)
Stats::Scalar lsqMemOrderViolation
Total number of memory ordering violations.
void cprintf(const char *format, const Args &...args)
ProbePointArg< PacketInfo > Packet
Packet probe point.
Addr cacheBlockMask
Address Mask for a cache block (e.g.