42 #ifndef __CPU_O3_LSQ_UNIT_HH__ 43 #define __CPU_O3_LSQ_UNIT_HH__ 52 #include "arch/isa_traits.hh" 53 #include "arch/locked_mem.hh" 54 #include "config/the_isa.hh" 57 #include "debug/LSQUnit.hh" 61 struct DerivO3CPUParams;
82 typedef typename Impl::O3CPU
O3CPU;
84 typedef typename Impl::CPUPol::IEW
IEW;
85 typedef typename Impl::CPUPol::LSQ
LSQ;
89 using LSQRequest =
typename Impl::CPUPol::LSQ::LSQRequest;
105 : inst(nullptr), req(nullptr), _size(0), _valid(false)
112 if (req !=
nullptr) {
122 if (req !=
nullptr) {
167 static constexpr
size_t DataSize =
sizeof(_data);
170 : _canWB(false), _committed(false), _completed(false),
173 std::memset(_data, 0, DataSize);
190 _canWB = _completed = _committed = _isAllZeros =
false;
195 const bool&
canWB()
const {
return _canWB; }
203 const char*
data()
const {
return _data; }
211 PartialAddrRangeCoverage,
212 FullAddrRangeCoverage,
222 LSQUnit(uint32_t lqEntries, uint32_t sqEntries);
231 void init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params,
232 LSQ *lsq_ptr,
unsigned id);
235 std::string
name()
const;
263 const DynInstPtr& inst);
406 using LSQSenderState::alive;
412 typename LoadQueue::iterator
idx;
425 using LSQSenderState::alive;
452 const char *description()
const;
590 return loadQueue.
front().valid()
591 ? loadQueue.
front().instruction()->seqNum
601 return storeQueue.
front().valid()
602 ? storeQueue.
front().instruction()->seqNum
615 template <
class Impl>
625 assert(!load_inst->isExecuted());
632 if (req->mainRequest()->isStrictlyOrdered() &&
633 (load_idx !=
loadQueue.
head() || !load_inst->isAtCommit())) {
636 iewStage->rescheduleMemInst(load_inst);
637 load_inst->clearIssued();
638 load_inst->effAddrValid(
false);
641 load_inst->seqNum, load_inst->pcState());
648 return std::make_shared<GenericISA::M5PanicFault>(
649 "Strictly ordered load [sn:%llx] PC %s\n",
650 load_inst->seqNum, load_inst->pcState());
654 "storeHead: %i addr: %#x%s\n",
655 load_idx - 1, load_inst->sqIt._idx,
storeQueue.head() - 1,
656 req->mainRequest()->getPaddr(), req->isSplit() ?
" split" :
"");
658 if (req->mainRequest()->isLLSC()) {
662 load_inst->recordResult(
false);
664 load_inst->recordResult(
true);
667 if (req->mainRequest()->isLocalAccess()) {
668 assert(!load_inst->memData);
676 Cycles delay = req->mainRequest()->localAccessor(thread, main_pkt);
679 cpu->schedule(wb,
cpu->clockEdge(delay));
684 auto store_it = load_inst->sqIt;
690 assert(store_it->valid());
691 assert(store_it->instruction()->seqNum < load_inst->seqNum);
692 int store_size = store_it->size();
697 if (store_size != 0 && !store_it->instruction()->strictlyOrdered() &&
698 !(store_it->request()->mainRequest() &&
699 store_it->request()->mainRequest()->isCacheMaintenance())) {
700 assert(store_it->instruction()->effAddrValid());
704 auto req_s = req->mainRequest()->getVaddr();
705 auto req_e = req_s + req->mainRequest()->getSize();
706 auto st_s = store_it->instruction()->effAddr;
707 auto st_e = st_s + store_size;
709 bool store_has_lower_limit = req_s >= st_s;
710 bool store_has_upper_limit = req_e <= st_e;
711 bool lower_load_has_store_part = req_s < st_e;
712 bool upper_load_has_store_part = req_e > st_s;
720 if (!store_it->instruction()->isAtomic() &&
721 store_has_lower_limit && store_has_upper_limit &&
722 !req->mainRequest()->isLLSC()) {
724 const auto& store_req = store_it->request()->mainRequest();
725 coverage = store_req->isMasked() ?
731 (!req->mainRequest()->isLLSC() &&
732 ((store_has_lower_limit && lower_load_has_store_part) ||
733 (store_has_upper_limit && upper_load_has_store_part) ||
734 (lower_load_has_store_part && upper_load_has_store_part))) ||
737 (req->mainRequest()->isLLSC() &&
738 ((store_has_lower_limit || upper_load_has_store_part) &&
739 (store_has_upper_limit || lower_load_has_store_part))) ||
742 (store_it->instruction()->isAtomic() &&
743 ((store_has_lower_limit || upper_load_has_store_part) &&
744 (store_has_upper_limit || lower_load_has_store_part)))) {
751 int shift_amt = req->mainRequest()->getVaddr() -
752 store_it->instruction()->effAddr;
755 if (!load_inst->memData) {
757 new uint8_t[req->mainRequest()->getSize()];
759 if (store_it->isAllZeros())
760 memset(load_inst->memData, 0,
761 req->mainRequest()->getSize());
763 memcpy(load_inst->memData,
764 store_it->data() + shift_amt,
765 req->mainRequest()->getSize());
768 "addr %#x\n", store_it._idx,
769 req->mainRequest()->getVaddr());
773 data_pkt->dataStatic(load_inst->memData);
775 if (req->isAnyOutstandingRequest()) {
776 assert(req->_numOutstandingPackets > 0);
781 req->discardSenderState();
799 if (store_it->completed()) {
800 panic(
"Should not check one of these");
817 iewStage->rescheduleMemInst(load_inst);
818 load_inst->clearIssued();
819 load_inst->effAddrValid(
false);
825 "Store idx %i to load addr %#x\n",
826 store_it._idx, req->mainRequest()->getVaddr());
837 DPRINTF(
LSQUnit,
"Doing memory access for inst [sn:%lli] PC %s\n",
838 load_inst->seqNum, load_inst->pcState());
841 if (!load_inst->memData) {
842 load_inst->memData =
new uint8_t[req->mainRequest()->getSize()];
852 if (req->senderState() ==
nullptr) {
855 state->isLoad =
true;
856 state->inst = load_inst;
857 state->isSplit = req->isSplit();
858 req->senderState(state);
861 req->sendPacketToCache();
868 template <
class Impl>
874 DPRINTF(
LSQUnit,
"Doing write to store idx %i, addr %#x | storeHead:%i " 876 store_idx - 1, req->request()->getPaddr(),
storeQueue.head() - 1,
880 unsigned size = req->_size;
884 storeQueue[store_idx].isAllZeros() = store_no_data;
889 !req->request()->isCacheMaintenance() &&
890 !req->request()->isAtomic())
898 #endif // __CPU_O3_LSQ_UNIT_HH__ A MasterPort is a specialisation of a BaseMasterPort, which implements the default protocol for the t...
#define panic(...)
This implements a cprintf based panic() function.
int getStoreHead()
Returns the index of the head store instruction.
MasterPort * dcachePort
Pointer to the dcache port.
unsigned numFreeLoadEntries()
Returns the number of free LQ entries.
void squash(const InstSeqNum &squashed_num)
Squashes all instructions younger than a specific sequence number.
bool isEmpty() const
Returns if both the LQ and SQ are empty.
Impl::DynInstPtr DynInstPtr
CircularQueue< SQEntry >::iterator SQIterator
int getLoadHead()
Returns the index of the head load instruction.
decltype(nullptr) constexpr NoFault
Cycles is a wrapper class for representing cycle counts, i.e.
Stats::Scalar invAddrSwpfs
Total number of software prefetches ignored due to invalid addresses.
bool hasStoresToWB()
Returns if there are any stores to writeback.
LSQEntry()
Constructs an empty store queue entry.
Iterator to the circular queue.
static constexpr size_t DataSize
LSQUnit(const LSQUnit &l)
We cannot copy LSQUnit because it has stats for which the copy constructor is explicitly deleted.
const bool & completed() const
DynInstPtr memDepViolator
The oldest load that caused a memory ordering violation.
bool valid() const
Member accessors.
void schedule(Event &ev, Tick when)
Schedule event for the cpu.
void completeStore(typename StoreQueue::iterator store_idx)
Completes the store at the specified index.
int numStores()
Returns the number of stores in the SQ.
Stats::Scalar lsqForwLoads
Total number of loads forwarded from LSQ stores.
bool isStoreBlocked
Whether or not a store is blocked due to the memory system.
LSQRequest * pendingRequest
The packet that is pending free cache ports.
bool violation()
Returns if there is a memory ordering violation.
DynInstPtr getMemDepViolator()
Returns the memory ordering violator.
Writeback event, specifically for when stores forward data to loads.
void insertStore(const DynInstPtr &store_inst)
Inserts a store instruction.
bool trySendPacket(bool isLoad, PacketPtr data_pkt)
Attempts to send a packet to the cache.
bool full() const
Is the queue full? A queue is full if the head is the 0^{th} element and the tail is the (size-1)^{th...
TimeBuffer< IssueStruct >::wire fromIssue
Wire to read information from the issue stage time queue.
bool isStalled()
Returns whether or not the LSQ unit is stalled.
Stats::Scalar lsqRescheduledLoads
Number of loads that were rescheduled.
void resetState()
Reset the LSQ state.
const DynInstPtr & instruction() const
iterator getIterator(size_t idx)
Return an iterator to an index in the vector.
StoreQueue::iterator storeWBIt
The index of the first instruction that may be ready to be written back, and has not yet been written...
Derived class to hold any sender state the LSQ needs.
void regStats()
Registers statistics.
typename Impl::CPUPol::LSQ::LSQRequest LSQRequest
typename LSQ::LSQSenderState LSQSenderState
bool isFull()
Returns if either the LQ or SQ is full.
void insertLoad(const DynInstPtr &load_inst)
Inserts a load instruction.
void handleLockedRead(XC *xc, const RequestPtr &req)
int numLoads()
Returns the number of loads in the LQ.
LSQRequest * req
The request.
void storePostSend()
Handles completing the send of a store to memory.
Stats::Scalar invAddrLoads
Total number of loads ignored due to invalid addresses.
ThreadContext is the external interface to all thread state for anything outside of the CPU...
bool _committed
Whether or not the store is committed.
Stats::Scalar lsqIgnoredResponses
Total number of responses from the memory system that are ignored due to the instruction already bein...
Stats::Scalar lsqSquashedLoads
Total number of squashed loads.
void recvRetry()
Handles doing the retry.
This is a simple scalar statistic, like a counter.
DynInstPtr inst
Instruction whose results are being written back.
bool _isAllZeros
Does this request write all zeros and thus doesn't have any data attached to it.
void dataStatic(T *p)
Set the data pointer to the following value that should not be freed.
SQSenderState(typename StoreQueue::iterator idx_)
void drainSanityCheck() const
Perform sanity checks after a drain.
void takeOverFrom()
Takes over from another CPU's thread.
void commitStores(InstSeqNum &youngest_inst)
Commits stores older than a specific sequence number.
void writeback(const DynInstPtr &inst, PacketPtr pkt)
Writes back the instruction, sending it to IEW.
bool sqEmpty() const
Returns if the SQ is empty.
ThreadID lsqID
The LSQUnit thread id.
Fault executeStore(const DynInstPtr &inst)
Executes a store instruction.
This is a write that is targeted and zeroing an entire cache block.
void dumpInsts() const
Debugging function to dump instructions in the LSQ.
InstSeqNum getStoreHeadSeqNum()
Returns the sequence number of the head store instruction.
Fault read(LSQRequest *req, int load_idx)
Executes the load at the given index.
bool storeInFlight
Whether or not a store is in flight.
bool & canWB()
Member accessors.
Tick curTick()
The current simulated tick.
void setRequest(LSQRequest *r)
bool willWB()
Returns if the LSQ unit will writeback on this cycle.
CircularQueue< LQEntry > LQueue
void checkSnoop(PacketPtr pkt)
Check if an incoming invalidate hits in the lsq on a load that might have issued out of order wrt ano...
IEW * iewStage
Pointer to the IEW stage.
Fault write(LSQRequest *req, uint8_t *data, int store_idx)
Executes the store at the given index.
bool lqFull()
Returns if the LQ is full.
CircularQueue< SQEntry > SQueue
uint64_t Tick
Tick count type.
AddrRangeCoverage
Coverage of one address range with another.
Fault executeLoad(const DynInstPtr &inst)
Executes a load instruction.
bool stalled
Whether or not the LSQ is stalled.
void commitLoads(InstSeqNum &youngest_inst)
Commits loads older than a specific sequence number.
SQEntry()
Constructs an empty store queue entry.
bool recvTimingResp(PacketPtr pkt)
Handles writing back and completing the load or store that has returned from memory.
StoreQueue::iterator idx
The SQ index of the instruction.
void commitLoad()
Commits the head load.
int stallingLoadIdx
The index of the above store.
void completeDataAccess(PacketPtr pkt)
Completes the data access that has been returned from the memory system.
const bool & canWB() const
InstSeqNum getLoadHeadSeqNum()
Returns the sequence number of the head load instruction.
static const FlagsType STORE_NO_DATA
bool _completed
Whether or not the store is completed.
Fault executeLoad(int lq_idx)
void setDcachePort(MasterPort *dcache_port)
Sets the pointer to the dcache port.
CircularQueue< LQEntry >::iterator LQIterator
const bool & committed() const
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
LSQUnit< Impl > * lsqPtr
The pointer to the LSQ unit that issued the store.
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
bool hasPendingRequest
Whether or not there is a packet that couldn't be sent because of a lack of cache ports...
PacketPtr retryPkt
The packet that needs to be retried.
InstSeqNum stallingStoreIsn
The store that causes the stall due to partial store to load forwarding.
LQSenderState(typename LoadQueue::iterator idx_)
constexpr unsigned MaxVecRegLenInBytes
Particularisation of the LSQSenderState to the SQ.
Stats::Scalar lsqSquashedStores
Total number of squashed stores.
bool sqFull()
Returns if the SQ is full.
const uint32_t & size() const
const char * data() const
void writebackBlockedStore()
Try to finish a previously blocked write back attempt.
LSQ * lsq
Pointer to the LSQ.
int16_t ThreadID
Thread index/ID type.
const bool & isAllZeros() const
int stores
The number of store instructions in the SQ.
void set(const DynInstPtr &inst)
std::string name() const
Returns the name of the LSQ unit.
bool checkLoads
Should loads be checked for dependency issues.
Declaration of the Packet class.
PacketPtr pkt
The packet that would have been sent to memory.
unsigned numFreeStoreEntries()
Returns the number of free SQ entries.
unsigned getCount()
Returns the number of instructions in the LSQ.
static constexpr auto MaxDataBytes
unsigned int cacheLineSize()
bool dereferenceable() const
Test dereferenceability.
Vector Registers layout specification.
bool lqEmpty() const
Returns if the LQ is empty.
void init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params, LSQ *lsq_ptr, unsigned id)
Initializes the LSQ unit with the specified number of entries.
int storesToWB
The number of store instructions in the SQ waiting to writeback.
Particularisation of the LSQSenderState to the LQ.
void writebackStores()
Writes back stores.
bool needsTSO
Flag for memory model.
int loads
The number of load instructions in the LQ.
LSQUnit(uint32_t lqEntries, uint32_t sqEntries)
Constructs an LSQ unit.
O3CPU * cpu
Pointer to the CPU.
Stats::Scalar lsqBlockedLoads
Ready loads blocked due to partial store-forwarding.
bool _canWB
Whether or not the store can writeback.
uint32_t _size
The size of the operation.
Stats::Scalar lsqCacheBlocked
Number of times the LSQ is blocked due to the cache.
LoadQueue loadQueue
The load queue.
unsigned depCheckShift
The number of places to shift addresses in the LSQ before checking for dependency violations...
CircularQueue< SQEntry > storeQueue
The store queue.
DynInstPtr inst
The instruction.
void insert(const DynInstPtr &inst)
Inserts an instruction.
Class that implements the actual LQ and SQ for each specific thread.
Impl::CPUPol::IssueStruct IssueStruct
Fault checkViolations(typename LoadQueue::iterator &loadIt, const DynInstPtr &inst)
Check for ordering violations in the LSQ.
LoadQueue::iterator idx
The LQ index of the instruction.
std::shared_ptr< FaultBase > Fault
Stats::Scalar lsqMemOrderViolation
Total number of memory ordering violations.
int numStoresToWB()
Returns the number of stores to writeback.
ProbePointArg< PacketInfo > Packet
Packet probe point.
Addr cacheBlockMask
Address Mask for a cache block (e.g.