45 #ifndef __CPU_O3_LSQ_UNIT_HH__ 46 #define __CPU_O3_LSQ_UNIT_HH__ 55 #include "arch/isa_traits.hh" 56 #include "arch/locked_mem.hh" 57 #include "arch/mmapped_ipr.hh" 58 #include "config/the_isa.hh" 61 #include "debug/LSQUnit.hh" 65 struct DerivO3CPUParams;
86 typedef typename Impl::O3CPU
O3CPU;
88 typedef typename Impl::CPUPol::IEW
IEW;
89 typedef typename Impl::CPUPol::LSQ
LSQ;
93 using LSQRequest =
typename Impl::CPUPol::LSQ::LSQRequest;
109 : inst(nullptr), req(nullptr), _size(0), _valid(false)
116 if (req !=
nullptr) {
126 if (req !=
nullptr) {
171 static constexpr
size_t DataSize =
sizeof(_data);
174 : _canWB(false), _committed(false), _completed(false),
177 std::memset(_data, 0, DataSize);
194 _canWB = _completed = _committed = _isAllZeros =
false;
199 const bool&
canWB()
const {
return _canWB; }
207 const char*
data()
const {
return _data; }
215 PartialAddrRangeCoverage,
216 FullAddrRangeCoverage,
226 LSQUnit(uint32_t lqEntries, uint32_t sqEntries);
235 void init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params,
236 LSQ *lsq_ptr,
unsigned id);
239 std::string
name()
const;
267 const DynInstPtr& inst);
410 using LSQSenderState::alive;
416 typename LoadQueue::iterator
idx;
429 using LSQSenderState::alive;
456 const char *description()
const;
594 return loadQueue.
front().valid()
595 ? loadQueue.
front().instruction()->seqNum
605 return storeQueue.
front().valid()
606 ? storeQueue.
front().instruction()->seqNum
619 template <
class Impl>
629 assert(!load_inst->isExecuted());
636 if (req->mainRequest()->isStrictlyOrdered() &&
637 (load_idx !=
loadQueue.
head() || !load_inst->isAtCommit())) {
640 iewStage->rescheduleMemInst(load_inst);
641 load_inst->clearIssued();
642 load_inst->effAddrValid(
false);
645 load_inst->seqNum, load_inst->pcState());
652 return std::make_shared<GenericISA::M5PanicFault>(
653 "Strictly ordered load [sn:%llx] PC %s\n",
654 load_inst->seqNum, load_inst->pcState());
658 "storeHead: %i addr: %#x%s\n",
659 load_idx - 1, load_inst->sqIt._idx,
storeQueue.head() - 1,
660 req->mainRequest()->getPaddr(), req->isSplit() ?
" split" :
"");
662 if (req->mainRequest()->isLLSC()) {
666 load_inst->recordResult(
false);
668 load_inst->recordResult(
true);
671 if (req->mainRequest()->isMmappedIpr()) {
672 assert(!load_inst->memData);
680 Cycles delay = req->handleIprRead(thread, main_pkt);
683 cpu->schedule(wb,
cpu->clockEdge(delay));
688 auto store_it = load_inst->sqIt;
694 assert(store_it->valid());
695 assert(store_it->instruction()->seqNum < load_inst->seqNum);
696 int store_size = store_it->size();
701 if (store_size != 0 && !store_it->instruction()->strictlyOrdered() &&
702 !(store_it->request()->mainRequest() &&
703 store_it->request()->mainRequest()->isCacheMaintenance())) {
704 assert(store_it->instruction()->effAddrValid());
708 auto req_s = req->mainRequest()->getVaddr();
709 auto req_e = req_s + req->mainRequest()->getSize();
710 auto st_s = store_it->instruction()->effAddr;
711 auto st_e = st_s + store_size;
713 bool store_has_lower_limit = req_s >= st_s;
714 bool store_has_upper_limit = req_e <= st_e;
715 bool lower_load_has_store_part = req_s < st_e;
716 bool upper_load_has_store_part = req_e > st_s;
724 if (!store_it->instruction()->isAtomic() &&
725 store_has_lower_limit && store_has_upper_limit &&
726 !req->mainRequest()->isLLSC()) {
728 const auto& store_req = store_it->request()->mainRequest();
729 coverage = store_req->isMasked() ?
735 (!req->mainRequest()->isLLSC() &&
736 ((store_has_lower_limit && lower_load_has_store_part) ||
737 (store_has_upper_limit && upper_load_has_store_part) ||
738 (lower_load_has_store_part && upper_load_has_store_part))) ||
741 (req->mainRequest()->isLLSC() &&
742 ((store_has_lower_limit || upper_load_has_store_part) &&
743 (store_has_upper_limit || lower_load_has_store_part))) ||
746 (store_it->instruction()->isAtomic() &&
747 ((store_has_lower_limit || upper_load_has_store_part) &&
748 (store_has_upper_limit || lower_load_has_store_part)))) {
755 int shift_amt = req->mainRequest()->getVaddr() -
756 store_it->instruction()->effAddr;
759 if (!load_inst->memData) {
761 new uint8_t[req->mainRequest()->getSize()];
763 if (store_it->isAllZeros())
764 memset(load_inst->memData, 0,
765 req->mainRequest()->getSize());
767 memcpy(load_inst->memData,
768 store_it->data() + shift_amt,
769 req->mainRequest()->getSize());
772 "addr %#x\n", store_it._idx,
773 req->mainRequest()->getVaddr());
777 data_pkt->dataStatic(load_inst->memData);
779 if (req->isAnyOutstandingRequest()) {
780 assert(req->_numOutstandingPackets > 0);
785 req->discardSenderState();
803 if (store_it->completed()) {
804 panic(
"Should not check one of these");
821 iewStage->rescheduleMemInst(load_inst);
822 load_inst->clearIssued();
823 load_inst->effAddrValid(
false);
829 "Store idx %i to load addr %#x\n",
830 store_it._idx, req->mainRequest()->getVaddr());
841 DPRINTF(
LSQUnit,
"Doing memory access for inst [sn:%lli] PC %s\n",
842 load_inst->seqNum, load_inst->pcState());
845 if (!load_inst->memData) {
846 load_inst->memData =
new uint8_t[req->mainRequest()->getSize()];
856 if (req->senderState() ==
nullptr) {
859 state->isLoad =
true;
860 state->inst = load_inst;
861 state->isSplit = req->isSplit();
862 req->senderState(state);
865 req->sendPacketToCache();
872 template <
class Impl>
878 DPRINTF(
LSQUnit,
"Doing write to store idx %i, addr %#x | storeHead:%i " 880 store_idx - 1, req->request()->getPaddr(),
storeQueue.head() - 1,
884 unsigned size = req->_size;
888 storeQueue[store_idx].isAllZeros() = store_no_data;
893 !req->request()->isCacheMaintenance() &&
894 !req->request()->isAtomic())
902 #endif // __CPU_O3_LSQ_UNIT_HH__ A MasterPort is a specialisation of a BaseMasterPort, which implements the default protocol for the t...
#define panic(...)
This implements a cprintf-based panic() function.
int getStoreHead()
Returns the index of the head store instruction.
MasterPort * dcachePort
Pointer to the dcache port.
unsigned numFreeLoadEntries()
Returns the number of free LQ entries.
void squash(const InstSeqNum &squashed_num)
Squashes all instructions younger than a specific sequence number.
bool isEmpty() const
Returns if both the LQ and SQ are empty.
Impl::DynInstPtr DynInstPtr
CircularQueue< SQEntry >::iterator SQIterator
int getLoadHead()
Returns the index of the head load instruction.
decltype(nullptr) constexpr NoFault
Cycles is a wrapper class for representing cycle counts, i.e.
Stats::Scalar invAddrSwpfs
Total number of software prefetches ignored due to invalid addresses.
bool hasStoresToWB()
Returns if there are any stores to writeback.
LSQEntry()
Constructs an empty load/store queue entry.
Iterator to the circular queue.
static constexpr size_t DataSize
LSQUnit(const LSQUnit &l)
We cannot copy LSQUnit because it has stats whose copy constructor is explicitly deleted.
const bool & completed() const
DynInstPtr memDepViolator
The oldest load that caused a memory ordering violation.
bool valid() const
Member accessors.
void schedule(Event &ev, Tick when)
Schedule event for the cpu.
void completeStore(typename StoreQueue::iterator store_idx)
Completes the store at the specified index.
int numStores()
Returns the number of stores in the SQ.
Stats::Scalar lsqForwLoads
Total number of loads forwarded from LSQ stores.
bool isStoreBlocked
Whether or not a store is blocked due to the memory system.
LSQRequest * pendingRequest
The packet that is pending free cache ports.
bool violation()
Returns if there is a memory ordering violation.
DynInstPtr getMemDepViolator()
Returns the memory ordering violator.
Writeback event, specifically for when stores forward data to loads.
void insertStore(const DynInstPtr &store_inst)
Inserts a store instruction.
bool trySendPacket(bool isLoad, PacketPtr data_pkt)
Attempts to send a packet to the cache.
bool full() const
Is the queue full? A queue is full if the head is the 0^{th} element and the tail is the (size-1)^{th...
TimeBuffer< IssueStruct >::wire fromIssue
Wire to read information from the issue stage time queue.
bool isStalled()
Returns whether or not the LSQ unit is stalled.
Stats::Scalar lsqRescheduledLoads
Number of loads that were rescheduled.
void resetState()
Reset the LSQ state.
const DynInstPtr & instruction() const
iterator getIterator(size_t idx)
Return an iterator to an index in the vector.
StoreQueue::iterator storeWBIt
The index of the first instruction that may be ready to be written back, and has not yet been written...
Derived class to hold any sender state the LSQ needs.
void regStats()
Registers statistics.
typename Impl::CPUPol::LSQ::LSQRequest LSQRequest
typename LSQ::LSQSenderState LSQSenderState
bool isFull()
Returns if either the LQ or SQ is full.
void insertLoad(const DynInstPtr &load_inst)
Inserts a load instruction.
void handleLockedRead(XC *xc, const RequestPtr &req)
int numLoads()
Returns the number of loads in the LQ.
LSQRequest * req
The request.
void storePostSend()
Handles completing the send of a store to memory.
Stats::Scalar invAddrLoads
Total number of loads ignored due to invalid addresses.
ThreadContext is the external interface to all thread state for anything outside of the CPU...
bool _committed
Whether or not the store is committed.
Stats::Scalar lsqIgnoredResponses
Total number of responses from the memory system that are ignored due to the instruction already bein...
Stats::Scalar lsqSquashedLoads
Total number of squashed loads.
void recvRetry()
Handles doing the retry.
This is a simple scalar statistic, like a counter.
DynInstPtr inst
Instruction whose results are being written back.
bool _isAllZeros
Whether this request writes all zeros and thus has no data attached to it.
void dataStatic(T *p)
Set the data pointer to the following value that should not be freed.
SQSenderState(typename StoreQueue::iterator idx_)
void drainSanityCheck() const
Perform sanity checks after a drain.
void takeOverFrom()
Takes over from another CPU's thread.
void commitStores(InstSeqNum &youngest_inst)
Commits stores older than a specific sequence number.
void writeback(const DynInstPtr &inst, PacketPtr pkt)
Writes back the instruction, sending it to IEW.
bool sqEmpty() const
Returns if the SQ is empty.
ThreadID lsqID
The LSQUnit thread id.
Fault executeStore(const DynInstPtr &inst)
Executes a store instruction.
void dumpInsts() const
Debugging function to dump instructions in the LSQ.
InstSeqNum getStoreHeadSeqNum()
Returns the sequence number of the head store instruction.
Fault read(LSQRequest *req, int load_idx)
Executes the load at the given index.
bool storeInFlight
Whether or not a store is in flight.
bool & canWB()
Member accessors.
Tick curTick()
The current simulated tick.
void setRequest(LSQRequest *r)
bool willWB()
Returns if the LSQ unit will writeback on this cycle.
CircularQueue< LQEntry > LQueue
void checkSnoop(PacketPtr pkt)
Check if an incoming invalidate hits in the lsq on a load that might have issued out of order wrt ano...
IEW * iewStage
Pointer to the IEW stage.
Fault write(LSQRequest *req, uint8_t *data, int store_idx)
Executes the store at the given index.
bool lqFull()
Returns if the LQ is full.
CircularQueue< SQEntry > SQueue
uint64_t Tick
Tick count type.
AddrRangeCoverage
Coverage of one address range with another.
Fault executeLoad(const DynInstPtr &inst)
Executes a load instruction.
bool stalled
Whether or not the LSQ is stalled.
void commitLoads(InstSeqNum &youngest_inst)
Commits loads older than a specific sequence number.
SQEntry()
Constructs an empty store queue entry.
bool recvTimingResp(PacketPtr pkt)
Handles writing back and completing the load or store that has returned from memory.
StoreQueue::iterator idx
The SQ index of the instruction.
void commitLoad()
Commits the head load.
int stallingLoadIdx
The index of the above store.
void completeDataAccess(PacketPtr pkt)
Completes the data access that has been returned from the memory system.
const bool & canWB() const
InstSeqNum getLoadHeadSeqNum()
Returns the sequence number of the head load instruction.
static const FlagsType STORE_NO_DATA
bool _completed
Whether or not the store is completed.
Fault executeLoad(int lq_idx)
void setDcachePort(MasterPort *dcache_port)
Sets the pointer to the dcache port.
CircularQueue< LQEntry >::iterator LQIterator
const bool & committed() const
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
LSQUnit< Impl > * lsqPtr
The pointer to the LSQ unit that issued the store.
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
bool hasPendingRequest
Whether or not there is a packet that couldn't be sent because of a lack of cache ports...
PacketPtr retryPkt
The packet that needs to be retried.
InstSeqNum stallingStoreIsn
The store that causes the stall due to partial store to load forwarding.
LQSenderState(typename LoadQueue::iterator idx_)
constexpr unsigned MaxVecRegLenInBytes
Particularisation of the LSQSenderState to the SQ.
Stats::Scalar lsqSquashedStores
Total number of squashed stores.
bool sqFull()
Returns if the SQ is full.
const uint32_t & size() const
const char * data() const
void writebackBlockedStore()
Try to finish a previously blocked write back attempt.
LSQ * lsq
Pointer to the LSQ.
int16_t ThreadID
Thread index/ID type.
const bool & isAllZeros() const
int stores
The number of store instructions in the SQ.
void set(const DynInstPtr &inst)
std::string name() const
Returns the name of the LSQ unit.
bool checkLoads
Should loads be checked for dependency issues.
Declaration of the Packet class.
This is a write that targets and zeroes an entire cache block.
PacketPtr pkt
The packet that would have been sent to memory.
unsigned numFreeStoreEntries()
Returns the number of free SQ entries.
unsigned getCount()
Returns the number of instructions in the LSQ.
static constexpr auto MaxDataBytes
unsigned int cacheLineSize()
bool dereferenceable() const
Test dereferenceability.
Vector Registers layout specification.
bool lqEmpty() const
Returns if the LQ is empty.
void init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params, LSQ *lsq_ptr, unsigned id)
Initializes the LSQ unit with the specified number of entries.
int storesToWB
The number of store instructions in the SQ waiting to writeback.
Particularisation of the LSQSenderState to the LQ.
void writebackStores()
Writes back stores.
bool needsTSO
Flag for memory model.
int loads
The number of load instructions in the LQ.
LSQUnit(uint32_t lqEntries, uint32_t sqEntries)
Constructs an LSQ unit.
O3CPU * cpu
Pointer to the CPU.
Stats::Scalar lsqBlockedLoads
Ready loads blocked due to partial store-forwarding.
bool _canWB
Whether or not the store can writeback.
uint32_t _size
The size of the operation.
Stats::Scalar lsqCacheBlocked
Number of times the LSQ is blocked due to the cache.
LoadQueue loadQueue
The load queue.
unsigned depCheckShift
The number of places to shift addresses in the LSQ before checking for dependency violations...
CircularQueue< SQEntry > storeQueue
The store queue.
DynInstPtr inst
The instruction.
void insert(const DynInstPtr &inst)
Inserts an instruction.
Class that implements the actual LQ and SQ for each specific thread.
Impl::CPUPol::IssueStruct IssueStruct
Fault checkViolations(typename LoadQueue::iterator &loadIt, const DynInstPtr &inst)
Check for ordering violations in the LSQ.
LoadQueue::iterator idx
The LQ index of the instruction.
std::shared_ptr< FaultBase > Fault
Stats::Scalar lsqMemOrderViolation
Total number of memory ordering violations.
int numStoresToWB()
Returns the number of stores to writeback.
ProbePointArg< PacketInfo > Packet
Packet probe point.
Addr cacheBlockMask
Address Mask for a cache block (e.g.