Go to the documentation of this file.
42 #ifndef __CPU_O3_LSQ_UNIT_HH__
43 #define __CPU_O3_LSQ_UNIT_HH__
53 #include "arch/locked_mem.hh"
54 #include "config/the_isa.hh"
57 #include "debug/HtmCpu.hh"
58 #include "debug/LSQUnit.hh"
62 struct DerivO3CPUParams;
83 typedef typename Impl::O3CPU
O3CPU;
85 typedef typename Impl::CPUPol::IEW
IEW;
86 typedef typename Impl::CPUPol::LSQ
LSQ;
90 using LSQRequest =
typename Impl::CPUPol::LSQ::LSQRequest;
113 if (
req !=
nullptr) {
123 if (
req !=
nullptr) {
223 LSQUnit(uint32_t lqEntries, uint32_t sqEntries);
231 panic(
"LSQUnit is not copy-able");
235 void init(
O3CPU *cpu_ptr,
IEW *iew_ptr, DerivO3CPUParams *params,
236 LSQ *lsq_ptr,
unsigned id);
239 std::string
name()
const;
323 const auto& htm_cpt =
cpu->tcBase(
lsqID)->getHtmCheckpointPtr();
324 return htm_cpt->getHtmUid();
422 using LSQSenderState::alive;
428 typename LoadQueue::iterator
idx;
441 using LSQSenderState::alive;
634 template <
class Impl>
638 LQEntry& load_req = loadQueue[load_idx];
644 assert(!load_inst->isExecuted());
651 if (req->mainRequest()->isStrictlyOrdered() &&
652 (load_idx != loadQueue.head() || !load_inst->isAtCommit())) {
655 iewStage->rescheduleMemInst(load_inst);
656 load_inst->clearIssued();
657 load_inst->effAddrValid(
false);
658 ++stats.rescheduledLoads;
660 load_inst->seqNum, load_inst->pcState());
667 return std::make_shared<GenericISA::M5PanicFault>(
668 "Strictly ordered load [sn:%llx] PC %s\n",
669 load_inst->seqNum, load_inst->pcState());
673 "storeHead: %i addr: %#x%s\n",
674 load_idx - 1, load_inst->sqIt._idx, storeQueue.head() - 1,
675 req->mainRequest()->getPaddr(), req->isSplit() ?
" split" :
"");
677 if (req->mainRequest()->isLLSC()) {
681 load_inst->recordResult(
false);
683 load_inst->recordResult(
true);
686 if (req->mainRequest()->isLocalAccess()) {
687 assert(!load_inst->memData);
688 assert(!load_inst->inHtmTransactionalState());
689 load_inst->memData =
new uint8_t[MaxDataBytes];
696 Cycles delay = req->mainRequest()->localAccessor(thread, main_pkt);
699 cpu->schedule(wb, cpu->clockEdge(delay));
704 if (req->mainRequest()->isHTMStart() || req->mainRequest()->isHTMCommit())
714 if (!load_inst->memData) {
716 new uint8_t[req->mainRequest()->getSize()];
718 memset(load_inst->memData, 0, req->mainRequest()->getSize());
722 if (load_inst->inHtmTransactionalState()) {
724 load_inst->getHtmTransactionUid());
729 cpu->schedule(wb, cpu->clockEdge(delay));
735 auto store_it = load_inst->sqIt;
736 assert (store_it >= storeWBIt);
738 while (store_it != storeWBIt) {
741 assert(store_it->valid());
742 assert(store_it->instruction()->seqNum < load_inst->seqNum);
743 int store_size = store_it->size();
748 if (store_size != 0 && !store_it->instruction()->strictlyOrdered() &&
749 !(store_it->request()->mainRequest() &&
750 store_it->request()->mainRequest()->isCacheMaintenance())) {
751 assert(store_it->instruction()->effAddrValid());
755 auto req_s = req->mainRequest()->getVaddr();
756 auto req_e = req_s + req->mainRequest()->getSize();
757 auto st_s = store_it->instruction()->effAddr;
758 auto st_e = st_s + store_size;
760 bool store_has_lower_limit = req_s >= st_s;
761 bool store_has_upper_limit = req_e <= st_e;
762 bool lower_load_has_store_part = req_s < st_e;
763 bool upper_load_has_store_part = req_e > st_s;
765 auto coverage = AddrRangeCoverage::NoAddrRangeCoverage;
771 if (!store_it->instruction()->isAtomic() &&
772 store_has_lower_limit && store_has_upper_limit &&
773 !req->mainRequest()->isLLSC()) {
775 const auto& store_req = store_it->request()->mainRequest();
776 coverage = store_req->isMasked() ?
777 AddrRangeCoverage::PartialAddrRangeCoverage :
778 AddrRangeCoverage::FullAddrRangeCoverage;
782 (!req->mainRequest()->isLLSC() &&
783 ((store_has_lower_limit && lower_load_has_store_part) ||
784 (store_has_upper_limit && upper_load_has_store_part) ||
785 (lower_load_has_store_part && upper_load_has_store_part))) ||
788 (req->mainRequest()->isLLSC() &&
789 ((store_has_lower_limit || upper_load_has_store_part) &&
790 (store_has_upper_limit || lower_load_has_store_part))) ||
793 (store_it->instruction()->isAtomic() &&
794 ((store_has_lower_limit || upper_load_has_store_part) &&
795 (store_has_upper_limit || lower_load_has_store_part)))) {
797 coverage = AddrRangeCoverage::PartialAddrRangeCoverage;
800 if (coverage == AddrRangeCoverage::FullAddrRangeCoverage) {
802 int shift_amt = req->mainRequest()->getVaddr() -
803 store_it->instruction()->effAddr;
806 if (!load_inst->memData) {
808 new uint8_t[req->mainRequest()->getSize()];
810 if (store_it->isAllZeros())
811 memset(load_inst->memData, 0,
812 req->mainRequest()->getSize());
814 memcpy(load_inst->memData,
815 store_it->data() + shift_amt,
816 req->mainRequest()->getSize());
819 "addr %#x\n", store_it._idx,
820 req->mainRequest()->getVaddr());
833 assert(!req->mainRequest()->isHTMCmd());
834 if (load_inst->inHtmTransactionalState()) {
835 assert (!storeQueue[store_it._idx].completed());
837 storeQueue[store_it._idx].instruction()->
838 inHtmTransactionalState());
840 load_inst->getHtmTransactionUid() ==
841 storeQueue[store_it._idx].instruction()->
842 getHtmTransactionUid());
844 load_inst->getHtmTransactionUid());
845 DPRINTF(HtmCpu,
"HTM LD (ST2LDF) "
846 "pc=0x%lx - vaddr=0x%lx - "
847 "paddr=0x%lx - htmUid=%u\n",
848 load_inst->instAddr(),
849 data_pkt->
req->hasVaddr() ?
850 data_pkt->
req->getVaddr() : 0lu,
852 load_inst->getHtmTransactionUid());
855 if (req->isAnyOutstandingRequest()) {
856 assert(req->_numOutstandingPackets > 0);
861 req->discardSenderState();
876 }
else if (coverage == AddrRangeCoverage::PartialAddrRangeCoverage) {
879 if (store_it->completed()) {
880 panic(
"Should not check one of these");
889 loadQueue[stallingLoadIdx].instruction()->seqNum)) {
891 stallingStoreIsn = store_it->instruction()->seqNum;
892 stallingLoadIdx = load_idx;
897 iewStage->rescheduleMemInst(load_inst);
898 load_inst->clearIssued();
899 load_inst->effAddrValid(
false);
900 ++stats.rescheduledLoads;
905 "Store idx %i to load addr %#x\n",
906 store_it._idx, req->mainRequest()->getVaddr());
917 DPRINTF(
LSQUnit,
"Doing memory access for inst [sn:%lli] PC %s\n",
918 load_inst->seqNum, load_inst->pcState());
921 if (!load_inst->memData) {
922 load_inst->memData =
new uint8_t[req->mainRequest()->getSize()];
927 if (req->mainRequest()->isHTMCmd()) {
931 *load_inst->memData = (uint64_t) 0x1ull;
941 if (req->senderState() ==
nullptr) {
943 loadQueue.getIterator(load_idx));
944 state->isLoad =
true;
945 state->inst = load_inst;
946 state->isSplit = req->isSplit();
947 req->senderState(state);
950 req->sendPacketToCache();
952 iewStage->blockMemInst(load_inst);
957 template <
class Impl>
961 assert(storeQueue[store_idx].valid());
963 DPRINTF(
LSQUnit,
"Doing write to store idx %i, addr %#x | storeHead:%i "
965 store_idx - 1, req->request()->getPaddr(), storeQueue.head() - 1,
966 storeQueue[store_idx].instruction()->seqNum);
968 storeQueue[store_idx].setRequest(req);
969 unsigned size = req->_size;
970 storeQueue[store_idx].size() = size;
973 storeQueue[store_idx].isAllZeros() = store_no_data;
974 assert(size <= SQEntry::DataSize || store_no_data);
978 !req->request()->isCacheMaintenance() &&
979 !req->request()->isAtomic())
980 memcpy(storeQueue[store_idx].
data(),
data, size);
987 #endif // __CPU_O3_LSQ_UNIT_HH__
void insertLoad(const DynInstPtr &load_inst)
Inserts a load instruction.
void writebackBlockedStore()
Try to finish a previously blocked write back attempt.
Impl::DynInstPtr DynInstPtr
bool _isAllZeros
Does this request write all zeros and thus doesn't have any data attached to it.
PacketPtr pkt
The packet that would have been sent to memory.
void dumpInsts() const
Debugging function to dump instructions in the LSQ.
void set(const DynInstPtr &inst)
void commitLoad()
Commits the head load.
unsigned depCheckShift
The number of places to shift addresses in the LSQ before checking for dependency violations.
char _data[MaxDataBytes]
The store data.
Stats::Scalar memOrderViolation
Total number of memory ordering violations.
bool dereferenceable() const
Test dereferenceability.
LSQRequest * pendingRequest
The packet that is pending free cache ports.
unsigned getCount()
Returns the number of instructions in the LSQ.
void set(const DynInstPtr &inst)
InstSeqNum getStoreHeadSeqNum()
Returns the sequence number of the head store instruction.
const char * data() const
void storePostSend()
Handles completing the send of a store to memory.
int16_t ThreadID
Thread index/ID type.
bool isFull()
Returns if either the LQ or SQ is full.
@ NO_ACCESS
The request should not cause a memory access.
InstSeqNum stallingStoreIsn
The store that causes the stall due to partial store to load forwarding.
IEW * iewStage
Pointer to the IEW stage.
bool recvTimingResp(PacketPtr pkt)
Handles writing back and completing the load or store that has returned from memory.
Stats::Scalar forwLoads
Total number of loads forwarded from LSQ stores.
int numStoresToWB()
Returns the number of stores to writeback.
unsigned int cacheLineSize()
SQSenderState(typename StoreQueue::iterator idx_)
SQEntry()
Constructs an empty store queue entry.
uint64_t Tick
Tick count type.
O3CPU * cpu
Pointer to the CPU.
bool isStalled()
Returns whether or not the LSQ unit is stalled.
void checkSnoop(PacketPtr pkt)
Check if an incoming invalidate hits in the lsq on a load that might have issued out of order wrt ano...
bool isEmpty() const
Returns if both the LQ and SQ are empty.
RequestPtr req
A pointer to the original request.
DynInstPtr getMemDepViolator()
Returns the memory ordering violator.
void init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params, LSQ *lsq_ptr, unsigned id)
Initializes the LSQ unit with the specified number of entries.
int storesToWB
The number of store instructions in the SQ waiting to writeback.
constexpr unsigned MaxVecRegLenInBytes
const uint32_t & size() const
void writeback(const DynInstPtr &inst, PacketPtr pkt)
Writes back the instruction, sending it to IEW.
Particularisation of the LSQSenderState to the SQ.
int numStores()
Returns the number of stores in the SQ.
bool storeInFlight
Whether or not a store is in flight.
void commitLoads(InstSeqNum &youngest_inst)
Commits loads older than a specific sequence number.
Iterator to the circular queue.
CircularQueue< LQEntry > LQueue
unsigned numFreeStoreEntries()
Returns the number of free SQ entries.
static constexpr auto MaxDataBytes
void setRequest(LSQRequest *r)
Particularisation of the LSQSenderState to the LQ.
int stores
The number of store instructions in the SQ.
void drainSanityCheck() const
Perform sanity checks after a drain.
const bool & committed() const
bool full() const
Is the queue full? A queue is full if the head is the 0^{th} element and the tail is the (size-1)^{th...
void completeStore(typename StoreQueue::iterator store_idx)
Completes the store at the specified index.
This is a simple scalar statistic, like a counter.
Stats::Scalar blockedByCache
Number of times the LSQ is blocked due to the cache.
bool checkLoads
Should loads be checked for dependency issues.
void resetState()
Reset the LSQ state.
Class that implements the actual LQ and SQ for each specific thread.
DynInstPtr inst
Instruction whose results are being written back.
bool sqFull()
Returns if the SQ is full.
void takeOverFrom()
Takes over from another CPU's thread.
@ CACHE_BLOCK_ZERO
This is a write that is targeted and zeroing an entire cache block.
DynInstPtr inst
The instruction.
Stats::Scalar ignoredResponses
Total number of responses from the memory system that are ignored due to the instruction already bein...
LSQ * lsq
Pointer to the LSQ.
LoadQueue loadQueue
The load queue.
ThreadContext is the external interface to all thread state for anything outside of the CPU.
bool hasPendingRequest
Whether or not there is a packet that couldn't be sent because of a lack of cache ports.
WritebackEvent(const DynInstPtr &_inst, PacketPtr pkt, LSQUnit *lsq_ptr)
Constructs a writeback event.
uint64_t getLatestHtmUid() const
const bool & isAllZeros() const
bool lqEmpty() const
Returns if the LQ is empty.
std::shared_ptr< FaultBase > Fault
CircularQueue< SQEntry >::iterator SQIterator
bool & canWB()
Member accessors.
int getLoadHead()
Returns the index of the head load instruction.
LoadQueue::iterator idx
The LQ index of the instruction.
bool willWB()
Returns if the LSQ unit will writeback on this cycle.
Derived class to hold any sender state the LSQ needs.
void process()
Processes the writeback event.
void commitStores(InstSeqNum &youngest_inst)
Commits stores older than a specific sequence number.
void insert(const DynInstPtr &inst)
Inserts an instruction.
void resetHtmStartsStops()
StoreQueue::iterator storeWBIt
The index of the first instruction that may be ready to be written back, and has not yet been written...
AddrRangeCoverage
Coverage of one address range with another.
void setHtmTransactional(uint64_t val)
Stipulates that this packet/request originates in the CPU executing in transactional mode,...
void writebackStores()
Writes back stores.
bool lqFull()
Returns if the LQ is full.
Fault executeLoad(int lq_idx)
const bool & completed() const
Stats::Scalar rescheduledLoads
Number of loads that were rescheduled.
unsigned numFreeLoadEntries()
Returns the number of free LQ entries.
A RequestPort is a specialisation of a Port, which implements the default protocol for the three diff...
int loads
The number of load instructions in the LQ.
constexpr decltype(nullptr) NoFault
const DynInstPtr & instruction() const
int stallingLoadIdx
The index of the above store.
ThreadID lsqID
The LSQUnit thread id.
ProbePointArg< PacketInfo > Packet
Packet probe point.
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
void makeResponse()
Take a request packet and modify it in place to be suitable for returning as a response to that reque...
bool trySendPacket(bool isLoad, PacketPtr data_pkt)
Attempts to send a packet to the cache.
RequestPort * dcachePort
Pointer to the dcache port.
TimeBuffer< IssueStruct >::wire fromIssue
Wire to read information from the issue stage time queue.
PacketPtr retryPkt
The packet that needs to be retried.
int numLoads()
Returns the number of loads in the LQ.
CircularQueue< SQEntry > SQueue
LSQRequest * req
The request.
void recvRetry()
Handles doing the retry.
LSQUnit(const LSQUnit &l)
We cannot copy LSQUnit because it has stats for which copy constructor is deleted explicitly.
void handleLockedRead(XC *xc, const RequestPtr &req)
int getStoreHead()
Returns the index of the head store instruction.
typename Impl::CPUPol::LSQ::LSQRequest LSQRequest
DynInstPtr memDepViolator
The oldest load that caused a memory ordering violation.
std::string name() const
Returns the name of the LSQ unit.
Fault read(LSQRequest *req, int load_idx)
Executes the load at the given index.
void setDcachePort(RequestPort *dcache_port)
Sets the pointer to the dcache port.
LQSenderState(typename LoadQueue::iterator idx_)
Stats::Scalar squashedStores
Total number of squashed stores.
CircularQueue< LQEntry >::iterator LQIterator
bool valid() const
Member accessors.
Fault executeStore(const DynInstPtr &inst)
Executes a store instruction.
void dataStatic(T *p)
Set the data pointer to the following value that should not be freed.
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
bool needsTSO
Flag for memory model.
void insertStore(const DynInstPtr &store_inst)
Inserts a store instruction.
bool violation()
Returns if there is a memory ordering violation.
bool stalled
Whether or not the LSQ is stalled.
void completeDataAccess(PacketPtr pkt)
Completes the data access that has been returned from the memory system.
CircularQueue< SQEntry > storeQueue
The store queue.
@ PartialAddrRangeCoverage
void schedule(Event &ev, Tick when)
Schedule event for the cpu.
LSQUnit(uint32_t lqEntries, uint32_t sqEntries)
Constructs an LSQ unit.
bool _committed
Whether or not the store is committed.
Cycles is a wrapper class for representing cycle counts, i.e.
Stats::Scalar squashedLoads
Total number of squashed loads.
bool _completed
Whether or not the store is completed.
Addr cacheBlockMask
Address Mask for a cache block (e.g.
InstSeqNum getLoadHeadSeqNum()
Returns the sequence number of the head load instruction.
bool isStoreBlocked
Whether or not a store is blocked due to the memory system.
uint64_t lastRetiredHtmUid
bool _canWB
Whether or not the store can writeback.
bool hasStoresToWB()
Returns if there are any stores to writeback.
void setLastRetiredHtmUid(uint64_t htm_uid)
void squash(const InstSeqNum &squashed_num)
Squashes all instructions younger than a specific sequence number.
bool sqEmpty() const
Returns if the SQ is empty.
Impl::CPUPol::IssueStruct IssueStruct
static const FlagsType STORE_NO_DATA
LSQEntry()
Constructs an empty store queue entry.
StoreQueue::iterator idx
The SQ index of the instruction.
LSQUnitStats(Stats::Group *parent)
static constexpr size_t DataSize
Fault executeLoad(const DynInstPtr &inst)
Executes a load instruction.
uint32_t _size
The size of the operation.
const char * description() const
Returns the description of this event.
LSQUnit::LSQUnitStats stats
typename LSQ::LSQSenderState LSQSenderState
Writeback event, specifically for when stores forward data to loads.
#define panic(...)
This implements a cprintf based panic() function.
const bool & canWB() const
Fault write(LSQRequest *req, uint8_t *data, int store_idx)
Executes the store at the given index.
LSQUnit< Impl > * lsqPtr
The pointer to the LSQ unit that issued the store.
Fault checkViolations(typename LoadQueue::iterator &loadIt, const DynInstPtr &inst)
Check for ordering violations in the LSQ.
Tick curTick()
The current simulated tick.
Generated on Wed Sep 30 2020 14:02:09 for gem5 by doxygen 1.8.17