42#ifndef __CPU_O3_LSQ_HH__
43#define __CPU_O3_LSQ_HH__
59#include "enums/SMTQueuePolicy.hh"
66struct BaseO3CPUParams;
266 const Addr&
addr,
const uint32_t& size,
269 bool stale_translation=
false);
331 req()->setContext(context_id);
347 req()->setVirt(
vaddr, size, flags_, requestor_id,
pc);
380 assert (
_reqs.size() == 1);
566 virtual std::string
name()
const {
return "LSQRequest"; }
577 std::move(amo_op)) {}
590 virtual std::string
name()
const {
return "SingleDataRequest"; }
610 return "UnsquashableDirectRequest";
626 uint64_t* res=
nullptr) :
660 virtual std::string
name()
const {
return "SplitDataRequest"; }
664 LSQ(
CPU *cpu_ptr,
IEW *iew_ptr,
const BaseO3CPUParams &params);
667 std::string
name()
const;
923 if (pol == SMTQueuePolicy::Dynamic) {
925 }
else if (pol == SMTQueuePolicy::Partitioned) {
928 }
else if (pol == SMTQueuePolicy::Threshold) {
Defines global host-dependent types: Counter, Tick, and (indirectly) {int,uint}{8,...
Cycles is a wrapper class for representing cycle counts, i.e.
Wrapper that groups a few flag bits under the same underlying container.
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
A RequestPort is a specialisation of a Port, which implements the default protocol for the three diff...
ThreadContext is the external interface to all thread state for anything outside of the CPU.
O3CPU class, has each of the stages (fetch through commit) within it, as well as all of the time buff...
IEW handles both single threaded and SMT IEW (issue/execute/writeback).
Class that implements the actual LQ and SQ for each specific thread.
DcachePort class for the load/store queue.
virtual void recvTimingSnoopReq(PacketPtr pkt)
Receive a timing snoop request from the peer.
DcachePort(LSQ *_lsq, CPU *_cpu)
Default constructor.
virtual bool recvTimingResp(PacketPtr pkt)
Timing version of receive.
virtual void recvReqRetry()
Handles doing a retry of the previous send.
virtual void recvFunctionalSnoop(PacketPtr pkt)
Receive a functional snoop request packet from the peer.
virtual bool isSnooping() const
As this CPU requires snooping to maintain the load store queue change the behaviour from the base CPU...
Memory operation metadata.
virtual bool recvTimingResp(PacketPtr pkt)=0
@ WritebackScheduled
Store written back.
@ IsAtomic
True if this is an atomic request.
@ TranslationSquashed
Ownership tracking flags.
@ LSQEntryFreed
LSQ resources freed.
@ TranslationFinished
True if there are un-replied outbound translations.
@ WriteBackToRegister
True if this request needs to writeBack to register.
@ TranslationStarted
True if any translation has been sent to TLB.
@ Discarded
Request discarded.
std::vector< bool > _byteEnable
LSQRequest(LSQUnit *port, const DynInstPtr &inst, bool isLoad)
Flags< FlagsStorage > FlagsType
virtual ~LSQRequest()
Destructor.
virtual void buildPackets()=0
const RequestPtr req(int idx=0) const
void install()
Install the request in the LQ/SQ.
void release(Flag reason)
Release the LSQRequest.
virtual RequestPtr mainReq()
AtomicOpFunctorPtr _amo_op
virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask)=0
Test if the request accesses a particular cache line.
void discard()
The request is discarded (e.g.
ContextID contextId() const
void taskId(const uint32_t &v)
PacketPtr packet(int idx=0)
virtual void initiateTranslation()=0
void setContext(const ContextID &context_id)
Convenience getters/setters.
void setState(const State &newState)
bool isTranslationBlocked()
void addReq(Addr addr, unsigned size, const std::vector< bool > &byte_enable)
Helper function used to add a (sub)request, given its address addr, size size and byte-enable mask by...
void setVirt(Addr vaddr, unsigned size, Request::Flags flags_, RequestorID requestor_id, Addr pc)
Set up virtual request.
uint32_t _numOutstandingPackets
void packetNotSent()
Update the status to reflect that a packet was not sent.
bool isReleased()
Test if the LSQRequest has been released, i.e.
bool _hasStaleTranslation
bool isAnyOutstandingRequest()
Test if there is any in-flight translation or mem access request.
virtual void sendPacketToCache()=0
void sendFragmentToTranslation(int i)
void packetSent()
Update the status to reflect that a packet was sent.
const Request::Flags _flags
virtual Cycles handleLocalAccess(gem5::ThreadContext *thread, PacketPtr pkt)=0
Memory mapped IPR accesses.
uint32_t numTranslatedFragments
std::vector< Fault > _fault
bool isMemAccessRequired()
uint32_t numInTranslationFragments
bool squashed() const override
This function is used by the page table walker to determine if it should translate a pending requ...
virtual std::string name() const
void markDelayed() override
Signal that the translation has been delayed due to a hw page table walk.
bool needWBToRegister() const
void writebackScheduled()
RequestPtr req(int idx=0)
virtual PacketPtr mainPacket()
std::vector< RequestPtr > _reqs
bool hasStaleTranslation() const
std::vector< PacketPtr > _packets
const DynInstPtr & instruction()
bool isTranslationComplete()
virtual void markAsStaleTranslation()=0
void freeLSQEntry()
The LSQ entry is cleared.
Addr getVaddr(int idx=0) const
virtual std::string name() const
virtual Cycles handleLocalAccess(gem5::ThreadContext *thread, PacketPtr pkt)
Memory mapped IPR accesses.
virtual void finish(const Fault &fault, const RequestPtr &req, gem5::ThreadContext *tc, BaseMMU::Mode mode)
virtual ~SingleDataRequest()
virtual void initiateTranslation()
SingleDataRequest(LSQUnit *port, const DynInstPtr &inst, bool isLoad, const Addr &addr, const uint32_t &size, const Request::Flags &flags_, PacketDataPtr data=nullptr, uint64_t *res=nullptr, AtomicOpFunctorPtr amo_op=nullptr)
virtual bool recvTimingResp(PacketPtr pkt)
virtual void buildPackets()
virtual void markAsStaleTranslation()
virtual void sendPacketToCache()
virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask)
Test if the request accesses a particular cache line.
virtual bool recvTimingResp(PacketPtr pkt)
virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask)
Caches may probe into the load-store queue to enforce memory ordering guarantees.
virtual void initiateTranslation()
virtual void markAsStaleTranslation()
virtual void finish(const Fault &fault, const RequestPtr &req, gem5::ThreadContext *tc, BaseMMU::Mode mode)
virtual PacketPtr mainPacket()
virtual Cycles handleLocalAccess(gem5::ThreadContext *thread, PacketPtr pkt)
Memory mapped IPR accesses.
virtual std::string name() const
virtual ~SplitDataRequest()
virtual void sendPacketToCache()
SplitDataRequest(LSQUnit *port, const DynInstPtr &inst, bool isLoad, const Addr &addr, const uint32_t &size, const Request::Flags &flags_, PacketDataPtr data=nullptr, uint64_t *res=nullptr)
virtual RequestPtr mainReq()
virtual void buildPackets()
uint32_t numReceivedPackets
virtual void initiateTranslation()
virtual void markAsStaleTranslation()
virtual ~UnsquashableDirectRequest()
virtual void finish(const Fault &fault, const RequestPtr &req, gem5::ThreadContext *tc, BaseMMU::Mode mode)
UnsquashableDirectRequest(LSQUnit *port, const DynInstPtr &inst, const Request::Flags &flags_)
virtual std::string name() const
unsigned SQEntries
Total Size of SQ Entries.
bool isDrained() const
Has the LSQ drained?
int cacheLoadPorts
The number of cache ports available each cycle (loads only).
unsigned numFreeEntries(ThreadID tid)
Returns the number of free entries for a specific thread.
int usedStorePorts
The number of used cache ports in this cycle by stores.
int numHtmStarts(ThreadID tid) const
std::string name() const
Returns the name of the LSQ.
void commitStores(InstSeqNum &youngest_inst, ThreadID tid)
Commits stores up until the given sequence number for a specific thread.
Addr staleTranslationWaitTxnId
The ID of the transaction that made translations stale.
bool recvTimingResp(PacketPtr pkt)
Handles writing back and completing the load or store that has returned from memory.
void checkStaleTranslations()
Checks if queues have any marked operations left, and sends the appropriate Sync Completion message i...
int getLoadHead(ThreadID tid)
Returns the head index of the load queue for a specific thread.
int entryAmount(ThreadID num_threads)
Number of entries needed for the given amount of threads.
void squash(const InstSeqNum &squashed_num, ThreadID tid)
Squash instructions from a thread until the specified sequence number.
bool sqEmpty() const
Returns if all of the SQs are empty.
void completeDataAccess(PacketPtr pkt)
Fault pushRequest(const DynInstPtr &inst, bool isLoad, uint8_t *data, unsigned int size, Addr addr, Request::Flags flags, uint64_t *res, AtomicOpFunctorPtr amo_op, const std::vector< bool > &byte_enable)
unsigned numFreeLoadEntries()
Returns the number of free load entries.
ThreadID numThreads
Number of Threads.
IEW * iewStage
The IEW stage pointer.
InstSeqNum getLoadHeadSeqNum(ThreadID tid)
Returns the sequence number of the head of the load queue.
std::list< ThreadID > * activeThreads
List of Active Threads in System.
DcachePort dcachePort
Data port.
void takeOverFrom()
Takes over execution from another CPU's thread.
DynInstPtr getMemDepViolator(ThreadID tid)
Gets the instruction that caused the memory ordering violation.
static uint32_t maxLSQAllocation(SMTQueuePolicy pol, uint32_t entries, uint32_t numThreads, uint32_t SMTThreshold)
Auxiliary function to calculate per-thread max LSQ allocation limit.
void setActiveThreads(std::list< ThreadID > *at_ptr)
Sets the pointer to the list of active threads.
bool cacheBlocked() const
Is D-cache blocked?
int numLoads()
Returns the total number of loads in the load queue.
void setLastRetiredHtmUid(ThreadID tid, uint64_t htmUid)
void dumpInsts() const
Debugging function to print out all instructions.
int usedLoadPorts
The number of used cache ports in this cycle by loads.
unsigned maxLQEntries
Max LQ Size - Used to Enforce Sharing Policies.
bool isFull()
Returns if the LSQ is full (either LQ or SQ is full).
void insertStore(const DynInstPtr &store_inst)
Inserts a store into the LSQ.
void recvReqRetry()
Retry the previous send that failed.
void commitLoads(InstSeqNum &youngest_inst, ThreadID tid)
Commits loads up until the given sequence number for a specific thread.
RequestPort & getDataPort()
Fault write(LSQRequest *request, uint8_t *data, ssize_t store_idx)
Executes a store operation, using the store specified at the store index.
void writebackStores(ThreadID tid)
Same as the no-argument writebackStores(), but only for one thread.
uint64_t getLatestHtmUid(ThreadID tid) const
bool willWB()
Returns if the LSQ will write back to memory this cycle.
int getStoreHead(ThreadID tid)
Returns the head index of the store queue.
LSQ(CPU *cpu_ptr, IEW *iew_ptr, const BaseO3CPUParams &params)
Constructs an LSQ with the given parameters.
CPU * cpu
The CPU pointer.
bool _cacheBlocked
D-cache is blocked.
void drainSanityCheck() const
Perform sanity checks after a drain.
std::vector< LSQUnit > thread
The LSQ units for individual threads.
unsigned LQEntries
Total Size of LQ Entries.
int numHtmStops(ThreadID tid) const
void cachePortBusy(bool is_load)
Another store port is in use.
bool cachePortAvailable(bool is_load) const
Is any store port available to use?
InstSeqNum getStoreHeadSeqNum(ThreadID tid)
Returns the sequence number of the head of the store queue.
bool isStalled()
Returns if the LSQ is stalled due to a memory operation that must be replayed.
void writebackStores()
Attempts to write back stores until all cache ports are used or the interface becomes blocked.
bool lqFull()
Returns if any of the LQs are full.
bool waitingForStaleTranslation
If the LSQ is currently waiting for stale translations.
unsigned maxSQEntries
Max SQ Size - Used to Enforce Sharing Policies.
bool lqEmpty() const
Returns if all of the LQs are empty.
int getCount()
Returns the number of instructions in all of the queues.
bool hasStoresToWB()
Returns whether or not there are any stores to write back to memory.
Fault read(LSQRequest *request, ssize_t load_idx)
Executes a read operation, using the load specified at the load index.
Fault executeStore(const DynInstPtr &inst)
Executes a store.
void tick()
Ticks the LSQ.
void insertLoad(const DynInstPtr &load_inst)
Inserts a load into the LSQ.
bool isEmpty() const
Returns if the LSQ is empty (both LQ and SQ are empty).
int numStores()
Returns the total number of stores in the store queue.
void recvTimingSnoopReq(PacketPtr pkt)
int cacheStorePorts
The number of cache ports available each cycle (stores only).
Fault executeLoad(const DynInstPtr &inst)
Executes a load.
bool violation()
Returns whether or not there was a memory ordering violation.
void resetHtmStartsStops(ThreadID tid)
SMTQueuePolicy lsqPolicy
The LSQ policy for SMT mode.
int numStoresToWB(ThreadID tid)
Returns the number of stores a specific thread has to write back.
unsigned numFreeStoreEntries()
Returns the number of free store entries.
bool sqFull()
Returns if any of the SQs are full.
std::unique_ptr< AtomicOpFunctor > AtomicOpFunctorPtr
void set(Type mask)
Set all flag's bits matching the given mask.
bool isSet(Type mask) const
Verifies whether any bit matching the given mask is set.
void clear()
Clear all flag's bits.
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
std::shared_ptr< FaultBase > Fault
int16_t ThreadID
Thread index/ID type.
std::shared_ptr< Request > RequestPtr
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
int ContextID
Globally unique thread context ID.
Overload hash function for BasicBlockRange type.
A virtual base opaque structure used to hold state associated with the packet (e.g....