44 #ifndef __CPU_O3_LSQ_HH__ 45 #define __CPU_O3_LSQ_HH__ 54 #include "enums/SMTQueuePolicy.hh" 58 struct DerivO3CPUParams;
68 typedef typename Impl::O3CPU
O3CPU;
70 typedef typename Impl::CPUPol::IEW
IEW;
71 typedef typename Impl::CPUPol::LSQUnit
LSQUnit;
242 WbStore = 0x00000002,
243 Delayed = 0x00000004,
244 IsSplit = 0x00000008,
246 TranslationStarted = 0x00000010,
248 TranslationFinished = 0x00000020,
251 Complete = 0x00000100,
254 TranslationSquashed = 0x00000200,
256 Discarded = 0x00000400,
258 LSQEntryFreed = 0x00000800,
260 WritebackScheduled = 0x00001000,
261 WritebackDone = 0x00002000,
263 IsAtomic = 0x00004000
306 _state(
State::NotIssued), _senderState(nullptr),
307 _port(*port), _inst(inst), _data(nullptr),
308 _res(nullptr), _addr(0), _size(0), _flags(0),
309 _numOutstandingPackets(0), _amo_op(nullptr)
311 flags.
set(Flag::IsLoad, isLoad);
312 flags.
set(Flag::WbStore,
313 _inst->isStoreConditional() || _inst->isAtomic());
314 flags.
set(Flag::IsAtomic, _inst->isAtomic());
318 const Addr&
addr,
const uint32_t& size,
322 : _state(
State::NotIssued), _senderState(nullptr),
323 numTranslatedFragments(0),
324 numInTranslationFragments(0),
325 _port(*port), _inst(inst), _data(
data),
326 _res(res), _addr(addr), _size(size),
328 _numOutstandingPackets(0),
329 _amo_op(
std::move(amo_op))
331 flags.
set(Flag::IsLoad, isLoad);
332 flags.
set(Flag::WbStore,
333 _inst->isStoreConditional() || _inst->isAtomic());
334 flags.
set(Flag::IsAtomic, _inst->isAtomic());
341 return flags.
isSet(Flag::IsLoad);
347 return flags.
isSet(Flag::IsAtomic);
354 _port.
loadQueue[_inst->lqIdx].setRequest(
this);
358 _port.
storeQueue[_inst->sqIdx].setRequest(
this);
364 return _inst->isSquashed();
375 return flags.
isSet(Flag::LSQEntryFreed) ||
376 flags.
isSet(Flag::Discarded);
390 assert(reason == Flag::LSQEntryFreed || reason == Flag::Discarded);
391 if (!isAnyOutstandingRequest()) {
411 if (byte_enable.empty() ||
413 auto request = std::make_shared<Request>(_inst->getASID(),
414 addr, size, _flags, _inst->masterId(),
415 _inst->instAddr(), _inst->contextId(),
417 if (!byte_enable.empty()) {
418 request->setByteEnable(byte_enable);
430 assert(!isAnyOutstandingRequest());
431 _inst->savedReq =
nullptr;
435 for (
auto r: _packets)
470 for (
auto&
r: _requests)
474 uint32_t
taskId()
const {
return _taskId; }
480 return _requests.at(idx);
484 virtual void initiateTranslation() = 0;
491 assert (_packets.size() == 1);
498 assert (_requests.size() == 1);
506 for (
auto& pkt: _packets) {
508 pkt->senderState =
st;
525 assert(_senderState);
535 return numInTranslationFragments > 0 ||
536 _numOutstandingPackets > 0 ||
537 (flags.
isSet(Flag::WritebackScheduled) &&
538 !flags.
isSet(Flag::WritebackDone));
544 return flags.
isSet(Flag::IsSplit);
548 virtual void sendPacketToCache() = 0;
549 virtual void buildPackets() = 0;
560 virtual bool isCacheBlockHit(
Addr blockAddr,
Addr cacheBlockMask) = 0;
566 flags.
set(Flag::Sent);
575 flags.
set(Flag::Retry);
576 flags.
clear(Flag::Sent);
579 void sendFragmentToTranslation(
int i);
583 return flags.
isSet(Flag::Complete);
589 return _state == State::Translation;
595 return flags.
isSet(Flag::TranslationStarted) &&
602 return _state == State::Translation &&
603 flags.
isSet(Flag::TranslationStarted) &&
604 !flags.
isSet(Flag::TranslationFinished);
610 return flags.
isSet(Flag::Sent);
616 return _state == State::PartialFault;
622 return (_state == State::Request ||
623 (isPartialFault() &&
isLoad()));
638 release(Flag::LSQEntryFreed);
647 release(Flag::Discarded);
653 assert(_numOutstandingPackets > 0);
654 _numOutstandingPackets--;
655 if (_numOutstandingPackets == 0 && isReleased())
662 assert(!flags.
isSet(Flag::WritebackScheduled));
663 flags.
set(Flag::WritebackScheduled);
669 flags.
set(Flag::WritebackDone);
679 assert(numInTranslationFragments == 0);
680 flags.
set(Flag::TranslationSquashed);
690 flags.
set(Flag::Complete);
727 const Addr&
addr,
const uint32_t& size,
730 uint64_t* res =
nullptr,
733 std::move(amo_op)) {}
736 virtual void initiateTranslation();
740 virtual void sendPacketToCache();
741 virtual void buildPackets();
744 virtual bool isCacheBlockHit(
Addr blockAddr,
Addr cacheBlockMask);
786 const Addr&
addr,
const uint32_t& size,
789 uint64_t* res =
nullptr) :
793 numReceivedPackets(0),
797 flags.
set(Flag::IsSplit);
806 _mainPacket =
nullptr;
812 virtual void initiateTranslation();
813 virtual void sendPacketToCache();
814 virtual void buildPackets();
818 virtual bool isCacheBlockHit(
Addr blockAddr,
Addr cacheBlockMask);
825 LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params);
829 std::string
name()
const;
865 {
thread.at(tid).commitLoads(youngest_inst); }
871 {
thread.at(tid).commitStores(youngest_inst); }
887 thread.at(tid).squash(squashed_num);
902 return thread.at(tid).getMemDepViolator();
912 return thread.at(tid).getLoadHeadSeqNum();
922 return thread.at(tid).getStoreHeadSeqNum();
1087 if (pol == SMTQueuePolicy::Dynamic) {
1089 }
else if (pol == SMTQueuePolicy::Partitioned) {
1092 }
else if (pol == SMTQueuePolicy::Threshold) {
1096 return SMTThreshold;
1125 template <
class Impl>
1131 return thread.at(tid).read(req, load_idx);
1134 template <
class Impl>
1140 return thread.at(tid).write(req, data, store_idx);
1143 #endif // __CPU_O3_LSQ_HH__
A MasterPort is a specialisation of a BaseMasterPort, which implements the default protocol for the t...
IEW * iewStage
The IEW stage pointer.
unsigned SQEntries
Total Size of SQ Entries.
void takeOverFrom()
Takes over execution from another CPU's thread.
void set(T v, ByteOrder endian)
Set the value in the data pointer to v using the specified endianness.
ThreadID numThreads
Number of Threads.
DynInstPtr inst
Instruction which initiated the access to memory.
Impl::DynInstPtr DynInstPtr
uint32_t _numOutstandingPackets
Fault pushRequest(const DynInstPtr &inst, bool isLoad, uint8_t *data, unsigned int size, Addr addr, Request::Flags flags, uint64_t *res, AtomicOpFunctorPtr amo_op, const std::vector< bool > &byte_enable)
Cycles is a wrapper class for representing cycle counts, i.e.
PacketPtr mainPkt
The main packet from a split load, used during writeback.
int getCount(ThreadID tid)
Returns the number of instructions in the queues of one thread.
void taskId(const uint32_t &v)
LSQSenderState * _senderState
unsigned LQEntries
Total Size of LQ Entries.
bool willWB()
Returns if the LSQ will write back to memory this cycle.
std::vector< RequestPtr > _requests
unsigned maxSQEntries
Max SQ Size - Used to Enforce Sharing Policies.
SMTQueuePolicy lsqPolicy
The LSQ policy for SMT mode.
void dumpInsts(ThreadID tid) const
Debugging function to print out instructions from a specific thread.
std::vector< LSQUnit > thread
The LSQ units for individual threads.
std::vector< bool > _byteEnable
std::string name() const
Returns the name of the LSQ.
void tick()
Ticks the LSQ.
std::shared_ptr< Request > RequestPtr
void packetSent()
Update the status to reflect that a packet was sent.
bool sqFull()
Returns if any of the SQs are full.
void packetNotSent()
Update the status to reflect that a packet was not sent.
bool isComplete()
Completes a packet and returns whether the access is finished.
std::unique_ptr< AtomicOpFunctor > AtomicOpFunctorPtr
const Request::Flags _flags
Cycles handleIprRead(ThreadContext *, Packet *)
LSQ< Impl > * lsq
Pointer to LSQ.
LSQRequest * _request
The senderState needs to know the LSQRequest who owns it.
bool violation()
Returns whether or not there was a memory ordering violation.
bool hasStoresToWB()
Returns whether or not there are any stores to write back to memory.
Overload hash function for BasicBlockRange type.
Derived class to hold any sender state the LSQ needs.
Fault executeLoad(const DynInstPtr &inst)
Executes a load.
LSQSenderState(LSQRequest *request, bool isLoad_)
Constructs the sender state for the given owning request and load/store kind.
bool isTranslationComplete()
int usedStorePorts
The number of used cache ports in this cycle by stores.
virtual ~LSQRequest()
Destructor.
bool isFull()
Returns if the LSQ is full (either LQ or SQ is full).
int getLoadHead(ThreadID tid)
Returns the head index of the load queue for a specific thread.
bool lqEmpty() const
Returns if all of the LQs are empty.
ThreadContext is the external interface to all thread state for anything outside of the CPU...
bool hasStoresToWB(ThreadID tid)
Returns whether or not a specific thread has any stores to write back to memory.
void writebackStores()
Attempts to write back stores until all cache ports are used or the interface becomes blocked...
bool needWB
Whether or not the instruction will need to writeback.
std::vector< PacketPtr > _packets
void dumpInsts() const
Debugging function to print out all instructions.
AtomicOpFunctorPtr _amo_op
bool violation(ThreadID tid)
Returns whether or not there was a memory ordering violation for a specific thread.
LSQRequest(LSQUnit *port, const DynInstPtr &inst, bool isLoad)
bool willWB(ThreadID tid)
Returns if the LSQ of a specific thread will write back to memory this cycle.
::Flags< FlagsStorage > FlagsType
bool isSplit
Whether or not this access is split in two.
uint32_t numInTranslationFragments
void writebackScheduled()
virtual bool squashed() const override
This function is used by the page table walker to determine if it should translate a pending request...
Impl::CPUPol::LSQUnit LSQUnit
void release(Flag reason)
Release the LSQRequest.
bool isMemAccessRequired()
bool _cacheBlocked
D-cache is blocked.
void senderState(LSQSenderState *st)
void insertLoad(const DynInstPtr &load_inst)
Inserts a load into the LSQ.
bool isDrained() const
Has the LSQ drained?
Addr getVaddr(int idx=0) const
void setState(const State &newState)
void cachePortBusy(bool is_load)
Another cache port is in use (a load port or a store port, selected by is_load).
unsigned numFreeStoreEntries()
Returns the number of free store entries.
InstSeqNum getLoadHeadSeqNum(ThreadID tid)
Returns the sequence number of the head of the load queue for a specific thread.
DcachePort dcachePort
Data port.
std::vector< Fault > _fault
void setContext(const ContextID &context_id)
Convenience getters/setters.
int numStores(ThreadID tid)
Returns the total number of stores for a single thread.
int usedLoadPorts
The number of used cache ports in this cycle by loads.
void drainSanityCheck() const
Perform sanity checks after a drain.
int numLoads()
Returns the total number of loads in the load queue.
DcachePort(LSQ< Impl > *_lsq, FullO3CPU< Impl > *_cpu)
Constructs the data cache port for the given LSQ and CPU.
unsigned maxLQEntries
Max LQ Size - Used to Enforce Sharing Policies.
void sendFragmentToTranslation(int i)
int numStoresToWB(ThreadID tid)
Returns the number of stores a specific thread has to write back.
Fault executeStore(const DynInstPtr &inst)
Executes a store.
Fault write(LSQRequest *req, uint8_t *data, int store_idx)
Executes a store operation, using the store specified at the store index.
void addRequest(Addr addr, unsigned size, const std::vector< bool > &byte_enable)
Helper function used to add a (sub)request, given its address addr, size size and byte-enable mask by...
virtual bool isSnooping() const
As this CPU requires snooping to maintain the load store queue change the behaviour from the base CPU...
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
bool deleted
Has the request been deleted? LSQ entries can be squashed before the response comes back...
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
InstSeqNum getStoreHeadSeqNum(ThreadID tid)
Returns the sequence number of the head of the store queue for a specific thread.
void markDelayed() override
Signal that the translation has been delayed due to a hw page table walk.
uint32_t numTranslatedFragments
bool isLoad
Whether or not it is a load.
A virtual base opaque structure used to hold state associated with the packet (e.g., an MSHR), specific to a SimObject that sees the packet.
void insertStore(const DynInstPtr &store_inst)
Inserts a store into the LSQ.
bool isReleased()
Test if the LSQRequest has been released, i.e. its LSQEntryFreed or Discarded flag is set.
uint32_t _entryIdx
LQ/SQ entry idx.
bool isEmpty() const
Returns if the LSQ is empty (both LQ and SQ are empty).
PacketPtr pendingPacket
A second packet from a split store that needs sending.
virtual void complete()=0
void discardSenderState()
Mark senderState as discarded.
int getCount()
Returns the number of instructions in all of the queues.
bool isTranslationBlocked()
int16_t ThreadID
Thread index/ID type.
SingleDataRequest(LSQUnit *port, const DynInstPtr &inst, bool isLoad, const Addr &addr, const uint32_t &size, const Request::Flags &flags_, PacketDataPtr data=nullptr, uint64_t *res=nullptr, AtomicOpFunctorPtr amo_op=nullptr)
void commitStores(InstSeqNum &youngest_inst, ThreadID tid)
Commits stores up until the given sequence number for a specific thread.
const LSQSenderState * senderState() const
uint8_t outstanding
Number of outstanding packets to complete.
virtual void recvFunctionalSnoop(PacketPtr pkt)
Receive a functional snoop request packet from the peer.
unsigned numFreeLoadEntries()
Returns the number of free load entries.
int entryAmount(ThreadID num_threads)
Number of entries needed for the given amount of threads.
virtual PacketPtr mainPacket()
LSQRequest(LSQUnit *port, const DynInstPtr &inst, bool isLoad, const Addr &addr, const uint32_t &size, const Request::Flags &flags_, PacketDataPtr data=nullptr, uint64_t *res=nullptr, AtomicOpFunctorPtr amo_op=nullptr)
RequestPtr request(int idx=0)
Fault read(LSQRequest *req, int load_idx)
Executes a read operation, using the load specified at the load index.
void recvReqRetry()
Retry the previous send that failed.
DcachePort class for the load/store queue.
const DynInstPtr & instruction()
void setActiveThreads(std::list< ThreadID > *at_ptr)
Sets the pointer to the list of active threads.
uint32_t numReceivedPackets
int cacheStorePorts
The number of cache ports available each cycle (stores only).
virtual RequestPtr mainRequest()
LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params)
Constructs an LSQ with the given parameters.
int numStores()
Returns the total number of stores in the store queue.
bool isAnyOutstandingRequest()
Test if there is any in-flight translation or mem access request.
void setVirt(int asid, Addr vaddr, unsigned size, Request::Flags flags_, MasterID mid, Addr pc)
Set up virtual request.
void install()
Install the request in the LQ/SQ.
bool isAnyActiveElement(const std::vector< bool >::const_iterator &it_start, const std::vector< bool >::const_iterator &it_end)
Test if there is any active element in an enablement range.
bool cachePortAvailable(bool is_load) const
Is any cache port (a load port or a store port, selected by is_load) available to use?
SplitDataRequest(LSQUnit *port, const DynInstPtr &inst, bool isLoad, const Addr &addr, const uint32_t &size, const Request::Flags &flags_, PacketDataPtr data=nullptr, uint64_t *res=nullptr)
virtual ~SingleDataRequest()
void discard()
The request is discarded (e.g.
Memory operation metadata.
void completeDataAccess(PacketPtr pkt)
int cacheLoadPorts
The number of cache ports available each cycle (loads only).
void regStats()
Registers statistics of each LSQ unit.
bool recvTimingResp(PacketPtr pkt)
Handles writing back and completing the load or store that has returned from memory.
LoadQueue loadQueue
The load queue.
virtual ~SplitDataRequest()
Cycles handleIprWrite(ThreadContext *, Packet *)
bool pktToSend
Whether or not there is a packet that needs sending.
void squash(const InstSeqNum &squashed_num, ThreadID tid)
Squash instructions from a thread until the specified sequence number.
CircularQueue< SQEntry > storeQueue
The store queue.
static uint32_t maxLSQAllocation(SMTQueuePolicy pol, uint32_t entries, uint32_t numThreads, uint32_t SMTThreshold)
Auxiliary function to calculate per-thread max LSQ allocation limit.
bool cacheBlocked() const
Is D-cache blocked?
PacketPtr packet(int idx=0)
MasterPort & getDataPort()
O3CPU * cpu
The CPU pointer.
std::list< ThreadID > * activeThreads
List of Active Threads in System.
bool isStalled()
Returns if the LSQ is stalled due to a memory operation that must be replayed.
void freeLSQEntry()
The LSQ entry is cleared.
DynInstPtr getMemDepViolator(ThreadID tid)
Gets the instruction that caused the memory ordering violation.
void recvTimingSnoopReq(PacketPtr pkt)
std::shared_ptr< FaultBase > Fault
unsigned numFreeEntries(ThreadID tid)
Returns the number of free entries for a specific thread.
int getStoreHead(ThreadID tid)
Returns the head index of the store queue for a specific thread.
int ContextID
Globally unique thread context ID.
int numLoads(ThreadID tid)
Returns the total number of loads for a single thread.
void commitLoads(InstSeqNum &youngest_inst, ThreadID tid)
Commits loads up until the given sequence number for a specific thread.
bool sqEmpty() const
Returns if all of the SQs are empty.
const RequestPtr request(int idx=0) const
bool lqFull()
Returns if any of the LQs are full.
FullO3CPU class, has each of the stages (fetch through commit) within it, as well as all of the time ...