#ifndef __CPU_O3_LSQ_IMPL_HH__
#define __CPU_O3_LSQ_IMPL_HH__

#include "debug/Drain.hh"
#include "debug/Fetch.hh"
#include "debug/LSQ.hh"
#include "debug/Writeback.hh"
#include "params/DerivO3CPU.hh"

template <class Impl>
LSQ<Impl>::LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params)
    : cpu(cpu_ptr), iewStage(iew_ptr),
      cacheStorePorts(params->cacheStorePorts), usedStorePorts(0),
      cacheLoadPorts(params->cacheLoadPorts), usedLoadPorts(0),
      lsqPolicy(params->smtLSQPolicy),
      LQEntries(params->LQEntries),
      SQEntries(params->SQEntries),
      maxLQEntries(maxLSQAllocation(lsqPolicy, LQEntries, params->numThreads,
                  params->smtLSQThreshold)),
      maxSQEntries(maxLSQAllocation(lsqPolicy, SQEntries, params->numThreads,
                  params->smtLSQThreshold)),
      dcachePort(this, cpu_ptr),
      numThreads(params->numThreads)
{
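    // How LQ/SQ entries are shared between SMT threads depends on the
    // configured policy: Dynamic lets all threads compete for every entry,
    // Partitioned gives each thread an equal fixed share, and Threshold caps
    // each thread at smtLSQThreshold entries.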
    if (lsqPolicy == SMTQueuePolicy::Dynamic) {
        DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
    } else if (lsqPolicy == SMTQueuePolicy::Partitioned) {
        DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: "
                "%i entries per LQ | %i entries per SQ\n",
                maxLQEntries, maxSQEntries);
    } else if (lsqPolicy == SMTQueuePolicy::Threshold) {
        assert(params->smtLSQThreshold > params->LQEntries);
        assert(params->smtLSQThreshold > params->SQEntries);

        DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
                "%i entries per LQ | %i entries per SQ\n",
                maxLQEntries, maxSQEntries);
    } else {
        panic("Invalid LSQ sharing policy. Options are: Dynamic, "
              "Partitioned, Threshold");
    }

    for (ThreadID tid = 0; tid < numThreads; tid++) {
        // ... (per-thread LSQUnit construction elided)
        thread[tid].init(cpu, iew_ptr, params, this, tid);
    }
}

template <class Impl>
void
LSQ<Impl>::drainSanityCheck() const
{
    // ...
}

template <class Impl>
bool
LSQ<Impl>::isDrained() const
{
    bool drained(true);

    if (!lqEmpty()) {
        DPRINTF(Drain, "Not drained, LQ not empty.\n");
        drained = false;
    }

    if (!sqEmpty()) {
        DPRINTF(Drain, "Not drained, SQ not empty.\n");
        drained = false;
    }

    return drained;
}

template <class Impl>
void
LSQ<Impl>::takeOverFrom()
{
    // ...
    for (ThreadID tid = 0; tid < numThreads; tid++) {
        thread[tid].takeOverFrom();
    }
}

// ... (tick() and the cache-port accounting helpers such as
//      cachePortAvailable()/cachePortBusy() are elided here)

template <class Impl>
void
LSQ<Impl>::insertLoad(const DynInstPtr &load_inst)
{
    ThreadID tid = load_inst->threadNumber;

    thread[tid].insertLoad(load_inst);
}

template <class Impl>
void
LSQ<Impl>::insertStore(const DynInstPtr &store_inst)
{
    ThreadID tid = store_inst->threadNumber;

    thread[tid].insertStore(store_inst);
}

template <class Impl>
Fault
LSQ<Impl>::executeLoad(const DynInstPtr &inst)
{
    ThreadID tid = inst->threadNumber;

    return thread[tid].executeLoad(inst);
}

template <class Impl>
Fault
LSQ<Impl>::executeStore(const DynInstPtr &inst)
{
    ThreadID tid = inst->threadNumber;

    return thread[tid].executeStore(inst);
}

template <class Impl>
void
LSQ<Impl>::writebackStores()
{
    std::list<ThreadID>::iterator threads = activeThreads->begin();
    std::list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (numStoresToWB(tid) > 0) {
            DPRINTF(Writeback, "[tid:%i] Writing back stores. %i stores "
                    "available for Writeback.\n", tid, numStoresToWB(tid));
        }

        thread[tid].writebackStores();
    }
}

template <class Impl>
bool
LSQ<Impl>::violation()
{
    std::list<ThreadID>::iterator threads = activeThreads->begin();
    std::list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (thread[tid].violation())
            return true;
    }

    return false;
}

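// The handlers below are invoked from the data-cache side: retries when the
// cache had been blocked, completions for outstanding accesses, and snoops
// that every thread's LSQ unit must check against its in-flight loads.
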
template <class Impl>
void
LSQ<Impl>::recvReqRetry()
{
    // ... (retries the sends that were blocked by the cache)
}

template <class Impl>
void
LSQ<Impl>::completeDataAccess(PacketPtr pkt)
{
    auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
    thread[cpu->contextToThread(senderState->contextId())]
        .completeDataAccess(pkt);
}

template <class Impl>
bool
LSQ<Impl>::recvTimingResp(PacketPtr pkt)
{
    if (pkt->isError())
        DPRINTF(LSQ, "Got error packet back for address: %#X\n",
                pkt->getAddr());

    auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
    panic_if(!senderState,
             "Got packet back with unknown sender state\n");

    thread[cpu->contextToThread(senderState->contextId())].recvTimingResp(pkt);

    if (pkt->isInvalidate()) {
        // The response also carries an invalidation (e.g. a
        // ReadRespWithInvalidate), so every thread's LSQ unit must snoop it.
        DPRINTF(LSQ, "received invalidation with response for addr:%#x\n",
                pkt->getAddr());

        for (ThreadID tid = 0; tid < cpu->numThreads; tid++)
            thread[tid].checkSnoop(pkt);
    }

    // Update the LSQRequest state (this may delete the request).
    senderState->request()->packetReplied();

    return true;
}

template <class Impl>
void
LSQ<Impl>::recvTimingSnoopReq(PacketPtr pkt)
{
    // Only invalidations need to be checked against in-flight loads.
    if (pkt->isInvalidate()) {
        DPRINTF(LSQ, "received invalidation for addr:%#x\n",
                pkt->getAddr());

        for (ThreadID tid = 0; tid < cpu->numThreads; tid++)
            thread[tid].checkSnoop(pkt);
    }
}

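// Occupancy and status queries.  The rename stage consults the free-entry
// counts to decide whether another load or store can be admitted.
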
// getCount(), numLoads(), numStores(), numFreeLoadEntries() and
// numFreeStoreEntries() all accumulate a total across the active threads
// with the same loop shape:

template <class Impl>
int
LSQ<Impl>::numStores()
{
    int total = 0;

    std::list<ThreadID>::iterator threads = activeThreads->begin();
    std::list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        total += thread[tid].numStores();
    }

    return total;
}

// numFreeLoadEntries() and numFreeStoreEntries() accumulate
// thread[tid].numFreeLoadEntries() and thread[tid].numFreeStoreEntries() in
// the same way.  The single-thread overloads simply forward:

template <class Impl>
unsigned
LSQ<Impl>::numFreeLoadEntries(ThreadID tid)
{
    return thread[tid].numFreeLoadEntries();
}

template <class Impl>
unsigned
LSQ<Impl>::numFreeStoreEntries(ThreadID tid)
{
    return thread[tid].numFreeStoreEntries();
}

// isFull(), lqEmpty(), sqEmpty(), lqFull(), sqFull(), isStalled(),
// hasStoresToWB(), willWB() and dumpInsts() likewise walk the active-thread
// list ("while (threads != end) { ... }") and combine the per-thread
// answers.  The single-thread overloads only make sense when each thread
// owns its own partition, so under the Dynamic (fully shared) policy they
// fall back to the global check:

template <class Impl>
bool
LSQ<Impl>::lqFull(ThreadID tid)
{
    if (lsqPolicy == SMTQueuePolicy::Dynamic)
        return lqFull();
    else
        return thread[tid].lqFull();
}

template <class Impl>
bool
LSQ<Impl>::sqFull(ThreadID tid)
{
    if (lsqPolicy == SMTQueuePolicy::Dynamic)
        return sqFull();
    else
        return thread[tid].sqFull();
}

template <class Impl>
bool
LSQ<Impl>::isStalled(ThreadID tid)
{
    if (lsqPolicy == SMTQueuePolicy::Dynamic)
        return isStalled();
    else
        return thread[tid].isStalled();
}

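// pushRequest() is the entry point the CPU's memory-access helpers use to
// issue a load, store or atomic to the LSQ: it builds either a
// SingleDataRequest or, when the access crosses a cache-line boundary, a
// SplitDataRequest, starts address translation, and once translation is
// complete hands the access to the owning thread's LSQ unit via
// cpu->read()/cpu->write().
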
template <class Impl>
Fault
LSQ<Impl>::pushRequest(const DynInstPtr &inst, bool isLoad, uint8_t *data,
                       unsigned int size, Addr addr, Request::Flags flags,
                       uint64_t *res, AtomicOpFunctorPtr amo_op,
                       const std::vector<bool> &byte_enable)
{
    bool isAtomic M5_VAR_USED = !isLoad && amo_op;
    ThreadID tid = cpu->contextToThread(inst->contextId());
    auto cacheLineSize = cpu->cacheLineSize();
    bool needs_burst = transferNeedsBurst(addr, size, cacheLineSize);
    LSQRequest* req = nullptr;

    // Atomic requests that cross a cache-line boundary are not allowed,
    // since the cache cannot guarantee atomicity across lines.
    assert(!isAtomic || (isAtomic && !needs_burst));

    if (inst->translationStarted()) {
        req = inst->savedReq;
        assert(req);
    } else {
        if (needs_burst) {
            req = new SplitDataRequest(&thread[tid], inst, isLoad, addr,
                    size, flags, data, res);
        } else {
            req = new SingleDataRequest(&thread[tid], inst, isLoad, addr,
                    size, flags, data, res, std::move(amo_op));
        }
        if (!byte_enable.empty()) {
            req->_byteEnable = byte_enable;
        }
        // ...
        req->initiateTranslation();
    }

    /* This is the place where instructions get their effAddr. */
    if (req->isTranslationComplete()) {
        if (req->isMemAccessRequired()) {
            inst->effAddr = req->getVaddr();
            inst->effSize = size;
            inst->effAddrValid(true);

            if (cpu->checker) {
                inst->reqToVerify = std::make_shared<Request>(*req->request());
            }
            Fault fault;
            if (isLoad)
                fault = cpu->read(req, inst->lqIdx);
            else
                fault = cpu->write(req, data, inst->sqIdx);
            if (fault != NoFault)
                inst->getFault() = fault;
        } else if (isLoad) {
            // A predicated-false access: mark it as not needing memory.
            inst->setMemAccPredicate(false);
            // ...
        }
    }

    if (inst->traceData)
        inst->traceData->setMem(addr, size, flags);

    return inst->getFault();
}

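// Example for the needs_burst check above: with a 64-byte cache line, an
// 8-byte access at address 0x3c covers bytes 0x3c..0x43 and crosses the 0x40
// line boundary, so transferNeedsBurst(0x3c, 8, 64) is true and a
// SplitDataRequest with two fragments is built; the same access at 0x38 fits
// within one line and uses a SingleDataRequest.
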
template <class Impl>
void
LSQ<Impl>::SingleDataRequest::finish(const Fault &fault, const RequestPtr &req,
        ThreadContext *tc, BaseTLB::Mode mode)
{
    _fault.push_back(fault);
    numInTranslationFragments = 0;
    numTranslatedFragments = 1;
    /* If the instruction has been squashed, let the request know, as it may
     * have to self-destruct. */
    if (_inst->isSquashed()) {
        this->squashTranslation();
    } else {
        _inst->strictlyOrdered(req->isStrictlyOrdered());
        flags.set(Flag::TranslationFinished);
        if (fault == NoFault) {
            _inst->physEffAddr = req->getPaddr();
            _inst->memReqFlags = req->getFlags();
            if (req->isCondSwap()) {
                req->setExtraData(*_res);
            }
            setState(State::Request);
        }
        // ...
    }
}

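// finish() is the callback the data TLB invokes when a translation
// completes.  For the split variant below, one fault slot per fragment is
// filled in and the instruction's state is only resolved once every fragment
// has translated.
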
template <class Impl>
void
LSQ<Impl>::SplitDataRequest::finish(const Fault &fault, const RequestPtr &req,
        ThreadContext *tc, BaseTLB::Mode mode)
{
    int i;
    /* Find the fragment this translation belongs to. */
    for (i = 0; i < _requests.size() && _requests[i] != req; i++);
    assert(i < _requests.size());
    _fault[i] = fault;

    numInTranslationFragments--;
    numTranslatedFragments++;

    if (fault == NoFault)
        mainReq->setFlags(req->getFlags());

    if (numTranslatedFragments == _requests.size()) {
        if (_inst->isSquashed()) {
            this->squashTranslation();
        } else {
            _inst->strictlyOrdered(mainReq->isStrictlyOrdered());
            flags.set(Flag::TranslationFinished);
            _inst->translationCompleted(true);

            /* Find the first fragment that faulted, if any. */
            for (i = 0; i < _fault.size() && _fault[i] == NoFault; i++);

            if (i > 0) {
                _inst->physEffAddr = request(0)->getPaddr();
                _inst->memReqFlags = mainReq->getFlags();
                if (mainReq->isCondSwap()) {
                    assert(i == _fault.size());
                    mainReq->setExtraData(*_res);
                }
                if (i == _fault.size()) {
                    setState(State::Request);
                } else {
                    _inst->fault = _fault[i];
                    setState(State::PartialFault);
                }
            } else {
                /* The very first fragment faulted. */
                _inst->fault = _fault[0];
                // ...
            }
        }
    }
}

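// In the split finish() above, State::PartialFault records that only some
// fragments translated cleanly: the instruction keeps the first fault that
// was found (_fault[i]) while the fragments that did translate remain valid.
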
template <class Impl>
void
LSQ<Impl>::SingleDataRequest::initiateTranslation()
{
    assert(_requests.size() == 0);

    this->addRequest(_addr, _size, _byteEnable);

    if (_requests.size() > 0) {
        _requests.back()->setReqInstSeqNum(_inst->seqNum);
        _requests.back()->taskId(_taskId);
        _inst->translationStarted(true);
        setState(State::Translation);
        flags.set(Flag::TranslationStarted);

        _inst->savedReq = this;
        sendFragmentToTranslation(0);
    } else {
        _inst->setMemAccPredicate(false);
    }
}

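// The split variant below carves the virtual access into cache-line-sized
// pieces: a (possibly unaligned) head fragment up to the first line
// boundary, zero or more whole-line fragments, and a tail fragment for any
// remaining bytes.  mainReq describes the whole access and is used only for
// bookkeeping; its paddr is never valid.
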
template <class Impl>
void
LSQ<Impl>::SplitDataRequest::initiateTranslation()
{
    auto cacheLineSize = _port.cacheLineSize();
    Addr base_addr = _addr;
    Addr next_addr = addrBlockAlign(_addr + cacheLineSize, cacheLineSize);
    Addr final_addr = addrBlockAlign(_addr + _size, cacheLineSize);
    uint32_t size_so_far = 0;

    mainReq = std::make_shared<Request>(base_addr,
                _size, _flags, _inst->masterId(),
                _inst->instAddr(), _inst->contextId());
    if (!_byteEnable.empty()) {
        mainReq->setByteEnable(_byteEnable);
    }

    // mainReq's paddr is never used; it only exists so that the flags of the
    // sub-requests can be accumulated into it in finish().
    mainReq->setPaddr(0);

    /* Get the pre-fix, possibly unaligned. */
    if (_byteEnable.empty()) {
        this->addRequest(base_addr, next_addr - base_addr, _byteEnable);
    } else {
        auto it_start = _byteEnable.begin();
        auto it_end = _byteEnable.begin() + (next_addr - base_addr);
        this->addRequest(base_addr, next_addr - base_addr,
                         std::vector<bool>(it_start, it_end));
    }
    size_so_far = next_addr - base_addr;

    /* We are block aligned now, reading whole blocks. */
    base_addr = next_addr;
    while (base_addr != final_addr) {
        if (_byteEnable.empty()) {
            this->addRequest(base_addr, cacheLineSize, _byteEnable);
        } else {
            auto it_start = _byteEnable.begin() + size_so_far;
            auto it_end = _byteEnable.begin() + size_so_far + cacheLineSize;
            this->addRequest(base_addr, cacheLineSize,
                             std::vector<bool>(it_start, it_end));
        }
        size_so_far += cacheLineSize;
        base_addr += cacheLineSize;
    }

    /* Deal with the tail. */
    if (size_so_far < _size) {
        if (_byteEnable.empty()) {
            this->addRequest(base_addr, _size - size_so_far, _byteEnable);
        } else {
            auto it_start = _byteEnable.begin() + size_so_far;
            auto it_end = _byteEnable.end();
            this->addRequest(base_addr, _size - size_so_far,
                             std::vector<bool>(it_start, it_end));
        }
    }

    if (_requests.size() > 0) {
        /* Set up the fragments and send them to translation. */
        for (auto& r: _requests) {
            r->setReqInstSeqNum(_inst->seqNum);
            r->taskId(_taskId);
        }

        _inst->translationStarted(true);
        setState(State::Translation);
        flags.set(Flag::TranslationStarted);
        this->_inst->savedReq = this;
        numInTranslationFragments = 0;
        numTranslatedFragments = 0;
        _fault.resize(_requests.size());

        for (uint32_t i = 0; i < _requests.size(); i++) {
            sendFragmentToTranslation(i);
        }
    } else {
        _inst->setMemAccPredicate(false);
    }
}

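// Each fragment is handed to the data TLB separately; the split finish()
// callback above then fires once per fragment as its translation completes.
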
template <class Impl>
void
LSQ<Impl>::LSQRequest::sendFragmentToTranslation(int i)
{
    numInTranslationFragments++;
    _port.dTLB()->translateTiming(
            this->request(i),
            this->_inst->thread->getTC(), this,
            this->isLoad() ? BaseTLB::Read : BaseTLB::Write);
}

template <class Impl>
bool
LSQ<Impl>::SingleDataRequest::recvTimingResp(PacketPtr pkt)
{
    assert(_numOutstandingPackets == 1);
    flags.set(Flag::Complete);
    assert(pkt == _packets.front());
    _port.completeDataAccess(pkt);
    return true;
}

template <class Impl>
bool
LSQ<Impl>::SplitDataRequest::recvTimingResp(PacketPtr pkt)
{
    uint32_t pktIdx = 0;
    while (pktIdx < _packets.size() && pkt != _packets[pktIdx])
        pktIdx++;
    assert(pktIdx < _packets.size());
    numReceivedPackets++;
    if (numReceivedPackets == _packets.size()) {
        flags.set(Flag::Complete);
        /* Assemble a single response for the whole access. */
        PacketPtr resp = isLoad() ? Packet::createRead(mainReq)
                                  : Packet::createWrite(mainReq);
        // ...
        resp->senderState = _senderState;
        _port.completeDataAccess(resp);
        delete resp;
    }
    return true;
}

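// A split access is therefore reported complete to the LSQ unit only once,
// after the last fragment packet has returned, using a single response
// synthesized from mainReq.
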
template <class Impl>
void
LSQ<Impl>::SingleDataRequest::buildPackets()
{
    assert(_senderState);
    /* Retries do not create new packets. */
    if (_packets.size() == 0) {
        _packets.push_back(
                isLoad()
                    ? Packet::createRead(request())
                    : Packet::createWrite(request()));
        _packets.back()->dataStatic(_inst->memData);
        _packets.back()->senderState = _senderState;
    }
    assert(_packets.size() == 1);
}

template <class Impl>
void
LSQ<Impl>::SplitDataRequest::buildPackets()
{
    Addr base_address = _addr;

    if (_packets.size() == 0) {
        if (isLoad()) {
            _mainPacket = Packet::createRead(mainReq);
            _mainPacket->dataStatic(_inst->memData);
        }
        for (int i = 0; i < _requests.size() && _fault[i] == NoFault; i++) {
            RequestPtr r = _requests[i];
            PacketPtr pkt = isLoad() ? Packet::createRead(r)
                                     : Packet::createWrite(r);
            ptrdiff_t offset = r->getVaddr() - base_address;
            if (isLoad()) {
                pkt->dataStatic(_inst->memData + offset);
            } else {
                /* Stores get their own copy of the data slice. */
                uint8_t* req_data = new uint8_t[r->getSize()];
                std::memcpy(req_data,
                        _inst->memData + offset,
                        r->getSize());
                pkt->dataDynamic(req_data);
            }
            pkt->senderState = _senderState;
            _packets.push_back(pkt);
        }
    }
    assert(_packets.size() > 0);
}

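// In the split buildPackets() above, load fragments point directly into the
// instruction's memData buffer at their offset, so the pieces land in place
// as responses return; store fragments instead copy their slice into a
// per-packet buffer that the packet owns and frees (dataDynamic).
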
template <class Impl>
void
LSQ<Impl>::SingleDataRequest::sendPacketToCache()
{
    assert(_numOutstandingPackets == 0);
    if (lsqUnit()->trySendPacket(isLoad(), _packets.at(0)))
        _numOutstandingPackets = 1;
}

template <class Impl>
void
LSQ<Impl>::SplitDataRequest::sendPacketToCache()
{
    /* Send as many pending fragments as the cache will accept. */
    while (numReceivedPackets + _numOutstandingPackets < _packets.size() &&
            lsqUnit()->trySendPacket(isLoad(),
                _packets.at(numReceivedPackets + _numOutstandingPackets))) {
        _numOutstandingPackets++;
    }
}

template <class Impl>
Cycles
LSQ<Impl>::SingleDataRequest::handleLocalAccess(ThreadContext *thread,
                                                PacketPtr pkt)
{
    return pkt->req->localAccessor(thread, pkt);
}

template <class Impl>
Cycles
LSQ<Impl>::SplitDataRequest::handleLocalAccess(ThreadContext *thread,
                                               PacketPtr mainPkt)
{
    Cycles delay(0);
    unsigned offset = 0;

    for (auto r: _requests) {
        PacketPtr pkt =
            new Packet(r, isLoad() ? MemCmd::ReadReq : MemCmd::WriteReq);
        pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
        Cycles d = r->localAccessor(thread, pkt);
        if (d > delay)
            delay = d;
        offset += r->getSize();
        delete pkt;
    }
    return delay;
}

template <class Impl>
bool
LSQ<Impl>::SingleDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
{
    return ((_requests[0]->getPaddr() & blockMask) == blockAddr);
}

// Caches may probe the LSQ to enforce memory-ordering guarantees; a split
// request is a hit if any of its translated fragments falls in the block.
template <class Impl>
bool
LSQ<Impl>::SplitDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
{
    bool is_hit = false;
    for (auto &r: _requests) {
        /* Untranslated fragments have no paddr yet and cannot match. */
        if (r->hasPaddr() && (r->getPaddr() & blockMask) == blockAddr) {
            is_hit = true;
            break;
        }
    }
    return is_hit;
}

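// The DcachePort below is the glue to the memory system: it forwards timing
// responses, snoop requests and retry notifications from the cache into the
// LSQ methods defined earlier in this file.
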
template <class Impl>
bool
LSQ<Impl>::DcachePort::recvTimingResp(PacketPtr pkt)
{
    return lsq->recvTimingResp(pkt);
}

template <class Impl>
void
LSQ<Impl>::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
{
    for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
        if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
            cpu->wakeup(tid);
        }
    }

    lsq->recvTimingSnoopReq(pkt);
}

template <class Impl>
void
LSQ<Impl>::DcachePort::recvReqRetry()
{
    lsq->recvReqRetry();
}

#endif //__CPU_O3_LSQ_IMPL_HH__