44 #ifndef __CPU_O3_LSQ_IMPL_HH__ 45 #define __CPU_O3_LSQ_IMPL_HH__ 54 #include "debug/Drain.hh" 55 #include "debug/Fetch.hh" 56 #include "debug/LSQ.hh" 57 #include "debug/Writeback.hh" 58 #include "params/DerivO3CPU.hh" 64 : cpu(cpu_ptr), iewStage(iew_ptr),
66 cacheStorePorts(params->cacheStorePorts), usedStorePorts(0),
67 cacheLoadPorts(params->cacheLoadPorts), usedLoadPorts(0),
68 lsqPolicy(params->smtLSQPolicy),
69 LQEntries(params->LQEntries),
70 SQEntries(params->SQEntries),
71 maxLQEntries(maxLSQAllocation(lsqPolicy, LQEntries, params->numThreads,
72 params->smtLSQThreshold)),
73 maxSQEntries(maxLSQAllocation(lsqPolicy, SQEntries, params->numThreads,
74 params->smtLSQThreshold)),
75 dcachePort(this, cpu_ptr),
76 numThreads(params->numThreads)
85 if (
lsqPolicy == SMTQueuePolicy::Dynamic) {
86 DPRINTF(
LSQ,
"LSQ sharing policy set to Dynamic\n");
87 }
else if (
lsqPolicy == SMTQueuePolicy::Partitioned) {
88 DPRINTF(Fetch,
"LSQ sharing policy set to Partitioned: " 89 "%i entries per LQ | %i entries per SQ\n",
91 }
else if (
lsqPolicy == SMTQueuePolicy::Threshold) {
93 assert(params->smtLSQThreshold > params->LQEntries);
94 assert(params->smtLSQThreshold > params->SQEntries);
96 DPRINTF(
LSQ,
"LSQ sharing policy set to Threshold: " 97 "%i entries per LQ | %i entries per SQ\n",
100 panic(
"Invalid LSQ sharing policy. Options are: Dynamic, " 101 "Partitioned, Threshold");
107 thread[tid].init(
cpu, iew_ptr, params,
this, tid);
138 template <
class Impl>
148 template <
class Impl>
155 DPRINTF(Drain,
"Not drained, LQ not empty.\n");
160 DPRINTF(Drain,
"Not drained, SQ not empty.\n");
167 template <
class Impl>
175 thread[tid].takeOverFrom();
179 template <
class Impl>
234 ThreadID tid = load_inst->threadNumber;
236 thread[tid].insertLoad(load_inst);
243 ThreadID tid = store_inst->threadNumber;
245 thread[tid].insertStore(store_inst);
254 return thread[tid].executeLoad(inst);
263 return thread[tid].executeStore(inst);
273 while (threads != end) {
277 DPRINTF(Writeback,
"[tid:%i] Writing back stores. %i stores " 281 thread[tid].writebackStores();
293 while (threads != end) {
303 template <
class Impl>
315 template <
class Impl>
320 thread[
cpu->contextToThread(senderState->contextId())]
324 template <
class Impl>
329 DPRINTF(
LSQ,
"Got error packet back for address: %#X\n",
333 panic_if(!senderState,
"Got packet back with unknown sender state\n");
349 DPRINTF(
LSQ,
"received invalidation with response for addr:%#x\n",
353 thread[tid].checkSnoop(pkt);
357 senderState->request()->packetReplied();
362 template <
class Impl>
371 DPRINTF(
LSQ,
"received invalidation for addr:%#x\n",
374 thread[tid].checkSnoop(pkt);
388 while (threads != end) {
406 while (threads != end) {
424 while (threads != end) {
427 total +=
thread[tid].numStores();
442 while (threads != end) {
445 total +=
thread[tid].numFreeLoadEntries();
460 while (threads != end) {
463 total +=
thread[tid].numFreeStoreEntries();
473 return thread[tid].numFreeLoadEntries();
480 return thread[tid].numFreeStoreEntries();
490 while (threads != end) {
506 if (
lsqPolicy == SMTQueuePolicy::Dynamic)
526 while (threads != end) {
543 while (threads != end) {
560 while (threads != end) {
576 if (
lsqPolicy == SMTQueuePolicy::Dynamic)
579 return thread[tid].lqFull();
589 while (threads != end) {
605 if (
lsqPolicy == SMTQueuePolicy::Dynamic)
608 return thread[tid].sqFull();
618 while (threads != end) {
632 if (
lsqPolicy == SMTQueuePolicy::Dynamic)
635 return thread[tid].isStalled();
645 while (threads != end) {
662 while (threads != end) {
679 while (threads != end) {
698 ThreadID tid =
cpu->contextToThread(inst->contextId());
699 auto cacheLineSize =
cpu->cacheLineSize();
709 assert(!isAtomic || (isAtomic && !needs_burst));
711 if (inst->translationStarted()) {
712 req = inst->savedReq;
717 size, flags, data, res);
720 size, flags, data, res, std::move(amo_op));
723 if (!byte_enable.empty()) {
740 inst->effSize = size;
741 inst->effAddrValid(
true);
744 inst->reqToVerify = std::make_shared<Request>(*req->
request());
748 fault =
cpu->read(req, inst->lqIdx);
750 fault =
cpu->write(req, data, inst->sqIdx);
756 inst->getFault() = fault;
758 inst->setMemAccPredicate(
false);
766 inst->traceData->setMem(addr, size, flags);
768 return inst->getFault();
776 _fault.push_back(fault);
777 numInTranslationFragments = 0;
778 numTranslatedFragments = 1;
781 if (_inst->isSquashed()) {
782 this->squashTranslation();
784 _inst->strictlyOrdered(req->isStrictlyOrdered());
786 flags.set(Flag::TranslationFinished);
788 _inst->physEffAddr = req->getPaddr();
789 _inst->memReqFlags = req->getFlags();
790 if (req->isCondSwap()) {
792 req->setExtraData(*_res);
794 setState(State::Request);
810 for (i = 0; i < _requests.size() && _requests[
i] != req; i++);
811 assert(i < _requests.size());
814 numInTranslationFragments--;
815 numTranslatedFragments++;
818 mainReq->setFlags(req->getFlags());
820 if (numTranslatedFragments == _requests.size()) {
821 if (_inst->isSquashed()) {
822 this->squashTranslation();
824 _inst->strictlyOrdered(mainReq->isStrictlyOrdered());
825 flags.set(Flag::TranslationFinished);
826 _inst->translationCompleted(
true);
828 for (i = 0; i < _fault.size() && _fault[
i] ==
NoFault; i++);
830 _inst->physEffAddr = request(0)->getPaddr();
831 _inst->memReqFlags = mainReq->getFlags();
832 if (mainReq->isCondSwap()) {
833 assert (i == _fault.size());
835 mainReq->setExtraData(*_res);
837 if (i == _fault.size()) {
839 setState(State::Request);
841 _inst->fault = _fault[
i];
842 setState(State::PartialFault);
845 _inst->fault = _fault[0];
857 assert(_requests.size() == 0);
859 this->addRequest(_addr, _size, _byteEnable);
861 if (_requests.size() > 0) {
862 _requests.back()->setReqInstSeqNum(_inst->seqNum);
863 _requests.back()->taskId(_taskId);
864 _inst->translationStarted(
true);
865 setState(State::Translation);
866 flags.set(Flag::TranslationStarted);
868 _inst->savedReq =
this;
869 sendFragmentToTranslation(0);
871 _inst->setMemAccPredicate(
false);
893 auto cacheLineSize = _port.cacheLineSize();
894 Addr base_addr = _addr;
897 uint32_t size_so_far = 0;
899 mainReq = std::make_shared<Request>(_inst->getASID(), base_addr,
900 _size, _flags, _inst->masterId(),
901 _inst->instAddr(), _inst->contextId());
902 if (!_byteEnable.empty()) {
903 mainReq->setByteEnable(_byteEnable);
910 mainReq->setPaddr(0);
913 if (_byteEnable.empty()) {
914 this->addRequest(base_addr, next_addr - base_addr, _byteEnable);
916 auto it_start = _byteEnable.begin();
917 auto it_end = _byteEnable.begin() + (next_addr - base_addr);
918 this->addRequest(base_addr, next_addr - base_addr,
921 size_so_far = next_addr - base_addr;
924 base_addr = next_addr;
925 while (base_addr != final_addr) {
926 if (_byteEnable.empty()) {
927 this->addRequest(base_addr, cacheLineSize, _byteEnable);
929 auto it_start = _byteEnable.begin() + size_so_far;
930 auto it_end = _byteEnable.begin() + size_so_far + cacheLineSize;
931 this->addRequest(base_addr, cacheLineSize,
934 size_so_far += cacheLineSize;
935 base_addr += cacheLineSize;
939 if (size_so_far < _size) {
940 if (_byteEnable.empty()) {
941 this->addRequest(base_addr, _size - size_so_far, _byteEnable);
943 auto it_start = _byteEnable.begin() + size_so_far;
944 auto it_end = _byteEnable.end();
945 this->addRequest(base_addr, _size - size_so_far,
950 if (_requests.size() > 0) {
952 for (
auto&
r: _requests) {
953 r->setReqInstSeqNum(_inst->seqNum);
957 _inst->translationStarted(
true);
958 setState(State::Translation);
959 flags.set(Flag::TranslationStarted);
960 this->_inst->savedReq =
this;
961 numInTranslationFragments = 0;
962 numTranslatedFragments = 0;
963 _fault.resize(_requests.size());
965 for (uint32_t
i = 0;
i < _requests.size();
i++) {
966 sendFragmentToTranslation(
i);
969 _inst->setMemAccPredicate(
false);
977 numInTranslationFragments++;
978 _port.dTLB()->translateTiming(
980 this->_inst->thread->getTC(),
this,
988 assert(_numOutstandingPackets == 1);
990 flags.set(Flag::Complete);
992 assert(pkt == _packets.front());
993 _port.completeDataAccess(pkt);
1002 uint32_t pktIdx = 0;
1003 while (pktIdx < _packets.size() && pkt != _packets[pktIdx])
1005 assert(pktIdx < _packets.size());
1006 numReceivedPackets++;
1008 if (numReceivedPackets == _packets.size()) {
1009 flags.set(Flag::Complete);
1019 _port.completeDataAccess(resp);
1025 template<
class Impl>
1029 assert(_senderState);
1031 if (_packets.size() == 0) {
1036 _packets.back()->dataStatic(_inst->memData);
1037 _packets.back()->senderState = _senderState;
1039 assert(_packets.size() == 1);
1042 template<
class Impl>
1047 Addr base_address = _addr;
1049 if (_packets.size() == 0) {
1053 _mainPacket->dataStatic(_inst->memData);
1055 for (
int i = 0;
i < _requests.size() && _fault[
i] ==
NoFault;
i++) {
1059 ptrdiff_t
offset = r->getVaddr() - base_address;
1063 uint8_t* req_data =
new uint8_t[r->getSize()];
1064 std::memcpy(req_data,
1065 _inst->memData + offset,
1070 _packets.push_back(pkt);
1073 assert(_packets.size() > 0);
1076 template<
class Impl>
1080 assert(_numOutstandingPackets == 0);
1081 if (lsqUnit()->trySendPacket(isLoad(), _packets.at(0)))
1082 _numOutstandingPackets = 1;
1085 template<
class Impl>
1090 while (numReceivedPackets + _numOutstandingPackets < _packets.size() &&
1091 lsqUnit()->trySendPacket(isLoad(),
1092 _packets.at(numReceivedPackets + _numOutstandingPackets))) {
1093 _numOutstandingPackets++;
1097 template<
class Impl>
1105 template<
class Impl>
1111 for (
auto r: _requests) {
1115 offset +=
r->getSize();
1120 template<
class Impl>
1128 template<
class Impl>
1136 for (
auto r: _requests) {
1142 offset +=
r->getSize();
1148 template<
class Impl>
1170 template<
class Impl>
1174 bool is_hit =
false;
1175 for (
auto &
r: _requests) {
1185 if (
r->hasPaddr() && (
r->getPaddr() & blockMask) == blockAddr) {
1193 template <
class Impl>
1197 return lsq->recvTimingResp(pkt);
1200 template <
class Impl>
1204 for (
ThreadID tid = 0; tid <
cpu->numThreads; tid++) {
1205 if (
cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
1209 lsq->recvTimingSnoopReq(pkt);
1212 template <
class Impl>
1216 lsq->recvReqRetry();
1219 #endif//__CPU_O3_LSQ_IMPL_HH__ IEW * iewStage
The IEW stage pointer.
#define panic(...)
This implements a cprintf based panic() function.
void takeOverFrom()
Takes over execution from another CPU's thread.
virtual void initiateTranslation()
ThreadID numThreads
Number of Threads.
Impl::DynInstPtr DynInstPtr
decltype(nullptr) constexpr NoFault
Fault pushRequest(const DynInstPtr &inst, bool isLoad, uint8_t *data, unsigned int size, Addr addr, Request::Flags flags, uint64_t *res, AtomicOpFunctorPtr amo_op, const std::vector< bool > &byte_enable)
Cycles is a wrapper class for representing cycle counts.
virtual bool recvTimingResp(PacketPtr pkt)
Timing version of receive.
virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask)
Caches may probe into the load-store queue to enforce memory ordering guarantees.
void taskId(const uint32_t &v)
bool transferNeedsBurst(Addr addr, unsigned int size, unsigned int block_size)
Returns true if the given memory access (address, size) needs to be fragmented across aligned fixed-size blocks.
bool willWB()
Returns if the LSQ will write back to memory this cycle.
std::vector< RequestPtr > _requests
virtual void sendPacketToCache()
virtual void handleIprWrite(ThreadContext *thread, PacketPtr pkt)
Memory mapped IPR accesses.
unsigned maxSQEntries
Max SQ Size - Used to Enforce Sharing Policies.
SMTQueuePolicy lsqPolicy
The LSQ policy for SMT mode.
std::vector< LSQUnit > thread
The LSQ units for individual threads.
std::vector< bool > _byteEnable
std::string name() const
Returns the name of the LSQ.
virtual Cycles handleIprRead(ThreadContext *thread, PacketPtr pkt)
void tick()
Ticks the LSQ.
std::shared_ptr< Request > RequestPtr
bool sqFull()
Returns if any of the SQs are full.
static PacketPtr createWrite(const RequestPtr &req)
std::unique_ptr< AtomicOpFunctor > AtomicOpFunctorPtr
Cycles handleIprRead(ThreadContext *, Packet *)
virtual void recvReqRetry()
Handles doing a retry of the previous send.
bool violation()
Returns whether or not there was a memory ordering violation.
bool hasStoresToWB()
Returns whether or not there are any stores to write back to memory.
Overload hash function for BasicBlockRange type.
Derived class to hold any sender state the LSQ needs.
Fault executeLoad(const DynInstPtr &inst)
Executes a load.
bool isTranslationComplete()
int usedStorePorts
The number of used cache ports in this cycle by stores.
bool isFull()
Returns if the LSQ is full (either LQ or SQ is full).
bool lqEmpty() const
Returns if all of the LQs are empty.
bool isInvalidate() const
ThreadContext is the external interface to all thread state for anything outside of the CPU.
T * getPtr()
get a pointer to the data ptr.
void writebackStores()
Attempts to write back stores until all cache ports are used or the interface becomes blocked.
void dumpInsts() const
Debugging function to print out all instructions.
void dataStatic(T *p)
Set the data pointer to the following value that should not be freed.
virtual void finish(const Fault &fault, const RequestPtr &req, ThreadContext *tc, BaseTLB::Mode mode)
bool isMemAccessRequired()
bool _cacheBlocked
D-cache is blocked.
virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask)
Test if the request accesses a particular cache line.
void insertLoad(const DynInstPtr &load_inst)
Inserts a load into the LSQ.
bool isDrained() const
Has the LSQ drained?
Addr getVaddr(int idx=0) const
void cachePortBusy(bool is_load)
Another cache port (a load port or a store port, selected by is_load) is in use.
unsigned numFreeStoreEntries()
Returns the number of free store entries.
virtual void recvTimingSnoopReq(PacketPtr pkt)
Receive a timing snoop request from the peer.
virtual void buildPackets()
virtual void handleIprWrite(ThreadContext *thread, PacketPtr pkt)
Memory mapped IPR accesses.
DcachePort dcachePort
Data port.
int usedLoadPorts
The number of used cache ports in this cycle by loads.
void drainSanityCheck() const
Perform sanity checks after a drain.
int numLoads()
Returns the total number of loads in the load queue.
unsigned maxLQEntries
Max LQ Size - Used to Enforce Sharing Policies.
void sendFragmentToTranslation(int i)
int numStoresToWB(ThreadID tid)
Returns the number of stores a specific thread has to write back.
Fault executeStore(const DynInstPtr &inst)
Executes a store.
virtual void finish(const Fault &fault, const RequestPtr &req, ThreadContext *tc, BaseTLB::Mode mode)
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
static PacketPtr createRead(const RequestPtr &req)
Constructor-like methods that return Packets based on Request objects.
A Packet is used to encapsulate a transfer between two objects in the memory system.
virtual void sendPacketToCache()
void insertStore(const DynInstPtr &store_inst)
Inserts a store into the LSQ.
const FlagsType total
Print the total.
bool isEmpty() const
Returns if the LSQ is empty (both LQ and SQ are empty).
int getCount()
Returns the number of instructions in all of the queues.
virtual bool recvTimingResp(PacketPtr pkt)
int16_t ThreadID
Thread index/ID type.
virtual void initiateTranslation()
uint8_t outstanding
Number of outstanding packets to complete.
unsigned numFreeLoadEntries()
Returns the number of free load entries.
RequestPtr request(int idx=0)
void recvReqRetry()
Retry the previous send that failed.
SenderState * senderState
This packet's sender state.
void setActiveThreads(std::list< ThreadID > *at_ptr)
Sets the pointer to the list of active threads.
int cacheStorePorts
The number of cache ports available each cycle (stores only).
virtual void buildPackets()
LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params)
Constructs an LSQ with the given parameters.
int numStores()
Returns the total number of stores in the store queue.
virtual bool recvTimingResp(PacketPtr pkt)
bool cachePortAvailable(bool is_load) const
Is any cache port of the requested kind (load or store, selected by is_load) available to use?
void dataDynamic(T *p)
Set the data pointer to a value that should have delete [] called on it.
Memory operation metadata.
void completeDataAccess(PacketPtr pkt)
int cacheLoadPorts
The number of cache ports available each cycle (loads only).
virtual PacketPtr mainPacket()
void regStats()
Registers statistics of each LSQ unit.
virtual RequestPtr mainRequest()
Addr addrBlockAlign(Addr addr, Addr block_size)
Returns the address of the closest aligned fixed-size block to the given address.
bool recvTimingResp(PacketPtr pkt)
Handles writing back and completing the load or store that has returned from memory.
virtual void initiateTranslation()=0
Cycles handleIprWrite(ThreadContext *, Packet *)
bool cacheBlocked() const
Is D-cache blocked?
static const int NumArgumentRegs M5_VAR_USED
const std::string & cmdString() const
Return the string name of the cmd field (for debugging and tracing).
O3CPU * cpu
The CPU pointer.
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true.
virtual Cycles handleIprRead(ThreadContext *thread, PacketPtr pkt)
std::list< ThreadID > * activeThreads
List of Active Threads in System.
bool isStalled()
Returns if the LSQ is stalled due to a memory operation that must be replayed.
void recvTimingSnoopReq(PacketPtr pkt)
std::shared_ptr< FaultBase > Fault
bool sqEmpty() const
Returns if all of the SQs are empty.
ProbePointArg< PacketInfo > Packet
Packet probe point.
bool lqFull()
Returns if any of the LQs are full.