42 #ifndef __CPU_O3_LSQ_IMPL_HH__
43 #define __CPU_O3_LSQ_IMPL_HH__
52 #include "debug/Drain.hh"
53 #include "debug/Fetch.hh"
54 #include "debug/HtmCpu.hh"
55 #include "debug/LSQ.hh"
56 #include "debug/Writeback.hh"
57 #include "params/DerivO3CPU.hh"
// Constructor fragment (the signature and several lines are not visible in
// this view). Initializes the members from the CPU/IEW pointers and the
// params object, reports the configured SMT sharing policy, and initializes
// the per-thread LSQ units.
61 : cpu(cpu_ptr), iewStage(iew_ptr),
63 cacheStorePorts(params.cacheStorePorts), usedStorePorts(0),
64 cacheLoadPorts(params.cacheLoadPorts), usedLoadPorts(0),
65 lsqPolicy(params.smtLSQPolicy),
66 LQEntries(params.LQEntries),
67 SQEntries(params.SQEntries),
68 maxLQEntries(maxLSQAllocation(lsqPolicy, LQEntries, params.numThreads,
69 params.smtLSQThreshold)),
70 maxSQEntries(maxLSQAllocation(lsqPolicy, SQEntries, params.numThreads,
71 params.smtLSQThreshold)),
72 dcachePort(this, cpu_ptr),
73 numThreads(params.numThreads)
// Report which SMT sharing policy is in effect; an unrecognized policy is
// fatal (see the panic below).
82 if (
lsqPolicy == SMTQueuePolicy::Dynamic) {
83 DPRINTF(
LSQ,
"LSQ sharing policy set to Dynamic\n");
84 }
else if (
lsqPolicy == SMTQueuePolicy::Partitioned) {
// Logged under the LSQ debug flag, matching the Dynamic and Threshold
// branches, so all policy messages appear with the same flag.
85 DPRINTF(LSQ,
"LSQ sharing policy set to Partitioned: "
86 "%i entries per LQ | %i entries per SQ\n",
88 }
else if (
lsqPolicy == SMTQueuePolicy::Threshold) {
// The threshold must be strictly larger than both queue sizes.
90 assert(params.smtLSQThreshold > params.LQEntries);
91 assert(params.smtLSQThreshold > params.SQEntries);
93 DPRINTF(
LSQ,
"LSQ sharing policy set to Threshold: "
94 "%i entries per LQ | %i entries per SQ\n",
97 panic(
"Invalid LSQ sharing policy. Options are: Dynamic, "
98 "Partitioned, Threshold");
// Initialize the LSQ unit of thread `tid` (the enclosing loop is not visible
// in this view).
104 thread[tid].init(
cpu, iew_ptr, params,
this, tid);
// Builds this object's name by appending ".lsq" to the IEW stage's name.
114 return iewStage->name() +
".lsq";
// Stores the supplied active-thread list pointer; it must not be null.
121 activeThreads = at_ptr;
122 assert(activeThreads != 0);
// Calls drainSanityCheck() on the LSQ unit of every thread index in
// [0, numThreads).
125 template <
class Impl>
131 for (
ThreadID tid = 0; tid < numThreads; tid++)
132 thread[tid].drainSanityCheck();
// Drain-status fragment: emits a Drain debug message naming the queue (LQ or
// SQ) that is still non-empty. The conditions guarding each message and the
// return values are not visible in this view.
135 template <
class Impl>
142 DPRINTF(Drain,
"Not drained, LQ not empty.\n");
147 DPRINTF(Drain,
"Not drained, SQ not empty.\n");
// Clears the cache-blocked flag and then calls takeOverFrom() on the LSQ
// unit of every thread index in [0, numThreads).
154 template <
class Impl>
159 _cacheBlocked =
false;
161 for (
ThreadID tid = 0; tid < numThreads; tid++) {
162 thread[tid].takeOverFrom();
// When every load port has been used (usedLoadPorts == cacheLoadPorts) and
// the cache is not marked blocked, tells the IEW stage the cache is
// unblocked. Any statements after this check are not visible in this view.
166 template <
class Impl>
171 if (usedLoadPorts == cacheLoadPorts && !_cacheBlocked)
172 iewStage->cacheUnblocked();
// Returns the current value of the cache-blocked flag.
182 return _cacheBlocked;
// A port is available while fewer ports have been used than exist; the first
// assignment checks the load ports, the second the store ports. The branch
// selecting between them is not visible in this view.
198 ret = usedLoadPorts < cacheLoadPorts;
200 ret = usedStorePorts < cacheStorePorts;
// Precondition: a port of the requested kind (load or store) must still be
// available.
209 assert(cachePortAvailable(is_load));
// Routes the load to the LSQ unit of the thread recorded on the instruction.
221 ThreadID tid = load_inst->threadNumber;
223 thread[tid].insertLoad(load_inst);
// Routes the store to the LSQ unit of the thread recorded on the instruction.
230 ThreadID tid = store_inst->threadNumber;
232 thread[tid].insertStore(store_inst);
// Delegates to the selected thread's LSQ unit and returns its result
// (how `tid` is derived is not visible in this view).
241 return thread[tid].executeLoad(inst);
// Delegates to the selected thread's LSQ unit and returns its result
// (how `tid` is derived is not visible in this view).
250 return thread[tid].executeStore(inst);
// Walks a thread iterator range; for a thread with stores pending writeback
// it logs the count under the Writeback flag. The writebackStores() call is
// issued on the thread's LSQ unit; whether it sits inside the `if` is not
// visible in this view.
260 while (threads != end) {
263 if (numStoresToWB(tid) > 0) {
264 DPRINTF(Writeback,
"[tid:%i] Writing back stores. %i stores "
265 "available for Writeback.\n", tid, numStoresToWB(tid));
268 thread[tid].writebackStores();
// Walks a thread iterator range and tests each thread's LSQ unit for a
// violation; the action taken on a hit is not visible in this view.
280 while (threads != end) {
283 if (thread[tid].violation())
// Retry handling fragment: notifies the IEW stage that the cache is
// unblocked and forwards the retry to the LSQ unit of every active thread.
290 template <
class Impl>
294 iewStage->cacheUnblocked();
297 for (
ThreadID tid : *activeThreads) {
298 thread[tid].recvRetry();
// Maps the sender state's context id to a thread id and lets that thread's
// LSQ unit complete the data access for the packet.
302 template <
class Impl>
307 thread[cpu->contextToThread(senderState->contextId())]
308 .completeDataAccess(pkt);
// Timing-response fragment. Visible steps: log error packets, require a
// sender state, hand the packet to the owning thread's LSQ unit, run snoop
// checks on all threads for an invalidation carried with the response, and
// mark the originating request as replied. Guard conditions are not visible
// in this view.
311 template <
class Impl>
316 DPRINTF(
LSQ,
"Got error packet back for address: %#X\n",
// A response without sender state cannot be attributed to a request.
320 panic_if(!senderState,
"Got packet back with unknown sender state\n");
322 thread[cpu->contextToThread(senderState->contextId())].recvTimingResp(pkt);
336 DPRINTF(
LSQ,
"received invalidation with response for addr:%#x\n",
// Every thread's LSQ unit gets to check the snoop, not only the owner.
339 for (
ThreadID tid = 0; tid < numThreads; tid++) {
340 thread[tid].checkSnoop(pkt);
344 senderState->request()->packetReplied();
// Snoop-request fragment: logs the invalidation address and runs
// checkSnoop(pkt) on the LSQ unit of every thread index in [0, numThreads).
// The condition selecting which packets are handled is not visible here.
349 template <
class Impl>
358 DPRINTF(
LSQ,
"received invalidation for addr:%#x\n",
360 for (
ThreadID tid = 0; tid < numThreads; tid++) {
361 thread[tid].checkSnoop(pkt);
// Fragments of several aggregate query functions (their signatures are not
// visible in this view). Each loop walks a thread iterator range and adds a
// per-thread quantity to `total`.
// Sum of getCount(tid).
375 while (threads != end) {
378 total += getCount(tid);
// Sum of numLoads(tid).
393 while (threads != end) {
396 total += numLoads(tid);
// Sum of each thread unit's numStores().
411 while (threads != end) {
414 total += thread[tid].numStores();
// Sum of each thread unit's free load entries.
429 while (threads != end) {
432 total += thread[tid].numFreeLoadEntries();
// Sum of each thread unit's free store entries.
447 while (threads != end) {
450 total += thread[tid].numFreeStoreEntries();
// Single-thread variants: report the selected thread unit's free entries.
460 return thread[tid].numFreeLoadEntries();
467 return thread[tid].numFreeStoreEntries();
// Fragments of the full/empty query functions (signatures, early returns and
// the bodies of the Dynamic-policy branches are not visible in this view).
// Looks for a thread whose load queue and store queue are both not full.
477 while (threads != end) {
480 if (!(thread[tid].lqFull() || thread[tid].sqFull()))
// Per-thread variant: the Dynamic policy is handled separately; otherwise a
// thread is full when either of its queues is full.
493 if (lsqPolicy == SMTQueuePolicy::Dynamic)
496 return thread[tid].lqFull() || thread[tid].sqFull();
// Empty only when both the load queues and the store queues are empty.
503 return lqEmpty() && sqEmpty();
// Looks for a thread with a non-empty load queue.
513 while (threads != end) {
516 if (!thread[tid].lqEmpty())
// Looks for a thread with a non-empty store queue.
530 while (threads != end) {
533 if (!thread[tid].sqEmpty())
// Looks for a thread whose load queue is not full.
547 while (threads != end) {
550 if (!thread[tid].lqFull())
// Per-thread load-queue variant; Dynamic policy handled separately.
563 if (lsqPolicy == SMTQueuePolicy::Dynamic)
566 return thread[tid].lqFull();
// Loop of another query whose body is not visible in this view.
576 while (threads != end) {
// Per-thread store-queue variant; Dynamic policy handled separately.
592 if (lsqPolicy == SMTQueuePolicy::Dynamic)
595 return thread[tid].sqFull();
// Fragments of the stall, writeback and dump helpers (signatures and return
// statements are not visible in this view).
// Looks for a thread whose LSQ unit is not stalled.
605 while (threads != end) {
608 if (!thread[tid].isStalled())
// Per-thread variant; the Dynamic policy is handled separately.
619 if (lsqPolicy == SMTQueuePolicy::Dynamic)
622 return thread[tid].isStalled();
// Looks for a thread that has stores to write back.
632 while (threads != end) {
635 if (hasStoresToWB(tid))
// Loop of another query whose body is not visible in this view.
649 while (threads != end) {
// Dumps the instructions held by each visited thread's LSQ unit.
666 while (threads != end) {
669 thread[tid].dumpInsts();
// Request-creation fragment (signature and several lines are not visible in
// this view). Visible steps: classify the access, obtain or build the LSQ
// request, tag it, record the effective size on the instruction, issue the
// read or write through the CPU, and return the instruction's fault.
// An access counts as atomic when it is not a load and an AMO op was given.
683 M5_VAR_USED
bool isAtomic = !isLoad && amo_op;
685 ThreadID tid = cpu->contextToThread(inst->contextId());
686 auto cacheLineSize = cpu->cacheLineSize();
// Atomic accesses must never need a burst.
696 assert(!isAtomic || (isAtomic && !needs_burst));
// If translation already started for this instruction, reuse the request
// saved on it instead of building a new one.
700 if (inst->translationStarted()) {
701 req = inst->savedReq;
705 assert(
addr == 0x0lu);
708 }
else if (needs_burst) {
710 size, flags,
data, res);
713 size, flags,
data, res, std::move(amo_op));
718 req->
taskId(cpu->taskId());
731 inst->effSize = size;
732 inst->effAddrValid(
true);
// Keeps a copy of the underlying Request on the instruction.
735 inst->reqToVerify = std::make_shared<Request>(*req->
request());
// Loads are indexed by lqIdx, stores by sqIdx (the selecting branch is not
// visible in this view).
739 fault = cpu->read(req, inst->lqIdx);
741 fault = cpu->write(req,
data, inst->sqIdx);
747 inst->getFault() = fault;
749 inst->setMemAccPredicate(
false);
757 inst->traceData->setMem(
addr, size, flags);
759 return inst->getFault();
// Translation-finished fragment for a single (non-split) request. A squashed
// instruction takes a separate path (not visible here). Otherwise the
// strictly-ordered attribute, physical address and request flags are copied
// onto the instruction and the TranslationFinished flag is set.
772 if (
_inst->isSquashed()) {
775 _inst->strictlyOrdered(req->isStrictlyOrdered());
777 flags.
set(Flag::TranslationFinished);
779 _inst->physEffAddr = req->getPaddr();
780 _inst->memReqFlags = req->getFlags();
// Conditional swaps pass the value pointed to by _res as extra data.
781 if (req->isCondSwap()) {
783 req->setExtraData(*
_res);
// Translation-finished fragment for a split request (signature and some
// branches are not visible in this view).
// Locate the index of the fragment that just finished; it must be present.
801 for (
i = 0;
i < _requests.size() && _requests[
i] != req;
i++);
802 assert(
i < _requests.size());
// One fragment moves from "in translation" to "translated".
805 numInTranslationFragments--;
806 numTranslatedFragments++;
// Fold the fragment's flags into the main request.
809 mainReq->setFlags(req->getFlags());
// Only once every fragment is translated is the overall outcome decided.
811 if (numTranslatedFragments == _requests.size()) {
812 if (_inst->isSquashed()) {
813 this->squashTranslation();
815 _inst->strictlyOrdered(mainReq->isStrictlyOrdered());
816 flags.set(Flag::TranslationFinished);
817 _inst->translationCompleted(
true);
// After this scan `i` is the index of the first fragment whose fault is not
// NoFault, or _fault.size() when no fragment faulted.
819 for (
i = 0;
i < _fault.size() && _fault[
i] ==
NoFault;
i++);
821 _inst->physEffAddr = request(0)->getPaddr();
822 _inst->memReqFlags = mainReq->getFlags();
823 if (mainReq->isCondSwap()) {
824 assert (
i == _fault.size());
826 mainReq->setExtraData(*_res);
// No fault on any fragment: move to the Request state.
828 if (
i == _fault.size()) {
830 setState(State::Request);
// The assignments below record a fault on the instruction; the conditions
// choosing between _fault[i] (PartialFault) and _fault[0] are not visible.
832 _inst->fault = _fault[
i];
833 setState(State::PartialFault);
836 _inst->fault = _fault[0];
// Starts translation for a single request: no sub-request may exist yet.
848 assert(_requests.size() == 0);
850 this->addRequest(_addr, _size, _byteEnable);
// addRequest may have added nothing; only continue if a request exists.
852 if (_requests.size() > 0) {
853 _requests.back()->setReqInstSeqNum(_inst->seqNum);
854 _requests.back()->taskId(_taskId);
855 _inst->translationStarted(
true);
856 setState(State::Translation);
857 flags.set(Flag::TranslationStarted);
// Saved on the instruction so the request can be found again later.
859 _inst->savedReq =
this;
860 sendFragmentToTranslation(0);
// NOTE(review): presumably the no-request path; the enclosing branch is not
// visible in this view.
862 _inst->setMemAccPredicate(
false);
// Starts translation for a split request (some lines, including the
// computation of next_addr and final_addr, are not visible in this view).
// The access is cut into fragments, each with its slice of the byte-enable
// mask, and every fragment is sent to translation.
884 auto cacheLineSize = _port.cacheLineSize();
885 Addr base_addr = _addr;
888 uint32_t size_so_far = 0;
// mainReq describes the whole access (base address and full size).
890 mainReq = std::make_shared<Request>(base_addr,
891 _size, _flags, _inst->requestorId(),
892 _inst->instAddr(), _inst->contextId());
893 mainReq->setByteEnable(_byteEnable);
// Sets mainReq's physical address to 0.
899 mainReq->setPaddr(0);
// First fragment: from base_addr up to next_addr.
902 auto it_start = _byteEnable.begin();
903 auto it_end = _byteEnable.begin() + (next_addr - base_addr);
904 this->addRequest(base_addr, next_addr - base_addr,
906 size_so_far = next_addr - base_addr;
// Middle fragments: cacheLineSize bytes each, until final_addr is reached.
909 base_addr = next_addr;
910 while (base_addr != final_addr) {
911 auto it_start = _byteEnable.begin() + size_so_far;
912 auto it_end = _byteEnable.begin() + size_so_far + cacheLineSize;
913 this->addRequest(base_addr, cacheLineSize,
915 size_so_far += cacheLineSize;
916 base_addr += cacheLineSize;
// Trailing fragment: whatever bytes remain after the middle fragments.
920 if (size_so_far < _size) {
921 auto it_start = _byteEnable.begin() + size_so_far;
922 auto it_end = _byteEnable.end();
923 this->addRequest(base_addr, _size - size_so_far,
// Only continue if at least one fragment was created.
927 if (_requests.size() > 0) {
929 for (
auto&
r: _requests) {
930 r->setReqInstSeqNum(_inst->seqNum);
934 _inst->translationStarted(
true);
935 setState(State::Translation);
936 flags.set(Flag::TranslationStarted);
937 this->_inst->savedReq =
this;
// Reset the fragment counters and size the fault list to one slot per
// fragment before any translation is sent.
938 numInTranslationFragments = 0;
939 numTranslatedFragments = 0;
940 _fault.resize(_requests.size());
942 for (uint32_t
i = 0;
i < _requests.size();
i++) {
943 sendFragmentToTranslation(
i);
// NOTE(review): presumably the no-fragment path; the enclosing branch is not
// visible in this view.
946 _inst->setMemAccPredicate(
false);
// Counts one more fragment as in translation, then asks the MMU obtained from
// the port for a timing translation, passing the instruction's thread context
// and this object (the remaining arguments are not visible in this view).
954 numInTranslationFragments++;
955 _port.getMMUPtr()->translateTiming(
957 this->_inst->thread->getTC(),
this,
// Response for a single request: exactly one packet must be outstanding and
// it must be the first packet of this request; the port then completes the
// data access.
965 assert(_numOutstandingPackets == 1);
969 assert(pkt == _packets.front());
970 _port.completeDataAccess(pkt);
// Response for one packet of a split request. The packet is looked up in
// _packets (the loop body is not visible in this view) and must be found.
980 while (pktIdx < _packets.size() && pkt != _packets[pktIdx])
982 assert(pktIdx < _packets.size());
983 numReceivedPackets++;
984 state->outstanding--;
// Once every packet has been received, the port completes the access with
// `resp` (its construction is not visible in this view).
985 if (numReceivedPackets == _packets.size()) {
996 _port.completeDataAccess(resp);
// Packet-building fragment for a single request (packet creation itself is
// not visible in this view). Requires a sender state; ends with exactly one
// packet in _packets.
1002 template<
class Impl>
1006 assert(_senderState);
// Packets are built only when none exist yet.
1008 if (_packets.size() == 0) {
// The packet's data points at the instruction's memData buffer.
1013 _packets.back()->dataStatic(_inst->memData);
1014 _packets.back()->senderState = _senderState;
// Inside an HTM transaction the packet is tagged with the transaction UID;
// the format string and arguments below belong to a debug trace whose
// opening line is not visible in this view.
1019 if (_inst->inHtmTransactionalState()) {
1020 _packets.back()->setHtmTransactional(
1021 _inst->getHtmTransactionUid());
1024 "HTM %s pc=0x%lx - vaddr=0x%lx - paddr=0x%lx - htmUid=%u\n",
1025 isLoad() ?
"LD" :
"ST",
1027 _packets.back()->req->hasVaddr() ?
1028 _packets.back()->req->getVaddr() : 0lu,
1029 _packets.back()->getAddr(),
1030 _inst->getHtmTransactionUid());
1033 assert(_packets.size() == 1);
// Packet-building fragment for a split request (packet creation lines are
// not visible in this view). Visible steps: set up the main packet, then
// build one packet per fragment up to the first faulting fragment; at least
// one packet must exist afterwards.
1036 template<
class Impl>
1041 Addr base_address = _addr;
// Packets are built only when none exist yet.
1043 if (_packets.size() == 0) {
1047 _mainPacket->dataStatic(_inst->memData);
// Inside an HTM transaction the main packet is tagged with the transaction
// UID; the string and arguments below belong to a debug trace whose opening
// line is not visible in this view.
1052 if (_inst->inHtmTransactionalState()) {
1053 _mainPacket->setHtmTransactional(
1054 _inst->getHtmTransactionUid());
1056 "HTM LD.0 pc=0x%lx-vaddr=0x%lx-paddr=0x%lx-htmUid=%u\n",
1058 _mainPacket->req->hasVaddr() ?
1059 _mainPacket->req->getVaddr() : 0lu,
1060 _mainPacket->getAddr(),
1061 _inst->getHtmTransactionUid());
// Iteration stops at the first fragment whose fault is not NoFault.
// NOTE(review): `int i` is compared against the unsigned _requests.size().
1064 for (
int i = 0;
i < _requests.size() && _fault[
i] ==
NoFault;
i++) {
// Byte offset of this fragment relative to the start of the whole access.
1068 ptrdiff_t
offset =
r->getVaddr() - base_address;
// A buffer of the fragment's size is allocated with new[]; where it is
// released is not visible in this view.
1072 uint8_t* req_data =
new uint8_t[
r->getSize()];
1073 std::memcpy(req_data,
1079 _packets.push_back(pkt);
1084 if (_inst->inHtmTransactionalState()) {
1085 _packets.back()->setHtmTransactional(
1086 _inst->getHtmTransactionUid());
1088 "HTM %s.%d pc=0x%lx-vaddr=0x%lx-paddr=0x%lx-htmUid=%u\n",
1089 isLoad() ?
"LD" :
"ST",
1092 _packets.back()->req->hasVaddr() ?
1093 _packets.back()->req->getVaddr() : 0lu,
1094 _packets.back()->getAddr(),
1095 _inst->getHtmTransactionUid());
1099 assert(_packets.size() > 0);
// Sends the single packet through the LSQ unit. Nothing may be outstanding
// beforehand; on a successful send exactly one packet is outstanding.
1102 template<
class Impl>
1106 assert(_numOutstandingPackets == 0);
1107 if (lsqUnit()->trySendPacket(isLoad(), _packets.at(0)))
1108 _numOutstandingPackets = 1;
// Sends the next unsent packets in order. The next index is the number
// already received plus the number outstanding; sending stops at the first
// packet the LSQ unit fails to send or when all packets have been sent.
1111 template<
class Impl>
1116 while (numReceivedPackets + _numOutstandingPackets < _packets.size() &&
1117 lsqUnit()->trySendPacket(isLoad(),
1118 _packets.at(numReceivedPackets + _numOutstandingPackets))) {
1119 _numOutstandingPackets++;
// Performs the access through the request's local accessor and returns the
// accessor's result.
1123 template<
class Impl>
1128 return pkt->
req->localAccessor(
thread, pkt);
// Iterates over every sub-request in _requests; the loop body is not visible
// in this view.
1131 template<
class Impl>
1139 for (
auto r: _requests) {
1152 template<
class Impl>
// Cache-block hit test across the sub-requests (the signature and the lines
// that update and return is_hit are not visible in this view).
1174 template<
class Impl>
1178 bool is_hit =
false;
1179 for (
auto &
r: _requests) {
// Only a sub-request that already has a physical address can match; the
// match compares its masked physical address against blockAddr.
1189 if (
r->hasPaddr() && (
r->getPaddr() & blockMask) == blockAddr) {
// Forwards the timing response to the LSQ and returns its result.
1197 template <
class Impl>
1201 return lsq->recvTimingResp(pkt);
// Checks the packet against the address monitor of every CPU thread (the
// action taken on a match is not visible in this view), then forwards the
// snoop request to the LSQ.
1204 template <
class Impl>
1208 for (
ThreadID tid = 0; tid <
cpu->numThreads; tid++) {
1209 if (
cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
1213 lsq->recvTimingSnoopReq(pkt);
// Forwards the retry notification to the LSQ.
1216 template <
class Impl>
1220 lsq->recvReqRetry();
// Constructor fragment (the signature and most of the body are not visible
// in this view). The trailing arguments of the visible initializer are three
// null pointers.
1223 template<
class Impl>
1228 nullptr, nullptr, nullptr)
// Stamps the newest sub-request with the CPU's total instruction count.
1238 _requests.back()->setInstCount(
_inst->getCpuPtr()->totalInsts());
1244 _inst->savedReq =
this;
// NOTE(review): presumably the branch taken when no sub-request was created;
// the enclosing condition is not visible in this view.
1248 panic(
"unexpected behaviour");
// Marks translation as both started and finished, on the request's flags and
// on the instruction, and moves directly to the Request state. No call that
// performs a translation is visible in this fragment.
1252 template<
class Impl>
1261 flags.set(Flag::TranslationStarted);
1262 flags.set(Flag::TranslationFinished);
1264 _inst->translationStarted(
true);
1265 _inst->translationCompleted(
true);
1267 setState(State::Request);
// Panics unconditionally in the visible fragment: reaching this code is
// treated as a fatal error.
1270 template<
class Impl>
1275 panic(
"unexpected behaviour");
1278 #endif//__CPU_O3_LSQ_IMPL_HH__