42 #ifndef __CPU_O3_LSQ_IMPL_HH__
43 #define __CPU_O3_LSQ_IMPL_HH__
52 #include "debug/Drain.hh"
53 #include "debug/Fetch.hh"
54 #include "debug/HtmCpu.hh"
55 #include "debug/LSQ.hh"
56 #include "debug/Writeback.hh"
57 #include "params/DerivO3CPU.hh"
// LSQ constructor (excerpt): caches CPU/IEW pointers, copies LQ/SQ sizing and
// cache-port limits out of the DerivO3CPU params, then reports the selected
// SMT LSQ sharing policy and initializes each per-thread LSQ unit.
63 : cpu(cpu_ptr), iewStage(iew_ptr),
65 cacheStorePorts(params->cacheStorePorts), usedStorePorts(0),
66 cacheLoadPorts(params->cacheLoadPorts), usedLoadPorts(0),
67 lsqPolicy(params->smtLSQPolicy),
68 LQEntries(params->LQEntries),
69 SQEntries(params->SQEntries),
// maxLQEntries/maxSQEntries derive the per-thread cap from the policy,
// total entries, thread count and the SMT threshold (via maxLSQAllocation).
70 maxLQEntries(maxLSQAllocation(lsqPolicy, LQEntries, params->numThreads,
71 params->smtLSQThreshold)),
72 maxSQEntries(maxLSQAllocation(lsqPolicy, SQEntries, params->numThreads,
73 params->smtLSQThreshold)),
74 dcachePort(this, cpu_ptr),
75 numThreads(params->numThreads)
// Policy dispatch: Dynamic (fully shared), Partitioned (static split),
// Threshold (shared up to smtLSQThreshold per thread); anything else panics.
84 if (
lsqPolicy == SMTQueuePolicy::Dynamic) {
85 DPRINTF(
LSQ,
"LSQ sharing policy set to Dynamic\n");
86 }
else if (
lsqPolicy == SMTQueuePolicy::Partitioned) {
// NOTE(review): this branch logs under the Fetch debug flag while the
// sibling branches use LSQ — confirm that is intentional.
87 DPRINTF(Fetch,
"LSQ sharing policy set to Partitioned: "
88 "%i entries per LQ | %i entries per SQ\n",
90 }
else if (
lsqPolicy == SMTQueuePolicy::Threshold) {
// Threshold only makes sense if it exceeds the per-thread base sizes.
92 assert(params->smtLSQThreshold > params->LQEntries);
93 assert(params->smtLSQThreshold > params->SQEntries);
95 DPRINTF(
LSQ,
"LSQ sharing policy set to Threshold: "
96 "%i entries per LQ | %i entries per SQ\n",
99 panic(
"Invalid LSQ sharing policy. Options are: Dynamic, "
100 "Partitioned, Threshold");
// Per-thread LSQUnit initialization (presumably inside a loop over tid —
// the loop header is elided from this excerpt).
106 thread[tid].init(
cpu, iew_ptr, params,
this, tid);
// name(): the LSQ names itself as a child of the IEW stage for tracing/stats.
116 return iewStage->name() +
".lsq";
// setActiveThreads(): store the shared active-thread list; must be non-null.
123 activeThreads = at_ptr;
124 assert(activeThreads != 0);
127 template <
class Impl>
// drainSanityCheck(): delegate the drain-state check to every thread's unit.
133 for (
ThreadID tid = 0; tid < numThreads; tid++)
134 thread[tid].drainSanityCheck();
137 template <
class Impl>
// isDrained() (excerpt): reports why draining is incomplete — a non-empty
// load queue or store queue blocks the drain.
144 DPRINTF(Drain,
"Not drained, LQ not empty.\n");
149 DPRINTF(Drain,
"Not drained, SQ not empty.\n");
156 template <
class Impl>
// takeOverFrom(): reset cache-blocked state and hand over each thread unit
// when this CPU takes over execution from another.
161 _cacheBlocked =
false;
163 for (
ThreadID tid = 0; tid < numThreads; tid++) {
164 thread[tid].takeOverFrom();
168 template <
class Impl>
// Port accounting (excerpt): when the last load port is consumed and the
// cache is not otherwise blocked, tell IEW it may resume issuing.
173 if (usedLoadPorts == cacheLoadPorts && !_cacheBlocked)
174 iewStage->cacheUnblocked();
// cacheBlocked(): plain accessor for the blocked flag.
184 return _cacheBlocked;
// cachePortAvailable(): a port is free while the used count is below the
// configured per-cycle limit, tracked separately for loads and stores.
200 ret = usedLoadPorts < cacheLoadPorts;
202 ret = usedStorePorts < cacheStorePorts;
// cachePortBusy(): callers must have checked availability first.
211 assert(cachePortAvailable(is_load));
// insertLoad()/insertStore(): route the instruction to its thread's unit.
223 ThreadID tid = load_inst->threadNumber;
225 thread[tid].insertLoad(load_inst);
232 ThreadID tid = store_inst->threadNumber;
234 thread[tid].insertStore(store_inst);
// executeLoad()/executeStore(): delegate execution to the per-thread unit
// (tid presumably taken from the instruction — elided in this excerpt).
243 return thread[tid].executeLoad(inst);
252 return thread[tid].executeStore(inst);
// writebackStores(): walk the active threads and write back any stores that
// are ready, logging the count under the Writeback debug flag.
262 while (threads != end) {
265 if (numStoresToWB(tid) > 0) {
266 DPRINTF(Writeback,
"[tid:%i] Writing back stores. %i stores "
267 "available for Writeback.\n", tid, numStoresToWB(tid));
270 thread[tid].writebackStores();
// violation(): true if any active thread's unit detected a memory-order
// violation (excerpt — surrounding return logic elided).
282 while (threads != end) {
285 if (thread[tid].violation())
292 template <
class Impl>
// recvReqRetry(): the dcache can accept requests again — unblock IEW and let
// every active thread's unit retry its pending access.
296 iewStage->cacheUnblocked();
299 for (
ThreadID tid : *activeThreads) {
300 thread[tid].recvRetry();
304 template <
class Impl>
// completeDataAccess(): map the packet's context id back to a thread and
// complete the access on that thread's unit.
309 thread[cpu->contextToThread(senderState->contextId())]
310 .completeDataAccess(pkt);
313 template <
class Impl>
// recvTimingResp() (excerpt): log error responses, require a known sender
// state, then hand the response to the owning thread's unit.
318 DPRINTF(
LSQ,
"Got error packet back for address: %#X\n",
322 panic_if(!senderState,
"Got packet back with unknown sender state\n");
324 thread[cpu->contextToThread(senderState->contextId())].recvTimingResp(pkt);
// Invalidating responses are additionally snooped by every thread so stale
// loads can be squashed, then the request is marked as replied.
338 DPRINTF(
LSQ,
"received invalidation with response for addr:%#x\n",
341 for (
ThreadID tid = 0; tid < numThreads; tid++) {
342 thread[tid].checkSnoop(pkt);
346 senderState->request()->packetReplied();
351 template <
class Impl>
// recvTimingSnoopReq() (excerpt): on an external invalidation, let every
// thread's unit snoop the packet so speculative loads to that block can be
// detected and squashed.
360 DPRINTF(
LSQ,
"received invalidation for addr:%#x\n",
362 for (
ThreadID tid = 0; tid < numThreads; tid++) {
363 thread[tid].checkSnoop(pkt);
// Aggregate accessors (excerpt): each iterates the active-thread list and
// sums or tests the per-thread LSQUnit state. The shared pattern is
//   while (threads != end) { tid = *threads++; ...thread[tid]...; }
// with the iterator setup elided from this excerpt.
// getCount(): total in-flight memory instructions across threads.
377 while (threads != end) {
380 total += getCount(tid);
// numLoads(): total loads across threads.
395 while (threads != end) {
398 total += numLoads(tid);
// numStores(): total stores across threads.
413 while (threads != end) {
416 total += thread[tid].numStores();
// numFreeLoadEntries()/numFreeStoreEntries(): free LQ/SQ slots, either
// summed over all threads or queried for one tid.
431 while (threads != end) {
434 total += thread[tid].numFreeLoadEntries();
449 while (threads != end) {
452 total += thread[tid].numFreeStoreEntries();
462 return thread[tid].numFreeLoadEntries();
469 return thread[tid].numFreeStoreEntries();
// isFull(): full only if every active thread's LQ or SQ is full.
479 while (threads != end) {
482 if (!(thread[tid].lqFull() || thread[tid].sqFull()))
// Per-tid full check: under the Dynamic policy the queues are shared, so the
// single-thread answer is taken from thread 0 / the shared state (the
// early-return target is elided here).
495 if (lsqPolicy == SMTQueuePolicy::Dynamic)
498 return thread[tid].lqFull() || thread[tid].sqFull();
// isEmpty(): empty iff both the LQ and SQ are empty everywhere.
505 return lqEmpty() && sqEmpty();
515 while (threads != end) {
518 if (!thread[tid].lqEmpty())
532 while (threads != end) {
535 if (!thread[tid].sqEmpty())
// lqFull()/sqFull(): any-thread-not-full means not full overall.
549 while (threads != end) {
552 if (!thread[tid].lqFull())
565 if (lsqPolicy == SMTQueuePolicy::Dynamic)
568 return thread[tid].lqFull();
578 while (threads != end) {
594 if (lsqPolicy == SMTQueuePolicy::Dynamic)
597 return thread[tid].sqFull();
// isStalled(): stalled only when every active thread is stalled.
607 while (threads != end) {
610 if (!thread[tid].isStalled())
621 if (lsqPolicy == SMTQueuePolicy::Dynamic)
624 return thread[tid].isStalled();
// hasStoresToWB()/dumpInsts(): fan out over the active threads.
634 while (threads != end) {
637 if (hasStoresToWB(tid))
651 while (threads != end) {
668 while (threads != end) {
671 thread[tid].dumpInsts();
// pushRequest() (excerpt): entry point for a load/store/atomic memory
// request from the CPU. Builds (or re-uses) an LSQRequest, kicks off
// translation, and finally issues the access via cpu->read()/cpu->write().
685 bool isAtomic M5_VAR_USED = !isLoad && amo_op;
687 ThreadID tid = cpu->contextToThread(inst->contextId());
688 auto cacheLineSize = cpu->cacheLineSize();
// Atomics are never split across cache lines.
698 assert(!isAtomic || (isAtomic && !needs_burst));
// If translation already started (e.g. a re-execution), reuse the request
// saved on the instruction rather than building a new one.
702 if (inst->translationStarted()) {
703 req = inst->savedReq;
// Predicated-false path presumably — the request carries a zero address.
707 assert(
addr == 0x0lu);
710 }
// Cross-cache-line accesses get a SplitDataRequest; otherwise a
// SingleDataRequest (constructor calls elided in this excerpt).
else if (needs_burst) {
712 size, flags,
data, res);
715 size, flags,
data, res, std::move(amo_op));
718 if (!byte_enable.empty()) {
// Tag the request with the CPU's task id for tracing/accounting.
722 req->
taskId(cpu->taskId());
// Record effective address/size on the instruction once translation is done.
735 inst->effSize = size;
736 inst->effAddrValid(
true);
// Checker-CPU support: keep a copy of the request for later verification.
739 inst->reqToVerify = std::make_shared<Request>(*req->
request());
// Issue the access through the CPU interface (lqIdx/sqIdx select the slot).
743 fault = cpu->read(req, inst->lqIdx);
745 fault = cpu->write(req,
data, inst->sqIdx);
751 inst->getFault() = fault;
// A faulting/predicated-off access will not actually touch memory.
753 inst->setMemAccPredicate(
false);
// Record the access in the exec trace if tracing is enabled.
761 inst->traceData->setMem(
addr, size, flags);
763 return inst->getFault();
// SingleDataRequest::finish() (excerpt): translation-complete callback.
// Squashed instructions abandon the result; otherwise copy the translated
// physical address and flags onto the instruction.
776 if (
_inst->isSquashed()) {
779 _inst->strictlyOrdered(req->isStrictlyOrdered());
781 flags.
set(Flag::TranslationFinished);
783 _inst->physEffAddr = req->getPaddr();
784 _inst->memReqFlags = req->getFlags();
// Conditional-swap requests carry the expected value as extra data.
785 if (req->isCondSwap()) {
787 req->setExtraData(*
_res);
// SplitDataRequest::finish(): locate which fragment this callback is for
// (intentional empty-body search loop), then book-keep fragment counts.
805 for (
i = 0;
i < _requests.size() && _requests[
i] != req;
i++);
806 assert(
i < _requests.size());
809 numInTranslationFragments--;
810 numTranslatedFragments++;
// Accumulate per-fragment flags onto the main (virtual) request.
813 mainReq->setFlags(req->getFlags());
// Only act once every fragment has translated.
815 if (numTranslatedFragments == _requests.size()) {
816 if (_inst->isSquashed()) {
817 this->squashTranslation();
819 _inst->strictlyOrdered(mainReq->isStrictlyOrdered());
820 flags.set(Flag::TranslationFinished);
821 _inst->translationCompleted(
true);
// Find the first faulting fragment, if any (empty-body search loop).
823 for (
i = 0;
i < _fault.size() && _fault[
i] ==
NoFault;
i++);
825 _inst->physEffAddr = request(0)->getPaddr();
826 _inst->memReqFlags = mainReq->getFlags();
827 if (mainReq->isCondSwap()) {
// CondSwap requires all fragments to have translated fault-free.
828 assert (
i == _fault.size());
830 mainReq->setExtraData(*_res);
// No faults: proceed to the Request state; otherwise record the first
// fault and enter PartialFault.
832 if (
i == _fault.size()) {
834 setState(State::Request);
836 _inst->fault = _fault[
i];
837 setState(State::PartialFault);
840 _inst->fault = _fault[0];
// SingleDataRequest::initiateTranslation() (excerpt): build the one backing
// Request and send it to the TLB; addRequest() may produce nothing when the
// byte-enable mask disables the whole access.
852 assert(_requests.size() == 0);
854 this->addRequest(_addr, _size, _byteEnable);
856 if (_requests.size() > 0) {
857 _requests.back()->setReqInstSeqNum(_inst->seqNum);
858 _requests.back()->taskId(_taskId);
859 _inst->translationStarted(
true);
860 setState(State::Translation);
861 flags.set(Flag::TranslationStarted);
// Save this LSQRequest on the instruction so a replay can reuse it.
863 _inst->savedReq =
this;
864 sendFragmentToTranslation(0);
// No request was generated: the access is effectively predicated off.
866 _inst->setMemAccPredicate(
false);
// SplitDataRequest::initiateTranslation() (excerpt): carve a cache-line-
// crossing access into per-line fragments, then translate all of them.
888 auto cacheLineSize = _port.cacheLineSize();
889 Addr base_addr = _addr;
892 uint32_t size_so_far = 0;
// mainReq is a virtual "whole access" request used for bookkeeping; it is
// never sent to memory, hence the dummy paddr below.
894 mainReq = std::make_shared<Request>(base_addr,
895 _size, _flags, _inst->requestorId(),
896 _inst->instAddr(), _inst->contextId());
897 if (!_byteEnable.empty()) {
898 mainReq->setByteEnable(_byteEnable);
905 mainReq->setPaddr(0);
// First fragment: from base_addr up to the next cache-line boundary; the
// byte-enable mask (when present) is sliced to match the fragment.
908 if (_byteEnable.empty()) {
909 this->addRequest(base_addr, next_addr - base_addr, _byteEnable);
911 auto it_start = _byteEnable.begin();
912 auto it_end = _byteEnable.begin() + (next_addr - base_addr);
913 this->addRequest(base_addr, next_addr - base_addr,
916 size_so_far = next_addr - base_addr;
// Middle fragments: whole cache lines until the final line is reached.
919 base_addr = next_addr;
920 while (base_addr != final_addr) {
921 if (_byteEnable.empty()) {
922 this->addRequest(base_addr, cacheLineSize, _byteEnable);
924 auto it_start = _byteEnable.begin() + size_so_far;
925 auto it_end = _byteEnable.begin() + size_so_far + cacheLineSize;
926 this->addRequest(base_addr, cacheLineSize,
929 size_so_far += cacheLineSize;
930 base_addr += cacheLineSize;
// Tail fragment: whatever remains of the access past the last full line.
934 if (size_so_far < _size) {
935 if (_byteEnable.empty()) {
936 this->addRequest(base_addr, _size - size_so_far, _byteEnable);
938 auto it_start = _byteEnable.begin() + size_so_far;
939 auto it_end = _byteEnable.end();
940 this->addRequest(base_addr, _size - size_so_far,
// If any fragments were generated, tag them all and launch translation.
945 if (_requests.size() > 0) {
947 for (
auto&
r: _requests) {
948 r->setReqInstSeqNum(_inst->seqNum);
952 _inst->translationStarted(
true);
953 setState(State::Translation);
954 flags.set(Flag::TranslationStarted);
955 this->_inst->savedReq =
this;
956 numInTranslationFragments = 0;
957 numTranslatedFragments = 0;
// One fault slot per fragment; finish() scans this for the first fault.
958 _fault.resize(_requests.size());
960 for (uint32_t
i = 0;
i < _requests.size();
i++) {
961 sendFragmentToTranslation(
i);
// All fragments masked off by byte enables: access is predicated off.
964 _inst->setMemAccPredicate(
false);
// sendFragmentToTranslation() (excerpt): hand one fragment to the data TLB
// for timing translation; this object is the completion callback.
972 numInTranslationFragments++;
973 _port.dTLB()->translateTiming(
975 this->_inst->thread->getTC(),
this,
// SingleDataRequest::recvTimingResp(): exactly one packet is in flight and
// the response must be for it.
983 assert(_numOutstandingPackets == 1);
987 assert(pkt == _packets.front());
988 _port.completeDataAccess(pkt);
// SplitDataRequest::recvTimingResp() (excerpt): match the response to its
// fragment packet, count it, and complete once all fragments have replied.
998 while (pktIdx < _packets.size() && pkt != _packets[pktIdx])
1000 assert(pktIdx < _packets.size());
1001 numReceivedPackets++;
1002 state->outstanding--;
1003 if (numReceivedPackets == _packets.size()) {
// resp is presumably the merged main packet delivered to the LSQ unit.
1014 _port.completeDataAccess(resp);
1020 template<
class Impl>
// SingleDataRequest::buildPackets() (excerpt): lazily build the one packet
// for this access, pointing it at the instruction's data buffer and sender
// state; idempotent — a second call finds _packets already populated.
1024 assert(_senderState);
1026 if (_packets.size() == 0) {
1031 _packets.back()->dataStatic(_inst->memData);
1032 _packets.back()->senderState = _senderState;
// Hardware transactional memory: propagate the transaction UID onto the
// packet and trace it under the HtmCpu flag.
1037 if (_inst->inHtmTransactionalState()) {
1038 _packets.back()->setHtmTransactional(
1039 _inst->getHtmTransactionUid());
1042 "HTM %s pc=0x%lx - vaddr=0x%lx - paddr=0x%lx - htmUid=%u\n",
1043 isLoad() ?
"LD" :
"ST",
1045 _packets.back()->req->hasVaddr() ?
1046 _packets.back()->req->getVaddr() : 0lu,
1047 _packets.back()->getAddr(),
1048 _inst->getHtmTransactionUid());
1051 assert(_packets.size() == 1);
1054 template<
class Impl>
// SplitDataRequest::buildPackets() (excerpt): build the virtual main packet
// plus one real packet per translated fragment. For stores, each fragment
// packet gets its own copy of the relevant slice of the store data.
1059 Addr base_address = _addr;
1061 if (_packets.size() == 0) {
1065 _mainPacket->dataStatic(_inst->memData);
// Propagate the HTM transaction UID onto the main packet when inside a
// transaction, with HtmCpu tracing.
1070 if (_inst->inHtmTransactionalState()) {
1071 _mainPacket->setHtmTransactional(
1072 _inst->getHtmTransactionUid());
1074 "HTM LD.0 pc=0x%lx-vaddr=0x%lx-paddr=0x%lx-htmUid=%u\n",
1076 _mainPacket->req->hasVaddr() ?
1077 _mainPacket->req->getVaddr() : 0lu,
1078 _mainPacket->getAddr(),
1079 _inst->getHtmTransactionUid());
// Build a packet per fragment, stopping at the first faulting fragment.
1082 for (
int i = 0;
i < _requests.size() && _fault[
i] ==
NoFault;
i++) {
// offset of this fragment within the whole access's data buffer.
1086 ptrdiff_t
offset =
r->getVaddr() - base_address;
// NOTE(review): raw new[] for the per-fragment store data — ownership is
// presumably transferred to the packet (dataDynamic) in elided code; confirm
// it is freed with the packet.
1090 uint8_t* req_data =
new uint8_t[
r->getSize()];
1091 std::memcpy(req_data,
1097 _packets.push_back(pkt);
// Mark each fragment packet transactional as well, mirroring the main one.
1102 if (_inst->inHtmTransactionalState()) {
1103 _packets.back()->setHtmTransactional(
1104 _inst->getHtmTransactionUid());
1106 "HTM %s.%d pc=0x%lx-vaddr=0x%lx-paddr=0x%lx-htmUid=%u\n",
1107 isLoad() ?
"LD" :
"ST",
1110 _packets.back()->req->hasVaddr() ?
1111 _packets.back()->req->getVaddr() : 0lu,
1112 _packets.back()->getAddr(),
1113 _inst->getHtmTransactionUid());
1117 assert(_packets.size() > 0);
1120 template<
class Impl>
// SingleDataRequest::sendPacketToCache(): try to issue the single packet;
// on success exactly one packet is outstanding.
1124 assert(_numOutstandingPackets == 0);
1125 if (lsqUnit()->trySendPacket(isLoad(), _packets.at(0)))
1126 _numOutstandingPackets = 1;
1129 template<
class Impl>
// SplitDataRequest::sendPacketToCache(): issue as many remaining fragment
// packets as the cache will accept this cycle, in order.
1134 while (numReceivedPackets + _numOutstandingPackets < _packets.size() &&
1135 lsqUnit()->trySendPacket(isLoad(),
1136 _packets.at(numReceivedPackets + _numOutstandingPackets))) {
1137 _numOutstandingPackets++;
1141 template<
class Impl>
// handleLocalAccess() (excerpt): device/local accesses bypass the cache and
// are serviced directly through the request's local accessor.
1146 return pkt->
req->localAccessor(
thread, pkt);
1149 template<
class Impl>
// Split variant presumably iterates every fragment request (body elided).
1157 for (
auto r: _requests) {
1170 template<
class Impl>
1192 template<
class Impl>
// isCacheBlockHit() (excerpt): true if any translated fragment's physical
// address falls in the given cache block (blockAddr/blockMask supplied by
// the caller).
1196 bool is_hit =
false;
1197 for (
auto &
r: _requests) {
1207 if (
r->hasPaddr() && (
r->getPaddr() & blockMask) == blockAddr) {
1215 template <
class Impl>
// DcachePort::recvTimingResp(): forward cache responses to the owning LSQ.
1219 return lsq->recvTimingResp(pkt);
1222 template <
class Impl>
// DcachePort::recvTimingSnoopReq() (excerpt): first wake any CPU address
// monitors (ARM WFE/x86 MONITOR-style) watching this address, then let the
// LSQ snoop the packet.
1226 for (
ThreadID tid = 0; tid <
cpu->numThreads; tid++) {
1227 if (
cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
1231 lsq->recvTimingSnoopReq(pkt);
1234 template <
class Impl>
// DcachePort::recvReqRetry(): forward the retry notification to the LSQ.
1238 lsq->recvReqRetry();
1241 template<
class Impl>
// UnsquashableDirectRequest (excerpt): a special LSQRequest used for
// accesses that never go through normal translation/squash handling
// (e.g. HTM commands); constructed with no data/res/amo payload.
1246 nullptr, nullptr, nullptr)
// Tag the request with the CPU's retired-instruction count (presumably for
// HTM checkpoint identification — confirm against the full source).
1256 _requests.back()->setInstCount(
_inst->getCpuPtr()->totalInsts());
1262 _inst->savedReq =
this;
// This request type must never reach this code path.
1266 panic(
"unexpected behaviour");
1270 template<
class Impl>
// initiateTranslation(): no real translation happens — mark both phases
// complete immediately and go straight to the Request state.
1279 flags.set(Flag::TranslationStarted);
1280 flags.set(Flag::TranslationFinished);
1282 _inst->translationStarted(
true);
1283 _inst->translationCompleted(
true);
1285 setState(State::Request);
1288 template<
class Impl>
// finish(): translation callbacks are not expected for this request type.
1293 panic(
"unexpected behaviour");
1296 #endif//__CPU_O3_LSQ_IMPL_HH__