54 #include "debug/Drain.hh"
55 #include "debug/Fetch.hh"
56 #include "debug/HtmCpu.hh"
57 #include "debug/LSQ.hh"
58 #include "debug/Writeback.hh"
59 #include "params/BaseO3CPU.hh"
82 params.smtLSQThreshold)),
84 params.smtLSQThreshold)),
95 if (
lsqPolicy == SMTQueuePolicy::Dynamic) {
96 DPRINTF(
LSQ,
"LSQ sharing policy set to Dynamic\n");
97 }
else if (
lsqPolicy == SMTQueuePolicy::Partitioned) {
99 "%i entries per LQ | %i entries per SQ\n",
101 }
else if (
lsqPolicy == SMTQueuePolicy::Threshold) {
103 assert(params.smtLSQThreshold > params.LQEntries);
104 assert(params.smtLSQThreshold > params.SQEntries);
106 DPRINTF(
LSQ,
"LSQ sharing policy set to Threshold: "
107 "%i entries per LQ | %i entries per SQ\n",
110 panic(
"Invalid LSQ sharing policy. Options are: Dynamic, "
111 "Partitioned, Threshold");
117 thread[tid].init(
cpu, iew_ptr, params,
this, tid);
151 DPRINTF(Drain,
"Not drained, LQ not empty.\n");
156 DPRINTF(Drain,
"Not drained, SQ not empty.\n");
170 thread[tid].takeOverFrom();
223 ThreadID tid = load_inst->threadNumber;
225 thread[tid].insertLoad(load_inst);
231 ThreadID tid = store_inst->threadNumber;
233 thread[tid].insertStore(store_inst);
241 return thread[tid].executeLoad(inst);
249 return thread[tid].executeStore(inst);
255 thread.at(tid).commitLoads(youngest_inst);
261 thread.at(tid).commitStores(youngest_inst);
270 while (threads != end) {
274 DPRINTF(Writeback,
"[tid:%i] Writing back stores. %i stores "
278 thread[tid].writebackStores();
285 thread.at(tid).squash(squashed_num);
295 while (threads != end) {
310 return thread.at(tid).getMemDepViolator();
316 return thread.at(tid).getLoadHead();
322 return thread.at(tid).getLoadHeadSeqNum();
328 return thread.at(tid).getStoreHead();
334 return thread.at(tid).getStoreHeadSeqNum();
349 return thread[tid].numHtmStarts();
357 return thread[tid].numHtmStops();
364 thread[tid].resetHtmStartsStops();
373 return thread[tid].getLatestHtmUid();
380 thread[tid].setLastRetiredHtmUid(htmUid);
406 DPRINTF(
LSQ,
"Got error packet back for address: %#X\n",
410 panic_if(!request,
"Got packet back with unknown sender state\n");
426 DPRINTF(
LSQ,
"received invalidation with response for addr:%#x\n",
430 thread[tid].checkSnoop(pkt);
451 DPRINTF(
LSQ,
"received invalidation for addr:%#x\n",
454 thread[tid].checkSnoop(pkt);
456 }
else if (pkt->
req && pkt->
req->isTlbiExtSync()) {
463 for (
auto& unit :
thread) {
464 unit.startStaleTranslationFlush();
480 while (threads != end) {
497 while (threads != end) {
514 while (threads != end) {
531 while (threads != end) {
548 while (threads != end) {
560 return thread[tid].numFreeLoadEntries();
566 return thread[tid].numFreeStoreEntries();
575 while (threads != end) {
590 if (
lsqPolicy == SMTQueuePolicy::Dynamic)
608 while (threads != end) {
624 while (threads != end) {
640 while (threads != end) {
655 if (
lsqPolicy == SMTQueuePolicy::Dynamic)
658 return thread[tid].lqFull();
667 while (threads != end) {
682 if (
lsqPolicy == SMTQueuePolicy::Dynamic)
685 return thread[tid].sqFull();
694 while (threads != end) {
707 if (
lsqPolicy == SMTQueuePolicy::Dynamic)
710 return thread[tid].isStalled();
719 while (threads != end) {
732 return thread.at(tid).hasStoresToWB();
738 return thread.at(tid).numStoresToWB();
747 while (threads != end) {
760 return thread.at(tid).willWB();
769 while (threads != end) {
779 thread.at(tid).dumpInsts();
790 [[maybe_unused]]
bool isAtomic = !isLoad && amo_op;
792 ThreadID tid =
cpu->contextToThread(inst->contextId());
793 auto cacheLineSize =
cpu->cacheLineSize();
803 assert(!isAtomic || (isAtomic && !needs_burst));
808 if (inst->translationStarted()) {
809 request = inst->savedRequest;
812 if (htm_cmd || tlbi_cmd) {
813 assert(
addr == 0x0lu);
816 }
else if (needs_burst) {
821 size,
flags,
data, res, std::move(amo_op));
838 inst->effAddr = request->
getVaddr();
839 inst->effSize = size;
840 inst->effAddrValid(
true);
843 inst->reqToVerify = std::make_shared<Request>(*request->
req());
847 fault =
read(request, inst->lqIdx);
849 fault =
write(request,
data, inst->sqIdx);
855 inst->getFault() = fault;
857 inst->setMemAccPredicate(
false);
865 inst->traceData->setMem(
addr, size,
flags);
867 return inst->getFault();
879 if (
_inst->isSquashed()) {
882 _inst->strictlyOrdered(request->isStrictlyOrdered());
884 flags.
set(Flag::TranslationFinished);
886 _inst->physEffAddr = request->getPaddr();
887 _inst->memReqFlags = request->getFlags();
888 if (request->isCondSwap()) {
890 request->setExtraData(*
_res);
907 for (
i = 0;
i < _reqs.size() && _reqs[
i] != req;
i++);
908 assert(
i < _reqs.size());
911 numInTranslationFragments--;
912 numTranslatedFragments++;
915 _mainReq->setFlags(req->getFlags());
917 if (numTranslatedFragments == _reqs.size()) {
918 if (_inst->isSquashed()) {
921 _inst->strictlyOrdered(_mainReq->isStrictlyOrdered());
922 flags.set(Flag::TranslationFinished);
923 _inst->translationCompleted(
true);
925 for (
i = 0;
i < _fault.size() && _fault[
i] ==
NoFault;
i++);
928 _inst->memReqFlags = _mainReq->getFlags();
929 if (_mainReq->isCondSwap()) {
930 assert (
i == _fault.size());
932 _mainReq->setExtraData(*_res);
934 if (
i == _fault.size()) {
936 setState(State::Request);
938 _inst->fault = _fault[
i];
939 setState(State::PartialFault);
942 _inst->fault = _fault[0];
953 assert(_reqs.size() == 0);
955 addReq(_addr, _size, _byteEnable);
957 if (_reqs.size() > 0) {
958 _reqs.back()->setReqInstSeqNum(_inst->seqNum);
959 _reqs.back()->taskId(_taskId);
960 _inst->translationStarted(
true);
961 setState(State::Translation);
962 flags.set(Flag::TranslationStarted);
964 _inst->savedRequest =
this;
965 sendFragmentToTranslation(0);
967 _inst->setMemAccPredicate(
false);
986 auto cacheLineSize = _port.cacheLineSize();
987 Addr base_addr = _addr;
990 uint32_t size_so_far = 0;
992 _mainReq = std::make_shared<Request>(base_addr,
993 _size, _flags, _inst->requestorId(),
994 _inst->pcState().instAddr(), _inst->contextId());
995 _mainReq->setByteEnable(_byteEnable);
1001 _mainReq->setPaddr(0);
1004 auto it_start = _byteEnable.begin();
1005 auto it_end = _byteEnable.begin() + (next_addr - base_addr);
1006 addReq(base_addr, next_addr - base_addr,
1008 size_so_far = next_addr - base_addr;
1011 base_addr = next_addr;
1012 while (base_addr != final_addr) {
1013 auto it_start = _byteEnable.begin() + size_so_far;
1014 auto it_end = _byteEnable.begin() + size_so_far + cacheLineSize;
1015 addReq(base_addr, cacheLineSize,
1017 size_so_far += cacheLineSize;
1018 base_addr += cacheLineSize;
1022 if (size_so_far < _size) {
1023 auto it_start = _byteEnable.begin() + size_so_far;
1024 auto it_end = _byteEnable.end();
1025 addReq(base_addr, _size - size_so_far,
1029 if (_reqs.size() > 0) {
1031 for (
auto&
r: _reqs) {
1032 r->setReqInstSeqNum(_inst->seqNum);
1036 _inst->translationStarted(
true);
1037 setState(State::Translation);
1038 flags.set(Flag::TranslationStarted);
1039 _inst->savedRequest =
this;
1040 numInTranslationFragments = 0;
1041 numTranslatedFragments = 0;
1042 _fault.resize(_reqs.size());
1044 for (uint32_t
i = 0;
i < _reqs.size();
i++) {
1045 sendFragmentToTranslation(
i);
1048 _inst->setMemAccPredicate(
false);
1054 _state(
State::NotIssued),
1055 _port(*port), _inst(inst), _data(nullptr),
1056 _res(nullptr), _addr(0), _size(0), _flags(0),
1057 _numOutstandingPackets(0), _amo_op(nullptr)
1060 flags.
set(Flag::WriteBackToRegister,
1061 _inst->isStoreConditional() ||
_inst->isAtomic() ||
1071 bool stale_translation)
1072 : _state(
State::NotIssued),
1073 numTranslatedFragments(0),
1074 numInTranslationFragments(0),
1075 _port(*port), _inst(inst), _data(
data),
1076 _res(res), _addr(
addr), _size(size),
1078 _numOutstandingPackets(0),
1079 _amo_op(
std::move(amo_op)),
1080 _hasStaleTranslation(stale_translation)
1083 flags.
set(Flag::WriteBackToRegister,
1084 _inst->isStoreConditional() ||
_inst->isAtomic() ||
1094 _port.loadQueue[_inst->lqIdx].setRequest(
this);
1098 _port.storeQueue[_inst->sqIdx].setRequest(
this);
1109 auto req = std::make_shared<Request>(
1110 addr, size, _flags, _inst->requestorId(),
1111 _inst->pcState().instAddr(), _inst->contextId(),
1112 std::move(_amo_op));
1113 req->setByteEnable(byte_enable);
1117 req->setLocalAccessor(
1120 if ((req->isHTMStart() || req->isHTMCommit())) {
1121 auto& inst = this->instruction();
1122 assert(inst->inHtmTransactionalState());
1123 pkt->setHtmTransactional(
1124 inst->getHtmTransactionUid());
1131 _reqs.push_back(req);
1137 assert(!isAnyOutstandingRequest());
1138 _inst->savedRequest =
nullptr;
1140 for (
auto r: _packets)
1147 return _inst->contextId();
1153 numInTranslationFragments++;
1154 _port.getMMUPtr()->translateTiming(req(
i), _inst->thread->getTC(),
1164 (!
flags.isSet(Flag::Discarded)) &&
1165 (
flags.isSet(Flag::TranslationStarted))) {
1166 _hasStaleTranslation =
true;
1169 DPRINTF(
LSQ,
"SingleDataRequest %d 0x%08x isBlocking:%d\n",
1170 (
int)_state, (uint32_t)
flags, _hasStaleTranslation);
1179 (!
flags.isSet(Flag::Discarded)) &&
1180 (
flags.isSet(Flag::TranslationStarted))) {
1181 _hasStaleTranslation =
true;
1184 DPRINTF(
LSQ,
"SplitDataRequest %d 0x%08x isBlocking:%d\n",
1185 (
int)_state, (uint32_t)
flags, _hasStaleTranslation);
1191 assert(_numOutstandingPackets == 1);
1193 assert(pkt == _packets.front());
1194 _port.completeDataAccess(pkt);
1195 _hasStaleTranslation =
false;
1202 uint32_t pktIdx = 0;
1203 while (pktIdx < _packets.size() && pkt != _packets[pktIdx])
1205 assert(pktIdx < _packets.size());
1206 numReceivedPackets++;
1207 if (numReceivedPackets == _packets.size()) {
1218 _port.completeDataAccess(resp);
1221 _hasStaleTranslation =
false;
1229 if (_packets.size() == 0) {
1234 _packets.back()->dataStatic(_inst->memData);
1235 _packets.back()->senderState =
this;
1240 if (_inst->inHtmTransactionalState()) {
1241 _packets.back()->setHtmTransactional(
1242 _inst->getHtmTransactionUid());
1245 "HTM %s pc=0x%lx - vaddr=0x%lx - paddr=0x%lx - htmUid=%u\n",
1246 isLoad() ?
"LD" :
"ST",
1247 _inst->pcState().instAddr(),
1248 _packets.back()->req->hasVaddr() ?
1249 _packets.back()->req->getVaddr() : 0lu,
1250 _packets.back()->getAddr(),
1251 _inst->getHtmTransactionUid());
1254 assert(_packets.size() == 1);
1261 Addr base_address = _addr;
1263 if (_packets.size() == 0) {
1267 _mainPacket->dataStatic(_inst->memData);
1272 if (_inst->inHtmTransactionalState()) {
1273 _mainPacket->setHtmTransactional(
1274 _inst->getHtmTransactionUid());
1276 "HTM LD.0 pc=0x%lx-vaddr=0x%lx-paddr=0x%lx-htmUid=%u\n",
1277 _inst->pcState().instAddr(),
1278 _mainPacket->req->hasVaddr() ?
1279 _mainPacket->req->getVaddr() : 0lu,
1280 _mainPacket->getAddr(),
1281 _inst->getHtmTransactionUid());
1284 for (
int i = 0;
i < _reqs.size() && _fault[
i] ==
NoFault;
i++) {
1288 ptrdiff_t
offset = req->getVaddr() - base_address;
1292 uint8_t* req_data =
new uint8_t[req->getSize()];
1293 std::memcpy(req_data,
1299 _packets.push_back(pkt);
1304 if (_inst->inHtmTransactionalState()) {
1305 _packets.back()->setHtmTransactional(
1306 _inst->getHtmTransactionUid());
1308 "HTM %s.%d pc=0x%lx-vaddr=0x%lx-paddr=0x%lx-htmUid=%u\n",
1309 isLoad() ?
"LD" :
"ST",
1311 _inst->pcState().instAddr(),
1312 _packets.back()->req->hasVaddr() ?
1313 _packets.back()->req->getVaddr() : 0lu,
1314 _packets.back()->getAddr(),
1315 _inst->getHtmTransactionUid());
1319 assert(_packets.size() > 0);
1325 assert(_numOutstandingPackets == 0);
1326 if (lsqUnit()->trySendPacket(isLoad(), _packets.at(0)))
1327 _numOutstandingPackets = 1;
1334 while (numReceivedPackets + _numOutstandingPackets < _packets.size() &&
1335 lsqUnit()->trySendPacket(isLoad(),
1336 _packets.at(numReceivedPackets + _numOutstandingPackets))) {
1337 _numOutstandingPackets++;
1345 return pkt->
req->localAccessor(
thread, pkt);
1355 for (
auto r: _reqs) {
1392 bool is_hit =
false;
1393 for (
auto &
r: _reqs) {
1403 if (
r->hasPaddr() && (
r->getPaddr() & blockMask) == blockAddr) {
1414 return lsq->recvTimingResp(pkt);
1420 for (
ThreadID tid = 0; tid <
cpu->numThreads; tid++) {
1421 if (
cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
1425 lsq->recvTimingSnoopReq(pkt);
1431 lsq->recvReqRetry();
1439 nullptr, nullptr, nullptr)
1451 assert(_reqs.size() == 0);
1453 addReq(_addr, _size, _byteEnable);
1455 if (_reqs.size() > 0) {
1456 _reqs.back()->setReqInstSeqNum(_inst->seqNum);
1457 _reqs.back()->taskId(_taskId);
1458 _reqs.back()->setPaddr(_addr);
1459 _reqs.back()->setInstCount(_inst->getCpuPtr()->totalInsts());
1461 _inst->strictlyOrdered(_reqs.back()->isStrictlyOrdered());
1463 _inst->physEffAddr = _reqs.back()->getPaddr();
1464 _inst->memReqFlags = _reqs.back()->getFlags();
1465 _inst->savedRequest =
this;
1467 flags.set(Flag::TranslationStarted);
1468 flags.set(Flag::TranslationFinished);
1470 _inst->translationStarted(
true);
1471 _inst->translationCompleted(
true);
1473 setState(State::Request);
1475 panic(
"unexpected behaviour in initiateTranslation()");
1484 _hasStaleTranslation =
false;
1492 panic(
"unexpected behaviour - finish()");
1500 DPRINTF(
LSQ,
"Checking pending TLBI sync\n");
1502 for (
const auto& unit :
thread) {
1503 if (unit.checkStaleTranslations())
1506 DPRINTF(
LSQ,
"No threads have blocking TLBI sync\n");
1512 cpu->dataRequestorId());
1518 panic(
"Couldn't send TLBI_EXT_SYNC_COMP message");
1528 assert(request->
req()->contextId() == request->
contextId());
1529 ThreadID tid =
cpu->contextToThread(request->
req()->contextId());
1531 return thread.at(tid).read(request, load_idx);
1537 ThreadID tid =
cpu->contextToThread(request->
req()->contextId());
1539 return thread.at(tid).write(request,
data, store_idx);