54 #include "debug/Drain.hh"
55 #include "debug/Fetch.hh"
56 #include "debug/HtmCpu.hh"
57 #include "debug/LSQ.hh"
58 #include "debug/Writeback.hh"
59 #include "params/O3CPU.hh"
80 params.smtLSQThreshold)),
82 params.smtLSQThreshold)),
93 if (
lsqPolicy == SMTQueuePolicy::Dynamic) {
94 DPRINTF(
LSQ,
"LSQ sharing policy set to Dynamic\n");
95 }
else if (
lsqPolicy == SMTQueuePolicy::Partitioned) {
97 "%i entries per LQ | %i entries per SQ\n",
99 }
else if (
lsqPolicy == SMTQueuePolicy::Threshold) {
101 assert(params.smtLSQThreshold > params.LQEntries);
102 assert(params.smtLSQThreshold > params.SQEntries);
104 DPRINTF(
LSQ,
"LSQ sharing policy set to Threshold: "
105 "%i entries per LQ | %i entries per SQ\n",
108 panic(
"Invalid LSQ sharing policy. Options are: Dynamic, "
109 "Partitioned, Threshold");
115 thread[tid].init(
cpu, iew_ptr, params,
this, tid);
149 DPRINTF(Drain,
"Not drained, LQ not empty.\n");
154 DPRINTF(Drain,
"Not drained, SQ not empty.\n");
168 thread[tid].takeOverFrom();
221 ThreadID tid = load_inst->threadNumber;
223 thread[tid].insertLoad(load_inst);
229 ThreadID tid = store_inst->threadNumber;
231 thread[tid].insertStore(store_inst);
239 return thread[tid].executeLoad(inst);
247 return thread[tid].executeStore(inst);
253 thread.at(tid).commitLoads(youngest_inst);
259 thread.at(tid).commitStores(youngest_inst);
268 while (threads != end) {
272 DPRINTF(Writeback,
"[tid:%i] Writing back stores. %i stores "
276 thread[tid].writebackStores();
283 thread.at(tid).squash(squashed_num);
293 while (threads != end) {
308 return thread.at(tid).getMemDepViolator();
314 return thread.at(tid).getLoadHead();
320 return thread.at(tid).getLoadHeadSeqNum();
326 return thread.at(tid).getStoreHead();
332 return thread.at(tid).getStoreHeadSeqNum();
347 return thread[tid].numHtmStarts();
355 return thread[tid].numHtmStops();
362 thread[tid].resetHtmStartsStops();
371 return thread[tid].getLatestHtmUid();
378 thread[tid].setLastRetiredHtmUid(htmUid);
404 DPRINTF(
LSQ,
"Got error packet back for address: %#X\n",
408 panic_if(!request,
"Got packet back with unknown sender state\n");
424 DPRINTF(
LSQ,
"received invalidation with response for addr:%#x\n",
428 thread[tid].checkSnoop(pkt);
445 DPRINTF(
LSQ,
"received invalidation for addr:%#x\n",
448 thread[tid].checkSnoop(pkt);
461 while (threads != end) {
478 while (threads != end) {
495 while (threads != end) {
512 while (threads != end) {
529 while (threads != end) {
541 return thread[tid].numFreeLoadEntries();
547 return thread[tid].numFreeStoreEntries();
556 while (threads != end) {
571 if (
lsqPolicy == SMTQueuePolicy::Dynamic)
589 while (threads != end) {
605 while (threads != end) {
621 while (threads != end) {
636 if (
lsqPolicy == SMTQueuePolicy::Dynamic)
639 return thread[tid].lqFull();
648 while (threads != end) {
663 if (
lsqPolicy == SMTQueuePolicy::Dynamic)
666 return thread[tid].sqFull();
675 while (threads != end) {
688 if (
lsqPolicy == SMTQueuePolicy::Dynamic)
691 return thread[tid].isStalled();
700 while (threads != end) {
713 return thread.at(tid).hasStoresToWB();
719 return thread.at(tid).numStoresToWB();
728 while (threads != end) {
741 return thread.at(tid).willWB();
750 while (threads != end) {
760 thread.at(tid).dumpInsts();
771 [[maybe_unused]]
bool isAtomic = !isLoad && amo_op;
773 ThreadID tid =
cpu->contextToThread(inst->contextId());
774 auto cacheLineSize =
cpu->cacheLineSize();
784 assert(!isAtomic || (isAtomic && !needs_burst));
788 if (inst->translationStarted()) {
789 request = inst->savedRequest;
793 assert(
addr == 0x0lu);
796 }
else if (needs_burst) {
798 size, flags,
data, res);
801 size, flags,
data, res, std::move(amo_op));
818 inst->effAddr = request->
getVaddr();
819 inst->effSize = size;
820 inst->effAddrValid(
true);
823 inst->reqToVerify = std::make_shared<Request>(*request->
req());
827 fault =
read(request, inst->lqIdx);
829 fault =
write(request,
data, inst->sqIdx);
835 inst->getFault() = fault;
837 inst->setMemAccPredicate(
false);
845 inst->traceData->setMem(
addr, size, flags);
847 return inst->getFault();
859 if (
_inst->isSquashed()) {
862 _inst->strictlyOrdered(request->isStrictlyOrdered());
864 flags.
set(Flag::TranslationFinished);
866 _inst->physEffAddr = request->getPaddr();
867 _inst->memReqFlags = request->getFlags();
868 if (request->isCondSwap()) {
870 request->setExtraData(*
_res);
887 for (
i = 0;
i < _reqs.size() && _reqs[
i] != req;
i++);
888 assert(
i < _reqs.size());
891 numInTranslationFragments--;
892 numTranslatedFragments++;
895 _mainReq->setFlags(req->getFlags());
897 if (numTranslatedFragments == _reqs.size()) {
898 if (_inst->isSquashed()) {
901 _inst->strictlyOrdered(_mainReq->isStrictlyOrdered());
902 flags.set(Flag::TranslationFinished);
903 _inst->translationCompleted(
true);
905 for (
i = 0;
i < _fault.size() && _fault[
i] ==
NoFault;
i++);
908 _inst->memReqFlags = _mainReq->getFlags();
909 if (_mainReq->isCondSwap()) {
910 assert (
i == _fault.size());
912 _mainReq->setExtraData(*_res);
914 if (
i == _fault.size()) {
916 setState(State::Request);
918 _inst->fault = _fault[
i];
919 setState(State::PartialFault);
922 _inst->fault = _fault[0];
933 assert(_reqs.size() == 0);
935 addReq(_addr, _size, _byteEnable);
937 if (_reqs.size() > 0) {
938 _reqs.back()->setReqInstSeqNum(_inst->seqNum);
939 _reqs.back()->taskId(_taskId);
940 _inst->translationStarted(
true);
941 setState(State::Translation);
942 flags.set(Flag::TranslationStarted);
944 _inst->savedRequest =
this;
945 sendFragmentToTranslation(0);
947 _inst->setMemAccPredicate(
false);
966 auto cacheLineSize = _port.cacheLineSize();
967 Addr base_addr = _addr;
970 uint32_t size_so_far = 0;
972 _mainReq = std::make_shared<Request>(base_addr,
973 _size, _flags, _inst->requestorId(),
974 _inst->pcState().instAddr(), _inst->contextId());
975 _mainReq->setByteEnable(_byteEnable);
981 _mainReq->setPaddr(0);
984 auto it_start = _byteEnable.begin();
985 auto it_end = _byteEnable.begin() + (next_addr - base_addr);
986 addReq(base_addr, next_addr - base_addr,
988 size_so_far = next_addr - base_addr;
991 base_addr = next_addr;
992 while (base_addr != final_addr) {
993 auto it_start = _byteEnable.begin() + size_so_far;
994 auto it_end = _byteEnable.begin() + size_so_far + cacheLineSize;
995 addReq(base_addr, cacheLineSize,
997 size_so_far += cacheLineSize;
998 base_addr += cacheLineSize;
1002 if (size_so_far < _size) {
1003 auto it_start = _byteEnable.begin() + size_so_far;
1004 auto it_end = _byteEnable.end();
1005 addReq(base_addr, _size - size_so_far,
1009 if (_reqs.size() > 0) {
1011 for (
auto&
r: _reqs) {
1012 r->setReqInstSeqNum(_inst->seqNum);
1016 _inst->translationStarted(
true);
1017 setState(State::Translation);
1018 flags.set(Flag::TranslationStarted);
1019 _inst->savedRequest =
this;
1020 numInTranslationFragments = 0;
1021 numTranslatedFragments = 0;
1022 _fault.resize(_reqs.size());
1024 for (uint32_t
i = 0;
i < _reqs.size();
i++) {
1025 sendFragmentToTranslation(
i);
1028 _inst->setMemAccPredicate(
false);
1034 _state(
State::NotIssued),
1035 _port(*port), _inst(inst), _data(nullptr),
1036 _res(nullptr), _addr(0), _size(0), _flags(0),
1037 _numOutstandingPackets(0), _amo_op(nullptr)
1040 flags.
set(Flag::WriteBackToRegister,
1041 _inst->isStoreConditional() ||
_inst->isAtomic() ||
1051 : _state(
State::NotIssued),
1052 numTranslatedFragments(0),
1053 numInTranslationFragments(0),
1054 _port(*port), _inst(inst), _data(
data),
1055 _res(res), _addr(
addr), _size(size),
1057 _numOutstandingPackets(0),
1058 _amo_op(
std::move(amo_op))
1061 flags.
set(Flag::WriteBackToRegister,
1062 _inst->isStoreConditional() ||
_inst->isAtomic() ||
1072 _port.loadQueue[_inst->lqIdx].setRequest(
this);
1076 _port.storeQueue[_inst->sqIdx].setRequest(
this);
1087 auto req = std::make_shared<Request>(
1088 addr, size, _flags, _inst->requestorId(),
1089 _inst->pcState().instAddr(), _inst->contextId(),
1090 std::move(_amo_op));
1091 req->setByteEnable(byte_enable);
1092 _reqs.push_back(req);
1098 assert(!isAnyOutstandingRequest());
1099 _inst->savedRequest =
nullptr;
1101 for (
auto r: _packets)
1108 return _inst->contextId();
1114 numInTranslationFragments++;
1115 _port.getMMUPtr()->translateTiming(req(
i), _inst->thread->getTC(),
1122 assert(_numOutstandingPackets == 1);
1124 assert(pkt == _packets.front());
1125 _port.completeDataAccess(pkt);
1132 uint32_t pktIdx = 0;
1133 while (pktIdx < _packets.size() && pkt != _packets[pktIdx])
1135 assert(pktIdx < _packets.size());
1136 numReceivedPackets++;
1137 if (numReceivedPackets == _packets.size()) {
1148 _port.completeDataAccess(resp);
1158 if (_packets.size() == 0) {
1163 _packets.back()->dataStatic(_inst->memData);
1164 _packets.back()->senderState =
this;
1169 if (_inst->inHtmTransactionalState()) {
1170 _packets.back()->setHtmTransactional(
1171 _inst->getHtmTransactionUid());
1174 "HTM %s pc=0x%lx - vaddr=0x%lx - paddr=0x%lx - htmUid=%u\n",
1175 isLoad() ?
"LD" :
"ST",
1176 _inst->pcState().instAddr(),
1177 _packets.back()->req->hasVaddr() ?
1178 _packets.back()->req->getVaddr() : 0lu,
1179 _packets.back()->getAddr(),
1180 _inst->getHtmTransactionUid());
1183 assert(_packets.size() == 1);
1190 Addr base_address = _addr;
1192 if (_packets.size() == 0) {
1196 _mainPacket->dataStatic(_inst->memData);
1201 if (_inst->inHtmTransactionalState()) {
1202 _mainPacket->setHtmTransactional(
1203 _inst->getHtmTransactionUid());
1205 "HTM LD.0 pc=0x%lx-vaddr=0x%lx-paddr=0x%lx-htmUid=%u\n",
1206 _inst->pcState().instAddr(),
1207 _mainPacket->req->hasVaddr() ?
1208 _mainPacket->req->getVaddr() : 0lu,
1209 _mainPacket->getAddr(),
1210 _inst->getHtmTransactionUid());
1213 for (
int i = 0;
i < _reqs.size() && _fault[
i] ==
NoFault;
i++) {
1217 ptrdiff_t
offset = req->getVaddr() - base_address;
1221 uint8_t* req_data =
new uint8_t[req->getSize()];
1222 std::memcpy(req_data,
1228 _packets.push_back(pkt);
1233 if (_inst->inHtmTransactionalState()) {
1234 _packets.back()->setHtmTransactional(
1235 _inst->getHtmTransactionUid());
1237 "HTM %s.%d pc=0x%lx-vaddr=0x%lx-paddr=0x%lx-htmUid=%u\n",
1238 isLoad() ?
"LD" :
"ST",
1240 _inst->pcState().instAddr(),
1241 _packets.back()->req->hasVaddr() ?
1242 _packets.back()->req->getVaddr() : 0lu,
1243 _packets.back()->getAddr(),
1244 _inst->getHtmTransactionUid());
1248 assert(_packets.size() > 0);
1254 assert(_numOutstandingPackets == 0);
1255 if (lsqUnit()->trySendPacket(isLoad(), _packets.at(0)))
1256 _numOutstandingPackets = 1;
1263 while (numReceivedPackets + _numOutstandingPackets < _packets.size() &&
1264 lsqUnit()->trySendPacket(isLoad(),
1265 _packets.at(numReceivedPackets + _numOutstandingPackets))) {
1266 _numOutstandingPackets++;
1274 return pkt->
req->localAccessor(
thread, pkt);
1284 for (
auto r: _reqs) {
1321 bool is_hit =
false;
1322 for (
auto &
r: _reqs) {
1332 if (
r->hasPaddr() && (
r->getPaddr() & blockMask) == blockAddr) {
1343 return lsq->recvTimingResp(pkt);
1349 for (
ThreadID tid = 0; tid <
cpu->numThreads; tid++) {
1350 if (
cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
1354 lsq->recvTimingSnoopReq(pkt);
1360 lsq->recvReqRetry();
1366 nullptr, nullptr, nullptr)
1378 assert(_reqs.size() == 0);
1380 addReq(_addr, _size, _byteEnable);
1382 if (_reqs.size() > 0) {
1383 _reqs.back()->setReqInstSeqNum(_inst->seqNum);
1384 _reqs.back()->taskId(_taskId);
1385 _reqs.back()->setPaddr(_addr);
1386 _reqs.back()->setInstCount(_inst->getCpuPtr()->totalInsts());
1388 _inst->strictlyOrdered(_reqs.back()->isStrictlyOrdered());
1390 _inst->physEffAddr = _reqs.back()->getPaddr();
1391 _inst->memReqFlags = _reqs.back()->getFlags();
1392 _inst->savedRequest =
this;
1394 flags.set(Flag::TranslationStarted);
1395 flags.set(Flag::TranslationFinished);
1397 _inst->translationStarted(
true);
1398 _inst->translationCompleted(
true);
1400 setState(State::Request);
1402 panic(
"unexpected behaviour in initiateTranslation()");
1410 panic(
"unexpected behaviour - finish()");
1416 assert(request->
req()->contextId() == request->
contextId());
1417 ThreadID tid =
cpu->contextToThread(request->
req()->contextId());
1419 return thread.at(tid).read(request, load_idx);
1425 ThreadID tid =
cpu->contextToThread(request->
req()->contextId());
1427 return thread.at(tid).write(request,
data, store_idx);