50 #include "debug/Activity.hh"
51 #include "debug/MinorMem.hh"
// Fragment of a constructor (its signature is not visible in this excerpt):
// both flags below start out false in the member-initialiser list.
72 issuedToMemory(false),
73 isTranslationDelayed(false),
// Allocate a fresh Request object, held through a shared pointer.
76 request = std::make_shared<Request>();
// Fragment: re-evaluates the instruction's translation fault by calling
// initiateAcc again (the enclosing signature is not visible in this excerpt).
// Look up the thread that owns this instruction.
82 SimpleThread &thread = *port.cpu.threads[inst->id.threadId];
// Take a private copy of the thread's current PC state.
// NOTE(review): the use of old_pc is not visible here — presumably the PC is
// restored after initiateAcc; confirm against the full file.
83 std::unique_ptr<PCStateBase> old_pc(thread.
pcState().
clone());
// Remember the fault recorded so far. In the visible lines it is only read
// by the assert below, which matches the [[maybe_unused]] marker.
85 [[maybe_unused]]
Fault fault = inst->translationFault;
// Overwrite the recorded fault with whatever initiateAcc reports now.
88 inst->translationFault = inst->staticInst->initiateAcc(&context,
nullptr);
// NoFault at this point is logged as the fault having been suppressed.
89 if (inst->translationFault ==
NoFault) {
91 "Translation fault suppressed for inst:%s\n", *inst);
// Sanity check that the fault equals the one seen before the retry.
// NOTE(review): presumably on the else path of the test above — confirm.
93 assert(inst->translationFault == fault);
// Fragment: completes an instruction whose memory access was disabled
// (per the trace message below; the signature is not visible here).
101 DPRINTFS(MinorMem, (&port),
"Complete disabled mem access for inst:%s\n",
// Thread that owns the instruction, and a private copy of its PC state.
// NOTE(review): how old_pc is used afterwards is not visible — confirm.
104 SimpleThread &thread = *port.cpu.threads[inst->id.threadId];
105 std::unique_ptr<PCStateBase> old_pc(thread.
pcState().
clone());
// Run the instruction's completeAcc with nullptr as the first argument,
// followed by the execution context and the instruction's trace data.
110 inst->staticInst->completeAcc(
nullptr, &context, inst->traceData);
// Fragment: clears the memory-access predicate on the thread that owns the
// instruction, then traces the event.
118 port.cpu.threads[inst->id.threadId]->setMemAccPredicate(
false);
119 DPRINTFS(MinorMem, (&port),
"Disable mem access for inst:%s\n", *inst);
// Fragment: compares two address ranges, each given as (start address, size).
// The function name and the result produced by each branch are not visible
// in this excerpt.
124 Addr req1_addr,
unsigned int req1_size,
125 Addr req2_addr,
unsigned int req2_size)
// One-past-the-end address of each range.
129 Addr req2_end_addr = req2_addr + req2_size;
130 Addr req1_end_addr = req1_addr + req1_size;
// Disjoint ranges: range 1 starts at or after the end of range 2, or ends at
// or before the start of range 2.
134 if (req1_addr >= req2_end_addr || req1_end_addr <= req2_addr)
// Range 1 starts no later than range 2 and ends no earlier than it, i.e.
// range 1 encloses range 2 completely.
136 else if (req1_addr <= req2_addr && req1_end_addr >= req2_end_addr)
// Fragment of an argument list (the callee is not visible): the physical
// address and size of this request, then the physical address and size of
// other_request's request.
148 request->getPaddr(), request->getSize(),
149 other_request->
request->getPaddr(), other_request->
request->getSize());
162 return inst->isInst() && inst->staticInst->isFullMemBarrier();
168 return state == StoreToStoreBuffer;
// Fragment: traces a state change, printing the current and the new state
// as integers (%d) together with the instruction.
174 DPRINTFS(MinorMem, (&port),
"Setting state from %d to %d for request:"
175 " %s\n",
state, new_state, *inst);
// Fragment: writes 'R' when isLoad is set and 'W' otherwise, followed by ';',
// then lets the instruction append its own report to the same stream.
190 os << (isLoad ?
'R' :
'W') <<
';';
191 inst->reportData(
os);
// Fragment: streams a textual name for an address-range coverage value.
// The conditions selecting each statement are not visible in this excerpt.
200 os <<
"PartialAddrRangeCoverage";
203 os <<
"FullAddrRangeCoverage";
206 os <<
"NoAddrRangeCoverage";
// Prints the raw integer value of `coverage` after a fixed prefix.
// NOTE(review): presumably the fallback for unnamed values — confirm.
209 os <<
"AddrRangeCoverage-" <<
static_cast<int>(coverage);
// Fragment: streams a textual name for a request state. The conditions
// selecting each statement are not visible in this excerpt.
223 os <<
"InTranslation";
232 os <<
"RequestIssuing";
235 os <<
"StoreToStoreBuffer";
238 os <<
"StoreInStoreBuffer";
241 os <<
"StoreBufferIssuing";
244 os <<
"RequestNeedsRetry";
247 os <<
"StoreBufferNeedsRetry";
// Prints the raw integer value of `state` after a fixed prefix.
// NOTE(review): presumably the fallback for unnamed values — confirm.
253 os <<
"LSQRequestState-" <<
static_cast<int>(
state);
// Fragment: the initialiser of is_last_barrier is not visible in this
// excerpt. The trace below prints "last" when it is true and "a" otherwise.
262 bool is_last_barrier =
265 DPRINTF(MinorMem,
"Moving %s barrier out of store buffer inst: %s\n",
266 (is_last_barrier ?
"last" :
"a"), *inst);
// Fragment: handles a translation response for a request (signature not
// visible in this excerpt).
// Decrement the port's count of accesses in the DTLB.
276 port.numAccessesInDTLB--;
// Trace the response; the fault name is printed only when fault_ is a fault.
278 DPRINTFS(MinorMem, (&port),
"Received translation response for"
279 " request: %s delayed:%d %s\n", *inst, isTranslationDelayed,
280 fault_ !=
NoFault ? fault_->name() :
"");
// Record the reported fault on the instruction.
283 inst->translationFault = fault_;
// For a delayed translation, first try to suppress the fault; if that leaves
// no fault, complete the access as a disabled memory access.
284 if (isTranslationDelayed) {
285 tryToSuppressFault();
286 if (inst->translationFault ==
NoFault) {
287 completeDisabledMemAccess();
// Both visible paths move the request to the Translated state.
291 setState(Translated);
293 setState(Translated);
// Ask the port to try moving this request on to the transfers stage.
296 port.tryToSendToTransfers(
this);
// Fragment: prepares a DTLB request (signature not visible in this excerpt).
// Bind a reference to the request's byte-enable data; its use is not visible.
308 const auto &byte_enable = request->getByteEnable();
// Increment the port's count of accesses in the DTLB, then trace.
310 port.numAccessesInDTLB++;
314 DPRINTFS(MinorMem, (&port),
"Submitting DTLB request\n");
// Fragment: traces the retirement of a packet and clears the
// packetInFlight flag.
329 DPRINTFS(MinorMem, (&port),
"Retiring packet\n");
331 packetInFlight =
false;
// Fragment: handles the translation response for one fragment of a split
// request (signature not visible in this excerpt).
// Decrement the port's count of accesses in the DTLB.
339 port.numAccessesInDTLB--;
// The fragment being answered is the one at the current translated count,
// captured before the counters below are updated.
341 [[maybe_unused]]
unsigned int expected_fragment_index =
342 numTranslatedFragments;
// One fewer fragment in translation, one more translated.
344 numInTranslationFragments--;
345 numTranslatedFragments++;
// Trace the response; the fault name is printed only when fault_ is a fault.
347 DPRINTFS(MinorMem, (&port),
"Received translation response for fragment"
348 " %d of request: %s delayed:%d %s\n", expected_fragment_index,
349 *inst, isTranslationDelayed,
350 fault_ !=
NoFault ? fault_->name() :
"");
// The answered request must be the fragment expected at that index.
352 assert(request_ == fragmentRequests[expected_fragment_index]);
// Record the reported fault and trace which fragment faulted.
// NOTE(review): the condition guarding this path is not visible here.
360 inst->translationFault = fault_;
362 DPRINTFS(MinorMem, (&port),
"Faulting translation for fragment:"
363 " %d of request: %s\n",
364 expected_fragment_index, *inst);
// Fault suppression is attempted for any fragment after the first, or when
// the translation was delayed.
366 if (expected_fragment_index > 0 || isTranslationDelayed)
367 tryToSuppressFault();
// First fragment: a delayed translation whose fault was cleared is completed
// as a disabled memory access.
368 if (expected_fragment_index == 0) {
369 if (isTranslationDelayed && inst->translationFault ==
NoFault) {
370 completeDisabledMemAccess();
373 setState(Translated);
375 }
// Later fragment with the fault cleared: the translated count is stepped
// back by one before packets are made for the translated fragments.
else if (inst->translationFault ==
NoFault) {
376 setState(Translated);
377 numTranslatedFragments--;
378 makeFragmentPackets();
380 setState(Translated);
// Ask the port to try moving this request on to the transfers stage.
382 port.tryToSendToTransfers(
this);
383 }
// Every fragment has been translated: build the packets, mark the request
// Translated and hand it to the port.
else if (numTranslatedFragments == numFragments) {
384 makeFragmentPackets();
385 setState(Translated);
386 port.tryToSendToTransfers(
this);
// Schedule translationEvent for the current tick; it must not already be
// scheduled.
389 assert(!translationEvent.scheduled());
390 port.cpu.schedule(translationEvent,
curTick());
// Fragment of a member-initialiser list (constructor signature not visible):
// all four fragment counters start at zero.
400 numInTranslationFragments(0),
401 numTranslatedFragments(0),
402 numIssuedFragments(0),
403 numRetiredFragments(0),
// Fragment: iterates over every element of fragmentPackets from begin to
// end. The loop body is not visible in this excerpt.
413 for (
auto i = fragmentPackets.begin();
414 i != fragmentPackets.end();
i++)
// Fragment: divides a request into line-sized fragment requests (signature
// not visible in this excerpt; several initialisers below are cut off).
// Virtual start address and total size of the whole request, and the line
// width taken from the port.
423 Addr base_addr = request->getVaddr();
424 unsigned int whole_size = request->getSize();
425 unsigned int line_width = port.lineWidth;
427 unsigned int fragment_size;
// NOTE(review): the initialisers of first_fragment_offset and
// last_fragment_size are not visible in this excerpt.
444 unsigned int first_fragment_offset =
446 unsigned int last_fragment_size =
// The first fragment runs from its offset to the end of a line.
448 unsigned int first_fragment_size =
449 line_width - first_fragment_offset;
// Bytes left over once the first and last fragments are removed.
451 unsigned int middle_fragments_total_size =
452 whole_size - (first_fragment_size + last_fragment_size);
// Number of whole-line fragments in the middle.
456 unsigned int middle_fragment_count =
457 middle_fragments_total_size / line_width;
// Total count: the first fragment, the middle fragments, and a last
// fragment only when its size is non-zero.
459 numFragments = 1 + middle_fragment_count +
460 (last_fragment_size == 0 ? 0 : 1);
// A zero last-fragment size is reported as a full line in the trace.
462 DPRINTFS(MinorMem, (&port),
"Dividing transfer into %d fragmentRequests."
463 " First fragment size: %d Last fragment size: %d\n",
464 numFragments, first_fragment_size,
465 (last_fragment_size == 0 ? line_width : last_fragment_size));
// The three parts must add up to the whole request size.
467 assert(((middle_fragment_count * line_width) +
468 first_fragment_size + last_fragment_size) == whole_size);
// Start with the first fragment at the base address.
470 fragment_addr = base_addr;
471 fragment_size = first_fragment_size;
// One-past-the-end address of the whole request.
474 Addr end_addr = base_addr + whole_size;
476 auto& byte_enable = request->getByteEnable();
// Counts fragments flagged as disabled; subtracted from numFragments at
// the end.
477 unsigned int num_disabled_fragments = 0;
// One iteration per fragment.
479 for (
unsigned int fragment_index = 0; fragment_index < numFragments;
482 [[maybe_unused]]
bool is_last_fragment =
false;
// Size selection: the fragment at the base address uses the first-fragment
// size; a fragment whose full line would pass end_addr is cut short and
// flagged as the last one; otherwise a full line is used.
// NOTE(review): the else/brace lines joining these cases are not visible.
484 if (fragment_addr == base_addr) {
486 fragment_size = first_fragment_size;
488 if ((fragment_addr + line_width) > end_addr) {
490 fragment_size = end_addr - fragment_addr;
491 is_last_fragment =
true;
494 fragment_size = line_width;
// New request object for this fragment, using the parent's context id.
498 RequestPtr fragment = std::make_shared<Request>();
499 bool disabled_fragment =
false;
501 fragment->setContext(request->contextId());
// Iterators delimiting this fragment's slice of the byte-enable data,
// located by the fragment's offset from the base address.
503 auto it_start = byte_enable.begin() +
504 (fragment_addr - base_addr);
505 auto it_end = byte_enable.begin() +
506 (fragment_addr - base_addr) + fragment_size;
// NOTE(review): the call these arguments belong to is not visible.
509 fragment_addr, fragment_size, request->getFlags(),
510 request->requestorId(),
// NOTE(review): the condition that flags a fragment as disabled is not
// visible in this excerpt.
514 disabled_fragment =
true;
// Enabled fragments are traced and appended to fragmentRequests.
517 if (!disabled_fragment) {
518 DPRINTFS(MinorMem, (&port),
"Generating fragment addr: 0x%x"
519 " size: %d (whole request addr: 0x%x size: %d) %s\n",
520 fragment_addr, fragment_size, base_addr, whole_size,
521 (is_last_fragment ?
"last fragment" :
""));
523 fragmentRequests.push_back(fragment);
// NOTE(review): presumably the else path for disabled fragments — confirm.
525 num_disabled_fragments++;
// Advance to the start of the next fragment.
528 fragment_addr += fragment_size;
// Remove the disabled fragments from the total count.
530 assert(numFragments >= num_disabled_fragments);
531 numFragments -= num_disabled_fragments;
// Fragment: builds one packet per translated fragment (signature not visible
// in this excerpt). At least one fragment must already be translated.
537 assert(numTranslatedFragments > 0);
538 Addr base_addr = request->getVaddr();
540 DPRINTFS(MinorMem, (&port),
"Making packets for request: %s\n", *inst);
// One iteration per translated fragment.
542 for (
unsigned int fragment_index = 0;
543 fragment_index < numTranslatedFragments;
546 RequestPtr fragment = fragmentRequests[fragment_index];
// The trace guards getPaddr() with hasPaddr(), passing 0 when there is no
// physical address.
548 DPRINTFS(MinorMem, (&port),
"Making packet %d for request: %s"
550 fragment_index, *inst,
551 (fragment->hasPaddr() ?
"has paddr" :
"no paddr"),
552 (fragment->hasPaddr() ? fragment->getPaddr() : 0));
554 Addr fragment_addr = fragment->getVaddr();
555 unsigned int fragment_size = fragment->getSize();
557 uint8_t *request_data = NULL;
// Allocate a buffer of the fragment's size and copy from the parent data
// buffer, starting at the fragment's offset from the base address.
// NOTE(review): the guarding condition, the copy length and the owner of
// this allocation are not visible in this excerpt.
562 request_data =
new uint8_t[fragment_size];
563 std::memcpy(request_data,
data + (fragment_addr - base_addr),
// A fragment must have a physical address at this point.
567 assert(fragment->hasPaddr());
572 fragmentPackets.push_back(fragment_packet);
// Copy the fragment's flags onto the parent request.
574 request->setFlags(fragment->getFlags());
// The parent request takes the physical address of the first fragment.
580 request->setPaddr(fragmentRequests[0]->getPaddr());
// Fragment: builds the fragment requests and, when there is at least one,
// resets the translation counters before sending a fragment to translation.
// NOTE(review): lines between these statements are not visible here.
587 makeFragmentRequests();
589 if (numFragments > 0) {
591 numInTranslationFragments = 0;
592 numTranslatedFragments = 0;
602 sendNextFragmentToTranslation();
// Fragment: returns the packet of the next fragment to issue, i.e. the one
// at index numIssuedFragments, which must be below the translated count.
612 assert(numIssuedFragments < numTranslatedFragments);
614 return fragmentPackets[numIssuedFragments];
// Fragment: counts one more fragment as issued; there must still be a
// translated fragment that has not been issued.
620 assert(numIssuedFragments < numTranslatedFragments);
622 numIssuedFragments++;
// Fragment: retires the response for one fragment of a split request
// (signature not visible in this excerpt).
// Preconditions: no translation fault, and fewer fragments retired than
// translated.
628 assert(inst->translationFault ==
NoFault);
629 assert(numRetiredFragments < numTranslatedFragments);
// NOTE(review): the offset logged here is request vaddr minus response
// vaddr, the reverse of the response-minus-request offset used for the data
// pointer further down; confirm which sign the message intends.
631 DPRINTFS(MinorMem, (&port),
"Retiring fragment addr: 0x%x size: %d"
632 " offset: 0x%x (retired fragment num: %d)\n",
633 response->
req->getVaddr(), response->
req->getSize(),
634 request->getVaddr() - response->
req->getVaddr(),
635 numRetiredFragments);
637 numRetiredFragments++;
// NOTE(review): the condition for the skip path is not visible here.
642 DPRINTFS(MinorMem, (&port),
"Skipping this fragment\n");
643 }
// An error response is traced and its error is copied onto `packet`.
else if (response->
isError()) {
645 DPRINTFS(MinorMem, (&port),
"Fragment has an error, skipping\n");
647 packet->copyError(response);
// Allocate a buffer sized for the whole request.
// NOTE(review): the guarding condition is not visible in this excerpt.
654 data =
new uint8_t[request->getSize()];
// Arguments of a call that is not visible: a position in `data` at the
// fragment's offset from the request start, and the fragment's size.
660 data + (response->
req->getVaddr() - request->getVaddr()),
662 response->
req->getSize());
// A skipped request with nothing left in the memory system is complete; a
// response is made only if the packet needs one.
667 if (skipped && !hasPacketsInMemSystem()) {
668 DPRINTFS(MinorMem, (&port),
"Completed skipped burst\n");
670 if (packet->needsResponse())
671 packet->makeResponse();
// NOTE(review): the statement controlled by this test is not visible.
674 if (numRetiredFragments == numTranslatedFragments)
// A completed, non-skipped request is traced in detail.
677 if (!skipped && isComplete()) {
678 DPRINTFS(MinorMem, (&port),
"Completed burst %d\n", packet != NULL);
680 DPRINTFS(MinorMem, (&port),
"Retired packet isRead: %d isWrite: %d"
681 " needsResponse: %d packetSize: %s requestSize: %s responseSize:"
682 " %s\n", packet->isRead(), packet->isWrite(),
683 packet->needsResponse(), packet->getSize(), request->getSize(),
// NOTE(review): the condition guarding this second allocation is not
// visible in this excerpt.
689 data =
new uint8_t[request->getSize()];
// Copy request-size bytes from `data` into the packet's own buffer, then
// turn the packet into a response.
693 DPRINTFS(MinorMem, (&port),
"Copying read data\n");
694 std::memcpy(packet->getPtr<uint8_t>(),
data, request->getSize());
696 packet->makeResponse();
// Fragment: submits the next untranslated fragment to the DTLB (signature
// not visible). The fragment chosen is the one at the translated count.
705 unsigned int fragment_index = numTranslatedFragments;
710 DPRINTFS(MinorMem, (&port),
"Submitting DTLB request for fragment: %d\n",
// Count the access on the port and the fragment as in translation.
713 port.numAccessesInDTLB++;
714 numInTranslationFragments++;
// Arguments of a call that is not visible and is cut off after isLoad.
717 fragmentRequests[fragment_index], thread,
this, (isLoad ?
725 return slots.size() < numSlots;
// Fragment: linear search of `slots` for `request`; when present, the
// removal is traced. The removal itself is not visible in this excerpt.
731 auto found = std::find(slots.begin(), slots.end(), request);
733 if (found != slots.end()) {
734 DPRINTF(MinorMem,
"Deleting request: %s %s %s from StoreBuffer\n",
735 request, *found, *(request->
inst));
// Fragment: inserts a store request into the store buffer.
// A warning (not an error) is issued when there is no space to insert.
// NOTE(review): the condition guarding the warning is not visible here.
746 warn(
"%s: store buffer insertion without space to insert from"
747 " inst: %s\n",
name(), *(request->
inst));
750 DPRINTF(MinorMem,
"Pushing store: %s into store buffer\n", request);
// Count the new entry as unissued and append it to the slots.
752 numUnissuedAccesses++;
757 slots.push_back(request);
// Fragment: searches the store buffer slots for one that can forward data.
// found_slot is an output parameter, assigned below.
766 unsigned int &found_slot)
// Start from the last slot and walk in reverse.
// NOTE(review): slots.size() - 1 wraps around in unsigned arithmetic when
// slots is empty; presumably the unseen loop condition prevents slot_index
// from being used in that case — confirm.
768 unsigned int slot_index = slots.size() - 1;
769 auto i = slots.rbegin();
// Part of a condition: the slot must belong to the same thread as the
// request and must not hold a cache-maintenance packet.
781 slot->
inst->id.threadId == request->
inst->id.threadId &&
782 !slot->
packet->
req->isCacheMaintenance()) {
786 DPRINTF(MinorMem,
"Forwarding: slot: %d result: %s thisAddr:"
787 " 0x%x thisSize: %d slotAddr: 0x%x slotSize: %d\n",
788 slot_index, coverage,
// Report the slot index to the caller.
792 found_slot = slot_index;
// Fragment: copies data from a store buffer slot into a load's packet.
807 unsigned int slot_number)
// The slot index must refer to an existing slot.
809 assert(slot_number < slots.size());
// Distance of the load address from the start of the store's data.
// NOTE(review): the definitions of load_addr and store_addr are not visible.
820 Addr addr_offset = load_addr - store_addr;
822 unsigned int load_size = load->
request->getSize();
824 DPRINTF(MinorMem,
"Forwarding %d bytes for addr: 0x%x from store buffer"
825 " slot: %d addr: 0x%x addressOffset: 0x%x\n",
826 load_size, load_addr, slot_number,
827 store_addr, addr_offset);
// Source pointer: the store packet's data advanced by the offset.
830 void *store_packet_data = store->
packet->
getPtr<uint8_t>() + addr_offset;
// Copy exactly the load's size into the load packet's data.
832 std::memcpy(load_packet_data, store_packet_data, load_size);
841 numUnissuedAccesses--;
// Fragment: one step of the store buffer (signature not visible).
847 DPRINTF(MinorMem,
"StoreBuffer step numUnissuedAccesses: %d\n",
848 numUnissuedAccesses);
// While the front slot is both complete and a barrier, the visible body
// lines trace it, decrement the unissued count and clear the barrier in
// the LSQ.
852 while (!slots.empty() &&
853 slots.front()->isComplete() && slots.front()->isBarrier())
857 DPRINTF(MinorMem,
"Clearing barrier for inst: %s\n",
860 numUnissuedAccesses--;
861 lsq.clearMemBarrier(barrier->
inst);
// Walk the slots from the front, counting stores issued in this step.
867 auto i = slots.begin();
869 unsigned int issue_count = 0;
// Part of the loop condition: stay below the per-cycle store limit and
// only continue while the LSQ can send to the memory system.
879 issue_count < storeLimitPerCycle &&
880 lsq.canSendToMemorySystem() &&
885 DPRINTF(MinorMem,
"Considering request: %s, sentAllPackets: %d"
896 DPRINTF(MinorMem,
"Trying to send request: %s to memory"
897 " system\n", *(request->
inst));
// A successful send is recorded via countIssuedStore.
899 if (lsq.tryToSend(request)) {
900 countIssuedStore(request);
920 if (!inst->inStoreBuffer) {
// Fragment: builds a textual report into a string stream. The loop below
// runs while i is below numSlots; its body and the definition of i are not
// visible in this excerpt.
934 unsigned int size = slots.size();
936 std::ostringstream
os;
948 while (
i < numSlots) {
957 numUnissuedAccesses);
// Fragment: decides whether a request can move to the transfers queue or be
// sent to memory (signature not visible). Most conditions guarding the trace
// messages below are missing from this excerpt; each message states the
// reason for the corresponding outcome.
964 DPRINTF(MinorMem,
"Request needs retry, not issuing to"
965 " memory until retry arrives\n");
970 DPRINTF(MinorMem,
"Request still in translation, not issuing to"
981 DPRINTF(MinorMem,
"Request not at front of requests queue, can't"
982 " issue to memory\n");
987 DPRINTF(MinorMem,
"No space to insert request into transfers"
// The trace distinguishes completed from failed transfers via isComplete().
993 DPRINTF(MinorMem,
"Passing a %s transfer on to transfers"
994 " queue\n", (request->
isComplete() ?
"completed" :
"failed"));
1005 DPRINTF(MinorMem,
"Request's inst. is from the wrong stream,"
1006 " waiting for responses before aborting request\n");
1008 DPRINTF(MinorMem,
"Request's inst. is from the wrong stream,"
1009 " aborting request\n");
// Prefetch instructions are tested for explicitly; the trace says their
// faults are not signalled.
1018 if (request->
inst->staticInst->isPrefetch()) {
1019 DPRINTF(MinorMem,
"Not signalling fault for faulting prefetch\n");
1021 DPRINTF(MinorMem,
"Moving faulting request into the transfers"
// Classify the access from the LSQ request and its underlying request.
1029 bool is_load = request->
isLoad;
1030 bool is_llsc = request->
request->isLLSC();
1031 bool is_release = request->
request->isRelease();
1032 bool is_swap = request->
request->isSwap();
1033 bool is_atomic = request->
request->isAtomic();
// Bufferable means none of: strictly ordered, LLSC, swap, atomic, release.
1034 bool bufferable = !(request->
request->isStrictlyOrdered() ||
1035 is_llsc || is_swap || is_atomic || is_release);
1039 DPRINTF(MinorMem,
"Load request with stores still in transfers"
1040 " queue, stalling\n");
// Bufferable accesses that are not local accesses take this path.
1045 if (bufferable && !request->
request->isLocalAccess()) {
1048 DPRINTF(MinorMem,
"Moving store into transfers queue\n");
// Store-side LLSC accesses and release accesses; the remainder of this
// condition is not visible.
1055 if (((!is_load && is_llsc) || is_release) &&
1057 DPRINTF(MinorMem,
"Memory access needs to wait for store buffer"
1067 DPRINTF(MinorMem,
"Memory access not the head inst., can't be"
1068 " sure it can be performed, not issuing\n");
1072 unsigned int forwarding_slot = 0;
1084 DPRINTF(MinorMem,
"Memory access can receive forwarded data"
1085 " from the store buffer, but need to wait for store buffer"
// do_access starts true; the code that may clear it is not visible.
1095 bool do_access =
true;
1100 unsigned int forwarding_slot = 0;
// Dispatch on the forwarding result; the case labels are not visible.
1105 switch (forwarding_result) {
1116 DPRINTF(MinorMem,
"Load partly satisfied by store buffer"
1117 " data. Must wait for the store to complete\n");
1121 DPRINTF(MinorMem,
"No forwardable data from store buffer\n");
1128 DPRINTF(MinorMem,
"Can't send request to memory system yet\n");
// Private copy of the thread's PC state.
// NOTE(review): its later use is not visible in this excerpt.
1134 std::unique_ptr<PCStateBase> old_pc(thread.
pcState().
clone());
// NOTE(review): "perfoming" below is a typo in the emitted message; it is
// left untouched here because it is runtime output.
1145 DPRINTF(MinorMem,
"Not perfoming a memory "
1146 "access for store conditional\n");
1155 DPRINTF(MinorMem,
"Can't send request to memory system yet\n");
// Fragment: attempts to send a request's packet to the memory system
// (signature not visible in this excerpt).
1181 DPRINTF(MinorMem,
"Can't send request: %s yet, no space in memory\n",
1186 DPRINTF(MinorMem,
"Trying to send request: %s addr: 0x%x\n",
1187 *(request->
inst), packet->
req->getVaddr());
// Local accesses are served through the request's localAccessor using the
// thread context derived from the request's context id.
1193 if (request->
request->isLocalAccess()) {
1195 cpu.getContext(
cpu.contextToThread(
1196 request->
request->contextId()));
// NOTE(review): the read/write test selecting between these two traces is
// not visible in this excerpt.
1199 DPRINTF(MinorMem,
"IPR read inst: %s\n", *(request->
inst));
1201 DPRINTF(MinorMem,
"IPR write inst: %s\n", *(request->
inst));
1203 request->
request->localAccessor(thread, packet);
1209 DPRINTF(MinorMem,
"IPR access has another packet: %s\n",
1218 DPRINTF(MinorMem,
"Sent data memory request\n");
// Dispatch on the request state; the case labels are not visible. The
// visible panic reports a state value that was not recognised.
1226 switch (request->
state) {
1240 panic(
"Unrecognized LSQ request state %d.", request->
state);
1246 "Sending data memory request - needs retry\n");
// Second dispatch on the request state, with the same panic for an
// unrecognised value.
1252 switch (request->
state) {
1262 panic(
"Unrecognized LSQ request state %d.", request->
state);
// Fragment: handles a response packet coming back from memory (signature
// and most arguments are not visible in this excerpt).
1303 DPRINTF(MinorMem,
"Received response packet inst: %s"
1304 " addr: 0x%x cmd: %s\n",
1311 DPRINTF(MinorMem,
"Received error response packet: %s\n",
// Dispatch on the request state; the case labels are not visible.
1315 switch (request->
state) {
1321 DPRINTF(MinorMem,
"Has outstanding packets?: %d %d\n",
1336 DPRINTF(MinorMem,
"Completed transfer for barrier: %s"
1337 " leaving the request as it is also a barrier\n",
// NOTE(review): presumably the default case — any other state is treated
// as a fatal internal error; confirm against the full file.
1343 panic(
"Shouldn't be allowed to receive a response from another state");
1360 DPRINTF(MinorMem,
"Received retry request\n");
// Fragment of the LSQ constructor: parameter list, part of the
// member-initialiser list, and parameter validation. Several parameters and
// initialisers are missing from this excerpt.
1404 LSQ::LSQ(std::string name_, std::string dcache_port_name_,
1406 unsigned int in_memory_system_limit,
unsigned int line_width,
1407 unsigned int requests_queue_size,
unsigned int transfers_queue_size,
1408 unsigned int store_buffer_size,
1409 unsigned int store_buffer_cycle_store_limit,
// A line_width of 0 selects the CPU's cache line size instead.
1419 lineWidth((line_width == 0 ?
cpu.cacheLineSize() : line_width)),
// The two queues are named after this object and sized from the
// corresponding constructor arguments.
1420 requests(name_ +
".requests",
"addr", requests_queue_size),
1421 transfers(name_ +
".transfers",
"addr", transfers_queue_size),
1423 *this, store_buffer_size, store_buffer_cycle_store_limit),
// Each limit and size below must be at least 1; a smaller value is
// reported through fatal() together with the offending value.
1431 if (in_memory_system_limit < 1) {
1432 fatal(
"%s: executeMaxAccessesInMemory must be >= 1 (%d)\n", name_,
1433 in_memory_system_limit);
1436 if (store_buffer_cycle_store_limit < 1) {
1437 fatal(
"%s: executeLSQMaxStoreBufferStoresPerCycle must be"
1438 " >= 1 (%d)\n", name_, store_buffer_cycle_store_limit);
1441 if (requests_queue_size < 1) {
1442 fatal(
"%s: executeLSQRequestsQueueSize must be"
1443 " >= 1 (%d)\n", name_, requests_queue_size);
1446 if (transfers_queue_size < 1) {
1447 fatal(
"%s: executeLSQTransfersQueueSize must be"
1448 " >= 1 (%d)\n", name_, transfers_queue_size);
1451 if (store_buffer_size < 1) {
1452 fatal(
"%s: executeLSQStoreBufferSize must be"
1453 " >= 1 (%d)\n", name_, store_buffer_size);
// Fragment: looks for a memory response that belongs to `inst` (signature
// not visible in this excerpt).
// The candidate request must carry the same instruction id.
1499 if (request->
inst->id == inst->id) {
// NOTE(review): the state compared against is cut off in this excerpt.
1502 bool to_store_buffer = request->
state ==
// Accept when the request is complete, unless it is a barrier and stores
// cannot be accepted; or when it is headed for the store buffer and stores
// can be accepted.
1505 if ((complete && !(request->
isBarrier() && !can_store)) ||
1506 (to_store_buffer && can_store))
1514 DPRINTF(MinorMem,
"Found matching memory response for inst: %s\n",
1517 DPRINTF(MinorMem,
"No matching memory response for inst: %s\n",
// Fragment: disposes of a response (signature not visible). The trace names
// the request "load" or "store" according to its isLoad flag.
1538 DPRINTF(MinorMem,
"Deleting %s request: %s\n",
1539 (response->
isLoad ?
"load" :
"store"),
// A store being passed to the store buffer is traced and its instruction
// is flagged as being in the store buffer.
// NOTE(review): the condition selecting this path is not visible.
1551 DPRINTF(MinorMem,
"Sending store: %s to store buffer\n",
1554 request->
inst->inStoreBuffer =
true;
// Fragment: `ret` is true when there are translated requests or when a
// further condition (cut off in this excerpt) holds; the need to tick is
// traced on the Activity debug flag.
1576 ret = have_translated_requests ||
1581 DPRINTF(Activity,
"Need to tick\n");
// Fragment: creates a memory request for an instruction and starts it
// (signature not visible in this excerpt).
// An instruction may only carry a translation fault here if it is already
// in the LSQ.
1592 assert(inst->translationFault ==
NoFault || inst->inLSQ);
// NOTE(review): the condition guarding this early return is not visible.
1595 return inst->translationFault;
// Atomic accesses that need a burst are tested for; the visible panic
// message says cross-cache-line atomics are not expected.
1600 if (needs_burst && inst->staticInst->isAtomic()) {
1608 panic(
"Do not expect cross-cache-line atomic memory request\n");
1615 uint8_t *request_data = NULL;
1617 DPRINTF(MinorMem,
"Pushing request (%s) addr: 0x%x size: %d flags:"
1618 " 0x%x%s lineWidth : 0x%x\n",
1619 (isLoad ?
"load" :
"store/atomic"),
addr, size, flags,
1620 (needs_burst ?
" (needs burst)" :
""),
lineWidth);
// Allocate a buffer of `size` bytes; it is either zero-filled or filled
// from `data`, depending on a condition that starts with the instruction
// being atomic (the rest of the condition is not visible).
// NOTE(review): the guard around this allocation and the owner of the
// buffer are not visible in this excerpt.
1625 request_data =
new uint8_t[size];
1626 if (inst->staticInst->isAtomic() ||
1629 std::memset(request_data, 0, size);
1631 std::memcpy(request_data,
data, size);
// Two constructions with identical arguments; the types being created and
// the condition choosing between them are not visible.
1637 *
this, inst, isLoad, request_data, res);
1640 *
this, inst, isLoad, request_data, res);
// Record the access in the instruction's trace data when there is any.
1643 if (inst->traceData)
1644 inst->traceData->setMem(
addr, size, flags);
// Use the context id of the instruction's thread for the request.
1646 int cid =
cpu.
threads[inst->id.threadId]->getTC()->contextId();
1647 request->
request->setContext(cid);
// Arguments of a call that is not visible: address, size, flags, the CPU's
// data requestor id, the instruction address, and the atomic op moved in.
1649 addr, size, flags,
cpu.dataRequestorId(),
1651 inst->pc->instAddr(), std::move(amo_op));
1652 request->
request->setByteEnable(byte_enable);
// The result is the instruction's translation fault as it stands here.
1658 return inst->translationFault;
1672 " lastMemBarrier=%d\n",
// Fragment of a constructor (its name is not visible in this excerpt): the
// slot count and per-cycle store limit come from the arguments, and the
// unissued-access counter starts at zero.
1681 unsigned int store_buffer_size,
1682 unsigned int store_limit_per_cycle) :
1683 Named(name_), lsq(lsq_),
1684 numSlots(store_buffer_size),
1685 storeLimitPerCycle(store_limit_per_cycle),
1687 numUnissuedAccesses(0)
1703 }
else if (!request->isCacheMaintenance()) {
1715 assert(inst->isInst() && inst->staticInst->isFullMemBarrier());
1727 assert(inst->translationFault ==
NoFault);
// Fragment: streams a textual name for a memory state. The conditions
// selecting each statement are not visible in this excerpt.
1745 os <<
"MemoryRunning";
1748 os <<
"MemoryNeedsRetry";
// Prints the raw integer value of `state` after a fixed prefix.
// NOTE(review): presumably the fallback for unnamed values — confirm.
1751 os <<
"MemoryState-" <<
static_cast<int>(
state);
// Fragment: two passes over every hardware thread of the CPU (signature not
// visible in this excerpt).
// First pass: each thread's address monitor is asked whether `pkt` concerns
// it; the action taken on a match is not visible.
1763 for (
ThreadID tid = 0; tid <
cpu.numThreads; tid++) {
1764 if (
cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) {
// Second pass: each thread's ISA object handles a locked snoop; the call's
// arguments are cut off in this excerpt.
1770 for (
ThreadID tid = 0; tid <
cpu.numThreads; tid++) {
1771 cpu.getContext(tid)->getIsaPtr()->handleLockedSnoop(
1786 for (
ThreadID tid = 0; tid <
cpu.numThreads; tid++) {
1787 if (tid != req_tid) {
1788 if (
cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) {
1793 cpu.getContext(tid)->getIsaPtr()->handleLockedSnoop(pkt,