50 #include "debug/Activity.hh"
51 #include "debug/MinorMem.hh"
71 issuedToMemory(false),
72 isTranslationDelayed(false),
75 request = std::make_shared<Request>();
81 SimpleThread &thread = *port.cpu.threads[inst->id.threadId];
82 std::unique_ptr<PCStateBase> old_pc(thread.
pcState().
clone());
83 ExecContext context(port.cpu, thread, port.execute, inst);
84 [[maybe_unused]]
Fault fault = inst->translationFault;
87 inst->translationFault = inst->staticInst->initiateAcc(&context,
nullptr);
88 if (inst->translationFault ==
NoFault) {
90 "Translation fault suppressed for inst:%s\n", *inst);
92 assert(inst->translationFault == fault);
100 DPRINTFS(MinorMem, (&port),
"Complete disabled mem access for inst:%s\n",
103 SimpleThread &thread = *port.cpu.threads[inst->id.threadId];
104 std::unique_ptr<PCStateBase> old_pc(thread.
pcState().
clone());
106 ExecContext context(port.cpu, thread, port.execute, inst);
109 inst->staticInst->completeAcc(
nullptr, &context, inst->traceData);
117 port.cpu.threads[inst->id.threadId]->setMemAccPredicate(
false);
118 DPRINTFS(MinorMem, (&port),
"Disable mem access for inst:%s\n", *inst);
123 Addr req1_addr,
unsigned int req1_size,
124 Addr req2_addr,
unsigned int req2_size)
128 Addr req2_end_addr = req2_addr + req2_size;
129 Addr req1_end_addr = req1_addr + req1_size;
133 if (req1_addr >= req2_end_addr || req1_end_addr <= req2_addr)
135 else if (req1_addr <= req2_addr && req1_end_addr >= req2_end_addr)
147 request->getPaddr(), request->getSize(),
148 other_request->
request->getPaddr(), other_request->
request->getSize());
161 return inst->isInst() && inst->staticInst->isFullMemBarrier();
167 return state == StoreToStoreBuffer;
173 DPRINTFS(MinorMem, (&port),
"Setting state from %d to %d for request:"
174 " %s\n",
state, new_state, *inst);
189 os << (isLoad ?
'R' :
'W') <<
';';
190 inst->reportData(
os);
199 os <<
"PartialAddrRangeCoverage";
202 os <<
"FullAddrRangeCoverage";
205 os <<
"NoAddrRangeCoverage";
208 os <<
"AddrRangeCoverage-" <<
static_cast<int>(coverage);
222 os <<
"InTranslation";
231 os <<
"RequestIssuing";
234 os <<
"StoreToStoreBuffer";
237 os <<
"StoreInStoreBuffer";
240 os <<
"StoreBufferIssuing";
243 os <<
"RequestNeedsRetry";
246 os <<
"StoreBufferNeedsRetry";
252 os <<
"LSQRequestState-" <<
static_cast<int>(
state);
261 bool is_last_barrier =
264 DPRINTF(MinorMem,
"Moving %s barrier out of store buffer inst: %s\n",
265 (is_last_barrier ?
"last" :
"a"), *inst);
275 port.numAccessesInDTLB--;
277 DPRINTFS(MinorMem, (&port),
"Received translation response for"
278 " request: %s delayed:%d %s\n", *inst, isTranslationDelayed,
279 fault_ !=
NoFault ? fault_->name() :
"");
282 inst->translationFault = fault_;
283 if (isTranslationDelayed) {
284 tryToSuppressFault();
285 if (inst->translationFault ==
NoFault) {
286 completeDisabledMemAccess();
290 setState(Translated);
292 setState(Translated);
295 port.tryToSendToTransfers(
this);
307 const auto &byte_enable = request->getByteEnable();
309 port.numAccessesInDTLB++;
313 DPRINTFS(MinorMem, (&port),
"Submitting DTLB request\n");
328 DPRINTFS(MinorMem, (&port),
"Retiring packet\n");
330 packetInFlight =
false;
338 port.numAccessesInDTLB--;
340 [[maybe_unused]]
unsigned int expected_fragment_index =
341 numTranslatedFragments;
343 numInTranslationFragments--;
344 numTranslatedFragments++;
346 DPRINTFS(MinorMem, (&port),
"Received translation response for fragment"
347 " %d of request: %s delayed:%d %s\n", expected_fragment_index,
348 *inst, isTranslationDelayed,
349 fault_ !=
NoFault ? fault_->name() :
"");
351 assert(request_ == fragmentRequests[expected_fragment_index]);
359 inst->translationFault = fault_;
361 DPRINTFS(MinorMem, (&port),
"Faulting translation for fragment:"
362 " %d of request: %s\n",
363 expected_fragment_index, *inst);
365 if (expected_fragment_index > 0 || isTranslationDelayed)
366 tryToSuppressFault();
367 if (expected_fragment_index == 0) {
368 if (isTranslationDelayed && inst->translationFault ==
NoFault) {
369 completeDisabledMemAccess();
372 setState(Translated);
374 }
else if (inst->translationFault ==
NoFault) {
375 setState(Translated);
376 numTranslatedFragments--;
377 makeFragmentPackets();
379 setState(Translated);
381 port.tryToSendToTransfers(
this);
382 }
else if (numTranslatedFragments == numFragments) {
383 makeFragmentPackets();
384 setState(Translated);
385 port.tryToSendToTransfers(
this);
388 assert(!translationEvent.scheduled());
389 port.cpu.schedule(translationEvent,
curTick());
395 LSQRequest(port_, inst_, isLoad_, data_, res_),
399 numInTranslationFragments(0),
400 numTranslatedFragments(0),
401 numIssuedFragments(0),
402 numRetiredFragments(0),
412 for (
auto i = fragmentPackets.begin();
413 i != fragmentPackets.end();
i++)
422 Addr base_addr = request->getVaddr();
423 unsigned int whole_size = request->getSize();
424 unsigned int line_width = port.lineWidth;
426 unsigned int fragment_size;
443 unsigned int first_fragment_offset =
445 unsigned int last_fragment_size =
447 unsigned int first_fragment_size =
448 line_width - first_fragment_offset;
450 unsigned int middle_fragments_total_size =
451 whole_size - (first_fragment_size + last_fragment_size);
455 unsigned int middle_fragment_count =
456 middle_fragments_total_size / line_width;
458 numFragments = 1 + middle_fragment_count +
459 (last_fragment_size == 0 ? 0 : 1);
461 DPRINTFS(MinorMem, (&port),
"Dividing transfer into %d fragmentRequests."
462 " First fragment size: %d Last fragment size: %d\n",
463 numFragments, first_fragment_size,
464 (last_fragment_size == 0 ? line_width : last_fragment_size));
466 assert(((middle_fragment_count * line_width) +
467 first_fragment_size + last_fragment_size) == whole_size);
469 fragment_addr = base_addr;
470 fragment_size = first_fragment_size;
473 Addr end_addr = base_addr + whole_size;
475 auto& byte_enable = request->getByteEnable();
476 unsigned int num_disabled_fragments = 0;
478 for (
unsigned int fragment_index = 0; fragment_index < numFragments;
481 [[maybe_unused]]
bool is_last_fragment =
false;
483 if (fragment_addr == base_addr) {
485 fragment_size = first_fragment_size;
487 if ((fragment_addr + line_width) > end_addr) {
489 fragment_size = end_addr - fragment_addr;
490 is_last_fragment =
true;
493 fragment_size = line_width;
498 bool disabled_fragment =
false;
500 fragment->setContext(request->contextId());
502 auto it_start = byte_enable.begin() +
503 (fragment_addr - base_addr);
504 auto it_end = byte_enable.begin() +
505 (fragment_addr - base_addr) + fragment_size;
508 fragment_addr, fragment_size, request->getFlags(),
509 request->requestorId(),
513 disabled_fragment =
true;
516 if (!disabled_fragment) {
517 DPRINTFS(MinorMem, (&port),
"Generating fragment addr: 0x%x"
518 " size: %d (whole request addr: 0x%x size: %d) %s\n",
519 fragment_addr, fragment_size, base_addr, whole_size,
520 (is_last_fragment ?
"last fragment" :
""));
522 fragmentRequests.push_back(
fragment);
524 num_disabled_fragments++;
527 fragment_addr += fragment_size;
529 assert(numFragments >= num_disabled_fragments);
530 numFragments -= num_disabled_fragments;
536 assert(numTranslatedFragments > 0);
537 Addr base_addr = request->getVaddr();
539 DPRINTFS(MinorMem, (&port),
"Making packets for request: %s\n", *inst);
541 for (
unsigned int fragment_index = 0;
542 fragment_index < numTranslatedFragments;
547 DPRINTFS(MinorMem, (&port),
"Making packet %d for request: %s"
549 fragment_index, *inst,
550 (
fragment->hasPaddr() ?
"has paddr" :
"no paddr"),
554 unsigned int fragment_size =
fragment->getSize();
556 uint8_t *request_data = NULL;
561 request_data =
new uint8_t[fragment_size];
562 std::memcpy(request_data,
data + (fragment_addr - base_addr),
571 fragmentPackets.push_back(fragment_packet);
573 request->setFlags(
fragment->getFlags());
579 request->setPaddr(fragmentRequests[0]->getPaddr());
586 makeFragmentRequests();
588 if (numFragments > 0) {
590 numInTranslationFragments = 0;
591 numTranslatedFragments = 0;
601 sendNextFragmentToTranslation();
611 assert(numIssuedFragments < numTranslatedFragments);
613 return fragmentPackets[numIssuedFragments];
619 assert(numIssuedFragments < numTranslatedFragments);
621 numIssuedFragments++;
627 assert(inst->translationFault ==
NoFault);
628 assert(numRetiredFragments < numTranslatedFragments);
630 DPRINTFS(MinorMem, (&port),
"Retiring fragment addr: 0x%x size: %d"
631 " offset: 0x%x (retired fragment num: %d)\n",
632 response->
req->getVaddr(), response->
req->getSize(),
633 request->getVaddr() - response->
req->getVaddr(),
634 numRetiredFragments);
636 numRetiredFragments++;
641 DPRINTFS(MinorMem, (&port),
"Skipping this fragment\n");
642 }
else if (response->
isError()) {
644 DPRINTFS(MinorMem, (&port),
"Fragment has an error, skipping\n");
646 packet->copyError(response);
653 data =
new uint8_t[request->getSize()];
659 data + (response->
req->getVaddr() - request->getVaddr()),
661 response->
req->getSize());
666 if (skipped && !hasPacketsInMemSystem()) {
667 DPRINTFS(MinorMem, (&port),
"Completed skipped burst\n");
669 if (packet->needsResponse())
670 packet->makeResponse();
673 if (numRetiredFragments == numTranslatedFragments)
676 if (!skipped && isComplete()) {
677 DPRINTFS(MinorMem, (&port),
"Completed burst %d\n", packet != NULL);
679 DPRINTFS(MinorMem, (&port),
"Retired packet isRead: %d isWrite: %d"
680 " needsResponse: %d packetSize: %s requestSize: %s responseSize:"
681 " %s\n", packet->isRead(), packet->isWrite(),
682 packet->needsResponse(), packet->getSize(), request->getSize(),
688 data =
new uint8_t[request->getSize()];
692 DPRINTFS(MinorMem, (&port),
"Copying read data\n");
693 std::memcpy(packet->getPtr<uint8_t>(),
data, request->getSize());
695 packet->makeResponse();
704 unsigned int fragment_index = numTranslatedFragments;
709 DPRINTFS(MinorMem, (&port),
"Submitting DTLB request for fragment: %d\n",
712 port.numAccessesInDTLB++;
713 numInTranslationFragments++;
716 fragmentRequests[fragment_index], thread,
this, (isLoad ?
724 return slots.size() < numSlots;
730 auto found = std::find(slots.begin(), slots.end(), request);
732 if (found != slots.end()) {
733 DPRINTF(MinorMem,
"Deleting request: %s %s %s from StoreBuffer\n",
734 request, *found, *(request->
inst));
745 warn(
"%s: store buffer insertion without space to insert from"
746 " inst: %s\n",
name(), *(request->
inst));
749 DPRINTF(MinorMem,
"Pushing store: %s into store buffer\n", request);
751 numUnissuedAccesses++;
756 slots.push_back(request);
765 unsigned int &found_slot)
767 unsigned int slot_index = slots.size() - 1;
768 auto i = slots.rbegin();
780 slot->
inst->id.threadId == request->
inst->id.threadId &&
781 !slot->
packet->
req->isCacheMaintenance()) {
785 DPRINTF(MinorMem,
"Forwarding: slot: %d result: %s thisAddr:"
786 " 0x%x thisSize: %d slotAddr: 0x%x slotSize: %d\n",
787 slot_index, coverage,
791 found_slot = slot_index;
806 unsigned int slot_number)
808 assert(slot_number < slots.size());
819 Addr addr_offset = load_addr - store_addr;
821 unsigned int load_size = load->
request->getSize();
823 DPRINTF(MinorMem,
"Forwarding %d bytes for addr: 0x%x from store buffer"
824 " slot: %d addr: 0x%x addressOffset: 0x%x\n",
825 load_size, load_addr, slot_number,
826 store_addr, addr_offset);
829 void *store_packet_data = store->
packet->
getPtr<uint8_t>() + addr_offset;
831 std::memcpy(load_packet_data, store_packet_data, load_size);
840 numUnissuedAccesses--;
846 DPRINTF(MinorMem,
"StoreBuffer step numUnissuedAccesses: %d\n",
847 numUnissuedAccesses);
851 while (!slots.empty() &&
852 slots.front()->isComplete() && slots.front()->isBarrier())
856 DPRINTF(MinorMem,
"Clearing barrier for inst: %s\n",
859 numUnissuedAccesses--;
860 lsq.clearMemBarrier(barrier->
inst);
866 auto i = slots.begin();
868 unsigned int issue_count = 0;
878 issue_count < storeLimitPerCycle &&
879 lsq.canSendToMemorySystem() &&
884 DPRINTF(MinorMem,
"Considering request: %s, sentAllPackets: %d"
895 DPRINTF(MinorMem,
"Trying to send request: %s to memory"
896 " system\n", *(request->
inst));
898 if (lsq.tryToSend(request)) {
899 countIssuedStore(request);
919 if (!inst->inStoreBuffer) {
933 unsigned int size = slots.size();
935 std::ostringstream
os;
947 while (
i < numSlots) {
956 numUnissuedAccesses);
963 DPRINTF(MinorMem,
"Request needs retry, not issuing to"
964 " memory until retry arrives\n");
969 DPRINTF(MinorMem,
"Request still in translation, not issuing to"
980 DPRINTF(MinorMem,
"Request not at front of requests queue, can't"
981 " issue to memory\n");
986 DPRINTF(MinorMem,
"No space to insert request into transfers"
992 DPRINTF(MinorMem,
"Passing a %s transfer on to transfers"
993 " queue\n", (request->
isComplete() ?
"completed" :
"failed"));
1004 DPRINTF(MinorMem,
"Request's inst. is from the wrong stream,"
1005 " waiting for responses before aborting request\n");
1007 DPRINTF(MinorMem,
"Request's inst. is from the wrong stream,"
1008 " aborting request\n");
1017 if (request->
inst->staticInst->isPrefetch()) {
1018 DPRINTF(MinorMem,
"Not signalling fault for faulting prefetch\n");
1020 DPRINTF(MinorMem,
"Moving faulting request into the transfers"
1028 bool is_load = request->
isLoad;
1029 bool is_llsc = request->
request->isLLSC();
1030 bool is_release = request->
request->isRelease();
1031 bool is_swap = request->
request->isSwap();
1032 bool is_atomic = request->
request->isAtomic();
1033 bool bufferable = !(request->
request->isStrictlyOrdered() ||
1034 is_llsc || is_swap || is_atomic || is_release);
1038 DPRINTF(MinorMem,
"Load request with stores still in transfers"
1039 " queue, stalling\n");
1044 if (bufferable && !request->
request->isLocalAccess()) {
1047 DPRINTF(MinorMem,
"Moving store into transfers queue\n");
1054 if (((!is_load && is_llsc) || is_release) &&
1056 DPRINTF(MinorMem,
"Memory access needs to wait for store buffer"
1066 DPRINTF(MinorMem,
"Memory access not the head inst., can't be"
1067 " sure it can be performed, not issuing\n");
1071 unsigned int forwarding_slot = 0;
1083 DPRINTF(MinorMem,
"Memory access can receive forwarded data"
1084 " from the store buffer, but need to wait for store buffer"
1094 bool do_access =
true;
1099 unsigned int forwarding_slot = 0;
1104 switch (forwarding_result) {
1115 DPRINTF(MinorMem,
"Load partly satisfied by store buffer"
1116 " data. Must wait for the store to complete\n");
1120 DPRINTF(MinorMem,
"No forwardable data from store buffer\n");
1127 DPRINTF(MinorMem,
"Can't send request to memory system yet\n");
1133 std::unique_ptr<PCStateBase> old_pc(thread.
pcState().
clone());
1144 DPRINTF(MinorMem,
"Not perfoming a memory "
1145 "access for store conditional\n");
1154 DPRINTF(MinorMem,
"Can't send request to memory system yet\n");
1180 DPRINTF(MinorMem,
"Can't send request: %s yet, no space in memory\n",
1185 DPRINTF(MinorMem,
"Trying to send request: %s addr: 0x%x\n",
1186 *(request->
inst), packet->
req->getVaddr());
1192 if (request->
request->isLocalAccess()) {
1194 cpu.getContext(
cpu.contextToThread(
1195 request->
request->contextId()));
1198 DPRINTF(MinorMem,
"IPR read inst: %s\n", *(request->
inst));
1200 DPRINTF(MinorMem,
"IPR write inst: %s\n", *(request->
inst));
1202 request->
request->localAccessor(thread, packet);
1208 DPRINTF(MinorMem,
"IPR access has another packet: %s\n",
1217 DPRINTF(MinorMem,
"Sent data memory request\n");
1225 switch (request->
state) {
1239 panic(
"Unrecognized LSQ request state %d.", request->
state);
1245 "Sending data memory request - needs retry\n");
1251 switch (request->
state) {
1261 panic(
"Unrecognized LSQ request state %d.", request->
state);
1302 DPRINTF(MinorMem,
"Received response packet inst: %s"
1303 " addr: 0x%x cmd: %s\n",
1310 DPRINTF(MinorMem,
"Received error response packet: %s\n",
1314 switch (request->
state) {
1320 DPRINTF(MinorMem,
"Has outstanding packets?: %d %d\n",
1335 DPRINTF(MinorMem,
"Completed transfer for barrier: %s"
1336 " leaving the request as it is also a barrier\n",
1342 panic(
"Shouldn't be allowed to receive a response from another state");
1359 DPRINTF(MinorMem,
"Received retry request\n");
1403 LSQ::LSQ(std::string name_, std::string dcache_port_name_,
1405 unsigned int in_memory_system_limit,
unsigned int line_width,
1406 unsigned int requests_queue_size,
unsigned int transfers_queue_size,
1407 unsigned int store_buffer_size,
1408 unsigned int store_buffer_cycle_store_limit) :
1416 lineWidth((line_width == 0 ?
cpu.cacheLineSize() : line_width)),
1417 requests(name_ +
".requests",
"addr", requests_queue_size),
1418 transfers(name_ +
".transfers",
"addr", transfers_queue_size),
1420 *this, store_buffer_size, store_buffer_cycle_store_limit),
1428 if (in_memory_system_limit < 1) {
1429 fatal(
"%s: executeMaxAccessesInMemory must be >= 1 (%d)\n", name_,
1430 in_memory_system_limit);
1433 if (store_buffer_cycle_store_limit < 1) {
1434 fatal(
"%s: executeLSQMaxStoreBufferStoresPerCycle must be"
1435 " >= 1 (%d)\n", name_, store_buffer_cycle_store_limit);
1438 if (requests_queue_size < 1) {
1439 fatal(
"%s: executeLSQRequestsQueueSize must be"
1440 " >= 1 (%d)\n", name_, requests_queue_size);
1443 if (transfers_queue_size < 1) {
1444 fatal(
"%s: executeLSQTransfersQueueSize must be"
1445 " >= 1 (%d)\n", name_, transfers_queue_size);
1448 if (store_buffer_size < 1) {
1449 fatal(
"%s: executeLSQStoreBufferSize must be"
1450 " >= 1 (%d)\n", name_, store_buffer_size);
1496 if (request->
inst->id == inst->id) {
1499 bool to_store_buffer = request->
state ==
1502 if ((complete && !(request->
isBarrier() && !can_store)) ||
1503 (to_store_buffer && can_store))
1511 DPRINTF(MinorMem,
"Found matching memory response for inst: %s\n",
1514 DPRINTF(MinorMem,
"No matching memory response for inst: %s\n",
1535 DPRINTF(MinorMem,
"Deleting %s request: %s\n",
1536 (response->
isLoad ?
"load" :
"store"),
1548 DPRINTF(MinorMem,
"Sending store: %s to store buffer\n",
1551 request->
inst->inStoreBuffer =
true;
1573 ret = have_translated_requests ||
1578 DPRINTF(Activity,
"Need to tick\n");
1589 assert(inst->translationFault ==
NoFault || inst->inLSQ);
1592 return inst->translationFault;
1597 if (needs_burst && inst->staticInst->isAtomic()) {
1605 panic(
"Do not expect cross-cache-line atomic memory request\n");
1612 uint8_t *request_data = NULL;
1614 DPRINTF(MinorMem,
"Pushing request (%s) addr: 0x%x size: %d flags:"
1615 " 0x%x%s lineWidth : 0x%x\n",
1616 (isLoad ?
"load" :
"store/atomic"),
addr, size,
flags,
1617 (needs_burst ?
" (needs burst)" :
""),
lineWidth);
1622 request_data =
new uint8_t[size];
1623 if (inst->staticInst->isAtomic() ||
1626 std::memset(request_data, 0, size);
1628 std::memcpy(request_data,
data, size);
1634 *
this, inst, isLoad, request_data, res);
1637 *
this, inst, isLoad, request_data, res);
1640 if (inst->traceData)
1641 inst->traceData->setMem(
addr, size,
flags);
1643 int cid =
cpu.
threads[inst->id.threadId]->getTC()->contextId();
1644 request->
request->setContext(cid);
1648 inst->pc->instAddr(), std::move(amo_op));
1649 request->
request->setByteEnable(byte_enable);
1654 assert(!request->
request->isLocalAccess());
1655 request->
request->setLocalAccessor(
1663 return inst->translationFault;
1677 " lastMemBarrier=%d\n",
1686 unsigned int store_buffer_size,
1687 unsigned int store_limit_per_cycle) :
1688 Named(name_), lsq(lsq_),
1689 numSlots(store_buffer_size),
1690 storeLimitPerCycle(store_limit_per_cycle),
1692 numUnissuedAccesses(0)
1708 }
else if (!request->isCacheMaintenance()) {
1720 assert(inst->isInst() && inst->staticInst->isFullMemBarrier());
1732 assert(inst->translationFault ==
NoFault);
1750 os <<
"MemoryRunning";
1753 os <<
"MemoryNeedsRetry";
1756 os <<
"MemoryState-" <<
static_cast<int>(
state);
1768 for (
ThreadID tid = 0; tid <
cpu.numThreads; tid++) {
1769 if (
cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) {
1775 for (
ThreadID tid = 0; tid <
cpu.numThreads; tid++) {
1776 cpu.getContext(tid)->getIsaPtr()->handleLockedSnoop(
1791 for (
ThreadID tid = 0; tid <
cpu.numThreads; tid++) {
1792 if (tid != req_tid) {
1793 if (
cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) {
1798 cpu.getContext(tid)->getIsaPtr()->handleLockedSnoop(pkt,