gem5 v24.0.0.0
Loading...
Searching...
No Matches
lsq.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2013-2014,2017-2018,2020-2021 Arm Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 */
37
38#include "cpu/minor/lsq.hh"
39
40#include <iomanip>
41#include <sstream>
42
43#include "base/compiler.hh"
44#include "base/logging.hh"
45#include "base/trace.hh"
47#include "cpu/minor/execute.hh"
48#include "cpu/minor/pipeline.hh"
49#include "cpu/utils.hh"
50#include "debug/Activity.hh"
51#include "debug/MinorMem.hh"
52
53namespace gem5
54{
55
56namespace minor
57{
58
60 PacketDataPtr data_, uint64_t *res_) :
62 port(port_),
63 inst(inst_),
64 isLoad(isLoad_),
65 data(data_),
66 packet(NULL),
67 request(),
68 res(res_),
69 skipped(false),
70 issuedToMemory(false),
71 isTranslationDelayed(false),
72 state(NotIssued)
73{
74 request = std::make_shared<Request>();
75}
76
77void
79{
80 SimpleThread &thread = *port.cpu.threads[inst->id.threadId];
81 std::unique_ptr<PCStateBase> old_pc(thread.pcState().clone());
82 ExecContext context(port.cpu, thread, port.execute, inst);
83 [[maybe_unused]] Fault fault = inst->translationFault;
84
85 // Give the instruction a chance to suppress a translation fault
86 inst->translationFault = inst->staticInst->initiateAcc(&context, nullptr);
87 if (inst->translationFault == NoFault) {
88 DPRINTFS(MinorMem, (&port),
89 "Translation fault suppressed for inst:%s\n", *inst);
90 } else {
91 assert(inst->translationFault == fault);
92 }
93 thread.pcState(*old_pc);
94}
95
96void
98{
99 DPRINTFS(MinorMem, (&port), "Complete disabled mem access for inst:%s\n",
100 *inst);
101
102 SimpleThread &thread = *port.cpu.threads[inst->id.threadId];
103 std::unique_ptr<PCStateBase> old_pc(thread.pcState().clone());
104
105 ExecContext context(port.cpu, thread, port.execute, inst);
106
107 context.setMemAccPredicate(false);
108 inst->staticInst->completeAcc(nullptr, &context, inst->traceData);
109
110 thread.pcState(*old_pc);
111}
112
113void
115{
116 port.cpu.threads[inst->id.threadId]->setMemAccPredicate(false);
117 DPRINTFS(MinorMem, (&port), "Disable mem access for inst:%s\n", *inst);
118}
119
122 Addr req1_addr, unsigned int req1_size,
123 Addr req2_addr, unsigned int req2_size)
124{
125 /* 'end' here means the address of the byte just past the request
126 * blocks */
127 Addr req2_end_addr = req2_addr + req2_size;
128 Addr req1_end_addr = req1_addr + req1_size;
129
131
132 if (req1_addr >= req2_end_addr || req1_end_addr <= req2_addr)
134 else if (req1_addr <= req2_addr && req1_end_addr >= req2_end_addr)
136 else
138
139 return ret;
140}
141
144{
145 AddrRangeCoverage ret = containsAddrRangeOf(
146 request->getPaddr(), request->getSize(),
147 other_request->request->getPaddr(), other_request->request->getSize());
148 /* If there is a strobe mask then store data forwarding might not be
149 * correct. Instead of checking enablemant of every byte we just fall back
150 * to PartialAddrRangeCoverage to prohibit store data forwarding */
151 if (ret == FullAddrRangeCoverage && request->isMasked())
153 return ret;
154}
155
156
157bool
159{
160 return inst->isInst() && inst->staticInst->isFullMemBarrier();
161}
162
163bool
165{
166 return state == StoreToStoreBuffer;
167}
168
169void
171{
172 DPRINTFS(MinorMem, (&port), "Setting state from %d to %d for request:"
173 " %s\n", state, new_state, *inst);
174 state = new_state;
175}
176
177bool
179{
180 /* @todo, There is currently only one 'completed' state. This
181 * may not be a good choice */
182 return state == Complete;
183}
184
/* Emit a compact, single-line trace record for this LSQ request to os.
 * Format: direction flag ('R' for a load, 'W' for a store), then the
 * instruction's own data report, then this request's state, with the
 * fields separated by ';'.  Used by the Minor trace machinery; the
 * state value prints via the LSQRequestState operator<< defined in
 * this file. */
185void
186LSQ::LSQRequest::reportData(std::ostream &os) const
187{
188    os << (isLoad ? 'R' : 'W') << ';';
189    inst->reportData(os);
190    os << ';' << state;
191}
192
193std::ostream &
194operator <<(std::ostream &os, LSQ::AddrRangeCoverage coverage)
195{
196 switch (coverage) {
198 os << "PartialAddrRangeCoverage";
199 break;
201 os << "FullAddrRangeCoverage";
202 break;
204 os << "NoAddrRangeCoverage";
205 break;
206 default:
207 os << "AddrRangeCoverage-" << static_cast<int>(coverage);
208 break;
209 }
210 return os;
211}
212
213std::ostream &
215{
216 switch (state) {
218 os << "NotIssued";
219 break;
221 os << "InTranslation";
222 break;
224 os << "Translated";
225 break;
227 os << "Failed";
228 break;
230 os << "RequestIssuing";
231 break;
233 os << "StoreToStoreBuffer";
234 break;
236 os << "StoreInStoreBuffer";
237 break;
239 os << "StoreBufferIssuing";
240 break;
242 os << "RequestNeedsRetry";
243 break;
245 os << "StoreBufferNeedsRetry";
246 break;
248 os << "Complete";
249 break;
250 default:
251 os << "LSQRequestState-" << static_cast<int>(state);
252 break;
253 }
254 return os;
255}
256
257void
259{
260 bool is_last_barrier =
261 inst->id.execSeqNum >= lastMemBarrier[inst->id.threadId];
262
263 DPRINTF(MinorMem, "Moving %s barrier out of store buffer inst: %s\n",
264 (is_last_barrier ? "last" : "a"), *inst);
265
266 if (is_last_barrier)
267 lastMemBarrier[inst->id.threadId] = 0;
268}
269
270void
271LSQ::SingleDataRequest::finish(const Fault &fault_, const RequestPtr &request_,
273{
274 port.numAccessesInDTLB--;
275
276 DPRINTFS(MinorMem, (&port), "Received translation response for"
277 " request: %s delayed:%d %s\n", *inst, isTranslationDelayed,
278 fault_ != NoFault ? fault_->name() : "");
279
280 if (fault_ != NoFault) {
281 inst->translationFault = fault_;
282 if (isTranslationDelayed) {
283 tryToSuppressFault();
284 if (inst->translationFault == NoFault) {
285 completeDisabledMemAccess();
286 setState(Complete);
287 }
288 }
289 setState(Translated);
290 } else {
291 setState(Translated);
292 makePacket();
293 }
294 port.tryToSendToTransfers(this);
295
296 /* Let's try and wake up the processor for the next cycle */
297 port.cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
298}
299
300void
302{
303 ThreadContext *thread = port.cpu.getContext(
304 inst->id.threadId);
305
306 const auto &byte_enable = request->getByteEnable();
307 if (isAnyActiveElement(byte_enable.cbegin(), byte_enable.cend())) {
308 port.numAccessesInDTLB++;
309
311
312 DPRINTFS(MinorMem, (&port), "Submitting DTLB request\n");
313 /* Submit the translation request. The response will come through
314 * finish/markDelayed on the LSQRequest as it bears the Translation
315 * interface */
316 thread->getMMUPtr()->translateTiming(
317 request, thread, this, (isLoad ? BaseMMU::Read : BaseMMU::Write));
318 } else {
319 disableMemAccess();
321 }
322}
323
324void
326{
327 DPRINTFS(MinorMem, (&port), "Retiring packet\n");
328 packet = packet_;
329 packetInFlight = false;
330 setState(Complete);
331}
332
333void
334LSQ::SplitDataRequest::finish(const Fault &fault_, const RequestPtr &request_,
336{
337 port.numAccessesInDTLB--;
338
339 [[maybe_unused]] unsigned int expected_fragment_index =
340 numTranslatedFragments;
341
342 numInTranslationFragments--;
343 numTranslatedFragments++;
344
345 DPRINTFS(MinorMem, (&port), "Received translation response for fragment"
346 " %d of request: %s delayed:%d %s\n", expected_fragment_index,
347 *inst, isTranslationDelayed,
348 fault_ != NoFault ? fault_->name() : "");
349
350 assert(request_ == fragmentRequests[expected_fragment_index]);
351
352 /* Wake up next cycle to get things going again in case the
353 * tryToSendToTransfers does take */
354 port.cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
355
356 if (fault_ != NoFault) {
357 /* tryToSendToTransfers will handle the fault */
358 inst->translationFault = fault_;
359
360 DPRINTFS(MinorMem, (&port), "Faulting translation for fragment:"
361 " %d of request: %s\n",
362 expected_fragment_index, *inst);
363
364 if (expected_fragment_index > 0 || isTranslationDelayed)
365 tryToSuppressFault();
366 if (expected_fragment_index == 0) {
367 if (isTranslationDelayed && inst->translationFault == NoFault) {
368 completeDisabledMemAccess();
369 setState(Complete);
370 } else {
371 setState(Translated);
372 }
373 } else if (inst->translationFault == NoFault) {
374 setState(Translated);
375 numTranslatedFragments--;
376 makeFragmentPackets();
377 } else {
378 setState(Translated);
379 }
380 port.tryToSendToTransfers(this);
381 } else if (numTranslatedFragments == numFragments) {
382 makeFragmentPackets();
383 setState(Translated);
384 port.tryToSendToTransfers(this);
385 } else {
386 /* Avoid calling translateTiming from within ::finish */
387 assert(!translationEvent.scheduled());
388 port.cpu.schedule(translationEvent, curTick());
389 }
390}
391
393 bool isLoad_, PacketDataPtr data_, uint64_t *res_) :
394 LSQRequest(port_, inst_, isLoad_, data_, res_),
395 translationEvent([this]{ sendNextFragmentToTranslation(); },
396 "translationEvent"),
397 numFragments(0),
398 numInTranslationFragments(0),
399 numTranslatedFragments(0),
400 numIssuedFragments(0),
401 numRetiredFragments(0),
402 fragmentRequests(),
403 fragmentPackets()
404{
405 /* Don't know how many elements are needed until the request is
406 * populated by the caller. */
407}
408
410{
411 for (auto i = fragmentPackets.begin();
412 i != fragmentPackets.end(); i++)
413 {
414 delete *i;
415 }
416}
417
418void
420{
421 Addr base_addr = request->getVaddr();
422 unsigned int whole_size = request->getSize();
423 unsigned int line_width = port.lineWidth;
424
425 unsigned int fragment_size;
426 Addr fragment_addr;
427
428 std::vector<bool> fragment_write_byte_en;
429
430 /* Assume that this transfer is across potentially many block snap
431 * boundaries:
432 *
433 * | _|________|________|________|___ |
434 * | |0| 1 | 2 | 3 | 4 | |
435 * | |_|________|________|________|___| |
436 * | | | | | |
437 *
438 * The first transfer (0) can be up to lineWidth in size.
439 * All the middle transfers (1-3) are lineWidth in size
440 * The last transfer (4) can be from zero to lineWidth - 1 in size
441 */
442 unsigned int first_fragment_offset =
443 addrBlockOffset(base_addr, line_width);
444 unsigned int last_fragment_size =
445 addrBlockOffset(base_addr + whole_size, line_width);
446 unsigned int first_fragment_size =
447 line_width - first_fragment_offset;
448
449 unsigned int middle_fragments_total_size =
450 whole_size - (first_fragment_size + last_fragment_size);
451
452 assert(addrBlockOffset(middle_fragments_total_size, line_width) == 0);
453
454 unsigned int middle_fragment_count =
455 middle_fragments_total_size / line_width;
456
457 numFragments = 1 /* first */ + middle_fragment_count +
458 (last_fragment_size == 0 ? 0 : 1);
459
460 DPRINTFS(MinorMem, (&port), "Dividing transfer into %d fragmentRequests."
461 " First fragment size: %d Last fragment size: %d\n",
462 numFragments, first_fragment_size,
463 (last_fragment_size == 0 ? line_width : last_fragment_size));
464
465 assert(((middle_fragment_count * line_width) +
466 first_fragment_size + last_fragment_size) == whole_size);
467
468 fragment_addr = base_addr;
469 fragment_size = first_fragment_size;
470
471 /* Just past the last address in the request */
472 Addr end_addr = base_addr + whole_size;
473
474 auto& byte_enable = request->getByteEnable();
475 unsigned int num_disabled_fragments = 0;
476
477 for (unsigned int fragment_index = 0; fragment_index < numFragments;
478 fragment_index++)
479 {
480 [[maybe_unused]] bool is_last_fragment = false;
481
482 if (fragment_addr == base_addr) {
483 /* First fragment */
484 fragment_size = first_fragment_size;
485 } else {
486 if ((fragment_addr + line_width) > end_addr) {
487 /* Adjust size of last fragment */
488 fragment_size = end_addr - fragment_addr;
489 is_last_fragment = true;
490 } else {
491 /* Middle fragments */
492 fragment_size = line_width;
493 }
494 }
495
496 RequestPtr fragment = std::make_shared<Request>();
497 bool disabled_fragment = false;
498
499 fragment->setContext(request->contextId());
500 // Set up byte-enable mask for the current fragment
501 auto it_start = byte_enable.begin() +
502 (fragment_addr - base_addr);
503 auto it_end = byte_enable.begin() +
504 (fragment_addr - base_addr) + fragment_size;
505 if (isAnyActiveElement(it_start, it_end)) {
506 fragment->setVirt(
507 fragment_addr, fragment_size, request->getFlags(),
508 request->requestorId(),
509 request->getPC());
510 fragment->setByteEnable(std::vector<bool>(it_start, it_end));
511 } else {
512 disabled_fragment = true;
513 }
514
515 if (!disabled_fragment) {
516 DPRINTFS(MinorMem, (&port), "Generating fragment addr: 0x%x"
517 " size: %d (whole request addr: 0x%x size: %d) %s\n",
518 fragment_addr, fragment_size, base_addr, whole_size,
519 (is_last_fragment ? "last fragment" : ""));
520
521 fragmentRequests.push_back(fragment);
522 } else {
523 num_disabled_fragments++;
524 }
525
526 fragment_addr += fragment_size;
527 }
528 assert(numFragments >= num_disabled_fragments);
529 numFragments -= num_disabled_fragments;
530}
531
532void
534{
535 assert(numTranslatedFragments > 0);
536 Addr base_addr = request->getVaddr();
537
538 DPRINTFS(MinorMem, (&port), "Making packets for request: %s\n", *inst);
539
540 for (unsigned int fragment_index = 0;
541 fragment_index < numTranslatedFragments;
542 fragment_index++)
543 {
544 RequestPtr fragment = fragmentRequests[fragment_index];
545
546 DPRINTFS(MinorMem, (&port), "Making packet %d for request: %s"
547 " (%d, 0x%x)\n",
548 fragment_index, *inst,
549 (fragment->hasPaddr() ? "has paddr" : "no paddr"),
550 (fragment->hasPaddr() ? fragment->getPaddr() : 0));
551
552 Addr fragment_addr = fragment->getVaddr();
553 unsigned int fragment_size = fragment->getSize();
554
555 uint8_t *request_data = NULL;
556
557 if (!isLoad) {
558 /* Split data for Packets. Will become the property of the
559 * outgoing Packets */
560 request_data = new uint8_t[fragment_size];
561 std::memcpy(request_data, data + (fragment_addr - base_addr),
562 fragment_size);
563 }
564
565 assert(fragment->hasPaddr());
566
567 PacketPtr fragment_packet =
568 makePacketForRequest(fragment, isLoad, this, request_data);
569
570 fragmentPackets.push_back(fragment_packet);
571 /* Accumulate flags in parent request */
572 request->setFlags(fragment->getFlags());
573 }
574
575 /* Might as well make the overall/response packet here */
576 /* Get the physical address for the whole request/packet from the first
577 * fragment */
578 request->setPaddr(fragmentRequests[0]->getPaddr());
579 makePacket();
580}
581
582void
584{
585 makeFragmentRequests();
586
587 if (numFragments > 0) {
589 numInTranslationFragments = 0;
590 numTranslatedFragments = 0;
591
592 /* @todo, just do these in sequence for now with
593 * a loop of:
594 * do {
595 * sendNextFragmentToTranslation ; translateTiming ; finish
596 * } while (numTranslatedFragments != numFragments);
597 */
598
599 /* Do first translation */
600 sendNextFragmentToTranslation();
601 } else {
602 disableMemAccess();
604 }
605}
606
609{
610 assert(numIssuedFragments < numTranslatedFragments);
611
612 return fragmentPackets[numIssuedFragments];
613}
614
615void
617{
618 assert(numIssuedFragments < numTranslatedFragments);
619
620 numIssuedFragments++;
621}
622
623void
625{
626 assert(inst->translationFault == NoFault);
627 assert(numRetiredFragments < numTranslatedFragments);
628
629 DPRINTFS(MinorMem, (&port), "Retiring fragment addr: 0x%x size: %d"
630 " offset: 0x%x (retired fragment num: %d)\n",
631 response->req->getVaddr(), response->req->getSize(),
632 request->getVaddr() - response->req->getVaddr(),
633 numRetiredFragments);
634
635 numRetiredFragments++;
636
637 if (skipped) {
638 /* Skip because we already knew the request had faulted or been
639 * skipped */
640 DPRINTFS(MinorMem, (&port), "Skipping this fragment\n");
641 } else if (response->isError()) {
642 /* Mark up the error and leave to execute to handle it */
643 DPRINTFS(MinorMem, (&port), "Fragment has an error, skipping\n");
644 setSkipped();
645 packet->copyError(response);
646 } else {
647 if (isLoad) {
648 if (!data) {
649 /* For a split transfer, a Packet must be constructed
650 * to contain all returning data. This is that packet's
651 * data */
652 data = new uint8_t[request->getSize()];
653 }
654
655 /* Populate the portion of the overall response data represented
656 * by the response fragment */
657 std::memcpy(
658 data + (response->req->getVaddr() - request->getVaddr()),
659 response->getConstPtr<uint8_t>(),
660 response->req->getSize());
661 }
662 }
663
664 /* Complete early if we're skipping are no more in-flight accesses */
665 if (skipped && !hasPacketsInMemSystem()) {
666 DPRINTFS(MinorMem, (&port), "Completed skipped burst\n");
667 setState(Complete);
668 if (packet->needsResponse())
669 packet->makeResponse();
670 }
671
672 if (numRetiredFragments == numTranslatedFragments)
673 setState(Complete);
674
675 if (!skipped && isComplete()) {
676 DPRINTFS(MinorMem, (&port), "Completed burst %d\n", packet != NULL);
677
678 DPRINTFS(MinorMem, (&port), "Retired packet isRead: %d isWrite: %d"
679 " needsResponse: %d packetSize: %s requestSize: %s responseSize:"
680 " %s\n", packet->isRead(), packet->isWrite(),
681 packet->needsResponse(), packet->getSize(), request->getSize(),
682 response->getSize());
683
684 /* A request can become complete by several paths, this is a sanity
685 * check to make sure the packet's data is created */
686 if (!data) {
687 data = new uint8_t[request->getSize()];
688 }
689
690 if (isLoad) {
691 DPRINTFS(MinorMem, (&port), "Copying read data\n");
692 std::memcpy(packet->getPtr<uint8_t>(), data, request->getSize());
693 }
694 packet->makeResponse();
695 }
696
697 /* Packets are all deallocated together in ~SplitLSQRequest */
698}
699
700void
702{
703 unsigned int fragment_index = numTranslatedFragments;
704
705 ThreadContext *thread = port.cpu.getContext(
706 inst->id.threadId);
707
708 DPRINTFS(MinorMem, (&port), "Submitting DTLB request for fragment: %d\n",
709 fragment_index);
710
711 port.numAccessesInDTLB++;
712 numInTranslationFragments++;
713
714 thread->getMMUPtr()->translateTiming(
715 fragmentRequests[fragment_index], thread, this, (isLoad ?
717}
718
719bool
721{
722 /* @todo, support store amalgamation */
723 return slots.size() < numSlots;
724}
725
726void
728{
729 auto found = std::find(slots.begin(), slots.end(), request);
730
731 if (found != slots.end()) {
732 DPRINTF(MinorMem, "Deleting request: %s %s %s from StoreBuffer\n",
733 request, *found, *(request->inst));
734 slots.erase(found);
735
736 delete request;
737 }
738}
739
740void
742{
743 if (!canInsert()) {
744 warn("%s: store buffer insertion without space to insert from"
745 " inst: %s\n", name(), *(request->inst));
746 }
747
748 DPRINTF(MinorMem, "Pushing store: %s into store buffer\n", request);
749
750 numUnissuedAccesses++;
751
752 if (request->state != LSQRequest::Complete)
754
755 slots.push_back(request);
756
757 /* Let's try and wake up the processor for the next cycle to step
758 * the store buffer */
759 lsq.cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
760}
761
764 unsigned int &found_slot)
765{
766 unsigned int slot_index = slots.size() - 1;
767 auto i = slots.rbegin();
769
770 /* Traverse the store buffer in reverse order (most to least recent)
771 * and try to find a slot whose address range overlaps this request */
772 while (ret == NoAddrRangeCoverage && i != slots.rend()) {
773 LSQRequestPtr slot = *i;
774
775 /* Cache maintenance instructions go down via the store path but
776 * they carry no data and they shouldn't be considered
777 * for forwarding */
778 if (slot->packet &&
779 slot->inst->id.threadId == request->inst->id.threadId &&
780 !slot->packet->req->isCacheMaintenance()) {
781 AddrRangeCoverage coverage = slot->containsAddrRangeOf(request);
782
783 if (coverage != NoAddrRangeCoverage) {
784 DPRINTF(MinorMem, "Forwarding: slot: %d result: %s thisAddr:"
785 " 0x%x thisSize: %d slotAddr: 0x%x slotSize: %d\n",
786 slot_index, coverage,
787 request->request->getPaddr(), request->request->getSize(),
788 slot->request->getPaddr(), slot->request->getSize());
789
790 found_slot = slot_index;
791 ret = coverage;
792 }
793 }
794
795 i++;
796 slot_index--;
797 }
798
799 return ret;
800}
801
803void
805 unsigned int slot_number)
806{
807 assert(slot_number < slots.size());
808 assert(load->packet);
809 assert(load->isLoad);
810
811 LSQRequestPtr store = slots[slot_number];
812
813 assert(store->packet);
814 assert(store->containsAddrRangeOf(load) == FullAddrRangeCoverage);
815
816 Addr load_addr = load->request->getPaddr();
817 Addr store_addr = store->request->getPaddr();
818 Addr addr_offset = load_addr - store_addr;
819
820 unsigned int load_size = load->request->getSize();
821
822 DPRINTF(MinorMem, "Forwarding %d bytes for addr: 0x%x from store buffer"
823 " slot: %d addr: 0x%x addressOffset: 0x%x\n",
824 load_size, load_addr, slot_number,
825 store_addr, addr_offset);
826
827 void *load_packet_data = load->packet->getPtr<void>();
828 void *store_packet_data = store->packet->getPtr<uint8_t>() + addr_offset;
829
830 std::memcpy(load_packet_data, store_packet_data, load_size);
831}
832
833void
835{
836 /* Barriers are accounted for as they are cleared from
837 * the queue, not after their transfers are complete */
838 if (!request->isBarrier())
839 numUnissuedAccesses--;
840}
841
842void
844{
845 DPRINTF(MinorMem, "StoreBuffer step numUnissuedAccesses: %d\n",
846 numUnissuedAccesses);
847
848 if (numUnissuedAccesses != 0 && lsq.state == LSQ::MemoryRunning) {
849 /* Clear all the leading barriers */
850 while (!slots.empty() &&
851 slots.front()->isComplete() && slots.front()->isBarrier())
852 {
853 LSQRequestPtr barrier = slots.front();
854
855 DPRINTF(MinorMem, "Clearing barrier for inst: %s\n",
856 *(barrier->inst));
857
858 numUnissuedAccesses--;
859 lsq.clearMemBarrier(barrier->inst);
860 slots.pop_front();
861
862 delete barrier;
863 }
864
865 auto i = slots.begin();
866 bool issued = true;
867 unsigned int issue_count = 0;
868
869 /* Skip trying if the memory system is busy */
870 if (lsq.state == LSQ::MemoryNeedsRetry)
871 issued = false;
872
873 /* Try to issue all stores in order starting from the head
874 * of the queue. Responses are allowed to be retired
875 * out of order */
876 while (issued &&
877 issue_count < storeLimitPerCycle &&
878 lsq.canSendToMemorySystem() &&
879 i != slots.end())
880 {
881 LSQRequestPtr request = *i;
882
883 DPRINTF(MinorMem, "Considering request: %s, sentAllPackets: %d"
884 " state: %s\n",
885 *(request->inst), request->sentAllPackets(),
886 request->state);
887
888 if (request->isBarrier() && request->isComplete()) {
889 /* Give up at barriers */
890 issued = false;
891 } else if (!(request->state == LSQRequest::StoreBufferIssuing &&
892 request->sentAllPackets()))
893 {
894 DPRINTF(MinorMem, "Trying to send request: %s to memory"
895 " system\n", *(request->inst));
896
897 if (lsq.tryToSend(request)) {
898 countIssuedStore(request);
899 issue_count++;
900 } else {
901 /* Don't step on to the next store buffer entry if this
902 * one hasn't issued all its packets as the store
903 * buffer must still enforce ordering */
904 issued = false;
905 }
906 }
907 i++;
908 }
909 }
910}
911
912void
914 bool committed)
915{
916 if (committed) {
917 /* Not already sent to the store buffer as a store request? */
918 if (!inst->inStoreBuffer) {
919 /* Insert an entry into the store buffer to tick off barriers
920 * until there are none in flight */
921 storeBuffer.insert(new BarrierDataRequest(*this, inst));
922 }
923 } else {
924 /* Clear the barrier anyway if it wasn't actually committed */
925 clearMemBarrier(inst);
926 }
927}
928
929void
931{
932 unsigned int size = slots.size();
933 unsigned int i = 0;
934 std::ostringstream os;
935
936 while (i < size) {
937 LSQRequestPtr request = slots[i];
938
939 request->reportData(os);
940
941 i++;
942 if (i < numSlots)
943 os << ',';
944 }
945
946 while (i < numSlots) {
947 os << '-';
948
949 i++;
950 if (i < numSlots)
951 os << ',';
952 }
953
954 minor::minorTrace("addr=%s num_unissued_stores=%d\n", os.str(),
955 numUnissuedAccesses);
956}
957
958void
960{
961 if (state == MemoryNeedsRetry) {
962 DPRINTF(MinorMem, "Request needs retry, not issuing to"
963 " memory until retry arrives\n");
964 return;
965 }
966
967 if (request->state == LSQRequest::InTranslation) {
968 DPRINTF(MinorMem, "Request still in translation, not issuing to"
969 " memory\n");
970 return;
971 }
972
973 assert(request->state == LSQRequest::Translated ||
974 request->state == LSQRequest::RequestIssuing ||
975 request->state == LSQRequest::Failed ||
976 request->state == LSQRequest::Complete);
977
978 if (requests.empty() || requests.front() != request) {
979 DPRINTF(MinorMem, "Request not at front of requests queue, can't"
980 " issue to memory\n");
981 return;
982 }
983
985 DPRINTF(MinorMem, "No space to insert request into transfers"
986 " queue\n");
987 return;
988 }
989
990 if (request->isComplete() || request->state == LSQRequest::Failed) {
991 DPRINTF(MinorMem, "Passing a %s transfer on to transfers"
992 " queue\n", (request->isComplete() ? "completed" : "failed"));
994 request->setSkipped();
996 return;
997 }
998
999 if (!execute.instIsRightStream(request->inst)) {
1000 /* Wrong stream, try to abort the transfer but only do so if
1001 * there are no packets in flight */
1002 if (request->hasPacketsInMemSystem()) {
1003 DPRINTF(MinorMem, "Request's inst. is from the wrong stream,"
1004 " waiting for responses before aborting request\n");
1005 } else {
1006 DPRINTF(MinorMem, "Request's inst. is from the wrong stream,"
1007 " aborting request\n");
1009 request->setSkipped();
1011 }
1012 return;
1013 }
1014
1015 if (request->inst->translationFault != NoFault) {
1016 if (request->inst->staticInst->isPrefetch()) {
1017 DPRINTF(MinorMem, "Not signalling fault for faulting prefetch\n");
1018 }
1019 DPRINTF(MinorMem, "Moving faulting request into the transfers"
1020 " queue\n");
1022 request->setSkipped();
1024 return;
1025 }
1026
1027 bool is_load = request->isLoad;
1028 bool is_llsc = request->request->isLLSC();
1029 bool is_release = request->request->isRelease();
1030 bool is_swap = request->request->isSwap();
1031 bool is_atomic = request->request->isAtomic();
1032 bool bufferable = !(request->request->isStrictlyOrdered() ||
1033 is_llsc || is_swap || is_atomic || is_release);
1034
1035 if (is_load) {
1036 if (numStoresInTransfers != 0) {
1037 DPRINTF(MinorMem, "Load request with stores still in transfers"
1038 " queue, stalling\n");
1039 return;
1040 }
1041 } else {
1042 /* Store. Can it be sent to the store buffer? */
1043 if (bufferable && !request->request->isLocalAccess()) {
1046 DPRINTF(MinorMem, "Moving store into transfers queue\n");
1047 return;
1048 }
1049 }
1050
1051 // Process store conditionals or store release after all previous
1052 // stores are completed
1053 if (((!is_load && is_llsc) || is_release) &&
1055 DPRINTF(MinorMem, "Memory access needs to wait for store buffer"
1056 " to drain\n");
1057 return;
1058 }
1059
1060 /* Check if this is the head instruction (and so must be executable as
1061 * its stream sequence number was checked above) for loads which must
1062 * not be speculatively issued and stores which must be issued here */
1063 if (!bufferable) {
1064 if (!execute.instIsHeadInst(request->inst)) {
1065 DPRINTF(MinorMem, "Memory access not the head inst., can't be"
1066 " sure it can be performed, not issuing\n");
1067 return;
1068 }
1069
1070 unsigned int forwarding_slot = 0;
1071
1072 if (storeBuffer.canForwardDataToLoad(request, forwarding_slot) !=
1074 {
1075 // There's at least another request that targets the same
1076 // address and is staying in the storeBuffer. Since our
1077 // request is non-bufferable (e.g., strictly ordered or atomic),
1078 // we must wait for the other request in the storeBuffer to
1079 // complete before we can issue this non-bufferable request.
1080 // This is to make sure that the order they access the cache is
1081 // correct.
1082 DPRINTF(MinorMem, "Memory access can receive forwarded data"
1083 " from the store buffer, but need to wait for store buffer"
1084 " to drain\n");
1085 return;
1086 }
1087 }
1088
1089 /* True: submit this packet to the transfers queue to be sent to the
1090 * memory system.
1091 * False: skip the memory and push a packet for this request onto
1092 * requests */
1093 bool do_access = true;
1094
1095 if (!is_llsc) {
1096 /* Check for match in the store buffer */
1097 if (is_load) {
1098 unsigned int forwarding_slot = 0;
1099 AddrRangeCoverage forwarding_result =
1101 forwarding_slot);
1102
1103 switch (forwarding_result) {
1105 /* Forward data from the store buffer into this request and
1106 * repurpose this request's packet into a response packet */
1107 storeBuffer.forwardStoreData(request, forwarding_slot);
1108 request->packet->makeResponse();
1109
1110 /* Just move between queues, no access */
1111 do_access = false;
1112 break;
1114 DPRINTF(MinorMem, "Load partly satisfied by store buffer"
1115 " data. Must wait for the store to complete\n");
1116 return;
1117 break;
1119 DPRINTF(MinorMem, "No forwardable data from store buffer\n");
1120 /* Fall through to try access */
1121 break;
1122 }
1123 }
1124 } else {
1125 if (!canSendToMemorySystem()) {
1126 DPRINTF(MinorMem, "Can't send request to memory system yet\n");
1127 return;
1128 }
1129
1130 SimpleThread &thread = *cpu.threads[request->inst->id.threadId];
1131
1132 std::unique_ptr<PCStateBase> old_pc(thread.pcState().clone());
1133 ExecContext context(cpu, thread, execute, request->inst);
1134
1135 /* Handle LLSC requests and tests */
1136 if (is_load) {
1137 thread.getIsaPtr()->handleLockedRead(&context, request->request);
1138 } else {
1139 do_access = thread.getIsaPtr()->handleLockedWrite(&context,
1140 request->request, cacheBlockMask);
1141
1142 if (!do_access) {
1143 DPRINTF(MinorMem, "Not perfoming a memory "
1144 "access for store conditional\n");
1145 }
1146 }
1147 thread.pcState(*old_pc);
1148 }
1149
1150 /* See the do_access comment above */
1151 if (do_access) {
1152 if (!canSendToMemorySystem()) {
1153 DPRINTF(MinorMem, "Can't send request to memory system yet\n");
1154 return;
1155 }
1156
1157 /* Remember if this is an access which can't be idly
1158 * discarded by an interrupt */
1159 if (!bufferable && !request->issuedToMemory) {
1161 request->issuedToMemory = true;
1162 }
1163
1164 if (tryToSend(request)) {
1166 }
1167 } else {
1170 }
1171}
1172
1173bool
1175{
1176 bool ret = false;
1177
1178 if (!canSendToMemorySystem()) {
1179 DPRINTF(MinorMem, "Can't send request: %s yet, no space in memory\n",
1180 *(request->inst));
1181 } else {
1182 PacketPtr packet = request->getHeadPacket();
1183
1184 DPRINTF(MinorMem, "Trying to send request: %s addr: 0x%x\n",
1185 *(request->inst), packet->req->getVaddr());
1186
1187 /* The sender state of the packet *must* be an LSQRequest
1188 * so the response can be correctly handled */
1189 assert(packet->findNextSenderState<LSQRequest>());
1190
1191 if (request->request->isLocalAccess()) {
1192 ThreadContext *thread =
1194 request->request->contextId()));
1195
1196 if (request->isLoad)
1197 DPRINTF(MinorMem, "IPR read inst: %s\n", *(request->inst));
1198 else
1199 DPRINTF(MinorMem, "IPR write inst: %s\n", *(request->inst));
1200
1201 request->request->localAccessor(thread, packet);
1202
1203 request->stepToNextPacket();
1204 ret = request->sentAllPackets();
1205
1206 if (!ret) {
1207 DPRINTF(MinorMem, "IPR access has another packet: %s\n",
1208 *(request->inst));
1209 }
1210
1211 if (ret)
1213 else
1215 } else if (dcachePort.sendTimingReq(packet)) {
1216 DPRINTF(MinorMem, "Sent data memory request\n");
1217
1219
1220 request->stepToNextPacket();
1221
1222 ret = request->sentAllPackets();
1223
1224 switch (request->state) {
1227 /* Fully or partially issued a request in the transfers
1228 * queue */
1230 break;
1233 /* Fully or partially issued a request in the store
1234 * buffer */
1236 break;
1237 default:
1238 panic("Unrecognized LSQ request state %d.", request->state);
1239 }
1240
1242 } else {
1243 DPRINTF(MinorMem,
1244 "Sending data memory request - needs retry\n");
1245
1246 /* Needs to be resent, wait for that */
1248 retryRequest = request;
1249
1250 switch (request->state) {
1254 break;
1258 break;
1259 default:
1260 panic("Unrecognized LSQ request state %d.", request->state);
1261 }
1262 }
1263 }
1264
1265 if (ret)
1266 threadSnoop(request);
1267
1268 return ret;
1269}
1270
1271void
1273{
1274 assert(!requests.empty() && requests.front() == request);
1275 assert(transfers.unreservedRemainingSpace() != 0);
1276
1277 /* Need to count the number of stores in the transfers
1278 * queue so that loads know when their store buffer forwarding
1279 * results will be correct (only when all those stores
1280 * have reached the store buffer) */
1281 if (!request->isLoad)
1283
1284 requests.pop();
1285 transfers.push(request);
1286}
1287
1288bool
1294
1295bool
1297{
1298 LSQRequestPtr request =
1300
1301 DPRINTF(MinorMem, "Received response packet inst: %s"
1302 " addr: 0x%x cmd: %s\n",
1303 *(request->inst), response->getAddr(),
1304 response->cmd.toString());
1305
1307
1308 if (response->isError()) {
1309 DPRINTF(MinorMem, "Received error response packet: %s\n",
1310 *request->inst);
1311 }
1312
1313 switch (request->state) {
1316 /* Response to a request from the transfers queue */
1317 request->retireResponse(response);
1318
1319 DPRINTF(MinorMem, "Has outstanding packets?: %d %d\n",
1320 request->hasPacketsInMemSystem(), request->isComplete());
1321
1322 break;
1325 /* Response to a request from the store buffer */
1326 request->retireResponse(response);
1327
1328 /* Remove completed requests unless they are barriers (which will
1329 * need to be removed in order */
1330 if (request->isComplete()) {
1331 if (!request->isBarrier()) {
1332 storeBuffer.deleteRequest(request);
1333 } else {
1334 DPRINTF(MinorMem, "Completed transfer for barrier: %s"
1335 " leaving the request as it is also a barrier\n",
1336 *(request->inst));
1337 }
1338 }
1339 break;
1340 default:
1341 panic("Shouldn't be allowed to receive a response from another state");
1342 }
1343
1344 /* We go to idle even if there are more things in the requests queue
1345 * as it's the job of step to actually step us on to the next
1346 * transaction */
1347
1348 /* Let's try and wake up the processor for the next cycle */
1350
1351 /* Never busy */
1352 return true;
1353}
1354
1355void
1357{
1358 DPRINTF(MinorMem, "Received retry request\n");
1359
1360 assert(state == MemoryNeedsRetry);
1361
1362 switch (retryRequest->state) {
1364 /* Retry in the requests queue */
1366 break;
1368 /* Retry in the store buffer */
1370 break;
1371 default:
1372 panic("Unrecognized retry request state %d.", retryRequest->state);
1373 }
1374
1375 /* Set state back to MemoryRunning so that the following
1376 * tryToSend can actually send. Note that this won't
1377 * allow another transfer in as tryToSend should
1378 * issue a memory request and either succeed for this
1379 * request or return the LSQ back to MemoryNeedsRetry */
1381
1382 /* Try to resend the request */
1383 if (tryToSend(retryRequest)) {
1384 /* Successfully sent, need to move the request */
1385 switch (retryRequest->state) {
1387 /* In the requests queue */
1389 break;
1391 /* In the store buffer */
1393 break;
1394 default:
1395 panic("Unrecognized retry request state %d.", retryRequest->state);
1396 }
1397
1398 retryRequest = NULL;
1399 }
1400}
1401
1402LSQ::LSQ(std::string name_, std::string dcache_port_name_,
1403 MinorCPU &cpu_, Execute &execute_,
1404 unsigned int in_memory_system_limit, unsigned int line_width,
1405 unsigned int requests_queue_size, unsigned int transfers_queue_size,
1406 unsigned int store_buffer_size,
1407 unsigned int store_buffer_cycle_store_limit) :
1408 Named(name_),
1409 cpu(cpu_),
1410 execute(execute_),
1411 dcachePort(dcache_port_name_, *this, cpu_),
1412 lastMemBarrier(cpu.numThreads, 0),
1414 inMemorySystemLimit(in_memory_system_limit),
1415 lineWidth((line_width == 0 ? cpu.cacheLineSize() : line_width)),
1416 requests(name_ + ".requests", "addr", requests_queue_size),
1417 transfers(name_ + ".transfers", "addr", transfers_queue_size),
1418 storeBuffer(name_ + ".storeBuffer",
1419 *this, store_buffer_size, store_buffer_cycle_store_limit),
1424 retryRequest(NULL),
1425 cacheBlockMask(~(cpu_.cacheLineSize() - 1))
1426{
1427 if (in_memory_system_limit < 1) {
1428 fatal("%s: executeMaxAccessesInMemory must be >= 1 (%d)\n", name_,
1429 in_memory_system_limit);
1430 }
1431
1432 if (store_buffer_cycle_store_limit < 1) {
1433 fatal("%s: executeLSQMaxStoreBufferStoresPerCycle must be"
1434 " >= 1 (%d)\n", name_, store_buffer_cycle_store_limit);
1435 }
1436
1437 if (requests_queue_size < 1) {
1438 fatal("%s: executeLSQRequestsQueueSize must be"
1439 " >= 1 (%d)\n", name_, requests_queue_size);
1440 }
1441
1442 if (transfers_queue_size < 1) {
1443 fatal("%s: executeLSQTransfersQueueSize must be"
1444 " >= 1 (%d)\n", name_, transfers_queue_size);
1445 }
1446
1447 if (store_buffer_size < 1) {
1448 fatal("%s: executeLSQStoreBufferSize must be"
1449 " >= 1 (%d)\n", name_, store_buffer_size);
1450 }
1451
1452 if ((lineWidth & (lineWidth - 1)) != 0) {
1453 fatal("%s: lineWidth: %d must be a power of 2\n", name(), lineWidth);
1454 }
1455}
1456
1458{ }
1459
1461{
1462 if (packet)
1463 delete packet;
1464 if (data)
1465 delete [] data;
1466}
1467
1474void
1476{
1477 /* Try to move address-translated requests between queues and issue
1478 * them */
1479 if (!requests.empty())
1481
1482 storeBuffer.step();
1483}
1484
1487{
1488 LSQ::LSQRequestPtr ret = NULL;
1489
1490 if (!transfers.empty()) {
1491 LSQRequestPtr request = transfers.front();
1492
1493 /* Same instruction and complete access or a store that's
1494 * capable of being moved to the store buffer */
1495 if (request->inst->id == inst->id) {
1496 bool complete = request->isComplete();
1497 bool can_store = storeBuffer.canInsert();
1498 bool to_store_buffer = request->state ==
1500
1501 if ((complete && !(request->isBarrier() && !can_store)) ||
1502 (to_store_buffer && can_store))
1503 {
1504 ret = request;
1505 }
1506 }
1507 }
1508
1509 if (ret) {
1510 DPRINTF(MinorMem, "Found matching memory response for inst: %s\n",
1511 *inst);
1512 } else {
1513 DPRINTF(MinorMem, "No matching memory response for inst: %s\n",
1514 *inst);
1515 }
1516
1517 return ret;
1518}
1519
1520void
1522{
1523 assert(!transfers.empty() && transfers.front() == response);
1524
1525 transfers.pop();
1526
1527 if (!response->isLoad)
1529
1530 if (response->issuedToMemory)
1532
1533 if (response->state != LSQRequest::StoreInStoreBuffer) {
1534 DPRINTF(MinorMem, "Deleting %s request: %s\n",
1535 (response->isLoad ? "load" : "store"),
1536 *(response->inst));
1537
1538 delete response;
1539 }
1540}
1541
1542void
1544{
1545 assert(request->state == LSQRequest::StoreToStoreBuffer);
1546
1547 DPRINTF(MinorMem, "Sending store: %s to store buffer\n",
1548 *(request->inst));
1549
1550 request->inst->inStoreBuffer = true;
1551
1552 storeBuffer.insert(request);
1553}
1554
1555bool
1557{
1558 return requests.empty() && transfers.empty() &&
1560}
1561
1562bool
1564{
1565 bool ret = false;
1566
1567 if (canSendToMemorySystem()) {
1568 bool have_translated_requests = !requests.empty() &&
1571
1572 ret = have_translated_requests ||
1574 }
1575
1576 if (ret)
1577 DPRINTF(Activity, "Need to tick\n");
1578
1579 return ret;
1580}
1581
1582Fault
1583LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
1584 unsigned int size, Addr addr, Request::Flags flags,
1585 uint64_t *res, AtomicOpFunctorPtr amo_op,
1586 const std::vector<bool>& byte_enable)
1587{
1588 assert(inst->translationFault == NoFault || inst->inLSQ);
1589
1590 if (inst->inLSQ) {
1591 return inst->translationFault;
1592 }
1593
1594 bool needs_burst = transferNeedsBurst(addr, size, lineWidth);
1595
1596 if (needs_burst && inst->staticInst->isAtomic()) {
1597 // AMO requests that access across a cache line boundary are not
1598 // allowed since the cache does not guarantee AMO ops to be executed
1599 // atomically in two cache lines
1600 // For ISAs such as x86 that requires AMO operations to work on
1601 // accesses that cross cache-line boundaries, the cache needs to be
1602 // modified to support locking both cache lines to guarantee the
1603 // atomicity.
1604 panic("Do not expect cross-cache-line atomic memory request\n");
1605 }
1606
1607 LSQRequestPtr request;
1608
1609 /* Copy given data into the request. The request will pass this to the
1610 * packet and then it will own the data */
1611 uint8_t *request_data = NULL;
1612
1613 DPRINTF(MinorMem, "Pushing request (%s) addr: 0x%x size: %d flags:"
1614 " 0x%x%s lineWidth : 0x%x\n",
1615 (isLoad ? "load" : "store/atomic"), addr, size, flags,
1616 (needs_burst ? " (needs burst)" : ""), lineWidth);
1617
1618 if (!isLoad) {
1619 /* Request_data becomes the property of a ...DataRequest (see below)
1620 * and destroyed by its destructor */
1621 request_data = new uint8_t[size];
1622 if (inst->staticInst->isAtomic() ||
1624 /* For atomic or store-no-data, just use zeroed data */
1625 std::memset(request_data, 0, size);
1626 } else {
1627 std::memcpy(request_data, data, size);
1628 }
1629 }
1630
1631 if (needs_burst) {
1632 request = new SplitDataRequest(
1633 *this, inst, isLoad, request_data, res);
1634 } else {
1635 request = new SingleDataRequest(
1636 *this, inst, isLoad, request_data, res);
1637 }
1638
1639 if (inst->traceData)
1640 inst->traceData->setMem(addr, size, flags);
1641
1642 int cid = cpu.threads[inst->id.threadId]->getTC()->contextId();
1643 request->request->setContext(cid);
1644 request->request->setVirt(
1645 addr, size, flags, cpu.dataRequestorId(),
1646 /* I've no idea why we need the PC, but give it */
1647 inst->pc->instAddr(), std::move(amo_op));
1648 request->request->setByteEnable(byte_enable);
1649
1650 /* If the request is marked as NO_ACCESS, setup a local access
1651 * doing nothing */
1652 if (flags.isSet(Request::NO_ACCESS)) {
1653 assert(!request->request->isLocalAccess());
1654 request->request->setLocalAccessor(
1655 [] (ThreadContext *tc, PacketPtr pkt) { return Cycles(1); });
1656 }
1657
1658 requests.push(request);
1659 inst->inLSQ = true;
1660 request->startAddrTranslation();
1661
1662 return inst->translationFault;
1663}
1664
1665void
1667{
1668 LSQRequestPtr request = new FailedDataRequest(*this, inst);
1669 requests.push(request);
1670}
1671
1672void
1674{
1675 minor::minorTrace("state=%s in_tlb_mem=%d/%d stores_in_transfers=%d"
1676 " lastMemBarrier=%d\n",
1682}
1683
1684LSQ::StoreBuffer::StoreBuffer(std::string name_, LSQ &lsq_,
1685 unsigned int store_buffer_size,
1686 unsigned int store_limit_per_cycle) :
1687 Named(name_), lsq(lsq_),
1688 numSlots(store_buffer_size),
1689 storeLimitPerCycle(store_limit_per_cycle),
1690 slots(),
1691 numUnissuedAccesses(0)
1692{
1693}
1694
1696makePacketForRequest(const RequestPtr &request, bool isLoad,
1697 Packet::SenderState *sender_state, PacketDataPtr data)
1698{
1699 PacketPtr ret = isLoad ? Packet::createRead(request)
1700 : Packet::createWrite(request);
1701
1702 if (sender_state)
1703 ret->pushSenderState(sender_state);
1704
1705 if (isLoad) {
1706 ret->allocate();
1707 } else if (!request->isCacheMaintenance()) {
1708 // CMOs are treated as stores but they don't have data. All
1709 // stores otherwise need to allocate for data.
1710 ret->dataDynamic(data);
1711 }
1712
1713 return ret;
1714}
1715
1716void
1718{
1719 assert(inst->isInst() && inst->staticInst->isFullMemBarrier());
1720 assert(inst->id.execSeqNum > lastMemBarrier[inst->id.threadId]);
1721
1722 /* Remember the barrier. We only have a notion of one
1723 * barrier so this may result in some mem refs being
1724 * delayed if they are between barriers */
1725 lastMemBarrier[inst->id.threadId] = inst->id.execSeqNum;
1726}
1727
1728void
1730{
1731 assert(inst->translationFault == NoFault);
1732
1733 /* Make the function idempotent */
1734 if (packet)
1735 return;
1736
1737 packet = makePacketForRequest(request, isLoad, this, data);
1738 /* Null the ret data so we know not to deallocate it when the
1739 * ret is destroyed. The data now belongs to the ret and
1740 * the ret is responsible for its destruction */
1741 data = NULL;
1742}
1743
1744std::ostream &
1745operator <<(std::ostream &os, LSQ::MemoryState state)
1746{
1747 switch (state) {
1748 case LSQ::MemoryRunning:
1749 os << "MemoryRunning";
1750 break;
1752 os << "MemoryNeedsRetry";
1753 break;
1754 default:
1755 os << "MemoryState-" << static_cast<int>(state);
1756 break;
1757 }
1758 return os;
1759}
1760
1761void
1763{
1764 /* LLSC operations in Minor can't be speculative and are executed from
1765 * the head of the requests queue. We shouldn't need to do more than
1766 * this action on snoops. */
1767 for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
1768 if (cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) {
1769 cpu.wakeup(tid);
1770 }
1771 }
1772
1773 if (pkt->isInvalidate() || pkt->isWrite()) {
1774 for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
1776 pkt, cacheBlockMask);
1777 }
1778 }
1779}
1780
1781void
1783{
1784 /* LLSC operations in Minor can't be speculative and are executed from
1785 * the head of the requests queue. We shouldn't need to do more than
1786 * this action on snoops. */
1787 ThreadID req_tid = request->inst->id.threadId;
1788 PacketPtr pkt = request->packet;
1789
1790 for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
1791 if (tid != req_tid) {
1792 if (cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) {
1793 cpu.wakeup(tid);
1794 }
1795
1796 if (pkt->isInvalidate() || pkt->isWrite()) {
1799 }
1800 }
1801 }
1802}
1803
1804} // namespace minor
1805} // namespace gem5
#define DPRINTFS(x, s,...)
Definition trace.hh:217
#define DPRINTF(x,...)
Definition trace.hh:210
const char data[]
RequestorID dataRequestorId() const
Reads this CPU's unique data requestor ID.
Definition base.hh:193
AddressMonitor * getCpuAddrMonitor(ThreadID tid)
Definition base.hh:656
virtual ThreadContext * getContext(int tn)
Given a thread num get tho thread context for it.
Definition base.hh:288
ThreadID numThreads
Number of threads we're actually simulating (<= SMT_MAX_THREADS).
Definition base.hh:390
ThreadID contextToThread(ContextID cid)
Convert ContextID to threadID.
Definition base.hh:299
virtual void handleLockedRead(const RequestPtr &req)
Definition isa.hh:97
virtual bool handleLockedWrite(const RequestPtr &req, Addr cacheBlockMask)
Definition isa.hh:104
virtual void handleLockedSnoop(PacketPtr pkt, Addr cacheBlockMask)
Definition isa.hh:115
virtual void translateTiming(const RequestPtr &req, ThreadContext *tc, Translation *translation, Mode mode)
Definition mmu.cc:111
Cycles is a wrapper class for representing cycle counts, i.e.
Definition types.hh:79
const std::string & toString() const
Return the string to a cmd given by idx.
Definition packet.hh:276
MinorCPU is an in-order CPU model with four fixed pipeline stages:
Definition cpu.hh:85
void wakeup(ThreadID tid) override
Definition cpu.cc:143
void wakeupOnEvent(unsigned int stage_id)
Interface for stages to signal that they have become active after a callback or eventq event where th...
Definition cpu.cc:291
std::vector< minor::MinorThread * > threads
These are thread state-representing objects for this CPU.
Definition cpu.hh:100
Interface for things with names.
Definition named.hh:39
virtual std::string name() const
Definition named.hh:47
virtual PCStateBase * clone() const =0
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition packet.hh:295
Addr getAddr() const
Definition packet.hh:807
bool isError() const
Definition packet.hh:622
static PacketPtr createWrite(const RequestPtr &req)
Definition packet.hh:1044
void makeResponse()
Take a request packet and modify it in place to be suitable for returning as a response to that reque...
Definition packet.hh:1062
T * findNextSenderState() const
Go through the sender state stack and return the first instance that is of type T (as determined by a...
Definition packet.hh:575
void pushSenderState(SenderState *sender_state)
Push a new sender state to the packet and make the current sender state the predecessor of the new on...
Definition packet.cc:334
SenderState * popSenderState()
Pop the top of the state stack and return a pointer to it.
Definition packet.cc:342
T * getPtr()
get a pointer to the data ptr.
Definition packet.hh:1225
bool isWrite() const
Definition packet.hh:594
static PacketPtr createRead(const RequestPtr &req)
Constructor-like methods that return Packets based on Request objects.
Definition packet.hh:1038
RequestPtr req
A pointer to the original request.
Definition packet.hh:377
unsigned getSize() const
Definition packet.hh:817
const T * getConstPtr() const
Definition packet.hh:1234
void dataDynamic(T *p)
Set the data pointer to a value that should have delete [] called on it.
Definition packet.hh:1213
MemCmd cmd
The command field of the packet.
Definition packet.hh:372
bool isInvalidate() const
Definition packet.hh:609
void allocate()
Allocate memory for the packet.
Definition packet.hh:1367
bool sendTimingReq(PacketPtr pkt)
Attempt to send a timing request to the responder port by calling its corresponding receive function.
Definition port.hh:603
@ NO_ACCESS
The request should not cause a memory access.
Definition request.hh:146
static const FlagsType STORE_NO_DATA
Definition request.hh:260
The SimpleThread object provides a combination of the ThreadState object and the ThreadContext interf...
const PCStateBase & pcState() const override
BaseISA * getIsaPtr() const override
ThreadContext is the external interface to all thread state for anything outside of the CPU.
virtual BaseISA * getIsaPtr() const =0
virtual BaseMMU * getMMUPtr()=0
ExecContext bears the exec_context interface for Minor.
void setMemAccPredicate(bool val) override
Execute stage.
Definition execute.hh:68
bool instIsRightStream(MinorDynInstPtr inst)
Does the given instruction have the right stream sequence number to be committed?
Definition execute.cc:1873
bool instIsHeadInst(MinorDynInstPtr inst)
Returns true if the given instruction is at the head of the inFlightInsts instruction queue.
Definition execute.cc:1879
Request for doing barrier accounting in the store buffer.
Definition lsq.hh:334
FailedDataRequest represents requests from instructions that failed their predicates but need to ride...
Definition lsq.hh:324
Derived SenderState to carry data access info.
Definition lsq.hh:129
static AddrRangeCoverage containsAddrRangeOf(Addr req1_addr, unsigned int req1_size, Addr req2_addr, unsigned int req2_size)
Does address range req1 (req1_addr to req1_addr + req1_size - 1) fully cover, partially cover or not ...
Definition lsq.cc:121
void tryToSuppressFault()
Instructions may want to suppress translation faults (e.g.
Definition lsq.cc:78
bool needsToBeSentToStoreBuffer()
This request, once processed by the requests/transfers queues, will need to go to the store buffer.
Definition lsq.cc:164
RequestPtr request
The underlying request of this LSQRequest.
Definition lsq.hh:152
void makePacket()
Make a packet to use with the memory transaction.
Definition lsq.cc:1729
bool isLoad
Load/store indication used for building packet.
Definition lsq.hh:139
void setState(LSQRequestState new_state)
Set state and output trace output.
Definition lsq.cc:170
bool issuedToMemory
This in an access other than a normal cacheable load that's visited the memory system.
Definition lsq.hh:164
void setSkipped()
Set this request as having been skipped before a memory transfer was attempt.
Definition lsq.hh:219
virtual bool sentAllPackets()=0
Have all packets been sent?
virtual PacketPtr getHeadPacket()=0
Get the next packet to issue for this request.
PacketDataPtr data
Dynamically allocated and populated data carried for building write packets.
Definition lsq.hh:143
void reportData(std::ostream &os) const
MinorTrace report interface.
Definition lsq.cc:186
virtual bool hasPacketsInMemSystem()=0
True if this request has any issued packets in the memory system and so can't be interrupted until it...
virtual void startAddrTranslation()=0
Start the address translation process for this request.
LSQRequestState state
Definition lsq.hh:191
LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_, PacketDataPtr data_=NULL, uint64_t *res_=NULL)
Definition lsq.cc:59
virtual void stepToNextPacket()=0
Step to the next packet for the next call to getHeadPacket.
MinorDynInstPtr inst
Instruction which made this request.
Definition lsq.hh:135
virtual bool isBarrier()
Is this a request a barrier?
Definition lsq.cc:158
void completeDisabledMemAccess()
Definition lsq.cc:97
bool isComplete() const
Has this request been completed.
Definition lsq.cc:178
virtual void retireResponse(PacketPtr packet_)=0
Retire a response packet into the LSQRequest packet possibly completing this transfer.
SingleDataRequest is used for requests that don't fragment.
Definition lsq.hh:346
void startAddrTranslation()
Send single translation request.
Definition lsq.cc:301
void finish(const Fault &fault_, const RequestPtr &request_, ThreadContext *tc, BaseMMU::Mode mode)
TLB interace.
Definition lsq.cc:271
void retireResponse(PacketPtr packet_)
Keep the given packet as the response packet LSQRequest::packet.
Definition lsq.cc:325
void stepToNextPacket()
Step on numIssuedFragments.
Definition lsq.cc:616
void finish(const Fault &fault_, const RequestPtr &request_, ThreadContext *tc, BaseMMU::Mode mode)
TLB response interface.
Definition lsq.cc:334
void sendNextFragmentToTranslation()
Part of the address translation loop, see startAddTranslation.
Definition lsq.cc:701
void retireResponse(PacketPtr packet_)
For loads, paste the response data into the main response packet.
Definition lsq.cc:624
void makeFragmentRequests()
Make all the Requests for this transfer's fragments so that those requests can be sent for address tr...
Definition lsq.cc:419
PacketPtr getHeadPacket()
Get the head packet as counted by numIssuedFragments.
Definition lsq.cc:608
void makeFragmentPackets()
Make the packets to go with the requests so they can be sent to the memory system.
Definition lsq.cc:533
SplitDataRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_, PacketDataPtr data_=NULL, uint64_t *res_=NULL)
Definition lsq.cc:392
void startAddrTranslation()
Start a loop of do { sendNextFragmentToTranslation ; translateTiming ; finish } while (numTranslatedF...
Definition lsq.cc:583
bool isDrained() const
Drained if there is absolutely nothing left in the buffer.
Definition lsq.hh:528
void step()
Try to issue more stores to memory.
Definition lsq.cc:843
AddrRangeCoverage canForwardDataToLoad(LSQRequestPtr request, unsigned int &found_slot)
Look for a store which satisfies the given load.
Definition lsq.cc:763
void forwardStoreData(LSQRequestPtr load, unsigned int slot_number)
Fill the given packet with appropriate date from slot slot_number.
Definition lsq.cc:804
void minorTrace() const
Report queue contents for MinorTrace.
Definition lsq.cc:930
StoreBuffer(std::string name_, LSQ &lsq_, unsigned int store_buffer_size, unsigned int store_limit_per_cycle)
Definition lsq.cc:1684
void countIssuedStore(LSQRequestPtr request)
Count a store being issued to memory by decrementing numUnissuedAccesses.
Definition lsq.cc:834
void insert(LSQRequestPtr request)
Insert a request at the back of the queue.
Definition lsq.cc:741
unsigned int numUnissuedStores()
Number of stores in the store buffer which have not been completely issued to the memory system.
Definition lsq.hh:520
bool canInsert() const
Can a new request be inserted into the queue?
Definition lsq.cc:720
void deleteRequest(LSQRequestPtr request)
Delete the given request and free the slot it occupied.
Definition lsq.cc:727
void tryToSendToTransfers(LSQRequestPtr request)
Try and issue a memory access for a translated request at the head of the requests queue.
Definition lsq.cc:959
bool needsToTick()
May need to be ticked next cycle as one of the queues contains an actionable transfers or address tra...
Definition lsq.cc:1563
void recvTimingSnoopReq(PacketPtr pkt)
Definition lsq.cc:1762
void issuedMemBarrierInst(MinorDynInstPtr inst)
A memory barrier instruction has been issued, remember its execSeqNum that we can avoid issuing memor...
Definition lsq.cc:1717
bool tryToSend(LSQRequestPtr request)
Try to send (or resend) a memory request's next/only packet to the memory system.
Definition lsq.cc:1174
void minorTrace() const
Definition lsq.cc:1673
bool canSendToMemorySystem()
Can a request be sent to the memory system.
Definition lsq.cc:1289
MemoryState
State of memory access for head access.
Definition lsq.hh:77
@ MemoryNeedsRetry
Definition lsq.hh:79
std::vector< InstSeqNum > lastMemBarrier
Most recent execSeqNum of a memory barrier instruction or 0 if there are no in-flight barriers.
Definition lsq.hh:541
StoreBuffer storeBuffer
Definition lsq.hh:594
unsigned int numAccessesInDTLB
Number of requests in the DTLB in the requests queue.
Definition lsq.hh:606
void sendStoreToStoreBuffer(LSQRequestPtr request)
A store has been committed, please move it to the store buffer.
Definition lsq.cc:1543
void pushFailedRequest(MinorDynInstPtr inst)
Push a predicate failed-representing request into the queues just to maintain commit order.
Definition lsq.cc:1666
LSQRequestPtr retryRequest
The request (from either requests or the store buffer) which is currently waiting have its memory acc...
Definition lsq.hh:619
void threadSnoop(LSQRequestPtr request)
Snoop other threads monitors on memory system accesses.
Definition lsq.cc:1782
void recvReqRetry()
Definition lsq.cc:1356
const Addr lineWidth
Memory system access width (and snap) in bytes.
Definition lsq.hh:551
AddrRangeCoverage
Coverage of one address range with another.
Definition lsq.hh:88
@ PartialAddrRangeCoverage
Definition lsq.hh:89
@ NoAddrRangeCoverage
Definition lsq.hh:91
@ FullAddrRangeCoverage
Definition lsq.hh:90
virtual ~LSQ()
Definition lsq.cc:1457
DcachePort dcachePort
Definition lsq.hh:120
Fault pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data, unsigned int size, Addr addr, Request::Flags flags, uint64_t *res, AtomicOpFunctorPtr amo_op, const std::vector< bool > &byte_enable=std::vector< bool >())
Single interface for readMem/writeMem/amoMem to issue requests into the LSQ.
Definition lsq.cc:1583
unsigned int numAccessesInMemorySystem
Count of the number of mem.
Definition lsq.hh:603
unsigned int numAccessesIssuedToMemory
The number of accesses which have been issued to the memory system but have not been committed/discar...
Definition lsq.hh:615
LSQ(std::string name_, std::string dcache_port_name_, MinorCPU &cpu_, Execute &execute_, unsigned int max_accesses_in_memory_system, unsigned int line_width, unsigned int requests_queue_size, unsigned int transfers_queue_size, unsigned int store_buffer_size, unsigned int store_buffer_cycle_store_limit)
Definition lsq.cc:1402
Execute & execute
Definition lsq.hh:72
const unsigned int inMemorySystemLimit
Maximum number of in-flight accesses issued to the memory system.
Definition lsq.hh:548
LSQQueue requests
requests contains LSQRequests which have been issued to the TLB by calling ExecContext::readMem/write...
Definition lsq.hh:574
unsigned int numStoresInTransfers
The number of stores in the transfers queue.
Definition lsq.hh:610
friend std::ostream & operator<<(std::ostream &os, MemoryState state)
Print MemoryState values as shown in the enum definition.
Definition lsq.cc:1745
void step()
Step checks the queues to see if their are issuable transfers which were not otherwise picked up by t...
Definition lsq.cc:1475
MemoryState state
Retry state of last issued memory transfer.
Definition lsq.hh:545
LSQQueue transfers
Once issued to memory (or, for stores, just had their state changed to StoreToStoreBuffer) LSQRequest...
Definition lsq.hh:583
void completeMemBarrierInst(MinorDynInstPtr inst, bool committed)
Complete a barrier instruction.
Definition lsq.cc:913
Addr cacheBlockMask
Address Mask for a cache block (e.g.
Definition lsq.hh:622
void popResponse(LSQRequestPtr response)
Sanity check and pop the head response.
Definition lsq.cc:1521
MinorCPU & cpu
My owner(s)
Definition lsq.hh:71
void moveFromRequestsToTransfers(LSQRequestPtr request)
Move a request between queues.
Definition lsq.cc:1272
LSQRequestPtr findResponse(MinorDynInstPtr inst)
Returns a response if it's at the head of the transfers queue and it's either complete or can be sent...
Definition lsq.cc:1486
bool isDrained()
Is there nothing left in the LSQ.
Definition lsq.cc:1556
bool recvTimingResp(PacketPtr pkt)
Memory interface.
Definition lsq.cc:1296
void clearMemBarrier(MinorDynInstPtr inst)
Clear a barrier (if it's the last one marked up in lastMemBarrier)
Definition lsq.cc:258
bool empty() const
Is the queue empty?
Definition buffers.hh:508
void minorTrace() const
Definition buffers.hh:511
void pop()
Pop the head item.
Definition buffers.hh:505
unsigned int unreservedRemainingSpace() const
Like remainingSpace but does not count reserved spaces.
Definition buffers.hh:492
ElemType & front()
Head value.
Definition buffers.hh:500
void push(ElemType &data)
Push an element into the buffer if it isn't a bubble.
Definition buffers.hh:432
STL vector class.
Definition stl.hh:37
All the fun of executing instructions from Decode and sending branch/new instruction stream info.
std::unique_ptr< AtomicOpFunctor > AtomicOpFunctorPtr
Definition amo.hh:269
#define panic(...)
This implements a cprintf based panic() function.
Definition logging.hh:188
#define fatal(...)
This implements a cprintf based fatal() function.
Definition logging.hh:200
atomic_var_t state
Definition helpers.cc:211
uint8_t flags
Definition helpers.cc:87
#define warn(...)
Definition logging.hh:256
ExecContext bears the exec_context interface for Minor.
A load/store queue that allows outstanding reads and writes.
Bitfield< 4, 0 > mode
Definition misc_types.hh:74
Bitfield< 7 > i
Definition misc_types.hh:67
Bitfield< 17 > os
Definition misc.hh:838
Bitfield< 3 > addr
Definition types.hh:84
PacketPtr makePacketForRequest(const RequestPtr &request, bool isLoad, Packet::SenderState *sender_state, PacketDataPtr data)
Make a suitable packet for the given request.
Definition lsq.cc:1696
void minorTrace(const char *fmt, Args ...args)
DPRINTFN for MinorTrace reporting.
Definition trace.hh:66
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
Definition binary32.hh:36
std::shared_ptr< FaultBase > Fault
Definition types.hh:249
int16_t ThreadID
Thread index/ID type.
Definition types.hh:235
T safe_cast(U &&ref_or_ptr)
Definition cast.hh:74
std::shared_ptr< Request > RequestPtr
Definition request.hh:94
Tick curTick()
The universal simulation clock.
Definition cur_tick.hh:46
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition types.hh:147
Addr addrBlockOffset(Addr addr, Addr block_size)
Calculates the offset of a given address wrt aligned fixed-size blocks.
Definition utils.hh:53
@ Complete
Definition misc.hh:57
bool transferNeedsBurst(Addr addr, unsigned int size, unsigned int block_size)
Returns true if the given memory access (address, size) needs to be fragmented across aligned fixed-s...
Definition utils.hh:80
uint8_t * PacketDataPtr
Definition packet.hh:72
constexpr decltype(nullptr) NoFault
Definition types.hh:253
bool isAnyActiveElement(const std::vector< bool >::const_iterator &it_start, const std::vector< bool >::const_iterator &it_end)
Test if there is any active element in an enablement range.
Definition utils.hh:89
Minor contains all the definitions within the MinorCPU apart from the CPU class itself.
The constructed pipeline.
bool doMonitor(PacketPtr pkt)
Definition base.cc:764
A virtual base opaque structure used to hold state associated with the packet (e.g....
Definition packet.hh:469

Generated on Tue Jun 18 2024 16:24:02 for gem5 by doxygen 1.11.0