gem5 [DEVELOP-FOR-25.1]
Loading...
Searching...
No Matches
lsq.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2011-2012, 2014, 2017-2019, 2021 ARM Limited
3 * Copyright (c) 2013 Advanced Micro Devices, Inc.
4 * All rights reserved
5 *
6 * The license below extends only to copyright in the software and shall
7 * not be construed as granting a license to any other intellectual
8 * property including but not limited to intellectual property relating
9 * to a hardware implementation of the functionality of the software
10 * licensed hereunder. You may use the software subject to the license
11 * terms below provided that you ensure that this notice is replicated
12 * unmodified and in its entirety in all distributions of the software,
13 * modified or unmodified, in source code or in binary form.
14 *
15 * Copyright (c) 2005-2006 The Regents of The University of Michigan
16 * All rights reserved.
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are
20 * met: redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer;
22 * redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution;
25 * neither the name of the copyright holders nor the names of its
26 * contributors may be used to endorse or promote products derived from
27 * this software without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 */
41
42#include "cpu/o3/lsq.hh"
43
44#include <algorithm>
45#include <list>
46#include <string>
47
48#include "base/compiler.hh"
49#include "base/logging.hh"
50#include "cpu/o3/cpu.hh"
51#include "cpu/o3/dyn_inst.hh"
52#include "cpu/o3/iew.hh"
53#include "cpu/o3/limits.hh"
54#include "debug/Drain.hh"
55#include "debug/Fetch.hh"
56#include "debug/HtmCpu.hh"
57#include "debug/LSQ.hh"
58#include "debug/Writeback.hh"
59#include "params/BaseO3CPU.hh"
60
61namespace gem5
62{
63
64namespace o3
65{
66
68 RequestPort(_cpu->name() + ".dcache_port"), lsq(_lsq), cpu(_cpu),
69 dcachePortStats(_cpu)
70{}
71
73 : statistics::Group(_cpu),
74 ADD_STAT(numRecvResp, statistics::units::Count::get(),
75 "Number of received responses"),
77 "Number of received response bytes"),
79 statistics::units::Rate<statistics::units::Byte,
80 statistics::units::Cycle>::get(),
81 "Average bandwidth of received responses"),
83 statistics::units::Rate<statistics::units::Byte,
84 statistics::units::Count>::get(),
85 "Average packet size per received response"),
87 statistics::units::Rate<statistics::units::Count,
88 statistics::units::Cycle>::get(),
89 "Average rate of received responses per cycle"),
91 statistics::units::Rate<statistics::units::Count,
92 statistics::units::Count>::get(),
93 "Average retry rate per received response"),
95 "Number of retry responses sent")
96{
97 recvRespAvgBW.precision(2);
99
100 recvRespAvgSize.precision(2);
102
103 recvRespAvgRate.precision(2);
105
106 recvRespAvgRetryRate.precision(2);
108}
109
110LSQ::LSQ(CPU *cpu_ptr, IEW *iew_ptr, const BaseO3CPUParams &params)
111 : cpu(cpu_ptr), iewStage(iew_ptr),
112 _cacheBlocked(false),
117 lsqPolicy(params.smtLSQPolicy),
118 LQEntries(params.LQEntries),
119 SQEntries(params.SQEntries),
121 params.smtLSQThreshold)),
123 params.smtLSQThreshold)),
124 dcachePort(this, cpu_ptr),
125 numThreads(params.numThreads),
129 recvRespBytes(0),
133 retryRespEvent([this]{ sendRetryResp(); }, name())
134{
135 assert(numThreads > 0 && numThreads <= MaxThreads);
136
137 //**********************************************
138 //************ Handle SMT Parameters ***********
139 //**********************************************
140
 141 /* Run SMT policy checks. */
142 if (lsqPolicy == SMTQueuePolicy::Dynamic) {
143 DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
144 } else if (lsqPolicy == SMTQueuePolicy::Partitioned) {
145 DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: "
146 "%i entries per LQ | %i entries per SQ\n",
147 maxLQEntries,maxSQEntries);
148 } else if (lsqPolicy == SMTQueuePolicy::Threshold) {
149
150 assert(params.smtLSQThreshold > params.LQEntries);
151 assert(params.smtLSQThreshold > params.SQEntries);
152
153 DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
154 "%i entries per LQ | %i entries per SQ\n",
155 maxLQEntries,maxSQEntries);
156 } else {
157 panic("Invalid LSQ sharing policy. Options are: Dynamic, "
158 "Partitioned, Threshold");
159 }
160
161 thread.reserve(numThreads);
162 for (ThreadID tid = 0; tid < numThreads; tid++) {
163 thread.emplace_back(maxLQEntries, maxSQEntries);
164 thread[tid].init(cpu, iew_ptr, params, this, tid);
165 thread[tid].setDcachePort(&dcachePort);
166 }
167}
168
169
170std::string
172{
173 return iewStage->name() + ".lsq";
174}
175
176void
178{
179 activeThreads = at_ptr;
180 assert(activeThreads != 0);
181}
182
183void
185{
186 assert(isDrained());
187
188 for (ThreadID tid = 0; tid < numThreads; tid++)
190}
191
192bool
194{
195 bool drained(true);
196
197 if (!lqEmpty()) {
198 DPRINTF(Drain, "Not drained, LQ not empty.\n");
199 drained = false;
200 }
201
202 if (!sqEmpty()) {
203 DPRINTF(Drain, "Not drained, SQ not empty.\n");
204 drained = false;
205 }
206
207 return drained;
208}
209
210void
212{
213 usedStorePorts = 0;
214 _cacheBlocked = false;
215
216 for (ThreadID tid = 0; tid < numThreads; tid++) {
217 thread[tid].takeOverFrom();
218 }
219}
220
221void
223{
224 // Re-issue loads which got blocked on the per-cycle load ports limit.
226 iewStage->cacheUnblocked();
227
228 usedLoadPorts = 0;
229 usedStorePorts = 0;
230}
231
232bool
234{
235 return _cacheBlocked;
236}
237
238void
240{
242}
243
244bool
245LSQ::cachePortAvailable(bool is_load) const
246{
247 bool ret;
248 if (is_load) {
250 } else {
252 }
253 return ret;
254}
255
256void
258{
259 assert(cachePortAvailable(is_load));
260 if (is_load) {
262 } else {
264 }
265}
266
267void
269{
270 ThreadID tid = load_inst->threadNumber;
271
272 thread[tid].insertLoad(load_inst);
273}
274
275void
276LSQ::insertStore(const DynInstPtr &store_inst)
277{
278 ThreadID tid = store_inst->threadNumber;
279
280 thread[tid].insertStore(store_inst);
281}
282
283Fault
285{
286 ThreadID tid = inst->threadNumber;
287
288 return thread[tid].executeLoad(inst);
289}
290
291Fault
293{
294 ThreadID tid = inst->threadNumber;
295
296 return thread[tid].executeStore(inst);
297}
298
299void
301{
302 thread.at(tid).commitLoads(youngest_inst);
303}
304
305void
307{
308 thread.at(tid).commitStores(youngest_inst);
309}
310
311void
313{
314 for (ThreadID tid : *activeThreads) {
315 if (numStoresToWB(tid) > 0) {
316 DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores "
317 "available for Writeback.\n", tid, numStoresToWB(tid));
318 }
319
320 thread[tid].writebackStores();
321 }
322}
323
324void
325LSQ::squash(const InstSeqNum &squashed_num, ThreadID tid)
326{
327 thread.at(tid).squash(squashed_num);
328}
329
330bool
332{
333 /* Answers: Does Anybody Have a Violation?*/
334 for (ThreadID tid : *activeThreads) {
335 if (thread[tid].violation())
336 return true;
337 }
338
339 return false;
340}
341
342bool LSQ::violation(ThreadID tid) { return thread.at(tid).violation(); }
343
346{
347 return thread.at(tid).getMemDepViolator();
348}
349
350int
352{
353 return thread.at(tid).getLoadHead();
354}
355
358{
359 return thread.at(tid).getLoadHeadSeqNum();
360}
361
362int
364{
365 return thread.at(tid).getStoreHead();
366}
367
370{
371 return thread.at(tid).getStoreHeadSeqNum();
372}
373
374int LSQ::getCount(ThreadID tid) { return thread.at(tid).getCount(); }
375
376int LSQ::numLoads(ThreadID tid) { return thread.at(tid).numLoads(); }
377
378int LSQ::numStores(ThreadID tid) { return thread.at(tid).numStores(); }
379
380int
382{
383 if (tid == InvalidThreadID)
384 return 0;
385 else
386 return thread[tid].numHtmStarts();
387}
388int
390{
391 if (tid == InvalidThreadID)
392 return 0;
393 else
394 return thread[tid].numHtmStops();
395}
396
397void
399{
400 if (tid != InvalidThreadID)
401 thread[tid].resetHtmStartsStops();
402}
403
404uint64_t
406{
407 if (tid == InvalidThreadID)
408 return 0;
409 else
410 return thread[tid].getLatestHtmUid();
411}
412
413void
415{
416 if (tid != InvalidThreadID)
417 thread[tid].setLastRetiredHtmUid(htmUid);
418}
419
420void
422{
423 iewStage->cacheUnblocked();
424 cacheBlocked(false);
425
426 for (ThreadID tid : *activeThreads) {
427 thread[tid].recvRetry();
428 }
429}
430
431void
433{
434 LSQRequest *request = dynamic_cast<LSQRequest*>(pkt->senderState);
435 thread[cpu->contextToThread(request->contextId())]
436 .completeDataAccess(pkt);
437}
438
439void
441{
442 dcachePort.sendRetryResp();
443}
444
445bool
447{
448 if (pkt->isError())
449 DPRINTF(LSQ, "Got error packet back for address: %#X\n",
450 pkt->getAddr());
451
452 LSQRequest *request = dynamic_cast<LSQRequest*>(pkt->senderState);
453 panic_if(!request, "Got packet back with unknown sender state\n");
454
455 thread[cpu->contextToThread(request->contextId())].recvTimingResp(pkt);
456
457 if (pkt->isInvalidate()) {
458 // This response also contains an invalidate; e.g. this can be the case
459 // if cmd is ReadRespWithInvalidate.
460 //
461 // The calling order between completeDataAccess and checkSnoop matters.
462 // By calling checkSnoop after completeDataAccess, we ensure that the
463 // fault set by checkSnoop is not lost. Calling writeback (more
464 // specifically inst->completeAcc) in completeDataAccess overwrites
465 // fault, and in case this instruction requires squashing (as
466 // determined by checkSnoop), the ReExec fault set by checkSnoop would
467 // be lost otherwise.
468
469 DPRINTF(LSQ, "received invalidation with response for addr:%#x\n",
470 pkt->getAddr());
471
472 for (ThreadID tid = 0; tid < numThreads; tid++) {
473 thread[tid].checkSnoop(pkt);
474 }
475 }
476 // Update the LSQRequest state (this may delete the request)
477 request->packetReplied();
478
481 }
482
483 return true;
484}
485
486void
488{
489 DPRINTF(LSQ, "received pkt for addr:%#x %s\n", pkt->getAddr(),
490 pkt->cmdString());
491
492 // must be a snoop
493 if (pkt->isInvalidate()) {
494 DPRINTF(LSQ, "received invalidation for addr:%#x\n",
495 pkt->getAddr());
496 for (ThreadID tid = 0; tid < numThreads; tid++) {
497 thread[tid].checkSnoop(pkt);
498 }
499 } else if (pkt->req && pkt->req->isTlbiExtSync()) {
500 DPRINTF(LSQ, "received TLBI Ext Sync\n");
502
504 staleTranslationWaitTxnId = pkt->req->getExtraData();
505
506 for (auto& unit : thread) {
507 unit.startStaleTranslationFlush();
508 }
509
510 // In case no units have pending ops, just go ahead
512 }
513}
514
515int
517{
518 unsigned total = 0;
519
520 for (ThreadID tid : *activeThreads) {
521 total += getCount(tid);
522 }
523
524 return total;
525}
526
527int
529{
530 unsigned total = 0;
531
532 for (ThreadID tid : *activeThreads) {
533 total += numLoads(tid);
534 }
535
536 return total;
537}
538
539int
541{
542 unsigned total = 0;
543
544 for (ThreadID tid : *activeThreads) {
545 total += thread[tid].numStores();
546 }
547
548 return total;
549}
550
551unsigned
553{
554 unsigned total = 0;
555
556 for (ThreadID tid : *activeThreads) {
557 total += thread[tid].numFreeLoadEntries();
558 }
559
560 return total;
561}
562
563unsigned
565{
566 unsigned total = 0;
567
568 for (ThreadID tid : *activeThreads) {
569 total += thread[tid].numFreeStoreEntries();
570 }
571
572 return total;
573}
574
575unsigned
577{
578 return thread[tid].numFreeLoadEntries();
579}
580
581unsigned
583{
584 return thread[tid].numFreeStoreEntries();
585}
586
587bool
589{
590 for (ThreadID tid : *activeThreads) {
591 if (!(thread[tid].lqFull() || thread[tid].sqFull()))
592 return false;
593 }
594
595 return true;
596}
597
598bool
600{
601 //@todo: Change to Calculate All Entries for
602 //Dynamic Policy
603 if (lsqPolicy == SMTQueuePolicy::Dynamic)
604 return isFull();
605 else
606 return thread[tid].lqFull() || thread[tid].sqFull();
607}
608
609bool
611{
612 return lqEmpty() && sqEmpty();
613}
614
615bool
617{
618 for (ThreadID tid : *activeThreads) {
619 if (!thread[tid].lqEmpty())
620 return false;
621 }
622
623 return true;
624}
625
626bool
628{
629 for (ThreadID tid : *activeThreads) {
630 if (!thread[tid].sqEmpty())
631 return false;
632 }
633
634 return true;
635}
636
637bool
639{
640 for (ThreadID tid : *activeThreads) {
641 if (!thread[tid].lqFull())
642 return false;
643 }
644
645 return true;
646}
647
648bool
650{
651 //@todo: Change to Calculate All Entries for
652 //Dynamic Policy
653 if (lsqPolicy == SMTQueuePolicy::Dynamic)
654 return lqFull();
655 else
656 return thread[tid].lqFull();
657}
658
659bool
661{
662 for (ThreadID tid : *activeThreads) {
663 if (!sqFull(tid))
664 return false;
665 }
666
667 return true;
668}
669
670bool
672{
673 //@todo: Change to Calculate All Entries for
674 //Dynamic Policy
675 if (lsqPolicy == SMTQueuePolicy::Dynamic)
676 return sqFull();
677 else
678 return thread[tid].sqFull();
679}
680
681bool
683{
684 for (ThreadID tid : *activeThreads) {
685 if (!thread[tid].isStalled())
686 return false;
687 }
688
689 return true;
690}
691
692bool
694{
695 if (lsqPolicy == SMTQueuePolicy::Dynamic)
696 return isStalled();
697 else
698 return thread[tid].isStalled();
699}
700
701bool
703{
704 for (ThreadID tid : *activeThreads) {
705 if (hasStoresToWB(tid))
706 return true;
707 }
708
709 return false;
710}
711
712bool
714{
715 return thread.at(tid).hasStoresToWB();
716}
717
718int
720{
721 return thread.at(tid).numStoresToWB();
722}
723
724bool
726{
727 for (ThreadID tid : *activeThreads) {
728 if (willWB(tid))
729 return true;
730 }
731
732 return false;
733}
734
735bool
737{
738 return thread.at(tid).willWB();
739}
740
741void
743{
744 for (ThreadID tid : *activeThreads) {
745 thread[tid].dumpInsts();
746 }
747}
748
749void
751{
752 thread.at(tid).dumpInsts();
753}
754
/**
 * Create (or resume) the memory request for a load, store, or atomic
 * instruction and, once address translation has completed, kick off the
 * actual queue access (read for loads, write for stores/atomics).
 *
 * Called each time the instruction's memory access is (re-)initiated; on
 * re-execution after a deferred translation the previously built request is
 * recovered from inst->savedRequest instead of being rebuilt.
 *
 * @param inst        Instruction performing the access.
 * @param isLoad      True for loads (and load-like commands), false for
 *                    stores/atomics.
 * @param data        Store data pointer (unused for loads).
 * @param size        Access size in bytes.
 * @param addr        Virtual address of the access.
 * @param flags       Request flags (may carry HTM_CMD / TLBI_CMD).
 * @param res         Pointer for store-conditional results, if any.
 * @param amo_op      Atomic memory operation functor (atomics only).
 * @param byte_enable Per-byte enable mask for the access.
 * @return The instruction's fault after this step (NoFault on success).
 */
Fault
LSQ::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
        unsigned int size, Addr addr, Request::Flags flags, uint64_t *res,
        AtomicOpFunctorPtr amo_op, const std::vector<bool>& byte_enable)
{
    // This coming request can be either load, store or atomic.
    // Atomic request has a corresponding pointer to its atomic memory
    // operation
    [[maybe_unused]] bool isAtomic = !isLoad && amo_op;

    ThreadID tid = cpu->contextToThread(inst->contextId());
    auto cacheLineSize = cpu->cacheLineSize();
    bool needs_burst = transferNeedsBurst(addr, size, cacheLineSize);
    LSQRequest* request = nullptr;

    // Atomic requests that access data across cache line boundary are
    // currently not allowed since the cache does not guarantee corresponding
    // atomic memory operations to be executed atomically across a cache line.
    // For ISAs such as x86 that supports cross-cache-line atomic instructions,
    // the cache needs to be modified to perform atomic update to both cache
    // lines. For now, such cross-line update is not supported.
    assert(!isAtomic || (isAtomic && !needs_burst));

    const bool htm_cmd = isLoad && (flags & Request::HTM_CMD);
    const bool tlbi_cmd = isLoad && (flags & Request::TLBI_CMD);

    if (inst->translationStarted()) {
        // Re-execution: reuse the request built on a previous attempt.
        request = inst->savedRequest;
        assert(request);
    } else {
        // First attempt: build the right request flavor.
        // NOTE: requests are heap-allocated and manage their own lifetime
        // (they self-destruct once replied to / discarded), so no delete here.
        if (htm_cmd || tlbi_cmd) {
            // HTM/TLBI commands are encoded as dummy 8-byte loads at addr 0.
            assert(addr == 0x0lu);
            assert(size == 8);
            request = new UnsquashableDirectRequest(&thread[tid], inst, flags);
        } else if (needs_burst) {
            // Access straddles a cache-line boundary: split into fragments.
            request = new SplitDataRequest(&thread[tid], inst, isLoad, addr,
                    size, flags, data, res);
        } else {
            request = new SingleDataRequest(&thread[tid], inst, isLoad, addr,
                    size, flags, data, res, std::move(amo_op));
        }
        assert(request);
        request->_byteEnable = byte_enable;
        inst->setRequest();
        request->taskId(cpu->taskId());

        // There might be fault from a previous execution attempt if this is
        // a strictly ordered load
        inst->getFault() = NoFault;

        request->initiateTranslation();
    }

    /* This is the place where instructions get the effAddr. */
    if (request->isTranslationComplete()) {
        if (request->isMemAccessRequired()) {
            inst->effAddr = request->getVaddr();
            inst->effSize = size;
            inst->effAddrValid(true);

            // Mirror the request for the checker CPU, when enabled.
            if (cpu->checker) {
                inst->reqToVerify = std::make_shared<Request>(*request->req());
            }
            Fault fault;
            if (isLoad)
                fault = read(request, inst->lqIdx);
            else
                fault = write(request, data, inst->sqIdx);
            // inst->getFault() may have the first-fault of a
            // multi-access split request at this point.
            // Overwrite that only if we got another type of fault
            // (e.g. re-exec).
            if (fault != NoFault)
                inst->getFault() = fault;
        } else if (isLoad) {
            // Predicated-off load: no memory access will happen.
            inst->setMemAccPredicate(false);
            // Commit will have to clean up whatever happened. Set this
            // instruction as executed.
            inst->setExecuted();
        }
    }

    if (inst->traceData)
        inst->traceData->setMem(addr, size, flags);

    return inst->getFault();
}
842
843void
846{
847 _fault.push_back(fault);
850 /* If the instruction has been squahsed, let the request know
851 * as it may have to self-destruct. */
852 if (_inst->isSquashed()) {
854 } else {
855 _inst->strictlyOrdered(request->isStrictlyOrdered());
856
858 if (fault == NoFault) {
859 _inst->physEffAddr = request->getPaddr();
860 _inst->memReqFlags = request->getFlags();
861 if (request->isCondSwap()) {
862 assert(_res);
863 request->setExtraData(*_res);
864 }
866 } else {
868 }
869
870 LSQRequest::_inst->fault = fault;
871 LSQRequest::_inst->translationCompleted(true);
872 }
873}
874
875void
878{
879 int i;
880 for (i = 0; i < _reqs.size() && _reqs[i] != req; i++);
881 assert(i < _reqs.size());
882 _fault[i] = fault;
883
886
887 if (fault == NoFault)
888 _mainReq->setFlags(req->getFlags());
889
890 if (numTranslatedFragments == _reqs.size()) {
891 if (_inst->isSquashed()) {
893 } else {
894 _inst->strictlyOrdered(_mainReq->isStrictlyOrdered());
896 _inst->translationCompleted(true);
897
898 for (i = 0; i < _fault.size() && _fault[i] == NoFault; i++);
899 if (i > 0) {
900 _inst->physEffAddr = LSQRequest::req()->getPaddr();
901 _inst->memReqFlags = _mainReq->getFlags();
902 if (_mainReq->isCondSwap()) {
903 assert (i == _fault.size());
904 assert(_res);
905 _mainReq->setExtraData(*_res);
906 }
907 if (i == _fault.size()) {
908 _inst->fault = NoFault;
910 } else {
911 _inst->fault = _fault[i];
913 }
914 } else {
915 _inst->fault = _fault[0];
917 }
918 }
919
920 }
921}
922
923void
925{
926 assert(_reqs.size() == 0);
927
929
930 if (_reqs.size() > 0) {
931 _reqs.back()->setReqInstSeqNum(_inst->seqNum);
932 _reqs.back()->taskId(_taskId);
933 _inst->translationStarted(true);
936
937 _inst->savedRequest = this;
939 } else {
940 _inst->setMemAccPredicate(false);
941 }
942}
943
949
955
956void
958{
959 auto cacheLineSize = _port.cacheLineSize();
960 Addr base_addr = _addr;
961 Addr next_addr = addrBlockAlign(_addr + cacheLineSize, cacheLineSize);
962 Addr final_addr = addrBlockAlign(_addr + _size, cacheLineSize);
963 uint32_t size_so_far = 0;
964
965 _mainReq = std::make_shared<Request>(base_addr,
966 _size, _flags, _inst->requestorId(),
967 _inst->pcState().instAddr(), _inst->contextId());
968 _mainReq->setByteEnable(_byteEnable);
969
970 // Paddr is not used in _mainReq. However, we will accumulate the flags
971 // from the sub requests into _mainReq by calling setFlags() in finish().
972 // setFlags() assumes that paddr is set so flip the paddr valid bit here to
973 // avoid a potential assert in setFlags() when we call it from finish().
974 _mainReq->setPaddr(0);
975
976 /* Get the pre-fix, possibly unaligned. */
977 auto it_start = _byteEnable.begin();
978 auto it_end = _byteEnable.begin() + (next_addr - base_addr);
979 addReq(base_addr, next_addr - base_addr,
980 std::vector<bool>(it_start, it_end));
981 size_so_far = next_addr - base_addr;
982
983 /* We are block aligned now, reading whole blocks. */
984 base_addr = next_addr;
985 while (base_addr != final_addr) {
986 auto it_start = _byteEnable.begin() + size_so_far;
987 auto it_end = _byteEnable.begin() + size_so_far + cacheLineSize;
988 addReq(base_addr, cacheLineSize,
989 std::vector<bool>(it_start, it_end));
990 size_so_far += cacheLineSize;
991 base_addr += cacheLineSize;
992 }
993
994 /* Deal with the tail. */
995 if (size_so_far < _size) {
996 auto it_start = _byteEnable.begin() + size_so_far;
997 auto it_end = _byteEnable.end();
998 addReq(base_addr, _size - size_so_far,
999 std::vector<bool>(it_start, it_end));
1000 }
1001
1002 if (_reqs.size() > 0) {
1003 /* Setup the requests and send them to translation. */
1004 for (auto& r: _reqs) {
1005 r->setReqInstSeqNum(_inst->seqNum);
1006 r->taskId(_taskId);
1007 }
1008
1009 _inst->translationStarted(true);
1012 _inst->savedRequest = this;
1015 _fault.resize(_reqs.size());
1016
1017 for (uint32_t i = 0; i < _reqs.size(); i++) {
1019 }
1020 } else {
1021 _inst->setMemAccPredicate(false);
1022 }
1023}
1024
1026 LSQUnit *port, const DynInstPtr& inst, bool isLoad) :
1028 _port(*port), _inst(inst), _data(nullptr),
1029 _res(nullptr), _addr(0), _size(0), _flags(0),
1030 _numOutstandingPackets(0), _amo_op(nullptr)
1031{
1034 _inst->isStoreConditional() || _inst->isAtomic() ||
1035 _inst->isLoad());
1036 flags.set(Flag::IsAtomic, _inst->isAtomic());
1037 install();
1038}
1039
1041 LSQUnit *port, const DynInstPtr& inst, bool isLoad,
1042 const Addr& addr, const uint32_t& size, const Request::Flags& flags_,
1043 PacketDataPtr data, uint64_t* res, AtomicOpFunctorPtr amo_op,
1044 bool stale_translation)
1048 _port(*port), _inst(inst), _data(data),
1049 _res(res), _addr(addr), _size(size),
1050 _flags(flags_),
1052 _amo_op(std::move(amo_op)),
1053 _hasStaleTranslation(stale_translation)
1054{
1057 _inst->isStoreConditional() || _inst->isAtomic() ||
1058 _inst->isLoad());
1059 flags.set(Flag::IsAtomic, _inst->isAtomic());
1060 install();
1061}
1062
1063void
1065{
1066 if (isLoad()) {
1067 _port.loadQueue[_inst->lqIdx].setRequest(this);
1068 } else {
1069 // Store, StoreConditional, and Atomic requests are pushed
1070 // to this storeQueue
1071 _port.storeQueue[_inst->sqIdx].setRequest(this);
1072 }
1073}
1074
1075bool LSQ::LSQRequest::squashed() const { return _inst->isSquashed(); }
1076
1077void
1079 const std::vector<bool>& byte_enable)
1080{
1081 unsigned inactive_tail_size = inactiveTailSize(byte_enable.begin(),
1082 byte_enable.end());
1083
1084 if (inactive_tail_size != byte_enable.size()) {
1085 auto req = new Request(
1086 addr, size-inactive_tail_size, _flags, _inst->requestorId(),
1087 _inst->pcState().instAddr(), _inst->contextId(),
1088 std::move(_amo_op));
1089
1090 req->setByteEnable(
1091 std::vector<bool>(byte_enable.begin(),
1092 byte_enable.end()-inactive_tail_size));
1093
1094 /* If the request is marked as NO_ACCESS, setup a local access */
1095 if (_flags.isSet(Request::NO_ACCESS)) {
1096 req->setLocalAccessor(
1097 [this, req](gem5::ThreadContext *tc, PacketPtr pkt) -> Cycles
1098 {
1099 if ((req->isHTMStart() || req->isHTMCommit())) {
1100 auto& inst = this->instruction();
1101 assert(inst->inHtmTransactionalState());
1103 inst->getHtmTransactionUid());
1104 }
1105 return Cycles(1);
1106 }
1107 );
1108 }
1109
1110 _reqs.emplace_back(req);
1111 }
1112}
1113
1115{
1116 assert(!isAnyOutstandingRequest());
1117 _inst->savedRequest = nullptr;
1118
1119 for (auto r: _packets)
1120 delete r;
1121};
1122
1125{
1126 return _inst->contextId();
1127}
1128
1129void
1131{
1133 _port.getMMUPtr()->translateTiming(req(i), _inst->thread->getTC(),
1134 this, isLoad() ? BaseMMU::Read : BaseMMU::Write);
1135}
1136
1137void
1139{
1140 // If this element has been translated and is currently being requested,
1141 // then it may be stale
1142 if ((!flags.isSet(Flag::Complete)) &&
1143 (!flags.isSet(Flag::Discarded)) &&
1145 _hasStaleTranslation = true;
1146 }
1147
1148 DPRINTF(LSQ, "SingleDataRequest %d 0x%08x isBlocking:%d\n",
1149 (int)_state, (uint32_t)flags, _hasStaleTranslation);
1150}
1151
1152void
1154{
1155 // If this element has been translated and is currently being requested,
1156 // then it may be stale
1157 if ((!flags.isSet(Flag::Complete)) &&
1158 (!flags.isSet(Flag::Discarded)) &&
1160 _hasStaleTranslation = true;
1161 }
1162
1163 DPRINTF(LSQ, "SplitDataRequest %d 0x%08x isBlocking:%d\n",
1164 (int)_state, (uint32_t)flags, _hasStaleTranslation);
1165}
1166
1167bool
1169{
1170 assert(_numOutstandingPackets == 1);
1171 flags.set(Flag::Complete);
1172 assert(pkt == _packets.front());
1173 _port.completeDataAccess(pkt);
1174 _hasStaleTranslation = false;
1175 return true;
1176}
1177
1178bool
1180{
1181 uint32_t pktIdx = 0;
1182 while (pktIdx < _packets.size() && pkt != _packets[pktIdx])
1183 pktIdx++;
1184 assert(pktIdx < _packets.size());
1186 if (numReceivedPackets == _packets.size()) {
1187 flags.set(Flag::Complete);
1188 /* Assemble packets. */
1189 PacketPtr resp = isLoad()
1192 if (isLoad())
1193 resp->dataStatic(_inst->memData);
1194 else
1195 resp->dataStatic(_data);
1196 resp->senderState = this;
1197 _port.completeDataAccess(resp);
1198 delete resp;
1199 }
1200 _hasStaleTranslation = false;
1201 return true;
1202}
1203
1204void
1206{
1207 /* Retries do not create new packets. */
1208 if (_packets.size() == 0) {
1209 _packets.push_back(
1210 isLoad()
1213 _packets.back()->dataStatic(_inst->memData);
1214 _packets.back()->senderState = this;
1215
1216 // hardware transactional memory
1217 // If request originates in a transaction (not necessarily a HtmCmd),
1218 // then the packet should be marked as such.
1219 if (_inst->inHtmTransactionalState()) {
1220 _packets.back()->setHtmTransactional(
1221 _inst->getHtmTransactionUid());
1222
1223 DPRINTF(HtmCpu,
1224 "HTM %s pc=0x%lx - vaddr=0x%lx - paddr=0x%lx - htmUid=%u\n",
1225 isLoad() ? "LD" : "ST",
1226 _inst->pcState().instAddr(),
1227 _packets.back()->req->hasVaddr() ?
1228 _packets.back()->req->getVaddr() : 0lu,
1229 _packets.back()->getAddr(),
1230 _inst->getHtmTransactionUid());
1231 }
1232 }
1233 assert(_packets.size() == 1);
1234}
1235
1236void
1238{
1239 /* Extra data?? */
1240 Addr base_address = _addr;
1241
1242 if (_packets.size() == 0) {
1243 /* New stuff */
1244 if (isLoad()) {
1246 _mainPacket->dataStatic(_inst->memData);
1247
1248 // hardware transactional memory
1249 // If request originates in a transaction,
1250 // packet should be marked as such
1251 if (_inst->inHtmTransactionalState()) {
1252 _mainPacket->setHtmTransactional(
1253 _inst->getHtmTransactionUid());
1254 DPRINTF(HtmCpu,
1255 "HTM LD.0 pc=0x%lx-vaddr=0x%lx-paddr=0x%lx-htmUid=%u\n",
1256 _inst->pcState().instAddr(),
1257 _mainPacket->req->hasVaddr() ?
1258 _mainPacket->req->getVaddr() : 0lu,
1259 _mainPacket->getAddr(),
1260 _inst->getHtmTransactionUid());
1261 }
1262 }
1263 for (int i = 0; i < _reqs.size() && _fault[i] == NoFault; i++) {
1264 RequestPtr req = _reqs[i];
1267 ptrdiff_t offset = req->getVaddr() - base_address;
1268 if (isLoad()) {
1269 pkt->dataStatic(_inst->memData + offset);
1270 } else {
1271 uint8_t* req_data = new uint8_t[req->getSize()];
1272 std::memcpy(req_data,
1273 _inst->memData + offset,
1274 req->getSize());
1275 pkt->dataDynamic(req_data);
1276 }
1277 pkt->senderState = this;
1278 _packets.push_back(pkt);
1279
1280 // hardware transactional memory
1281 // If request originates in a transaction,
1282 // packet should be marked as such
1283 if (_inst->inHtmTransactionalState()) {
1284 _packets.back()->setHtmTransactional(
1285 _inst->getHtmTransactionUid());
1286 DPRINTF(HtmCpu,
1287 "HTM %s.%d pc=0x%lx-vaddr=0x%lx-paddr=0x%lx-htmUid=%u\n",
1288 isLoad() ? "LD" : "ST",
1289 i+1,
1290 _inst->pcState().instAddr(),
1291 _packets.back()->req->hasVaddr() ?
1292 _packets.back()->req->getVaddr() : 0lu,
1293 _packets.back()->getAddr(),
1294 _inst->getHtmTransactionUid());
1295 }
1296 }
1297 }
1298 assert(_packets.size() > 0);
1299}
1300
1301void
1303{
1304 assert(_numOutstandingPackets == 0);
1305 if (lsqUnit()->trySendPacket(isLoad(), _packets.at(0)))
1307}
1308
1309void
1311{
1312 /* Try to send the packets. */
1314 lsqUnit()->trySendPacket(isLoad(),
1317 }
1318}
1319
1320Cycles
1323{
1324 return pkt->req->localAccessor(thread, pkt);
1325}
1326
1327Cycles
1330{
1331 Cycles delay(0);
1332 unsigned offset = 0;
1333
1334 for (auto r: _reqs) {
1335 PacketPtr pkt =
1337 pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
1338 Cycles d = r->localAccessor(thread, pkt);
1339 if (d > delay)
1340 delay = d;
1341 offset += r->getSize();
1342 delete pkt;
1343 }
1344 return delay;
1345}
1346
1347bool
1349{
1350 return ( (LSQRequest::_reqs[0]->getPaddr() & blockMask) == blockAddr);
1351}
1352
1368bool
1370{
1371 bool is_hit = false;
1372 for (auto &r: _reqs) {
1382 if (r->hasPaddr() && (r->getPaddr() & blockMask) == blockAddr) {
1383 is_hit = true;
1384 break;
1385 }
1386 }
1387 return is_hit;
1388}
1389
1390bool
1392{
1393 // Check if tick is unaligned to correct +1 curCycle delta
1394 bool is_unaligned_tick = curTick() % cpu->clockPeriod() != 0;
1395 Cycles current_cycle = cpu->curCycle() - Cycles(is_unaligned_tick);
1396
1397 // Reset counters/flags on new cycle
1398 if (current_cycle > lsq->recvRespLastActiveCycle) {
1399 lsq->recvRespBytes = 0;
1400 lsq->recvRespCachelines = 0;
1401 lsq->recvRespLastCachelineAddr = 0;
1402 }
1403
1404 lsq->recvRespLastActiveCycle = current_cycle;
1405
1406 Addr cacheline_addr = addrBlockAlign(pkt->getAddr(), cpu->cacheLineSize());
1407
1408 bool throttle_cycle = false;
1409
1410 // Check limits
1411 bool is_new_cacheline = cacheline_addr != lsq->recvRespLastCachelineAddr;
1412 bool max_cachelines = (lsq->recvRespCachelines + is_new_cacheline)
1413 > lsq->recvRespMaxCachelines;
1414 int free_buf_size = lsq->recvRespBufferSize - lsq->recvRespBytes;
1415 bool max_bytes = pkt->getSize() > free_buf_size;
1416
1417 // No pending response and either per cycle limit reached
1418 if (lsq->recvRespPendBytes == 0 && (max_cachelines || max_bytes)) {
1419 // If the buffer size is an exclusive limit try to saturate it and save
1420 // any remaining bytes for later
1421 if (max_bytes && !max_cachelines) {
1422 lsq->recvRespBytes += free_buf_size;
1423 assert(lsq->recvRespBytes <= lsq->recvRespBufferSize);
1424
1425 lsq->recvRespPendBytes = pkt->getSize() - free_buf_size;
1426 }
1427
1428 // Throttle this cycle
1429 throttle_cycle = true;
1430 DPRINTF(LSQ, "throttling ReadResp: max_cachelines=%d max_bytes=%d\n",
1431 max_cachelines, max_bytes);
1432
1433 // Still processing previous response
1434 } else if (lsq->recvRespPendBytes > 0) {
1435 DPRINTF(LSQ, "recvRespPendBytes=%u\n", lsq->recvRespPendBytes);
1436
1437 // Shouldn't have processed anything this cycle yet
1438 assert(lsq->recvRespBytes == 0 && lsq->recvRespCachelines == 0);
1439
1440 // Throttle if pending bytes are greater than buffer size
1441 throttle_cycle = lsq->recvRespPendBytes > lsq->recvRespBufferSize;
1442
1443 // Process as much pending bytes as possible this cycle
1444 lsq->recvRespBytes += (throttle_cycle) ? lsq->recvRespBufferSize
1445 : lsq->recvRespPendBytes;
1446 lsq->recvRespPendBytes -= lsq->recvRespBytes;
1447
1448 // No pending response and no limit reached
1449 } else {
1450 // Process whole response
1451 lsq->recvRespBytes += pkt->getSize();
1452 }
1453
1454 // Got new cacheline on this cycle. Count and save for later (assumes
1455 // we cannot get previous cachelines again this cycle)
1456 if (is_new_cacheline) {
1457 lsq->recvRespCachelines++;
1458 lsq->recvRespLastCachelineAddr = cacheline_addr;
1459 }
1460
1461 // Throttling this cycle
1462 if (throttle_cycle) {
1463 Tick next_cycle = cpu->cyclesToTicks(current_cycle + Cycles(1));
1464
1465 // Sanity checks
1466 assert(next_cycle > curTick());
1467 assert(!(lsq->retryRespEvent.scheduled()));
1468
1469 // Schedule retry on next cycle
1470 cpu->schedule(lsq->retryRespEvent, next_cycle);
1471 DPRINTF(LSQ, "retryRespEvent scheduled for tick=%lu\n", next_cycle);
1472
1473 dcachePortStats.numSendRetryResp++;
1474 }
1475
1476 return throttle_cycle;
1477}
1478
// NOTE(review): the signature line (lsq.cc:1480,
// LSQ::DcachePort::recvTimingResp(PacketPtr pkt)) was dropped by the
// listing extraction; only the body survives below.
1479bool
1481{
     // Optionally throttle incoming load (ReadResp) packets. A throttled
     // packet is rejected here (return false -> peer must retry); the
     // retry is scheduled by throttleReadResp via retryRespEvent.
1482 if (lsq->recvRespThrottling && pkt->cmd == MemCmd::ReadResp) {
1483 if (throttleReadResp(pkt)) {
1484 return false;
1485 }
1486 }
1487
     // Accepted response: account for it in the port statistics...
1488 dcachePortStats.numRecvResp++;
1489 dcachePortStats.numRecvRespBytes += pkt->getSize();
1490
     // ...and hand the packet to the LSQ proper for writeback/completion.
1491 return lsq->recvTimingResp(pkt);
1492}
1493
// NOTE(review): the signature line (lsq.cc:1495,
// LSQ::DcachePort::recvTimingSnoopReq(PacketPtr pkt)) was dropped by the
// listing extraction.
1494void
1496{
     // Wake any thread whose CPU address monitor matches this snoop
     // (doMonitor returns true on a match).
1497 for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
1498 if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
1499 cpu->wakeup(tid);
1500 }
1501 }
     // Forward the snoop to the LSQ for memory-ordering checks.
1502 lsq->recvTimingSnoopReq(pkt);
1503}
1504
// NOTE(review): the signature line (lsq.cc:1506,
// LSQ::DcachePort::recvReqRetry()) was dropped by the listing extraction.
1505void
1507{
     // The cache can accept requests again; let the LSQ resend the
     // previously rejected request.
1508 lsq->recvReqRetry();
1509}
1510
// NOTE(review): the constructor name line (lsq.cc:1511,
// LSQ::UnsquashableDirectRequest::UnsquashableDirectRequest) was dropped
// by the listing extraction; below are its parameters and init list.
1512 LSQUnit* port,
1513 const DynInstPtr& inst,
1514 const Request::Flags& flags_) :
     // Modelled as an 8-byte load from dummy address 0x0 — see
     // initiateTranslation(), which fabricates the "translation".
1515 SingleDataRequest(port, inst, true, 0x0lu, 8, flags_,
1516 nullptr, nullptr, nullptr)
1517{
1518}
1519
1520void
// NOTE(review): hyperlinked lines were dropped by the listing extraction:
// the signature (lsq.cc:1521,
// LSQ::UnsquashableDirectRequest::initiateTranslation()), the addReq()
// call at 1530, and the flag/state updates at 1544-1545 and 1550.
1522{
1523 // Special commands are implemented as loads to avoid significant
1524 // changes to the cpu and memory interfaces
1525 // The virtual and physical addresses use a dummy value of 0x00
1526 // Address translation does not really occur thus the code below
1527
     // No sub-request may exist yet; the (dropped) addReq() call below
     // creates the single backing request.
1528 assert(_reqs.size() == 0);
1529
1531
     // NOTE(review): presumably addReq() can decline to create a request;
     // for these unsquashable commands that would be a bug (see panic).
1532 if (_reqs.size() > 0) {
1533 _reqs.back()->setReqInstSeqNum(_inst->seqNum);
1534 _reqs.back()->taskId(_taskId);
1535 _reqs.back()->setPaddr(_addr);
1536 _reqs.back()->setInstCount(_inst->getCpuPtr()->totalInsts());
1537
     // Mirror the request's properties into the dynamic instruction,
     // exactly as a real translation would have done.
1538 _inst->strictlyOrdered(_reqs.back()->isStrictlyOrdered());
1539 _inst->fault = NoFault;
1540 _inst->physEffAddr = _reqs.back()->getPaddr();
1541 _inst->memReqFlags = _reqs.back()->getFlags();
1542 _inst->savedRequest = this;
1543
1546
     // Pretend the translation started and completed instantly.
1547 _inst->translationStarted(true);
1548 _inst->translationCompleted(true);
1549
1551 } else {
1552 panic("unexpected behaviour in initiateTranslation()");
1553 }
1554}
1555
1556void
// NOTE(review): the signature line (lsq.cc:1557,
// LSQ::UnsquashableDirectRequest::markAsStaleTranslation()) was dropped
// by the listing extraction.
1558{
1559 // HTM/TLBI operations do not translate,
1560 // so cannot have stale translations
1561 _hasStaleTranslation = false;
1562}
1563
1564void
// NOTE(review): the signature lines (lsq.cc:1565-1567,
// LSQ::UnsquashableDirectRequest::finish(...)) were dropped by the
// listing extraction.
1568{
     // These requests never issue a translation (see initiateTranslation),
     // so the MMU must never invoke this completion callback.
1569 panic("unexpected behaviour - finish()");
1570}
1571
1572void
// NOTE(review): hyperlinked lines were dropped by the listing extraction:
// the signature (lsq.cc:1573, LSQ::checkStaleTranslations()), an
// assertion at 1575, the Request::createMemManagement(...) call whose
// trailing argument survives at 1589, and the bookkeeping reset at
// 1598-1599.
1574{
1576
1577 DPRINTF(LSQ, "Checking pending TLBI sync\n");
1578 // Check if all thread queues are complete
     // (a unit returning true still blocks the sync — bail out early).
1579 for (const auto& unit : thread) {
1580 if (unit.checkStaleTranslations())
1581 return;
1582 }
1583 DPRINTF(LSQ, "No threads have blocking TLBI sync\n");
1584
1585 // All thread queues have committed their sync operations
1586 // => send a RubyRequest to the sequencer
1589 cpu->dataRequestorId());
     // Tag the completion with the transaction id that caused the stall.
1590 req->setExtraData(staleTranslationWaitTxnId);
1591 PacketPtr pkt = Packet::createRead(req);
1592
1593 // TODO - reserve some credit for these responses?
1594 if (!dcachePort.sendTimingReq(pkt)) {
1595 panic("Couldn't send TLBI_EXT_SYNC_COMP message");
1596 }
1597
1600}
1601
1602Fault
1603LSQ::read(LSQRequest* request, ssize_t load_idx)
1604{
1605 assert(request->req()->contextId() == request->contextId());
1606 ThreadID tid = cpu->contextToThread(request->req()->contextId());
1607
1608 return thread.at(tid).read(request, load_idx);
1609}
1610
1611Fault
1612LSQ::write(LSQRequest* request, uint8_t *data, ssize_t store_idx)
1613{
1614 ThreadID tid = cpu->contextToThread(request->req()->contextId());
1615
1616 return thread.at(tid).write(request, data, store_idx);
1617}
1618
1619} // namespace o3
1620} // namespace gem5
#define DPRINTF(x,...)
Definition trace.hh:209
const char data[]
gem5::BaseCPU::BaseCPUStats baseStats
Cycles is a wrapper class for representing cycle counts, i.e.
Definition types.hh:79
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition packet.hh:295
Addr getAddr() const
Definition packet.hh:807
bool isError() const
Definition packet.hh:622
const std::string & cmdString() const
Return the string name of the cmd field (for debugging and tracing).
Definition packet.hh:588
static PacketPtr createWrite(const RequestPtr &req)
Definition packet.hh:1044
void dataStatic(T *p)
Set the data pointer to the following value that should not be freed.
Definition packet.hh:1175
SenderState * senderState
This packet's sender state.
Definition packet.hh:545
T * getPtr()
get a pointer to the data ptr.
Definition packet.hh:1225
static PacketPtr createRead(const RequestPtr &req)
Constructor-like methods that return Packets based on Request objects.
Definition packet.hh:1038
RequestPtr req
A pointer to the original request.
Definition packet.hh:377
unsigned getSize() const
Definition packet.hh:817
void dataDynamic(T *p)
Set the data pointer to a value that should have delete [] called on it.
Definition packet.hh:1213
void setHtmTransactional(uint64_t val)
Stipulates that this packet/request originates in the CPU executing in transactional mode,...
Definition packet.cc:516
MemCmd cmd
The command field of the packet.
Definition packet.hh:372
bool isInvalidate() const
Definition packet.hh:609
RequestPort(const std::string &name, SimObject *_owner, PortID id=InvalidPortID)
Request port.
Definition port.cc:125
static RequestPtr createMemManagement(Flags flags, RequestorID id)
Factory method for creating memory management requests, with unspecified addr and size.
Definition request.hh:538
static const FlagsType TLBI_CMD
Definition request.hh:269
@ TLBI_EXT_SYNC_COMP
The Request tells the interconnect that a remote TLB Sync request has completed.
Definition request.hh:252
@ NO_ACCESS
The request should not cause a memory access.
Definition request.hh:146
static const FlagsType HTM_CMD
Definition request.hh:266
gem5::Flags< FlagsType > Flags
Definition request.hh:102
ThreadContext is the external interface to all thread state for anything outside of the CPU.
O3CPU class, has each of the stages (fetch through commit) within it, as well as all of the time buff...
Definition cpu.hh:97
IEW handles both single threaded and SMT IEW (issue/execute/writeback).
Definition iew.hh:88
Class that implements the actual LQ and SQ for each specific thread.
Definition lsq_unit.hh:89
virtual void recvTimingSnoopReq(PacketPtr pkt)
Receive a timing snoop request from the peer.
Definition lsq.cc:1495
DcachePort(LSQ *_lsq, CPU *_cpu)
Default constructor.
Definition lsq.cc:67
virtual bool recvTimingResp(PacketPtr pkt)
Timing version of receive.
Definition lsq.cc:1480
gem5::o3::LSQ::DcachePort::DcachePortStats dcachePortStats
virtual void recvReqRetry()
Handles doing a retry of the previous send.
Definition lsq.cc:1506
LSQ * lsq
Pointer to LSQ.
Definition lsq.hh:89
bool throttleReadResp(PacketPtr pkt)
Applies throttling in recvTimingResp for incoming load responses.
Definition lsq.cc:1391
Memory operation metadata.
Definition lsq.hh:220
@ IsAtomic
True if this is an atomic request.
Definition lsq.hh:253
@ TranslationFinished
True if there are un-replied outbound translations.
Definition lsq.hh:238
@ WriteBackToRegister
True if this request needs to writeBack to register.
Definition lsq.hh:232
@ TranslationStarted
True if any translation has been sent to TLB.
Definition lsq.hh:236
@ Discarded
Request discarded.
Definition lsq.hh:246
std::vector< bool > _byteEnable
Definition lsq.hh:287
LSQRequest(LSQUnit *port, const DynInstPtr &inst, bool isLoad)
Definition lsq.cc:1025
virtual ~LSQRequest()
Destructor.
Definition lsq.cc:1114
bool isLoad() const
Definition lsq.hh:302
void install()
Install the request in the LQ/SQ.
Definition lsq.cc:1064
AtomicOpFunctorPtr _amo_op
Definition lsq.hh:289
ContextID contextId() const
Definition lsq.cc:1124
void taskId(const uint32_t &v)
Definition lsq.hh:383
virtual void initiateTranslation()=0
void setState(const State &newState)
Definition lsq.hh:266
void addReq(Addr addr, unsigned size, const std::vector< bool > &byte_enable)
Helper function used to add a (sub)request, given its address addr, size size and byte-enable mask by...
Definition lsq.cc:1078
uint32_t _numOutstandingPackets
Definition lsq.hh:288
const uint32_t _size
Definition lsq.hh:285
LSQUnit * lsqUnit()
Definition lsq.hh:293
PacketDataPtr _data
Definition lsq.hh:279
bool isAnyOutstandingRequest()
Test if there is any in-flight translation or mem access request.
Definition lsq.hh:418
void sendFragmentToTranslation(int i)
Definition lsq.cc:1130
const DynInstPtr _inst
Definition lsq.hh:277
const Request::Flags _flags
Definition lsq.hh:286
uint32_t numTranslatedFragments
Definition lsq.hh:268
std::vector< Fault > _fault
Definition lsq.hh:282
uint32_t numInTranslationFragments
Definition lsq.hh:269
bool squashed() const override
This function is used by the page table walker to determine if it should translate a pending requ...
Definition lsq.cc:1075
RequestPtr req(int idx=0)
Definition lsq.hh:392
std::vector< RequestPtr > _reqs
Definition lsq.hh:281
std::vector< PacketPtr > _packets
Definition lsq.hh:280
const DynInstPtr & instruction()
Definition lsq.hh:364
bool isTranslationComplete()
Definition lsq.hh:496
Addr getVaddr(int idx=0) const
Definition lsq.hh:395
virtual Cycles handleLocalAccess(gem5::ThreadContext *thread, PacketPtr pkt)
Memory mapped IPR accesses.
Definition lsq.cc:1321
virtual void finish(const Fault &fault, const RequestPtr &req, gem5::ThreadContext *tc, BaseMMU::Mode mode)
Definition lsq.cc:844
virtual void initiateTranslation()
Definition lsq.cc:924
SingleDataRequest(LSQUnit *port, const DynInstPtr &inst, bool isLoad, const Addr &addr, const uint32_t &size, const Request::Flags &flags_, PacketDataPtr data=nullptr, uint64_t *res=nullptr, AtomicOpFunctorPtr amo_op=nullptr)
Definition lsq.hh:602
virtual bool recvTimingResp(PacketPtr pkt)
Definition lsq.cc:1168
virtual void buildPackets()
Definition lsq.cc:1205
virtual void markAsStaleTranslation()
Definition lsq.cc:1138
virtual void sendPacketToCache()
Definition lsq.cc:1302
virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask)
Test if the request accesses a particular cache line.
Definition lsq.cc:1348
virtual bool recvTimingResp(PacketPtr pkt)
Definition lsq.cc:1179
virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask)
Caches may probe into the load-store queue to enforce memory ordering guarantees.
Definition lsq.cc:1369
virtual void initiateTranslation()
Definition lsq.cc:957
virtual void markAsStaleTranslation()
Definition lsq.cc:1153
virtual void finish(const Fault &fault, const RequestPtr &req, gem5::ThreadContext *tc, BaseMMU::Mode mode)
Definition lsq.cc:876
virtual PacketPtr mainPacket()
Definition lsq.cc:945
virtual Cycles handleLocalAccess(gem5::ThreadContext *thread, PacketPtr pkt)
Memory mapped IPR accesses.
Definition lsq.cc:1328
virtual void sendPacketToCache()
Definition lsq.cc:1310
virtual RequestPtr mainReq()
Definition lsq.cc:951
virtual void buildPackets()
Definition lsq.cc:1237
virtual void finish(const Fault &fault, const RequestPtr &req, gem5::ThreadContext *tc, BaseMMU::Mode mode)
Definition lsq.cc:1565
UnsquashableDirectRequest(LSQUnit *port, const DynInstPtr &inst, const Request::Flags &flags_)
Definition lsq.cc:1511
unsigned SQEntries
Total Size of SQ Entries.
Definition lsq.hh:975
bool isDrained() const
Has the LSQ drained?
Definition lsq.cc:193
EventFunctionWrapper retryRespEvent
Definition lsq.hh:1001
int cacheLoadPorts
The number of cache ports available each cycle (loads only).
Definition lsq.hh:934
int usedStorePorts
The number of used cache ports in this cycle by stores.
Definition lsq.hh:932
int numHtmStarts(ThreadID tid) const
Definition lsq.cc:381
std::string name() const
Returns the name of the LSQ.
Definition lsq.cc:171
void commitStores(InstSeqNum &youngest_inst, ThreadID tid)
Commits stores up until the given sequence number for a specific thread.
Definition lsq.cc:306
Addr staleTranslationWaitTxnId
The ID of the transaction that made translations stale.
Definition lsq.hh:941
bool recvTimingResp(PacketPtr pkt)
Handles writing back and completing the load or store that has returned from memory.
Definition lsq.cc:446
void checkStaleTranslations()
Checks if queues have any marked operations left, and sends the appropriate Sync Completion message i...
Definition lsq.cc:1573
int getLoadHead(ThreadID tid)
Returns the head index of the load queue for a specific thread.
Definition lsq.cc:351
void squash(const InstSeqNum &squashed_num, ThreadID tid)
Squash instructions from a thread until the specified sequence number.
Definition lsq.cc:325
bool sqEmpty() const
Returns if all of the SQs are empty.
Definition lsq.cc:627
void completeDataAccess(PacketPtr pkt)
Definition lsq.cc:432
Fault pushRequest(const DynInstPtr &inst, bool isLoad, uint8_t *data, unsigned int size, Addr addr, Request::Flags flags, uint64_t *res, AtomicOpFunctorPtr amo_op, const std::vector< bool > &byte_enable)
Definition lsq.cc:756
unsigned numFreeLoadEntries()
Returns the number of free load entries.
Definition lsq.cc:552
ThreadID numThreads
Number of Threads.
Definition lsq.hh:990
IEW * iewStage
The IEW stage pointer.
Definition lsq.hh:911
InstSeqNum getLoadHeadSeqNum(ThreadID tid)
Returns the sequence number of the head of the load queue.
Definition lsq.cc:357
void sendRetryResp()
Definition lsq.cc:440
std::list< ThreadID > * activeThreads
List of Active Threads in System.
Definition lsq.hh:970
DcachePort dcachePort
Data port.
Definition lsq.hh:984
void takeOverFrom()
Takes over execution from another CPU's thread.
Definition lsq.cc:211
DynInstPtr getMemDepViolator(ThreadID tid)
Gets the instruction that caused the memory ordering violation.
Definition lsq.cc:345
static uint32_t maxLSQAllocation(SMTQueuePolicy pol, uint32_t entries, uint32_t numThreads, uint32_t SMTThreshold)
Auxiliary function to calculate per-thread max LSQ allocation limit.
Definition lsq.hh:952
void setActiveThreads(std::list< ThreadID > *at_ptr)
Sets the pointer to the list of active threads.
Definition lsq.cc:177
bool cacheBlocked() const
Is D-cache blocked?
Definition lsq.cc:233
int numLoads()
Returns the total number of loads in the load queue.
Definition lsq.cc:528
void setLastRetiredHtmUid(ThreadID tid, uint64_t htmUid)
Definition lsq.cc:414
void dumpInsts() const
Debugging function to print out all instructions.
Definition lsq.cc:742
int usedLoadPorts
The number of used cache ports in this cycle by loads.
Definition lsq.hh:936
unsigned maxLQEntries
Max LQ Size - Used to Enforce Sharing Policies.
Definition lsq.hh:978
bool isFull()
Returns if the LSQ is full (either LQ or SQ is full).
Definition lsq.cc:588
void insertStore(const DynInstPtr &store_inst)
Inserts a store into the LSQ.
Definition lsq.cc:276
void recvReqRetry()
Retry the previous send that failed.
Definition lsq.cc:421
void commitLoads(InstSeqNum &youngest_inst, ThreadID tid)
Commits loads up until the given sequence number for a specific thread.
Definition lsq.cc:300
Fault write(LSQRequest *request, uint8_t *data, ssize_t store_idx)
Executes a store operation, using the store specified at the store index.
Definition lsq.cc:1612
Cycles recvRespLastActiveCycle
Definition lsq.hh:1000
Addr recvRespLastCachelineAddr
Definition lsq.hh:999
uint64_t getLatestHtmUid(ThreadID tid) const
Definition lsq.cc:405
bool willWB()
Returns if the LSQ will write back to memory this cycle.
Definition lsq.cc:725
int getStoreHead(ThreadID tid)
Returns the head index of the store queue.
Definition lsq.cc:363
LSQ(CPU *cpu_ptr, IEW *iew_ptr, const BaseO3CPUParams &params)
Constructs an LSQ with the given parameters.
Definition lsq.cc:110
CPU * cpu
The CPU pointer.
Definition lsq.hh:908
bool _cacheBlocked
D-cache is blocked.
Definition lsq.hh:928
void drainSanityCheck() const
Perform sanity checks after a drain.
Definition lsq.cc:184
std::vector< LSQUnit > thread
The LSQ units for individual threads.
Definition lsq.hh:987
unsigned LQEntries
Total Size of LQ Entries.
Definition lsq.hh:973
int numHtmStops(ThreadID tid) const
Definition lsq.cc:389
void cachePortBusy(bool is_load)
Another store port is in use.
Definition lsq.cc:257
unsigned recvRespBytes
Definition lsq.hh:996
const unsigned recvRespBufferSize
Definition lsq.hh:995
bool cachePortAvailable(bool is_load) const
Is any store port available to use?
Definition lsq.cc:245
InstSeqNum getStoreHeadSeqNum(ThreadID tid)
Returns the sequence number of the head of the store queue.
Definition lsq.cc:369
bool isStalled()
Returns if the LSQ is stalled due to a memory operation that must be replayed.
Definition lsq.cc:682
void writebackStores()
Attempts to write back stores until all cache ports are used or the interface becomes blocked.
Definition lsq.cc:312
bool lqFull()
Returns if any of the LQs are full.
Definition lsq.cc:638
bool waitingForStaleTranslation
If the LSQ is currently waiting for stale translations.
Definition lsq.hh:939
unsigned maxSQEntries
Max SQ Size - Used to Enforce Sharing Policies.
Definition lsq.hh:981
bool lqEmpty() const
Returns if all of the LQs are empty.
Definition lsq.cc:616
int getCount()
Returns the number of instructions in all of the queues.
Definition lsq.cc:516
bool hasStoresToWB()
Returns whether or not there are any stores to write back to memory.
Definition lsq.cc:702
Fault read(LSQRequest *request, ssize_t load_idx)
Executes a read operation, using the load specified at the load index.
Definition lsq.cc:1603
Fault executeStore(const DynInstPtr &inst)
Executes a store.
Definition lsq.cc:292
void tick()
Ticks the LSQ.
Definition lsq.cc:222
void insertLoad(const DynInstPtr &load_inst)
Inserts a load into the LSQ.
Definition lsq.cc:268
bool isEmpty() const
Returns if the LSQ is empty (both LQ and SQ are empty).
Definition lsq.cc:610
int numStores()
Returns the total number of stores in the store queue.
Definition lsq.cc:540
void recvTimingSnoopReq(PacketPtr pkt)
Definition lsq.cc:487
int cacheStorePorts
The number of cache ports available each cycle (stores only).
Definition lsq.hh:930
Fault executeLoad(const DynInstPtr &inst)
Executes a load.
Definition lsq.cc:284
bool violation()
Returns whether or not there was a memory ordering violation.
Definition lsq.cc:331
void resetHtmStartsStops(ThreadID tid)
Definition lsq.cc:398
SMTQueuePolicy lsqPolicy
The LSQ policy for SMT mode.
Definition lsq.hh:944
const bool recvRespThrottling
Enable load receive response throttling in the LSQ.
Definition lsq.hh:993
int numStoresToWB(ThreadID tid)
Returns the number of stores a specific thread has to write back.
Definition lsq.cc:719
unsigned numFreeStoreEntries()
Returns the number of free store entries.
Definition lsq.cc:564
bool sqFull()
Returns if any of the SQs are full.
Definition lsq.cc:660
unsigned recvRespCachelines
Definition lsq.hh:998
const unsigned recvRespMaxCachelines
Definition lsq.hh:994
STL list class.
Definition stl.hh:51
STL vector class.
Definition stl.hh:37
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition group.hh:75
std::unique_ptr< AtomicOpFunctor > AtomicOpFunctorPtr
Definition amo.hh:269
#define panic(...)
This implements a cprintf based panic() function.
Definition logging.hh:220
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition logging.hh:246
Bitfield< 28 > v
Definition misc_types.hh:54
Bitfield< 4, 0 > mode
Definition misc_types.hh:74
Bitfield< 7 > i
Definition misc_types.hh:67
Bitfield< 23, 0 > offset
Definition types.hh:144
Bitfield< 9 > d
Definition misc_types.hh:64
Bitfield< 3 > addr
Definition types.hh:84
static constexpr int MaxThreads
Definition limits.hh:38
RefCountingPtr< DynInst > DynInstPtr
Units for Stats.
Definition units.hh:113
const FlagsType total
Print the total.
Definition info.hh:59
Copyright (c) 2024 Arm Limited All rights reserved.
Definition binary32.hh:36
std::shared_ptr< FaultBase > Fault
Definition types.hh:249
int16_t ThreadID
Thread index/ID type.
Definition types.hh:235
std::shared_ptr< Request > RequestPtr
Definition request.hh:94
const ThreadID InvalidThreadID
Definition types.hh:236
Tick curTick()
The universal simulation clock.
Definition cur_tick.hh:46
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition types.hh:147
bool transferNeedsBurst(Addr addr, unsigned int size, unsigned int block_size)
Returns true if the given memory access (address, size) needs to be fragmented across aligned fixed-s...
Definition utils.hh:82
uint64_t Tick
Tick count type.
Definition types.hh:58
uint8_t * PacketDataPtr
Definition packet.hh:72
Packet * PacketPtr
int ContextID
Globally unique thread context ID.
Definition types.hh:239
constexpr decltype(nullptr) NoFault
Definition types.hh:253
uint64_t InstSeqNum
Definition inst_seq.hh:40
unsigned inactiveTailSize(const std::vector< bool >::const_iterator &it_start, const std::vector< bool >::const_iterator &it_end)
Get size of inactive tail in an enablement range (0 if none).
Definition utils.hh:103
Addr addrBlockAlign(Addr addr, Addr block_size)
Returns the address of the closest aligned fixed-size block to the given address.
Definition utils.hh:68
Overload hash function for BasicBlockRange type.
Definition binary32.hh:81
statistics::Scalar numCycles
Definition base.hh:664
statistics::Formula recvRespAvgRetryRate
Definition lsq.hh:116
statistics::Formula recvRespAvgSize
Definition lsq.hh:110
statistics::Formula recvRespAvgRate
Definition lsq.hh:113
const std::string & name()
Definition trace.cc:48

Generated on Mon Oct 27 2025 04:13:00 for gem5 by doxygen 1.14.0