lsq.cc
/*
 * Copyright (c) 2011-2012, 2014, 2017-2019, 2021 ARM Limited
 * Copyright (c) 2013 Advanced Micro Devices, Inc.
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2005-2006 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "cpu/o3/lsq.hh"

#include <algorithm>
#include <list>
#include <string>

#include "base/compiler.hh"
#include "base/logging.hh"
#include "cpu/o3/cpu.hh"
#include "cpu/o3/dyn_inst.hh"
#include "cpu/o3/iew.hh"
#include "cpu/o3/limits.hh"
#include "debug/Drain.hh"
#include "debug/Fetch.hh"
#include "debug/HtmCpu.hh"
#include "debug/LSQ.hh"
#include "debug/Writeback.hh"
#include "params/BaseO3CPU.hh"

namespace gem5
{

namespace o3
{

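// DcachePort is the LSQ's request port to the data cache: timing
// responses, snoops and retries arriving at the port are forwarded to the
// owning LSQ (and, for snoops, to each thread's LSQUnit).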
LSQ::DcachePort::DcachePort(LSQ *_lsq, CPU *_cpu) :
    RequestPort(_cpu->name() + ".dcache_port"), lsq(_lsq), cpu(_cpu),
    dcachePortStats(_cpu)
{}

LSQ::DcachePort::DcachePortStats::DcachePortStats(CPU *_cpu)
    : statistics::Group(_cpu),
      ADD_STAT(numRecvResp, statistics::units::Count::get(),
               "Number of received responses"),
      ADD_STAT(numRecvRespBytes, statistics::units::Byte::get(),
               "Number of received response bytes"),
      ADD_STAT(recvRespAvgBW,
               statistics::units::Rate<statistics::units::Byte,
                                       statistics::units::Cycle>::get(),
               "Average bandwidth of received responses"),
      ADD_STAT(recvRespAvgSize,
               statistics::units::Rate<statistics::units::Byte,
                                       statistics::units::Count>::get(),
               "Average packet size per received response"),
      ADD_STAT(recvRespAvgRate,
               statistics::units::Rate<statistics::units::Count,
                                       statistics::units::Cycle>::get(),
               "Average rate of received responses per cycle"),
      ADD_STAT(recvRespAvgRetryRate,
               statistics::units::Rate<statistics::units::Count,
                                       statistics::units::Count>::get(),
               "Average retry rate per received response"),
      ADD_STAT(numSendRetryResp, statistics::units::Count::get(),
               "Number of retry responses sent")
{
    recvRespAvgBW.precision(2);
    recvRespAvgBW = numRecvRespBytes / _cpu->baseStats.numCycles;

    recvRespAvgSize.precision(2);
    recvRespAvgSize = numRecvRespBytes / numRecvResp;

    recvRespAvgRate.precision(2);
    recvRespAvgRate = numRecvResp / _cpu->baseStats.numCycles;

    recvRespAvgRetryRate.precision(2);
    recvRespAvgRetryRate = numSendRetryResp / numRecvResp;
}

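// How much LQ/SQ capacity each thread gets is computed by
// maxLSQAllocation() (see lsq.hh) from the SMT sharing policy. As a
// hypothetical example (numbers not from this file): with the Partitioned
// policy, numThreads=2 and LQEntries=32, each thread would get 32/2 = 16
// LQ entries, while the Threshold policy caps each thread at
// smtLSQThreshold entries instead.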
LSQ::LSQ(CPU *cpu_ptr, IEW *iew_ptr, const BaseO3CPUParams &params)
    : cpu(cpu_ptr), iewStage(iew_ptr),
      _cacheBlocked(false),
      cacheStorePorts(params.cacheStorePorts), usedStorePorts(0),
      cacheLoadPorts(params.cacheLoadPorts), usedLoadPorts(0),
      waitingForStaleTranslation(false),
      staleTranslationWaitTxnId(0),
      lsqPolicy(params.smtLSQPolicy),
      LQEntries(params.LQEntries),
      SQEntries(params.SQEntries),
      maxLQEntries(maxLSQAllocation(lsqPolicy, LQEntries, params.numThreads,
                  params.smtLSQThreshold)),
      maxSQEntries(maxLSQAllocation(lsqPolicy, SQEntries, params.numThreads,
                  params.smtLSQThreshold)),
      dcachePort(this, cpu_ptr),
      numThreads(params.numThreads),
      // (the initializers for the receive-response throttling parameters,
      // recvRespThrottling/recvRespMaxCachelines/recvRespBufferSize, were
      // not captured in this listing)
      recvRespBytes(0),
      recvRespPendBytes(0),
      recvRespCachelines(0),
      recvRespLastCachelineAddr(0),
      recvRespLastActiveCycle(0),
      retryRespEvent([this]{ sendRetryResp(); }, name())
{
    assert(numThreads > 0 && numThreads <= MaxThreads);

    //**********************************************
    //************ Handle SMT Parameters ***********
    //**********************************************

    /* Run SMT policy checks. */
    if (lsqPolicy == SMTQueuePolicy::Dynamic) {
        DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
    } else if (lsqPolicy == SMTQueuePolicy::Partitioned) {
        DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: "
                "%i entries per LQ | %i entries per SQ\n",
                maxLQEntries, maxSQEntries);
    } else if (lsqPolicy == SMTQueuePolicy::Threshold) {

        assert(params.smtLSQThreshold > params.LQEntries);
        assert(params.smtLSQThreshold > params.SQEntries);

        DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
                "%i entries per LQ | %i entries per SQ\n",
                maxLQEntries, maxSQEntries);
    } else {
        panic("Invalid LSQ sharing policy. Options are: Dynamic, "
              "Partitioned, Threshold");
    }

    thread.reserve(numThreads);
    for (ThreadID tid = 0; tid < numThreads; tid++) {
        thread.emplace_back(maxLQEntries, maxSQEntries);
        thread[tid].init(cpu, iew_ptr, params, this, tid);
        thread[tid].setDcachePort(&dcachePort);
    }
}

std::string
LSQ::name() const
{
    return iewStage->name() + ".lsq";
}

void
LSQ::setActiveThreads(std::list<ThreadID> *at_ptr)
{
    activeThreads = at_ptr;
    assert(activeThreads != 0);
}

void
LSQ::drainSanityCheck() const
{
    assert(isDrained());

    for (ThreadID tid = 0; tid < numThreads; tid++)
        thread[tid].drainSanityCheck();
}

bool
LSQ::isDrained() const
{
    bool drained(true);

    if (!lqEmpty()) {
        DPRINTF(Drain, "Not drained, LQ not empty.\n");
        drained = false;
    }

    if (!sqEmpty()) {
        DPRINTF(Drain, "Not drained, SQ not empty.\n");
        drained = false;
    }

    return drained;
}

void
LSQ::takeOverFrom()
{
    usedStorePorts = 0;
    _cacheBlocked = false;

    for (ThreadID tid = 0; tid < numThreads; tid++) {
        thread[tid].takeOverFrom();
    }
}

void
LSQ::tick()
{
    // Re-issue loads which got blocked on the per-cycle load ports limit.
    if (usedLoadPorts == cacheLoadPorts && !_cacheBlocked)
        iewStage->cacheUnblocked();

    usedLoadPorts = 0;
    usedStorePorts = 0;
}

bool
LSQ::cacheBlocked() const
{
    return _cacheBlocked;
}

void
LSQ::cacheBlocked(bool v)
{
    _cacheBlocked = v;
}

bool
LSQ::cachePortAvailable(bool is_load) const
{
    bool ret;
    if (is_load) {
        ret = usedLoadPorts < cacheLoadPorts;
    } else {
        ret = usedStorePorts < cacheStorePorts;
    }
    return ret;
}

void
LSQ::cachePortBusy(bool is_load)
{
    assert(cachePortAvailable(is_load));
    if (is_load) {
        usedLoadPorts++;
    } else {
        usedStorePorts++;
    }
}

void
LSQ::insertLoad(const DynInstPtr &load_inst)
{
    ThreadID tid = load_inst->threadNumber;

    thread[tid].insertLoad(load_inst);
}

void
LSQ::insertStore(const DynInstPtr &store_inst)
{
    ThreadID tid = store_inst->threadNumber;

    thread[tid].insertStore(store_inst);
}

Fault
LSQ::executeLoad(const DynInstPtr &inst)
{
    ThreadID tid = inst->threadNumber;

    return thread[tid].executeLoad(inst);
}

Fault
LSQ::executeStore(const DynInstPtr &inst)
{
    ThreadID tid = inst->threadNumber;

    return thread[tid].executeStore(inst);
}

void
LSQ::commitLoads(InstSeqNum &youngest_inst, ThreadID tid)
{
    thread.at(tid).commitLoads(youngest_inst);
}

void
LSQ::commitStores(InstSeqNum &youngest_inst, ThreadID tid)
{
    thread.at(tid).commitStores(youngest_inst);
}

void
LSQ::writebackStores()
{
    for (ThreadID tid : *activeThreads) {
        if (numStoresToWB(tid) > 0) {
            DPRINTF(Writeback, "[tid:%i] Writing back stores. %i stores "
                    "available for Writeback.\n", tid, numStoresToWB(tid));
        }

        thread[tid].writebackStores();
    }
}

void
LSQ::squash(const InstSeqNum &squashed_num, ThreadID tid)
{
    thread.at(tid).squash(squashed_num);
}

bool
LSQ::violation()
{
    /* Answers: Does Anybody Have a Violation? */
    for (ThreadID tid : *activeThreads) {
        if (thread[tid].violation())
            return true;
    }

    return false;
}

bool LSQ::violation(ThreadID tid) { return thread.at(tid).violation(); }

DynInstPtr
LSQ::getMemDepViolator(ThreadID tid)
{
    return thread.at(tid).getMemDepViolator();
}

int
LSQ::getLoadHead(ThreadID tid)
{
    return thread.at(tid).getLoadHead();
}

InstSeqNum
LSQ::getLoadHeadSeqNum(ThreadID tid)
{
    return thread.at(tid).getLoadHeadSeqNum();
}

int
LSQ::getStoreHead(ThreadID tid)
{
    return thread.at(tid).getStoreHead();
}

InstSeqNum
LSQ::getStoreHeadSeqNum(ThreadID tid)
{
    return thread.at(tid).getStoreHeadSeqNum();
}

int LSQ::getCount(ThreadID tid) { return thread.at(tid).getCount(); }

int LSQ::numLoads(ThreadID tid) { return thread.at(tid).numLoads(); }

int LSQ::numStores(ThreadID tid) { return thread.at(tid).numStores(); }

int
LSQ::numHtmStarts(ThreadID tid) const
{
    if (tid == InvalidThreadID)
        return 0;
    else
        return thread[tid].numHtmStarts();
}

int
LSQ::numHtmStops(ThreadID tid) const
{
    if (tid == InvalidThreadID)
        return 0;
    else
        return thread[tid].numHtmStops();
}

void
LSQ::resetHtmStartsStops(ThreadID tid)
{
    if (tid != InvalidThreadID)
        thread[tid].resetHtmStartsStops();
}

uint64_t
LSQ::getLatestHtmUid(ThreadID tid) const
{
    if (tid == InvalidThreadID)
        return 0;
    else
        return thread[tid].getLatestHtmUid();
}

void
LSQ::setLastRetiredHtmUid(ThreadID tid, uint64_t htmUid)
{
    if (tid != InvalidThreadID)
        thread[tid].setLastRetiredHtmUid(htmUid);
}

void
LSQ::recvReqRetry()
{
    iewStage->cacheUnblocked();
    cacheBlocked(false);

    for (ThreadID tid : *activeThreads) {
        thread[tid].recvRetry();
    }
}

void
LSQ::completeDataAccess(PacketPtr pkt)
{
    LSQRequest *request = dynamic_cast<LSQRequest*>(pkt->senderState);
    thread[cpu->contextToThread(request->contextId())]
        .completeDataAccess(pkt);
}

void
LSQ::sendRetryResp()
{
    dcachePort.sendRetryResp();
}

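// Handles writing back and completing the load or store that has returned
// from memory (invoked by the dcache port on a timing response).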
bool
LSQ::recvTimingResp(PacketPtr pkt)
{
    if (pkt->isError())
        DPRINTF(LSQ, "Got error packet back for address: %#X\n",
                pkt->getAddr());

    LSQRequest *request = dynamic_cast<LSQRequest*>(pkt->senderState);
    panic_if(!request, "Got packet back with unknown sender state\n");

    thread[cpu->contextToThread(request->contextId())].recvTimingResp(pkt);

    if (pkt->isInvalidate()) {
        // This response also contains an invalidate; e.g. this can be the
        // case if cmd is ReadRespWithInvalidate.
        //
        // The calling order between completeDataAccess and checkSnoop
        // matters. By calling checkSnoop after completeDataAccess, we
        // ensure that the fault set by checkSnoop is not lost. Calling
        // writeback (more specifically inst->completeAcc) in
        // completeDataAccess overwrites the fault, and in case this
        // instruction requires squashing (as determined by checkSnoop),
        // the ReExec fault set by checkSnoop would be lost otherwise.

        DPRINTF(LSQ, "received invalidation with response for addr:%#x\n",
                pkt->getAddr());

        for (ThreadID tid = 0; tid < numThreads; tid++) {
            thread[tid].checkSnoop(pkt);
        }
    }
    // Update the LSQRequest state (this may delete the request)
    request->packetReplied();

    if (waitingForStaleTranslation) {
        checkStaleTranslations();
    }

    return true;
}

void
LSQ::recvTimingSnoopReq(PacketPtr pkt)
{
    DPRINTF(LSQ, "received pkt for addr:%#x %s\n", pkt->getAddr(),
            pkt->cmdString());

    // must be a snoop
    if (pkt->isInvalidate()) {
        DPRINTF(LSQ, "received invalidation for addr:%#x\n",
                pkt->getAddr());
        for (ThreadID tid = 0; tid < numThreads; tid++) {
            thread[tid].checkSnoop(pkt);
        }
    } else if (pkt->req && pkt->req->isTlbiExtSync()) {
        DPRINTF(LSQ, "received TLBI Ext Sync\n");
        assert(!waitingForStaleTranslation);

        waitingForStaleTranslation = true;
        staleTranslationWaitTxnId = pkt->req->getExtraData();

        for (auto& unit : thread) {
            unit.startStaleTranslationFlush();
        }

        // In case no units have pending ops, just go ahead
        checkStaleTranslations();
    }
}

int
LSQ::getCount()
{
    unsigned total = 0;

    for (ThreadID tid : *activeThreads) {
        total += getCount(tid);
    }

    return total;
}

int
LSQ::numLoads()
{
    unsigned total = 0;

    for (ThreadID tid : *activeThreads) {
        total += numLoads(tid);
    }

    return total;
}

int
LSQ::numStores()
{
    unsigned total = 0;

    for (ThreadID tid : *activeThreads) {
        total += thread[tid].numStores();
    }

    return total;
}

unsigned
LSQ::numFreeLoadEntries()
{
    unsigned total = 0;

    for (ThreadID tid : *activeThreads) {
        total += thread[tid].numFreeLoadEntries();
    }

    return total;
}

unsigned
LSQ::numFreeStoreEntries()
{
    unsigned total = 0;

    for (ThreadID tid : *activeThreads) {
        total += thread[tid].numFreeStoreEntries();
    }

    return total;
}

unsigned
LSQ::numFreeLoadEntries(ThreadID tid)
{
    return thread[tid].numFreeLoadEntries();
}

unsigned
LSQ::numFreeStoreEntries(ThreadID tid)
{
    return thread[tid].numFreeStoreEntries();
}

bool
LSQ::isFull()
{
    for (ThreadID tid : *activeThreads) {
        if (!(thread[tid].lqFull() || thread[tid].sqFull()))
            return false;
    }

    return true;
}

bool
LSQ::isFull(ThreadID tid)
{
    //@todo: Change to Calculate All Entries for
    //Dynamic Policy
    if (lsqPolicy == SMTQueuePolicy::Dynamic)
        return isFull();
    else
        return thread[tid].lqFull() || thread[tid].sqFull();
}

bool
LSQ::isEmpty() const
{
    return lqEmpty() && sqEmpty();
}

bool
LSQ::lqEmpty() const
{
    for (ThreadID tid : *activeThreads) {
        if (!thread[tid].lqEmpty())
            return false;
    }

    return true;
}

bool
LSQ::sqEmpty() const
{
    for (ThreadID tid : *activeThreads) {
        if (!thread[tid].sqEmpty())
            return false;
    }

    return true;
}

bool
LSQ::lqFull()
{
    for (ThreadID tid : *activeThreads) {
        if (!thread[tid].lqFull())
            return false;
    }

    return true;
}

bool
LSQ::lqFull(ThreadID tid)
{
    //@todo: Change to Calculate All Entries for
    //Dynamic Policy
    if (lsqPolicy == SMTQueuePolicy::Dynamic)
        return lqFull();
    else
        return thread[tid].lqFull();
}

bool
LSQ::sqFull()
{
    for (ThreadID tid : *activeThreads) {
        if (!sqFull(tid))
            return false;
    }

    return true;
}

bool
LSQ::sqFull(ThreadID tid)
{
    //@todo: Change to Calculate All Entries for
    //Dynamic Policy
    if (lsqPolicy == SMTQueuePolicy::Dynamic)
        return sqFull();
    else
        return thread[tid].sqFull();
}

bool
LSQ::isStalled()
{
    for (ThreadID tid : *activeThreads) {
        if (!thread[tid].isStalled())
            return false;
    }

    return true;
}

bool
LSQ::isStalled(ThreadID tid)
{
    if (lsqPolicy == SMTQueuePolicy::Dynamic)
        return isStalled();
    else
        return thread[tid].isStalled();
}

bool
LSQ::hasStoresToWB()
{
    for (ThreadID tid : *activeThreads) {
        if (hasStoresToWB(tid))
            return true;
    }

    return false;
}

bool
LSQ::hasStoresToWB(ThreadID tid)
{
    return thread.at(tid).hasStoresToWB();
}

int
LSQ::numStoresToWB(ThreadID tid)
{
    return thread.at(tid).numStoresToWB();
}

bool
LSQ::willWB()
{
    for (ThreadID tid : *activeThreads) {
        if (willWB(tid))
            return true;
    }

    return false;
}

bool
LSQ::willWB(ThreadID tid)
{
    return thread.at(tid).willWB();
}

void
LSQ::dumpInsts() const
{
    for (ThreadID tid : *activeThreads) {
        thread[tid].dumpInsts();
    }
}

void
LSQ::dumpInsts(ThreadID tid) const
{
    thread.at(tid).dumpInsts();
}

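// pushRequest is the common entry point for load/store/atomic accesses
// issued by the execute stage: it picks the LSQRequest flavour (single,
// split across cache lines, or unsquashable direct for HTM/TLBI commands),
// kicks off translation, and issues the read/write once translation has
// completed.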
Fault
LSQ::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
        unsigned int size, Addr addr, Request::Flags flags, uint64_t *res,
        AtomicOpFunctorPtr amo_op, const std::vector<bool>& byte_enable)
{
    // This incoming request can be a load, a store or an atomic.
    // An atomic request has a corresponding pointer to its atomic memory
    // operation.
    [[maybe_unused]] bool isAtomic = !isLoad && amo_op;

    ThreadID tid = cpu->contextToThread(inst->contextId());
    auto cacheLineSize = cpu->cacheLineSize();
    bool needs_burst = transferNeedsBurst(addr, size, cacheLineSize);
    LSQRequest* request = nullptr;

    // Atomic requests that access data across a cache line boundary are
    // currently not allowed since the cache does not guarantee that the
    // corresponding atomic memory operations are executed atomically
    // across a cache line. For ISAs such as x86 that support
    // cross-cache-line atomic instructions, the cache needs to be modified
    // to perform atomic updates to both cache lines. For now, such
    // cross-line updates are not supported.
    assert(!isAtomic || (isAtomic && !needs_burst));

    const bool htm_cmd = isLoad && (flags & Request::HTM_CMD);
    const bool tlbi_cmd = isLoad && (flags & Request::TLBI_CMD);

    if (inst->translationStarted()) {
        request = inst->savedRequest;
        assert(request);
    } else {
        if (htm_cmd || tlbi_cmd) {
            assert(addr == 0x0lu);
            assert(size == 8);
            request = new UnsquashableDirectRequest(&thread[tid], inst,
                                                    flags);
        } else if (needs_burst) {
            request = new SplitDataRequest(&thread[tid], inst, isLoad, addr,
                                           size, flags, data, res);
        } else {
            request = new SingleDataRequest(&thread[tid], inst, isLoad, addr,
                                            size, flags, data, res,
                                            std::move(amo_op));
        }
        assert(request);
        request->_byteEnable = byte_enable;
        inst->setRequest();
        request->taskId(cpu->taskId());

        // There might be a fault from a previous execution attempt if this
        // is a strictly ordered load.
        inst->getFault() = NoFault;

        request->initiateTranslation();
    }

    /* This is the place where instructions get the effAddr. */
    if (request->isTranslationComplete()) {
        if (request->isMemAccessRequired()) {
            inst->effAddr = request->getVaddr();
            inst->effSize = size;
            inst->effAddrValid(true);

            if (cpu->checker) {
                inst->reqToVerify = std::make_shared<Request>(*request->req());
            }
            Fault fault;
            if (isLoad)
                fault = read(request, inst->lqIdx);
            else
                fault = write(request, data, inst->sqIdx);
            // inst->getFault() may hold the first fault of a multi-access
            // split request at this point. Overwrite that only if we got
            // another type of fault (e.g. re-exec).
            if (fault != NoFault)
                inst->getFault() = fault;
        } else if (isLoad) {
            inst->setMemAccPredicate(false);
            // Commit will have to clean up whatever happened. Set this
            // instruction as executed.
            inst->setExecuted();
        }
    }

    if (inst->traceData)
        inst->traceData->setMem(addr, size, flags);

    return inst->getFault();
}

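// finish() below is the translation callback (BaseMMU::Translation): the
// MMU calls it when the timing translation for this request completes or
// faults.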
void
LSQ::SingleDataRequest::finish(const Fault &fault, const RequestPtr &request,
        gem5::ThreadContext* tc, BaseMMU::Mode mode)
{
    _fault.push_back(fault);
    numInTranslationFragments = 0;
    numTranslatedFragments = 1;
    /* If the instruction has been squashed, let the request know
     * as it may have to self-destruct. */
    if (_inst->isSquashed()) {
        squashTranslation();
    } else {
        _inst->strictlyOrdered(request->isStrictlyOrdered());

        flags.set(Flag::TranslationFinished);
        if (fault == NoFault) {
            _inst->physEffAddr = request->getPaddr();
            _inst->memReqFlags = request->getFlags();
            if (request->isCondSwap()) {
                assert(_res);
                request->setExtraData(*_res);
            }
            setState(State::Request);
        } else {
            setState(State::Fault);
        }

        LSQRequest::_inst->fault = fault;
        LSQRequest::_inst->translationCompleted(true);
    }
}

void
LSQ::SplitDataRequest::finish(const Fault &fault, const RequestPtr &req,
        gem5::ThreadContext* tc, BaseMMU::Mode mode)
{
    int i;
    for (i = 0; i < _reqs.size() && _reqs[i] != req; i++);
    assert(i < _reqs.size());
    _fault[i] = fault;

    numInTranslationFragments--;
    numTranslatedFragments++;

    if (fault == NoFault)
        _mainReq->setFlags(req->getFlags());

    if (numTranslatedFragments == _reqs.size()) {
        if (_inst->isSquashed()) {
            squashTranslation();
        } else {
            _inst->strictlyOrdered(_mainReq->isStrictlyOrdered());
            flags.set(Flag::TranslationFinished);
            _inst->translationCompleted(true);

            for (i = 0; i < _fault.size() && _fault[i] == NoFault; i++);
            if (i > 0) {
                _inst->physEffAddr = LSQRequest::req()->getPaddr();
                _inst->memReqFlags = _mainReq->getFlags();
                if (_mainReq->isCondSwap()) {
                    assert(i == _fault.size());
                    assert(_res);
                    _mainReq->setExtraData(*_res);
                }
                if (i == _fault.size()) {
                    _inst->fault = NoFault;
                    setState(State::Request);
                } else {
                    _inst->fault = _fault[i];
                    setState(State::PartialFault);
                }
            } else {
                _inst->fault = _fault[0];
                setState(State::Fault);
            }
        }

    }
}

void
LSQ::SingleDataRequest::initiateTranslation()
{
    assert(_reqs.size() == 0);

    addReq(_addr, _size, _byteEnable);

    if (_reqs.size() > 0) {
        _reqs.back()->setReqInstSeqNum(_inst->seqNum);
        _reqs.back()->taskId(_taskId);
        _inst->translationStarted(true);
        setState(State::Translation);
        flags.set(Flag::TranslationStarted);

        _inst->savedRequest = this;
        sendFragmentToTranslation(0);
    } else {
        _inst->setMemAccPredicate(false);
    }
}

PacketPtr
LSQ::SplitDataRequest::mainPacket()
{
    return _mainPacket;
}

RequestPtr
LSQ::SplitDataRequest::mainReq()
{
    return _mainReq;
}

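// A split request fragments an access that crosses cache-line boundaries
// into line-aligned sub-requests: a possibly unaligned head, whole cache
// lines in the middle, and a tail. _mainReq/_mainPacket only carry the
// merged view of the access.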
void
LSQ::SplitDataRequest::initiateTranslation()
{
    auto cacheLineSize = _port.cacheLineSize();
    Addr base_addr = _addr;
    Addr next_addr = addrBlockAlign(_addr + cacheLineSize, cacheLineSize);
    Addr final_addr = addrBlockAlign(_addr + _size, cacheLineSize);
    uint32_t size_so_far = 0;

    _mainReq = std::make_shared<Request>(base_addr,
                _size, _flags, _inst->requestorId(),
                _inst->pcState().instAddr(), _inst->contextId());
    _mainReq->setByteEnable(_byteEnable);

    // Paddr is not used in _mainReq. However, we will accumulate the flags
    // from the sub requests into _mainReq by calling setFlags() in finish().
    // setFlags() assumes that paddr is set so flip the paddr valid bit here
    // to avoid a potential assert in setFlags() when we call it from
    // finish().
    _mainReq->setPaddr(0);

    /* Get the pre-fix, possibly unaligned. */
    auto it_start = _byteEnable.begin();
    auto it_end = _byteEnable.begin() + (next_addr - base_addr);
    addReq(base_addr, next_addr - base_addr,
           std::vector<bool>(it_start, it_end));
    size_so_far = next_addr - base_addr;

    /* We are block aligned now, reading whole blocks. */
    base_addr = next_addr;
    while (base_addr != final_addr) {
        auto it_start = _byteEnable.begin() + size_so_far;
        auto it_end = _byteEnable.begin() + size_so_far + cacheLineSize;
        addReq(base_addr, cacheLineSize,
               std::vector<bool>(it_start, it_end));
        size_so_far += cacheLineSize;
        base_addr += cacheLineSize;
    }

    /* Deal with the tail. */
    if (size_so_far < _size) {
        auto it_start = _byteEnable.begin() + size_so_far;
        auto it_end = _byteEnable.end();
        addReq(base_addr, _size - size_so_far,
               std::vector<bool>(it_start, it_end));
    }

    if (_reqs.size() > 0) {
        /* Setup the requests and send them to translation. */
        for (auto& r: _reqs) {
            r->setReqInstSeqNum(_inst->seqNum);
            r->taskId(_taskId);
        }

        _inst->translationStarted(true);
        setState(State::Translation);
        flags.set(Flag::TranslationStarted);
        _inst->savedRequest = this;
        numInTranslationFragments = 0;
        numTranslatedFragments = 0;
        _fault.resize(_reqs.size());

        for (uint32_t i = 0; i < _reqs.size(); i++) {
            sendFragmentToTranslation(i);
        }
    } else {
        _inst->setMemAccPredicate(false);
    }
}

LSQ::LSQRequest::LSQRequest(
        LSQUnit *port, const DynInstPtr& inst, bool isLoad) :
    SenderState(), _state(State::NotIssued),
    _port(*port), _inst(inst), _data(nullptr),
    _res(nullptr), _addr(0), _size(0), _flags(0),
    _numOutstandingPackets(0), _amo_op(nullptr)
{
    flags.set(Flag::IsLoad, isLoad);
    flags.set(Flag::WriteBackToRegister,
              _inst->isStoreConditional() || _inst->isAtomic() ||
              _inst->isLoad());
    flags.set(Flag::IsAtomic, _inst->isAtomic());
    install();
}

LSQ::LSQRequest::LSQRequest(
        LSQUnit *port, const DynInstPtr& inst, bool isLoad,
        const Addr& addr, const uint32_t& size, const Request::Flags& flags_,
        PacketDataPtr data, uint64_t* res, AtomicOpFunctorPtr amo_op,
        bool stale_translation) :
    SenderState(), _state(State::NotIssued),
    _port(*port), _inst(inst), _data(data),
    _res(res), _addr(addr), _size(size),
    _flags(flags_),
    _numOutstandingPackets(0),
    _amo_op(std::move(amo_op)),
    _hasStaleTranslation(stale_translation)
{
    flags.set(Flag::IsLoad, isLoad);
    flags.set(Flag::WriteBackToRegister,
              _inst->isStoreConditional() || _inst->isAtomic() ||
              _inst->isLoad());
    flags.set(Flag::IsAtomic, _inst->isAtomic());
    install();
}

void
LSQ::LSQRequest::install()
{
    if (isLoad()) {
        _port.loadQueue[_inst->lqIdx].setRequest(this);
    } else {
        // Store, StoreConditional, and Atomic requests are pushed
        // to this storeQueue.
        _port.storeQueue[_inst->sqIdx].setRequest(this);
    }
}

bool LSQ::LSQRequest::squashed() const { return _inst->isSquashed(); }

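// Note: addReq() only creates a sub-request when at least one byte in the
// range is enabled. A fully predicated-off range adds nothing to _reqs,
// which callers detect (empty _reqs) to clear the mem-access predicate.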
void
LSQ::LSQRequest::addReq(Addr addr, unsigned size,
        const std::vector<bool>& byte_enable)
{
    if (isAnyActiveElement(byte_enable.begin(), byte_enable.end())) {
        auto req = new Request(
                addr, size, _flags, _inst->requestorId(),
                _inst->pcState().instAddr(), _inst->contextId(),
                std::move(_amo_op));
        req->setByteEnable(byte_enable);

        /* If the request is marked as NO_ACCESS, setup a local access */
        if (_flags.isSet(Request::NO_ACCESS)) {
            req->setLocalAccessor(
                [this, req](gem5::ThreadContext *tc, PacketPtr pkt) -> Cycles
                {
                    if ((req->isHTMStart() || req->isHTMCommit())) {
                        auto& inst = this->instruction();
                        assert(inst->inHtmTransactionalState());
                        pkt->setHtmTransactional(
                            inst->getHtmTransactionUid());
                    }
                    return Cycles(1);
                }
            );
        }

        _reqs.emplace_back(req);
    }
}

LSQ::LSQRequest::~LSQRequest()
{
    assert(!isAnyOutstandingRequest());
    _inst->savedRequest = nullptr;

    for (auto r: _packets)
        delete r;
}

ContextID
LSQ::LSQRequest::contextId() const
{
    return _inst->contextId();
}

void
LSQ::LSQRequest::sendFragmentToTranslation(int i)
{
    numInTranslationFragments++;
    _port.getMMUPtr()->translateTiming(req(i), _inst->thread->getTC(),
            this, isLoad() ? BaseMMU::Read : BaseMMU::Write);
}

void
LSQ::SingleDataRequest::markAsStaleTranslation()
{
    // If this element has been translated and is currently being requested,
    // then it may be stale.
    if ((!flags.isSet(Flag::Complete)) &&
        (!flags.isSet(Flag::Discarded)) &&
        (flags.isSet(Flag::TranslationStarted))) {
        _hasStaleTranslation = true;
    }

    DPRINTF(LSQ, "SingleDataRequest %d 0x%08x isBlocking:%d\n",
            (int)_state, (uint32_t)flags, _hasStaleTranslation);
}

void
LSQ::SplitDataRequest::markAsStaleTranslation()
{
    // If this element has been translated and is currently being requested,
    // then it may be stale.
    if ((!flags.isSet(Flag::Complete)) &&
        (!flags.isSet(Flag::Discarded)) &&
        (flags.isSet(Flag::TranslationStarted))) {
        _hasStaleTranslation = true;
    }

    DPRINTF(LSQ, "SplitDataRequest %d 0x%08x isBlocking:%d\n",
            (int)_state, (uint32_t)flags, _hasStaleTranslation);
}

bool
LSQ::SingleDataRequest::recvTimingResp(PacketPtr pkt)
{
    assert(_numOutstandingPackets == 1);
    flags.set(Flag::Complete);
    assert(pkt == _packets.front());
    _port.completeDataAccess(pkt);
    _hasStaleTranslation = false;
    return true;
}

bool
LSQ::SplitDataRequest::recvTimingResp(PacketPtr pkt)
{
    uint32_t pktIdx = 0;
    while (pktIdx < _packets.size() && pkt != _packets[pktIdx])
        pktIdx++;
    assert(pktIdx < _packets.size());
    numReceivedPackets++;
    if (numReceivedPackets == _packets.size()) {
        flags.set(Flag::Complete);
        /* Assemble packets. */
        PacketPtr resp = isLoad()
            ? Packet::createRead(_mainReq)
            : Packet::createWrite(_mainReq);
        if (isLoad())
            resp->dataStatic(_inst->memData);
        else
            resp->dataStatic(_data);
        resp->senderState = this;
        _port.completeDataAccess(resp);
        delete resp;
    }
    _hasStaleTranslation = false;
    return true;
}

void
LSQ::SingleDataRequest::buildPackets()
{
    /* Retries do not create new packets. */
    if (_packets.size() == 0) {
        _packets.push_back(
                isLoad()
                    ? Packet::createRead(req())
                    : Packet::createWrite(req()));
        _packets.back()->dataStatic(_inst->memData);
        _packets.back()->senderState = this;

        // hardware transactional memory
        // If request originates in a transaction (not necessarily a HtmCmd),
        // then the packet should be marked as such.
        if (_inst->inHtmTransactionalState()) {
            _packets.back()->setHtmTransactional(
                _inst->getHtmTransactionUid());

            DPRINTF(HtmCpu,
              "HTM %s pc=0x%lx - vaddr=0x%lx - paddr=0x%lx - htmUid=%u\n",
              isLoad() ? "LD" : "ST",
              _inst->pcState().instAddr(),
              _packets.back()->req->hasVaddr() ?
                  _packets.back()->req->getVaddr() : 0lu,
              _packets.back()->getAddr(),
              _inst->getHtmTransactionUid());
        }
    }
    assert(_packets.size() == 1);
}

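// For split requests, loads point each sub-packet directly into the
// instruction's memData buffer at the fragment's offset, while stores copy
// the fragment's bytes into a per-packet buffer owned by the packet.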
void
LSQ::SplitDataRequest::buildPackets()
{
    /* Extra data?? */
    Addr base_address = _addr;

    if (_packets.size() == 0) {
        /* New stuff */
        if (isLoad()) {
            _mainPacket = Packet::createRead(_mainReq);
            _mainPacket->dataStatic(_inst->memData);

            // hardware transactional memory
            // If request originates in a transaction,
            // packet should be marked as such.
            if (_inst->inHtmTransactionalState()) {
                _mainPacket->setHtmTransactional(
                    _inst->getHtmTransactionUid());
                DPRINTF(HtmCpu,
                  "HTM LD.0 pc=0x%lx-vaddr=0x%lx-paddr=0x%lx-htmUid=%u\n",
                  _inst->pcState().instAddr(),
                  _mainPacket->req->hasVaddr() ?
                      _mainPacket->req->getVaddr() : 0lu,
                  _mainPacket->getAddr(),
                  _inst->getHtmTransactionUid());
            }
        }
        for (int i = 0; i < _reqs.size() && _fault[i] == NoFault; i++) {
            RequestPtr req = _reqs[i];
            PacketPtr pkt = isLoad() ? Packet::createRead(req)
                                     : Packet::createWrite(req);
            ptrdiff_t offset = req->getVaddr() - base_address;
            if (isLoad()) {
                pkt->dataStatic(_inst->memData + offset);
            } else {
                uint8_t* req_data = new uint8_t[req->getSize()];
                std::memcpy(req_data,
                            _inst->memData + offset,
                            req->getSize());
                pkt->dataDynamic(req_data);
            }
            pkt->senderState = this;
            _packets.push_back(pkt);

            // hardware transactional memory
            // If request originates in a transaction,
            // packet should be marked as such.
            if (_inst->inHtmTransactionalState()) {
                _packets.back()->setHtmTransactional(
                    _inst->getHtmTransactionUid());
                DPRINTF(HtmCpu,
                  "HTM %s.%d pc=0x%lx-vaddr=0x%lx-paddr=0x%lx-htmUid=%u\n",
                  isLoad() ? "LD" : "ST",
                  i + 1,
                  _inst->pcState().instAddr(),
                  _packets.back()->req->hasVaddr() ?
                      _packets.back()->req->getVaddr() : 0lu,
                  _packets.back()->getAddr(),
                  _inst->getHtmTransactionUid());
            }
        }
    }
    assert(_packets.size() > 0);
}

void
LSQ::SingleDataRequest::sendPacketToCache()
{
    assert(_numOutstandingPackets == 0);
    if (lsqUnit()->trySendPacket(isLoad(), _packets.at(0)))
        _numOutstandingPackets = 1;
}

void
LSQ::SplitDataRequest::sendPacketToCache()
{
    /* Try to send the packets. */
    while (numReceivedPackets + _numOutstandingPackets < _packets.size() &&
            lsqUnit()->trySendPacket(isLoad(),
                _packets.at(numReceivedPackets + _numOutstandingPackets))) {
        _numOutstandingPackets++;
    }
}

Cycles
LSQ::SingleDataRequest::handleLocalAccess(
        gem5::ThreadContext *thread, PacketPtr pkt)
{
    return pkt->req->localAccessor(thread, pkt);
}

Cycles
LSQ::SplitDataRequest::handleLocalAccess(
        gem5::ThreadContext *thread, PacketPtr mainPkt)
{
    Cycles delay(0);
    unsigned offset = 0;

    for (auto r: _reqs) {
        PacketPtr pkt =
            new Packet(r, isLoad() ? MemCmd::ReadReq : MemCmd::WriteReq);
        pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
        Cycles d = r->localAccessor(thread, pkt);
        if (d > delay)
            delay = d;
        offset += r->getSize();
        delete pkt;
    }
    return delay;
}

bool
LSQ::SingleDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
{
    return ((LSQRequest::_reqs[0]->getPaddr() & blockMask) == blockAddr);
}

/**
 * Caches may probe into the load-store queue to enforce memory ordering
 * guarantees. This method supports such probes by comparing a snooped
 * cache block address against the physical addresses of this request's
 * fragments.
 */
bool
LSQ::SplitDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
{
    bool is_hit = false;
    for (auto &r: _reqs) {
        /**
         * Fragments of a partially faulting request may still have an
         * outstanding translation and thus no valid physical address, so
         * check that one exists before comparing.
         */
        if (r->hasPaddr() && (r->getPaddr() & blockMask) == blockAddr) {
            is_hit = true;
            break;
        }
    }
    return is_hit;
}

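// throttleReadResp models finite fill bandwidth from the d-cache: per
// cycle, at most recvRespMaxCachelines distinct cache lines and
// recvRespBufferSize bytes of ReadResp data are accepted; anything beyond
// that is refused and retried on the next cycle via retryRespEvent.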
bool
LSQ::DcachePort::throttleReadResp(PacketPtr pkt)
{
    // Check if tick is unaligned to correct +1 curCycle delta
    bool is_unaligned_tick = curTick() % cpu->clockPeriod() != 0;
    Cycles current_cycle = cpu->curCycle() - Cycles(is_unaligned_tick);

    // Reset counters/flags on a new cycle
    if (current_cycle > lsq->recvRespLastActiveCycle) {
        lsq->recvRespBytes = 0;
        lsq->recvRespCachelines = 0;
        lsq->recvRespLastCachelineAddr = 0;
    }

    lsq->recvRespLastActiveCycle = current_cycle;

    Addr cacheline_addr = addrBlockAlign(pkt->getAddr(), cpu->cacheLineSize());

    bool throttle_cycle = false;

    // Check limits
    bool is_new_cacheline = cacheline_addr != lsq->recvRespLastCachelineAddr;
    bool max_cachelines = (lsq->recvRespCachelines + is_new_cacheline)
        > lsq->recvRespMaxCachelines;
    int free_buf_size = lsq->recvRespBufferSize - lsq->recvRespBytes;
    bool max_bytes = pkt->getSize() > free_buf_size;

    // No pending response and either per-cycle limit reached
    if (lsq->recvRespPendBytes == 0 && (max_cachelines || max_bytes)) {
        // If the buffer size is an exclusive limit, try to saturate it and
        // save any remaining bytes for later
        if (max_bytes && !max_cachelines) {
            lsq->recvRespBytes += free_buf_size;
            assert(lsq->recvRespBytes <= lsq->recvRespBufferSize);

            lsq->recvRespPendBytes = pkt->getSize() - free_buf_size;
        }

        // Throttle this cycle
        throttle_cycle = true;
        DPRINTF(LSQ, "throttling ReadResp: max_cachelines=%d max_bytes=%d\n",
                max_cachelines, max_bytes);

    // Still processing previous response
    } else if (lsq->recvRespPendBytes > 0) {
        DPRINTF(LSQ, "recvRespPendBytes=%u\n", lsq->recvRespPendBytes);

        // Shouldn't have processed anything this cycle yet
        assert(lsq->recvRespBytes == 0 && lsq->recvRespCachelines == 0);

        // Throttle if pending bytes are greater than the buffer size
        throttle_cycle = lsq->recvRespPendBytes > lsq->recvRespBufferSize;

        // Process as many pending bytes as possible this cycle
        lsq->recvRespBytes += (throttle_cycle) ? lsq->recvRespBufferSize
                                               : lsq->recvRespPendBytes;
        lsq->recvRespPendBytes -= lsq->recvRespBytes;

    // No pending response and no limit reached
    } else {
        // Process the whole response
        lsq->recvRespBytes += pkt->getSize();
    }

    // Got a new cacheline on this cycle. Count it and save it for later
    // (assumes we cannot get previous cachelines again this cycle)
    if (is_new_cacheline) {
        lsq->recvRespCachelines++;
        lsq->recvRespLastCachelineAddr = cacheline_addr;
    }

    // Throttling this cycle
    if (throttle_cycle) {
        Tick next_cycle = cpu->cyclesToTicks(current_cycle + Cycles(1));

        // Sanity checks
        assert(next_cycle > curTick());
        assert(!(lsq->retryRespEvent.scheduled()));

        // Schedule a retry on the next cycle
        cpu->schedule(lsq->retryRespEvent, next_cycle);
        DPRINTF(LSQ, "retryRespEvent scheduled for tick=%lu\n", next_cycle);

        dcachePortStats.numSendRetryResp++;
    }

    return throttle_cycle;
}

bool
LSQ::DcachePort::recvTimingResp(PacketPtr pkt)
{
    if (lsq->recvRespThrottling && pkt->cmd == MemCmd::ReadResp) {
        if (throttleReadResp(pkt)) {
            return false;
        }
    }

    dcachePortStats.numRecvResp++;
    dcachePortStats.numRecvRespBytes += pkt->getSize();

    return lsq->recvTimingResp(pkt);
}

void
LSQ::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
{
    for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
        if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
            cpu->wakeup(tid);
        }
    }
    lsq->recvTimingSnoopReq(pkt);
}

void
LSQ::DcachePort::recvReqRetry()
{
    lsq->recvReqRetry();
}

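// UnsquashableDirectRequest carries special commands (HTM start/commit,
// TLBI sync) that are encoded as 8-byte loads to a dummy address; they
// bypass real address translation and must not be squashed.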
LSQ::UnsquashableDirectRequest::UnsquashableDirectRequest(
    LSQUnit* port,
    const DynInstPtr& inst,
    const Request::Flags& flags_) :
    SingleDataRequest(port, inst, true, 0x0lu, 8, flags_,
                      nullptr, nullptr, nullptr)
{
}

void
LSQ::UnsquashableDirectRequest::initiateTranslation()
{
    // Special commands are implemented as loads to avoid significant
    // changes to the cpu and memory interfaces.
    // The virtual and physical address use a dummy value of 0x00.
    // Address translation does not really occur, hence the code below.

    assert(_reqs.size() == 0);

    addReq(_addr, _size, _byteEnable);

    if (_reqs.size() > 0) {
        _reqs.back()->setReqInstSeqNum(_inst->seqNum);
        _reqs.back()->taskId(_taskId);
        _reqs.back()->setPaddr(_addr);
        _reqs.back()->setInstCount(_inst->getCpuPtr()->totalInsts());

        _inst->strictlyOrdered(_reqs.back()->isStrictlyOrdered());
        _inst->fault = NoFault;
        _inst->physEffAddr = _reqs.back()->getPaddr();
        _inst->memReqFlags = _reqs.back()->getFlags();
        _inst->savedRequest = this;

        flags.set(Flag::TranslationStarted);
        flags.set(Flag::TranslationFinished);

        _inst->translationStarted(true);
        _inst->translationCompleted(true);

        setState(State::Request);
    } else {
        panic("unexpected behaviour in initiateTranslation()");
    }
}

void
LSQ::UnsquashableDirectRequest::markAsStaleTranslation()
{
    // HTM/TLBI operations do not translate,
    // so they cannot have stale translations.
    _hasStaleTranslation = false;
}

void
LSQ::UnsquashableDirectRequest::finish(const Fault &fault,
        const RequestPtr &req, gem5::ThreadContext* tc,
        BaseMMU::Mode mode)
{
    panic("unexpected behaviour - finish()");
}

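// Checks if the per-thread queues have any marked (stale-translation)
// operations left, and sends the TLBI sync completion message once they
// have all drained.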
void
LSQ::checkStaleTranslations()
{
    assert(waitingForStaleTranslation);

    DPRINTF(LSQ, "Checking pending TLBI sync\n");
    // Check if all thread queues are complete
    for (const auto& unit : thread) {
        if (unit.checkStaleTranslations())
            return;
    }
    DPRINTF(LSQ, "No threads have blocking TLBI sync\n");

    // All thread queues have committed their sync operations
    // => send a RubyRequest to the sequencer
    auto req = Request::createMemManagement(Request::TLBI_EXT_SYNC_COMP,
                                            cpu->dataRequestorId());
    req->setExtraData(staleTranslationWaitTxnId);
    PacketPtr pkt = Packet::createRead(req);

    // TODO - reserve some credit for these responses?
    if (!dcachePort.sendTimingReq(pkt)) {
        panic("Couldn't send TLBI_EXT_SYNC_COMP message");
    }

    waitingForStaleTranslation = false;
    staleTranslationWaitTxnId = 0;
}

Fault
LSQ::read(LSQRequest* request, ssize_t load_idx)
{
    assert(request->req()->contextId() == request->contextId());
    ThreadID tid = cpu->contextToThread(request->req()->contextId());

    return thread.at(tid).read(request, load_idx);
}

Fault
LSQ::write(LSQRequest* request, uint8_t *data, ssize_t store_idx)
{
    ThreadID tid = cpu->contextToThread(request->req()->contextId());

    return thread.at(tid).write(request, data, store_idx);
}

} // namespace o3
} // namespace gem5