gem5 v24.0.0.0
Loading...
Searching...
No Matches
lsq.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2011-2012, 2014, 2017-2019, 2021 ARM Limited
3 * Copyright (c) 2013 Advanced Micro Devices, Inc.
4 * All rights reserved
5 *
6 * The license below extends only to copyright in the software and shall
7 * not be construed as granting a license to any other intellectual
8 * property including but not limited to intellectual property relating
9 * to a hardware implementation of the functionality of the software
10 * licensed hereunder. You may use the software subject to the license
11 * terms below provided that you ensure that this notice is replicated
12 * unmodified and in its entirety in all distributions of the software,
13 * modified or unmodified, in source code or in binary form.
14 *
15 * Copyright (c) 2005-2006 The Regents of The University of Michigan
16 * All rights reserved.
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are
20 * met: redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer;
22 * redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution;
25 * neither the name of the copyright holders nor the names of its
26 * contributors may be used to endorse or promote products derived from
27 * this software without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 */
41
42#include "cpu/o3/lsq.hh"
43
44#include <algorithm>
45#include <list>
46#include <string>
47
48#include "base/compiler.hh"
49#include "base/logging.hh"
50#include "cpu/o3/cpu.hh"
51#include "cpu/o3/dyn_inst.hh"
52#include "cpu/o3/iew.hh"
53#include "cpu/o3/limits.hh"
54#include "debug/Drain.hh"
55#include "debug/Fetch.hh"
56#include "debug/HtmCpu.hh"
57#include "debug/LSQ.hh"
58#include "debug/Writeback.hh"
59#include "params/BaseO3CPU.hh"
60
61namespace gem5
62{
63
64namespace o3
65{
66
68 RequestPort(_cpu->name() + ".dcache_port"), lsq(_lsq), cpu(_cpu)
69{}
70
/**
 * LSQ constructor: validates the SMT queue-sharing policy selected in
 * the params and builds one LSQUnit per hardware thread.
 *
 * NOTE(review): this extract is missing several member-initializer and
 * DPRINTF-argument lines (the maxLQEntries/maxSQEntries initializers);
 * the fragments below are incomplete as displayed.
 */
LSQ::LSQ(CPU *cpu_ptr, IEW *iew_ptr, const BaseO3CPUParams &params)
    : cpu(cpu_ptr), iewStage(iew_ptr),
      _cacheBlocked(false),
      lsqPolicy(params.smtLSQPolicy),
      LQEntries(params.LQEntries),
      SQEntries(params.SQEntries),
      params.smtLSQThreshold)),
      params.smtLSQThreshold)),
      dcachePort(this, cpu_ptr),
{
    assert(numThreads > 0 && numThreads <= MaxThreads);

    //**********************************************
    //************ Handle SMT Parameters ***********
    //**********************************************

    /* Run SMT policy checks. */
    if (lsqPolicy == SMTQueuePolicy::Dynamic) {
        DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
    } else if (lsqPolicy == SMTQueuePolicy::Partitioned) {
        DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: "
                "%i entries per LQ | %i entries per SQ\n",
    } else if (lsqPolicy == SMTQueuePolicy::Threshold) {

        // The threshold must leave headroom beyond the per-thread
        // baseline LQ/SQ entry counts.
        assert(params.smtLSQThreshold > params.LQEntries);
        assert(params.smtLSQThreshold > params.SQEntries);

        DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
                "%i entries per LQ | %i entries per SQ\n",
    } else {
        panic("Invalid LSQ sharing policy. Options are: Dynamic, "
              "Partitioned, Threshold");
    }

    // Construct and initialize one load/store queue unit per thread,
    // wiring each to the shared dcache port.
    thread.reserve(numThreads);
    for (ThreadID tid = 0; tid < numThreads; tid++) {
        thread.emplace_back(maxLQEntries, maxSQEntries);
        thread[tid].init(cpu, iew_ptr, params, this, tid);
        thread[tid].setDcachePort(&dcachePort);
    }
}
121
122
123std::string
125{
126 return iewStage->name() + ".lsq";
127}
128
129void
131{
132 activeThreads = at_ptr;
133 assert(activeThreads != 0);
134}
135
136void
138{
139 assert(isDrained());
140
141 for (ThreadID tid = 0; tid < numThreads; tid++)
143}
144
145bool
147{
148 bool drained(true);
149
150 if (!lqEmpty()) {
151 DPRINTF(Drain, "Not drained, LQ not empty.\n");
152 drained = false;
153 }
154
155 if (!sqEmpty()) {
156 DPRINTF(Drain, "Not drained, SQ not empty.\n");
157 drained = false;
158 }
159
160 return drained;
161}
162
163void
165{
166 usedStorePorts = 0;
167 _cacheBlocked = false;
168
169 for (ThreadID tid = 0; tid < numThreads; tid++) {
170 thread[tid].takeOverFrom();
171 }
172}
173
174void
176{
177 // Re-issue loads which got blocked on the per-cycle load ports limit.
180
181 usedLoadPorts = 0;
182 usedStorePorts = 0;
183}
184
185bool
187{
188 return _cacheBlocked;
189}
190
191void
193{
195}
196
197bool
198LSQ::cachePortAvailable(bool is_load) const
199{
200 bool ret;
201 if (is_load) {
203 } else {
205 }
206 return ret;
207}
208
209void
211{
212 assert(cachePortAvailable(is_load));
213 if (is_load) {
215 } else {
217 }
218}
219
220void
222{
223 ThreadID tid = load_inst->threadNumber;
224
225 thread[tid].insertLoad(load_inst);
226}
227
228void
229LSQ::insertStore(const DynInstPtr &store_inst)
230{
231 ThreadID tid = store_inst->threadNumber;
232
233 thread[tid].insertStore(store_inst);
234}
235
236Fault
238{
239 ThreadID tid = inst->threadNumber;
240
241 return thread[tid].executeLoad(inst);
242}
243
244Fault
246{
247 ThreadID tid = inst->threadNumber;
248
249 return thread[tid].executeStore(inst);
250}
251
252void
254{
255 thread.at(tid).commitLoads(youngest_inst);
256}
257
258void
260{
261 thread.at(tid).commitStores(youngest_inst);
262}
263
264void
266{
269
270 while (threads != end) {
271 ThreadID tid = *threads++;
272
273 if (numStoresToWB(tid) > 0) {
274 DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores "
275 "available for Writeback.\n", tid, numStoresToWB(tid));
276 }
277
278 thread[tid].writebackStores();
279 }
280}
281
282void
283LSQ::squash(const InstSeqNum &squashed_num, ThreadID tid)
284{
285 thread.at(tid).squash(squashed_num);
286}
287
288bool
290{
291 /* Answers: Does Anybody Have a Violation?*/
294
295 while (threads != end) {
296 ThreadID tid = *threads++;
297
298 if (thread[tid].violation())
299 return true;
300 }
301
302 return false;
303}
304
305bool LSQ::violation(ThreadID tid) { return thread.at(tid).violation(); }
306
309{
310 return thread.at(tid).getMemDepViolator();
311}
312
313int
315{
316 return thread.at(tid).getLoadHead();
317}
318
321{
322 return thread.at(tid).getLoadHeadSeqNum();
323}
324
325int
327{
328 return thread.at(tid).getStoreHead();
329}
330
333{
334 return thread.at(tid).getStoreHeadSeqNum();
335}
336
337int LSQ::getCount(ThreadID tid) { return thread.at(tid).getCount(); }
338
339int LSQ::numLoads(ThreadID tid) { return thread.at(tid).numLoads(); }
340
341int LSQ::numStores(ThreadID tid) { return thread.at(tid).numStores(); }
342
343int
345{
346 if (tid == InvalidThreadID)
347 return 0;
348 else
349 return thread[tid].numHtmStarts();
350}
351int
353{
354 if (tid == InvalidThreadID)
355 return 0;
356 else
357 return thread[tid].numHtmStops();
358}
359
360void
362{
363 if (tid != InvalidThreadID)
364 thread[tid].resetHtmStartsStops();
365}
366
367uint64_t
369{
370 if (tid == InvalidThreadID)
371 return 0;
372 else
373 return thread[tid].getLatestHtmUid();
374}
375
376void
378{
379 if (tid != InvalidThreadID)
380 thread[tid].setLastRetiredHtmUid(htmUid);
381}
382
383void
385{
387 cacheBlocked(false);
388
389 for (ThreadID tid : *activeThreads) {
390 thread[tid].recvRetry();
391 }
392}
393
394void
396{
397 LSQRequest *request = dynamic_cast<LSQRequest*>(pkt->senderState);
398 thread[cpu->contextToThread(request->contextId())]
399 .completeDataAccess(pkt);
400}
401
402bool
404{
405 if (pkt->isError())
406 DPRINTF(LSQ, "Got error packet back for address: %#X\n",
407 pkt->getAddr());
408
409 LSQRequest *request = dynamic_cast<LSQRequest*>(pkt->senderState);
410 panic_if(!request, "Got packet back with unknown sender state\n");
411
412 thread[cpu->contextToThread(request->contextId())].recvTimingResp(pkt);
413
414 if (pkt->isInvalidate()) {
415 // This response also contains an invalidate; e.g. this can be the case
416 // if cmd is ReadRespWithInvalidate.
417 //
418 // The calling order between completeDataAccess and checkSnoop matters.
419 // By calling checkSnoop after completeDataAccess, we ensure that the
420 // fault set by checkSnoop is not lost. Calling writeback (more
421 // specifically inst->completeAcc) in completeDataAccess overwrites
422 // fault, and in case this instruction requires squashing (as
423 // determined by checkSnoop), the ReExec fault set by checkSnoop would
424 // be lost otherwise.
425
426 DPRINTF(LSQ, "received invalidation with response for addr:%#x\n",
427 pkt->getAddr());
428
429 for (ThreadID tid = 0; tid < numThreads; tid++) {
430 thread[tid].checkSnoop(pkt);
431 }
432 }
433 // Update the LSQRequest state (this may delete the request)
434 request->packetReplied();
435
438 }
439
440 return true;
441}
442
443void
445{
446 DPRINTF(LSQ, "received pkt for addr:%#x %s\n", pkt->getAddr(),
447 pkt->cmdString());
448
449 // must be a snoop
450 if (pkt->isInvalidate()) {
451 DPRINTF(LSQ, "received invalidation for addr:%#x\n",
452 pkt->getAddr());
453 for (ThreadID tid = 0; tid < numThreads; tid++) {
454 thread[tid].checkSnoop(pkt);
455 }
456 } else if (pkt->req && pkt->req->isTlbiExtSync()) {
457 DPRINTF(LSQ, "received TLBI Ext Sync\n");
459
461 staleTranslationWaitTxnId = pkt->req->getExtraData();
462
463 for (auto& unit : thread) {
464 unit.startStaleTranslationFlush();
465 }
466
467 // In case no units have pending ops, just go ahead
469 }
470}
471
472int
474{
475 unsigned total = 0;
476
479
480 while (threads != end) {
481 ThreadID tid = *threads++;
482
483 total += getCount(tid);
484 }
485
486 return total;
487}
488
489int
491{
492 unsigned total = 0;
493
496
497 while (threads != end) {
498 ThreadID tid = *threads++;
499
500 total += numLoads(tid);
501 }
502
503 return total;
504}
505
506int
508{
509 unsigned total = 0;
510
513
514 while (threads != end) {
515 ThreadID tid = *threads++;
516
517 total += thread[tid].numStores();
518 }
519
520 return total;
521}
522
523unsigned
525{
526 unsigned total = 0;
527
530
531 while (threads != end) {
532 ThreadID tid = *threads++;
533
534 total += thread[tid].numFreeLoadEntries();
535 }
536
537 return total;
538}
539
540unsigned
542{
543 unsigned total = 0;
544
547
548 while (threads != end) {
549 ThreadID tid = *threads++;
550
551 total += thread[tid].numFreeStoreEntries();
552 }
553
554 return total;
555}
556
557unsigned
559{
560 return thread[tid].numFreeLoadEntries();
561}
562
563unsigned
565{
566 return thread[tid].numFreeStoreEntries();
567}
568
569bool
571{
574
575 while (threads != end) {
576 ThreadID tid = *threads++;
577
578 if (!(thread[tid].lqFull() || thread[tid].sqFull()))
579 return false;
580 }
581
582 return true;
583}
584
585bool
587{
588 //@todo: Change to Calculate All Entries for
589 //Dynamic Policy
590 if (lsqPolicy == SMTQueuePolicy::Dynamic)
591 return isFull();
592 else
593 return thread[tid].lqFull() || thread[tid].sqFull();
594}
595
596bool
598{
599 return lqEmpty() && sqEmpty();
600}
601
602bool
604{
607
608 while (threads != end) {
609 ThreadID tid = *threads++;
610
611 if (!thread[tid].lqEmpty())
612 return false;
613 }
614
615 return true;
616}
617
618bool
620{
623
624 while (threads != end) {
625 ThreadID tid = *threads++;
626
627 if (!thread[tid].sqEmpty())
628 return false;
629 }
630
631 return true;
632}
633
634bool
636{
639
640 while (threads != end) {
641 ThreadID tid = *threads++;
642
643 if (!thread[tid].lqFull())
644 return false;
645 }
646
647 return true;
648}
649
650bool
652{
653 //@todo: Change to Calculate All Entries for
654 //Dynamic Policy
655 if (lsqPolicy == SMTQueuePolicy::Dynamic)
656 return lqFull();
657 else
658 return thread[tid].lqFull();
659}
660
661bool
663{
666
667 while (threads != end) {
668 ThreadID tid = *threads++;
669
670 if (!sqFull(tid))
671 return false;
672 }
673
674 return true;
675}
676
677bool
679{
680 //@todo: Change to Calculate All Entries for
681 //Dynamic Policy
682 if (lsqPolicy == SMTQueuePolicy::Dynamic)
683 return sqFull();
684 else
685 return thread[tid].sqFull();
686}
687
688bool
690{
693
694 while (threads != end) {
695 ThreadID tid = *threads++;
696
697 if (!thread[tid].isStalled())
698 return false;
699 }
700
701 return true;
702}
703
704bool
706{
707 if (lsqPolicy == SMTQueuePolicy::Dynamic)
708 return isStalled();
709 else
710 return thread[tid].isStalled();
711}
712
713bool
715{
718
719 while (threads != end) {
720 ThreadID tid = *threads++;
721
722 if (hasStoresToWB(tid))
723 return true;
724 }
725
726 return false;
727}
728
729bool
731{
732 return thread.at(tid).hasStoresToWB();
733}
734
735int
737{
738 return thread.at(tid).numStoresToWB();
739}
740
741bool
743{
746
747 while (threads != end) {
748 ThreadID tid = *threads++;
749
750 if (willWB(tid))
751 return true;
752 }
753
754 return false;
755}
756
757bool
759{
760 return thread.at(tid).willWB();
761}
762
763void
765{
768
769 while (threads != end) {
770 ThreadID tid = *threads++;
771
772 thread[tid].dumpInsts();
773 }
774}
775
776void
778{
779 thread.at(tid).dumpInsts();
780}
781
/**
 * Create (or resume) a memory request for a load, store, or atomic
 * instruction, start its address translation, and — once translation
 * has completed — hand the access to the owning thread's LSQ unit via
 * read()/write().
 *
 * @param inst        instruction issuing the access
 * @param isLoad      true for loads (and HTM/TLBI commands)
 * @param data        store data source buffer (stores/atomics)
 * @param size        access size in bytes
 * @param addr        virtual address of the access
 * @param flags       request flags (may carry HTM_CMD/TLBI_CMD)
 * @param res         result pointer for store-conditionals/swaps
 * @param amo_op      atomic memory operation, if any
 * @param byte_enable per-byte enable mask for the access
 * @return the instruction's fault field, which accumulates any
 *         translation or access fault encountered along the way
 */
Fault
LSQ::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
        unsigned int size, Addr addr, Request::Flags flags, uint64_t *res,
        AtomicOpFunctorPtr amo_op, const std::vector<bool>& byte_enable)
{
    // This incoming request can be either load, store or atomic.
    // Atomic request has a corresponding pointer to its atomic memory
    // operation
    [[maybe_unused]] bool isAtomic = !isLoad && amo_op;

    ThreadID tid = cpu->contextToThread(inst->contextId());
    auto cacheLineSize = cpu->cacheLineSize();
    bool needs_burst = transferNeedsBurst(addr, size, cacheLineSize);
    LSQRequest* request = nullptr;

    // Atomic requests that access data across cache line boundary are
    // currently not allowed since the cache does not guarantee corresponding
    // atomic memory operations to be executed atomically across a cache line.
    // For ISAs such as x86 that supports cross-cache-line atomic instructions,
    // the cache needs to be modified to perform atomic update to both cache
    // lines. For now, such cross-line update is not supported.
    assert(!isAtomic || (isAtomic && !needs_burst));

    const bool htm_cmd = isLoad && (flags & Request::HTM_CMD);
    const bool tlbi_cmd = isLoad && (flags & Request::TLBI_CMD);

    if (inst->translationStarted()) {
        // Re-execution attempt: reuse the request saved on the
        // instruction rather than building a new one.
        request = inst->savedRequest;
        assert(request);
    } else {
        if (htm_cmd || tlbi_cmd) {
            // HTM/TLBI commands use a fixed dummy address and size.
            assert(addr == 0x0lu);
            assert(size == 8);
            request = new UnsquashableDirectRequest(&thread[tid], inst, flags);
        } else if (needs_burst) {
            // Access crosses a cache-line boundary: split it.
            request = new SplitDataRequest(&thread[tid], inst, isLoad, addr,
                    size, flags, data, res);
        } else {
            request = new SingleDataRequest(&thread[tid], inst, isLoad, addr,
                    size, flags, data, res, std::move(amo_op));
        }
        assert(request);
        request->_byteEnable = byte_enable;
        inst->setRequest();
        request->taskId(cpu->taskId());

        // There might be fault from a previous execution attempt if this is
        // a strictly ordered load
        inst->getFault() = NoFault;

        request->initiateTranslation();
    }

    /* This is the place where instructions get the effAddr. */
    if (request->isTranslationComplete()) {
        if (request->isMemAccessRequired()) {
            inst->effAddr = request->getVaddr();
            inst->effSize = size;
            inst->effAddrValid(true);

            // When a checker CPU is attached, keep a copy of the
            // request for it to verify against.
            if (cpu->checker) {
                inst->reqToVerify = std::make_shared<Request>(*request->req());
            }
            Fault fault;
            if (isLoad)
                fault = read(request, inst->lqIdx);
            else
                fault = write(request, data, inst->sqIdx);
            // inst->getFault() may have the first-fault of a
            // multi-access split request at this point.
            // Overwrite that only if we got another type of fault
            // (e.g. re-exec).
            if (fault != NoFault)
                inst->getFault() = fault;
        } else if (isLoad) {
            inst->setMemAccPredicate(false);
            // Commit will have to clean up whatever happened. Set this
            // instruction as executed.
            inst->setExecuted();
        }
    }

    if (inst->traceData)
        inst->traceData->setMem(addr, size, flags);

    return inst->getFault();
}
869
// Translation-completion callback for a single (non-split) request.
// NOTE(review): this extract is missing the signature and several
// statement lines (the squashed/fault branches are incomplete as
// displayed).
void
{
    _fault.push_back(fault);
    /* If the instruction has been squashed, let the request know
     * as it may have to self-destruct. */
    if (_inst->isSquashed()) {
    } else {
        _inst->strictlyOrdered(request->isStrictlyOrdered());

        if (fault == NoFault) {
            // Translation succeeded: publish the physical address and
            // request flags to the instruction.
            _inst->physEffAddr = request->getPaddr();
            _inst->memReqFlags = request->getFlags();
            if (request->isCondSwap()) {
                // Conditional-swap carries its comparison value in the
                // request's extra data.
                assert(_res);
                request->setExtraData(*_res);
            }
        } else {
        }

        LSQRequest::_inst->fault = fault;
        LSQRequest::_inst->translationCompleted(true);
    }
}
901
902void
905{
906 int i;
907 for (i = 0; i < _reqs.size() && _reqs[i] != req; i++);
908 assert(i < _reqs.size());
909 _fault[i] = fault;
910
911 numInTranslationFragments--;
912 numTranslatedFragments++;
913
914 if (fault == NoFault)
915 _mainReq->setFlags(req->getFlags());
916
917 if (numTranslatedFragments == _reqs.size()) {
918 if (_inst->isSquashed()) {
919 squashTranslation();
920 } else {
921 _inst->strictlyOrdered(_mainReq->isStrictlyOrdered());
922 flags.set(Flag::TranslationFinished);
923 _inst->translationCompleted(true);
924
925 for (i = 0; i < _fault.size() && _fault[i] == NoFault; i++);
926 if (i > 0) {
927 _inst->physEffAddr = LSQRequest::req()->getPaddr();
928 _inst->memReqFlags = _mainReq->getFlags();
929 if (_mainReq->isCondSwap()) {
930 assert (i == _fault.size());
931 assert(_res);
932 _mainReq->setExtraData(*_res);
933 }
934 if (i == _fault.size()) {
935 _inst->fault = NoFault;
936 setState(State::Request);
937 } else {
938 _inst->fault = _fault[i];
939 setState(State::PartialFault);
940 }
941 } else {
942 _inst->fault = _fault[0];
943 setState(State::Fault);
944 }
945 }
946
947 }
948}
949
950void
952{
953 assert(_reqs.size() == 0);
954
955 addReq(_addr, _size, _byteEnable);
956
957 if (_reqs.size() > 0) {
958 _reqs.back()->setReqInstSeqNum(_inst->seqNum);
959 _reqs.back()->taskId(_taskId);
960 _inst->translationStarted(true);
961 setState(State::Translation);
962 flags.set(Flag::TranslationStarted);
963
964 _inst->savedRequest = this;
965 sendFragmentToTranslation(0);
966 } else {
967 _inst->setMemAccPredicate(false);
968 }
969}
970
973{
974 return _mainPacket;
975}
976
979{
980 return _mainReq;
981}
982
983void
985{
986 auto cacheLineSize = _port.cacheLineSize();
987 Addr base_addr = _addr;
988 Addr next_addr = addrBlockAlign(_addr + cacheLineSize, cacheLineSize);
989 Addr final_addr = addrBlockAlign(_addr + _size, cacheLineSize);
990 uint32_t size_so_far = 0;
991
992 _mainReq = std::make_shared<Request>(base_addr,
993 _size, _flags, _inst->requestorId(),
994 _inst->pcState().instAddr(), _inst->contextId());
995 _mainReq->setByteEnable(_byteEnable);
996
997 // Paddr is not used in _mainReq. However, we will accumulate the flags
998 // from the sub requests into _mainReq by calling setFlags() in finish().
999 // setFlags() assumes that paddr is set so flip the paddr valid bit here to
1000 // avoid a potential assert in setFlags() when we call it from finish().
1001 _mainReq->setPaddr(0);
1002
1003 /* Get the pre-fix, possibly unaligned. */
1004 auto it_start = _byteEnable.begin();
1005 auto it_end = _byteEnable.begin() + (next_addr - base_addr);
1006 addReq(base_addr, next_addr - base_addr,
1007 std::vector<bool>(it_start, it_end));
1008 size_so_far = next_addr - base_addr;
1009
1010 /* We are block aligned now, reading whole blocks. */
1011 base_addr = next_addr;
1012 while (base_addr != final_addr) {
1013 auto it_start = _byteEnable.begin() + size_so_far;
1014 auto it_end = _byteEnable.begin() + size_so_far + cacheLineSize;
1015 addReq(base_addr, cacheLineSize,
1016 std::vector<bool>(it_start, it_end));
1017 size_so_far += cacheLineSize;
1018 base_addr += cacheLineSize;
1019 }
1020
1021 /* Deal with the tail. */
1022 if (size_so_far < _size) {
1023 auto it_start = _byteEnable.begin() + size_so_far;
1024 auto it_end = _byteEnable.end();
1025 addReq(base_addr, _size - size_so_far,
1026 std::vector<bool>(it_start, it_end));
1027 }
1028
1029 if (_reqs.size() > 0) {
1030 /* Setup the requests and send them to translation. */
1031 for (auto& r: _reqs) {
1032 r->setReqInstSeqNum(_inst->seqNum);
1033 r->taskId(_taskId);
1034 }
1035
1036 _inst->translationStarted(true);
1037 setState(State::Translation);
1038 flags.set(Flag::TranslationStarted);
1039 _inst->savedRequest = this;
1040 numInTranslationFragments = 0;
1041 numTranslatedFragments = 0;
1042 _fault.resize(_reqs.size());
1043
1044 for (uint32_t i = 0; i < _reqs.size(); i++) {
1045 sendFragmentToTranslation(i);
1046 }
1047 } else {
1048 _inst->setMemAccPredicate(false);
1049 }
1050}
1051
1053 LSQUnit *port, const DynInstPtr& inst, bool isLoad) :
1054 _state(State::NotIssued),
1055 _port(*port), _inst(inst), _data(nullptr),
1056 _res(nullptr), _addr(0), _size(0), _flags(0),
1057 _numOutstandingPackets(0), _amo_op(nullptr)
1058{
1061 _inst->isStoreConditional() || _inst->isAtomic() ||
1062 _inst->isLoad());
1063 flags.set(Flag::IsAtomic, _inst->isAtomic());
1064 install();
1065}
1066
1068 LSQUnit *port, const DynInstPtr& inst, bool isLoad,
1069 const Addr& addr, const uint32_t& size, const Request::Flags& flags_,
1070 PacketDataPtr data, uint64_t* res, AtomicOpFunctorPtr amo_op,
1071 bool stale_translation)
1072 : _state(State::NotIssued),
1073 numTranslatedFragments(0),
1074 numInTranslationFragments(0),
1075 _port(*port), _inst(inst), _data(data),
1076 _res(res), _addr(addr), _size(size),
1077 _flags(flags_),
1078 _numOutstandingPackets(0),
1079 _amo_op(std::move(amo_op)),
1080 _hasStaleTranslation(stale_translation)
1081{
1084 _inst->isStoreConditional() || _inst->isAtomic() ||
1085 _inst->isLoad());
1086 flags.set(Flag::IsAtomic, _inst->isAtomic());
1087 install();
1088}
1089
1090void
1092{
1093 if (isLoad()) {
1094 _port.loadQueue[_inst->lqIdx].setRequest(this);
1095 } else {
1096 // Store, StoreConditional, and Atomic requests are pushed
1097 // to this storeQueue
1098 _port.storeQueue[_inst->sqIdx].setRequest(this);
1099 }
1100}
1101
1102bool LSQ::LSQRequest::squashed() const { return _inst->isSquashed(); }
1103
1104void
1106 const std::vector<bool>& byte_enable)
1107{
1108 if (isAnyActiveElement(byte_enable.begin(), byte_enable.end())) {
1109 auto req = std::make_shared<Request>(
1110 addr, size, _flags, _inst->requestorId(),
1111 _inst->pcState().instAddr(), _inst->contextId(),
1112 std::move(_amo_op));
1113 req->setByteEnable(byte_enable);
1114
1115 /* If the request is marked as NO_ACCESS, setup a local access */
1116 if (_flags.isSet(Request::NO_ACCESS)) {
1117 req->setLocalAccessor(
1118 [this, req](gem5::ThreadContext *tc, PacketPtr pkt) -> Cycles
1119 {
1120 if ((req->isHTMStart() || req->isHTMCommit())) {
1121 auto& inst = this->instruction();
1122 assert(inst->inHtmTransactionalState());
1123 pkt->setHtmTransactional(
1124 inst->getHtmTransactionUid());
1125 }
1126 return Cycles(1);
1127 }
1128 );
1129 }
1130
1131 _reqs.push_back(req);
1132 }
1133}
1134
1136{
1137 assert(!isAnyOutstandingRequest());
1138 _inst->savedRequest = nullptr;
1139
1140 for (auto r: _packets)
1141 delete r;
1142};
1143
1146{
1147 return _inst->contextId();
1148}
1149
1150void
1152{
1153 numInTranslationFragments++;
1154 _port.getMMUPtr()->translateTiming(req(i), _inst->thread->getTC(),
1155 this, isLoad() ? BaseMMU::Read : BaseMMU::Write);
1156}
1157
1158void
1160{
1161 // If this element has been translated and is currently being requested,
1162 // then it may be stale
1163 if ((!flags.isSet(Flag::Complete)) &&
1164 (!flags.isSet(Flag::Discarded)) &&
1165 (flags.isSet(Flag::TranslationStarted))) {
1166 _hasStaleTranslation = true;
1167 }
1168
1169 DPRINTF(LSQ, "SingleDataRequest %d 0x%08x isBlocking:%d\n",
1170 (int)_state, (uint32_t)flags, _hasStaleTranslation);
1171}
1172
1173void
1175{
1176 // If this element has been translated and is currently being requested,
1177 // then it may be stale
1178 if ((!flags.isSet(Flag::Complete)) &&
1179 (!flags.isSet(Flag::Discarded)) &&
1180 (flags.isSet(Flag::TranslationStarted))) {
1181 _hasStaleTranslation = true;
1182 }
1183
1184 DPRINTF(LSQ, "SplitDataRequest %d 0x%08x isBlocking:%d\n",
1185 (int)_state, (uint32_t)flags, _hasStaleTranslation);
1186}
1187
1188bool
1190{
1191 assert(_numOutstandingPackets == 1);
1192 flags.set(Flag::Complete);
1193 assert(pkt == _packets.front());
1194 _port.completeDataAccess(pkt);
1195 _hasStaleTranslation = false;
1196 return true;
1197}
1198
1199bool
1201{
1202 uint32_t pktIdx = 0;
1203 while (pktIdx < _packets.size() && pkt != _packets[pktIdx])
1204 pktIdx++;
1205 assert(pktIdx < _packets.size());
1206 numReceivedPackets++;
1207 if (numReceivedPackets == _packets.size()) {
1208 flags.set(Flag::Complete);
1209 /* Assemble packets. */
1210 PacketPtr resp = isLoad()
1211 ? Packet::createRead(_mainReq)
1212 : Packet::createWrite(_mainReq);
1213 if (isLoad())
1214 resp->dataStatic(_inst->memData);
1215 else
1216 resp->dataStatic(_data);
1217 resp->senderState = this;
1218 _port.completeDataAccess(resp);
1219 delete resp;
1220 }
1221 _hasStaleTranslation = false;
1222 return true;
1223}
1224
1225void
1227{
1228 /* Retries do not create new packets. */
1229 if (_packets.size() == 0) {
1230 _packets.push_back(
1231 isLoad()
1232 ? Packet::createRead(req())
1233 : Packet::createWrite(req()));
1234 _packets.back()->dataStatic(_inst->memData);
1235 _packets.back()->senderState = this;
1236
1237 // hardware transactional memory
1238 // If request originates in a transaction (not necessarily a HtmCmd),
1239 // then the packet should be marked as such.
1240 if (_inst->inHtmTransactionalState()) {
1241 _packets.back()->setHtmTransactional(
1242 _inst->getHtmTransactionUid());
1243
1244 DPRINTF(HtmCpu,
1245 "HTM %s pc=0x%lx - vaddr=0x%lx - paddr=0x%lx - htmUid=%u\n",
1246 isLoad() ? "LD" : "ST",
1247 _inst->pcState().instAddr(),
1248 _packets.back()->req->hasVaddr() ?
1249 _packets.back()->req->getVaddr() : 0lu,
1250 _packets.back()->getAddr(),
1251 _inst->getHtmTransactionUid());
1252 }
1253 }
1254 assert(_packets.size() == 1);
1255}
1256
1257void
1259{
1260 /* Extra data?? */
1261 Addr base_address = _addr;
1262
1263 if (_packets.size() == 0) {
1264 /* New stuff */
1265 if (isLoad()) {
1266 _mainPacket = Packet::createRead(_mainReq);
1267 _mainPacket->dataStatic(_inst->memData);
1268
1269 // hardware transactional memory
1270 // If request originates in a transaction,
1271 // packet should be marked as such
1272 if (_inst->inHtmTransactionalState()) {
1273 _mainPacket->setHtmTransactional(
1274 _inst->getHtmTransactionUid());
1275 DPRINTF(HtmCpu,
1276 "HTM LD.0 pc=0x%lx-vaddr=0x%lx-paddr=0x%lx-htmUid=%u\n",
1277 _inst->pcState().instAddr(),
1278 _mainPacket->req->hasVaddr() ?
1279 _mainPacket->req->getVaddr() : 0lu,
1280 _mainPacket->getAddr(),
1281 _inst->getHtmTransactionUid());
1282 }
1283 }
1284 for (int i = 0; i < _reqs.size() && _fault[i] == NoFault; i++) {
1285 RequestPtr req = _reqs[i];
1286 PacketPtr pkt = isLoad() ? Packet::createRead(req)
1287 : Packet::createWrite(req);
1288 ptrdiff_t offset = req->getVaddr() - base_address;
1289 if (isLoad()) {
1290 pkt->dataStatic(_inst->memData + offset);
1291 } else {
1292 uint8_t* req_data = new uint8_t[req->getSize()];
1293 std::memcpy(req_data,
1294 _inst->memData + offset,
1295 req->getSize());
1296 pkt->dataDynamic(req_data);
1297 }
1298 pkt->senderState = this;
1299 _packets.push_back(pkt);
1300
1301 // hardware transactional memory
1302 // If request originates in a transaction,
1303 // packet should be marked as such
1304 if (_inst->inHtmTransactionalState()) {
1305 _packets.back()->setHtmTransactional(
1306 _inst->getHtmTransactionUid());
1307 DPRINTF(HtmCpu,
1308 "HTM %s.%d pc=0x%lx-vaddr=0x%lx-paddr=0x%lx-htmUid=%u\n",
1309 isLoad() ? "LD" : "ST",
1310 i+1,
1311 _inst->pcState().instAddr(),
1312 _packets.back()->req->hasVaddr() ?
1313 _packets.back()->req->getVaddr() : 0lu,
1314 _packets.back()->getAddr(),
1315 _inst->getHtmTransactionUid());
1316 }
1317 }
1318 }
1319 assert(_packets.size() > 0);
1320}
1321
1322void
1324{
1325 assert(_numOutstandingPackets == 0);
1326 if (lsqUnit()->trySendPacket(isLoad(), _packets.at(0)))
1327 _numOutstandingPackets = 1;
1328}
1329
1330void
1332{
1333 /* Try to send the packets. */
1334 while (numReceivedPackets + _numOutstandingPackets < _packets.size() &&
1335 lsqUnit()->trySendPacket(isLoad(),
1336 _packets.at(numReceivedPackets + _numOutstandingPackets))) {
1337 _numOutstandingPackets++;
1338 }
1339}
1340
1341Cycles
1344{
1345 return pkt->req->localAccessor(thread, pkt);
1346}
1347
1348Cycles
1351{
1352 Cycles delay(0);
1353 unsigned offset = 0;
1354
1355 for (auto r: _reqs) {
1356 PacketPtr pkt =
1357 new Packet(r, isLoad() ? MemCmd::ReadReq : MemCmd::WriteReq);
1358 pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
1359 Cycles d = r->localAccessor(thread, pkt);
1360 if (d > delay)
1361 delay = d;
1362 offset += r->getSize();
1363 delete pkt;
1364 }
1365 return delay;
1366}
1367
1368bool
1370{
1371 return ( (LSQRequest::_reqs[0]->getPaddr() & blockMask) == blockAddr);
1372}
1373
1389bool
1391{
1392 bool is_hit = false;
1393 for (auto &r: _reqs) {
1403 if (r->hasPaddr() && (r->getPaddr() & blockMask) == blockAddr) {
1404 is_hit = true;
1405 break;
1406 }
1407 }
1408 return is_hit;
1409}
1410
1411bool
1413{
1414 return lsq->recvTimingResp(pkt);
1415}
1416
1417void
1419{
1420 for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
1421 if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
1422 cpu->wakeup(tid);
1423 }
1424 }
1425 lsq->recvTimingSnoopReq(pkt);
1426}
1427
1428void
1430{
1431 lsq->recvReqRetry();
1432}
1433
1435 LSQUnit* port,
1436 const DynInstPtr& inst,
1437 const Request::Flags& flags_) :
1438 SingleDataRequest(port, inst, true, 0x0lu, 8, flags_,
1439 nullptr, nullptr, nullptr)
1440{
1441}
1442
1443void
1445{
1446 // Special commands are implemented as loads to avoid significant
1447 // changes to the cpu and memory interfaces
1448 // The virtual and physical address uses a dummy value of 0x00
1449 // Address translation does not really occur thus the code below
1450
1451 assert(_reqs.size() == 0);
1452
1453 addReq(_addr, _size, _byteEnable);
1454
1455 if (_reqs.size() > 0) {
1456 _reqs.back()->setReqInstSeqNum(_inst->seqNum);
1457 _reqs.back()->taskId(_taskId);
1458 _reqs.back()->setPaddr(_addr);
1459 _reqs.back()->setInstCount(_inst->getCpuPtr()->totalInsts());
1460
1461 _inst->strictlyOrdered(_reqs.back()->isStrictlyOrdered());
1462 _inst->fault = NoFault;
1463 _inst->physEffAddr = _reqs.back()->getPaddr();
1464 _inst->memReqFlags = _reqs.back()->getFlags();
1465 _inst->savedRequest = this;
1466
1467 flags.set(Flag::TranslationStarted);
1468 flags.set(Flag::TranslationFinished);
1469
1470 _inst->translationStarted(true);
1471 _inst->translationCompleted(true);
1472
1473 setState(State::Request);
1474 } else {
1475 panic("unexpected behaviour in initiateTranslation()");
1476 }
1477}
1478
1479void
1481{
1482 // HTM/TLBI operations do not translate,
1483 // so cannot have stale translations
1484 _hasStaleTranslation = false;
1485}
1486
1487void
1489 const RequestPtr &req, gem5::ThreadContext* tc,
1491{
1492 panic("unexpected behaviour - finish()");
1493}
1494
1495void
1497{
1499
1500 DPRINTF(LSQ, "Checking pending TLBI sync\n");
1501 // Check if all thread queues are complete
1502 for (const auto& unit : thread) {
1503 if (unit.checkStaleTranslations())
1504 return;
1505 }
1506 DPRINTF(LSQ, "No threads have blocking TLBI sync\n");
1507
1508 // All thread queues have committed their sync operations
1509 // => send a RubyRequest to the sequencer
1512 cpu->dataRequestorId());
1513 req->setExtraData(staleTranslationWaitTxnId);
1514 PacketPtr pkt = Packet::createRead(req);
1515
1516 // TODO - reserve some credit for these responses?
1517 if (!dcachePort.sendTimingReq(pkt)) {
1518 panic("Couldn't send TLBI_EXT_SYNC_COMP message");
1519 }
1520
1523}
1524
1525Fault
1526LSQ::read(LSQRequest* request, ssize_t load_idx)
1527{
1528 assert(request->req()->contextId() == request->contextId());
1529 ThreadID tid = cpu->contextToThread(request->req()->contextId());
1530
1531 return thread.at(tid).read(request, load_idx);
1532}
1533
1534Fault
1535LSQ::write(LSQRequest* request, uint8_t *data, ssize_t store_idx)
1536{
1537 ThreadID tid = cpu->contextToThread(request->req()->contextId());
1538
1539 return thread.at(tid).write(request, data, store_idx);
1540}
1541
1542} // namespace o3
1543} // namespace gem5
#define DPRINTF(x,...)
Definition trace.hh:210
const char data[]
RequestorID dataRequestorId() const
Reads this CPU's unique data requestor ID.
Definition base.hh:193
Addr cacheLineSize() const
Get the cache line size of the system.
Definition base.hh:397
AddressMonitor * getCpuAddrMonitor(ThreadID tid)
Definition base.hh:656
uint32_t taskId() const
Get cpu task id.
Definition base.hh:211
ThreadID numThreads
Number of threads we're actually simulating (<= SMT_MAX_THREADS).
Definition base.hh:390
ThreadID contextToThread(ContextID cid)
Convert ContextID to threadID.
Definition base.hh:299
Cycles is a wrapper class for representing cycle counts, i.e.
Definition types.hh:79
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition packet.hh:295
Addr getAddr() const
Definition packet.hh:807
bool isError() const
Definition packet.hh:622
const std::string & cmdString() const
Return the string name of the cmd field (for debugging and tracing).
Definition packet.hh:588
static PacketPtr createWrite(const RequestPtr &req)
Definition packet.hh:1044
void dataStatic(T *p)
Set the data pointer to the following value that should not be freed.
Definition packet.hh:1175
SenderState * senderState
This packet's sender state.
Definition packet.hh:545
T * getPtr()
get a pointer to the data ptr.
Definition packet.hh:1225
static PacketPtr createRead(const RequestPtr &req)
Constructor-like methods that return Packets based on Request objects.
Definition packet.hh:1038
RequestPtr req
A pointer to the original request.
Definition packet.hh:377
void dataDynamic(T *p)
Set the data pointer to a value that should have delete [] called on it.
Definition packet.hh:1213
bool isInvalidate() const
Definition packet.hh:609
A RequestPort is a specialisation of a Port, which implements the default protocol for the three diff...
Definition port.hh:136
bool sendTimingReq(PacketPtr pkt)
Attempt to send a timing request to the responder port by calling its corresponding receive function.
Definition port.hh:603
@ TLBI_EXT_SYNC_COMP
The Request tells the interconnect that a remote TLB Sync request has completed.
Definition request.hh:252
@ NO_ACCESS
The request should not cause a memory access.
Definition request.hh:146
static RequestPtr createMemManagement(Flags flags, RequestorID id)
Factory method for creating memory management requests, with unspecified addr and size.
Definition request.hh:531
static const FlagsType TLBI_CMD
Definition request.hh:266
static const FlagsType HTM_CMD
Definition request.hh:263
ThreadContext is the external interface to all thread state for anything outside of the CPU.
O3CPU class, has each of the stages (fetch through commit) within it, as well as all of the time buff...
Definition cpu.hh:94
gem5::Checker< DynInstPtr > * checker
Pointer to the checker, which can dynamically verify instruction results at run time.
Definition cpu.hh:527
virtual void wakeup(ThreadID tid) override
Definition cpu.cc:1337
Fetch class handles both single threaded and SMT fetch.
Definition fetch.hh:79
IEW handles both single threaded and SMT IEW (issue/execute/writeback).
Definition iew.hh:88
std::string name() const
Returns the name of the IEW stage.
Definition iew.cc:118
void cacheUnblocked()
Notifies that the cache has become unblocked.
Definition iew.cc:523
Class that implements the actual LQ and SQ for each specific thread.
Definition lsq_unit.hh:89
virtual void recvTimingSnoopReq(PacketPtr pkt)
Receive a timing snoop request from the peer.
Definition lsq.cc:1418
DcachePort(LSQ *_lsq, CPU *_cpu)
Default constructor.
Definition lsq.cc:67
virtual bool recvTimingResp(PacketPtr pkt)
Timing version of receive.
Definition lsq.cc:1412
virtual void recvReqRetry()
Handles doing a retry of the previous send.
Definition lsq.cc:1429
Memory operation metadata.
Definition lsq.hh:190
@ IsAtomic
True if this is an atomic request.
Definition lsq.hh:223
@ TranslationFinished
True if there are un-replied outbound translations.
Definition lsq.hh:208
@ WriteBackToRegister
True if this request needs to writeBack to register.
Definition lsq.hh:202
std::vector< bool > _byteEnable
Definition lsq.hh:257
LSQRequest(LSQUnit *port, const DynInstPtr &inst, bool isLoad)
Definition lsq.cc:1052
virtual ~LSQRequest()
Destructor.
Definition lsq.cc:1135
bool isLoad() const
Definition lsq.hh:272
void install()
Install the request in the LQ/SQ.
Definition lsq.cc:1091
ContextID contextId() const
Definition lsq.cc:1145
void taskId(const uint32_t &v)
Definition lsq.hh:353
virtual void initiateTranslation()=0
void setState(const State &newState)
Definition lsq.hh:236
void addReq(Addr addr, unsigned size, const std::vector< bool > &byte_enable)
Helper function used to add a (sub)request, given its address addr, size size and byte-enable mask by...
Definition lsq.cc:1105
void sendFragmentToTranslation(int i)
Definition lsq.cc:1151
const DynInstPtr _inst
Definition lsq.hh:247
uint32_t numTranslatedFragments
Definition lsq.hh:238
std::vector< Fault > _fault
Definition lsq.hh:252
uint32_t numInTranslationFragments
Definition lsq.hh:239
bool squashed() const override
This function is used by the page table walker to determine if it should translate the a pending requ...
Definition lsq.cc:1102
RequestPtr req(int idx=0)
Definition lsq.hh:362
std::vector< RequestPtr > _reqs
Definition lsq.hh:251
bool isTranslationComplete()
Definition lsq.hh:466
Addr getVaddr(int idx=0) const
Definition lsq.hh:365
virtual Cycles handleLocalAccess(gem5::ThreadContext *thread, PacketPtr pkt)
Memory mapped IPR accesses.
Definition lsq.cc:1342
virtual void finish(const Fault &fault, const RequestPtr &req, gem5::ThreadContext *tc, BaseMMU::Mode mode)
Definition lsq.cc:871
virtual void initiateTranslation()
Definition lsq.cc:951
virtual bool recvTimingResp(PacketPtr pkt)
Definition lsq.cc:1189
virtual void buildPackets()
Definition lsq.cc:1226
virtual void markAsStaleTranslation()
Definition lsq.cc:1159
virtual void sendPacketToCache()
Definition lsq.cc:1323
virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask)
Test if the request accesses a particular cache line.
Definition lsq.cc:1369
virtual bool recvTimingResp(PacketPtr pkt)
Definition lsq.cc:1200
virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask)
Caches may probe into the load-store queue to enforce memory ordering guarantees.
Definition lsq.cc:1390
virtual void initiateTranslation()
Definition lsq.cc:984
virtual void markAsStaleTranslation()
Definition lsq.cc:1174
virtual void finish(const Fault &fault, const RequestPtr &req, gem5::ThreadContext *tc, BaseMMU::Mode mode)
Definition lsq.cc:903
virtual PacketPtr mainPacket()
Definition lsq.cc:972
virtual Cycles handleLocalAccess(gem5::ThreadContext *thread, PacketPtr pkt)
Memory mapped IPR accesses.
Definition lsq.cc:1349
virtual void sendPacketToCache()
Definition lsq.cc:1331
virtual RequestPtr mainReq()
Definition lsq.cc:978
virtual void buildPackets()
Definition lsq.cc:1258
virtual void finish(const Fault &fault, const RequestPtr &req, gem5::ThreadContext *tc, BaseMMU::Mode mode)
Definition lsq.cc:1488
UnsquashableDirectRequest(LSQUnit *port, const DynInstPtr &inst, const Request::Flags &flags_)
Definition lsq.cc:1434
unsigned SQEntries
Total Size of SQ Entries.
Definition lsq.hh:943
bool isDrained() const
Has the LSQ drained?
Definition lsq.cc:146
int cacheLoadPorts
The number of cache ports available each cycle (loads only).
Definition lsq.hh:902
int usedStorePorts
The number of used cache ports in this cycle by stores.
Definition lsq.hh:900
int numHtmStarts(ThreadID tid) const
Definition lsq.cc:344
std::string name() const
Returns the name of the LSQ.
Definition lsq.cc:124
void commitStores(InstSeqNum &youngest_inst, ThreadID tid)
Commits stores up until the given sequence number for a specific thread.
Definition lsq.cc:259
Addr staleTranslationWaitTxnId
The ID of the transaction that made translations stale.
Definition lsq.hh:909
bool recvTimingResp(PacketPtr pkt)
Handles writing back and completing the load or store that has returned from memory.
Definition lsq.cc:403
void checkStaleTranslations()
Checks if queues have any marked operations left, and sends the appropriate Sync Completion message i...
Definition lsq.cc:1496
int getLoadHead(ThreadID tid)
Returns the head index of the load queue for a specific thread.
Definition lsq.cc:314
void squash(const InstSeqNum &squashed_num, ThreadID tid)
Squash instructions from a thread until the specified sequence number.
Definition lsq.cc:283
bool sqEmpty() const
Returns if all of the SQs are empty.
Definition lsq.cc:619
void completeDataAccess(PacketPtr pkt)
Definition lsq.cc:395
Fault pushRequest(const DynInstPtr &inst, bool isLoad, uint8_t *data, unsigned int size, Addr addr, Request::Flags flags, uint64_t *res, AtomicOpFunctorPtr amo_op, const std::vector< bool > &byte_enable)
Definition lsq.cc:783
unsigned numFreeLoadEntries()
Returns the number of free load entries.
Definition lsq.cc:524
ThreadID numThreads
Number of Threads.
Definition lsq.hh:958
IEW * iewStage
The IEW stage pointer.
Definition lsq.hh:881
InstSeqNum getLoadHeadSeqNum(ThreadID tid)
Returns the sequence number of the head of the load queue.
Definition lsq.cc:320
std::list< ThreadID > * activeThreads
List of Active Threads in System.
Definition lsq.hh:938
DcachePort dcachePort
Data port.
Definition lsq.hh:952
void takeOverFrom()
Takes over execution from another CPU's thread.
Definition lsq.cc:164
DynInstPtr getMemDepViolator(ThreadID tid)
Gets the instruction that caused the memory ordering violation.
Definition lsq.cc:308
static uint32_t maxLSQAllocation(SMTQueuePolicy pol, uint32_t entries, uint32_t numThreads, uint32_t SMTThreshold)
Auxiliary function to calculate per-thread max LSQ allocation limit.
Definition lsq.hh:920
void setActiveThreads(std::list< ThreadID > *at_ptr)
Sets the pointer to the list of active threads.
Definition lsq.cc:130
bool cacheBlocked() const
Is D-cache blocked?
Definition lsq.cc:186
int numLoads()
Returns the total number of loads in the load queue.
Definition lsq.cc:490
void setLastRetiredHtmUid(ThreadID tid, uint64_t htmUid)
Definition lsq.cc:377
void dumpInsts() const
Debugging function to print out all instructions.
Definition lsq.cc:764
int usedLoadPorts
The number of used cache ports in this cycle by loads.
Definition lsq.hh:904
unsigned maxLQEntries
Max LQ Size - Used to Enforce Sharing Policies.
Definition lsq.hh:946
bool isFull()
Returns if the LSQ is full (either LQ or SQ is full).
Definition lsq.cc:570
void insertStore(const DynInstPtr &store_inst)
Inserts a store into the LSQ.
Definition lsq.cc:229
void recvReqRetry()
Retry the previous send that failed.
Definition lsq.cc:384
void commitLoads(InstSeqNum &youngest_inst, ThreadID tid)
Commits loads up until the given sequence number for a specific thread.
Definition lsq.cc:253
Fault write(LSQRequest *request, uint8_t *data, ssize_t store_idx)
Executes a store operation, using the store specified at the store index.
Definition lsq.cc:1535
uint64_t getLatestHtmUid(ThreadID tid) const
Definition lsq.cc:368
bool willWB()
Returns if the LSQ will write back to memory this cycle.
Definition lsq.cc:742
int getStoreHead(ThreadID tid)
Returns the head index of the store queue.
Definition lsq.cc:326
LSQ(CPU *cpu_ptr, IEW *iew_ptr, const BaseO3CPUParams &params)
Constructs an LSQ with the given parameters.
Definition lsq.cc:71
CPU * cpu
The CPU pointer.
Definition lsq.hh:878
bool _cacheBlocked
D-cache is blocked.
Definition lsq.hh:896
void drainSanityCheck() const
Perform sanity checks after a drain.
Definition lsq.cc:137
std::vector< LSQUnit > thread
The LSQ units for individual threads.
Definition lsq.hh:955
unsigned LQEntries
Total Size of LQ Entries.
Definition lsq.hh:941
int numHtmStops(ThreadID tid) const
Definition lsq.cc:352
void cachePortBusy(bool is_load)
Another store port is in use.
Definition lsq.cc:210
bool cachePortAvailable(bool is_load) const
Is any store port available to use?
Definition lsq.cc:198
InstSeqNum getStoreHeadSeqNum(ThreadID tid)
Returns the sequence number of the head of the store queue.
Definition lsq.cc:332
bool isStalled()
Returns if the LSQ is stalled due to a memory operation that must be replayed.
Definition lsq.cc:689
void writebackStores()
Attempts to write back stores until all cache ports are used or the interface becomes blocked.
Definition lsq.cc:265
bool lqFull()
Returns if any of the LQs are full.
Definition lsq.cc:635
bool waitingForStaleTranslation
If the LSQ is currently waiting for stale translations.
Definition lsq.hh:907
unsigned maxSQEntries
Max SQ Size - Used to Enforce Sharing Policies.
Definition lsq.hh:949
bool lqEmpty() const
Returns if all of the LQs are empty.
Definition lsq.cc:603
int getCount()
Returns the number of instructions in all of the queues.
Definition lsq.cc:473
bool hasStoresToWB()
Returns whether or not there are any stores to write back to memory.
Definition lsq.cc:714
Fault read(LSQRequest *request, ssize_t load_idx)
Executes a read operation, using the load specified at the load index.
Definition lsq.cc:1526
Fault executeStore(const DynInstPtr &inst)
Executes a store.
Definition lsq.cc:245
void tick()
Ticks the LSQ.
Definition lsq.cc:175
void insertLoad(const DynInstPtr &load_inst)
Inserts a load into the LSQ.
Definition lsq.cc:221
bool isEmpty() const
Returns if the LSQ is empty (both LQ and SQ are empty).
Definition lsq.cc:597
int numStores()
Returns the total number of stores in the store queue.
Definition lsq.cc:507
void recvTimingSnoopReq(PacketPtr pkt)
Definition lsq.cc:444
int cacheStorePorts
The number of cache ports available each cycle (stores only).
Definition lsq.hh:898
Fault executeLoad(const DynInstPtr &inst)
Executes a load.
Definition lsq.cc:237
bool violation()
Returns whether or not there was a memory ordering violation.
Definition lsq.cc:289
void resetHtmStartsStops(ThreadID tid)
Definition lsq.cc:361
SMTQueuePolicy lsqPolicy
The LSQ policy for SMT mode.
Definition lsq.hh:912
int numStoresToWB(ThreadID tid)
Returns the number of stores a specific thread has to write back.
Definition lsq.cc:736
unsigned numFreeStoreEntries()
Returns the number of free store entries.
Definition lsq.cc:541
bool sqFull()
Returns if any of the SQs are full.
Definition lsq.cc:662
STL list class.
Definition stl.hh:51
STL vector class.
Definition stl.hh:37
std::unique_ptr< AtomicOpFunctor > AtomicOpFunctorPtr
Definition amo.hh:269
void set(Type mask)
Set all flag's bits matching the given mask.
Definition flags.hh:116
#define panic(...)
This implements a cprintf based panic() function.
Definition logging.hh:188
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition logging.hh:214
uint8_t flags
Definition helpers.cc:87
Bitfield< 28 > v
Definition misc_types.hh:54
Bitfield< 4, 0 > mode
Definition misc_types.hh:74
Bitfield< 7 > i
Definition misc_types.hh:67
Bitfield< 23, 0 > offset
Definition types.hh:144
Bitfield< 9 > d
Definition misc_types.hh:64
Bitfield< 3 > addr
Definition types.hh:84
static constexpr int MaxThreads
Definition limits.hh:38
const FlagsType total
Print the total.
Definition info.hh:59
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
Definition binary32.hh:36
std::shared_ptr< FaultBase > Fault
Definition types.hh:249
int16_t ThreadID
Thread index/ID type.
Definition types.hh:235
std::shared_ptr< Request > RequestPtr
Definition request.hh:94
const ThreadID InvalidThreadID
Definition types.hh:236
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition types.hh:147
bool transferNeedsBurst(Addr addr, unsigned int size, unsigned int block_size)
Returns true if the given memory access (address, size) needs to be fragmented across aligned fixed-s...
Definition utils.hh:80
uint8_t * PacketDataPtr
Definition packet.hh:72
int ContextID
Globally unique thread context ID.
Definition types.hh:239
constexpr decltype(nullptr) NoFault
Definition types.hh:253
uint64_t InstSeqNum
Definition inst_seq.hh:40
bool isAnyActiveElement(const std::vector< bool >::const_iterator &it_start, const std::vector< bool >::const_iterator &it_end)
Test if there is any active element in an enablement range.
Definition utils.hh:89
Addr addrBlockAlign(Addr addr, Addr block_size)
Returns the address of the closest aligned fixed-size block to the given address.
Definition utils.hh:66
Overload hash function for BasicBlockRange type.
Definition binary32.hh:81
bool doMonitor(PacketPtr pkt)
Definition base.cc:764
const std::string & name()
Definition trace.cc:48

Generated on Tue Jun 18 2024 16:24:02 for gem5 by doxygen 1.11.0