gem5  v22.1.0.0
lsq.cc
/*
 * Copyright (c) 2011-2012, 2014, 2017-2019, 2021 ARM Limited
 * Copyright (c) 2013 Advanced Micro Devices, Inc.
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2005-2006 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "cpu/o3/lsq.hh"

#include <algorithm>
#include <list>
#include <string>

#include "base/compiler.hh"
#include "base/logging.hh"
#include "cpu/o3/cpu.hh"
#include "cpu/o3/dyn_inst.hh"
#include "cpu/o3/iew.hh"
#include "cpu/o3/limits.hh"
#include "debug/Drain.hh"
#include "debug/Fetch.hh"
#include "debug/HtmCpu.hh"
#include "debug/LSQ.hh"
#include "debug/Writeback.hh"
#include "params/BaseO3CPU.hh"

namespace gem5
{

namespace o3
{

LSQ::DcachePort::DcachePort(LSQ *_lsq, CPU *_cpu) :
    RequestPort(_cpu->name() + ".dcache_port", _cpu), lsq(_lsq), cpu(_cpu)
{}

LSQ::LSQ(CPU *cpu_ptr, IEW *iew_ptr, const BaseO3CPUParams &params)
    : cpu(cpu_ptr), iewStage(iew_ptr),
      _cacheBlocked(false),
      cacheStorePorts(params.cacheStorePorts), usedStorePorts(0),
      cacheLoadPorts(params.cacheLoadPorts), usedLoadPorts(0),
      waitingForStaleTranslation(false),
      staleTranslationWaitTxnId(0),
      lsqPolicy(params.smtLSQPolicy),
      LQEntries(params.LQEntries),
      SQEntries(params.SQEntries),
      maxLQEntries(maxLSQAllocation(lsqPolicy, LQEntries, params.numThreads,
                  params.smtLSQThreshold)),
      maxSQEntries(maxLSQAllocation(lsqPolicy, SQEntries, params.numThreads,
                  params.smtLSQThreshold)),
      dcachePort(this, cpu_ptr),
      numThreads(params.numThreads)
{
    assert(numThreads > 0 && numThreads <= MaxThreads);

    //**********************************************
    //************ Handle SMT Parameters ***********
    //**********************************************

    /* Run SMT policy checks. */
    if (lsqPolicy == SMTQueuePolicy::Dynamic) {
        DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
    } else if (lsqPolicy == SMTQueuePolicy::Partitioned) {
        DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: "
                "%i entries per LQ | %i entries per SQ\n",
                maxLQEntries, maxSQEntries);
    } else if (lsqPolicy == SMTQueuePolicy::Threshold) {

        assert(params.smtLSQThreshold > params.LQEntries);
        assert(params.smtLSQThreshold > params.SQEntries);

        DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
                "%i entries per LQ | %i entries per SQ\n",
                maxLQEntries, maxSQEntries);
    } else {
        panic("Invalid LSQ sharing policy. Options are: Dynamic, "
                "Partitioned, Threshold");
    }

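    // Illustrative sizing under each policy (per-thread limits come from
    // maxLSQAllocation() in lsq.hh): with LQEntries = 32 and two threads,
    // Partitioned gives each thread a 16-entry LQ, Threshold caps each
    // thread at smtLSQThreshold entries, and Dynamic lets both threads
    // share all 32.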
    thread.reserve(numThreads);
    for (ThreadID tid = 0; tid < numThreads; tid++) {
        thread.emplace_back(maxLQEntries, maxSQEntries);
        thread[tid].init(cpu, iew_ptr, params, this, tid);
        thread[tid].setDcachePort(&dcachePort);
    }
}

std::string
LSQ::name() const
{
    return iewStage->name() + ".lsq";
}

void
LSQ::setActiveThreads(std::list<ThreadID> *at_ptr)
{
    activeThreads = at_ptr;
    assert(activeThreads != 0);
}

void
LSQ::drainSanityCheck() const
{
    assert(isDrained());

    for (ThreadID tid = 0; tid < numThreads; tid++)
        thread[tid].drainSanityCheck();
}

bool
LSQ::isDrained() const
{
    bool drained(true);

    if (!lqEmpty()) {
        DPRINTF(Drain, "Not drained, LQ not empty.\n");
        drained = false;
    }

    if (!sqEmpty()) {
        DPRINTF(Drain, "Not drained, SQ not empty.\n");
        drained = false;
    }

    return drained;
}

void
LSQ::takeOverFrom()
{
    usedStorePorts = 0;
    _cacheBlocked = false;

    for (ThreadID tid = 0; tid < numThreads; tid++) {
        thread[tid].takeOverFrom();
    }
}

void
LSQ::tick()
{
    // Re-issue loads which got blocked on the per-cycle load ports limit.
    if (usedLoadPorts == cacheLoadPorts && !_cacheBlocked)
        iewStage->cacheUnblocked();

    usedLoadPorts = 0;
    usedStorePorts = 0;
}

bool
LSQ::cacheBlocked() const
{
    return _cacheBlocked;
}

void
LSQ::cacheBlocked(bool v)
{
    _cacheBlocked = v;
}

bool
LSQ::cachePortAvailable(bool is_load) const
{
    bool ret;
    if (is_load) {
        ret = usedLoadPorts < cacheLoadPorts;
    } else {
        ret = usedStorePorts < cacheStorePorts;
    }
    return ret;
}
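
// Illustrative accounting: with cacheLoadPorts = 2, the first two loads
// issued in a cycle each call cachePortBusy(true) below; a third load then
// sees cachePortAvailable(true) == false and must wait until tick() clears
// the counters for the next cycle.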

void
LSQ::cachePortBusy(bool is_load)
{
    assert(cachePortAvailable(is_load));
    if (is_load) {
        usedLoadPorts++;
    } else {
        usedStorePorts++;
    }
}

void
LSQ::insertLoad(const DynInstPtr &load_inst)
{
    ThreadID tid = load_inst->threadNumber;

    thread[tid].insertLoad(load_inst);
}

void
LSQ::insertStore(const DynInstPtr &store_inst)
{
    ThreadID tid = store_inst->threadNumber;

    thread[tid].insertStore(store_inst);
}

Fault
LSQ::executeLoad(const DynInstPtr &inst)
{
    ThreadID tid = inst->threadNumber;

    return thread[tid].executeLoad(inst);
}

Fault
LSQ::executeStore(const DynInstPtr &inst)
{
    ThreadID tid = inst->threadNumber;

    return thread[tid].executeStore(inst);
}

void
LSQ::commitLoads(InstSeqNum &youngest_inst, ThreadID tid)
{
    thread.at(tid).commitLoads(youngest_inst);
}

void
LSQ::commitStores(InstSeqNum &youngest_inst, ThreadID tid)
{
    thread.at(tid).commitStores(youngest_inst);
}

void
LSQ::writebackStores()
{
    std::list<ThreadID>::iterator threads = activeThreads->begin();
    std::list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (numStoresToWB(tid) > 0) {
            DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores "
                "available for Writeback.\n", tid, numStoresToWB(tid));
        }

        thread[tid].writebackStores();
    }
}

void
LSQ::squash(const InstSeqNum &squashed_num, ThreadID tid)
{
    thread.at(tid).squash(squashed_num);
}

bool
LSQ::violation()
{
    /* Answers: Does Anybody Have a Violation? */
    std::list<ThreadID>::iterator threads = activeThreads->begin();
    std::list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (thread[tid].violation())
            return true;
    }

    return false;
}

bool LSQ::violation(ThreadID tid) { return thread.at(tid).violation(); }

DynInstPtr
LSQ::getMemDepViolator(ThreadID tid)
{
    return thread.at(tid).getMemDepViolator();
}

int
LSQ::getLoadHead(ThreadID tid)
{
    return thread.at(tid).getLoadHead();
}

InstSeqNum
LSQ::getLoadHeadSeqNum(ThreadID tid)
{
    return thread.at(tid).getLoadHeadSeqNum();
}

int
LSQ::getStoreHead(ThreadID tid)
{
    return thread.at(tid).getStoreHead();
}

InstSeqNum
LSQ::getStoreHeadSeqNum(ThreadID tid)
{
    return thread.at(tid).getStoreHeadSeqNum();
}

int LSQ::getCount(ThreadID tid) { return thread.at(tid).getCount(); }

int LSQ::numLoads(ThreadID tid) { return thread.at(tid).numLoads(); }

int LSQ::numStores(ThreadID tid) { return thread.at(tid).numStores(); }

int
LSQ::numHtmStarts(ThreadID tid) const
{
    if (tid == InvalidThreadID)
        return 0;
    else
        return thread[tid].numHtmStarts();
}

int
LSQ::numHtmStops(ThreadID tid) const
{
    if (tid == InvalidThreadID)
        return 0;
    else
        return thread[tid].numHtmStops();
}

void
LSQ::resetHtmStartsStops(ThreadID tid)
{
    if (tid != InvalidThreadID)
        thread[tid].resetHtmStartsStops();
}

uint64_t
LSQ::getLatestHtmUid(ThreadID tid) const
{
    if (tid == InvalidThreadID)
        return 0;
    else
        return thread[tid].getLatestHtmUid();
}

void
LSQ::setLastRetiredHtmUid(ThreadID tid, uint64_t htmUid)
{
    if (tid != InvalidThreadID)
        thread[tid].setLastRetiredHtmUid(htmUid);
}

void
LSQ::recvReqRetry()
{
    iewStage->cacheUnblocked();
    cacheBlocked(false);

    for (ThreadID tid : *activeThreads) {
        thread[tid].recvRetry();
    }
}

void
LSQ::completeDataAccess(PacketPtr pkt)
{
    LSQRequest *request = dynamic_cast<LSQRequest*>(pkt->senderState);
    thread[cpu->contextToThread(request->contextId())]
        .completeDataAccess(pkt);
}

bool
LSQ::recvTimingResp(PacketPtr pkt)
{
    if (pkt->isError())
        DPRINTF(LSQ, "Got error packet back for address: %#X\n",
                pkt->getAddr());

    LSQRequest *request = dynamic_cast<LSQRequest*>(pkt->senderState);
    panic_if(!request, "Got packet back with unknown sender state\n");

    thread[cpu->contextToThread(request->contextId())].recvTimingResp(pkt);

    if (pkt->isInvalidate()) {
        // This response also contains an invalidate; e.g. this can be the
        // case if cmd is ReadRespWithInvalidate.
        //
        // The calling order between completeDataAccess and checkSnoop
        // matters. By calling checkSnoop after completeDataAccess, we
        // ensure that the fault set by checkSnoop is not lost. Calling
        // writeback (more specifically inst->completeAcc) in
        // completeDataAccess overwrites the fault, and in case this
        // instruction requires squashing (as determined by checkSnoop),
        // the ReExec fault set by checkSnoop would be lost otherwise.

        DPRINTF(LSQ, "received invalidation with response for addr:%#x\n",
                pkt->getAddr());

        for (ThreadID tid = 0; tid < numThreads; tid++) {
            thread[tid].checkSnoop(pkt);
        }
    }
    // Update the LSQRequest state (this may delete the request)
    request->packetReplied();

    if (waitingForStaleTranslation) {
        checkStaleTranslations();
    }

    return true;
}

void
LSQ::recvTimingSnoopReq(PacketPtr pkt)
{
    DPRINTF(LSQ, "received pkt for addr:%#x %s\n", pkt->getAddr(),
            pkt->cmdString());

    // must be a snoop
    if (pkt->isInvalidate()) {
        DPRINTF(LSQ, "received invalidation for addr:%#x\n",
                pkt->getAddr());
        for (ThreadID tid = 0; tid < numThreads; tid++) {
            thread[tid].checkSnoop(pkt);
        }
    } else if (pkt->req && pkt->req->isTlbiExtSync()) {
        DPRINTF(LSQ, "received TLBI Ext Sync\n");
        assert(!waitingForStaleTranslation);

        waitingForStaleTranslation = true;
        staleTranslationWaitTxnId = pkt->req->getExtraData();

        for (auto& unit : thread) {
            unit.startStaleTranslationFlush();
        }

        // In case no units have pending ops, just go ahead
        checkStaleTranslations();
    }
}

int
LSQ::getCount()
{
    unsigned total = 0;

    std::list<ThreadID>::iterator threads = activeThreads->begin();
    std::list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        total += getCount(tid);
    }

    return total;
}

int
LSQ::numLoads()
{
    unsigned total = 0;

    std::list<ThreadID>::iterator threads = activeThreads->begin();
    std::list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        total += numLoads(tid);
    }

    return total;
}

int
LSQ::numStores()
{
    unsigned total = 0;

    std::list<ThreadID>::iterator threads = activeThreads->begin();
    std::list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        total += thread[tid].numStores();
    }

    return total;
}

unsigned
LSQ::numFreeLoadEntries()
{
    unsigned total = 0;

    std::list<ThreadID>::iterator threads = activeThreads->begin();
    std::list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        total += thread[tid].numFreeLoadEntries();
    }

    return total;
}

unsigned
LSQ::numFreeStoreEntries()
{
    unsigned total = 0;

    std::list<ThreadID>::iterator threads = activeThreads->begin();
    std::list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        total += thread[tid].numFreeStoreEntries();
    }

    return total;
}

unsigned
LSQ::numFreeLoadEntries(ThreadID tid)
{
    return thread[tid].numFreeLoadEntries();
}

unsigned
LSQ::numFreeStoreEntries(ThreadID tid)
{
    return thread[tid].numFreeStoreEntries();
}

bool
LSQ::isFull()
{
    std::list<ThreadID>::iterator threads = activeThreads->begin();
    std::list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (!(thread[tid].lqFull() || thread[tid].sqFull()))
            return false;
    }

    return true;
}

bool
LSQ::isFull(ThreadID tid)
{
    //@todo: Change to Calculate All Entries for
    //Dynamic Policy
    if (lsqPolicy == SMTQueuePolicy::Dynamic)
        return isFull();
    else
        return thread[tid].lqFull() || thread[tid].sqFull();
}

bool
LSQ::isEmpty() const
{
    return lqEmpty() && sqEmpty();
}

bool
LSQ::lqEmpty() const
{
    std::list<ThreadID>::const_iterator threads = activeThreads->begin();
    std::list<ThreadID>::const_iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (!thread[tid].lqEmpty())
            return false;
    }

    return true;
}

bool
LSQ::sqEmpty() const
{
    std::list<ThreadID>::const_iterator threads = activeThreads->begin();
    std::list<ThreadID>::const_iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (!thread[tid].sqEmpty())
            return false;
    }

    return true;
}

bool
LSQ::lqFull()
{
    std::list<ThreadID>::iterator threads = activeThreads->begin();
    std::list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (!thread[tid].lqFull())
            return false;
    }

    return true;
}

bool
LSQ::lqFull(ThreadID tid)
{
    //@todo: Change to Calculate All Entries for
    //Dynamic Policy
    if (lsqPolicy == SMTQueuePolicy::Dynamic)
        return lqFull();
    else
        return thread[tid].lqFull();
}

bool
LSQ::sqFull()
{
    std::list<ThreadID>::iterator threads = activeThreads->begin();
    std::list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (!sqFull(tid))
            return false;
    }

    return true;
}

bool
LSQ::sqFull(ThreadID tid)
{
    //@todo: Change to Calculate All Entries for
    //Dynamic Policy
    if (lsqPolicy == SMTQueuePolicy::Dynamic)
        return sqFull();
    else
        return thread[tid].sqFull();
}

bool
LSQ::isStalled()
{
    std::list<ThreadID>::iterator threads = activeThreads->begin();
    std::list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (!thread[tid].isStalled())
            return false;
    }

    return true;
}

bool
LSQ::isStalled(ThreadID tid)
{
    if (lsqPolicy == SMTQueuePolicy::Dynamic)
        return isStalled();
    else
        return thread[tid].isStalled();
}

bool
LSQ::hasStoresToWB()
{
    std::list<ThreadID>::iterator threads = activeThreads->begin();
    std::list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (hasStoresToWB(tid))
            return true;
    }

    return false;
}

bool
LSQ::hasStoresToWB(ThreadID tid)
{
    return thread.at(tid).hasStoresToWB();
}

int
LSQ::numStoresToWB(ThreadID tid)
{
    return thread.at(tid).numStoresToWB();
}

bool
LSQ::willWB()
{
    std::list<ThreadID>::iterator threads = activeThreads->begin();
    std::list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (willWB(tid))
            return true;
    }

    return false;
}

bool
LSQ::willWB(ThreadID tid)
{
    return thread.at(tid).willWB();
}

void
LSQ::dumpInsts() const
{
    std::list<ThreadID>::const_iterator threads = activeThreads->begin();
    std::list<ThreadID>::const_iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        thread[tid].dumpInsts();
    }
}

void
LSQ::dumpInsts(ThreadID tid) const
{
    thread.at(tid).dumpInsts();
}

Fault
LSQ::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
        unsigned int size, Addr addr, Request::Flags flags, uint64_t *res,
        AtomicOpFunctorPtr amo_op, const std::vector<bool>& byte_enable)
{
    // This incoming request can be either a load, a store, or an atomic.
    // An atomic request has a corresponding pointer to its atomic memory
    // operation.
    [[maybe_unused]] bool isAtomic = !isLoad && amo_op;

    ThreadID tid = cpu->contextToThread(inst->contextId());
    auto cacheLineSize = cpu->cacheLineSize();
    bool needs_burst = transferNeedsBurst(addr, size, cacheLineSize);
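    // Illustrative: with 64-byte cache lines, a 16-byte access at 0x3f8
    // ends at 0x407 and spans two lines, so needs_burst is true and a
    // SplitDataRequest is built below; the same access at 0x3c0 stays
    // within one line and uses a SingleDataRequest.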
    LSQRequest* request = nullptr;

    // Atomic requests that access data across a cache line boundary are
    // currently not allowed since the cache does not guarantee that the
    // corresponding atomic memory operations execute atomically across a
    // cache line. For ISAs such as x86 that support cross-cache-line
    // atomic instructions, the cache needs to be modified to perform an
    // atomic update to both cache lines. For now, such cross-line updates
    // are not supported.
    assert(!isAtomic || (isAtomic && !needs_burst));

    const bool htm_cmd = isLoad && (flags & Request::HTM_CMD);
    const bool tlbi_cmd = isLoad && (flags & Request::TLBI_CMD);

    if (inst->translationStarted()) {
        request = inst->savedRequest;
        assert(request);
    } else {
        if (htm_cmd || tlbi_cmd) {
            assert(addr == 0x0lu);
            assert(size == 8);
            request = new UnsquashableDirectRequest(&thread[tid], inst, flags);
        } else if (needs_burst) {
            request = new SplitDataRequest(&thread[tid], inst, isLoad, addr,
                    size, flags, data, res);
        } else {
            request = new SingleDataRequest(&thread[tid], inst, isLoad, addr,
                    size, flags, data, res, std::move(amo_op));
        }
        assert(request);
        request->_byteEnable = byte_enable;
        inst->setRequest();
        request->taskId(cpu->taskId());

        // There might be a fault from a previous execution attempt if this
        // is a strictly ordered load
        inst->getFault() = NoFault;

        request->initiateTranslation();
    }

    /* This is the place where instructions get the effAddr. */
    if (request->isTranslationComplete()) {
        if (request->isMemAccessRequired()) {
            inst->effAddr = request->getVaddr();
            inst->effSize = size;
            inst->effAddrValid(true);

            if (cpu->checker) {
                inst->reqToVerify = std::make_shared<Request>(*request->req());
            }
            Fault fault;
            if (isLoad)
                fault = read(request, inst->lqIdx);
            else
                fault = write(request, data, inst->sqIdx);
            // inst->getFault() may have the first-fault of a
            // multi-access split request at this point.
            // Overwrite that only if we got another type of fault
            // (e.g. re-exec).
            if (fault != NoFault)
                inst->getFault() = fault;
        } else if (isLoad) {
            inst->setMemAccPredicate(false);
            // Commit will have to clean up whatever happened. Set this
            // instruction as executed.
            inst->setExecuted();
        }
    }

    if (inst->traceData)
        inst->traceData->setMem(addr, size, flags);

    return inst->getFault();
}

void
LSQ::SingleDataRequest::finish(const Fault &fault, const RequestPtr &request,
        gem5::ThreadContext* tc, BaseMMU::Mode mode)
{
    _fault.push_back(fault);
    numInTranslationFragments = 0;
    numTranslatedFragments = 1;
    /* If the instruction has been squashed, let the request know
     * as it may have to self-destruct. */
    if (_inst->isSquashed()) {
        squashTranslation();
    } else {
        _inst->strictlyOrdered(request->isStrictlyOrdered());

        flags.set(Flag::TranslationFinished);
        if (fault == NoFault) {
            _inst->physEffAddr = request->getPaddr();
            _inst->memReqFlags = request->getFlags();
            if (request->isCondSwap()) {
                assert(_res);
                request->setExtraData(*_res);
            }
            setState(State::Request);
        } else {
            setState(State::Fault);
        }

        LSQRequest::_inst->fault = fault;
        LSQRequest::_inst->translationCompleted(true);
    }
}

void
LSQ::SplitDataRequest::finish(const Fault &fault, const RequestPtr &req,
        gem5::ThreadContext* tc, BaseMMU::Mode mode)
{
    int i;
    for (i = 0; i < _reqs.size() && _reqs[i] != req; i++);
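    // i now indexes the fragment whose translation just completed.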
    assert(i < _reqs.size());
    _fault[i] = fault;

    numInTranslationFragments--;
    numTranslatedFragments++;

    if (fault == NoFault)
        _mainReq->setFlags(req->getFlags());

    if (numTranslatedFragments == _reqs.size()) {
        if (_inst->isSquashed()) {
            squashTranslation();
        } else {
            _inst->strictlyOrdered(_mainReq->isStrictlyOrdered());
            flags.set(Flag::TranslationFinished);
            _inst->translationCompleted(true);

            for (i = 0; i < _fault.size() && _fault[i] == NoFault; i++);
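            // i now counts the leading fault-free fragments; it equals
            // _fault.size() when every fragment translated cleanly.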
            if (i > 0) {
                _inst->physEffAddr = LSQRequest::req()->getPaddr();
                _inst->memReqFlags = _mainReq->getFlags();
                if (_mainReq->isCondSwap()) {
                    assert(i == _fault.size());
                    assert(_res);
                    _mainReq->setExtraData(*_res);
                }
                if (i == _fault.size()) {
                    _inst->fault = NoFault;
                    setState(State::Request);
                } else {
                    _inst->fault = _fault[i];
                    setState(State::PartialFault);
                }
            } else {
                _inst->fault = _fault[0];
                setState(State::Fault);
            }
        }
    }
}

void
LSQ::SingleDataRequest::initiateTranslation()
{
    assert(_reqs.size() == 0);

    addReq(_addr, _size, _byteEnable);

    if (_reqs.size() > 0) {
        _reqs.back()->setReqInstSeqNum(_inst->seqNum);
        _reqs.back()->taskId(_taskId);
        _inst->translationStarted(true);
        setState(State::Translation);
        flags.set(Flag::TranslationStarted);

        _inst->savedRequest = this;
        sendFragmentToTranslation(0);
    } else {
        _inst->setMemAccPredicate(false);
    }
}

PacketPtr
LSQ::SplitDataRequest::mainPacket()
{
    return _mainPacket;
}

RequestPtr
LSQ::SplitDataRequest::mainReq()
{
    return _mainReq;
}

void
LSQ::SplitDataRequest::initiateTranslation()
{
    auto cacheLineSize = _port.cacheLineSize();
    Addr base_addr = _addr;
    Addr next_addr = addrBlockAlign(_addr + cacheLineSize, cacheLineSize);
    Addr final_addr = addrBlockAlign(_addr + _size, cacheLineSize);
    uint32_t size_so_far = 0;

    _mainReq = std::make_shared<Request>(base_addr,
            _size, _flags, _inst->requestorId(),
            _inst->pcState().instAddr(), _inst->contextId());
    _mainReq->setByteEnable(_byteEnable);

    // Paddr is not used in _mainReq. However, we will accumulate the flags
    // from the sub requests into _mainReq by calling setFlags() in finish().
    // setFlags() assumes that paddr is set so flip the paddr valid bit here to
    // avoid a potential assert in setFlags() when we call it from finish().
    _mainReq->setPaddr(0);

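    // Worked example (illustrative): _addr = 0x3fc, _size = 16, 64-byte
    // lines. Then next_addr = 0x400 and final_addr = 0x400, so the code
    // below emits a 4-byte prefix fragment at 0x3fc, skips the whole-block
    // loop, and emits a 12-byte tail fragment at 0x400.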
    /* Get the pre-fix, possibly unaligned. */
    auto it_start = _byteEnable.begin();
    auto it_end = _byteEnable.begin() + (next_addr - base_addr);
    addReq(base_addr, next_addr - base_addr,
            std::vector<bool>(it_start, it_end));
    size_so_far = next_addr - base_addr;

    /* We are block aligned now, reading whole blocks. */
    base_addr = next_addr;
    while (base_addr != final_addr) {
        auto it_start = _byteEnable.begin() + size_so_far;
        auto it_end = _byteEnable.begin() + size_so_far + cacheLineSize;
        addReq(base_addr, cacheLineSize,
                std::vector<bool>(it_start, it_end));
        size_so_far += cacheLineSize;
        base_addr += cacheLineSize;
    }

    /* Deal with the tail. */
    if (size_so_far < _size) {
        auto it_start = _byteEnable.begin() + size_so_far;
        auto it_end = _byteEnable.end();
        addReq(base_addr, _size - size_so_far,
                std::vector<bool>(it_start, it_end));
    }

    if (_reqs.size() > 0) {
        /* Setup the requests and send them to translation. */
        for (auto& r: _reqs) {
            r->setReqInstSeqNum(_inst->seqNum);
            r->taskId(_taskId);
        }

        _inst->translationStarted(true);
        setState(State::Translation);
        flags.set(Flag::TranslationStarted);
        _inst->savedRequest = this;
        numInTranslationFragments = 0;
        numTranslatedFragments = 0;
        _fault.resize(_reqs.size());

        for (uint32_t i = 0; i < _reqs.size(); i++) {
            sendFragmentToTranslation(i);
        }
    } else {
        _inst->setMemAccPredicate(false);
    }
}

LSQ::LSQRequest::LSQRequest(
        LSQUnit *port, const DynInstPtr& inst, bool isLoad) :
    _state(State::NotIssued),
    _port(*port), _inst(inst), _data(nullptr),
    _res(nullptr), _addr(0), _size(0), _flags(0),
    _numOutstandingPackets(0), _amo_op(nullptr)
{
    flags.set(Flag::IsLoad, isLoad);
    flags.set(Flag::WriteBackToRegister,
              _inst->isStoreConditional() || _inst->isAtomic() ||
              _inst->isLoad());
    flags.set(Flag::IsAtomic, _inst->isAtomic());
    install();
}

LSQ::LSQRequest::LSQRequest(
        LSQUnit *port, const DynInstPtr& inst, bool isLoad,
        const Addr& addr, const uint32_t& size, const Request::Flags& flags_,
        PacketDataPtr data, uint64_t* res, AtomicOpFunctorPtr amo_op,
        bool stale_translation)
    : _state(State::NotIssued),
      numTranslatedFragments(0),
      numInTranslationFragments(0),
      _port(*port), _inst(inst), _data(data),
      _res(res), _addr(addr), _size(size),
      _flags(flags_),
      _numOutstandingPackets(0),
      _amo_op(std::move(amo_op)),
      _hasStaleTranslation(stale_translation)
{
    flags.set(Flag::IsLoad, isLoad);
    flags.set(Flag::WriteBackToRegister,
              _inst->isStoreConditional() || _inst->isAtomic() ||
              _inst->isLoad());
    flags.set(Flag::IsAtomic, _inst->isAtomic());
    install();
}

void
LSQ::LSQRequest::install()
{
    if (isLoad()) {
        _port.loadQueue[_inst->lqIdx].setRequest(this);
    } else {
        // Store, StoreConditional, and Atomic requests are pushed
        // to this storeQueue
        _port.storeQueue[_inst->sqIdx].setRequest(this);
    }
}

bool LSQ::LSQRequest::squashed() const { return _inst->isSquashed(); }

void
LSQ::LSQRequest::addReq(Addr addr, unsigned size,
        const std::vector<bool>& byte_enable)
{
    if (isAnyActiveElement(byte_enable.begin(), byte_enable.end())) {
        auto req = std::make_shared<Request>(
                addr, size, _flags, _inst->requestorId(),
                _inst->pcState().instAddr(), _inst->contextId(),
                std::move(_amo_op));
        req->setByteEnable(byte_enable);

        /* If the request is marked as NO_ACCESS, setup a local access */
        if (_flags.isSet(Request::NO_ACCESS)) {
            req->setLocalAccessor(
                [this, req](gem5::ThreadContext *tc, PacketPtr pkt) -> Cycles
                {
                    if ((req->isHTMStart() || req->isHTMCommit())) {
                        auto& inst = this->instruction();
                        assert(inst->inHtmTransactionalState());
                        pkt->setHtmTransactional(
                            inst->getHtmTransactionUid());
                    }
                    return Cycles(1);
                }
            );
        }

        _reqs.push_back(req);
    }
}

LSQ::LSQRequest::~LSQRequest()
{
    assert(!isAnyOutstandingRequest());
    _inst->savedRequest = nullptr;

    for (auto r: _packets)
        delete r;
}

ContextID
LSQ::LSQRequest::contextId() const
{
    return _inst->contextId();
}

void
LSQ::LSQRequest::sendFragmentToTranslation(int i)
{
    numInTranslationFragments++;
    _port.getMMUPtr()->translateTiming(req(i), _inst->thread->getTC(),
            this, isLoad() ? BaseMMU::Read : BaseMMU::Write);
}

void
LSQ::SingleDataRequest::markAsStaleTranslation()
{
    // If this element has been translated and is currently being requested,
    // then it may be stale.
    if ((!flags.isSet(Flag::Complete)) &&
        (!flags.isSet(Flag::Discarded)) &&
        (flags.isSet(Flag::TranslationStarted))) {
        _hasStaleTranslation = true;
    }

    DPRINTF(LSQ, "SingleDataRequest %d 0x%08x isBlocking:%d\n",
            (int)_state, (uint32_t)flags, _hasStaleTranslation);
}

void
LSQ::SplitDataRequest::markAsStaleTranslation()
{
    // If this element has been translated and is currently being requested,
    // then it may be stale.
    if ((!flags.isSet(Flag::Complete)) &&
        (!flags.isSet(Flag::Discarded)) &&
        (flags.isSet(Flag::TranslationStarted))) {
        _hasStaleTranslation = true;
    }

    DPRINTF(LSQ, "SplitDataRequest %d 0x%08x isBlocking:%d\n",
            (int)_state, (uint32_t)flags, _hasStaleTranslation);
}

bool
LSQ::SingleDataRequest::recvTimingResp(PacketPtr pkt)
{
    assert(_numOutstandingPackets == 1);
    flags.set(Flag::Complete);
    assert(pkt == _packets.front());
    _port.completeDataAccess(pkt);
    _hasStaleTranslation = false;
    return true;
}

bool
LSQ::SplitDataRequest::recvTimingResp(PacketPtr pkt)
{
    uint32_t pktIdx = 0;
    while (pktIdx < _packets.size() && pkt != _packets[pktIdx])
        pktIdx++;
    assert(pktIdx < _packets.size());
    numReceivedPackets++;
    if (numReceivedPackets == _packets.size()) {
        flags.set(Flag::Complete);
        /* Assemble packets. */
        PacketPtr resp = isLoad()
            ? Packet::createRead(_mainReq)
            : Packet::createWrite(_mainReq);
        if (isLoad())
            resp->dataStatic(_inst->memData);
        else
            resp->dataStatic(_data);
        resp->senderState = this;
        _port.completeDataAccess(resp);
        delete resp;
    }
    _hasStaleTranslation = false;
    return true;
}

void
LSQ::SingleDataRequest::buildPackets()
{
    /* Retries do not create new packets. */
    if (_packets.size() == 0) {
        _packets.push_back(
                isLoad()
                    ? Packet::createRead(req())
                    : Packet::createWrite(req()));
        _packets.back()->dataStatic(_inst->memData);
        _packets.back()->senderState = this;

        // hardware transactional memory
        // If request originates in a transaction (not necessarily a HtmCmd),
        // then the packet should be marked as such.
        if (_inst->inHtmTransactionalState()) {
            _packets.back()->setHtmTransactional(
                _inst->getHtmTransactionUid());

            DPRINTF(HtmCpu,
              "HTM %s pc=0x%lx - vaddr=0x%lx - paddr=0x%lx - htmUid=%u\n",
              isLoad() ? "LD" : "ST",
              _inst->pcState().instAddr(),
              _packets.back()->req->hasVaddr() ?
                  _packets.back()->req->getVaddr() : 0lu,
              _packets.back()->getAddr(),
              _inst->getHtmTransactionUid());
        }
    }
    assert(_packets.size() == 1);
}

void
LSQ::SplitDataRequest::buildPackets()
{
    Addr base_address = _addr;

    if (_packets.size() == 0) {
        /* New stuff */
        if (isLoad()) {
            _mainPacket = Packet::createRead(_mainReq);
            _mainPacket->dataStatic(_inst->memData);

            // hardware transactional memory
            // If request originates in a transaction,
            // packet should be marked as such
            if (_inst->inHtmTransactionalState()) {
                _mainPacket->setHtmTransactional(
                    _inst->getHtmTransactionUid());
                DPRINTF(HtmCpu,
                  "HTM LD.0 pc=0x%lx-vaddr=0x%lx-paddr=0x%lx-htmUid=%u\n",
                  _inst->pcState().instAddr(),
                  _mainPacket->req->hasVaddr() ?
                      _mainPacket->req->getVaddr() : 0lu,
                  _mainPacket->getAddr(),
                  _inst->getHtmTransactionUid());
            }
        }
        for (int i = 0; i < _reqs.size() && _fault[i] == NoFault; i++) {
            RequestPtr req = _reqs[i];
            PacketPtr pkt = isLoad() ? Packet::createRead(req)
                                     : Packet::createWrite(req);
            ptrdiff_t offset = req->getVaddr() - base_address;
            if (isLoad()) {
                pkt->dataStatic(_inst->memData + offset);
            } else {
                uint8_t* req_data = new uint8_t[req->getSize()];
                std::memcpy(req_data,
                        _inst->memData + offset,
                        req->getSize());
                pkt->dataDynamic(req_data);
            }
            pkt->senderState = this;
            _packets.push_back(pkt);

            // hardware transactional memory
            // If request originates in a transaction,
            // packet should be marked as such
            if (_inst->inHtmTransactionalState()) {
                _packets.back()->setHtmTransactional(
                    _inst->getHtmTransactionUid());
                DPRINTF(HtmCpu,
                  "HTM %s.%d pc=0x%lx-vaddr=0x%lx-paddr=0x%lx-htmUid=%u\n",
                  isLoad() ? "LD" : "ST",
                  i+1,
                  _inst->pcState().instAddr(),
                  _packets.back()->req->hasVaddr() ?
                      _packets.back()->req->getVaddr() : 0lu,
                  _packets.back()->getAddr(),
                  _inst->getHtmTransactionUid());
            }
        }
    }
    assert(_packets.size() > 0);
}

void
LSQ::SingleDataRequest::sendPacketToCache()
{
    assert(_numOutstandingPackets == 0);
    if (lsqUnit()->trySendPacket(isLoad(), _packets.at(0)))
        _numOutstandingPackets = 1;
}

void
LSQ::SplitDataRequest::sendPacketToCache()
{
    /* Try to send the packets. */
    while (numReceivedPackets + _numOutstandingPackets < _packets.size() &&
            lsqUnit()->trySendPacket(isLoad(),
                _packets.at(numReceivedPackets + _numOutstandingPackets))) {
        _numOutstandingPackets++;
    }
}

Cycles
LSQ::SingleDataRequest::handleLocalAccess(
        gem5::ThreadContext *thread, PacketPtr pkt)
{
    return pkt->req->localAccessor(thread, pkt);
}

Cycles
LSQ::SplitDataRequest::handleLocalAccess(
        gem5::ThreadContext *thread, PacketPtr mainPkt)
{
    Cycles delay(0);
    unsigned offset = 0;

    for (auto r: _reqs) {
        PacketPtr pkt =
            new Packet(r, isLoad() ? MemCmd::ReadReq : MemCmd::WriteReq);
        pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
        Cycles d = r->localAccessor(thread, pkt);
        if (d > delay)
            delay = d;
        offset += r->getSize();
        delete pkt;
    }
    return delay;
}

bool
LSQ::SingleDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
{
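    // Illustrative: with 64-byte blocks, blockMask is ~Addr(0x3f); a
    // fragment at paddr 0x1234 maps to block 0x1200 and therefore hits a
    // probe for blockAddr 0x1200.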
    return ((LSQRequest::_reqs[0]->getPaddr() & blockMask) == blockAddr);
}

/**
 * Caches may probe into the load-store queue to enforce memory ordering
 * guarantees, e.g. to check whether a snoop invalidates a speculative load.
 */
bool
LSQ::SplitDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
{
    bool is_hit = false;
    for (auto &r: _reqs) {
        // Only fragments that already have a physical address (i.e. have
        // completed translation) can be checked.
        if (r->hasPaddr() && (r->getPaddr() & blockMask) == blockAddr) {
            is_hit = true;
            break;
        }
    }
    return is_hit;
}

bool
LSQ::DcachePort::recvTimingResp(PacketPtr pkt)
{
    return lsq->recvTimingResp(pkt);
}

void
LSQ::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
{
    for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
        if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
            cpu->wakeup(tid);
        }
    }
    lsq->recvTimingSnoopReq(pkt);
}

void
LSQ::DcachePort::recvReqRetry()
{
    lsq->recvReqRetry();
}

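// UnsquashableDirectRequest carries special commands (HTM start/commit,
// TLBI) as dummy 8-byte accesses at address 0; pushRequest() above asserts
// addr == 0 and size == 8 before building one.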
LSQ::UnsquashableDirectRequest::UnsquashableDirectRequest(
    LSQUnit* port,
    const DynInstPtr& inst,
    const Request::Flags& flags_) :
    SingleDataRequest(port, inst, true, 0x0lu, 8, flags_,
        nullptr, nullptr, nullptr)
{
}

void
LSQ::UnsquashableDirectRequest::initiateTranslation()
{
    // Special commands are implemented as loads to avoid significant
    // changes to the cpu and memory interfaces.
    // The virtual and physical address uses a dummy value of 0x00.
    // Address translation does not really occur; the code below fills in
    // the request fields directly.

    assert(_reqs.size() == 0);

    addReq(_addr, _size, _byteEnable);

    if (_reqs.size() > 0) {
        _reqs.back()->setReqInstSeqNum(_inst->seqNum);
        _reqs.back()->taskId(_taskId);
        _reqs.back()->setPaddr(_addr);
        _reqs.back()->setInstCount(_inst->getCpuPtr()->totalInsts());

        _inst->strictlyOrdered(_reqs.back()->isStrictlyOrdered());
        _inst->fault = NoFault;
        _inst->physEffAddr = _reqs.back()->getPaddr();
        _inst->memReqFlags = _reqs.back()->getFlags();
        _inst->savedRequest = this;

        flags.set(Flag::TranslationStarted);
        flags.set(Flag::TranslationFinished);

        _inst->translationStarted(true);
        _inst->translationCompleted(true);

        setState(State::Request);
    } else {
        panic("unexpected behaviour in initiateTranslation()");
    }
}

void
LSQ::UnsquashableDirectRequest::markAsStaleTranslation()
{
    // HTM/TLBI operations do not translate,
    // so they cannot have stale translations.
    _hasStaleTranslation = false;
}

void
LSQ::UnsquashableDirectRequest::finish(const Fault &fault,
        const RequestPtr &req, gem5::ThreadContext* tc,
        BaseMMU::Mode mode)
{
    panic("unexpected behaviour - finish()");
}

void
LSQ::checkStaleTranslations()
{
    assert(waitingForStaleTranslation);

    DPRINTF(LSQ, "Checking pending TLBI sync\n");
    // Check if all thread queues are complete
    for (const auto& unit : thread) {
        if (unit.checkStaleTranslations())
            return;
    }
    DPRINTF(LSQ, "No threads have blocking TLBI sync\n");

    // All thread queues have committed their sync operations
    // => send a RubyRequest to the sequencer
    auto req = Request::createMemManagement(
        Request::TLBI_EXT_SYNC_COMP,
        cpu->dataRequestorId());
    req->setExtraData(staleTranslationWaitTxnId);
    PacketPtr pkt = Packet::createRead(req);

    // TODO - reserve some credit for these responses?
    if (!dcachePort.sendTimingReq(pkt)) {
        panic("Couldn't send TLBI_EXT_SYNC_COMP message");
    }

    waitingForStaleTranslation = false;
    staleTranslationWaitTxnId = 0;
}

Fault
LSQ::read(LSQRequest* request, ssize_t load_idx)
{
    assert(request->req()->contextId() == request->contextId());
    ThreadID tid = cpu->contextToThread(request->req()->contextId());

    return thread.at(tid).read(request, load_idx);
}

Fault
LSQ::write(LSQRequest* request, uint8_t *data, ssize_t store_idx)
{
    ThreadID tid = cpu->contextToThread(request->req()->contextId());

    return thread.at(tid).write(request, data, store_idx);
}

} // namespace o3
} // namespace gem5