gem5  v20.0.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
lsq_impl.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2012, 2014, 2017-2018 ARM Limited
3  * Copyright (c) 2013 Advanced Micro Devices, Inc.
4  * All rights reserved
5  *
6  * The license below extends only to copyright in the software and shall
7  * not be construed as granting a license to any other intellectual
8  * property including but not limited to intellectual property relating
9  * to a hardware implementation of the functionality of the software
10  * licensed hereunder. You may use the software subject to the license
11  * terms below provided that you ensure that this notice is replicated
12  * unmodified and in its entirety in all distributions of the software,
13  * modified or unmodified, in source code or in binary form.
14  *
15  * Copyright (c) 2005-2006 The Regents of The University of Michigan
16  * All rights reserved.
17  *
18  * Redistribution and use in source and binary forms, with or without
19  * modification, are permitted provided that the following conditions are
20  * met: redistributions of source code must retain the above copyright
21  * notice, this list of conditions and the following disclaimer;
22  * redistributions in binary form must reproduce the above copyright
23  * notice, this list of conditions and the following disclaimer in the
24  * documentation and/or other materials provided with the distribution;
25  * neither the name of the copyright holders nor the names of its
26  * contributors may be used to endorse or promote products derived from
27  * this software without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40  */
41 
42 #ifndef __CPU_O3_LSQ_IMPL_HH__
43 #define __CPU_O3_LSQ_IMPL_HH__
44 
45 #include <algorithm>
46 #include <list>
47 #include <string>
48 
49 #include "base/logging.hh"
50 #include "cpu/o3/cpu.hh"
51 #include "cpu/o3/lsq.hh"
52 #include "debug/Drain.hh"
53 #include "debug/Fetch.hh"
54 #include "debug/LSQ.hh"
55 #include "debug/Writeback.hh"
56 #include "params/DerivO3CPU.hh"
57 
58 using namespace std;
59 
60 template <class Impl>
61 LSQ<Impl>::LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params)
62  : cpu(cpu_ptr), iewStage(iew_ptr),
63  _cacheBlocked(false),
64  cacheStorePorts(params->cacheStorePorts), usedStorePorts(0),
65  cacheLoadPorts(params->cacheLoadPorts), usedLoadPorts(0),
66  lsqPolicy(params->smtLSQPolicy),
67  LQEntries(params->LQEntries),
68  SQEntries(params->SQEntries),
69  maxLQEntries(maxLSQAllocation(lsqPolicy, LQEntries, params->numThreads,
70  params->smtLSQThreshold)),
71  maxSQEntries(maxLSQAllocation(lsqPolicy, SQEntries, params->numThreads,
72  params->smtLSQThreshold)),
73  dcachePort(this, cpu_ptr),
74  numThreads(params->numThreads)
75 {
76  assert(numThreads > 0 && numThreads <= Impl::MaxThreads);
77 
78  //**********************************************/
79  //************ Handle SMT Parameters ***********/
80  //**********************************************/
81 
82  /* Run SMT olicy checks. */
83  if (lsqPolicy == SMTQueuePolicy::Dynamic) {
84  DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
85  } else if (lsqPolicy == SMTQueuePolicy::Partitioned) {
86  DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: "
87  "%i entries per LQ | %i entries per SQ\n",
89  } else if (lsqPolicy == SMTQueuePolicy::Threshold) {
90 
91  assert(params->smtLSQThreshold > params->LQEntries);
92  assert(params->smtLSQThreshold > params->SQEntries);
93 
94  DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
95  "%i entries per LQ | %i entries per SQ\n",
97  } else {
98  panic("Invalid LSQ sharing policy. Options are: Dynamic, "
99  "Partitioned, Threshold");
100  }
101 
102  thread.reserve(numThreads);
103  for (ThreadID tid = 0; tid < numThreads; tid++) {
104  thread.emplace_back(maxLQEntries, maxSQEntries);
105  thread[tid].init(cpu, iew_ptr, params, this, tid);
106  thread[tid].setDcachePort(&dcachePort);
107  }
108 }
109 
110 
111 template<class Impl>
112 std::string
114 {
115  return iewStage->name() + ".lsq";
116 }
117 
118 template<class Impl>
119 void
121 {
122  //Initialize LSQs
123  for (ThreadID tid = 0; tid < numThreads; tid++) {
124  thread[tid].regStats();
125  }
126 }
127 
128 template<class Impl>
129 void
131 {
132  activeThreads = at_ptr;
133  assert(activeThreads != 0);
134 }
135 
136 template <class Impl>
137 void
139 {
140  assert(isDrained());
141 
142  for (ThreadID tid = 0; tid < numThreads; tid++)
143  thread[tid].drainSanityCheck();
144 }
145 
146 template <class Impl>
147 bool
149 {
150  bool drained(true);
151 
152  if (!lqEmpty()) {
153  DPRINTF(Drain, "Not drained, LQ not empty.\n");
154  drained = false;
155  }
156 
157  if (!sqEmpty()) {
158  DPRINTF(Drain, "Not drained, SQ not empty.\n");
159  drained = false;
160  }
161 
162  return drained;
163 }
164 
165 template <class Impl>
166 void
168 {
169  usedStorePorts = 0;
170  _cacheBlocked = false;
171 
172  for (ThreadID tid = 0; tid < numThreads; tid++) {
173  thread[tid].takeOverFrom();
174  }
175 }
176 
177 template <class Impl>
178 void
180 {
181  // Re-issue loads which got blocked on the per-cycle load ports limit.
183  iewStage->cacheUnblocked();
184 
185  usedLoadPorts = 0;
186  usedStorePorts = 0;
187 }
188 
189 template<class Impl>
190 bool
192 {
193  return _cacheBlocked;
194 }
195 
196 template<class Impl>
197 void
199 {
200  _cacheBlocked = v;
201 }
202 
203 template<class Impl>
204 bool
205 LSQ<Impl>::cachePortAvailable(bool is_load) const
206 {
207  bool ret;
208  if (is_load) {
210  } else {
212  }
213  return ret;
214 }
215 
216 template<class Impl>
217 void
219 {
220  assert(cachePortAvailable(is_load));
221  if (is_load) {
222  usedLoadPorts++;
223  } else {
224  usedStorePorts++;
225  }
226 }
227 
228 template<class Impl>
229 void
231 {
232  ThreadID tid = load_inst->threadNumber;
233 
234  thread[tid].insertLoad(load_inst);
235 }
236 
237 template<class Impl>
238 void
240 {
241  ThreadID tid = store_inst->threadNumber;
242 
243  thread[tid].insertStore(store_inst);
244 }
245 
246 template<class Impl>
247 Fault
249 {
250  ThreadID tid = inst->threadNumber;
251 
252  return thread[tid].executeLoad(inst);
253 }
254 
255 template<class Impl>
256 Fault
258 {
259  ThreadID tid = inst->threadNumber;
260 
261  return thread[tid].executeStore(inst);
262 }
263 
264 template<class Impl>
265 void
267 {
268  list<ThreadID>::iterator threads = activeThreads->begin();
270 
271  while (threads != end) {
272  ThreadID tid = *threads++;
273 
274  if (numStoresToWB(tid) > 0) {
275  DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores "
276  "available for Writeback.\n", tid, numStoresToWB(tid));
277  }
278 
279  thread[tid].writebackStores();
280  }
281 }
282 
283 template<class Impl>
284 bool
286 {
287  /* Answers: Does Anybody Have a Violation?*/
288  list<ThreadID>::iterator threads = activeThreads->begin();
290 
291  while (threads != end) {
292  ThreadID tid = *threads++;
293 
294  if (thread[tid].violation())
295  return true;
296  }
297 
298  return false;
299 }
300 
301 template <class Impl>
302 void
304 {
305  iewStage->cacheUnblocked();
306  cacheBlocked(false);
307 
308  for (ThreadID tid : *activeThreads) {
309  thread[tid].recvRetry();
310  }
311 }
312 
313 template <class Impl>
314 void
316 {
317  auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
318  thread[cpu->contextToThread(senderState->contextId())]
319  .completeDataAccess(pkt);
320 }
321 
322 template <class Impl>
323 bool
325 {
326  if (pkt->isError())
327  DPRINTF(LSQ, "Got error packet back for address: %#X\n",
328  pkt->getAddr());
329 
330  auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
331  panic_if(!senderState, "Got packet back with unknown sender state\n");
332 
333  thread[cpu->contextToThread(senderState->contextId())].recvTimingResp(pkt);
334 
335  if (pkt->isInvalidate()) {
336  // This response also contains an invalidate; e.g. this can be the case
337  // if cmd is ReadRespWithInvalidate.
338  //
339  // The calling order between completeDataAccess and checkSnoop matters.
340  // By calling checkSnoop after completeDataAccess, we ensure that the
341  // fault set by checkSnoop is not lost. Calling writeback (more
342  // specifically inst->completeAcc) in completeDataAccess overwrites
343  // fault, and in case this instruction requires squashing (as
344  // determined by checkSnoop), the ReExec fault set by checkSnoop would
345  // be lost otherwise.
346 
347  DPRINTF(LSQ, "received invalidation with response for addr:%#x\n",
348  pkt->getAddr());
349 
350  for (ThreadID tid = 0; tid < numThreads; tid++) {
351  thread[tid].checkSnoop(pkt);
352  }
353  }
354  // Update the LSQRequest state (this may delete the request)
355  senderState->request()->packetReplied();
356 
357  return true;
358 }
359 
360 template <class Impl>
361 void
363 {
364  DPRINTF(LSQ, "received pkt for addr:%#x %s\n", pkt->getAddr(),
365  pkt->cmdString());
366 
367  // must be a snoop
368  if (pkt->isInvalidate()) {
369  DPRINTF(LSQ, "received invalidation for addr:%#x\n",
370  pkt->getAddr());
371  for (ThreadID tid = 0; tid < numThreads; tid++) {
372  thread[tid].checkSnoop(pkt);
373  }
374  }
375 }
376 
377 template<class Impl>
378 int
380 {
381  unsigned total = 0;
382 
383  list<ThreadID>::iterator threads = activeThreads->begin();
385 
386  while (threads != end) {
387  ThreadID tid = *threads++;
388 
389  total += getCount(tid);
390  }
391 
392  return total;
393 }
394 
395 template<class Impl>
396 int
398 {
399  unsigned total = 0;
400 
401  list<ThreadID>::iterator threads = activeThreads->begin();
403 
404  while (threads != end) {
405  ThreadID tid = *threads++;
406 
407  total += numLoads(tid);
408  }
409 
410  return total;
411 }
412 
413 template<class Impl>
414 int
416 {
417  unsigned total = 0;
418 
419  list<ThreadID>::iterator threads = activeThreads->begin();
421 
422  while (threads != end) {
423  ThreadID tid = *threads++;
424 
425  total += thread[tid].numStores();
426  }
427 
428  return total;
429 }
430 
431 template<class Impl>
432 unsigned
434 {
435  unsigned total = 0;
436 
437  list<ThreadID>::iterator threads = activeThreads->begin();
439 
440  while (threads != end) {
441  ThreadID tid = *threads++;
442 
443  total += thread[tid].numFreeLoadEntries();
444  }
445 
446  return total;
447 }
448 
449 template<class Impl>
450 unsigned
452 {
453  unsigned total = 0;
454 
455  list<ThreadID>::iterator threads = activeThreads->begin();
457 
458  while (threads != end) {
459  ThreadID tid = *threads++;
460 
461  total += thread[tid].numFreeStoreEntries();
462  }
463 
464  return total;
465 }
466 
467 template<class Impl>
468 unsigned
470 {
471  return thread[tid].numFreeLoadEntries();
472 }
473 
474 template<class Impl>
475 unsigned
477 {
478  return thread[tid].numFreeStoreEntries();
479 }
480 
481 template<class Impl>
482 bool
484 {
485  list<ThreadID>::iterator threads = activeThreads->begin();
487 
488  while (threads != end) {
489  ThreadID tid = *threads++;
490 
491  if (!(thread[tid].lqFull() || thread[tid].sqFull()))
492  return false;
493  }
494 
495  return true;
496 }
497 
498 template<class Impl>
499 bool
501 {
502  //@todo: Change to Calculate All Entries for
503  //Dynamic Policy
504  if (lsqPolicy == SMTQueuePolicy::Dynamic)
505  return isFull();
506  else
507  return thread[tid].lqFull() || thread[tid].sqFull();
508 }
509 
510 template<class Impl>
511 bool
513 {
514  return lqEmpty() && sqEmpty();
515 }
516 
517 template<class Impl>
518 bool
520 {
523 
524  while (threads != end) {
525  ThreadID tid = *threads++;
526 
527  if (!thread[tid].lqEmpty())
528  return false;
529  }
530 
531  return true;
532 }
533 
534 template<class Impl>
535 bool
537 {
540 
541  while (threads != end) {
542  ThreadID tid = *threads++;
543 
544  if (!thread[tid].sqEmpty())
545  return false;
546  }
547 
548  return true;
549 }
550 
551 template<class Impl>
552 bool
554 {
555  list<ThreadID>::iterator threads = activeThreads->begin();
557 
558  while (threads != end) {
559  ThreadID tid = *threads++;
560 
561  if (!thread[tid].lqFull())
562  return false;
563  }
564 
565  return true;
566 }
567 
568 template<class Impl>
569 bool
571 {
572  //@todo: Change to Calculate All Entries for
573  //Dynamic Policy
574  if (lsqPolicy == SMTQueuePolicy::Dynamic)
575  return lqFull();
576  else
577  return thread[tid].lqFull();
578 }
579 
580 template<class Impl>
581 bool
583 {
584  list<ThreadID>::iterator threads = activeThreads->begin();
586 
587  while (threads != end) {
588  ThreadID tid = *threads++;
589 
590  if (!sqFull(tid))
591  return false;
592  }
593 
594  return true;
595 }
596 
597 template<class Impl>
598 bool
600 {
601  //@todo: Change to Calculate All Entries for
602  //Dynamic Policy
603  if (lsqPolicy == SMTQueuePolicy::Dynamic)
604  return sqFull();
605  else
606  return thread[tid].sqFull();
607 }
608 
609 template<class Impl>
610 bool
612 {
613  list<ThreadID>::iterator threads = activeThreads->begin();
615 
616  while (threads != end) {
617  ThreadID tid = *threads++;
618 
619  if (!thread[tid].isStalled())
620  return false;
621  }
622 
623  return true;
624 }
625 
626 template<class Impl>
627 bool
629 {
630  if (lsqPolicy == SMTQueuePolicy::Dynamic)
631  return isStalled();
632  else
633  return thread[tid].isStalled();
634 }
635 
636 template<class Impl>
637 bool
639 {
640  list<ThreadID>::iterator threads = activeThreads->begin();
642 
643  while (threads != end) {
644  ThreadID tid = *threads++;
645 
646  if (hasStoresToWB(tid))
647  return true;
648  }
649 
650  return false;
651 }
652 
653 template<class Impl>
654 bool
656 {
657  list<ThreadID>::iterator threads = activeThreads->begin();
659 
660  while (threads != end) {
661  ThreadID tid = *threads++;
662 
663  if (willWB(tid))
664  return true;
665  }
666 
667  return false;
668 }
669 
670 template<class Impl>
671 void
673 {
676 
677  while (threads != end) {
678  ThreadID tid = *threads++;
679 
680  thread[tid].dumpInsts();
681  }
682 }
683 
684 template<class Impl>
685 Fault
686 LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
687  unsigned int size, Addr addr, Request::Flags flags,
688  uint64_t *res, AtomicOpFunctorPtr amo_op,
689  const std::vector<bool>& byte_enable)
690 {
691  // This comming request can be either load, store or atomic.
692  // Atomic request has a corresponding pointer to its atomic memory
693  // operation
694  bool isAtomic M5_VAR_USED = !isLoad && amo_op;
695 
696  ThreadID tid = cpu->contextToThread(inst->contextId());
697  auto cacheLineSize = cpu->cacheLineSize();
698  bool needs_burst = transferNeedsBurst(addr, size, cacheLineSize);
699  LSQRequest* req = nullptr;
700 
701  // Atomic requests that access data across cache line boundary are
702  // currently not allowed since the cache does not guarantee corresponding
703  // atomic memory operations to be executed atomically across a cache line.
704  // For ISAs such as x86 that supports cross-cache-line atomic instructions,
705  // the cache needs to be modified to perform atomic update to both cache
706  // lines. For now, such cross-line update is not supported.
707  assert(!isAtomic || (isAtomic && !needs_burst));
708 
709  if (inst->translationStarted()) {
710  req = inst->savedReq;
711  assert(req);
712  } else {
713  if (needs_burst) {
714  req = new SplitDataRequest(&thread[tid], inst, isLoad, addr,
715  size, flags, data, res);
716  } else {
717  req = new SingleDataRequest(&thread[tid], inst, isLoad, addr,
718  size, flags, data, res, std::move(amo_op));
719  }
720  assert(req);
721  if (!byte_enable.empty()) {
722  req->_byteEnable = byte_enable;
723  }
724  inst->setRequest();
725  req->taskId(cpu->taskId());
726 
727  // There might be fault from a previous execution attempt if this is
728  // a strictly ordered load
729  inst->getFault() = NoFault;
730 
731  req->initiateTranslation();
732  }
733 
734  /* This is the place were instructions get the effAddr. */
735  if (req->isTranslationComplete()) {
736  if (req->isMemAccessRequired()) {
737  inst->effAddr = req->getVaddr();
738  inst->effSize = size;
739  inst->effAddrValid(true);
740 
741  if (cpu->checker) {
742  inst->reqToVerify = std::make_shared<Request>(*req->request());
743  }
744  Fault fault;
745  if (isLoad)
746  fault = cpu->read(req, inst->lqIdx);
747  else
748  fault = cpu->write(req, data, inst->sqIdx);
749  // inst->getFault() may have the first-fault of a
750  // multi-access split request at this point.
751  // Overwrite that only if we got another type of fault
752  // (e.g. re-exec).
753  if (fault != NoFault)
754  inst->getFault() = fault;
755  } else if (isLoad) {
756  inst->setMemAccPredicate(false);
757  // Commit will have to clean up whatever happened. Set this
758  // instruction as executed.
759  inst->setExecuted();
760  }
761  }
762 
763  if (inst->traceData)
764  inst->traceData->setMem(addr, size, flags);
765 
766  return inst->getFault();
767 }
768 
769 template<class Impl>
770 void
773 {
774  _fault.push_back(fault);
775  numInTranslationFragments = 0;
776  numTranslatedFragments = 1;
777  /* If the instruction has been squahsed, let the request know
778  * as it may have to self-destruct. */
779  if (_inst->isSquashed()) {
780  this->squashTranslation();
781  } else {
782  _inst->strictlyOrdered(req->isStrictlyOrdered());
783 
784  flags.set(Flag::TranslationFinished);
785  if (fault == NoFault) {
786  _inst->physEffAddr = req->getPaddr();
787  _inst->memReqFlags = req->getFlags();
788  if (req->isCondSwap()) {
789  assert(_res);
790  req->setExtraData(*_res);
791  }
792  setState(State::Request);
793  } else {
794  setState(State::Fault);
795  }
796 
797  LSQRequest::_inst->fault = fault;
798  LSQRequest::_inst->translationCompleted(true);
799  }
800 }
801 
802 template<class Impl>
803 void
806 {
807  int i;
808  for (i = 0; i < _requests.size() && _requests[i] != req; i++);
809  assert(i < _requests.size());
810  _fault[i] = fault;
811 
812  numInTranslationFragments--;
813  numTranslatedFragments++;
814 
815  if (fault == NoFault)
816  mainReq->setFlags(req->getFlags());
817 
818  if (numTranslatedFragments == _requests.size()) {
819  if (_inst->isSquashed()) {
820  this->squashTranslation();
821  } else {
822  _inst->strictlyOrdered(mainReq->isStrictlyOrdered());
823  flags.set(Flag::TranslationFinished);
824  _inst->translationCompleted(true);
825 
826  for (i = 0; i < _fault.size() && _fault[i] == NoFault; i++);
827  if (i > 0) {
828  _inst->physEffAddr = request(0)->getPaddr();
829  _inst->memReqFlags = mainReq->getFlags();
830  if (mainReq->isCondSwap()) {
831  assert (i == _fault.size());
832  assert(_res);
833  mainReq->setExtraData(*_res);
834  }
835  if (i == _fault.size()) {
836  _inst->fault = NoFault;
837  setState(State::Request);
838  } else {
839  _inst->fault = _fault[i];
840  setState(State::PartialFault);
841  }
842  } else {
843  _inst->fault = _fault[0];
844  setState(State::Fault);
845  }
846  }
847 
848  }
849 }
850 
851 template<class Impl>
852 void
854 {
855  assert(_requests.size() == 0);
856 
857  this->addRequest(_addr, _size, _byteEnable);
858 
859  if (_requests.size() > 0) {
860  _requests.back()->setReqInstSeqNum(_inst->seqNum);
861  _requests.back()->taskId(_taskId);
862  _inst->translationStarted(true);
863  setState(State::Translation);
864  flags.set(Flag::TranslationStarted);
865 
866  _inst->savedReq = this;
867  sendFragmentToTranslation(0);
868  } else {
869  _inst->setMemAccPredicate(false);
870  }
871 }
872 
873 template<class Impl>
874 PacketPtr
876 {
877  return _mainPacket;
878 }
879 
880 template<class Impl>
883 {
884  return mainReq;
885 }
886 
887 template<class Impl>
888 void
890 {
891  auto cacheLineSize = _port.cacheLineSize();
892  Addr base_addr = _addr;
893  Addr next_addr = addrBlockAlign(_addr + cacheLineSize, cacheLineSize);
894  Addr final_addr = addrBlockAlign(_addr + _size, cacheLineSize);
895  uint32_t size_so_far = 0;
896 
897  mainReq = std::make_shared<Request>(base_addr,
898  _size, _flags, _inst->masterId(),
899  _inst->instAddr(), _inst->contextId());
900  if (!_byteEnable.empty()) {
901  mainReq->setByteEnable(_byteEnable);
902  }
903 
904  // Paddr is not used in mainReq. However, we will accumulate the flags
905  // from the sub requests into mainReq by calling setFlags() in finish().
906  // setFlags() assumes that paddr is set so flip the paddr valid bit here to
907  // avoid a potential assert in setFlags() when we call it from finish().
908  mainReq->setPaddr(0);
909 
910  /* Get the pre-fix, possibly unaligned. */
911  if (_byteEnable.empty()) {
912  this->addRequest(base_addr, next_addr - base_addr, _byteEnable);
913  } else {
914  auto it_start = _byteEnable.begin();
915  auto it_end = _byteEnable.begin() + (next_addr - base_addr);
916  this->addRequest(base_addr, next_addr - base_addr,
917  std::vector<bool>(it_start, it_end));
918  }
919  size_so_far = next_addr - base_addr;
920 
921  /* We are block aligned now, reading whole blocks. */
922  base_addr = next_addr;
923  while (base_addr != final_addr) {
924  if (_byteEnable.empty()) {
925  this->addRequest(base_addr, cacheLineSize, _byteEnable);
926  } else {
927  auto it_start = _byteEnable.begin() + size_so_far;
928  auto it_end = _byteEnable.begin() + size_so_far + cacheLineSize;
929  this->addRequest(base_addr, cacheLineSize,
930  std::vector<bool>(it_start, it_end));
931  }
932  size_so_far += cacheLineSize;
933  base_addr += cacheLineSize;
934  }
935 
936  /* Deal with the tail. */
937  if (size_so_far < _size) {
938  if (_byteEnable.empty()) {
939  this->addRequest(base_addr, _size - size_so_far, _byteEnable);
940  } else {
941  auto it_start = _byteEnable.begin() + size_so_far;
942  auto it_end = _byteEnable.end();
943  this->addRequest(base_addr, _size - size_so_far,
944  std::vector<bool>(it_start, it_end));
945  }
946  }
947 
948  if (_requests.size() > 0) {
949  /* Setup the requests and send them to translation. */
950  for (auto& r: _requests) {
951  r->setReqInstSeqNum(_inst->seqNum);
952  r->taskId(_taskId);
953  }
954 
955  _inst->translationStarted(true);
956  setState(State::Translation);
957  flags.set(Flag::TranslationStarted);
958  this->_inst->savedReq = this;
959  numInTranslationFragments = 0;
960  numTranslatedFragments = 0;
961  _fault.resize(_requests.size());
962 
963  for (uint32_t i = 0; i < _requests.size(); i++) {
964  sendFragmentToTranslation(i);
965  }
966  } else {
967  _inst->setMemAccPredicate(false);
968  }
969 }
970 
971 template<class Impl>
972 void
974 {
975  numInTranslationFragments++;
976  _port.dTLB()->translateTiming(
977  this->request(i),
978  this->_inst->thread->getTC(), this,
979  this->isLoad() ? BaseTLB::Read : BaseTLB::Write);
980 }
981 
982 template<class Impl>
983 bool
985 {
986  assert(_numOutstandingPackets == 1);
987  auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
988  flags.set(Flag::Complete);
989  state->outstanding--;
990  assert(pkt == _packets.front());
991  _port.completeDataAccess(pkt);
992  return true;
993 }
994 
995 template<class Impl>
996 bool
998 {
999  auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
1000  uint32_t pktIdx = 0;
1001  while (pktIdx < _packets.size() && pkt != _packets[pktIdx])
1002  pktIdx++;
1003  assert(pktIdx < _packets.size());
1004  numReceivedPackets++;
1005  state->outstanding--;
1006  if (numReceivedPackets == _packets.size()) {
1007  flags.set(Flag::Complete);
1008  /* Assemble packets. */
1009  PacketPtr resp = isLoad()
1010  ? Packet::createRead(mainReq)
1011  : Packet::createWrite(mainReq);
1012  if (isLoad())
1013  resp->dataStatic(_inst->memData);
1014  else
1015  resp->dataStatic(_data);
1016  resp->senderState = _senderState;
1017  _port.completeDataAccess(resp);
1018  delete resp;
1019  }
1020  return true;
1021 }
1022 
1023 template<class Impl>
1024 void
1026 {
1027  assert(_senderState);
1028  /* Retries do not create new packets. */
1029  if (_packets.size() == 0) {
1030  _packets.push_back(
1031  isLoad()
1032  ? Packet::createRead(request())
1033  : Packet::createWrite(request()));
1034  _packets.back()->dataStatic(_inst->memData);
1035  _packets.back()->senderState = _senderState;
1036  }
1037  assert(_packets.size() == 1);
1038 }
1039 
1040 template<class Impl>
1041 void
1043 {
1044  /* Extra data?? */
1045  Addr base_address = _addr;
1046 
1047  if (_packets.size() == 0) {
1048  /* New stuff */
1049  if (isLoad()) {
1050  _mainPacket = Packet::createRead(mainReq);
1051  _mainPacket->dataStatic(_inst->memData);
1052  }
1053  for (int i = 0; i < _requests.size() && _fault[i] == NoFault; i++) {
1054  RequestPtr r = _requests[i];
1055  PacketPtr pkt = isLoad() ? Packet::createRead(r)
1056  : Packet::createWrite(r);
1057  ptrdiff_t offset = r->getVaddr() - base_address;
1058  if (isLoad()) {
1059  pkt->dataStatic(_inst->memData + offset);
1060  } else {
1061  uint8_t* req_data = new uint8_t[r->getSize()];
1062  std::memcpy(req_data,
1063  _inst->memData + offset,
1064  r->getSize());
1065  pkt->dataDynamic(req_data);
1066  }
1067  pkt->senderState = _senderState;
1068  _packets.push_back(pkt);
1069  }
1070  }
1071  assert(_packets.size() > 0);
1072 }
1073 
1074 template<class Impl>
1075 void
1077 {
1078  assert(_numOutstandingPackets == 0);
1079  if (lsqUnit()->trySendPacket(isLoad(), _packets.at(0)))
1080  _numOutstandingPackets = 1;
1081 }
1082 
1083 template<class Impl>
1084 void
1086 {
1087  /* Try to send the packets. */
1088  while (numReceivedPackets + _numOutstandingPackets < _packets.size() &&
1089  lsqUnit()->trySendPacket(isLoad(),
1090  _packets.at(numReceivedPackets + _numOutstandingPackets))) {
1091  _numOutstandingPackets++;
1092  }
1093 }
1094 
1095 template<class Impl>
1096 Cycles
1099 {
1100  return pkt->req->localAccessor(thread, pkt);
1101 }
1102 
1103 template<class Impl>
1104 Cycles
1106  ThreadContext *thread, PacketPtr mainPkt)
1107 {
1108  Cycles delay(0);
1109  unsigned offset = 0;
1110 
1111  for (auto r: _requests) {
1112  PacketPtr pkt =
1113  new Packet(r, isLoad() ? MemCmd::ReadReq : MemCmd::WriteReq);
1114  pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
1115  Cycles d = r->localAccessor(thread, pkt);
1116  if (d > delay)
1117  delay = d;
1118  offset += r->getSize();
1119  delete pkt;
1120  }
1121  return delay;
1122 }
1123 
1124 template<class Impl>
1125 bool
1127 {
1128  return ( (LSQRequest::_requests[0]->getPaddr() & blockMask) == blockAddr);
1129 }
1130 
1146 template<class Impl>
1147 bool
1149 {
1150  bool is_hit = false;
1151  for (auto &r: _requests) {
1161  if (r->hasPaddr() && (r->getPaddr() & blockMask) == blockAddr) {
1162  is_hit = true;
1163  break;
1164  }
1165  }
1166  return is_hit;
1167 }
1168 
1169 template <class Impl>
1170 bool
1172 {
1173  return lsq->recvTimingResp(pkt);
1174 }
1175 
1176 template <class Impl>
1177 void
1179 {
1180  for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
1181  if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
1182  cpu->wakeup(tid);
1183  }
1184  }
1185  lsq->recvTimingSnoopReq(pkt);
1186 }
1187 
1188 template <class Impl>
1189 void
1191 {
1192  lsq->recvReqRetry();
1193 }
1194 
1195 #endif//__CPU_O3_LSQ_IMPL_HH__
IEW * iewStage
The IEW stage pointer.
Definition: lsq.hh:1045
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:163
#define DPRINTF(x,...)
Definition: trace.hh:225
void takeOverFrom()
Takes over execution from another CPU&#39;s thread.
Definition: lsq_impl.hh:167
virtual void initiateTranslation()
Definition: lsq_impl.hh:889
ThreadID numThreads
Number of Threads.
Definition: lsq.hh:1118
Impl::DynInstPtr DynInstPtr
Definition: lsq.hh:67
Bitfield< 28 > v
decltype(nullptr) constexpr NoFault
Definition: types.hh:243
Fault pushRequest(const DynInstPtr &inst, bool isLoad, uint8_t *data, unsigned int size, Addr addr, Request::Flags flags, uint64_t *res, AtomicOpFunctorPtr amo_op, const std::vector< bool > &byte_enable)
Definition: lsq_impl.hh:686
Cycles is a wrapper class for representing cycle counts, i.e.
Definition: types.hh:81
virtual bool recvTimingResp(PacketPtr pkt)
Timing version of receive.
Definition: lsq_impl.hh:1171
virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask)
Caches may probe into the load-store queue to enforce memory ordering guarantees. ...
Definition: lsq_impl.hh:1148
void taskId(const uint32_t &v)
Definition: lsq.hh:465
bool transferNeedsBurst(Addr addr, unsigned int size, unsigned int block_size)
Returns true if the given memory access (address, size) needs to be fragmented across aligned fixed-s...
Definition: utils.hh:77
bool willWB()
Returns if the LSQ will write back to memory this cycle.
Definition: lsq_impl.hh:655
Bitfield< 7 > i
std::vector< RequestPtr > _requests
Definition: lsq.hh:292
virtual void sendPacketToCache()
Definition: lsq_impl.hh:1085
unsigned maxSQEntries
Max SQ Size - Used to Enforce Sharing Policies.
Definition: lsq.hh:1109
SMTQueuePolicy lsqPolicy
The LSQ policy for SMT mode.
Definition: lsq.hh:1072
std::vector< LSQUnit > thread
The LSQ units for individual threads.
Definition: lsq.hh:1115
std::vector< bool > _byteEnable
Definition: lsq.hh:298
std::string name() const
Returns the name of the LSQ.
Definition: lsq_impl.hh:113
void tick()
Ticks the LSQ.
Definition: lsq_impl.hh:179
std::shared_ptr< Request > RequestPtr
Definition: request.hh:81
bool sqFull()
Returns if any of the SQs are full.
Definition: lsq_impl.hh:582
ip6_addr_t addr
Definition: inet.hh:330
static PacketPtr createWrite(const RequestPtr &req)
Definition: packet.hh:913
std::unique_ptr< AtomicOpFunctor > AtomicOpFunctorPtr
Definition: amo.hh:228
virtual void recvReqRetry()
Handles doing a retry of the previous send.
Definition: lsq_impl.hh:1190
bool violation()
Returns whether or not there was a memory ordering violation.
Definition: lsq_impl.hh:285
bool hasStoresToWB()
Returns whether or not there are any stores to write back to memory.
Definition: lsq_impl.hh:638
virtual Cycles handleLocalAccess(ThreadContext *thread, PacketPtr pkt)
Memory mapped IPR accesses.
Definition: lsq_impl.hh:1097
Bitfield< 23, 0 > offset
Definition: types.hh:152
Overload hash function for BasicBlockRange type.
Definition: vec_reg.hh:587
Derived class to hold any sender state the LSQ needs.
Definition: lsq.hh:73
Fault executeLoad(const DynInstPtr &inst)
Executes a load.
Definition: lsq_impl.hh:248
Bitfield< 4, 0 > mode
bool isTranslationComplete()
Definition: lsq.hh:591
int usedStorePorts
The number of used cache ports in this cycle by stores.
Definition: lsq.hh:1064
bool isFull()
Returns if the LSQ is full (either LQ or SQ is full).
Definition: lsq_impl.hh:483
Definition: lsq.hh:62
bool lqEmpty() const
Returns if all of the LQs are empty.
Definition: lsq_impl.hh:519
bool isInvalidate() const
Definition: packet.hh:537
ThreadContext is the external interface to all thread state for anything outside of the CPU...
T * getPtr()
get a pointer to the data ptr.
Definition: packet.hh:1084
void writebackStores()
Attempts to write back stores until all cache ports are used or the interface becomes blocked...
Definition: lsq_impl.hh:266
void dumpInsts() const
Debugging function to print out all instructions.
Definition: lsq_impl.hh:672
void dataStatic(T *p)
Set the data pointer to the following value that should not be freed.
Definition: packet.hh:1034
RequestPtr req
A pointer to the original request.
Definition: packet.hh:321
virtual void finish(const Fault &fault, const RequestPtr &req, ThreadContext *tc, BaseTLB::Mode mode)
Definition: lsq_impl.hh:771
bool isMemAccessRequired()
Definition: lsq.hh:618
bool _cacheBlocked
D-cache is blocked.
Definition: lsq.hh:1060
virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask)
Test if the request accesses a particular cache line.
Definition: lsq_impl.hh:1126
void insertLoad(const DynInstPtr &load_inst)
Inserts a load into the LSQ.
Definition: lsq_impl.hh:230
bool isDrained() const
Has the LSQ drained?
Definition: lsq_impl.hh:148
Addr getVaddr(int idx=0) const
Definition: lsq.hh:481
void cachePortBusy(bool is_load)
Another store port is in use.
Definition: lsq_impl.hh:218
unsigned numFreeStoreEntries()
Returns the number of free store entries.
Definition: lsq_impl.hh:451
bool isError() const
Definition: packet.hh:549
virtual void recvTimingSnoopReq(PacketPtr pkt)
Receive a timing snoop request from the peer.
Definition: lsq_impl.hh:1178
virtual void buildPackets()
Definition: lsq_impl.hh:1025
DcachePort dcachePort
Data port.
Definition: lsq.hh:1112
int usedLoadPorts
The number of used cache ports in this cycle by loads.
Definition: lsq.hh:1068
void drainSanityCheck() const
Perform sanity checks after a drain.
Definition: lsq_impl.hh:138
int numLoads()
Returns the total number of loads in the load queue.
Definition: lsq_impl.hh:397
Bitfield< 9 > d
unsigned maxLQEntries
Max LQ Size - Used to Enforce Sharing Policies.
Definition: lsq.hh:1106
void sendFragmentToTranslation(int i)
Definition: lsq_impl.hh:973
Addr getAddr() const
Definition: packet.hh:720
int numStoresToWB(ThreadID tid)
Returns the number of stores a specific thread has to write back.
Definition: lsq.hh:996
Fault executeStore(const DynInstPtr &inst)
Executes a store.
Definition: lsq_impl.hh:257
virtual void finish(const Fault &fault, const RequestPtr &req, ThreadContext *tc, BaseTLB::Mode mode)
Definition: lsq_impl.hh:804
virtual Cycles handleLocalAccess(ThreadContext *thread, PacketPtr pkt)
Memory mapped IPR accesses.
Definition: lsq_impl.hh:1105
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:140
static PacketPtr createRead(const RequestPtr &req)
Constructor-like methods that return Packets based on Request objects.
Definition: packet.hh:907
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
Definition: packet.hh:249
virtual void sendPacketToCache()
Definition: lsq_impl.hh:1076
void insertStore(const DynInstPtr &store_inst)
Inserts a store into the LSQ.
Definition: lsq_impl.hh:239
const FlagsType total
Print the total.
Definition: info.hh:49
bool isEmpty() const
Returns if the LSQ is empty (both LQ and SQ are empty).
Definition: lsq_impl.hh:512
const DynInstPtr _inst
Definition: lsq.hh:288
int getCount()
Returns the number of instructions in all of the queues.
Definition: lsq_impl.hh:379
Mode
Definition: tlb.hh:57
virtual bool recvTimingResp(PacketPtr pkt)
Definition: lsq_impl.hh:997
int16_t ThreadID
Thread index/ID type.
Definition: types.hh:225
Impl::O3CPU O3CPU
Definition: lsq.hh:66
virtual void initiateTranslation()
Definition: lsq_impl.hh:853
uint8_t outstanding
Number of outstanding packets to complete.
Definition: lsq.hh:94
unsigned numFreeLoadEntries()
Returns the number of free load entries.
Definition: lsq_impl.hh:433
RequestPtr request(int idx=0)
Definition: lsq.hh:473
void recvReqRetry()
Retry the previous send that failed.
Definition: lsq_impl.hh:303
SenderState * senderState
This packet's sender state.
Definition: packet.hh:474
void setActiveThreads(std::list< ThreadID > *at_ptr)
Sets the pointer to the list of active threads.
Definition: lsq_impl.hh:130
int cacheStorePorts
The number of cache ports available each cycle (stores only).
Definition: lsq.hh:1062
virtual void buildPackets()
Definition: lsq_impl.hh:1042
LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params)
Constructs an LSQ with the given parameters.
Definition: lsq_impl.hh:61
int numStores()
Returns the total number of stores in the store queue.
Definition: lsq_impl.hh:415
virtual bool recvTimingResp(PacketPtr pkt)
Definition: lsq_impl.hh:984
bool cachePortAvailable(bool is_load) const
Is any store port available to use?
Definition: lsq_impl.hh:205
void dataDynamic(T *p)
Set the data pointer to a value that should have delete [] called on it.
Definition: packet.hh:1072
Memory operation metadata.
Definition: lsq.hh:230
void completeDataAccess(PacketPtr pkt)
Definition: lsq_impl.hh:315
int cacheLoadPorts
The number of cache ports available each cycle (loads only).
Definition: lsq.hh:1066
virtual PacketPtr mainPacket()
Definition: lsq_impl.hh:875
void regStats()
Registers statistics of each LSQ unit.
Definition: lsq_impl.hh:120
virtual RequestPtr mainRequest()
Definition: lsq_impl.hh:882
Addr addrBlockAlign(Addr addr, Addr block_size)
Returns the address of the closest aligned fixed-size block to the given address. ...
Definition: utils.hh:63
bool recvTimingResp(PacketPtr pkt)
Handles writing back and completing the load or store that has returned from memory.
Definition: lsq_impl.hh:324
virtual void initiateTranslation()=0
bool cacheBlocked() const
Is D-cache blocked?
Definition: lsq_impl.hh:191
Impl::CPUPol::IEW IEW
Definition: lsq.hh:68
const std::string & cmdString() const
Return the string name of the cmd field (for debugging and tracing).
Definition: packet.hh:517
O3CPU * cpu
The CPU pointer.
Definition: lsq.hh:1042
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition: logging.hh:181
std::list< ThreadID > * activeThreads
List of Active Threads in System.
Definition: lsq.hh:1098
bool isStalled()
Returns if the LSQ is stalled due to a memory operation that must be replayed.
Definition: lsq_impl.hh:611
void recvTimingSnoopReq(PacketPtr pkt)
Definition: lsq_impl.hh:362
const char data[]
std::shared_ptr< FaultBase > Fault
Definition: types.hh:238
bool sqEmpty() const
Returns if all of the SQs are empty.
Definition: lsq_impl.hh:536
ProbePointArg< PacketInfo > Packet
Packet probe point.
Definition: mem.hh:103
bool lqFull()
Returns if any of the LQs are full.
Definition: lsq_impl.hh:553

Generated on Thu May 28 2020 16:21:31 for gem5 by doxygen 1.8.13