gem5  v19.0.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
lsq_impl.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2012, 2014, 2017-2018 ARM Limited
3  * Copyright (c) 2013 Advanced Micro Devices, Inc.
4  * All rights reserved
5  *
6  * The license below extends only to copyright in the software and shall
7  * not be construed as granting a license to any other intellectual
8  * property including but not limited to intellectual property relating
9  * to a hardware implementation of the functionality of the software
10  * licensed hereunder. You may use the software subject to the license
11  * terms below provided that you ensure that this notice is replicated
12  * unmodified and in its entirety in all distributions of the software,
13  * modified or unmodified, in source code or in binary form.
14  *
15  * Copyright (c) 2005-2006 The Regents of The University of Michigan
16  * All rights reserved.
17  *
18  * Redistribution and use in source and binary forms, with or without
19  * modification, are permitted provided that the following conditions are
20  * met: redistributions of source code must retain the above copyright
21  * notice, this list of conditions and the following disclaimer;
22  * redistributions in binary form must reproduce the above copyright
23  * notice, this list of conditions and the following disclaimer in the
24  * documentation and/or other materials provided with the distribution;
25  * neither the name of the copyright holders nor the names of its
26  * contributors may be used to endorse or promote products derived from
27  * this software without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40  *
41  * Authors: Korey Sewell
42  */
43 
44 #ifndef __CPU_O3_LSQ_IMPL_HH__
45 #define __CPU_O3_LSQ_IMPL_HH__
46 
47 #include <algorithm>
48 #include <list>
49 #include <string>
50 
51 #include "base/logging.hh"
52 #include "cpu/o3/cpu.hh"
53 #include "cpu/o3/lsq.hh"
54 #include "debug/Drain.hh"
55 #include "debug/Fetch.hh"
56 #include "debug/LSQ.hh"
57 #include "debug/Writeback.hh"
58 #include "params/DerivO3CPU.hh"
59 
60 using namespace std;
61 
62 template <class Impl>
63 LSQ<Impl>::LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params)
64  : cpu(cpu_ptr), iewStage(iew_ptr),
65  _cacheBlocked(false),
66  cacheStorePorts(params->cacheStorePorts), usedStorePorts(0),
67  cacheLoadPorts(params->cacheLoadPorts), usedLoadPorts(0),
68  lsqPolicy(params->smtLSQPolicy),
69  LQEntries(params->LQEntries),
70  SQEntries(params->SQEntries),
71  maxLQEntries(maxLSQAllocation(lsqPolicy, LQEntries, params->numThreads,
72  params->smtLSQThreshold)),
73  maxSQEntries(maxLSQAllocation(lsqPolicy, SQEntries, params->numThreads,
74  params->smtLSQThreshold)),
75  dcachePort(this, cpu_ptr),
76  numThreads(params->numThreads)
77 {
78  assert(numThreads > 0 && numThreads <= Impl::MaxThreads);
79 
80  //**********************************************/
81  //************ Handle SMT Parameters ***********/
82  //**********************************************/
83 
84  /* Run SMT olicy checks. */
85  if (lsqPolicy == SMTQueuePolicy::Dynamic) {
86  DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
87  } else if (lsqPolicy == SMTQueuePolicy::Partitioned) {
88  DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: "
89  "%i entries per LQ | %i entries per SQ\n",
91  } else if (lsqPolicy == SMTQueuePolicy::Threshold) {
92 
93  assert(params->smtLSQThreshold > params->LQEntries);
94  assert(params->smtLSQThreshold > params->SQEntries);
95 
96  DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
97  "%i entries per LQ | %i entries per SQ\n",
99  } else {
100  panic("Invalid LSQ sharing policy. Options are: Dynamic, "
101  "Partitioned, Threshold");
102  }
103 
104  thread.reserve(numThreads);
105  for (ThreadID tid = 0; tid < numThreads; tid++) {
106  thread.emplace_back(maxLQEntries, maxSQEntries);
107  thread[tid].init(cpu, iew_ptr, params, this, tid);
108  thread[tid].setDcachePort(&dcachePort);
109  }
110 }
111 
112 
113 template<class Impl>
114 std::string
116 {
117  return iewStage->name() + ".lsq";
118 }
119 
120 template<class Impl>
121 void
123 {
124  //Initialize LSQs
125  for (ThreadID tid = 0; tid < numThreads; tid++) {
126  thread[tid].regStats();
127  }
128 }
129 
130 template<class Impl>
131 void
133 {
134  activeThreads = at_ptr;
135  assert(activeThreads != 0);
136 }
137 
138 template <class Impl>
139 void
141 {
142  assert(isDrained());
143 
144  for (ThreadID tid = 0; tid < numThreads; tid++)
145  thread[tid].drainSanityCheck();
146 }
147 
148 template <class Impl>
149 bool
151 {
152  bool drained(true);
153 
154  if (!lqEmpty()) {
155  DPRINTF(Drain, "Not drained, LQ not empty.\n");
156  drained = false;
157  }
158 
159  if (!sqEmpty()) {
160  DPRINTF(Drain, "Not drained, SQ not empty.\n");
161  drained = false;
162  }
163 
164  return drained;
165 }
166 
167 template <class Impl>
168 void
170 {
171  usedStorePorts = 0;
172  _cacheBlocked = false;
173 
174  for (ThreadID tid = 0; tid < numThreads; tid++) {
175  thread[tid].takeOverFrom();
176  }
177 }
178 
179 template <class Impl>
180 void
182 {
183  // Re-issue loads which got blocked on the per-cycle load ports limit.
185  iewStage->cacheUnblocked();
186 
187  usedLoadPorts = 0;
188  usedStorePorts = 0;
189 }
190 
191 template<class Impl>
192 bool
194 {
195  return _cacheBlocked;
196 }
197 
198 template<class Impl>
199 void
201 {
202  _cacheBlocked = v;
203 }
204 
205 template<class Impl>
206 bool
207 LSQ<Impl>::cachePortAvailable(bool is_load) const
208 {
209  bool ret;
210  if (is_load) {
212  } else {
214  }
215  return ret;
216 }
217 
218 template<class Impl>
219 void
221 {
222  assert(cachePortAvailable(is_load));
223  if (is_load) {
224  usedLoadPorts++;
225  } else {
226  usedStorePorts++;
227  }
228 }
229 
230 template<class Impl>
231 void
233 {
234  ThreadID tid = load_inst->threadNumber;
235 
236  thread[tid].insertLoad(load_inst);
237 }
238 
239 template<class Impl>
240 void
242 {
243  ThreadID tid = store_inst->threadNumber;
244 
245  thread[tid].insertStore(store_inst);
246 }
247 
248 template<class Impl>
249 Fault
251 {
252  ThreadID tid = inst->threadNumber;
253 
254  return thread[tid].executeLoad(inst);
255 }
256 
257 template<class Impl>
258 Fault
260 {
261  ThreadID tid = inst->threadNumber;
262 
263  return thread[tid].executeStore(inst);
264 }
265 
266 template<class Impl>
267 void
269 {
270  list<ThreadID>::iterator threads = activeThreads->begin();
272 
273  while (threads != end) {
274  ThreadID tid = *threads++;
275 
276  if (numStoresToWB(tid) > 0) {
277  DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores "
278  "available for Writeback.\n", tid, numStoresToWB(tid));
279  }
280 
281  thread[tid].writebackStores();
282  }
283 }
284 
285 template<class Impl>
286 bool
288 {
289  /* Answers: Does Anybody Have a Violation?*/
290  list<ThreadID>::iterator threads = activeThreads->begin();
292 
293  while (threads != end) {
294  ThreadID tid = *threads++;
295 
296  if (thread[tid].violation())
297  return true;
298  }
299 
300  return false;
301 }
302 
303 template <class Impl>
304 void
306 {
307  iewStage->cacheUnblocked();
308  cacheBlocked(false);
309 
310  for (ThreadID tid : *activeThreads) {
311  thread[tid].recvRetry();
312  }
313 }
314 
315 template <class Impl>
316 void
318 {
319  auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
320  thread[cpu->contextToThread(senderState->contextId())]
321  .completeDataAccess(pkt);
322 }
323 
324 template <class Impl>
325 bool
327 {
328  if (pkt->isError())
329  DPRINTF(LSQ, "Got error packet back for address: %#X\n",
330  pkt->getAddr());
331 
332  auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
333  panic_if(!senderState, "Got packet back with unknown sender state\n");
334 
335  thread[cpu->contextToThread(senderState->contextId())].recvTimingResp(pkt);
336 
337  if (pkt->isInvalidate()) {
338  // This response also contains an invalidate; e.g. this can be the case
339  // if cmd is ReadRespWithInvalidate.
340  //
341  // The calling order between completeDataAccess and checkSnoop matters.
342  // By calling checkSnoop after completeDataAccess, we ensure that the
343  // fault set by checkSnoop is not lost. Calling writeback (more
344  // specifically inst->completeAcc) in completeDataAccess overwrites
345  // fault, and in case this instruction requires squashing (as
346  // determined by checkSnoop), the ReExec fault set by checkSnoop would
347  // be lost otherwise.
348 
349  DPRINTF(LSQ, "received invalidation with response for addr:%#x\n",
350  pkt->getAddr());
351 
352  for (ThreadID tid = 0; tid < numThreads; tid++) {
353  thread[tid].checkSnoop(pkt);
354  }
355  }
356  // Update the LSQRequest state (this may delete the request)
357  senderState->request()->packetReplied();
358 
359  return true;
360 }
361 
362 template <class Impl>
363 void
365 {
366  DPRINTF(LSQ, "received pkt for addr:%#x %s\n", pkt->getAddr(),
367  pkt->cmdString());
368 
369  // must be a snoop
370  if (pkt->isInvalidate()) {
371  DPRINTF(LSQ, "received invalidation for addr:%#x\n",
372  pkt->getAddr());
373  for (ThreadID tid = 0; tid < numThreads; tid++) {
374  thread[tid].checkSnoop(pkt);
375  }
376  }
377 }
378 
379 template<class Impl>
380 int
382 {
383  unsigned total = 0;
384 
385  list<ThreadID>::iterator threads = activeThreads->begin();
387 
388  while (threads != end) {
389  ThreadID tid = *threads++;
390 
391  total += getCount(tid);
392  }
393 
394  return total;
395 }
396 
397 template<class Impl>
398 int
400 {
401  unsigned total = 0;
402 
403  list<ThreadID>::iterator threads = activeThreads->begin();
405 
406  while (threads != end) {
407  ThreadID tid = *threads++;
408 
409  total += numLoads(tid);
410  }
411 
412  return total;
413 }
414 
415 template<class Impl>
416 int
418 {
419  unsigned total = 0;
420 
421  list<ThreadID>::iterator threads = activeThreads->begin();
423 
424  while (threads != end) {
425  ThreadID tid = *threads++;
426 
427  total += thread[tid].numStores();
428  }
429 
430  return total;
431 }
432 
433 template<class Impl>
434 unsigned
436 {
437  unsigned total = 0;
438 
439  list<ThreadID>::iterator threads = activeThreads->begin();
441 
442  while (threads != end) {
443  ThreadID tid = *threads++;
444 
445  total += thread[tid].numFreeLoadEntries();
446  }
447 
448  return total;
449 }
450 
451 template<class Impl>
452 unsigned
454 {
455  unsigned total = 0;
456 
457  list<ThreadID>::iterator threads = activeThreads->begin();
459 
460  while (threads != end) {
461  ThreadID tid = *threads++;
462 
463  total += thread[tid].numFreeStoreEntries();
464  }
465 
466  return total;
467 }
468 
469 template<class Impl>
470 unsigned
472 {
473  return thread[tid].numFreeLoadEntries();
474 }
475 
476 template<class Impl>
477 unsigned
479 {
480  return thread[tid].numFreeStoreEntries();
481 }
482 
483 template<class Impl>
484 bool
486 {
487  list<ThreadID>::iterator threads = activeThreads->begin();
489 
490  while (threads != end) {
491  ThreadID tid = *threads++;
492 
493  if (!(thread[tid].lqFull() || thread[tid].sqFull()))
494  return false;
495  }
496 
497  return true;
498 }
499 
500 template<class Impl>
501 bool
503 {
504  //@todo: Change to Calculate All Entries for
505  //Dynamic Policy
506  if (lsqPolicy == SMTQueuePolicy::Dynamic)
507  return isFull();
508  else
509  return thread[tid].lqFull() || thread[tid].sqFull();
510 }
511 
512 template<class Impl>
513 bool
515 {
516  return lqEmpty() && sqEmpty();
517 }
518 
519 template<class Impl>
520 bool
522 {
525 
526  while (threads != end) {
527  ThreadID tid = *threads++;
528 
529  if (!thread[tid].lqEmpty())
530  return false;
531  }
532 
533  return true;
534 }
535 
536 template<class Impl>
537 bool
539 {
542 
543  while (threads != end) {
544  ThreadID tid = *threads++;
545 
546  if (!thread[tid].sqEmpty())
547  return false;
548  }
549 
550  return true;
551 }
552 
553 template<class Impl>
554 bool
556 {
557  list<ThreadID>::iterator threads = activeThreads->begin();
559 
560  while (threads != end) {
561  ThreadID tid = *threads++;
562 
563  if (!thread[tid].lqFull())
564  return false;
565  }
566 
567  return true;
568 }
569 
570 template<class Impl>
571 bool
573 {
574  //@todo: Change to Calculate All Entries for
575  //Dynamic Policy
576  if (lsqPolicy == SMTQueuePolicy::Dynamic)
577  return lqFull();
578  else
579  return thread[tid].lqFull();
580 }
581 
582 template<class Impl>
583 bool
585 {
586  list<ThreadID>::iterator threads = activeThreads->begin();
588 
589  while (threads != end) {
590  ThreadID tid = *threads++;
591 
592  if (!sqFull(tid))
593  return false;
594  }
595 
596  return true;
597 }
598 
599 template<class Impl>
600 bool
602 {
603  //@todo: Change to Calculate All Entries for
604  //Dynamic Policy
605  if (lsqPolicy == SMTQueuePolicy::Dynamic)
606  return sqFull();
607  else
608  return thread[tid].sqFull();
609 }
610 
611 template<class Impl>
612 bool
614 {
615  list<ThreadID>::iterator threads = activeThreads->begin();
617 
618  while (threads != end) {
619  ThreadID tid = *threads++;
620 
621  if (!thread[tid].isStalled())
622  return false;
623  }
624 
625  return true;
626 }
627 
628 template<class Impl>
629 bool
631 {
632  if (lsqPolicy == SMTQueuePolicy::Dynamic)
633  return isStalled();
634  else
635  return thread[tid].isStalled();
636 }
637 
638 template<class Impl>
639 bool
641 {
642  list<ThreadID>::iterator threads = activeThreads->begin();
644 
645  while (threads != end) {
646  ThreadID tid = *threads++;
647 
648  if (hasStoresToWB(tid))
649  return true;
650  }
651 
652  return false;
653 }
654 
655 template<class Impl>
656 bool
658 {
659  list<ThreadID>::iterator threads = activeThreads->begin();
661 
662  while (threads != end) {
663  ThreadID tid = *threads++;
664 
665  if (willWB(tid))
666  return true;
667  }
668 
669  return false;
670 }
671 
672 template<class Impl>
673 void
675 {
678 
679  while (threads != end) {
680  ThreadID tid = *threads++;
681 
682  thread[tid].dumpInsts();
683  }
684 }
685 
686 template<class Impl>
687 Fault
688 LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
689  unsigned int size, Addr addr, Request::Flags flags,
690  uint64_t *res, AtomicOpFunctorPtr amo_op,
691  const std::vector<bool>& byte_enable)
692 {
693  // This comming request can be either load, store or atomic.
694  // Atomic request has a corresponding pointer to its atomic memory
695  // operation
696  bool isAtomic M5_VAR_USED = !isLoad && amo_op;
697 
698  ThreadID tid = cpu->contextToThread(inst->contextId());
699  auto cacheLineSize = cpu->cacheLineSize();
700  bool needs_burst = transferNeedsBurst(addr, size, cacheLineSize);
701  LSQRequest* req = nullptr;
702 
703  // Atomic requests that access data across cache line boundary are
704  // currently not allowed since the cache does not guarantee corresponding
705  // atomic memory operations to be executed atomically across a cache line.
706  // For ISAs such as x86 that supports cross-cache-line atomic instructions,
707  // the cache needs to be modified to perform atomic update to both cache
708  // lines. For now, such cross-line update is not supported.
709  assert(!isAtomic || (isAtomic && !needs_burst));
710 
711  if (inst->translationStarted()) {
712  req = inst->savedReq;
713  assert(req);
714  } else {
715  if (needs_burst) {
716  req = new SplitDataRequest(&thread[tid], inst, isLoad, addr,
717  size, flags, data, res);
718  } else {
719  req = new SingleDataRequest(&thread[tid], inst, isLoad, addr,
720  size, flags, data, res, std::move(amo_op));
721  }
722  assert(req);
723  if (!byte_enable.empty()) {
724  req->_byteEnable = byte_enable;
725  }
726  inst->setRequest();
727  req->taskId(cpu->taskId());
728 
729  // There might be fault from a previous execution attempt if this is
730  // a strictly ordered load
731  inst->getFault() = NoFault;
732 
733  req->initiateTranslation();
734  }
735 
736  /* This is the place were instructions get the effAddr. */
737  if (req->isTranslationComplete()) {
738  if (req->isMemAccessRequired()) {
739  inst->effAddr = req->getVaddr();
740  inst->effSize = size;
741  inst->effAddrValid(true);
742 
743  if (cpu->checker) {
744  inst->reqToVerify = std::make_shared<Request>(*req->request());
745  }
746  Fault fault;
747  if (isLoad)
748  fault = cpu->read(req, inst->lqIdx);
749  else
750  fault = cpu->write(req, data, inst->sqIdx);
751  // inst->getFault() may have the first-fault of a
752  // multi-access split request at this point.
753  // Overwrite that only if we got another type of fault
754  // (e.g. re-exec).
755  if (fault != NoFault)
756  inst->getFault() = fault;
757  } else if (isLoad) {
758  inst->setMemAccPredicate(false);
759  // Commit will have to clean up whatever happened. Set this
760  // instruction as executed.
761  inst->setExecuted();
762  }
763  }
764 
765  if (inst->traceData)
766  inst->traceData->setMem(addr, size, flags);
767 
768  return inst->getFault();
769 }
770 
771 template<class Impl>
772 void
775 {
776  _fault.push_back(fault);
777  numInTranslationFragments = 0;
778  numTranslatedFragments = 1;
779  /* If the instruction has been squahsed, let the request know
780  * as it may have to self-destruct. */
781  if (_inst->isSquashed()) {
782  this->squashTranslation();
783  } else {
784  _inst->strictlyOrdered(req->isStrictlyOrdered());
785 
786  flags.set(Flag::TranslationFinished);
787  if (fault == NoFault) {
788  _inst->physEffAddr = req->getPaddr();
789  _inst->memReqFlags = req->getFlags();
790  if (req->isCondSwap()) {
791  assert(_res);
792  req->setExtraData(*_res);
793  }
794  setState(State::Request);
795  } else {
796  setState(State::Fault);
797  }
798 
799  LSQRequest::_inst->fault = fault;
800  LSQRequest::_inst->translationCompleted(true);
801  }
802 }
803 
804 template<class Impl>
805 void
808 {
809  int i;
810  for (i = 0; i < _requests.size() && _requests[i] != req; i++);
811  assert(i < _requests.size());
812  _fault[i] = fault;
813 
814  numInTranslationFragments--;
815  numTranslatedFragments++;
816 
817  if (fault == NoFault)
818  mainReq->setFlags(req->getFlags());
819 
820  if (numTranslatedFragments == _requests.size()) {
821  if (_inst->isSquashed()) {
822  this->squashTranslation();
823  } else {
824  _inst->strictlyOrdered(mainReq->isStrictlyOrdered());
825  flags.set(Flag::TranslationFinished);
826  _inst->translationCompleted(true);
827 
828  for (i = 0; i < _fault.size() && _fault[i] == NoFault; i++);
829  if (i > 0) {
830  _inst->physEffAddr = request(0)->getPaddr();
831  _inst->memReqFlags = mainReq->getFlags();
832  if (mainReq->isCondSwap()) {
833  assert (i == _fault.size());
834  assert(_res);
835  mainReq->setExtraData(*_res);
836  }
837  if (i == _fault.size()) {
838  _inst->fault = NoFault;
839  setState(State::Request);
840  } else {
841  _inst->fault = _fault[i];
842  setState(State::PartialFault);
843  }
844  } else {
845  _inst->fault = _fault[0];
846  setState(State::Fault);
847  }
848  }
849 
850  }
851 }
852 
853 template<class Impl>
854 void
856 {
857  assert(_requests.size() == 0);
858 
859  this->addRequest(_addr, _size, _byteEnable);
860 
861  if (_requests.size() > 0) {
862  _requests.back()->setReqInstSeqNum(_inst->seqNum);
863  _requests.back()->taskId(_taskId);
864  _inst->translationStarted(true);
865  setState(State::Translation);
866  flags.set(Flag::TranslationStarted);
867 
868  _inst->savedReq = this;
869  sendFragmentToTranslation(0);
870  } else {
871  _inst->setMemAccPredicate(false);
872  }
873 }
874 
875 template<class Impl>
876 PacketPtr
878 {
879  return _mainPacket;
880 }
881 
882 template<class Impl>
885 {
886  return mainReq;
887 }
888 
889 template<class Impl>
890 void
892 {
893  auto cacheLineSize = _port.cacheLineSize();
894  Addr base_addr = _addr;
895  Addr next_addr = addrBlockAlign(_addr + cacheLineSize, cacheLineSize);
896  Addr final_addr = addrBlockAlign(_addr + _size, cacheLineSize);
897  uint32_t size_so_far = 0;
898 
899  mainReq = std::make_shared<Request>(_inst->getASID(), base_addr,
900  _size, _flags, _inst->masterId(),
901  _inst->instAddr(), _inst->contextId());
902  if (!_byteEnable.empty()) {
903  mainReq->setByteEnable(_byteEnable);
904  }
905 
906  // Paddr is not used in mainReq. However, we will accumulate the flags
907  // from the sub requests into mainReq by calling setFlags() in finish().
908  // setFlags() assumes that paddr is set so flip the paddr valid bit here to
909  // avoid a potential assert in setFlags() when we call it from finish().
910  mainReq->setPaddr(0);
911 
912  /* Get the pre-fix, possibly unaligned. */
913  if (_byteEnable.empty()) {
914  this->addRequest(base_addr, next_addr - base_addr, _byteEnable);
915  } else {
916  auto it_start = _byteEnable.begin();
917  auto it_end = _byteEnable.begin() + (next_addr - base_addr);
918  this->addRequest(base_addr, next_addr - base_addr,
919  std::vector<bool>(it_start, it_end));
920  }
921  size_so_far = next_addr - base_addr;
922 
923  /* We are block aligned now, reading whole blocks. */
924  base_addr = next_addr;
925  while (base_addr != final_addr) {
926  if (_byteEnable.empty()) {
927  this->addRequest(base_addr, cacheLineSize, _byteEnable);
928  } else {
929  auto it_start = _byteEnable.begin() + size_so_far;
930  auto it_end = _byteEnable.begin() + size_so_far + cacheLineSize;
931  this->addRequest(base_addr, cacheLineSize,
932  std::vector<bool>(it_start, it_end));
933  }
934  size_so_far += cacheLineSize;
935  base_addr += cacheLineSize;
936  }
937 
938  /* Deal with the tail. */
939  if (size_so_far < _size) {
940  if (_byteEnable.empty()) {
941  this->addRequest(base_addr, _size - size_so_far, _byteEnable);
942  } else {
943  auto it_start = _byteEnable.begin() + size_so_far;
944  auto it_end = _byteEnable.end();
945  this->addRequest(base_addr, _size - size_so_far,
946  std::vector<bool>(it_start, it_end));
947  }
948  }
949 
950  if (_requests.size() > 0) {
951  /* Setup the requests and send them to translation. */
952  for (auto& r: _requests) {
953  r->setReqInstSeqNum(_inst->seqNum);
954  r->taskId(_taskId);
955  }
956 
957  _inst->translationStarted(true);
958  setState(State::Translation);
959  flags.set(Flag::TranslationStarted);
960  this->_inst->savedReq = this;
961  numInTranslationFragments = 0;
962  numTranslatedFragments = 0;
963  _fault.resize(_requests.size());
964 
965  for (uint32_t i = 0; i < _requests.size(); i++) {
966  sendFragmentToTranslation(i);
967  }
968  } else {
969  _inst->setMemAccPredicate(false);
970  }
971 }
972 
973 template<class Impl>
974 void
976 {
977  numInTranslationFragments++;
978  _port.dTLB()->translateTiming(
979  this->request(i),
980  this->_inst->thread->getTC(), this,
981  this->isLoad() ? BaseTLB::Read : BaseTLB::Write);
982 }
983 
984 template<class Impl>
985 bool
987 {
988  assert(_numOutstandingPackets == 1);
989  auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
990  flags.set(Flag::Complete);
991  state->outstanding--;
992  assert(pkt == _packets.front());
993  _port.completeDataAccess(pkt);
994  return true;
995 }
996 
997 template<class Impl>
998 bool
1000 {
1001  auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
1002  uint32_t pktIdx = 0;
1003  while (pktIdx < _packets.size() && pkt != _packets[pktIdx])
1004  pktIdx++;
1005  assert(pktIdx < _packets.size());
1006  numReceivedPackets++;
1007  state->outstanding--;
1008  if (numReceivedPackets == _packets.size()) {
1009  flags.set(Flag::Complete);
1010  /* Assemble packets. */
1011  PacketPtr resp = isLoad()
1012  ? Packet::createRead(mainReq)
1013  : Packet::createWrite(mainReq);
1014  if (isLoad())
1015  resp->dataStatic(_inst->memData);
1016  else
1017  resp->dataStatic(_data);
1018  resp->senderState = _senderState;
1019  _port.completeDataAccess(resp);
1020  delete resp;
1021  }
1022  return true;
1023 }
1024 
1025 template<class Impl>
1026 void
1028 {
1029  assert(_senderState);
1030  /* Retries do not create new packets. */
1031  if (_packets.size() == 0) {
1032  _packets.push_back(
1033  isLoad()
1034  ? Packet::createRead(request())
1035  : Packet::createWrite(request()));
1036  _packets.back()->dataStatic(_inst->memData);
1037  _packets.back()->senderState = _senderState;
1038  }
1039  assert(_packets.size() == 1);
1040 }
1041 
1042 template<class Impl>
1043 void
1045 {
1046  /* Extra data?? */
1047  Addr base_address = _addr;
1048 
1049  if (_packets.size() == 0) {
1050  /* New stuff */
1051  if (isLoad()) {
1052  _mainPacket = Packet::createRead(mainReq);
1053  _mainPacket->dataStatic(_inst->memData);
1054  }
1055  for (int i = 0; i < _requests.size() && _fault[i] == NoFault; i++) {
1056  RequestPtr r = _requests[i];
1057  PacketPtr pkt = isLoad() ? Packet::createRead(r)
1058  : Packet::createWrite(r);
1059  ptrdiff_t offset = r->getVaddr() - base_address;
1060  if (isLoad()) {
1061  pkt->dataStatic(_inst->memData + offset);
1062  } else {
1063  uint8_t* req_data = new uint8_t[r->getSize()];
1064  std::memcpy(req_data,
1065  _inst->memData + offset,
1066  r->getSize());
1067  pkt->dataDynamic(req_data);
1068  }
1069  pkt->senderState = _senderState;
1070  _packets.push_back(pkt);
1071  }
1072  }
1073  assert(_packets.size() > 0);
1074 }
1075 
1076 template<class Impl>
1077 void
1079 {
1080  assert(_numOutstandingPackets == 0);
1081  if (lsqUnit()->trySendPacket(isLoad(), _packets.at(0)))
1082  _numOutstandingPackets = 1;
1083 }
1084 
1085 template<class Impl>
1086 void
1088 {
1089  /* Try to send the packets. */
1090  while (numReceivedPackets + _numOutstandingPackets < _packets.size() &&
1091  lsqUnit()->trySendPacket(isLoad(),
1092  _packets.at(numReceivedPackets + _numOutstandingPackets))) {
1093  _numOutstandingPackets++;
1094  }
1095 }
1096 
1097 template<class Impl>
1098 void
1100  PacketPtr pkt)
1101 {
1102  TheISA::handleIprWrite(thread, pkt);
1103 }
1104 
1105 template<class Impl>
1106 void
1108  PacketPtr mainPkt)
1109 {
1110  unsigned offset = 0;
1111  for (auto r: _requests) {
1112  PacketPtr pkt = new Packet(r, MemCmd::WriteReq);
1113  pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
1114  TheISA::handleIprWrite(thread, pkt);
1115  offset += r->getSize();
1116  delete pkt;
1117  }
1118 }
1119 
1120 template<class Impl>
1121 Cycles
1123  PacketPtr pkt)
1124 {
1125  return TheISA::handleIprRead(thread, pkt);
1126 }
1127 
1128 template<class Impl>
1129 Cycles
1131  PacketPtr mainPkt)
1132 {
1133  Cycles delay(0);
1134  unsigned offset = 0;
1135 
1136  for (auto r: _requests) {
1137  PacketPtr pkt = new Packet(r, MemCmd::ReadReq);
1138  pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
1139  Cycles d = TheISA::handleIprRead(thread, pkt);
1140  if (d > delay)
1141  delay = d;
1142  offset += r->getSize();
1143  delete pkt;
1144  }
1145  return delay;
1146 }
1147 
1148 template<class Impl>
1149 bool
1151 {
1152  return ( (LSQRequest::_requests[0]->getPaddr() & blockMask) == blockAddr);
1153 }
1154 
1170 template<class Impl>
1171 bool
1173 {
1174  bool is_hit = false;
1175  for (auto &r: _requests) {
1185  if (r->hasPaddr() && (r->getPaddr() & blockMask) == blockAddr) {
1186  is_hit = true;
1187  break;
1188  }
1189  }
1190  return is_hit;
1191 }
1192 
1193 template <class Impl>
1194 bool
1196 {
1197  return lsq->recvTimingResp(pkt);
1198 }
1199 
1200 template <class Impl>
1201 void
1203 {
1204  for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
1205  if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
1206  cpu->wakeup(tid);
1207  }
1208  }
1209  lsq->recvTimingSnoopReq(pkt);
1210 }
1211 
1212 template <class Impl>
1213 void
1215 {
1216  lsq->recvReqRetry();
1217 }
1218 
1219 #endif//__CPU_O3_LSQ_IMPL_HH__
IEW * iewStage
The IEW stage pointer.
Definition: lsq.hh:1049
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:167
#define DPRINTF(x,...)
Definition: trace.hh:229
void takeOverFrom()
Takes over execution from another CPU&#39;s thread.
Definition: lsq_impl.hh:169
virtual void initiateTranslation()
Definition: lsq_impl.hh:891
ThreadID numThreads
Number of Threads.
Definition: lsq.hh:1122
Impl::DynInstPtr DynInstPtr
Definition: lsq.hh:69
Bitfield< 28 > v
decltype(nullptr) constexpr NoFault
Definition: types.hh:245
Fault pushRequest(const DynInstPtr &inst, bool isLoad, uint8_t *data, unsigned int size, Addr addr, Request::Flags flags, uint64_t *res, AtomicOpFunctorPtr amo_op, const std::vector< bool > &byte_enable)
Definition: lsq_impl.hh:688
Cycles is a wrapper class for representing cycle counts, i.e.
Definition: types.hh:83
virtual bool recvTimingResp(PacketPtr pkt)
Timing version of receive.
Definition: lsq_impl.hh:1195
virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask)
Caches may probe into the load-store queue to enforce memory ordering guarantees. ...
Definition: lsq_impl.hh:1172
void taskId(const uint32_t &v)
Definition: lsq.hh:467
bool transferNeedsBurst(Addr addr, unsigned int size, unsigned int block_size)
Returns true if the given memory access (address, size) needs to be fragmented across aligned fixed-s...
Definition: utils.hh:79
bool willWB()
Returns if the LSQ will write back to memory this cycle.
Definition: lsq_impl.hh:657
Bitfield< 7 > i
std::vector< RequestPtr > _requests
Definition: lsq.hh:294
virtual void sendPacketToCache()
Definition: lsq_impl.hh:1087
virtual void handleIprWrite(ThreadContext *thread, PacketPtr pkt)
Memory mapped IPR accesses.
Definition: lsq_impl.hh:1107
unsigned maxSQEntries
Max SQ Size - Used to Enforce Sharing Policies.
Definition: lsq.hh:1113
SMTQueuePolicy lsqPolicy
The LSQ policy for SMT mode.
Definition: lsq.hh:1076
std::vector< LSQUnit > thread
The LSQ units for individual threads.
Definition: lsq.hh:1119
std::vector< bool > _byteEnable
Definition: lsq.hh:300
std::string name() const
Returns the name of the LSQ.
Definition: lsq_impl.hh:115
virtual Cycles handleIprRead(ThreadContext *thread, PacketPtr pkt)
Definition: lsq_impl.hh:1122
void tick()
Ticks the LSQ.
Definition: lsq_impl.hh:181
std::shared_ptr< Request > RequestPtr
Definition: request.hh:83
bool sqFull()
Returns if any of the SQs are full.
Definition: lsq_impl.hh:584
ip6_addr_t addr
Definition: inet.hh:335
static PacketPtr createWrite(const RequestPtr &req)
Definition: packet.hh:919
std::unique_ptr< AtomicOpFunctor > AtomicOpFunctorPtr
Definition: amo.hh:230
Cycles handleIprRead(ThreadContext *, Packet *)
Definition: mmapped_ipr.hh:48
virtual void recvReqRetry()
Handles doing a retry of the previous send.
Definition: lsq_impl.hh:1214
bool violation()
Returns whether or not there was a memory ordering violation.
Definition: lsq_impl.hh:287
bool hasStoresToWB()
Returns whether or not there are any stores to write back to memory.
Definition: lsq_impl.hh:640
Bitfield< 23, 0 > offset
Definition: types.hh:154
Overload hash function for BasicBlockRange type.
Definition: vec_reg.hh:586
Derived class to hold any sender state the LSQ needs.
Definition: lsq.hh:75
Fault executeLoad(const DynInstPtr &inst)
Executes a load.
Definition: lsq_impl.hh:250
Bitfield< 4, 0 > mode
bool isTranslationComplete()
Definition: lsq.hh:593
int usedStorePorts
The number of used cache ports in this cycle by stores.
Definition: lsq.hh:1068
bool isFull()
Returns if the LSQ is full (either LQ or SQ is full).
Definition: lsq_impl.hh:485
Definition: lsq.hh:64
bool lqEmpty() const
Returns if all of the LQs are empty.
Definition: lsq_impl.hh:521
bool isInvalidate() const
Definition: packet.hh:543
ThreadContext is the external interface to all thread state for anything outside of the CPU...
T * getPtr()
get a pointer to the data ptr.
Definition: packet.hh:1090
void writebackStores()
Attempts to write back stores until all cache ports are used or the interface becomes blocked...
Definition: lsq_impl.hh:268
void dumpInsts() const
Debugging function to print out all instructions.
Definition: lsq_impl.hh:674
void dataStatic(T *p)
Set the data pointer to the following value that should not be freed.
Definition: packet.hh:1040
virtual void finish(const Fault &fault, const RequestPtr &req, ThreadContext *tc, BaseTLB::Mode mode)
Definition: lsq_impl.hh:773
bool isMemAccessRequired()
Definition: lsq.hh:620
bool _cacheBlocked
D-cache is blocked.
Definition: lsq.hh:1064
virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask)
Test if the request accesses a particular cache line.
Definition: lsq_impl.hh:1150
void insertLoad(const DynInstPtr &load_inst)
Inserts a load into the LSQ.
Definition: lsq_impl.hh:232
bool isDrained() const
Has the LSQ drained?
Definition: lsq_impl.hh:150
Addr getVaddr(int idx=0) const
Definition: lsq.hh:483
void cachePortBusy(bool is_load)
Another store port is in use.
Definition: lsq_impl.hh:220
unsigned numFreeStoreEntries()
Returns the number of free store entries.
Definition: lsq_impl.hh:453
bool isError() const
Definition: packet.hh:555
virtual void recvTimingSnoopReq(PacketPtr pkt)
Receive a timing snoop request from the peer.
Definition: lsq_impl.hh:1202
virtual void buildPackets()
Definition: lsq_impl.hh:1027
virtual void handleIprWrite(ThreadContext *thread, PacketPtr pkt)
Memory mapped IPR accesses.
Definition: lsq_impl.hh:1099
DcachePort dcachePort
Data port.
Definition: lsq.hh:1116
int usedLoadPorts
The number of used cache ports in this cycle by loads.
Definition: lsq.hh:1072
void drainSanityCheck() const
Perform sanity checks after a drain.
Definition: lsq_impl.hh:140
int numLoads()
Returns the total number of loads in the load queue.
Definition: lsq_impl.hh:399
Bitfield< 9 > d
unsigned maxLQEntries
Max LQ Size - Used to Enforce Sharing Policies.
Definition: lsq.hh:1110
void sendFragmentToTranslation(int i)
Definition: lsq_impl.hh:975
Addr getAddr() const
Definition: packet.hh:726
int numStoresToWB(ThreadID tid)
Returns the number of stores a specific thread has to write back.
Definition: lsq.hh:1000
Fault executeStore(const DynInstPtr &inst)
Executes a store.
Definition: lsq_impl.hh:259
virtual void finish(const Fault &fault, const RequestPtr &req, ThreadContext *tc, BaseTLB::Mode mode)
Definition: lsq_impl.hh:806
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:142
static PacketPtr createRead(const RequestPtr &req)
Constructor-like methods that return Packets based on Request objects.
Definition: packet.hh:913
A Packet is used to encapsulate a transfer between two objects in the memory system (e.g. the L1 and L2 cache).
Definition: packet.hh:255
virtual void sendPacketToCache()
Definition: lsq_impl.hh:1078
void insertStore(const DynInstPtr &store_inst)
Inserts a store into the LSQ.
Definition: lsq_impl.hh:241
const FlagsType total
Print the total.
Definition: info.hh:51
bool isEmpty() const
Returns if the LSQ is empty (both LQ and SQ are empty).
Definition: lsq_impl.hh:514
const DynInstPtr _inst
Definition: lsq.hh:290
int getCount()
Returns the number of instructions in all of the queues.
Definition: lsq_impl.hh:381
Mode
Definition: tlb.hh:59
virtual bool recvTimingResp(PacketPtr pkt)
Definition: lsq_impl.hh:999
int16_t ThreadID
Thread index/ID type.
Definition: types.hh:227
Impl::O3CPU O3CPU
Definition: lsq.hh:68
virtual void initiateTranslation()
Definition: lsq_impl.hh:855
uint8_t outstanding
Number of outstanding packets to complete.
Definition: lsq.hh:96
unsigned numFreeLoadEntries()
Returns the number of free load entries.
Definition: lsq_impl.hh:435
RequestPtr request(int idx=0)
Definition: lsq.hh:475
void recvReqRetry()
Retry the previous send that failed.
Definition: lsq_impl.hh:305
SenderState * senderState
This packet's sender state.
Definition: packet.hh:480
void setActiveThreads(std::list< ThreadID > *at_ptr)
Sets the pointer to the list of active threads.
Definition: lsq_impl.hh:132
int cacheStorePorts
The number of cache ports available each cycle (stores only).
Definition: lsq.hh:1066
virtual void buildPackets()
Definition: lsq_impl.hh:1044
LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params)
Constructs an LSQ with the given parameters.
Definition: lsq_impl.hh:63
int numStores()
Returns the total number of stores in the store queue.
Definition: lsq_impl.hh:417
virtual bool recvTimingResp(PacketPtr pkt)
Definition: lsq_impl.hh:986
bool cachePortAvailable(bool is_load) const
Is any store port available to use?
Definition: lsq_impl.hh:207
void dataDynamic(T *p)
Set the data pointer to a value that should have delete [] called on it.
Definition: packet.hh:1078
Memory operation metadata.
Definition: lsq.hh:232
void completeDataAccess(PacketPtr pkt)
Definition: lsq_impl.hh:317
int cacheLoadPorts
The number of cache ports available each cycle (loads only).
Definition: lsq.hh:1070
virtual PacketPtr mainPacket()
Definition: lsq_impl.hh:877
void regStats()
Registers statistics of each LSQ unit.
Definition: lsq_impl.hh:122
virtual RequestPtr mainRequest()
Definition: lsq_impl.hh:884
Addr addrBlockAlign(Addr addr, Addr block_size)
Returns the address of the closest aligned fixed-size block to the given address. ...
Definition: utils.hh:65
bool recvTimingResp(PacketPtr pkt)
Handles writing back and completing the load or store that has returned from memory.
Definition: lsq_impl.hh:326
virtual void initiateTranslation()=0
Cycles handleIprWrite(ThreadContext *, Packet *)
Definition: mmapped_ipr.hh:49
bool cacheBlocked() const
Is D-cache blocked?
Definition: lsq_impl.hh:193
static const int NumArgumentRegs M5_VAR_USED
Definition: process.cc:84
Impl::CPUPol::IEW IEW
Definition: lsq.hh:70
const std::string & cmdString() const
Return the string name of the cmd field (for debugging and tracing).
Definition: packet.hh:523
O3CPU * cpu
The CPU pointer.
Definition: lsq.hh:1046
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true and allows the programmer to specify diagnostic printout.
Definition: logging.hh:185
virtual Cycles handleIprRead(ThreadContext *thread, PacketPtr pkt)
Definition: lsq_impl.hh:1130
std::list< ThreadID > * activeThreads
List of Active Threads in System.
Definition: lsq.hh:1102
bool isStalled()
Returns if the LSQ is stalled due to a memory operation that must be replayed.
Definition: lsq_impl.hh:613
void recvTimingSnoopReq(PacketPtr pkt)
Definition: lsq_impl.hh:364
const char data[]
std::shared_ptr< FaultBase > Fault
Definition: types.hh:240
bool sqEmpty() const
Returns if all of the SQs are empty.
Definition: lsq_impl.hh:538
ProbePointArg< PacketInfo > Packet
Packet probe point.
Definition: mem.hh:104
bool lqFull()
Returns if any of the LQs are full.
Definition: lsq_impl.hh:555

Generated on Fri Feb 28 2020 16:26:59 for gem5 by doxygen 1.8.13