/* gem5 v20.0.0.3 -- cpu/minor/lsq.cc (recovered from a Doxygen source listing) */
/*
 * Copyright (c) 2013-2014,2017-2018 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
37 
#include "cpu/minor/lsq.hh"

#include <iomanip>
#include <sstream>

#include "arch/locked_mem.hh"
#include "base/logging.hh"
#include "cpu/minor/cpu.hh"
#include "cpu/minor/exec_context.hh"
#include "cpu/minor/execute.hh"
#include "cpu/minor/pipeline.hh"
#include "cpu/utils.hh"
#include "debug/Activity.hh"
#include "debug/MinorMem.hh"
52 
53 namespace Minor
54 {
55 
56 LSQ::LSQRequest::LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_,
57  PacketDataPtr data_, uint64_t *res_) :
58  SenderState(),
59  port(port_),
60  inst(inst_),
61  isLoad(isLoad_),
62  data(data_),
63  packet(NULL),
64  request(),
65  res(res_),
66  skipped(false),
67  issuedToMemory(false),
68  isTranslationDelayed(false),
69  state(NotIssued)
70 {
71  request = std::make_shared<Request>();
72 }
73 
74 void
76 {
77  SimpleThread &thread = *port.cpu.threads[inst->id.threadId];
78  TheISA::PCState old_pc = thread.pcState();
79  ExecContext context(port.cpu, thread, port.execute, inst);
80  Fault M5_VAR_USED fault = inst->translationFault;
81 
82  // Give the instruction a chance to suppress a translation fault
83  inst->translationFault = inst->staticInst->initiateAcc(&context, nullptr);
84  if (inst->translationFault == NoFault) {
85  DPRINTFS(MinorMem, (&port),
86  "Translation fault suppressed for inst:%s\n", *inst);
87  } else {
88  assert(inst->translationFault == fault);
89  }
90  thread.pcState(old_pc);
91 }
92 
93 void
95 {
96  DPRINTFS(MinorMem, (&port), "Complete disabled mem access for inst:%s\n",
97  *inst);
98 
99  SimpleThread &thread = *port.cpu.threads[inst->id.threadId];
100  TheISA::PCState old_pc = thread.pcState();
101 
102  ExecContext context(port.cpu, thread, port.execute, inst);
103 
104  context.setMemAccPredicate(false);
105  inst->staticInst->completeAcc(nullptr, &context, inst->traceData);
106 
107  thread.pcState(old_pc);
108 }
109 
110 void
112 {
113  port.cpu.threads[inst->id.threadId]->setMemAccPredicate(false);
114  DPRINTFS(MinorMem, (&port), "Disable mem access for inst:%s\n", *inst);
115 }
116 
119  Addr req1_addr, unsigned int req1_size,
120  Addr req2_addr, unsigned int req2_size)
121 {
122  /* 'end' here means the address of the byte just past the request
123  * blocks */
124  Addr req2_end_addr = req2_addr + req2_size;
125  Addr req1_end_addr = req1_addr + req1_size;
126 
127  AddrRangeCoverage ret;
128 
129  if (req1_addr >= req2_end_addr || req1_end_addr <= req2_addr)
130  ret = NoAddrRangeCoverage;
131  else if (req1_addr <= req2_addr && req1_end_addr >= req2_end_addr)
132  ret = FullAddrRangeCoverage;
133  else
135 
136  return ret;
137 }
138 
141 {
143  request->getPaddr(), request->getSize(),
144  other_request->request->getPaddr(), other_request->request->getSize());
145  /* If there is a strobe mask then store data forwarding might not be
146  * correct. Instead of checking enablemant of every byte we just fall back
147  * to PartialAddrRangeCoverage to prohibit store data forwarding */
148  if (ret == FullAddrRangeCoverage && request->isMasked())
150  return ret;
151 }
152 
153 
154 bool
156 {
157  return inst->isInst() && inst->staticInst->isMemBarrier();
158 }
159 
160 bool
162 {
163  return state == StoreToStoreBuffer;
164 }
165 
166 void
168 {
169  DPRINTFS(MinorMem, (&port), "Setting state from %d to %d for request:"
170  " %s\n", state, new_state, *inst);
171  state = new_state;
172 }
173 
174 bool
176 {
177  /* @todo, There is currently only one 'completed' state. This
178  * may not be a good choice */
179  return state == Complete;
180 }
181 
182 void
183 LSQ::LSQRequest::reportData(std::ostream &os) const
184 {
185  os << (isLoad ? 'R' : 'W') << ';';
186  inst->reportData(os);
187  os << ';' << state;
188 }
189 
190 std::ostream &
191 operator <<(std::ostream &os, LSQ::AddrRangeCoverage coverage)
192 {
193  switch (coverage) {
195  os << "PartialAddrRangeCoverage";
196  break;
198  os << "FullAddrRangeCoverage";
199  break;
201  os << "NoAddrRangeCoverage";
202  break;
203  default:
204  os << "AddrRangeCoverage-" << static_cast<int>(coverage);
205  break;
206  }
207  return os;
208 }
209 
210 std::ostream &
212 {
213  switch (state) {
215  os << "NotIssued";
216  break;
218  os << "InTranslation";
219  break;
221  os << "Translated";
222  break;
224  os << "Failed";
225  break;
227  os << "RequestIssuing";
228  break;
230  os << "StoreToStoreBuffer";
231  break;
233  os << "StoreInStoreBuffer";
234  break;
236  os << "StoreBufferIssuing";
237  break;
239  os << "RequestNeedsRetry";
240  break;
242  os << "StoreBufferNeedsRetry";
243  break;
245  os << "Complete";
246  break;
247  default:
248  os << "LSQRequestState-" << static_cast<int>(state);
249  break;
250  }
251  return os;
252 }
253 
254 void
256 {
257  bool is_last_barrier =
258  inst->id.execSeqNum >= lastMemBarrier[inst->id.threadId];
259 
260  DPRINTF(MinorMem, "Moving %s barrier out of store buffer inst: %s\n",
261  (is_last_barrier ? "last" : "a"), *inst);
262 
263  if (is_last_barrier)
264  lastMemBarrier[inst->id.threadId] = 0;
265 }
266 
267 void
268 LSQ::SingleDataRequest::finish(const Fault &fault_, const RequestPtr &request_,
270 {
272 
273  DPRINTFS(MinorMem, (&port), "Received translation response for"
274  " request: %s delayed:%d %s\n", *inst, isTranslationDelayed,
275  fault_ != NoFault ? fault_->name() : "");
276 
277  if (fault_ != NoFault) {
278  inst->translationFault = fault_;
279  if (isTranslationDelayed) {
281  if (inst->translationFault == NoFault) {
284  }
285  }
287  } else {
289  makePacket();
290  }
292 
293  /* Let's try and wake up the processor for the next cycle */
295 }
296 
297 void
299 {
300  ThreadContext *thread = port.cpu.getContext(
301  inst->id.threadId);
302 
303  const auto &byte_enable = request->getByteEnable();
304  if (byte_enable.size() == 0 ||
305  isAnyActiveElement(byte_enable.cbegin(), byte_enable.cend())) {
307 
309 
310  DPRINTFS(MinorMem, (&port), "Submitting DTLB request\n");
311  /* Submit the translation request. The response will come through
312  * finish/markDelayed on the LSQRequest as it bears the Translation
313  * interface */
314  thread->getDTBPtr()->translateTiming(
315  request, thread, this, (isLoad ? BaseTLB::Read : BaseTLB::Write));
316  } else {
319  }
320 }
321 
322 void
324 {
325  DPRINTFS(MinorMem, (&port), "Retiring packet\n");
326  packet = packet_;
327  packetInFlight = false;
329 }
330 
331 void
332 LSQ::SplitDataRequest::finish(const Fault &fault_, const RequestPtr &request_,
334 {
336 
337  unsigned int M5_VAR_USED expected_fragment_index =
338  numTranslatedFragments;
339 
340  numInTranslationFragments--;
341  numTranslatedFragments++;
342 
343  DPRINTFS(MinorMem, (&port), "Received translation response for fragment"
344  " %d of request: %s delayed:%d %s\n", expected_fragment_index,
346  fault_ != NoFault ? fault_->name() : "");
347 
348  assert(request_ == fragmentRequests[expected_fragment_index]);
349 
350  /* Wake up next cycle to get things going again in case the
351  * tryToSendToTransfers does take */
353 
354  if (fault_ != NoFault) {
355  /* tryToSendToTransfers will handle the fault */
356  inst->translationFault = fault_;
357 
358  DPRINTFS(MinorMem, (&port), "Faulting translation for fragment:"
359  " %d of request: %s\n",
360  expected_fragment_index, *inst);
361 
362  if (expected_fragment_index > 0 || isTranslationDelayed)
364  if (expected_fragment_index == 0) {
365  if (isTranslationDelayed && inst->translationFault == NoFault) {
368  } else {
370  }
371  } else if (inst->translationFault == NoFault) {
373  numTranslatedFragments--;
374  makeFragmentPackets();
375  } else {
377  }
379  } else if (numTranslatedFragments == numFragments) {
380  makeFragmentPackets();
383  } else {
384  /* Avoid calling translateTiming from within ::finish */
385  assert(!translationEvent.scheduled());
386  port.cpu.schedule(translationEvent, curTick());
387  }
388 }
389 
391  bool isLoad_, PacketDataPtr data_, uint64_t *res_) :
392  LSQRequest(port_, inst_, isLoad_, data_, res_),
393  translationEvent([this]{ sendNextFragmentToTranslation(); },
394  "translationEvent"),
395  numFragments(0),
402 {
403  /* Don't know how many elements are needed until the request is
404  * populated by the caller. */
405 }
406 
408 {
409  for (auto i = fragmentPackets.begin();
410  i != fragmentPackets.end(); i++)
411  {
412  delete *i;
413  }
414 }
415 
416 void
418 {
419  Addr base_addr = request->getVaddr();
420  unsigned int whole_size = request->getSize();
421  unsigned int line_width = port.lineWidth;
422 
423  unsigned int fragment_size;
424  Addr fragment_addr;
425 
426  std::vector<bool> fragment_write_byte_en;
427 
428  /* Assume that this transfer is across potentially many block snap
429  * boundaries:
430  *
431  * | _|________|________|________|___ |
432  * | |0| 1 | 2 | 3 | 4 | |
433  * | |_|________|________|________|___| |
434  * | | | | | |
435  *
436  * The first transfer (0) can be up to lineWidth in size.
437  * All the middle transfers (1-3) are lineWidth in size
438  * The last transfer (4) can be from zero to lineWidth - 1 in size
439  */
440  unsigned int first_fragment_offset =
441  addrBlockOffset(base_addr, line_width);
442  unsigned int last_fragment_size =
443  addrBlockOffset(base_addr + whole_size, line_width);
444  unsigned int first_fragment_size =
445  line_width - first_fragment_offset;
446 
447  unsigned int middle_fragments_total_size =
448  whole_size - (first_fragment_size + last_fragment_size);
449 
450  assert(addrBlockOffset(middle_fragments_total_size, line_width) == 0);
451 
452  unsigned int middle_fragment_count =
453  middle_fragments_total_size / line_width;
454 
455  numFragments = 1 /* first */ + middle_fragment_count +
456  (last_fragment_size == 0 ? 0 : 1);
457 
458  DPRINTFS(MinorMem, (&port), "Dividing transfer into %d fragmentRequests."
459  " First fragment size: %d Last fragment size: %d\n",
460  numFragments, first_fragment_size,
461  (last_fragment_size == 0 ? line_width : last_fragment_size));
462 
463  assert(((middle_fragment_count * line_width) +
464  first_fragment_size + last_fragment_size) == whole_size);
465 
466  fragment_addr = base_addr;
467  fragment_size = first_fragment_size;
468 
469  /* Just past the last address in the request */
470  Addr end_addr = base_addr + whole_size;
471 
472  auto& byte_enable = request->getByteEnable();
473  unsigned int num_disabled_fragments = 0;
474 
475  for (unsigned int fragment_index = 0; fragment_index < numFragments;
476  fragment_index++)
477  {
478  bool M5_VAR_USED is_last_fragment = false;
479 
480  if (fragment_addr == base_addr) {
481  /* First fragment */
482  fragment_size = first_fragment_size;
483  } else {
484  if ((fragment_addr + line_width) > end_addr) {
485  /* Adjust size of last fragment */
486  fragment_size = end_addr - fragment_addr;
487  is_last_fragment = true;
488  } else {
489  /* Middle fragments */
490  fragment_size = line_width;
491  }
492  }
493 
494  RequestPtr fragment = std::make_shared<Request>();
495  bool disabled_fragment = false;
496 
497  fragment->setContext(request->contextId());
498  if (byte_enable.empty()) {
499  fragment->setVirt(
500  fragment_addr, fragment_size, request->getFlags(),
501  request->masterId(), request->getPC());
502  } else {
503  // Set up byte-enable mask for the current fragment
504  auto it_start = byte_enable.begin() +
505  (fragment_addr - base_addr);
506  auto it_end = byte_enable.begin() +
507  (fragment_addr - base_addr) + fragment_size;
508  if (isAnyActiveElement(it_start, it_end)) {
509  fragment->setVirt(
510  fragment_addr, fragment_size, request->getFlags(),
511  request->masterId(), request->getPC());
512  fragment->setByteEnable(std::vector<bool>(it_start, it_end));
513  } else {
514  disabled_fragment = true;
515  }
516  }
517 
518  if (!disabled_fragment) {
519  DPRINTFS(MinorMem, (&port), "Generating fragment addr: 0x%x"
520  " size: %d (whole request addr: 0x%x size: %d) %s\n",
521  fragment_addr, fragment_size, base_addr, whole_size,
522  (is_last_fragment ? "last fragment" : ""));
523 
524  fragmentRequests.push_back(fragment);
525  } else {
526  num_disabled_fragments++;
527  }
528 
529  fragment_addr += fragment_size;
530  }
531  assert(numFragments >= num_disabled_fragments);
532  numFragments -= num_disabled_fragments;
533 }
534 
535 void
537 {
538  assert(numTranslatedFragments > 0);
539  Addr base_addr = request->getVaddr();
540 
541  DPRINTFS(MinorMem, (&port), "Making packets for request: %s\n", *inst);
542 
543  for (unsigned int fragment_index = 0;
544  fragment_index < numTranslatedFragments;
545  fragment_index++)
546  {
547  RequestPtr fragment = fragmentRequests[fragment_index];
548 
549  DPRINTFS(MinorMem, (&port), "Making packet %d for request: %s"
550  " (%d, 0x%x)\n",
551  fragment_index, *inst,
552  (fragment->hasPaddr() ? "has paddr" : "no paddr"),
553  (fragment->hasPaddr() ? fragment->getPaddr() : 0));
554 
555  Addr fragment_addr = fragment->getVaddr();
556  unsigned int fragment_size = fragment->getSize();
557 
558  uint8_t *request_data = NULL;
559 
560  if (!isLoad) {
561  /* Split data for Packets. Will become the property of the
562  * outgoing Packets */
563  request_data = new uint8_t[fragment_size];
564  std::memcpy(request_data, data + (fragment_addr - base_addr),
565  fragment_size);
566  }
567 
568  assert(fragment->hasPaddr());
569 
570  PacketPtr fragment_packet =
571  makePacketForRequest(fragment, isLoad, this, request_data);
572 
573  fragmentPackets.push_back(fragment_packet);
574  /* Accumulate flags in parent request */
575  request->setFlags(fragment->getFlags());
576  }
577 
578  /* Might as well make the overall/response packet here */
579  /* Get the physical address for the whole request/packet from the first
580  * fragment */
581  request->setPaddr(fragmentRequests[0]->getPaddr());
582  makePacket();
583 }
584 
585 void
587 {
589 
590  if (numFragments > 0) {
594 
595  /* @todo, just do these in sequence for now with
596  * a loop of:
597  * do {
598  * sendNextFragmentToTranslation ; translateTiming ; finish
599  * } while (numTranslatedFragments != numFragments);
600  */
601 
602  /* Do first translation */
604  } else {
607  }
608 }
609 
610 PacketPtr
612 {
614 
616 }
617 
618 void
620 {
622 
624 }
625 
626 void
628 {
629  assert(inst->translationFault == NoFault);
631 
632  DPRINTFS(MinorMem, (&port), "Retiring fragment addr: 0x%x size: %d"
633  " offset: 0x%x (retired fragment num: %d)\n",
634  response->req->getVaddr(), response->req->getSize(),
635  request->getVaddr() - response->req->getVaddr(),
637 
639 
640  if (skipped) {
641  /* Skip because we already knew the request had faulted or been
642  * skipped */
643  DPRINTFS(MinorMem, (&port), "Skipping this fragment\n");
644  } else if (response->isError()) {
645  /* Mark up the error and leave to execute to handle it */
646  DPRINTFS(MinorMem, (&port), "Fragment has an error, skipping\n");
647  setSkipped();
648  packet->copyError(response);
649  } else {
650  if (isLoad) {
651  if (!data) {
652  /* For a split transfer, a Packet must be constructed
653  * to contain all returning data. This is that packet's
654  * data */
655  data = new uint8_t[request->getSize()];
656  }
657 
658  /* Populate the portion of the overall response data represented
659  * by the response fragment */
660  std::memcpy(
661  data + (response->req->getVaddr() - request->getVaddr()),
662  response->getConstPtr<uint8_t>(),
663  response->req->getSize());
664  }
665  }
666 
667  /* Complete early if we're skipping are no more in-flight accesses */
668  if (skipped && !hasPacketsInMemSystem()) {
669  DPRINTFS(MinorMem, (&port), "Completed skipped burst\n");
671  if (packet->needsResponse())
672  packet->makeResponse();
673  }
674 
675  if (numRetiredFragments == numTranslatedFragments)
677 
678  if (!skipped && isComplete()) {
679  DPRINTFS(MinorMem, (&port), "Completed burst %d\n", packet != NULL);
680 
681  DPRINTFS(MinorMem, (&port), "Retired packet isRead: %d isWrite: %d"
682  " needsResponse: %d packetSize: %s requestSize: %s responseSize:"
683  " %s\n", packet->isRead(), packet->isWrite(),
684  packet->needsResponse(), packet->getSize(), request->getSize(),
685  response->getSize());
686 
687  /* A request can become complete by several paths, this is a sanity
688  * check to make sure the packet's data is created */
689  if (!data) {
690  data = new uint8_t[request->getSize()];
691  }
692 
693  if (isLoad) {
694  DPRINTFS(MinorMem, (&port), "Copying read data\n");
695  std::memcpy(packet->getPtr<uint8_t>(), data, request->getSize());
696  }
697  packet->makeResponse();
698  }
699 
700  /* Packets are all deallocated together in ~SplitLSQRequest */
701 }
702 
703 void
705 {
706  unsigned int fragment_index = numTranslatedFragments;
707 
708  ThreadContext *thread = port.cpu.getContext(
709  inst->id.threadId);
710 
711  DPRINTFS(MinorMem, (&port), "Submitting DTLB request for fragment: %d\n",
712  fragment_index);
713 
716 
717  thread->getDTBPtr()->translateTiming(
718  fragmentRequests[fragment_index], thread, this, (isLoad ?
720 }
721 
722 bool
724 {
725  /* @todo, support store amalgamation */
726  return slots.size() < numSlots;
727 }
728 
729 void
731 {
732  auto found = std::find(slots.begin(), slots.end(), request);
733 
734  if (found != slots.end()) {
735  DPRINTF(MinorMem, "Deleting request: %s %s %s from StoreBuffer\n",
736  request, *found, *(request->inst));
737  slots.erase(found);
738 
739  delete request;
740  }
741 }
742 
743 void
745 {
746  if (!canInsert()) {
747  warn("%s: store buffer insertion without space to insert from"
748  " inst: %s\n", name(), *(request->inst));
749  }
750 
751  DPRINTF(MinorMem, "Pushing store: %s into store buffer\n", request);
752 
753  numUnissuedAccesses++;
754 
755  if (request->state != LSQRequest::Complete)
757 
758  slots.push_back(request);
759 
760  /* Let's try and wake up the processor for the next cycle to step
761  * the store buffer */
762  lsq.cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
763 }
764 
767  unsigned int &found_slot)
768 {
769  unsigned int slot_index = slots.size() - 1;
770  auto i = slots.rbegin();
772 
773  /* Traverse the store buffer in reverse order (most to least recent)
774  * and try to find a slot whose address range overlaps this request */
775  while (ret == NoAddrRangeCoverage && i != slots.rend()) {
776  LSQRequestPtr slot = *i;
777 
778  /* Cache maintenance instructions go down via the store path but
779  * they carry no data and they shouldn't be considered
780  * for forwarding */
781  if (slot->packet &&
782  slot->inst->id.threadId == request->inst->id.threadId &&
783  !slot->packet->req->isCacheMaintenance()) {
784  AddrRangeCoverage coverage = slot->containsAddrRangeOf(request);
785 
786  if (coverage != NoAddrRangeCoverage) {
787  DPRINTF(MinorMem, "Forwarding: slot: %d result: %s thisAddr:"
788  " 0x%x thisSize: %d slotAddr: 0x%x slotSize: %d\n",
789  slot_index, coverage,
790  request->request->getPaddr(), request->request->getSize(),
791  slot->request->getPaddr(), slot->request->getSize());
792 
793  found_slot = slot_index;
794  ret = coverage;
795  }
796  }
797 
798  i++;
799  slot_index--;
800  }
801 
802  return ret;
803 }
804 
806 void
808  unsigned int slot_number)
809 {
810  assert(slot_number < slots.size());
811  assert(load->packet);
812  assert(load->isLoad);
813 
814  LSQRequestPtr store = slots[slot_number];
815 
816  assert(store->packet);
817  assert(store->containsAddrRangeOf(load) == FullAddrRangeCoverage);
818 
819  Addr load_addr = load->request->getPaddr();
820  Addr store_addr = store->request->getPaddr();
821  Addr addr_offset = load_addr - store_addr;
822 
823  unsigned int load_size = load->request->getSize();
824 
825  DPRINTF(MinorMem, "Forwarding %d bytes for addr: 0x%x from store buffer"
826  " slot: %d addr: 0x%x addressOffset: 0x%x\n",
827  load_size, load_addr, slot_number,
828  store_addr, addr_offset);
829 
830  void *load_packet_data = load->packet->getPtr<void>();
831  void *store_packet_data = store->packet->getPtr<uint8_t>() + addr_offset;
832 
833  std::memcpy(load_packet_data, store_packet_data, load_size);
834 }
835 
836 void
838 {
839  /* Barriers are accounted for as they are cleared from
840  * the queue, not after their transfers are complete */
841  if (!request->isBarrier())
842  numUnissuedAccesses--;
843 }
844 
845 void
847 {
848  DPRINTF(MinorMem, "StoreBuffer step numUnissuedAccesses: %d\n",
849  numUnissuedAccesses);
850 
851  if (numUnissuedAccesses != 0 && lsq.state == LSQ::MemoryRunning) {
852  /* Clear all the leading barriers */
853  while (!slots.empty() &&
854  slots.front()->isComplete() && slots.front()->isBarrier())
855  {
856  LSQRequestPtr barrier = slots.front();
857 
858  DPRINTF(MinorMem, "Clearing barrier for inst: %s\n",
859  *(barrier->inst));
860 
861  numUnissuedAccesses--;
862  lsq.clearMemBarrier(barrier->inst);
863  slots.pop_front();
864 
865  delete barrier;
866  }
867 
868  auto i = slots.begin();
869  bool issued = true;
870  unsigned int issue_count = 0;
871 
872  /* Skip trying if the memory system is busy */
873  if (lsq.state == LSQ::MemoryNeedsRetry)
874  issued = false;
875 
876  /* Try to issue all stores in order starting from the head
877  * of the queue. Responses are allowed to be retired
878  * out of order */
879  while (issued &&
880  issue_count < storeLimitPerCycle &&
881  lsq.canSendToMemorySystem() &&
882  i != slots.end())
883  {
885 
886  DPRINTF(MinorMem, "Considering request: %s, sentAllPackets: %d"
887  " state: %s\n",
888  *(request->inst), request->sentAllPackets(),
889  request->state);
890 
891  if (request->isBarrier() && request->isComplete()) {
892  /* Give up at barriers */
893  issued = false;
894  } else if (!(request->state == LSQRequest::StoreBufferIssuing &&
895  request->sentAllPackets()))
896  {
897  DPRINTF(MinorMem, "Trying to send request: %s to memory"
898  " system\n", *(request->inst));
899 
900  if (lsq.tryToSend(request)) {
901  countIssuedStore(request);
902  issue_count++;
903  } else {
904  /* Don't step on to the next store buffer entry if this
905  * one hasn't issued all its packets as the store
906  * buffer must still enforce ordering */
907  issued = false;
908  }
909  }
910  i++;
911  }
912  }
913 }
914 
915 void
917  bool committed)
918 {
919  if (committed) {
920  /* Not already sent to the store buffer as a store request? */
921  if (!inst->inStoreBuffer) {
922  /* Insert an entry into the store buffer to tick off barriers
923  * until there are none in flight */
924  storeBuffer.insert(new BarrierDataRequest(*this, inst));
925  }
926  } else {
927  /* Clear the barrier anyway if it wasn't actually committed */
928  clearMemBarrier(inst);
929  }
930 }
931 
932 void
934 {
935  unsigned int size = slots.size();
936  unsigned int i = 0;
937  std::ostringstream os;
938 
939  while (i < size) {
940  LSQRequestPtr request = slots[i];
941 
942  request->reportData(os);
943 
944  i++;
945  if (i < numSlots)
946  os << ',';
947  }
948 
949  while (i < numSlots) {
950  os << '-';
951 
952  i++;
953  if (i < numSlots)
954  os << ',';
955  }
956 
957  MINORTRACE("addr=%s num_unissued_stores=%d\n", os.str(),
958  numUnissuedAccesses);
959 }
960 
961 void
963 {
964  if (state == MemoryNeedsRetry) {
965  DPRINTF(MinorMem, "Request needs retry, not issuing to"
966  " memory until retry arrives\n");
967  return;
968  }
969 
970  if (request->state == LSQRequest::InTranslation) {
971  DPRINTF(MinorMem, "Request still in translation, not issuing to"
972  " memory\n");
973  return;
974  }
975 
976  assert(request->state == LSQRequest::Translated ||
977  request->state == LSQRequest::RequestIssuing ||
978  request->state == LSQRequest::Failed ||
979  request->state == LSQRequest::Complete);
980 
981  if (requests.empty() || requests.front() != request) {
982  DPRINTF(MinorMem, "Request not at front of requests queue, can't"
983  " issue to memory\n");
984  return;
985  }
986 
987  if (transfers.unreservedRemainingSpace() == 0) {
988  DPRINTF(MinorMem, "No space to insert request into transfers"
989  " queue\n");
990  return;
991  }
992 
993  if (request->isComplete() || request->state == LSQRequest::Failed) {
994  DPRINTF(MinorMem, "Passing a %s transfer on to transfers"
995  " queue\n", (request->isComplete() ? "completed" : "failed"));
996  request->setState(LSQRequest::Complete);
997  request->setSkipped();
999  return;
1000  }
1001 
1002  if (!execute.instIsRightStream(request->inst)) {
1003  /* Wrong stream, try to abort the transfer but only do so if
1004  * there are no packets in flight */
1005  if (request->hasPacketsInMemSystem()) {
1006  DPRINTF(MinorMem, "Request's inst. is from the wrong stream,"
1007  " waiting for responses before aborting request\n");
1008  } else {
1009  DPRINTF(MinorMem, "Request's inst. is from the wrong stream,"
1010  " aborting request\n");
1011  request->setState(LSQRequest::Complete);
1012  request->setSkipped();
1013  moveFromRequestsToTransfers(request);
1014  }
1015  return;
1016  }
1017 
1018  if (request->inst->translationFault != NoFault) {
1019  if (request->inst->staticInst->isPrefetch()) {
1020  DPRINTF(MinorMem, "Not signalling fault for faulting prefetch\n");
1021  }
1022  DPRINTF(MinorMem, "Moving faulting request into the transfers"
1023  " queue\n");
1024  request->setState(LSQRequest::Complete);
1025  request->setSkipped();
1026  moveFromRequestsToTransfers(request);
1027  return;
1028  }
1029 
1030  bool is_load = request->isLoad;
1031  bool is_llsc = request->request->isLLSC();
1032  bool is_swap = request->request->isSwap();
1033  bool is_atomic = request->request->isAtomic();
1034  bool bufferable = !(request->request->isStrictlyOrdered() ||
1035  is_llsc || is_swap || is_atomic);
1036 
1037  if (is_load) {
1038  if (numStoresInTransfers != 0) {
1039  DPRINTF(MinorMem, "Load request with stores still in transfers"
1040  " queue, stalling\n");
1041  return;
1042  }
1043  } else {
1044  /* Store. Can it be sent to the store buffer? */
1045  if (bufferable && !request->request->isLocalAccess()) {
1047  moveFromRequestsToTransfers(request);
1048  DPRINTF(MinorMem, "Moving store into transfers queue\n");
1049  return;
1050  }
1051  }
1052 
1053  /* Check if this is the head instruction (and so must be executable as
1054  * its stream sequence number was checked above) for loads which must
1055  * not be speculatively issued and stores which must be issued here */
1056  if (!bufferable) {
1057  if (!execute.instIsHeadInst(request->inst)) {
1058  DPRINTF(MinorMem, "Memory access not the head inst., can't be"
1059  " sure it can be performed, not issuing\n");
1060  return;
1061  }
1062 
1063  unsigned int forwarding_slot = 0;
1064 
1065  if (storeBuffer.canForwardDataToLoad(request, forwarding_slot) !=
1067  {
1068  // There's at least another request that targets the same
1069  // address and is staying in the storeBuffer. Since our
1070  // request is non-bufferable (e.g., strictly ordered or atomic),
1071  // we must wait for the other request in the storeBuffer to
1072  // complete before we can issue this non-bufferable request.
1073  // This is to make sure that the order they access the cache is
1074  // correct.
1075  DPRINTF(MinorMem, "Memory access can receive forwarded data"
1076  " from the store buffer, but need to wait for store buffer"
1077  " to drain\n");
1078  return;
1079  }
1080  }
1081 
1082  /* True: submit this packet to the transfers queue to be sent to the
1083  * memory system.
1084  * False: skip the memory and push a packet for this request onto
1085  * requests */
1086  bool do_access = true;
1087 
1088  if (!is_llsc) {
1089  /* Check for match in the store buffer */
1090  if (is_load) {
1091  unsigned int forwarding_slot = 0;
1092  AddrRangeCoverage forwarding_result =
1094  forwarding_slot);
1095 
1096  switch (forwarding_result) {
1097  case FullAddrRangeCoverage:
1098  /* Forward data from the store buffer into this request and
1099  * repurpose this request's packet into a response packet */
1100  storeBuffer.forwardStoreData(request, forwarding_slot);
1101  request->packet->makeResponse();
1102 
1103  /* Just move between queues, no access */
1104  do_access = false;
1105  break;
1107  DPRINTF(MinorMem, "Load partly satisfied by store buffer"
1108  " data. Must wait for the store to complete\n");
1109  return;
1110  break;
1111  case NoAddrRangeCoverage:
1112  DPRINTF(MinorMem, "No forwardable data from store buffer\n");
1113  /* Fall through to try access */
1114  break;
1115  }
1116  }
1117  } else {
1118  if (!canSendToMemorySystem()) {
1119  DPRINTF(MinorMem, "Can't send request to memory system yet\n");
1120  return;
1121  }
1122 
1123  SimpleThread &thread = *cpu.threads[request->inst->id.threadId];
1124 
1125  TheISA::PCState old_pc = thread.pcState();
1126  ExecContext context(cpu, thread, execute, request->inst);
1127 
1128  /* Handle LLSC requests and tests */
1129  if (is_load) {
1130  TheISA::handleLockedRead(&context, request->request);
1131  } else {
1132  do_access = TheISA::handleLockedWrite(&context,
1133  request->request, cacheBlockMask);
1134 
1135  if (!do_access) {
1136  DPRINTF(MinorMem, "Not perfoming a memory "
1137  "access for store conditional\n");
1138  }
1139  }
1140  thread.pcState(old_pc);
1141  }
1142 
1143  /* See the do_access comment above */
1144  if (do_access) {
1145  if (!canSendToMemorySystem()) {
1146  DPRINTF(MinorMem, "Can't send request to memory system yet\n");
1147  return;
1148  }
1149 
1150  /* Remember if this is an access which can't be idly
1151  * discarded by an interrupt */
1152  if (!bufferable && !request->issuedToMemory) {
1154  request->issuedToMemory = true;
1155  }
1156 
1157  if (tryToSend(request)) {
1158  moveFromRequestsToTransfers(request);
1159  }
1160  } else {
1161  request->setState(LSQRequest::Complete);
1162  moveFromRequestsToTransfers(request);
1163  }
1164 }
1165 
1166 bool
1168 {
1169  bool ret = false;
1170 
1171  if (!canSendToMemorySystem()) {
1172  DPRINTF(MinorMem, "Can't send request: %s yet, no space in memory\n",
1173  *(request->inst));
1174  } else {
1175  PacketPtr packet = request->getHeadPacket();
1176 
1177  DPRINTF(MinorMem, "Trying to send request: %s addr: 0x%x\n",
1178  *(request->inst), packet->req->getVaddr());
1179 
1180  /* The sender state of the packet *must* be an LSQRequest
1181  * so the response can be correctly handled */
1182  assert(packet->findNextSenderState<LSQRequest>());
1183 
1184  if (request->request->isLocalAccess()) {
1185  ThreadContext *thread =
1187  request->request->contextId()));
1188 
1189  if (request->isLoad)
1190  DPRINTF(MinorMem, "IPR read inst: %s\n", *(request->inst));
1191  else
1192  DPRINTF(MinorMem, "IPR write inst: %s\n", *(request->inst));
1193 
1194  request->request->localAccessor(thread, packet);
1195 
1196  request->stepToNextPacket();
1197  ret = request->sentAllPackets();
1198 
1199  if (!ret) {
1200  DPRINTF(MinorMem, "IPR access has another packet: %s\n",
1201  *(request->inst));
1202  }
1203 
1204  if (ret)
1205  request->setState(LSQRequest::Complete);
1206  else
1208  } else if (dcachePort.sendTimingReq(packet)) {
1209  DPRINTF(MinorMem, "Sent data memory request\n");
1210 
1212 
1213  request->stepToNextPacket();
1214 
1215  ret = request->sentAllPackets();
1216 
1217  switch (request->state) {
1220  /* Fully or partially issued a request in the transfers
1221  * queue */
1223  break;
1226  /* Fully or partially issued a request in the store
1227  * buffer */
1229  break;
1230  default:
1231  panic("Unrecognized LSQ request state %d.", request->state);
1232  }
1233 
1234  state = MemoryRunning;
1235  } else {
1236  DPRINTF(MinorMem,
1237  "Sending data memory request - needs retry\n");
1238 
1239  /* Needs to be resent, wait for that */
1242 
1243  switch (request->state) {
1247  break;
1251  break;
1252  default:
1253  panic("Unrecognized LSQ request state %d.", request->state);
1254  }
1255  }
1256  }
1257 
1258  if (ret)
1259  threadSnoop(request);
1260 
1261  return ret;
1262 }
1263 
1264 void
1266 {
1267  assert(!requests.empty() && requests.front() == request);
1268  assert(transfers.unreservedRemainingSpace() != 0);
1269 
1270  /* Need to count the number of stores in the transfers
1271  * queue so that loads know when their store buffer forwarding
1272  * results will be correct (only when all those stores
1273  * have reached the store buffer) */
1274  if (!request->isLoad)
1276 
1277  requests.pop();
1278  transfers.push(request);
1279 }
1280 
1281 bool
1283 {
1284  return state == MemoryRunning &&
1286 }
1287 
1288 bool
1290 {
1292  safe_cast<LSQRequestPtr>(response->popSenderState());
1293 
1294  DPRINTF(MinorMem, "Received response packet inst: %s"
1295  " addr: 0x%x cmd: %s\n",
1296  *(request->inst), response->getAddr(),
1297  response->cmd.toString());
1298 
1300 
1301  if (response->isError()) {
1302  DPRINTF(MinorMem, "Received error response packet: %s\n",
1303  *request->inst);
1304  }
1305 
1306  switch (request->state) {
1309  /* Response to a request from the transfers queue */
1310  request->retireResponse(response);
1311 
1312  DPRINTF(MinorMem, "Has outstanding packets?: %d %d\n",
1313  request->hasPacketsInMemSystem(), request->isComplete());
1314 
1315  break;
1318  /* Response to a request from the store buffer */
1319  request->retireResponse(response);
1320 
1321  /* Remove completed requests unless they are barriers (which will
1322  * need to be removed in order */
1323  if (request->isComplete()) {
1324  if (!request->isBarrier()) {
1325  storeBuffer.deleteRequest(request);
1326  } else {
1327  DPRINTF(MinorMem, "Completed transfer for barrier: %s"
1328  " leaving the request as it is also a barrier\n",
1329  *(request->inst));
1330  }
1331  }
1332  break;
1333  default:
1334  panic("Shouldn't be allowed to receive a response from another state");
1335  }
1336 
1337  /* We go to idle even if there are more things in the requests queue
1338  * as it's the job of step to actually step us on to the next
1339  * transaction */
1340 
1341  /* Let's try and wake up the processor for the next cycle */
1343 
1344  /* Never busy */
1345  return true;
1346 }
1347 
1348 void
1350 {
1351  DPRINTF(MinorMem, "Received retry request\n");
1352 
1353  assert(state == MemoryNeedsRetry);
1354 
1355  switch (retryRequest->state) {
1357  /* Retry in the requests queue */
1359  break;
1361  /* Retry in the store buffer */
1363  break;
1364  default:
1365  panic("Unrecognized retry request state %d.", retryRequest->state);
1366  }
1367 
1368  /* Set state back to MemoryRunning so that the following
1369  * tryToSend can actually send. Note that this won't
1370  * allow another transfer in as tryToSend should
1371  * issue a memory request and either succeed for this
1372  * request or return the LSQ back to MemoryNeedsRetry */
1373  state = MemoryRunning;
1374 
1375  /* Try to resend the request */
1376  if (tryToSend(retryRequest)) {
1377  /* Successfully sent, need to move the request */
1378  switch (retryRequest->state) {
1380  /* In the requests queue */
1382  break;
1384  /* In the store buffer */
1386  break;
1387  default:
1388  panic("Unrecognized retry request state %d.", retryRequest->state);
1389  }
1390 
1391  retryRequest = NULL;
1392  }
1393 }
1394 
1395 LSQ::LSQ(std::string name_, std::string dcache_port_name_,
1396  MinorCPU &cpu_, Execute &execute_,
1397  unsigned int in_memory_system_limit, unsigned int line_width,
1398  unsigned int requests_queue_size, unsigned int transfers_queue_size,
1399  unsigned int store_buffer_size,
1400  unsigned int store_buffer_cycle_store_limit) :
1401  Named(name_),
1402  cpu(cpu_),
1403  execute(execute_),
1404  dcachePort(dcache_port_name_, *this, cpu_),
1405  lastMemBarrier(cpu.numThreads, 0),
1407  inMemorySystemLimit(in_memory_system_limit),
1408  lineWidth((line_width == 0 ? cpu.cacheLineSize() : line_width)),
1409  requests(name_ + ".requests", "addr", requests_queue_size),
1410  transfers(name_ + ".transfers", "addr", transfers_queue_size),
1411  storeBuffer(name_ + ".storeBuffer",
1412  *this, store_buffer_size, store_buffer_cycle_store_limit),
1414  numAccessesInDTLB(0),
1417  retryRequest(NULL),
1418  cacheBlockMask(~(cpu_.cacheLineSize() - 1))
1419 {
1420  if (in_memory_system_limit < 1) {
1421  fatal("%s: executeMaxAccessesInMemory must be >= 1 (%d)\n", name_,
1422  in_memory_system_limit);
1423  }
1424 
1425  if (store_buffer_cycle_store_limit < 1) {
1426  fatal("%s: executeLSQMaxStoreBufferStoresPerCycle must be"
1427  " >= 1 (%d)\n", name_, store_buffer_cycle_store_limit);
1428  }
1429 
1430  if (requests_queue_size < 1) {
1431  fatal("%s: executeLSQRequestsQueueSize must be"
1432  " >= 1 (%d)\n", name_, requests_queue_size);
1433  }
1434 
1435  if (transfers_queue_size < 1) {
1436  fatal("%s: executeLSQTransfersQueueSize must be"
1437  " >= 1 (%d)\n", name_, transfers_queue_size);
1438  }
1439 
1440  if (store_buffer_size < 1) {
1441  fatal("%s: executeLSQStoreBufferSize must be"
1442  " >= 1 (%d)\n", name_, store_buffer_size);
1443  }
1444 
1445  if ((lineWidth & (lineWidth - 1)) != 0) {
1446  fatal("%s: lineWidth: %d must be a power of 2\n", name(), lineWidth);
1447  }
1448 }
1449 
1451 { }
1452 
1454 {
1455  if (packet)
1456  delete packet;
1457  if (data)
1458  delete [] data;
1459 }
1460 
1467 void
1469 {
1470  /* Try to move address-translated requests between queues and issue
1471  * them */
1472  if (!requests.empty())
1474 
1475  storeBuffer.step();
1476 }
1477 
1480 {
1481  LSQ::LSQRequestPtr ret = NULL;
1482 
1483  if (!transfers.empty()) {
1484  LSQRequestPtr request = transfers.front();
1485 
1486  /* Same instruction and complete access or a store that's
1487  * capable of being moved to the store buffer */
1488  if (request->inst->id == inst->id) {
1489  bool complete = request->isComplete();
1490  bool can_store = storeBuffer.canInsert();
1491  bool to_store_buffer = request->state ==
1493 
1494  if ((complete && !(request->isBarrier() && !can_store)) ||
1495  (to_store_buffer && can_store))
1496  {
1497  ret = request;
1498  }
1499  }
1500  }
1501 
1502  if (ret) {
1503  DPRINTF(MinorMem, "Found matching memory response for inst: %s\n",
1504  *inst);
1505  } else {
1506  DPRINTF(MinorMem, "No matching memory response for inst: %s\n",
1507  *inst);
1508  }
1509 
1510  return ret;
1511 }
1512 
1513 void
1515 {
1516  assert(!transfers.empty() && transfers.front() == response);
1517 
1518  transfers.pop();
1519 
1520  if (!response->isLoad)
1522 
1523  if (response->issuedToMemory)
1525 
1526  if (response->state != LSQRequest::StoreInStoreBuffer) {
1527  DPRINTF(MinorMem, "Deleting %s request: %s\n",
1528  (response->isLoad ? "load" : "store"),
1529  *(response->inst));
1530 
1531  delete response;
1532  }
1533 }
1534 
1535 void
1537 {
1538  assert(request->state == LSQRequest::StoreToStoreBuffer);
1539 
1540  DPRINTF(MinorMem, "Sending store: %s to store buffer\n",
1541  *(request->inst));
1542 
1543  request->inst->inStoreBuffer = true;
1544 
1545  storeBuffer.insert(request);
1546 }
1547 
1548 bool
1550 {
1551  return requests.empty() && transfers.empty() &&
1553 }
1554 
1555 bool
1557 {
1558  bool ret = false;
1559 
1560  if (canSendToMemorySystem()) {
1561  bool have_translated_requests = !requests.empty() &&
1564 
1565  ret = have_translated_requests ||
1567  }
1568 
1569  if (ret)
1570  DPRINTF(Activity, "Need to tick\n");
1571 
1572  return ret;
1573 }
1574 
1575 Fault
1576 LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
1577  unsigned int size, Addr addr, Request::Flags flags,
1578  uint64_t *res, AtomicOpFunctorPtr amo_op,
1579  const std::vector<bool>& byte_enable)
1580 {
1581  assert(inst->translationFault == NoFault || inst->inLSQ);
1582 
1583  if (inst->inLSQ) {
1584  return inst->translationFault;
1585  }
1586 
1587  bool needs_burst = transferNeedsBurst(addr, size, lineWidth);
1588 
1589  if (needs_burst && inst->staticInst->isAtomic()) {
1590  // AMO requests that access across a cache line boundary are not
1591  // allowed since the cache does not guarantee AMO ops to be executed
1592  // atomically in two cache lines
1593  // For ISAs such as x86 that requires AMO operations to work on
1594  // accesses that cross cache-line boundaries, the cache needs to be
1595  // modified to support locking both cache lines to guarantee the
1596  // atomicity.
1597  panic("Do not expect cross-cache-line atomic memory request\n");
1598  }
1599 
1600  LSQRequestPtr request;
1601 
1602  /* Copy given data into the request. The request will pass this to the
1603  * packet and then it will own the data */
1604  uint8_t *request_data = NULL;
1605 
1606  DPRINTF(MinorMem, "Pushing request (%s) addr: 0x%x size: %d flags:"
1607  " 0x%x%s lineWidth : 0x%x\n",
1608  (isLoad ? "load" : "store/atomic"), addr, size, flags,
1609  (needs_burst ? " (needs burst)" : ""), lineWidth);
1610 
1611  if (!isLoad) {
1612  /* Request_data becomes the property of a ...DataRequest (see below)
1613  * and destroyed by its destructor */
1614  request_data = new uint8_t[size];
1615  if (inst->staticInst->isAtomic() ||
1616  (flags & Request::STORE_NO_DATA)) {
1617  /* For atomic or store-no-data, just use zeroed data */
1618  std::memset(request_data, 0, size);
1619  } else {
1620  std::memcpy(request_data, data, size);
1621  }
1622  }
1623 
1624  if (needs_burst) {
1625  request = new SplitDataRequest(
1626  *this, inst, isLoad, request_data, res);
1627  } else {
1628  request = new SingleDataRequest(
1629  *this, inst, isLoad, request_data, res);
1630  }
1631 
1632  if (inst->traceData)
1633  inst->traceData->setMem(addr, size, flags);
1634 
1635  int cid = cpu.threads[inst->id.threadId]->getTC()->contextId();
1636  request->request->setContext(cid);
1637  request->request->setVirt(
1638  addr, size, flags, cpu.dataMasterId(),
1639  /* I've no idea why we need the PC, but give it */
1640  inst->pc.instAddr(), std::move(amo_op));
1641  request->request->setByteEnable(byte_enable);
1642 
1643  requests.push(request);
1644  inst->inLSQ = true;
1645  request->startAddrTranslation();
1646 
1647  return inst->translationFault;
1648 }
1649 
1650 void
1652 {
1653  LSQRequestPtr request = new FailedDataRequest(*this, inst);
1654  requests.push(request);
1655 }
1656 
1657 void
1659 {
1660  MINORTRACE("state=%s in_tlb_mem=%d/%d stores_in_transfers=%d"
1661  " lastMemBarrier=%d\n",
1664  requests.minorTrace();
1667 }
1668 
1669 LSQ::StoreBuffer::StoreBuffer(std::string name_, LSQ &lsq_,
1670  unsigned int store_buffer_size,
1671  unsigned int store_limit_per_cycle) :
1672  Named(name_), lsq(lsq_),
1673  numSlots(store_buffer_size),
1674  storeLimitPerCycle(store_limit_per_cycle),
1675  slots(),
1676  numUnissuedAccesses(0)
1677 {
1678 }
1679 
1680 PacketPtr
1681 makePacketForRequest(const RequestPtr &request, bool isLoad,
1682  Packet::SenderState *sender_state, PacketDataPtr data)
1683 {
1684  PacketPtr ret = isLoad ? Packet::createRead(request)
1685  : Packet::createWrite(request);
1686 
1687  if (sender_state)
1688  ret->pushSenderState(sender_state);
1689 
1690  if (isLoad) {
1691  ret->allocate();
1692  } else if (!request->isCacheMaintenance()) {
1693  // CMOs are treated as stores but they don't have data. All
1694  // stores otherwise need to allocate for data.
1695  ret->dataDynamic(data);
1696  }
1697 
1698  return ret;
1699 }
1700 
1701 void
1703 {
1704  assert(inst->isInst() && inst->staticInst->isMemBarrier());
1705  assert(inst->id.execSeqNum > lastMemBarrier[inst->id.threadId]);
1706 
1707  /* Remember the barrier. We only have a notion of one
1708  * barrier so this may result in some mem refs being
1709  * delayed if they are between barriers */
1710  lastMemBarrier[inst->id.threadId] = inst->id.execSeqNum;
1711 }
1712 
1713 void
1715 {
1716  assert(inst->translationFault == NoFault);
1717 
1718  /* Make the function idempotent */
1719  if (packet)
1720  return;
1721 
1722  packet = makePacketForRequest(request, isLoad, this, data);
1723  /* Null the ret data so we know not to deallocate it when the
1724  * ret is destroyed. The data now belongs to the ret and
1725  * the ret is responsible for its destruction */
1726  data = NULL;
1727 }
1728 
1729 std::ostream &
1731 {
1732  switch (state) {
1733  case LSQ::MemoryRunning:
1734  os << "MemoryRunning";
1735  break;
1736  case LSQ::MemoryNeedsRetry:
1737  os << "MemoryNeedsRetry";
1738  break;
1739  default:
1740  os << "MemoryState-" << static_cast<int>(state);
1741  break;
1742  }
1743  return os;
1744 }
1745 
1746 void
1748 {
1749  /* LLSC operations in Minor can't be speculative and are executed from
1750  * the head of the requests queue. We shouldn't need to do more than
1751  * this action on snoops. */
1752  for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
1753  if (cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) {
1754  cpu.wakeup(tid);
1755  }
1756  }
1757 
1758  if (pkt->isInvalidate() || pkt->isWrite()) {
1759  for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
1761  cacheBlockMask);
1762  }
1763  }
1764 }
1765 
1766 void
1768 {
1769  /* LLSC operations in Minor can't be speculative and are executed from
1770  * the head of the requests queue. We shouldn't need to do more than
1771  * this action on snoops. */
1772  ThreadID req_tid = request->inst->id.threadId;
1773  PacketPtr pkt = request->packet;
1774 
1775  for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
1776  if (tid != req_tid) {
1777  if (cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) {
1778  cpu.wakeup(tid);
1779  }
1780 
1781  if (pkt->isInvalidate() || pkt->isWrite()) {
1783  cacheBlockMask);
1784  }
1785  }
1786  }
1787 }
1788 
1789 }
MemoryState
State of memory access for head access.
Definition: lsq.hh:68
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:163
#define DPRINTF(x,...)
Definition: trace.hh:225
DcachePort dcachePort
Definition: lsq.hh:112
Addr addrBlockOffset(Addr addr, Addr block_size)
Calculates the offset of a given address wrt aligned fixed-size blocks.
Definition: utils.hh:50
virtual bool sentAllPackets()=0
Have all packets been sent?
MinorDynInstPtr inst
Instruction which made this request.
Definition: lsq.hh:127
SingleDataRequest is used for requests that don't fragment.
Definition: lsq.hh:337
unsigned int numAccessesIssuedToMemory
The number of accesses which have been issued to the memory system but have not been committed/discar...
Definition: lsq.hh:607
decltype(nullptr) constexpr NoFault
Definition: types.hh:243
Execute stage.
Definition: execute.hh:60
AddressMonitor * getCpuAddrMonitor(ThreadID tid)
Definition: base.hh:610
void pop()
Pop the head item.
Definition: buffers.hh:499
LSQQueue transfers
Once issued to memory (or, for stores, just had their state changed to StoreToStoreBuffer) LSQRequest...
Definition: lsq.hh:575
T * findNextSenderState() const
Go through the sender state stack and return the first instance that is of type T (as determined by a...
Definition: packet.hh:504
bool transferNeedsBurst(Addr addr, unsigned int size, unsigned int block_size)
Returns true if the given memory access (address, size) needs to be fragmented across aligned fixed-s...
Definition: utils.hh:77
#define fatal(...)
This implements a cprintf based fatal() function.
Definition: logging.hh:171
PacketDataPtr data
Dynamically allocated and populated data carried for building write packets.
Definition: lsq.hh:135
void setState(LSQRequestState new_state)
Set state and output trace output.
Definition: lsq.cc:167
StoreBuffer(std::string name_, LSQ &lsq_, unsigned int store_buffer_size, unsigned int store_limit_per_cycle)
Definition: lsq.cc:1669
void finish(const Fault &fault_, const RequestPtr &request_, ThreadContext *tc, BaseTLB::Mode mode)
TLB interace.
Definition: lsq.cc:268
LSQQueue requests
requests contains LSQRequests which have been issued to the TLB by calling ExecContext::readMem/write...
Definition: lsq.hh:566
friend std::ostream & operator<<(std::ostream &os, MemoryState state)
Print MemoryState values as shown in the enum definition.
Definition: lsq.cc:1730
bool isTranslationDelayed
Address translation is delayed due to table walk.
Definition: lsq.hh:159
Bitfield< 7 > i
SplitDataRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_, PacketDataPtr data_=NULL, uint64_t *res_=NULL)
Definition: lsq.cc:390
std::vector< Packet * > fragmentPackets
Packets matching fragmentRequests to issue fragments to memory.
Definition: lsq.hh:410
void step()
Try to issue more stores to memory.
Definition: lsq.cc:846
ThreadID numThreads
Number of threads we&#39;re actually simulating (<= SMT_MAX_THREADS).
Definition: base.hh:374
virtual BaseTLB * getDTBPtr()=0
void minorTrace() const
Definition: lsq.cc:1658
void minorTrace() const
Report queue contents for MinorTrace.
Definition: lsq.cc:933
virtual bool isBarrier()
Is this a request a barrier?
Definition: lsq.cc:155
unsigned int numIssuedFragments
Number of fragments already issued (<= numFragments)
Definition: lsq.hh:400
void popResponse(LSQRequestPtr response)
Sanity check and pop the head response.
Definition: lsq.cc:1514
void issuedMemBarrierInst(MinorDynInstPtr inst)
A memory barrier instruction has been issued, remember its execSeqNum that we can avoid issuing memor...
Definition: lsq.cc:1702
bool empty() const
Is the queue empty?
Definition: buffers.hh:502
bool instIsHeadInst(MinorDynInstPtr inst)
Returns true if the given instruction is at the head of the inFlightInsts instruction queue...
Definition: execute.cc:1870
const std::string & toString() const
Return the string to a cmd given by idx.
Definition: packet.hh:231
void pushFailedRequest(MinorDynInstPtr inst)
Push a predicate failed-representing request into the queues just to maintain commit order...
Definition: lsq.cc:1651
std::shared_ptr< Request > RequestPtr
Definition: request.hh:81
All the fun of executing instructions from Decode and sending branch/new instruction stream info...
std::vector< RequestPtr > fragmentRequests
Fragment Requests corresponding to the address ranges of each fragment.
Definition: lsq.hh:407
void wakeupOnEvent(unsigned int stage_id)
Interface for stages to signal that they have become active after a callback or eventq event where th...
Definition: cpu.cc:287
unsigned int numStoresInTransfers
The number of stores in the transfers queue.
Definition: lsq.hh:602
uint8_t * PacketDataPtr
Definition: packet.hh:66
unsigned int unreservedRemainingSpace() const
Like remainingSpace but does not count reserved spaces.
Definition: buffers.hh:486
ip6_addr_t addr
Definition: inet.hh:330
static PacketPtr createWrite(const RequestPtr &req)
Definition: packet.hh:913
std::unique_ptr< AtomicOpFunctor > AtomicOpFunctorPtr
Definition: amo.hh:228
void retireResponse(PacketPtr packet_)
For loads, paste the response data into the main response packet.
Definition: lsq.cc:627
void setMemAccPredicate(bool val) override
void startAddrTranslation()
Start a loop of do { sendNextFragmentToTranslation ; translateTiming ; finish } while (numTranslatedF...
Definition: lsq.cc:586
TheISA::PCState pcState() const override
bool needsToBeSentToStoreBuffer()
This request, once processed by the requests/transfers queues, will need to go to the store buffer...
Definition: lsq.cc:161
The SimpleThread object provides a combination of the ThreadState object and the ThreadContext interf...
bool handleLockedWrite(XC *xc, const RequestPtr &req, Addr cacheBlockMask)
Definition: locked_mem.hh:77
unsigned int numFragments
Number of fragments this request is split into.
Definition: lsq.hh:388
bool sendTimingReq(PacketPtr pkt)
Attempt to send a timing request to the slave port by calling its corresponding receive function...
Definition: port.hh:441
Minor contains all the definitions within the MinorCPU apart from the CPU class itself.
Definition: activity.cc:44
bool canInsert() const
Can a new request be inserted into the queue?
Definition: lsq.cc:723
void handleLockedRead(XC *xc, const RequestPtr &req)
Definition: locked_mem.hh:64
Bitfield< 4, 0 > mode
unsigned int numAccessesInMemorySystem
Count of the number of mem.
Definition: lsq.hh:595
LSQRequestPtr findResponse(MinorDynInstPtr inst)
Returns a response if it's at the head of the transfers queue and it's either complete or can be sent...
Definition: lsq.cc:1479
bool isWrite() const
Definition: packet.hh:523
bool isInvalidate() const
Definition: packet.hh:537
ThreadContext is the external interface to all thread state for anything outside of the CPU...
T * getPtr()
get a pointer to the data ptr.
Definition: packet.hh:1084
bool isRead() const
Definition: packet.hh:522
ExecContext bears the exec_context interface for Minor.
Definition: exec_context.hh:69
struct ip6_opt_fragment fragment
Definition: inet.hh:331
Bitfield< 17 > os
Definition: misc.hh:803
bool isComplete() const
Has this request been completed.
Definition: lsq.cc:175
ExecContext bears the exec_context interface for Minor.
static AddrRangeCoverage containsAddrRangeOf(Addr req1_addr, unsigned int req1_size, Addr req2_addr, unsigned int req2_size)
Does address range req1 (req1_addr to req1_addr + req1_size - 1) fully cover, partially cover or not ...
Definition: lsq.cc:118
Derived SenderState to carry data access info.
Definition: lsq.hh:118
LSQRequestState state
Definition: lsq.hh:183
#define DPRINTFS(x,...)
Definition: trace.hh:226
RequestPtr req
A pointer to the original request.
Definition: packet.hh:321
std::vector< InstSeqNum > lastMemBarrier
Most recent execSeqNum of a memory barrier instruction or 0 if there are no in-flight barriers...
Definition: lsq.hh:533
Definition: trace.hh:147
unsigned getSize() const
Definition: packet.hh:730
Request for doing barrier accounting in the store buffer.
Definition: lsq.hh:325
void completeMemBarrierInst(MinorDynInstPtr inst, bool committed)
Complete a barrier instruction.
Definition: lsq.cc:916
LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_, PacketDataPtr data_=NULL, uint64_t *res_=NULL)
Definition: lsq.cc:56
Tick curTick()
The current simulated tick.
Definition: core.hh:44
bool needsResponse() const
Definition: packet.hh:536
MemoryState state
Retry state of last issued memory transfer.
Definition: lsq.hh:537
bool isError() const
Definition: packet.hh:549
unsigned int numRetiredFragments
Number of fragments retired back to this request.
Definition: lsq.hh:403
ElemType & front()
Head value.
Definition: buffers.hh:494
MasterID dataMasterId() const
Reads this CPU's unique data requestor ID.
Definition: base.hh:185
PacketPtr getHeadPacket()
Get the head packet as counted by numIssuedFragments.
Definition: lsq.cc:611
StoreBuffer storeBuffer
Definition: lsq.hh:586
PacketPtr makePacketForRequest(const RequestPtr &request, bool isLoad, Packet::SenderState *sender_state, PacketDataPtr data)
Make a suitable packet for the given request.
Definition: lsq.cc:1681
bool issuedToMemory
This is an access other than a normal cacheable load that's visited the memory system.
Definition: lsq.hh:156
Fault pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data, unsigned int size, Addr addr, Request::Flags flags, uint64_t *res, AtomicOpFunctorPtr amo_op, const std::vector< bool > &byte_enable=std::vector< bool >())
Single interface for readMem/writeMem/amoMem to issue requests into the LSQ.
Definition: lsq.cc:1576
virtual void retireResponse(PacketPtr packet_)=0
Retire a response packet into the LSQRequest packet possibly completing this transfer.
void makePacket()
Make a packet to use with the memory transaction.
Definition: lsq.cc:1714
bool tryToSend(LSQRequestPtr request)
Try to send (or resend) a memory request's next/only packet to the memory system. ...
Definition: lsq.cc:1167
bool isLoad
Load/store indication used for building packet.
Definition: lsq.hh:131
bool doMonitor(PacketPtr pkt)
Definition: base.cc:737
virtual void translateTiming(const RequestPtr &req, ThreadContext *tc, Translation *translation, Mode mode)=0
virtual void stepToNextPacket()=0
Step to the next packet for the next call to getHeadPacket.
Addr getAddr() const
Definition: packet.hh:720
bool canSendToMemorySystem()
Can a request be sent to the memory system.
Definition: lsq.cc:1282
void makeFragmentPackets()
Make the packets to go with the requests so they can be sent to the memory system.
Definition: lsq.cc:536
virtual void startAddrTranslation()=0
Start the address translation process for this request.
static const FlagsType STORE_NO_DATA
Definition: request.hh:196
void schedule(Event &event, Tick when)
Definition: eventq.hh:934
FailedDataRequest represents requests from instructions that failed their predicates but need to ride...
Definition: lsq.hh:315
AddrRangeCoverage canForwardDataToLoad(LSQRequestPtr request, unsigned int &found_slot)
Look for a store which satisfies the given load.
Definition: lsq.cc:766
unsigned int numInTranslationFragments
Number of fragments in the address translation mechanism.
Definition: lsq.hh:391
void reportData(std::ostream &os) const
MinorTrace report interface.
Definition: lsq.cc:183
void clearMemBarrier(MinorDynInstPtr inst)
Clear a barrier (if it&#39;s the last one marked up in lastMemBarrier)
Definition: lsq.cc:255
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:140
MinorCPU & cpu
My owner(s)
Definition: lsq.hh:63
void minorTrace() const
Definition: buffers.hh:505
static PacketPtr createRead(const RequestPtr &req)
Constructor-like methods that return Packets based on Request objects.
Definition: packet.hh:907
T safe_cast(U ptr)
Definition: cast.hh:59
bool isDrained()
Is there nothing left in the LSQ.
Definition: lsq.cc:1549
virtual ~LSQRequest()
Definition: lsq.cc:1453
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
Definition: packet.hh:249
unsigned int numUnissuedStores()
Number of stores in the store buffer which have not been completely issued to the memory system...
Definition: lsq.hh:512
const std::string & name() const
Definition: trace.hh:156
void tryToSendToTransfers(LSQRequestPtr request)
Try and issue a memory access for a translated request at the head of the requests queue...
Definition: lsq.cc:962
A virtual base opaque structure used to hold state associated with the packet (e.g., an MSHR), specific to a SimObject that sees the packet.
Definition: packet.hh:397
AddrRangeCoverage
Coverage of one address range with another.
Definition: lsq.hh:79
void step()
Step checks the queues to see if there are issuable transfers which were not otherwise picked up by t...
Definition: lsq.cc:1468
A load/store queue that allows outstanding reads and writes.
unsigned int numAccessesInDTLB
Number of requests in the DTLB in the requests queue.
Definition: lsq.hh:598
Mode
Definition: tlb.hh:57
void makeFragmentRequests()
Make all the Requests for this transfer's fragments so that those requests can be sent for address tr...
Definition: lsq.cc:417
int16_t ThreadID
Thread index/ID type.
Definition: types.hh:225
void makeResponse()
Take a request packet and modify it in place to be suitable for returning as a response to that reque...
Definition: packet.hh:931
void sendStoreToStoreBuffer(LSQRequestPtr request)
A store has been committed, please move it to the store buffer.
Definition: lsq.cc:1536
LSQRequestPtr retryRequest
The request (from either requests or the store buffer) which is currently waiting to have its memory acc...
Definition: lsq.hh:611
bool recvTimingResp(PacketPtr pkt)
Memory interface.
Definition: lsq.cc:1289
void disableMemAccess()
Definition: lsq.cc:111
void startAddrTranslation()
Send single translation request.
Definition: lsq.cc:298
virtual ~LSQ()
Definition: lsq.cc:1450
void insert(LSQRequestPtr request)
Insert a request at the back of the queue.
Definition: lsq.cc:744
bool instIsRightStream(MinorDynInstPtr inst)
Does the given instruction have the right stream sequence number to be committed? ...
Definition: execute.cc:1864
bool skipped
Was skipped.
Definition: lsq.hh:152
LSQ & port
Owning port.
Definition: lsq.hh:124
MemCmd cmd
The command field of the packet.
Definition: packet.hh:316
void sendNextFragmentToTranslation()
Part of the address translation loop, see startAddTranslation.
Definition: lsq.cc:704
bool hasPacketsInMemSystem()
True if this request has any issued packets in the memory system and so can't be interrupted until it...
Definition: lsq.hh:445
Top level definition of the Minor in-order CPU model.
Addr cacheBlockMask
Address Mask for a cache block (e.g.
Definition: lsq.hh:614
void wakeup(ThreadID tid) override
Definition: cpu.cc:147
void stepToNextPacket()
Step on numIssuedFragments.
Definition: lsq.cc:619
PacketPtr packet
Definition: lsq.hh:141
ThreadID contextToThread(ContextID cid)
Convert ContextID to threadID.
Definition: base.hh:302
virtual ThreadContext * getContext(int tn)
Given a thread num, get the thread context for it.
Definition: base.hh:294
std::vector< Minor::MinorThread * > threads
These are thread state-representing objects for this CPU.
Definition: cpu.hh:93
bool isDrained() const
Drained if there is absolutely nothing left in the buffer.
Definition: lsq.hh:520
LSQ(std::string name_, std::string dcache_port_name_, MinorCPU &cpu_, Execute &execute_, unsigned int max_accesses_in_memory_system, unsigned int line_width, unsigned int requests_queue_size, unsigned int transfers_queue_size, unsigned int store_buffer_size, unsigned int store_buffer_cycle_store_limit)
Definition: lsq.cc:1395
void push(ElemType &data)
Push an element into the buffer if it isn&#39;t a bubble.
Definition: buffers.hh:426
#define MINORTRACE(...)
DPRINTFN for MinorTrace reporting.
Definition: trace.hh:60
void retireResponse(PacketPtr packet_)
Keep the given packet as the response packet LSQRequest::packet.
Definition: lsq.cc:323
bool isAnyActiveElement(const std::vector< bool >::const_iterator &it_start, const std::vector< bool >::const_iterator &it_end)
Test if there is any active element in an enablement range.
Definition: utils.hh:86
void tryToSuppressFault()
Instructions may want to suppress translation faults (e.g.
Definition: lsq.cc:75
The constructed pipeline.
const T * getConstPtr() const
Definition: packet.hh:1093
virtual PacketPtr getHeadPacket()=0
Get the next packet to issue for this request.
void completeDisabledMemAccess()
Definition: lsq.cc:94
void dataDynamic(T *p)
Set the data pointer to a value that should have delete [] called on it.
Definition: packet.hh:1072
void handleLockedSnoop(XC *xc, PacketPtr pkt, Addr cacheBlockMask)
Definition: locked_mem.hh:58
void recvReqRetry()
Definition: lsq.cc:1349
void pushSenderState(SenderState *sender_state)
Push a new sender state to the packet and make the current sender state the predecessor of the new on...
Definition: packet.cc:316
SenderState * popSenderState()
Pop the top of the state stack and return a pointer to it.
Definition: packet.cc:324
void copyError(Packet *pkt)
Definition: packet.hh:718
#define warn(...)
Definition: logging.hh:208
const unsigned int inMemorySystemLimit
Maximum number of in-flight accesses issued to the memory system.
Definition: lsq.hh:540
const unsigned int lineWidth
Memory system access width (and snap) in bytes.
Definition: lsq.hh:543
MinorCPU is an in-order CPU model with four fixed pipeline stages:
Definition: cpu.hh:77
RequestPtr request
The underlying request of this LSQRequest.
Definition: lsq.hh:144
void threadSnoop(LSQRequestPtr request)
Snoop other threads monitors on memory system accesses.
Definition: lsq.cc:1767
GenericISA::DelaySlotPCState< MachInst > PCState
Definition: types.hh:41
void setSkipped()
Set this request as having been skipped before a memory transfer was attempted.
Definition: lsq.hh:211
const char data[]
std::shared_ptr< FaultBase > Fault
Definition: types.hh:238
void finish(const Fault &fault_, const RequestPtr &request_, ThreadContext *tc, BaseTLB::Mode mode)
TLB response interface.
Definition: lsq.cc:332
void allocate()
Allocate memory for the packet.
Definition: packet.hh:1226
void deleteRequest(LSQRequestPtr request)
Delete the given request and free the slot it occupied.
Definition: lsq.cc:730
void recvTimingSnoopReq(PacketPtr pkt)
Definition: lsq.cc:1747
void countIssuedStore(LSQRequestPtr request)
Count a store being issued to memory by decrementing numUnissuedAccesses.
Definition: lsq.cc:837
Execute & execute
Definition: lsq.hh:64
void moveFromRequestsToTransfers(LSQRequestPtr request)
Move a request between queues.
Definition: lsq.cc:1265
void forwardStoreData(LSQRequestPtr load, unsigned int slot_number)
Fill the given packet with appropriate data from slot slot_number.
Definition: lsq.cc:807
virtual bool hasPacketsInMemSystem()=0
True if this request has any issued packets in the memory system and so can't be interrupted until it...
bool needsToTick()
May need to be ticked next cycle as one of the queues contains actionable transfers or address tra...
Definition: lsq.cc:1556
unsigned int numTranslatedFragments
Number of fragments that have completed address translation, (numTranslatedFragments + numInTranslati...
Definition: lsq.hh:397

Generated on Fri Jul 3 2020 15:53:00 for gem5 by doxygen 1.8.13