gem5  v19.0.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
lsq.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2013-2014,2017-2018 ARM Limited
3  * All rights reserved
4  *
5  * The license below extends only to copyright in the software and shall
6  * not be construed as granting a license to any other intellectual
7  * property including but not limited to intellectual property relating
8  * to a hardware implementation of the functionality of the software
9  * licensed hereunder. You may use the software subject to the license
10  * terms below provided that you ensure that this notice is replicated
11  * unmodified and in its entirety in all distributions of the software,
12  * modified or unmodified, in source code or in binary form.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions are
16  * met: redistributions of source code must retain the above copyright
17  * notice, this list of conditions and the following disclaimer;
18  * redistributions in binary form must reproduce the above copyright
19  * notice, this list of conditions and the following disclaimer in the
20  * documentation and/or other materials provided with the distribution;
21  * neither the name of the copyright holders nor the names of its
22  * contributors may be used to endorse or promote products derived from
23  * this software without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36  *
37  * Authors: Andrew Bardsley
38  */
39 
40 #include "cpu/minor/lsq.hh"
41 
42 #include <iomanip>
43 #include <sstream>
44 
45 #include "arch/locked_mem.hh"
46 #include "arch/mmapped_ipr.hh"
47 #include "base/logging.hh"
48 #include "cpu/minor/cpu.hh"
50 #include "cpu/minor/execute.hh"
51 #include "cpu/minor/pipeline.hh"
52 #include "cpu/utils.hh"
53 #include "debug/Activity.hh"
54 #include "debug/MinorMem.hh"
55 
56 namespace Minor
57 {
58 
// Construct an LSQ request bound to a single instruction. The request
// starts in the NotIssued state with no packet; the underlying memory
// Request object is allocated immediately so callers can populate it.
59 LSQ::LSQRequest::LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_,
60  PacketDataPtr data_, uint64_t *res_) :
61  SenderState(),
62  port(port_),
63  inst(inst_),
64  isLoad(isLoad_),
65  data(data_),
66  packet(NULL),
67  request(),
68  res(res_),
69  skipped(false),
70  issuedToMemory(false),
71  isTranslationDelayed(false),
72  state(NotIssued)
73 {
// Allocate the shared Request up front; its fields (vaddr etc.) are
// filled in later by the caller.
74  request = std::make_shared<Request>();
75 }
76 
// Re-runs initiateAcc in a scratch ExecContext so the instruction gets a
// chance to suppress a previously recorded translation fault; the thread
// PC is saved and restored around the call so no architectural state leaks.
// NOTE(review): the function signature line (between "void" and "{") is
// missing from this extract — confirm the exact name against upstream lsq.cc.
77 void
79 {
80  SimpleThread &thread = *port.cpu.threads[inst->id.threadId];
81  TheISA::PCState old_pc = thread.pcState();
82  ExecContext context(port.cpu, thread, port.execute, inst);
// Remember the original fault so we can assert it is unchanged when the
// instruction does NOT suppress it.
83  Fault M5_VAR_USED fault = inst->translationFault;
84 
85  // Give the instruction a chance to suppress a translation fault
86  inst->translationFault = inst->staticInst->initiateAcc(&context, nullptr);
87  if (inst->translationFault == NoFault) {
88  DPRINTFS(MinorMem, (&port),
89  "Translation fault suppressed for inst:%s\n", *inst);
90  } else {
91  assert(inst->translationFault == fault);
92  }
93  thread.pcState(old_pc);
94 }
95 
// Completes an instruction whose memory access was predicated off: runs
// completeAcc with the access predicate cleared so the instruction retires
// without touching memory. Thread PC is saved/restored around the call.
// NOTE(review): the signature line is missing from this extract — confirm
// the exact name against upstream lsq.cc.
96 void
98 {
99  DPRINTFS(MinorMem, (&port), "Complete disabled mem access for inst:%s\n",
100  *inst);
101 
102  SimpleThread &thread = *port.cpu.threads[inst->id.threadId];
103  TheISA::PCState old_pc = thread.pcState();
104 
105  ExecContext context(port.cpu, thread, port.execute, inst);
106 
// Clearing the predicate tells completeAcc that no access was performed.
107  context.setMemAccPredicate(false);
108  inst->staticInst->completeAcc(nullptr, &context, inst->traceData);
109 
110  thread.pcState(old_pc);
111 }
112 
// Clears the memory-access predicate on the owning thread so the access
// for this instruction is skipped.
// NOTE(review): the signature line is missing from this extract — confirm
// the exact name against upstream lsq.cc.
113 void
115 {
116  port.cpu.threads[inst->id.threadId]->setMemAccPredicate(false);
117  DPRINTFS(MinorMem, (&port), "Disable mem access for inst:%s\n", *inst);
118 }
119 
122  Addr req1_addr, unsigned int req1_size,
123  Addr req2_addr, unsigned int req2_size)
124 {
125  /* 'end' here means the address of the byte just past the request
126  * blocks */
127  Addr req2_end_addr = req2_addr + req2_size;
128  Addr req1_end_addr = req1_addr + req1_size;
129 
130  AddrRangeCoverage ret;
131 
132  if (req1_addr >= req2_end_addr || req1_end_addr <= req2_addr)
133  ret = NoAddrRangeCoverage;
134  else if (req1_addr <= req2_addr && req1_end_addr >= req2_end_addr)
135  ret = FullAddrRangeCoverage;
136  else
138 
139  return ret;
140 }
141 
144 {
146  request->getPaddr(), request->getSize(),
147  other_request->request->getPaddr(), other_request->request->getSize());
148  /* If there is a strobe mask then store data forwarding might not be
149  * correct. Instead of checking enablemant of every byte we just fall back
150  * to PartialAddrRangeCoverage to prohibit store data forwarding */
151  if (ret == FullAddrRangeCoverage && request->isMasked())
153  return ret;
154 }
155 
156 
// True when this request wraps a memory-barrier instruction.
// NOTE(review): the signature line is missing from this extract — confirm
// the exact name against upstream lsq.cc.
157 bool
159 {
160  return inst->isInst() && inst->staticInst->isMemBarrier();
161 }
162 
// True when this request is waiting to be moved into the store buffer.
// NOTE(review): the signature line is missing from this extract — confirm
// the exact name against upstream lsq.cc.
163 bool
165 {
166  return state == StoreToStoreBuffer;
167 }
168 
// State transition setter with debug tracing of the old and new states.
// NOTE(review): the signature line is missing from this extract — confirm
// the exact name against upstream lsq.cc.
169 void
171 {
172  DPRINTFS(MinorMem, (&port), "Setting state from %d to %d for request:"
173  " %s\n", state, new_state, *inst);
174  state = new_state;
175 }
176 
// True when the request has reached its single terminal state.
// NOTE(review): the signature line is missing from this extract — confirm
// the exact name against upstream lsq.cc.
177 bool
179 {
180  /* @todo, There is currently only one 'completed' state. This
181  * may not be a good choice */
182  return state == Complete;
183 }
184 
185 void
186 LSQ::LSQRequest::reportData(std::ostream &os) const
187 {
188  os << (isLoad ? 'R' : 'W') << ';';
189  inst->reportData(os);
190  os << ';' << state;
191 }
192 
193 std::ostream &
194 operator <<(std::ostream &os, LSQ::AddrRangeCoverage coverage)
195 {
196  switch (coverage) {
198  os << "PartialAddrRangeCoverage";
199  break;
201  os << "FullAddrRangeCoverage";
202  break;
204  os << "NoAddrRangeCoverage";
205  break;
206  default:
207  os << "AddrRangeCoverage-" << static_cast<int>(coverage);
208  break;
209  }
210  return os;
211 }
212 
213 std::ostream &
215 {
216  switch (state) {
218  os << "NotIssued";
219  break;
221  os << "InTranslation";
222  break;
224  os << "Translated";
225  break;
227  os << "Failed";
228  break;
230  os << "RequestIssuing";
231  break;
233  os << "StoreToStoreBuffer";
234  break;
236  os << "StoreInStoreBuffer";
237  break;
239  os << "StoreBufferIssuing";
240  break;
242  os << "RequestNeedsRetry";
243  break;
245  os << "StoreBufferNeedsRetry";
246  break;
248  os << "Complete";
249  break;
250  default:
251  os << "LSQRequestState-" << static_cast<int>(state);
252  break;
253  }
254  return os;
255 }
256 
// Retire a barrier from tracking: if it is the most recent barrier for its
// thread, reset that thread's lastMemBarrier marker to 0 (no barrier
// outstanding).
// NOTE(review): the signature line is missing from this extract; the body
// and the call site `lsq.clearMemBarrier(barrier->inst)` suggest this is
// LSQ::clearMemBarrier — confirm against upstream lsq.cc.
257 void
259 {
260  bool is_last_barrier =
261  inst->id.execSeqNum >= lastMemBarrier[inst->id.threadId];
262 
263  DPRINTF(MinorMem, "Moving %s barrier out of store buffer inst: %s\n",
264  (is_last_barrier ? "last" : "a"), *inst);
265 
266  if (is_last_barrier)
267  lastMemBarrier[inst->id.threadId] = 0;
268 }
269 
// Translation-complete callback (Translation interface) for a single
// (non-split) request: record any fault, otherwise build the packet, and
// nudge the CPU for the next cycle.
// NOTE(review): several original lines are missing from this extract
// (e.g. 272, 274, 283, 285-286, 289, 291, 294, 297) — the second
// parameter list line, state transitions and the wake-up call are among
// them. Confirm against upstream lsq.cc before relying on this body.
270 void
271 LSQ::SingleDataRequest::finish(const Fault &fault_, const RequestPtr &request_,
273 {
275 
276  DPRINTFS(MinorMem, (&port), "Received translation response for"
277  " request: %s delayed:%d %s\n", *inst, isTranslationDelayed,
278  fault_ != NoFault ? fault_->name() : "");
280  if (fault_ != NoFault) {
281  inst->translationFault = fault_;
282  if (isTranslationDelayed) {
284  if (inst->translationFault == NoFault) {
287  }
288  }
290  } else {
292  makePacket();
293  }
295 
296  /* Let's try and wake up the processor for the next cycle */
298 }
299 
// Kick off timing DTLB translation for a single request; if the byte-enable
// mask is present but entirely inactive, the access is skipped instead.
// NOTE(review): the signature line and a few statements (original lines
// 301, 309, 311, 320-321 — likely the state transition and the disabled
// path) are missing from this extract — confirm against upstream lsq.cc.
300 void
302 {
303  ThreadContext *thread = port.cpu.getContext(
304  inst->id.threadId);
305 
306  const auto &byte_enable = request->getByteEnable();
// An empty mask means "all bytes enabled"; otherwise require at least one
// active byte before translating.
307  if (byte_enable.size() == 0 ||
308  isAnyActiveElement(byte_enable.cbegin(), byte_enable.cend())) {
310 
312 
313  DPRINTFS(MinorMem, (&port), "Submitting DTLB request\n");
314  /* Submit the translation request. The response will come through
315  * finish/markDelayed on the LSQRequest as it bears the Translation
316  * interface */
317  thread->getDTBPtr()->translateTiming(
318  request, thread, this, (isLoad ? BaseTLB::Read : BaseTLB::Write));
319  } else {
322  }
323 }
324 
// Accept the memory-system response packet for a single request and mark
// the in-flight packet as returned.
// NOTE(review): the signature line and the final statement (original lines
// 326 and 331) are missing from this extract — confirm against upstream
// lsq.cc.
325 void
327 {
328  DPRINTFS(MinorMem, (&port), "Retiring packet\n");
329  packet = packet_;
330  packetInFlight = false;
332 }
333 
// Translation-complete callback for one fragment of a split request.
// Counts the fragment as translated, records faults, and either builds
// the fragment packets (all fragments done) or schedules translation of
// the next fragment via translationEvent.
// NOTE(review): multiple original lines are missing from this extract
// (e.g. 336, 338, 348, 355, 366, 369-370, 372, 375, 379, 381, 384-385) —
// the second parameter-list line, a wake-up call and several state
// transitions among them. Confirm against upstream lsq.cc.
334 void
335 LSQ::SplitDataRequest::finish(const Fault &fault_, const RequestPtr &request_,
337 {
339 
340  unsigned int M5_VAR_USED expected_fragment_index =
341  numTranslatedFragments;
342 
343  numInTranslationFragments--;
344  numTranslatedFragments++;
345 
346  DPRINTFS(MinorMem, (&port), "Received translation response for fragment"
347  " %d of request: %s delayed:%d %s\n", expected_fragment_index,
349  fault_ != NoFault ? fault_->name() : "");
350 
// Responses must arrive in fragment order.
351  assert(request_ == fragmentRequests[expected_fragment_index]);
352 
353  /* Wake up next cycle to get things going again in case the
354  * tryToSendToTransfers does take */
356 
357  if (fault_ != NoFault) {
358  /* tryToSendToTransfers will handle the fault */
359  inst->translationFault = fault_;
360 
361  DPRINTFS(MinorMem, (&port), "Faulting translation for fragment:"
362  " %d of request: %s\n",
363  expected_fragment_index, *inst);
364 
365  if (expected_fragment_index > 0 || isTranslationDelayed)
367  if (expected_fragment_index == 0) {
368  if (isTranslationDelayed && inst->translationFault == NoFault) {
371  } else {
373  }
374  } else if (inst->translationFault == NoFault) {
376  numTranslatedFragments--;
377  makeFragmentPackets();
378  } else {
380  }
382  } else if (numTranslatedFragments == numFragments) {
383  makeFragmentPackets();
386  } else {
387  /* Avoid calling translateTiming from within ::finish */
388  assert(!translationEvent.scheduled());
389  port.cpu.schedule(translationEvent, curTick());
390  }
391 }
392 
// Split-request constructor: fragment counts start at zero; the
// translationEvent lambda drives one-fragment-at-a-time translation.
// Fragment layout is only known once makeFragmentRequests runs.
// NOTE(review): the first signature line (original 393) and several
// init-list members (original 399-404) are missing from this extract —
// confirm against upstream lsq.cc.
394  bool isLoad_, PacketDataPtr data_, uint64_t *res_) :
395  LSQRequest(port_, inst_, isLoad_, data_, res_),
396  translationEvent([this]{ sendNextFragmentToTranslation(); },
397  "translationEvent"),
398  numFragments(0),
405 {
406  /* Don't know how many elements are needed until the request is
407  * populated by the caller. */
408 }
409 
// Destructor: the split request owns its fragment packets, so free them
// all here (see the "Packets are all deallocated together" note in
// retireResponse).
// NOTE(review): the destructor's signature line (original 410) is missing
// from this extract — confirm against upstream lsq.cc.
411 {
412  for (auto i = fragmentPackets.begin();
413  i != fragmentPackets.end(); i++)
414  {
415  delete *i;
416  }
417 }
418 
// Carve the whole virtual request into cache-line-aligned fragment
// Requests: a (possibly short) first fragment up to the next line
// boundary, whole-line middle fragments, and a trailing partial fragment.
// Fragments whose byte-enable slice is entirely inactive are dropped and
// subtracted from numFragments.
// NOTE(review): the signature line (original 420) is missing from this
// extract — confirm against upstream lsq.cc.
419 void
421 {
422  Addr base_addr = request->getVaddr();
423  unsigned int whole_size = request->getSize();
424  unsigned int line_width = port.lineWidth;
425 
426  unsigned int fragment_size;
427  Addr fragment_addr;
428 
429  std::vector<bool> fragment_write_byte_en;
430 
431  /* Assume that this transfer is across potentially many block snap
432  * boundaries:
433  *
434  * | _|________|________|________|___ |
435  * | |0| 1 | 2 | 3 | 4 | |
436  * | |_|________|________|________|___| |
437  * | | | | | |
438  *
439  * The first transfer (0) can be up to lineWidth in size.
440  * All the middle transfers (1-3) are lineWidth in size
441  * The last transfer (4) can be from zero to lineWidth - 1 in size
442  */
443  unsigned int first_fragment_offset =
444  addrBlockOffset(base_addr, line_width);
445  unsigned int last_fragment_size =
446  addrBlockOffset(base_addr + whole_size, line_width);
447  unsigned int first_fragment_size =
448  line_width - first_fragment_offset;
449 
450  unsigned int middle_fragments_total_size =
451  whole_size - (first_fragment_size + last_fragment_size);
452 
// Middle portion must be an exact multiple of the line width.
453  assert(addrBlockOffset(middle_fragments_total_size, line_width) == 0);
454 
455  unsigned int middle_fragment_count =
456  middle_fragments_total_size / line_width;
457 
458  numFragments = 1 /* first */ + middle_fragment_count +
459  (last_fragment_size == 0 ? 0 : 1);
460 
461  DPRINTFS(MinorMem, (&port), "Dividing transfer into %d fragmentRequests."
462  " First fragment size: %d Last fragment size: %d\n",
463  numFragments, first_fragment_size,
464  (last_fragment_size == 0 ? line_width : last_fragment_size));
465 
466  assert(((middle_fragment_count * line_width) +
467  first_fragment_size + last_fragment_size) == whole_size);
468 
469  fragment_addr = base_addr;
470  fragment_size = first_fragment_size;
471 
472  /* Just past the last address in the request */
473  Addr end_addr = base_addr + whole_size;
474 
475  auto& byte_enable = request->getByteEnable();
476  unsigned int num_disabled_fragments = 0;
477 
478  for (unsigned int fragment_index = 0; fragment_index < numFragments;
479  fragment_index++)
480  {
481  bool M5_VAR_USED is_last_fragment = false;
482 
483  if (fragment_addr == base_addr) {
484  /* First fragment */
485  fragment_size = first_fragment_size;
486  } else {
487  if ((fragment_addr + line_width) > end_addr) {
488  /* Adjust size of last fragment */
489  fragment_size = end_addr - fragment_addr;
490  is_last_fragment = true;
491  } else {
492  /* Middle fragments */
493  fragment_size = line_width;
494  }
495  }
496 
497  RequestPtr fragment = std::make_shared<Request>();
498  bool disabled_fragment = false;
499 
500  fragment->setContext(request->contextId());
501  if (byte_enable.empty()) {
502  fragment->setVirt(0 /* asid */,
503  fragment_addr, fragment_size, request->getFlags(),
504  request->masterId(),
505  request->getPC());
506  } else {
507  // Set up byte-enable mask for the current fragment
508  auto it_start = byte_enable.begin() +
509  (fragment_addr - base_addr);
510  auto it_end = byte_enable.begin() +
511  (fragment_addr - base_addr) + fragment_size;
512  if (isAnyActiveElement(it_start, it_end)) {
513  fragment->setVirt(0 /* asid */,
514  fragment_addr, fragment_size, request->getFlags(),
515  request->masterId(),
516  request->getPC());
517  fragment->setByteEnable(std::vector<bool>(it_start, it_end));
518  } else {
// Whole fragment masked off — drop it entirely.
519  disabled_fragment = true;
520  }
521  }
522 
523  if (!disabled_fragment) {
524  DPRINTFS(MinorMem, (&port), "Generating fragment addr: 0x%x"
525  " size: %d (whole request addr: 0x%x size: %d) %s\n",
526  fragment_addr, fragment_size, base_addr, whole_size,
527  (is_last_fragment ? "last fragment" : ""));
528 
529  fragmentRequests.push_back(fragment);
530  } else {
531  num_disabled_fragments++;
532  }
533 
534  fragment_addr += fragment_size;
535  }
536  assert(numFragments >= num_disabled_fragments);
537  numFragments -= num_disabled_fragments;
538 }
539 
// Build one Packet per translated fragment (copying the relevant slice of
// the store data for writes), accumulate fragment flags into the parent
// request, then build the overall response packet using the first
// fragment's physical address.
// NOTE(review): the signature line (original 541) is missing from this
// extract — confirm against upstream lsq.cc.
540 void
542 {
543  assert(numTranslatedFragments > 0);
544  Addr base_addr = request->getVaddr();
545 
546  DPRINTFS(MinorMem, (&port), "Making packets for request: %s\n", *inst);
547 
548  for (unsigned int fragment_index = 0;
549  fragment_index < numTranslatedFragments;
550  fragment_index++)
551  {
552  RequestPtr fragment = fragmentRequests[fragment_index];
553 
554  DPRINTFS(MinorMem, (&port), "Making packet %d for request: %s"
555  " (%d, 0x%x)\n",
556  fragment_index, *inst,
557  (fragment->hasPaddr() ? "has paddr" : "no paddr"),
558  (fragment->hasPaddr() ? fragment->getPaddr() : 0));
559 
560  Addr fragment_addr = fragment->getVaddr();
561  unsigned int fragment_size = fragment->getSize();
562 
563  uint8_t *request_data = NULL;
564 
565  if (!isLoad) {
566  /* Split data for Packets. Will become the property of the
567  * outgoing Packets */
568  request_data = new uint8_t[fragment_size];
569  std::memcpy(request_data, data + (fragment_addr - base_addr),
570  fragment_size);
571  }
572 
573  assert(fragment->hasPaddr());
574 
575  PacketPtr fragment_packet =
576  makePacketForRequest(fragment, isLoad, this, request_data);
577 
578  fragmentPackets.push_back(fragment_packet);
579  /* Accumulate flags in parent request */
580  request->setFlags(fragment->getFlags());
581  }
582 
583  /* Might as well make the overall/response packet here */
584  /* Get the physical address for the whole request/packet from the first
585  * fragment */
586  request->setPaddr(fragmentRequests[0]->getPaddr());
587  makePacket();
588 }
589 
// Begin translating a split request: when fragments exist, translation is
// driven one fragment at a time; the @todo loop sketch describes the
// intended sequencing.
// NOTE(review): the signature line and several statements (original lines
// 591, 593, 596-598, 608, 610-611 — including makeFragmentRequests, the
// state transition, the first sendNextFragmentToTranslation call and the
// no-fragment path) are missing from this extract — confirm against
// upstream lsq.cc.
590 void
592 {
594 
595  if (numFragments > 0) {
599 
600  /* @todo, just do these in sequence for now with
601  * a loop of:
602  * do {
603  * sendNextFragmentToTranslation ; translateTiming ; finish
604  * } while (numTranslatedFragments != numFragments);
605  */
606 
607  /* Do first translation */
609  } else {
612  }
613 }
614 
// Returns the next fragment packet to send.
// NOTE(review): this function's signature, assertion and return statement
// (original lines 616, 618, 620) were dropped by the doc extract; only the
// return type and braces remain — confirm against upstream lsq.cc.
615 PacketPtr
617 {
619 
621 }
622 
// Advances to the next fragment packet after a successful send.
// NOTE(review): this function's signature and body statements (original
// lines 624, 626, 628) were dropped by the doc extract; only the return
// type and braces remain — confirm against upstream lsq.cc.
623 void
625 {
627 
629 }
630 
// Retire one fragment response of a split transfer: copy load fragment
// data into the accumulated response buffer, propagate errors by marking
// the whole request skipped, and, once every translated fragment has
// retired, finalise the overall response packet.
// NOTE(review): several original lines are missing from this extract
// (e.g. 632, 635, 641, 643, 675, 681 — the signature, the retired-count
// increment/use and the completion state transition among them) — confirm
// against upstream lsq.cc.
631 void
633 {
634  assert(inst->translationFault == NoFault);
636 
637  DPRINTFS(MinorMem, (&port), "Retiring fragment addr: 0x%x size: %d"
638  " offset: 0x%x (retired fragment num: %d)\n",
639  response->req->getVaddr(), response->req->getSize(),
640  request->getVaddr() - response->req->getVaddr(),
642 
644 
645  if (skipped) {
646  /* Skip because we already knew the request had faulted or been
647  * skipped */
648  DPRINTFS(MinorMem, (&port), "Skipping this fragment\n");
649  } else if (response->isError()) {
650  /* Mark up the error and leave to execute to handle it */
651  DPRINTFS(MinorMem, (&port), "Fragment has an error, skipping\n");
652  setSkipped();
653  packet->copyError(response);
654  } else {
655  if (isLoad) {
656  if (!data) {
657  /* For a split transfer, a Packet must be constructed
658  * to contain all returning data. This is that packet's
659  * data */
660  data = new uint8_t[request->getSize()];
661  }
662 
663  /* Populate the portion of the overall response data represented
664  * by the response fragment */
665  std::memcpy(
666  data + (response->req->getVaddr() - request->getVaddr()),
667  response->getConstPtr<uint8_t>(),
668  response->req->getSize());
669  }
670  }
671 
672  /* Complete early if we're skipping are no more in-flight accesses */
673  if (skipped && !hasPacketsInMemSystem()) {
674  DPRINTFS(MinorMem, (&port), "Completed skipped burst\n");
676  if (packet->needsResponse())
677  packet->makeResponse();
678  }
679 
680  if (numRetiredFragments == numTranslatedFragments)
682 
683  if (!skipped && isComplete()) {
684  DPRINTFS(MinorMem, (&port), "Completed burst %d\n", packet != NULL);
685 
686  DPRINTFS(MinorMem, (&port), "Retired packet isRead: %d isWrite: %d"
687  " needsResponse: %d packetSize: %s requestSize: %s responseSize:"
688  " %s\n", packet->isRead(), packet->isWrite(),
689  packet->needsResponse(), packet->getSize(), request->getSize(),
690  response->getSize());
691 
692  /* A request can become complete by several paths, this is a sanity
693  * check to make sure the packet's data is created */
694  if (!data) {
695  data = new uint8_t[request->getSize()];
696  }
697 
698  if (isLoad) {
699  DPRINTFS(MinorMem, (&port), "Copying read data\n");
700  std::memcpy(packet->getPtr<uint8_t>(), data, request->getSize());
701  }
702  packet->makeResponse();
703  }
704 
705  /* Packets are all deallocated together in ~SplitLSQRequest */
706 }
707 
// Submit the next untranslated fragment to the DTLB; the response arrives
// through this request's finish() callback.
// NOTE(review): the signature line and a few statements (original lines
// 709, 719-720, 724 — the in-translation counter bump and the Read/Write
// mode argument tail among them) are missing from this extract — confirm
// against upstream lsq.cc.
708 void
710 {
711  unsigned int fragment_index = numTranslatedFragments;
712 
713  ThreadContext *thread = port.cpu.getContext(
714  inst->id.threadId);
715 
716  DPRINTFS(MinorMem, (&port), "Submitting DTLB request for fragment: %d\n",
717  fragment_index);
718 
721 
722  thread->getDTBPtr()->translateTiming(
723  fragmentRequests[fragment_index], thread, this, (isLoad ?
725 }
726 
// True while the store buffer still has a free slot.
// NOTE(review): the signature line (original 728) is missing from this
// extract — confirm against upstream lsq.cc.
727 bool
729 {
730  /* @todo, support store amalgamation */
731  return slots.size() < numSlots;
732 }
733 
// Remove and free a request from the store buffer, if present.
// NOTE(review): the signature line (original 735) is missing from this
// extract — confirm against upstream lsq.cc.
734 void
736 {
737  auto found = std::find(slots.begin(), slots.end(), request);
738 
739  if (found != slots.end()) {
740  DPRINTF(MinorMem, "Deleting request: %s %s %s from StoreBuffer\n",
741  request, *found, *(request->inst));
742  slots.erase(found);
743 
// The store buffer owns requests it holds, so free on removal.
744  delete request;
745  }
746 }
747 
// Append a request to the store buffer, count it as unissued, and wake
// the execute stage so the buffer is stepped next cycle. Inserting
// without space is a warning, not a hard failure.
// NOTE(review): the signature line and the state transition under the
// `if` (original lines 749 and 761 — presumably
// setState(StoreInStoreBuffer)) are missing from this extract — confirm
// against upstream lsq.cc.
748 void
750 {
751  if (!canInsert()) {
752  warn("%s: store buffer insertion without space to insert from"
753  " inst: %s\n", name(), *(request->inst));
754  }
755 
756  DPRINTF(MinorMem, "Pushing store: %s into store buffer\n", request);
757 
758  numUnissuedAccesses++;
759 
760  if (request->state != LSQRequest::Complete)
762 
763  slots.push_back(request);
764 
765  /* Let's try and wake up the processor for the next cycle to step
766  * the store buffer */
767  lsq.cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
768 }
769 
// Search the store buffer from most-recent to least-recent for a store
// whose address range covers the given request; on a hit, report the slot
// index and the coverage kind. Same-thread stores only; cache maintenance
// operations carry no data and are never forwarded.
// NOTE(review): the leading signature lines and the `ret` initialiser
// (original lines 770-771 and 776 — presumably
// `AddrRangeCoverage ret = NoAddrRangeCoverage;`) are missing from this
// extract — confirm against upstream lsq.cc.
772  unsigned int &found_slot)
773 {
774  unsigned int slot_index = slots.size() - 1;
775  auto i = slots.rbegin();
777 
778  /* Traverse the store buffer in reverse order (most to least recent)
779  * and try to find a slot whose address range overlaps this request */
780  while (ret == NoAddrRangeCoverage && i != slots.rend()) {
781  LSQRequestPtr slot = *i;
782 
783  /* Cache maintenance instructions go down via the store path but
784  * they carry no data and they shouldn't be considered
785  * for forwarding */
786  if (slot->packet &&
787  slot->inst->id.threadId == request->inst->id.threadId &&
788  !slot->packet->req->isCacheMaintenance()) {
789  AddrRangeCoverage coverage = slot->containsAddrRangeOf(request);
790 
791  if (coverage != NoAddrRangeCoverage) {
792  DPRINTF(MinorMem, "Forwarding: slot: %d result: %s thisAddr:"
793  " 0x%x thisSize: %d slotAddr: 0x%x slotSize: %d\n",
794  slot_index, coverage,
795  request->request->getPaddr(), request->request->getSize(),
796  slot->request->getPaddr(), slot->request->getSize());
797 
798  found_slot = slot_index;
799  ret = coverage;
800  }
801  }
802 
803  i++;
804  slot_index--;
805  }
806 
807  return ret;
808 }
809 
// Copy the load's bytes straight out of a fully-covering store-buffer
// slot's packet data (store-to-load forwarding). Caller must have
// established FullAddrRangeCoverage for the chosen slot.
// NOTE(review): the first signature line (original 812 — presumably
// `LSQ::StoreBuffer::forwardStoreData(LSQRequestPtr load,`) is missing
// from this extract — confirm against upstream lsq.cc.
811 void
813  unsigned int slot_number)
814 {
815  assert(slot_number < slots.size());
816  assert(load->packet);
817  assert(load->isLoad);
818 
819  LSQRequestPtr store = slots[slot_number];
820 
821  assert(store->packet);
822  assert(store->containsAddrRangeOf(load) == FullAddrRangeCoverage);
823 
824  Addr load_addr = load->request->getPaddr();
825  Addr store_addr = store->request->getPaddr();
826  Addr addr_offset = load_addr - store_addr;
827 
828  unsigned int load_size = load->request->getSize();
829 
830  DPRINTF(MinorMem, "Forwarding %d bytes for addr: 0x%x from store buffer"
831  " slot: %d addr: 0x%x addressOffset: 0x%x\n",
832  load_size, load_addr, slot_number,
833  store_addr, addr_offset);
834 
835  void *load_packet_data = load->packet->getPtr<void>();
836  void *store_packet_data = store->packet->getPtr<uint8_t>() + addr_offset;
837 
838  std::memcpy(load_packet_data, store_packet_data, load_size);
839 }
840 
// Account for a store issued from the buffer; barriers are accounted for
// when cleared from the queue instead.
// NOTE(review): the signature line (original 842) is missing from this
// extract — confirm against upstream lsq.cc.
841 void
843 {
844  /* Barriers are accounted for as they are cleared from
845  * the queue, not after their transfers are complete */
846  if (!request->isBarrier())
847  numUnissuedAccesses--;
848 }
849 
// Per-cycle step of the store buffer: first pop any completed leading
// barriers (clearing them in the LSQ), then issue queued stores in order
// up to storeLimitPerCycle, stopping at barriers or when the memory
// system refuses a send (ordering must be preserved).
// NOTE(review): the signature line and the loop's request binding
// (original lines 851 and 889 — presumably `LSQRequestPtr request = *i;`)
// are missing from this extract — confirm against upstream lsq.cc.
850 void
852 {
853  DPRINTF(MinorMem, "StoreBuffer step numUnissuedAccesses: %d\n",
854  numUnissuedAccesses);
855 
856  if (numUnissuedAccesses != 0 && lsq.state == LSQ::MemoryRunning) {
857  /* Clear all the leading barriers */
858  while (!slots.empty() &&
859  slots.front()->isComplete() && slots.front()->isBarrier())
860  {
861  LSQRequestPtr barrier = slots.front();
862 
863  DPRINTF(MinorMem, "Clearing barrier for inst: %s\n",
864  *(barrier->inst));
865 
866  numUnissuedAccesses--;
867  lsq.clearMemBarrier(barrier->inst);
868  slots.pop_front();
869 
870  delete barrier;
871  }
872 
873  auto i = slots.begin();
874  bool issued = true;
875  unsigned int issue_count = 0;
876 
877  /* Skip trying if the memory system is busy */
878  if (lsq.state == LSQ::MemoryNeedsRetry)
879  issued = false;
880 
881  /* Try to issue all stores in order starting from the head
882  * of the queue. Responses are allowed to be retired
883  * out of order */
884  while (issued &&
885  issue_count < storeLimitPerCycle &&
886  lsq.canSendToMemorySystem() &&
887  i != slots.end())
888  {
890 
891  DPRINTF(MinorMem, "Considering request: %s, sentAllPackets: %d"
892  " state: %s\n",
893  *(request->inst), request->sentAllPackets(),
894  request->state);
895 
896  if (request->isBarrier() && request->isComplete()) {
897  /* Give up at barriers */
898  issued = false;
899  } else if (!(request->state == LSQRequest::StoreBufferIssuing &&
900  request->sentAllPackets()))
901  {
902  DPRINTF(MinorMem, "Trying to send request: %s to memory"
903  " system\n", *(request->inst));
904 
905  if (lsq.tryToSend(request)) {
906  countIssuedStore(request);
907  issue_count++;
908  } else {
909  /* Don't step on to the next store buffer entry if this
910  * one hasn't issued all its packets as the store
911  * buffer must still enforce ordering */
912  issued = false;
913  }
914  }
915  i++;
916  }
917  }
918 }
919 
// On commit, a barrier that wasn't already in the store buffer gets a
// BarrierDataRequest entry inserted so the buffer can tick it off in
// order; if the barrier wasn't actually committed its marker is cleared.
// NOTE(review): the first signature line (original 921) is missing from
// this extract — confirm against upstream lsq.cc.
920 void
922  bool committed)
923 {
924  if (committed) {
925  /* Not already sent to the store buffer as a store request? */
926  if (!inst->inStoreBuffer) {
927  /* Insert an entry into the store buffer to tick off barriers
928  * until there are none in flight */
929  storeBuffer.insert(new BarrierDataRequest(*this, inst));
930  }
931  } else {
932  /* Clear the barrier anyway if it wasn't actually committed */
933  clearMemBarrier(inst);
934  }
935 }
936 
// Emit the MinorTrace line for the store buffer: each occupied slot
// reports via LSQRequest::reportData, empty slots print '-', entries are
// comma-separated, followed by the unissued-store count.
// NOTE(review): the signature line (original 938) is missing from this
// extract — confirm against upstream lsq.cc.
937 void
939 {
940  unsigned int size = slots.size();
941  unsigned int i = 0;
942  std::ostringstream os;
943 
944  while (i < size) {
945  LSQRequestPtr request = slots[i];
946 
947  request->reportData(os);
948 
949  i++;
950  if (i < numSlots)
951  os << ',';
952  }
953 
// Pad the remaining (empty) slots so the trace always shows numSlots
// fields.
954  while (i < numSlots) {
955  os << '-';
956 
957  i++;
958  if (i < numSlots)
959  os << ',';
960  }
961 
962  MINORTRACE("addr=%s num_unissued_stores=%d\n", os.str(),
963  numUnissuedAccesses);
964 }
965 
// Attempt to advance the head request from the requests queue towards the
// memory system / transfers queue. Handles, in order: retry back-pressure,
// in-flight translation, queue-head and transfers-space checks, already
// complete/failed requests, wrong-stream aborts, translation faults,
// load-vs-store ordering, store buffering, non-bufferable head-of-queue
// discipline, store-buffer forwarding for loads, and LLSC handling before
// finally trying to send.
// NOTE(review): numerous original lines are missing from this extract
// (e.g. 967, 1051, 1071, 1098, 1111, 1158 — the signature, the store
// state transition, the NoAddrRangeCoverage comparison operand, the
// canForwardDataToLoad call head, the PartialAddrRangeCoverage case label
// and an interrupt-tracking counter among them) — confirm against
// upstream lsq.cc before relying on this body.
966 void
968 {
969  if (state == MemoryNeedsRetry) {
970  DPRINTF(MinorMem, "Request needs retry, not issuing to"
971  " memory until retry arrives\n");
972  return;
973  }
974 
975  if (request->state == LSQRequest::InTranslation) {
976  DPRINTF(MinorMem, "Request still in translation, not issuing to"
977  " memory\n");
978  return;
979  }
980 
981  assert(request->state == LSQRequest::Translated ||
982  request->state == LSQRequest::RequestIssuing ||
983  request->state == LSQRequest::Failed ||
984  request->state == LSQRequest::Complete);
985 
986  if (requests.empty() || requests.front() != request) {
987  DPRINTF(MinorMem, "Request not at front of requests queue, can't"
988  " issue to memory\n");
989  return;
990  }
991 
992  if (transfers.unreservedRemainingSpace() == 0) {
993  DPRINTF(MinorMem, "No space to insert request into transfers"
994  " queue\n");
995  return;
996  }
997 
998  if (request->isComplete() || request->state == LSQRequest::Failed) {
999  DPRINTF(MinorMem, "Passing a %s transfer on to transfers"
1000  " queue\n", (request->isComplete() ? "completed" : "failed"));
1001  request->setState(LSQRequest::Complete);
1002  request->setSkipped();
1003  moveFromRequestsToTransfers(request);
1004  return;
1005  }
1006 
1007  if (!execute.instIsRightStream(request->inst)) {
1008  /* Wrong stream, try to abort the transfer but only do so if
1009  * there are no packets in flight */
1010  if (request->hasPacketsInMemSystem()) {
1011  DPRINTF(MinorMem, "Request's inst. is from the wrong stream,"
1012  " waiting for responses before aborting request\n");
1013  } else {
1014  DPRINTF(MinorMem, "Request's inst. is from the wrong stream,"
1015  " aborting request\n");
1016  request->setState(LSQRequest::Complete);
1017  request->setSkipped();
1018  moveFromRequestsToTransfers(request);
1019  }
1020  return;
1021  }
1022 
1023  if (request->inst->translationFault != NoFault) {
1024  if (request->inst->staticInst->isPrefetch()) {
1025  DPRINTF(MinorMem, "Not signalling fault for faulting prefetch\n");
1026  }
1027  DPRINTF(MinorMem, "Moving faulting request into the transfers"
1028  " queue\n");
1029  request->setState(LSQRequest::Complete);
1030  request->setSkipped();
1031  moveFromRequestsToTransfers(request);
1032  return;
1033  }
1034 
1035  bool is_load = request->isLoad;
1036  bool is_llsc = request->request->isLLSC();
1037  bool is_swap = request->request->isSwap();
1038  bool is_atomic = request->request->isAtomic();
1039  bool bufferable = !(request->request->isStrictlyOrdered() ||
1040  is_llsc || is_swap || is_atomic);
1041 
1042  if (is_load) {
1043  if (numStoresInTransfers != 0) {
1044  DPRINTF(MinorMem, "Load request with stores still in transfers"
1045  " queue, stalling\n");
1046  return;
1047  }
1048  } else {
1049  /* Store. Can it be sent to the store buffer? */
1050  if (bufferable && !request->request->isMmappedIpr()) {
1052  moveFromRequestsToTransfers(request);
1053  DPRINTF(MinorMem, "Moving store into transfers queue\n");
1054  return;
1055  }
1056  }
1057 
1058  /* Check if this is the head instruction (and so must be executable as
1059  * its stream sequence number was checked above) for loads which must
1060  * not be speculatively issued and stores which must be issued here */
1061  if (!bufferable) {
1062  if (!execute.instIsHeadInst(request->inst)) {
1063  DPRINTF(MinorMem, "Memory access not the head inst., can't be"
1064  " sure it can be performed, not issuing\n");
1065  return;
1066  }
1067 
1068  unsigned int forwarding_slot = 0;
1069 
1070  if (storeBuffer.canForwardDataToLoad(request, forwarding_slot) !=
1072  {
1073  // There's at least another request that targets the same
1074  // address and is staying in the storeBuffer. Since our
1075  // request is non-bufferable (e.g., strictly ordered or atomic),
1076  // we must wait for the other request in the storeBuffer to
1077  // complete before we can issue this non-bufferable request.
1078  // This is to make sure that the order they access the cache is
1079  // correct.
1080  DPRINTF(MinorMem, "Memory access can receive forwarded data"
1081  " from the store buffer, but need to wait for store buffer"
1082  " to drain\n");
1083  return;
1084  }
1085  }
1086 
1087  /* True: submit this packet to the transfers queue to be sent to the
1088  * memory system.
1089  * False: skip the memory and push a packet for this request onto
1090  * requests */
1091  bool do_access = true;
1092 
1093  if (!is_llsc) {
1094  /* Check for match in the store buffer */
1095  if (is_load) {
1096  unsigned int forwarding_slot = 0;
1097  AddrRangeCoverage forwarding_result =
1099  forwarding_slot);
1100 
1101  switch (forwarding_result) {
1102  case FullAddrRangeCoverage:
1103  /* Forward data from the store buffer into this request and
1104  * repurpose this request's packet into a response packet */
1105  storeBuffer.forwardStoreData(request, forwarding_slot);
1106  request->packet->makeResponse();
1107 
1108  /* Just move between queues, no access */
1109  do_access = false;
1110  break;
1112  DPRINTF(MinorMem, "Load partly satisfied by store buffer"
1113  " data. Must wait for the store to complete\n");
1114  return;
1115  break;
1116  case NoAddrRangeCoverage:
1117  DPRINTF(MinorMem, "No forwardable data from store buffer\n");
1118  /* Fall through to try access */
1119  break;
1120  }
1121  }
1122  } else {
1123  if (!canSendToMemorySystem()) {
1124  DPRINTF(MinorMem, "Can't send request to memory system yet\n");
1125  return;
1126  }
1127 
1128  SimpleThread &thread = *cpu.threads[request->inst->id.threadId];
1129 
1130  TheISA::PCState old_pc = thread.pcState();
1131  ExecContext context(cpu, thread, execute, request->inst);
1132 
1133  /* Handle LLSC requests and tests */
1134  if (is_load) {
1135  TheISA::handleLockedRead(&context, request->request);
1136  } else {
1137  do_access = TheISA::handleLockedWrite(&context,
1138  request->request, cacheBlockMask);
1139 
1140  if (!do_access) {
1141  DPRINTF(MinorMem, "Not perfoming a memory "
1142  "access for store conditional\n");
1143  }
1144  }
1145  thread.pcState(old_pc);
1146  }
1147 
1148  /* See the do_access comment above */
1149  if (do_access) {
1150  if (!canSendToMemorySystem()) {
1151  DPRINTF(MinorMem, "Can't send request to memory system yet\n");
1152  return;
1153  }
1154 
1155  /* Remember if this is an access which can't be idly
1156  * discarded by an interrupt */
1157  if (!bufferable && !request->issuedToMemory) {
1159  request->issuedToMemory = true;
1160  }
1161 
1162  if (tryToSend(request)) {
1163  moveFromRequestsToTransfers(request);
1164  }
1165  } else {
1166  request->setState(LSQRequest::Complete);
1167  moveFromRequestsToTransfers(request);
1168  }
1169 }
1170 
/* Try to send (or resend) a memory request's next/only packet to the
 * memory system.  Returns true once the request has had all of its
 * packets sent, so the caller can move/retire it.
 * NOTE(review): this doxygen extract drops hyperlinked lines (the
 * signature at original line 1172, the switch case labels and a few
 * statements) -- check against the full lsq.cc before editing. */
1171 bool
1173 {
1174  bool ret = false;
1175 
     /* No space in the memory system: leave ret false so the caller
      * keeps the request where it is */
1176  if (!canSendToMemorySystem()) {
1177  DPRINTF(MinorMem, "Can't send request: %s yet, no space in memory\n",
1178  *(request->inst));
1179  } else {
1180  PacketPtr packet = request->getHeadPacket();
1181 
1182  DPRINTF(MinorMem, "Trying to send request: %s addr: 0x%x\n",
1183  *(request->inst), packet->req->getVaddr());
1184 
1185  /* The sender state of the packet *must* be an LSQRequest
1186  * so the response can be correctly handled */
1187  assert(packet->findNextSenderState<LSQRequest>());
1188 
     /* Memory-mapped IPR accesses are completed locally here rather
      * than being sent out through the data cache port */
1189  if (request->request->isMmappedIpr()) {
1190  ThreadContext *thread =
1192  request->request->contextId()));
1193 
1194  if (request->isLoad) {
1195  DPRINTF(MinorMem, "IPR read inst: %s\n", *(request->inst));
1196  TheISA::handleIprRead(thread, packet);
1197  } else {
1198  DPRINTF(MinorMem, "IPR write inst: %s\n", *(request->inst));
1199  TheISA::handleIprWrite(thread, packet);
1200  }
1201 
1202  request->stepToNextPacket();
1203  ret = request->sentAllPackets();
1204 
1205  if (!ret) {
1206  DPRINTF(MinorMem, "IPR access has another packet: %s\n",
1207  *(request->inst));
1208  }
1209 
1210  if (ret)
1211  request->setState(LSQRequest::Complete);
1212  else
1214  } else if (dcachePort.sendTimingReq(packet)) {
     /* Packet accepted by the cache: advance to this request's
      * next packet */
1215  DPRINTF(MinorMem, "Sent data memory request\n");
1216 
1218 
1219  request->stepToNextPacket();
1220 
1221  ret = request->sentAllPackets();
1222 
1223  switch (request->state) {
1226  /* Fully or partially issued a request in the transfers
1227  * queue */
1229  break;
1232  /* Fully or partially issued a request in the store
1233  * buffer */
1235  break;
1236  default:
1237  panic("Unrecognized LSQ request state %d.", request->state);
1238  }
1239 
1240  state = MemoryRunning;
1241  } else {
     /* The cache refused the packet: wait for recvReqRetry before
      * trying to send again */
1242  DPRINTF(MinorMem,
1243  "Sending data memory request - needs retry\n");
1244 
1245  /* Needs to be resent, wait for that */
1248 
1249  switch (request->state) {
1253  break;
1257  break;
1258  default:
1259  panic("Unrecognized LSQ request state %d.", request->state);
1260  }
1261  }
1262  }
1263 
     /* A fully-sent request may need to poke other threads' address
      * monitors (see threadSnoop below) */
1264  if (ret)
1265  threadSnoop(request);
1266 
1267  return ret;
1268 }
1269 
/* Sanity-checked move of the head of the requests queue into the
 * transfers queue.
 * NOTE(review): the statement under the `if (!request->isLoad)` guard
 * (original line 1281 -- presumably the numStoresInTransfers increment
 * the comment below describes) is missing from this extract; confirm
 * against the full lsq.cc. */
1270 void
1272 {
1273  assert(!requests.empty() && requests.front() == request);
1274  assert(transfers.unreservedRemainingSpace() != 0);
1275 
1276  /* Need to count the number of stores in the transfers
1277  * queue so that loads know when their store buffer forwarding
1278  * results will be correct (only when all those stores
1279  * have reached the store buffer) */
1280  if (!request->isLoad)
1282 
1283  requests.pop();
1284  transfers.push(request);
1285 }
1286 
/* True when a new request may be sent: the LSQ is not waiting on a
 * retry (state == MemoryRunning) and -- per the second operand dropped
 * by this extract at original line 1291 -- presumably the number of
 * in-flight accesses is below inMemorySystemLimit; TODO confirm. */
1287 bool
1289 {
1290  return state == MemoryRunning &&
1292 }
1293 
/* Memory-side callback: recover the LSQRequest from the response
 * packet's sender state, retire the response into it, and tidy up
 * completed store-buffer requests.  Always returns true (the LSQ never
 * back-pressures responses -- see the "Never busy" comment below).
 * NOTE(review): this extract drops hyperlinked lines (the signature at
 * original line 1295, the request assignment at 1297, the case labels
 * and the statements at 1305/1348). */
1294 bool
1296 {
1298  safe_cast<LSQRequestPtr>(response->popSenderState());
1299 
1300  DPRINTF(MinorMem, "Received response packet inst: %s"
1301  " addr: 0x%x cmd: %s\n",
1302  *(request->inst), response->getAddr(),
1303  response->cmd.toString());
1304 
1306 
     /* Errors are only reported here, not acted upon */
1307  if (response->isError()) {
1308  DPRINTF(MinorMem, "Received error response packet: %s\n",
1309  *request->inst);
1310  }
1311 
1312  switch (request->state) {
1315  /* Response to a request from the transfers queue */
1316  request->retireResponse(response);
1317 
1318  DPRINTF(MinorMem, "Has outstanding packets?: %d %d\n",
1319  request->hasPacketsInMemSystem(), request->isComplete());
1320 
1321  break;
1324  /* Response to a request from the store buffer */
1325  request->retireResponse(response);
1326 
1327  /* Remove completed requests unless they are barriers (which will
1328  * need to be removed in order) */
1329  if (request->isComplete()) {
1330  if (!request->isBarrier()) {
1331  storeBuffer.deleteRequest(request);
1332  } else {
1333  DPRINTF(MinorMem, "Completed transfer for barrier: %s"
1334  " leaving the request as it is also a barrier\n",
1335  *(request->inst));
1336  }
1337  }
1338  break;
1339  default:
1340  panic("Shouldn't be allowed to receive a response from another state");
1341  }
1342 
1343  /* We go to idle even if there are more things in the requests queue
1344  * as it's the job of step to actually step us on to the next
1345  * transaction */
1346 
1347  /* Let's try and wake up the processor for the next cycle */
1349 
1350  /* Never busy */
1351  return true;
1352 }
1353 
/* Port callback: the cache can accept packets again, so resend the
 * request that previously failed (retryRequest) and, on success, move
 * it on in whichever queue it lives in.
 * NOTE(review): the case labels and queue-moving statements inside the
 * two switches were dropped by this extract. */
1354 void
1356 {
1357  DPRINTF(MinorMem, "Received retry request\n");
1358 
     /* Only ever called while a request is parked awaiting retry */
1359  assert(state == MemoryNeedsRetry);
1360 
1361  switch (retryRequest->state) {
1363  /* Retry in the requests queue */
1365  break;
1367  /* Retry in the store buffer */
1369  break;
1370  default:
1371  panic("Unrecognized retry request state %d.", retryRequest->state);
1372  }
1373 
1374  /* Set state back to MemoryRunning so that the following
1375  * tryToSend can actually send. Note that this won't
1376  * allow another transfer in as tryToSend should
1377  * issue a memory request and either succeed for this
1378  * request or return the LSQ back to MemoryNeedsRetry */
1379  state = MemoryRunning;
1380 
1381  /* Try to resend the request */
1382  if (tryToSend(retryRequest)) {
1383  /* Successfully sent, need to move the request */
1384  switch (retryRequest->state) {
1386  /* In the requests queue */
1388  break;
1390  /* In the store buffer */
1392  break;
1393  default:
1394  panic("Unrecognized retry request state %d.", retryRequest->state);
1395  }
1396 
1397  retryRequest = NULL;
1398  }
1399 }
1400 
/* Construct the LSQ: wire up the dcache port, size the requests and
 * transfers queues and the store buffer, then sanity-check every
 * configuration parameter with fatal() so bad configs die at startup.
 * NOTE(review): a few member initializers (original lines 1412, 1419,
 * 1421-1422) were dropped by this extract. */
1401 LSQ::LSQ(std::string name_, std::string dcache_port_name_,
1402  MinorCPU &cpu_, Execute &execute_,
1403  unsigned int in_memory_system_limit, unsigned int line_width,
1404  unsigned int requests_queue_size, unsigned int transfers_queue_size,
1405  unsigned int store_buffer_size,
1406  unsigned int store_buffer_cycle_store_limit) :
1407  Named(name_),
1408  cpu(cpu_),
1409  execute(execute_),
1410  dcachePort(dcache_port_name_, *this, cpu_),
1411  lastMemBarrier(cpu.numThreads, 0),
1413  inMemorySystemLimit(in_memory_system_limit),
     /* line_width == 0 means "use the CPU's cache line size" */
1414  lineWidth((line_width == 0 ? cpu.cacheLineSize() : line_width)),
1415  requests(name_ + ".requests", "addr", requests_queue_size),
1416  transfers(name_ + ".transfers", "addr", transfers_queue_size),
1417  storeBuffer(name_ + ".storeBuffer",
1418  *this, store_buffer_size, store_buffer_cycle_store_limit),
1420  numAccessesInDTLB(0),
1423  retryRequest(NULL),
1424  cacheBlockMask(~(cpu_.cacheLineSize() - 1))
1425 {
     /* All of the limits below must be at least 1 for the LSQ to be
      * able to make any progress */
1426  if (in_memory_system_limit < 1) {
1427  fatal("%s: executeMaxAccessesInMemory must be >= 1 (%d)\n", name_,
1428  in_memory_system_limit);
1429  }
1430 
1431  if (store_buffer_cycle_store_limit < 1) {
1432  fatal("%s: executeLSQMaxStoreBufferStoresPerCycle must be"
1433  " >= 1 (%d)\n", name_, store_buffer_cycle_store_limit);
1434  }
1435 
1436  if (requests_queue_size < 1) {
1437  fatal("%s: executeLSQRequestsQueueSize must be"
1438  " >= 1 (%d)\n", name_, requests_queue_size);
1439  }
1440 
1441  if (transfers_queue_size < 1) {
1442  fatal("%s: executeLSQTransfersQueueSize must be"
1443  " >= 1 (%d)\n", name_, transfers_queue_size);
1444  }
1445 
1446  if (store_buffer_size < 1) {
1447  fatal("%s: executeLSQStoreBufferSize must be"
1448  " >= 1 (%d)\n", name_, store_buffer_size);
1449  }
1450 
     /* lineWidth is used for splitting/snapping accesses, so it must
      * be a power of two */
1451  if ((lineWidth & (lineWidth - 1)) != 0) {
1452  fatal("%s: lineWidth: %d must be a power of 2\n", name(), lineWidth);
1453  }
1454 }
1455 
/* LSQ destructor (signature at original line 1456 dropped by this
 * extract): nothing to do, members clean up after themselves */
1457 { }
1458 
/* LSQRequest destructor (signature at original line 1459 dropped by
 * this extract): the request owns its packet and any store data that
 * was never handed off to a packet, so free both here */
1460 {
1461  if (packet)
1462  delete packet;
1463  if (data)
1464  delete [] data;
1465 }
1466 
/* Per-cycle step: try to issue the head of the requests queue (the
 * tryToSendToTransfers call at original line 1479 was dropped by this
 * extract), then let the store buffer issue its stores */
1473 void
1475 {
1476  /* Try to move address-translated requests between queues and issue
1477  * them */
1478  if (!requests.empty())
1480 
1481  storeBuffer.step();
1482 }
1483 
/* Return the head of the transfers queue if it belongs to the given
 * instruction and is either complete, or a store that can move to the
 * store buffer; otherwise NULL.  (The signature at original line 1485
 * and the StoreToStoreBuffer enumerator at line 1498 were dropped by
 * this extract.) */
1486 {
1487  LSQ::LSQRequestPtr ret = NULL;
1488 
1489  if (!transfers.empty()) {
1490  LSQRequestPtr request = transfers.front();
1491 
1492  /* Same instruction and complete access or a store that's
1493  * capable of being moved to the store buffer */
1494  if (request->inst->id == inst->id) {
1495  bool complete = request->isComplete();
1496  bool can_store = storeBuffer.canInsert();
1497  bool to_store_buffer = request->state ==
1499 
     /* Barriers also occupy a store buffer slot, so a complete
      * barrier is only returnable when the buffer can take it */
1500  if ((complete && !(request->isBarrier() && !can_store)) ||
1501  (to_store_buffer && can_store))
1502  {
1503  ret = request;
1504  }
1505  }
1506  }
1507 
1508  if (ret) {
1509  DPRINTF(MinorMem, "Found matching memory response for inst: %s\n",
1510  *inst);
1511  } else {
1512  DPRINTF(MinorMem, "No matching memory response for inst: %s\n",
1513  *inst);
1514  }
1515 
1516  return ret;
1517 }
1518 
/* Sanity check and pop the head response off the transfers queue,
 * deleting it unless it lives on in the store buffer.
 * NOTE(review): the two bookkeeping statements under the first two ifs
 * (original lines 1527 and 1530, presumably the numStoresInTransfers /
 * numAccessesIssuedToMemory decrements) are missing from this extract. */
1519 void
1521 {
1522  assert(!transfers.empty() && transfers.front() == response);
1523 
1524  transfers.pop();
1525 
1526  if (!response->isLoad)
1528 
1529  if (response->issuedToMemory)
1531 
     /* Requests headed for the store buffer are owned by it now;
      * everything else is finished with */
1532  if (response->state != LSQRequest::StoreInStoreBuffer) {
1533  DPRINTF(MinorMem, "Deleting %s request: %s\n",
1534  (response->isLoad ? "load" : "store"),
1535  *(response->inst));
1536 
1537  delete response;
1538  }
1539 }
1540 
/* A store has been committed: flag the instruction as being in the
 * store buffer and hand the request over to it.  (Signature at
 * original line 1542 dropped by this extract.) */
1541 void
1543 {
1544  assert(request->state == LSQRequest::StoreToStoreBuffer);
1545 
1546  DPRINTF(MinorMem, "Sending store: %s to store buffer\n",
1547  *(request->inst));
1548 
1549  request->inst->inStoreBuffer = true;
1550 
1551  storeBuffer.insert(request);
1552 }
1553 
/* Drained when both queues are empty and -- per the missing third
 * operand at original line 1558, presumably storeBuffer.isDrained()
 * -- the store buffer is empty too; TODO confirm against the full
 * file */
1554 bool
1556 {
1557  return requests.empty() && transfers.empty() &&
1559 }
1560 
/* True when the LSQ has work that needs a clock tick; used to keep
 * the CPU awake rather than idling.
 * NOTE(review): original lines 1562 (signature), 1568-1569 and 1572
 * (the right-hand sides of the two expressions) were dropped by this
 * extract. */
1561 bool
1563 {
1564  bool ret = false;
1565 
1566  if (canSendToMemorySystem()) {
1567  bool have_translated_requests = !requests.empty() &&
1570 
1571  ret = have_translated_requests ||
1573  }
1574 
1575  if (ret)
1576  DPRINTF(Activity, "Need to tick\n");
1577 
1578  return ret;
1579 }
1580 
1581 Fault
1582 LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
1583  unsigned int size, Addr addr, Request::Flags flags,
1584  uint64_t *res, AtomicOpFunctorPtr amo_op,
1585  const std::vector<bool>& byte_enable)
1586 {
1587  assert(inst->translationFault == NoFault || inst->inLSQ);
1588 
1589  if (inst->inLSQ) {
1590  return inst->translationFault;
1591  }
1592 
1593  bool needs_burst = transferNeedsBurst(addr, size, lineWidth);
1594 
1595  if (needs_burst && inst->staticInst->isAtomic()) {
1596  // AMO requests that access across a cache line boundary are not
1597  // allowed since the cache does not guarantee AMO ops to be executed
1598  // atomically in two cache lines
1599  // For ISAs such as x86 that requires AMO operations to work on
1600  // accesses that cross cache-line boundaries, the cache needs to be
1601  // modified to support locking both cache lines to guarantee the
1602  // atomicity.
1603  panic("Do not expect cross-cache-line atomic memory request\n");
1604  }
1605 
1606  LSQRequestPtr request;
1607 
1608  /* Copy given data into the request. The request will pass this to the
1609  * packet and then it will own the data */
1610  uint8_t *request_data = NULL;
1611 
1612  DPRINTF(MinorMem, "Pushing request (%s) addr: 0x%x size: %d flags:"
1613  " 0x%x%s lineWidth : 0x%x\n",
1614  (isLoad ? "load" : "store/atomic"), addr, size, flags,
1615  (needs_burst ? " (needs burst)" : ""), lineWidth);
1616 
1617  if (!isLoad) {
1618  /* Request_data becomes the property of a ...DataRequest (see below)
1619  * and destroyed by its destructor */
1620  request_data = new uint8_t[size];
1621  if (inst->staticInst->isAtomic() ||
1622  (flags & Request::STORE_NO_DATA)) {
1623  /* For atomic or store-no-data, just use zeroed data */
1624  std::memset(request_data, 0, size);
1625  } else {
1626  std::memcpy(request_data, data, size);
1627  }
1628  }
1629 
1630  if (needs_burst) {
1631  request = new SplitDataRequest(
1632  *this, inst, isLoad, request_data, res);
1633  } else {
1634  request = new SingleDataRequest(
1635  *this, inst, isLoad, request_data, res);
1636  }
1637 
1638  if (inst->traceData)
1639  inst->traceData->setMem(addr, size, flags);
1640 
1641  int cid = cpu.threads[inst->id.threadId]->getTC()->contextId();
1642  request->request->setContext(cid);
1643  request->request->setVirt(0 /* asid */,
1644  addr, size, flags, cpu.dataMasterId(),
1645  /* I've no idea why we need the PC, but give it */
1646  inst->pc.instAddr(), std::move(amo_op));
1647  request->request->setByteEnable(byte_enable);
1648 
1649  requests.push(request);
1650  inst->inLSQ = true;
1651  request->startAddrTranslation();
1652 
1653  return inst->translationFault;
1654 }
1655 
/* Push a request representing an instruction whose predicate failed:
 * it makes no memory access but must still flow through the queues to
 * maintain commit order.  (Signature at original line 1657 dropped by
 * this extract.) */
1656 void
1658 {
1659  LSQRequestPtr request = new FailedDataRequest(*this, inst);
1660  requests.push(request);
1661 }
1662 
/* Emit MinorTrace state for the LSQ and its queues.
 * NOTE(review): the MINORTRACE argument lines (original 1668-1669) and
 * the transfers/storeBuffer minorTrace calls (1671-1672) are missing
 * from this extract. */
1663 void
1665 {
1666  MINORTRACE("state=%s in_tlb_mem=%d/%d stores_in_transfers=%d"
1667  " lastMemBarrier=%d\n",
1670  requests.minorTrace();
1673 }
1674 
1675 LSQ::StoreBuffer::StoreBuffer(std::string name_, LSQ &lsq_,
1676  unsigned int store_buffer_size,
1677  unsigned int store_limit_per_cycle) :
1678  Named(name_), lsq(lsq_),
1679  numSlots(store_buffer_size),
1680  storeLimitPerCycle(store_limit_per_cycle),
1681  slots(),
1682  numUnissuedAccesses(0)
1683 {
1684 }
1685 
1686 PacketPtr
1687 makePacketForRequest(const RequestPtr &request, bool isLoad,
1688  Packet::SenderState *sender_state, PacketDataPtr data)
1689 {
1690  PacketPtr ret = isLoad ? Packet::createRead(request)
1691  : Packet::createWrite(request);
1692 
1693  if (sender_state)
1694  ret->pushSenderState(sender_state);
1695 
1696  if (isLoad) {
1697  ret->allocate();
1698  } else if (!request->isCacheMaintenance()) {
1699  // CMOs are treated as stores but they don't have data. All
1700  // stores otherwise need to allocate for data.
1701  ret->dataDynamic(data);
1702  }
1703 
1704  return ret;
1705 }
1706 
/* A memory barrier instruction has been issued: record its execSeqNum
 * per thread so later memory refs can be held back until the barrier
 * completes.  (Signature at original line 1708 dropped by this
 * extract.) */
1707 void
1709 {
1710  assert(inst->isInst() && inst->staticInst->isMemBarrier());
     /* Barriers must arrive in execution order */
1711  assert(inst->id.execSeqNum > lastMemBarrier[inst->id.threadId]);
1712 
1713  /* Remember the barrier. We only have a notion of one
1714  * barrier so this may result in some mem refs being
1715  * delayed if they are between barriers */
1716  lastMemBarrier[inst->id.threadId] = inst->id.execSeqNum;
1717 }
1718 
/* Build this request's memory transaction packet (idempotent: a second
 * call is a no-op).  Only valid when translation succeeded.
 * (Signature at original line 1720 dropped by this extract.) */
1719 void
1721 {
1722  assert(inst->translationFault == NoFault);
1723 
1724  /* Make the function idempotent */
1725  if (packet)
1726  return;
1727 
1728  packet = makePacketForRequest(request, isLoad, this, data);
1729  /* Null our data pointer so we know not to deallocate it when this
1730  * request is destroyed. The data now belongs to the packet and
1731  * the packet is responsible for its destruction */
1732  data = NULL;
1733 }
1734 
/* Pretty-print a MemoryState value for trace output (the operator<<
 * signature at original line 1736 was dropped by this extract) */
1735 std::ostream &
1737 {
1738  switch (state) {
1739  case LSQ::MemoryRunning:
1740  os << "MemoryRunning";
1741  break;
1742  case LSQ::MemoryNeedsRetry:
1743  os << "MemoryNeedsRetry";
1744  break;
1745  default:
     /* Fall back to the numeric value for unknown states */
1746  os << "MemoryState-" << static_cast<int>(state);
1747  break;
1748  }
1749  return os;
1750 }
1751 
/* External snoop: wake any thread whose address monitor matches the
 * packet, and on invalidations/writes notify the locked-access (LLSC)
 * tracking for every thread.
 * NOTE(review): the signature (original line 1753) and the
 * handleLockedSnoop call (line 1766) were dropped by this extract. */
1752 void
1754 {
1755  /* LLSC operations in Minor can't be speculative and are executed from
1756  * the head of the requests queue. We shouldn't need to do more than
1757  * this action on snoops. */
1758  for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
1759  if (cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) {
1760  cpu.wakeup(tid);
1761  }
1762  }
1763 
1764  if (pkt->isInvalidate() || pkt->isWrite()) {
1765  for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
1767  cacheBlockMask);
1768  }
1769  }
1770 }
1771 
/* Snoop our own sent request against every OTHER thread on this CPU:
 * wake matching address monitors and, for invalidations/writes, update
 * their locked-access (LLSC) tracking.
 * NOTE(review): the signature (original line 1773) and the
 * handleLockedSnoop call (line 1788) were dropped by this extract. */
1772 void
1774 {
1775  /* LLSC operations in Minor can't be speculative and are executed from
1776  * the head of the requests queue. We shouldn't need to do more than
1777  * this action on snoops. */
1778  ThreadID req_tid = request->inst->id.threadId;
1779  PacketPtr pkt = request->packet;
1780 
1781  for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
     /* Skip the issuing thread: it snoops only other threads */
1782  if (tid != req_tid) {
1783  if (cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) {
1784  cpu.wakeup(tid);
1785  }
1786 
1787  if (pkt->isInvalidate() || pkt->isWrite()) {
1789  cacheBlockMask);
1790  }
1791  }
1792  }
1793 }
1794 
1795 }
MemoryState
State of memory access for head access.
Definition: lsq.hh:70
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:167
#define DPRINTF(x,...)
Definition: trace.hh:229
DcachePort dcachePort
Definition: lsq.hh:114
Addr addrBlockOffset(Addr addr, Addr block_size)
Calculates the offset of a given address wrt aligned fixed-size blocks.
Definition: utils.hh:52
virtual bool sentAllPackets()=0
Have all packets been sent?
MinorDynInstPtr inst
Instruction which made this request.
Definition: lsq.hh:129
SingleDataRequest is used for requests that don&#39;t fragment.
Definition: lsq.hh:339
unsigned int numAccessesIssuedToMemory
The number of accesses which have been issued to the memory system but have not been committed/discarded.
Definition: lsq.hh:609
decltype(nullptr) constexpr NoFault
Definition: types.hh:245
Execute stage.
Definition: execute.hh:62
AddressMonitor * getCpuAddrMonitor(ThreadID tid)
Definition: base.hh:614
void pop()
Pop the head item.
Definition: buffers.hh:501
LSQQueue transfers
Once issued to memory (or, for stores, just had their state changed to StoreToStoreBuffer) LSQRequest...
Definition: lsq.hh:577
T * findNextSenderState() const
Go through the sender state stack and return the first instance that is of type T (as determined by a...
Definition: packet.hh:510
bool transferNeedsBurst(Addr addr, unsigned int size, unsigned int block_size)
Returns true if the given memory access (address, size) needs to be fragmented across aligned fixed-s...
Definition: utils.hh:79
#define fatal(...)
This implements a cprintf based fatal() function.
Definition: logging.hh:175
PacketDataPtr data
Dynamically allocated and populated data carried for building write packets.
Definition: lsq.hh:137
void setState(LSQRequestState new_state)
Set state and output trace output.
Definition: lsq.cc:170
StoreBuffer(std::string name_, LSQ &lsq_, unsigned int store_buffer_size, unsigned int store_limit_per_cycle)
Definition: lsq.cc:1675
void finish(const Fault &fault_, const RequestPtr &request_, ThreadContext *tc, BaseTLB::Mode mode)
TLB interace.
Definition: lsq.cc:271
LSQQueue requests
requests contains LSQRequests which have been issued to the TLB by calling ExecContext::readMem/write...
Definition: lsq.hh:568
friend std::ostream & operator<<(std::ostream &os, MemoryState state)
Print MemoryState values as shown in the enum definition.
Definition: lsq.cc:1736
bool isTranslationDelayed
Address translation is delayed due to table walk.
Definition: lsq.hh:161
Bitfield< 7 > i
SplitDataRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_, PacketDataPtr data_=NULL, uint64_t *res_=NULL)
Definition: lsq.cc:393
std::vector< Packet * > fragmentPackets
Packets matching fragmentRequests to issue fragments to memory.
Definition: lsq.hh:412
void step()
Try to issue more stores to memory.
Definition: lsq.cc:851
ThreadID numThreads
Number of threads we&#39;re actually simulating (<= SMT_MAX_THREADS).
Definition: base.hh:378
virtual BaseTLB * getDTBPtr()=0
void minorTrace() const
Definition: lsq.cc:1664
void minorTrace() const
Report queue contents for MinorTrace.
Definition: lsq.cc:938
virtual bool isBarrier()
Is this a request a barrier?
Definition: lsq.cc:158
unsigned int numIssuedFragments
Number of fragments already issued (<= numFragments)
Definition: lsq.hh:402
void popResponse(LSQRequestPtr response)
Sanity check and pop the head response.
Definition: lsq.cc:1520
void issuedMemBarrierInst(MinorDynInstPtr inst)
A memory barrier instruction has been issued, remember its execSeqNum that we can avoid issuing memor...
Definition: lsq.cc:1708
bool empty() const
Is the queue empty?
Definition: buffers.hh:504
bool instIsHeadInst(MinorDynInstPtr inst)
Returns true if the given instruction is at the head of the inFlightInsts instruction queue...
Definition: execute.cc:1872
const std::string & toString() const
Return the string to a cmd given by idx.
Definition: packet.hh:237
void pushFailedRequest(MinorDynInstPtr inst)
Push a predicate failed-representing request into the queues just to maintain commit order...
Definition: lsq.cc:1657
std::shared_ptr< Request > RequestPtr
Definition: request.hh:83
All the fun of executing instructions from Decode and sending branch/new instruction stream info...
std::vector< RequestPtr > fragmentRequests
Fragment Requests corresponding to the address ranges of each fragment.
Definition: lsq.hh:409
void wakeupOnEvent(unsigned int stage_id)
Interface for stages to signal that they have become active after a callback or eventq event where th...
Definition: cpu.cc:298
unsigned int numStoresInTransfers
The number of stores in the transfers queue.
Definition: lsq.hh:604
uint8_t * PacketDataPtr
Definition: packet.hh:72
unsigned int unreservedRemainingSpace() const
Like remainingSpace but does not count reserved spaces.
Definition: buffers.hh:488
ip6_addr_t addr
Definition: inet.hh:335
static PacketPtr createWrite(const RequestPtr &req)
Definition: packet.hh:919
std::unique_ptr< AtomicOpFunctor > AtomicOpFunctorPtr
Definition: amo.hh:230
Cycles handleIprRead(ThreadContext *, Packet *)
Definition: mmapped_ipr.hh:48
void retireResponse(PacketPtr packet_)
For loads, paste the response data into the main response packet.
Definition: lsq.cc:632
void setMemAccPredicate(bool val) override
void startAddrTranslation()
Start a loop of do { sendNextFragmentToTranslation ; translateTiming ; finish } while (numTranslatedF...
Definition: lsq.cc:591
TheISA::PCState pcState() const override
bool needsToBeSentToStoreBuffer()
This request, once processed by the requests/transfers queues, will need to go to the store buffer...
Definition: lsq.cc:164
The SimpleThread object provides a combination of the ThreadState object and the ThreadContext interf...
bool handleLockedWrite(XC *xc, const RequestPtr &req, Addr cacheBlockMask)
Definition: locked_mem.hh:79
unsigned int numFragments
Number of fragments this request is split into.
Definition: lsq.hh:390
bool sendTimingReq(PacketPtr pkt)
Attempt to send a timing request to the slave port by calling its corresponding receive function...
Definition: port.hh:445
Minor contains all the definitions within the MinorCPU apart from the CPU class itself.
Definition: activity.cc:46
bool canInsert() const
Can a new request be inserted into the queue?
Definition: lsq.cc:728
void handleLockedRead(XC *xc, const RequestPtr &req)
Definition: locked_mem.hh:66
Bitfield< 4, 0 > mode
unsigned int numAccessesInMemorySystem
Count of the number of mem.
Definition: lsq.hh:597
LSQRequestPtr findResponse(MinorDynInstPtr inst)
Returns a response if it&#39;s at the head of the transfers queue and it&#39;s either complete or can be sent...
Definition: lsq.cc:1485
bool isWrite() const
Definition: packet.hh:529
bool isInvalidate() const
Definition: packet.hh:543
ThreadContext is the external interface to all thread state for anything outside of the CPU...
T * getPtr()
get a pointer to the data ptr.
Definition: packet.hh:1090
bool isRead() const
Definition: packet.hh:528
ExecContext bears the exec_context interface for Minor.
Definition: exec_context.hh:74
struct ip6_opt_fragment fragment
Definition: inet.hh:336
Bitfield< 17 > os
Definition: misc.hh:805
bool isComplete() const
Has this request been completed.
Definition: lsq.cc:178
ExecContext bears the exec_context interface for Minor.
static AddrRangeCoverage containsAddrRangeOf(Addr req1_addr, unsigned int req1_size, Addr req2_addr, unsigned int req2_size)
Does address range req1 (req1_addr to req1_addr + req1_size - 1) fully cover, partially cover or not ...
Definition: lsq.cc:121
Derived SenderState to carry data access info.
Definition: lsq.hh:120
LSQRequestState state
Definition: lsq.hh:185
#define DPRINTFS(x,...)
Definition: trace.hh:230
RequestPtr req
A pointer to the original request.
Definition: packet.hh:327
std::vector< InstSeqNum > lastMemBarrier
Most recent execSeqNum of a memory barrier instruction or 0 if there are no in-flight barriers...
Definition: lsq.hh:535
Definition: trace.hh:151
unsigned getSize() const
Definition: packet.hh:736
Request for doing barrier accounting in the store buffer.
Definition: lsq.hh:327
void completeMemBarrierInst(MinorDynInstPtr inst, bool committed)
Complete a barrier instruction.
Definition: lsq.cc:921
LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_, PacketDataPtr data_=NULL, uint64_t *res_=NULL)
Definition: lsq.cc:59
Tick curTick()
The current simulated tick.
Definition: core.hh:47
bool needsResponse() const
Definition: packet.hh:542
MemoryState state
Retry state of last issued memory transfer.
Definition: lsq.hh:539
bool isError() const
Definition: packet.hh:555
unsigned int numRetiredFragments
Number of fragments retired back to this request.
Definition: lsq.hh:405
ElemType & front()
Head value.
Definition: buffers.hh:496
MasterID dataMasterId() const
Reads this CPU&#39;s unique data requestor ID.
Definition: base.hh:189
PacketPtr getHeadPacket()
Get the head packet as counted by numIssuedFragments.
Definition: lsq.cc:616
StoreBuffer storeBuffer
Definition: lsq.hh:588
PacketPtr makePacketForRequest(const RequestPtr &request, bool isLoad, Packet::SenderState *sender_state, PacketDataPtr data)
Make a suitable packet for the given request.
Definition: lsq.cc:1687
bool issuedToMemory
This in an access other than a normal cacheable load that&#39;s visited the memory system.
Definition: lsq.hh:158
Fault pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data, unsigned int size, Addr addr, Request::Flags flags, uint64_t *res, AtomicOpFunctorPtr amo_op, const std::vector< bool > &byte_enable=std::vector< bool >())
Single interface for readMem/writeMem/amoMem to issue requests into the LSQ.
Definition: lsq.cc:1582
virtual void retireResponse(PacketPtr packet_)=0
Retire a response packet into the LSQRequest packet possibly completing this transfer.
void makePacket()
Make a packet to use with the memory transaction.
Definition: lsq.cc:1720
bool tryToSend(LSQRequestPtr request)
Try to send (or resend) a memory request&#39;s next/only packet to the memory system. ...
Definition: lsq.cc:1172
bool isLoad
Load/store indication used for building packet.
Definition: lsq.hh:133
bool doMonitor(PacketPtr pkt)
Definition: base.cc:741
virtual void translateTiming(const RequestPtr &req, ThreadContext *tc, Translation *translation, Mode mode)=0
virtual void stepToNextPacket()=0
Step to the next packet for the next call to getHeadPacket.
Addr getAddr() const
Definition: packet.hh:726
bool canSendToMemorySystem()
Can a request be sent to the memory system.
Definition: lsq.cc:1288
void makeFragmentPackets()
Make the packets to go with the requests so they can be sent to the memory system.
Definition: lsq.cc:541
virtual void startAddrTranslation()=0
Start the address translation process for this request.
static const FlagsType STORE_NO_DATA
Definition: request.hh:200
FailedDataRequest represents requests from instructions that failed their predicates but need to ride...
Definition: lsq.hh:317
AddrRangeCoverage canForwardDataToLoad(LSQRequestPtr request, unsigned int &found_slot)
Look for a store which satisfies the given load.
Definition: lsq.cc:771
unsigned int numInTranslationFragments
Number of fragments in the address translation mechanism.
Definition: lsq.hh:393
void reportData(std::ostream &os) const
MinorTrace report interface.
Definition: lsq.cc:186
void clearMemBarrier(MinorDynInstPtr inst)
Clear a barrier (if it&#39;s the last one marked up in lastMemBarrier)
Definition: lsq.cc:258
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:142
MinorCPU & cpu
My owner(s)
Definition: lsq.hh:65
void minorTrace() const
Definition: buffers.hh:507
static PacketPtr createRead(const RequestPtr &req)
Constructor-like methods that return Packets based on Request objects.
Definition: packet.hh:913
T safe_cast(U ptr)
Definition: cast.hh:61
bool isDrained()
Is there nothing left in the LSQ.
Definition: lsq.cc:1555
virtual ~LSQRequest()
Definition: lsq.cc:1459
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
Definition: packet.hh:255
unsigned int numUnissuedStores()
Number of stores in the store buffer which have not been completely issued to the memory system...
Definition: lsq.hh:514
const std::string & name() const
Definition: trace.hh:160
void tryToSendToTransfers(LSQRequestPtr request)
Try and issue a memory access for a translated request at the head of the requests queue...
Definition: lsq.cc:967
A virtual base opaque structure used to hold state associated with the packet (e.g., an MSHR), specific to a SimObject that sees the packet.
Definition: packet.hh:403
AddrRangeCoverage
Coverage of one address range with another.
Definition: lsq.hh:81
void step()
Step checks the queues to see if their are issuable transfers which were not otherwise picked up by t...
Definition: lsq.cc:1474
A load/store queue that allows outstanding reads and writes.
unsigned int numAccessesInDTLB
Number of requests in the DTLB in the requests queue.
Definition: lsq.hh:600
Mode
Definition: tlb.hh:59
void makeFragmentRequests()
Make all the Requests for this transfer&#39;s fragments so that those requests can be sent for address tr...
Definition: lsq.cc:420
int16_t ThreadID
Thread index/ID type.
Definition: types.hh:227
void makeResponse()
Take a request packet and modify it in place to be suitable for returning as a response to that reque...
Definition: packet.hh:937
void sendStoreToStoreBuffer(LSQRequestPtr request)
A store has been committed, please move it to the store buffer.
Definition: lsq.cc:1542
LSQRequestPtr retryRequest
The request (from either requests or the store buffer) which is currently waiting have its memory acc...
Definition: lsq.hh:613
bool recvTimingResp(PacketPtr pkt)
Memory interface.
Definition: lsq.cc:1295
void disableMemAccess()
Definition: lsq.cc:114
void startAddrTranslation()
Send single translation request.
Definition: lsq.cc:301
virtual ~LSQ()
Definition: lsq.cc:1456
void insert(LSQRequestPtr request)
Insert a request at the back of the queue.
Definition: lsq.cc:749
bool instIsRightStream(MinorDynInstPtr inst)
Does the given instruction have the right stream sequence number to be committed? ...
Definition: execute.cc:1866
bool skipped
Was skipped.
Definition: lsq.hh:154
LSQ & port
Owning port.
Definition: lsq.hh:126
GenericISA::SimplePCState< MachInst > PCState
Definition: types.hh:43
MemCmd cmd
The command field of the packet.
Definition: packet.hh:322
void sendNextFragmentToTranslation()
Part of the address translation loop, see startAddTranslation.
Definition: lsq.cc:709
bool hasPacketsInMemSystem()
True if this request has any issued packets in the memory system and so can&#39;t be interrupted until it...
Definition: lsq.hh:447
Top level definition of the Minor in-order CPU model.
Addr cacheBlockMask
Address Mask for a cache block (e.g.
Definition: lsq.hh:616
void wakeup(ThreadID tid) override
Definition: cpu.cc:158
void stepToNextPacket()
Step on numIssuedFragments.
Definition: lsq.cc:624
PacketPtr packet
Definition: lsq.hh:143
ThreadID contextToThread(ContextID cid)
Convert ContextID to threadID.
Definition: base.hh:306
virtual ThreadContext * getContext(int tn)
Given a thread num, get the thread context for it.
Definition: base.hh:298
std::vector< Minor::MinorThread * > threads
These are thread state-representing objects for this CPU.
Definition: cpu.hh:95
bool isDrained() const
Drained if there is absolutely nothing left in the buffer.
Definition: lsq.hh:522
LSQ(std::string name_, std::string dcache_port_name_, MinorCPU &cpu_, Execute &execute_, unsigned int max_accesses_in_memory_system, unsigned int line_width, unsigned int requests_queue_size, unsigned int transfers_queue_size, unsigned int store_buffer_size, unsigned int store_buffer_cycle_store_limit)
Definition: lsq.cc:1401
void push(ElemType &data)
Push an element into the buffer if it isn't a bubble.
Definition: buffers.hh:428
#define MINORTRACE(...)
DPRINTFN for MinorTrace reporting.
Definition: trace.hh:62
void retireResponse(PacketPtr packet_)
Keep the given packet as the response packet LSQRequest::packet.
Definition: lsq.cc:326
bool isAnyActiveElement(const std::vector< bool >::const_iterator &it_start, const std::vector< bool >::const_iterator &it_end)
Test if there is any active element in an enablement range.
Definition: utils.hh:88
void tryToSuppressFault()
Instructions may want to suppress translation faults (e.g.
Definition: lsq.cc:78
void schedule(Event &event, Tick when)
Definition: eventq.hh:744
The constructed pipeline.
const T * getConstPtr() const
Definition: packet.hh:1099
virtual PacketPtr getHeadPacket()=0
Get the next packet to issue for this request.
void completeDisabledMemAccess()
Definition: lsq.cc:97
void dataDynamic(T *p)
Set the data pointer to a value that should have delete [] called on it.
Definition: packet.hh:1078
void handleLockedSnoop(XC *xc, PacketPtr pkt, Addr cacheBlockMask)
Definition: locked_mem.hh:60
void recvReqRetry()
Definition: lsq.cc:1355
void pushSenderState(SenderState *sender_state)
Push a new sender state to the packet and make the current sender state the predecessor of the new on...
Definition: packet.cc:319
SenderState * popSenderState()
Pop the top of the state stack and return a pointer to it.
Definition: packet.cc:327
void copyError(Packet *pkt)
Definition: packet.hh:724
#define warn(...)
Definition: logging.hh:212
const unsigned int inMemorySystemLimit
Maximum number of in-flight accesses issued to the memory system.
Definition: lsq.hh:542
Cycles handleIprWrite(ThreadContext *, Packet *)
Definition: mmapped_ipr.hh:49
const unsigned int lineWidth
Memory system access width (and snap) in bytes.
Definition: lsq.hh:545
MinorCPU is an in-order CPU model with four fixed pipeline stages:
Definition: cpu.hh:79
static const int NumArgumentRegs M5_VAR_USED
Definition: process.cc:84
RequestPtr request
The underlying request of this LSQRequest.
Definition: lsq.hh:146
void threadSnoop(LSQRequestPtr request)
Snoop other threads monitors on memory system accesses.
Definition: lsq.cc:1773
void setSkipped()
Set this request as having been skipped before a memory transfer was attempted.
Definition: lsq.hh:213
const char data[]
std::shared_ptr< FaultBase > Fault
Definition: types.hh:240
void finish(const Fault &fault_, const RequestPtr &request_, ThreadContext *tc, BaseTLB::Mode mode)
TLB response interface.
Definition: lsq.cc:335
void allocate()
Allocate memory for the packet.
Definition: packet.hh:1232
void deleteRequest(LSQRequestPtr request)
Delete the given request and free the slot it occupied.
Definition: lsq.cc:735
void recvTimingSnoopReq(PacketPtr pkt)
Definition: lsq.cc:1753
void countIssuedStore(LSQRequestPtr request)
Count a store being issued to memory by decrementing numUnissuedAccesses.
Definition: lsq.cc:842
Execute & execute
Definition: lsq.hh:66
void moveFromRequestsToTransfers(LSQRequestPtr request)
Move a request between queues.
Definition: lsq.cc:1271
void forwardStoreData(LSQRequestPtr load, unsigned int slot_number)
Fill the given packet with appropriate data from slot slot_number.
Definition: lsq.cc:812
virtual bool hasPacketsInMemSystem()=0
True if this request has any issued packets in the memory system and so can't be interrupted until it...
bool needsToTick()
May need to be ticked next cycle as one of the queues contains an actionable transfers or address tra...
Definition: lsq.cc:1562
unsigned int numTranslatedFragments
Number of fragments that have completed address translation, (numTranslatedFragments + numInTranslati...
Definition: lsq.hh:399

Generated on Fri Feb 28 2020 16:26:59 for gem5 by doxygen 1.8.13