Go to the documentation of this file.
   42 #ifndef __CPU_O3_LSQ_UNIT_HH__ 
   43 #define __CPU_O3_LSQ_UNIT_HH__ 
   53 #include "arch/locked_mem.hh" 
   54 #include "config/the_isa.hh" 
   57 #include "debug/HtmCpu.hh" 
   58 #include "debug/LSQUnit.hh" 
   62 struct DerivO3CPUParams;
 
   83     typedef typename Impl::O3CPU 
O3CPU;
 
   85     typedef typename Impl::CPUPol::IEW 
IEW;
 
   86     typedef typename Impl::CPUPol::LSQ 
LSQ;
 
   90     using LSQRequest = 
typename Impl::CPUPol::LSQ::LSQRequest;
 
  113             if (
req != 
nullptr) {
 
  123             if (
req != 
nullptr) {
 
  223     LSQUnit(uint32_t lqEntries, uint32_t sqEntries);
 
  231         panic(
"LSQUnit is not copy-able");
 
  235     void init(
O3CPU *cpu_ptr, 
IEW *iew_ptr, 
const DerivO3CPUParams &params,
 
  236             LSQ *lsq_ptr, 
unsigned id);
 
  239     std::string 
name() 
const;
 
  323         const auto& htm_cpt = 
cpu->tcBase(
lsqID)->getHtmCheckpointPtr();
 
  324         return htm_cpt->getHtmUid();
 
  422         using LSQSenderState::alive;
 
  428         typename LoadQueue::iterator 
idx;
 
  441         using LSQSenderState::alive;
 
  634 template <
class Impl>
 
  638     LQEntry& load_req = loadQueue[load_idx];
 
  644     assert(!load_inst->isExecuted());
 
  651     if (req->mainRequest()->isStrictlyOrdered() &&
 
  652         (load_idx != loadQueue.head() || !load_inst->isAtCommit())) {
 
  655         iewStage->rescheduleMemInst(load_inst);
 
  656         load_inst->clearIssued();
 
  657         load_inst->effAddrValid(
false);
 
  658         ++stats.rescheduledLoads;
 
  660                 load_inst->seqNum, load_inst->pcState());
 
  667         return std::make_shared<GenericISA::M5PanicFault>(
 
  668             "Strictly ordered load [sn:%llx] PC %s\n",
 
  669             load_inst->seqNum, load_inst->pcState());
 
  673             "storeHead: %i addr: %#x%s\n",
 
  674             load_idx - 1, load_inst->sqIt._idx, storeQueue.head() - 1,
 
  675             req->mainRequest()->getPaddr(), req->isSplit() ? 
" split" : 
"");
 
  677     if (req->mainRequest()->isLLSC()) {
 
  681         load_inst->recordResult(
false);
 
  683         load_inst->recordResult(
true);
 
  686     if (req->mainRequest()->isLocalAccess()) {
 
  687         assert(!load_inst->memData);
 
  688         assert(!load_inst->inHtmTransactionalState());
 
  689         load_inst->memData = 
new uint8_t[MaxDataBytes];
 
  696         Cycles delay = req->mainRequest()->localAccessor(thread, main_pkt);
 
  699         cpu->schedule(wb, cpu->clockEdge(delay));
 
  704     if (req->mainRequest()->isHTMStart() || req->mainRequest()->isHTMCommit())
 
  714             if (!load_inst->memData) {
 
  716                     new uint8_t[req->mainRequest()->getSize()];
 
  718                 memset(load_inst->memData, 0, req->mainRequest()->getSize());
 
  722             if (load_inst->inHtmTransactionalState()) {
 
  724                     load_inst->getHtmTransactionUid());
 
  729             cpu->schedule(wb, cpu->clockEdge(delay));
 
  735     auto store_it = load_inst->sqIt;
 
  736     assert (store_it >= storeWBIt);
 
  738     while (store_it != storeWBIt) {
 
  741         assert(store_it->valid());
 
  742         assert(store_it->instruction()->seqNum < load_inst->seqNum);
 
  743         int store_size = store_it->size();
 
  748         if (store_size != 0 && !store_it->instruction()->strictlyOrdered() &&
 
  749             !(store_it->request()->mainRequest() &&
 
  750               store_it->request()->mainRequest()->isCacheMaintenance())) {
 
  751             assert(store_it->instruction()->effAddrValid());
 
  755             auto req_s = req->mainRequest()->getVaddr();
 
  756             auto req_e = req_s + req->mainRequest()->getSize();
 
  757             auto st_s = store_it->instruction()->effAddr;
 
  758             auto st_e = st_s + store_size;
 
  760             bool store_has_lower_limit = req_s >= st_s;
 
  761             bool store_has_upper_limit = req_e <= st_e;
 
  762             bool lower_load_has_store_part = req_s < st_e;
 
  763             bool upper_load_has_store_part = req_e > st_s;
 
  765             auto coverage = AddrRangeCoverage::NoAddrRangeCoverage;
 
  771             if (!store_it->instruction()->isAtomic() &&
 
  772                 store_has_lower_limit && store_has_upper_limit &&
 
  773                 !req->mainRequest()->isLLSC()) {
 
  775                 const auto& store_req = store_it->request()->mainRequest();
 
  776                 coverage = store_req->isMasked() ?
 
  777                     AddrRangeCoverage::PartialAddrRangeCoverage :
 
  778                     AddrRangeCoverage::FullAddrRangeCoverage;
 
  782                 (!req->mainRequest()->isLLSC() &&
 
  783                  ((store_has_lower_limit && lower_load_has_store_part) ||
 
  784                   (store_has_upper_limit && upper_load_has_store_part) ||
 
  785                   (lower_load_has_store_part && upper_load_has_store_part))) ||
 
  788                 (req->mainRequest()->isLLSC() &&
 
  789                  ((store_has_lower_limit || upper_load_has_store_part) &&
 
  790                   (store_has_upper_limit || lower_load_has_store_part))) ||
 
  793                 (store_it->instruction()->isAtomic() &&
 
  794                  ((store_has_lower_limit || upper_load_has_store_part) &&
 
  795                   (store_has_upper_limit || lower_load_has_store_part)))) {
 
  797                 coverage = AddrRangeCoverage::PartialAddrRangeCoverage;
 
  800             if (coverage == AddrRangeCoverage::FullAddrRangeCoverage) {
 
  802                 int shift_amt = req->mainRequest()->getVaddr() -
 
  803                     store_it->instruction()->effAddr;
 
  806                 if (!load_inst->memData) {
 
  808                         new uint8_t[req->mainRequest()->getSize()];
 
  810                 if (store_it->isAllZeros())
 
  811                     memset(load_inst->memData, 0,
 
  812                             req->mainRequest()->getSize());
 
  814                     memcpy(load_inst->memData,
 
  815                         store_it->data() + shift_amt,
 
  816                         req->mainRequest()->getSize());
 
  819                         "addr %#x\n", store_it._idx,
 
  820                         req->mainRequest()->getVaddr());
 
  833                 assert(!req->mainRequest()->isHTMCmd());
 
  834                 if (load_inst->inHtmTransactionalState()) {
 
  835                     assert (!storeQueue[store_it._idx].completed());
 
  837                         storeQueue[store_it._idx].instruction()->
 
  838                           inHtmTransactionalState());
 
  840                         load_inst->getHtmTransactionUid() ==
 
  841                         storeQueue[store_it._idx].instruction()->
 
  842                           getHtmTransactionUid());
 
  844                         load_inst->getHtmTransactionUid());
 
  845                     DPRINTF(HtmCpu, 
"HTM LD (ST2LDF) " 
  846                       "pc=0x%lx - vaddr=0x%lx - " 
  847                       "paddr=0x%lx - htmUid=%u\n",
 
  848                       load_inst->instAddr(),
 
  849                       data_pkt->
req->hasVaddr() ?
 
  850                         data_pkt->
req->getVaddr() : 0lu,
 
  852                       load_inst->getHtmTransactionUid());
 
  855                 if (req->isAnyOutstandingRequest()) {
 
  856                     assert(req->_numOutstandingPackets > 0);
 
  861                     req->discardSenderState();
 
  876             } 
else if (coverage == AddrRangeCoverage::PartialAddrRangeCoverage) {
 
  879                 if (store_it->completed()) {
 
  880                     panic(
"Should not check one of these");
 
  889                      loadQueue[stallingLoadIdx].instruction()->seqNum)) {
 
  891                     stallingStoreIsn = store_it->instruction()->seqNum;
 
  892                     stallingLoadIdx = load_idx;
 
  897                 iewStage->rescheduleMemInst(load_inst);
 
  898                 load_inst->clearIssued();
 
  899                 load_inst->effAddrValid(
false);
 
  900                 ++stats.rescheduledLoads;
 
  905                         "Store idx %i to load addr %#x\n",
 
  906                         store_it._idx, req->mainRequest()->getVaddr());
 
  917     DPRINTF(
LSQUnit, 
"Doing memory access for inst [sn:%lli] PC %s\n",
 
  918             load_inst->seqNum, load_inst->pcState());
 
  921     if (!load_inst->memData) {
 
  922         load_inst->memData = 
new uint8_t[req->mainRequest()->getSize()];
 
  927     if (req->mainRequest()->isHTMCmd()) {
 
  931         *load_inst->memData = (uint64_t) 0x1ull;
 
  941     if (req->senderState() == 
nullptr) {
 
  943                 loadQueue.getIterator(load_idx));
 
  944         state->isLoad = 
true;
 
  945         state->inst = load_inst;
 
  946         state->isSplit = req->isSplit();
 
  947         req->senderState(state);
 
  950     req->sendPacketToCache();
 
  952         iewStage->blockMemInst(load_inst);
 
  957 template <
class Impl>
 
  961     assert(storeQueue[store_idx].valid());
 
  963     DPRINTF(
LSQUnit, 
"Doing write to store idx %i, addr %#x | storeHead:%i " 
  965             store_idx - 1, req->request()->getPaddr(), storeQueue.head() - 1,
 
  966             storeQueue[store_idx].instruction()->seqNum);
 
  968     storeQueue[store_idx].setRequest(req);
 
  969     unsigned size = req->_size;
 
  970     storeQueue[store_idx].size() = size;
 
  973     storeQueue[store_idx].isAllZeros() = store_no_data;
 
  974     assert(size <= SQEntry::DataSize || store_no_data);
 
  978         !req->request()->isCacheMaintenance() &&
 
  979         !req->request()->isAtomic())
 
  980         memcpy(storeQueue[store_idx].
data(), 
data, size);
 
  987 #endif // __CPU_O3_LSQ_UNIT_HH__ 
  
void insertLoad(const DynInstPtr &load_inst)
Inserts a load instruction.
void writebackBlockedStore()
Try to finish a previously blocked write back attempt.
Impl::DynInstPtr DynInstPtr
bool _isAllZeros
Does this request write all zeros and thus doesn't have any data attached to it.
PacketPtr pkt
The packet that would have been sent to memory.
void dumpInsts() const
Debugging function to dump instructions in the LSQ.
void set(const DynInstPtr &inst)
void commitLoad()
Commits the head load.
unsigned depCheckShift
The number of places to shift addresses in the LSQ before checking for dependency violations.
char _data[MaxDataBytes]
The store data.
Stats::Scalar memOrderViolation
Total number of memory ordering violations.
bool dereferenceable() const
Test dereferenceability.
LSQRequest * pendingRequest
The packet that is pending free cache ports.
unsigned getCount()
Returns the number of instructions in the LSQ.
void set(const DynInstPtr &inst)
InstSeqNum getStoreHeadSeqNum()
Returns the sequence number of the head store instruction.
const char * data() const
void storePostSend()
Handles completing the send of a store to memory.
int16_t ThreadID
Thread index/ID type.
bool isFull()
Returns if either the LQ or SQ is full.
InstSeqNum stallingStoreIsn
The store that causes the stall due to partial store to load forwarding.
IEW * iewStage
Pointer to the IEW stage.
bool recvTimingResp(PacketPtr pkt)
Handles writing back and completing the load or store that has returned from memory.
Stats::Scalar forwLoads
Total number of loads forwarded from LSQ stores.
int numStoresToWB()
Returns the number of stores to writeback.
@ CACHE_BLOCK_ZERO
This is a write that is targeted and zeroing an entire cache block.
unsigned int cacheLineSize()
SQSenderState(typename StoreQueue::iterator idx_)
SQEntry()
Constructs an empty store queue entry.
uint64_t Tick
Tick count type.
O3CPU * cpu
Pointer to the CPU.
bool isStalled()
Returns whether or not the LSQ unit is stalled.
void checkSnoop(PacketPtr pkt)
Check if an incoming invalidate hits in the lsq on a load that might have issued out of order wrt ano...
bool isEmpty() const
Returns if both the LQ and SQ are empty.
RequestPtr req
A pointer to the original request.
DynInstPtr getMemDepViolator()
Returns the memory ordering violator.
int storesToWB
The number of store instructions in the SQ waiting to writeback.
void init(O3CPU *cpu_ptr, IEW *iew_ptr, const DerivO3CPUParams &params, LSQ *lsq_ptr, unsigned id)
Initializes the LSQ unit with the specified number of entries.
constexpr unsigned MaxVecRegLenInBytes
const uint32_t & size() const
void writeback(const DynInstPtr &inst, PacketPtr pkt)
Writes back the instruction, sending it to IEW.
Particularisation of the LSQSenderState to the SQ.
int numStores()
Returns the number of stores in the SQ.
bool storeInFlight
Whether or not a store is in flight.
void commitLoads(InstSeqNum &youngest_inst)
Commits loads older than a specific sequence number.
Iterator to the circular queue.
CircularQueue< LQEntry > LQueue
unsigned numFreeStoreEntries()
Returns the number of free SQ entries.
static constexpr auto MaxDataBytes
void setRequest(LSQRequest *r)
Particularisation of the LSQSenderState to the LQ.
int stores
The number of store instructions in the SQ.
void drainSanityCheck() const
Perform sanity checks after a drain.
const bool & committed() const
bool full() const
Is the queue full? A queue is full if the head is the 0^{th} element and the tail is the (size-1)^{th...
void completeStore(typename StoreQueue::iterator store_idx)
Completes the store at the specified index.
This is a simple scalar statistic, like a counter.
Stats::Scalar blockedByCache
Number of times the LSQ is blocked due to the cache.
bool checkLoads
Should loads be checked for dependency issues.
void resetState()
Reset the LSQ state.
Class that implements the actual LQ and SQ for each specific thread.
DynInstPtr inst
Instruction whose results are being written back.
bool sqFull()
Returns if the SQ is full.
void takeOverFrom()
Takes over from another CPU's thread.
DynInstPtr inst
The instruction.
Stats::Scalar ignoredResponses
Total number of responses from the memory system that are ignored due to the instruction already bein...
LSQ * lsq
Pointer to the LSQ.
LoadQueue loadQueue
The load queue.
ThreadContext is the external interface to all thread state for anything outside of the CPU.
bool hasPendingRequest
Whether or not there is a packet that couldn't be sent because of a lack of cache ports.
WritebackEvent(const DynInstPtr &_inst, PacketPtr pkt, LSQUnit *lsq_ptr)
Constructs a writeback event.
uint64_t getLatestHtmUid() const
const bool & isAllZeros() const
bool lqEmpty() const
Returns if the LQ is empty.
std::shared_ptr< FaultBase > Fault
CircularQueue< SQEntry >::iterator SQIterator
bool & canWB()
Member accessors.
int getLoadHead()
Returns the index of the head load instruction.
LoadQueue::iterator idx
The LQ index of the instruction.
bool willWB()
Returns if the LSQ unit will writeback on this cycle.
Derived class to hold any sender state the LSQ needs.
void process()
Processes the writeback event.
void commitStores(InstSeqNum &youngest_inst)
Commits stores older than a specific sequence number.
void insert(const DynInstPtr &inst)
Inserts an instruction.
void resetHtmStartsStops()
StoreQueue::iterator storeWBIt
The index of the first instruction that may be ready to be written back, and has not yet been written...
AddrRangeCoverage
Coverage of one address range with another.
void setHtmTransactional(uint64_t val)
Stipulates that this packet/request originates in the CPU executing in transactional mode,...
void writebackStores()
Writes back stores.
bool lqFull()
Returns if the LQ is full.
Fault executeLoad(int lq_idx)
const bool & completed() const
Stats::Scalar rescheduledLoads
Number of loads that were rescheduled.
unsigned numFreeLoadEntries()
Returns the number of free LQ entries.
A RequestPort is a specialisation of a Port, which implements the default protocol for the three diff...
int loads
The number of load instructions in the LQ.
constexpr decltype(nullptr) NoFault
const DynInstPtr & instruction() const
int stallingLoadIdx
The load queue index of the load stalled on the store identified by stallingStoreIsn.
ThreadID lsqID
The LSQUnit thread id.
ProbePointArg< PacketInfo > Packet
Packet probe point.
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
void makeResponse()
Take a request packet and modify it in place to be suitable for returning as a response to that reque...
bool trySendPacket(bool isLoad, PacketPtr data_pkt)
Attempts to send a packet to the cache.
RequestPort * dcachePort
Pointer to the dcache port.
TimeBuffer< IssueStruct >::wire fromIssue
Wire to read information from the issue stage time queue.
PacketPtr retryPkt
The packet that needs to be retried.
int numLoads()
Returns the number of loads in the LQ.
CircularQueue< SQEntry > SQueue
LSQRequest * req
The request.
void recvRetry()
Handles doing the retry.
@ NO_ACCESS
The request should not cause a memory access.
LSQUnit(const LSQUnit &l)
We cannot copy LSQUnit because it has stats for which the copy constructor is deleted explicitly.
void handleLockedRead(XC *xc, const RequestPtr &req)
int getStoreHead()
Returns the index of the head store instruction.
typename Impl::CPUPol::LSQ::LSQRequest LSQRequest
DynInstPtr memDepViolator
The oldest load that caused a memory ordering violation.
std::string name() const
Returns the name of the LSQ unit.
Fault read(LSQRequest *req, int load_idx)
Executes the load at the given index.
void setDcachePort(RequestPort *dcache_port)
Sets the pointer to the dcache port.
LQSenderState(typename LoadQueue::iterator idx_)
Stats::Scalar squashedStores
Total number of squashed stores.
CircularQueue< LQEntry >::iterator LQIterator
bool valid() const
Member accessors.
Fault executeStore(const DynInstPtr &inst)
Executes a store instruction.
void dataStatic(T *p)
Set the data pointer to the following value that should not be freed.
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
bool needsTSO
Flag for memory model.
void insertStore(const DynInstPtr &store_inst)
Inserts a store instruction.
bool violation()
Returns if there is a memory ordering violation.
bool stalled
Whether or not the LSQ is stalled.
void completeDataAccess(PacketPtr pkt)
Completes the data access that has been returned from the memory system.
CircularQueue< SQEntry > storeQueue
The store queue.
@ PartialAddrRangeCoverage
void schedule(Event &ev, Tick when)
Schedule event for the cpu.
LSQUnit(uint32_t lqEntries, uint32_t sqEntries)
Constructs an LSQ unit.
bool _committed
Whether or not the store is committed.
Cycles is a wrapper class for representing cycle counts, i.e.
Stats::Scalar squashedLoads
Total number of squashed loads.
bool _completed
Whether or not the store is completed.
Addr cacheBlockMask
Address Mask for a cache block (e.g.
InstSeqNum getLoadHeadSeqNum()
Returns the sequence number of the head load instruction.
bool isStoreBlocked
Whether or not a store is blocked due to the memory system.
Tick curTick()
The universal simulation clock.
uint64_t lastRetiredHtmUid
bool _canWB
Whether or not the store can writeback.
bool hasStoresToWB()
Returns if there are any stores to writeback.
void setLastRetiredHtmUid(uint64_t htm_uid)
void squash(const InstSeqNum &squashed_num)
Squashes all instructions younger than a specific sequence number.
bool sqEmpty() const
Returns if the SQ is empty.
Impl::CPUPol::IssueStruct IssueStruct
static const FlagsType STORE_NO_DATA
LSQEntry()
Constructs an empty store queue entry.
StoreQueue::iterator idx
The SQ index of the instruction.
LSQUnitStats(Stats::Group *parent)
static constexpr size_t DataSize
Fault executeLoad(const DynInstPtr &inst)
Executes a load instruction.
uint32_t _size
The size of the operation.
const char * description() const
Returns the description of this event.
LSQUnit::LSQUnitStats stats
typename LSQ::LSQSenderState LSQSenderState
Writeback event, specifically for when stores forward data to loads.
#define panic(...)
This implements a cprintf based panic() function.
const bool & canWB() const
Fault write(LSQRequest *req, uint8_t *data, int store_idx)
Executes the store at the given index.
LSQUnit< Impl > * lsqPtr
The pointer to the LSQ unit that issued the store.
Fault checkViolations(typename LoadQueue::iterator &loadIt, const DynInstPtr &inst)
Check for ordering violations in the LSQ.
Generated on Tue Jun 22 2021 15:28:26 for gem5 by  doxygen 1.8.17