cache.cc
/*
 * Copyright (c) 2010-2019, 2024 ARM Limited
 * All rights reserved.
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2002-2005 The Regents of The University of Michigan
 * Copyright (c) 2010,2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * @file
 * Cache definitions.
 */

#include "mem/cache/cache.hh"

#include <cassert>

#include "base/compiler.hh"
#include "base/logging.hh"
#include "base/trace.hh"
#include "base/types.hh"
#include "debug/Cache.hh"
#include "debug/CacheTags.hh"
#include "debug/CacheVerbose.hh"
#include "enums/Clusivity.hh"
#include "mem/cache/cache_blk.hh"
#include "mem/cache/compressors/base.hh"
#include "mem/cache/mshr.hh"
#include "mem/cache/tags/base.hh"
#include "mem/cache/write_queue_entry.hh"
#include "mem/request.hh"
#include "params/Cache.hh"

namespace gem5
{

Cache::Cache(const CacheParams &p)
    : BaseCache(p, p.system->cacheLineSize()),
      doFastWrites(true)
{
    assert(p.tags);
    assert(p.replacement_policy);
}

void
Cache::satisfyRequest(PacketPtr pkt, CacheBlk *blk,
                      bool deferred_response, bool pending_downgrade)
{
    BaseCache::satisfyRequest(pkt, blk);

    if (pkt->isRead()) {
        // determine if this read is from a (coherent) cache or not
        if (pkt->fromCache()) {
            assert(pkt->getSize() == blkSize);
            // special handling for coherent block requests from
            // upper-level caches
            if (pkt->needsWritable()) {
                // sanity check
                assert(pkt->cmd == MemCmd::ReadExReq ||
                       pkt->cmd == MemCmd::SCUpgradeFailReq);
                assert(!pkt->hasSharers());

                // if we have a dirty copy, make sure the recipient
                // keeps it marked dirty (in the modified state)
                if (blk->isSet(CacheBlk::DirtyBit)) {
                    pkt->setCacheResponding();
                    blk->clearCoherenceBits(CacheBlk::DirtyBit);
                }
            } else if (blk->isSet(CacheBlk::WritableBit) &&
                       !pending_downgrade && !pkt->hasSharers() &&
                       pkt->cmd != MemCmd::ReadCleanReq) {
                // we can give the requestor a writable copy on a read
                // request if:
                //  - we have a writable copy at this level (& below)
                //  - we don't have a pending snoop from below
                //    signaling another read request
                //  - no other cache above has a copy (otherwise it
                //    would have set the hasSharers flag when
                //    snooping the packet)
                //  - the read has not explicitly asked for a clean
                //    copy of the line
                if (blk->isSet(CacheBlk::DirtyBit)) {
                    // special considerations if we're owner:
                    if (!deferred_response) {
                        // respond with the line in Modified state
                        // (cacheResponding set, hasSharers not set)
                        pkt->setCacheResponding();

                        // if this cache is mostly inclusive, we
                        // keep the block in the Exclusive state,
                        // and pass it upwards as Modified
                        // (writable and dirty), hence we have
                        // multiple caches, all on the same path
                        // towards memory, all considering the
                        // same block writable, but only one
                        // considering it Modified

                        // we get away with multiple caches (on
                        // the same path to memory) considering
                        // the block writable as we always enter
                        // the cache hierarchy through a cache,
                        // and first snoop upwards in all other
                        // branches
                        blk->clearCoherenceBits(CacheBlk::DirtyBit);
                    } else {
                        // if we're responding after our own miss,
                        // there's a window where the recipient didn't
                        // know it was getting ownership and may not
                        // have responded to snoops correctly, so we
                        // have to respond with a shared line
                        pkt->setHasSharers();
                    }
                }
            } else {
                // otherwise only respond with a shared copy
                pkt->setHasSharers();
            }
        }
    }
}
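
// In MOESI terms, the read handling above roughly amounts to the
// following: if the requestor needs a writable copy and our copy is
// dirty, the line is handed over in Modified state (cacheResponding
// set, dirty bit cleared here). For a plain read with a writable copy
// here, no sharers and no pending downgrade: a dirty block is passed
// as Modified when we respond directly, but only as Shared when the
// response was deferred; a clean block is passed up as Exclusive. In
// every other case the requestor only gets a Shared copy.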

/////////////////////////////////////////////////////
//
// Access path: requests coming in from the CPU side
//
/////////////////////////////////////////////////////

bool
Cache::access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat,
              PacketList &writebacks)
{

    if (pkt->req->isUncacheable()) {
        assert(pkt->isRequest());

        gem5_assert(!(isReadOnly && pkt->isWrite()),
                    "Should never see a write in a read-only cache %s\n",
                    name());

        DPRINTF(Cache, "%s for %s\n", __func__, pkt->print());

        // flush and invalidate any existing block
        CacheBlk *old_blk(tags->findBlock({pkt->getAddr(), pkt->isSecure()}));
        if (old_blk && old_blk->isValid()) {
            BaseCache::evictBlock(old_blk, writebacks);
        }

        blk = nullptr;
        // lookupLatency is the latency in case the request is uncacheable.
        lat = lookupLatency;
        return false;
    }

    return BaseCache::access(pkt, blk, lat, writebacks);
}

void
Cache::doWritebacks(PacketList& writebacks, Tick forward_time)
{
    while (!writebacks.empty()) {
        PacketPtr wbPkt = writebacks.front();
        // We use forwardLatency here because we are copying writebacks to
        // the write buffer.

        // Call isCachedAbove for Writebacks, CleanEvicts and
        // WriteCleans to discover if the block is cached above.
        if (isCachedAbove(wbPkt)) {
            if (wbPkt->cmd == MemCmd::CleanEvict) {
                // Delete CleanEvict because cached copies exist above. The
                // packet destructor will delete the request object because
                // this is a non-snoop request packet which does not require a
                // response.
                delete wbPkt;
            } else if (wbPkt->cmd == MemCmd::WritebackClean) {
                // clean writeback, do not send since the block is
                // still cached above
                assert(writebackClean);
                delete wbPkt;
            } else {
                assert(wbPkt->cmd == MemCmd::WritebackDirty ||
                       wbPkt->cmd == MemCmd::WriteClean);
                // Set BLOCK_CACHED flag in Writeback and send below, so that
                // the Writeback does not reset the bit corresponding to this
                // address in the snoop filter below.
                wbPkt->setBlockCached();
                allocateWriteBuffer(wbPkt, forward_time);
            }
        } else {
            // If the block is not cached above, send packet below. Both
            // CleanEvict and Writeback with BLOCK_CACHED flag cleared will
            // reset the bit corresponding to this address in the snoop filter
            // below.
            allocateWriteBuffer(wbPkt, forward_time);
        }
        writebacks.pop_front();
    }
}
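
// To summarize the disposition logic above: a CleanEvict or
// WritebackClean is simply dropped when the block is still cached
// above, a WritebackDirty or WriteClean is sent below with
// BLOCK_CACHED set so the snoop filter keeps its entry, and any of
// them is sent below unmodified when no copies exist above, which
// lets the snoop filter clear the corresponding bit.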

void
Cache::doWritebacksAtomic(PacketList& writebacks)
{
    while (!writebacks.empty()) {
        PacketPtr wbPkt = writebacks.front();
        // Call isCachedAbove for both Writebacks and CleanEvicts. If
        // isCachedAbove returns true we set BLOCK_CACHED flag in Writebacks
        // and discard CleanEvicts.
        if (isCachedAbove(wbPkt, false)) {
            if (wbPkt->cmd == MemCmd::WritebackDirty ||
                wbPkt->cmd == MemCmd::WriteClean) {
                // Set BLOCK_CACHED flag in Writeback and send below,
                // so that the Writeback does not reset the bit
                // corresponding to this address in the snoop filter
                // below. We can discard CleanEvicts because cached
                // copies exist above. In atomic mode isCachedAbove
                // modifies the packet to set the BLOCK_CACHED flag.
                memSidePort.sendAtomic(wbPkt);
            }
        } else {
            // If the block is not cached above, send packet below. Both
            // CleanEvict and Writeback with BLOCK_CACHED flag cleared will
            // reset the bit corresponding to this address in the snoop filter
            // below.
            memSidePort.sendAtomic(wbPkt);
        }
        writebacks.pop_front();
        // In case of CleanEvicts, the packet destructor will delete the
        // request object because this is a non-snoop request packet which
        // does not require a response.
        delete wbPkt;
    }
}


void
Cache::recvTimingSnoopResp(PacketPtr pkt)
{
    DPRINTF(Cache, "%s for %s\n", __func__, pkt->print());

    // determine if the response is from a snoop request we created
    // (in which case it should be in the outstandingSnoop), or if we
    // merely forwarded someone else's snoop request
    const bool forwardAsSnoop = outstandingSnoop.find(pkt->req) ==
        outstandingSnoop.end();

    if (!forwardAsSnoop) {
        // the packet came from this cache, so sink it here and do not
        // forward it
        assert(pkt->cmd == MemCmd::HardPFResp);

        outstandingSnoop.erase(pkt->req);

        DPRINTF(Cache, "Got prefetch response from above for addr "
                "%#llx (%s)\n", pkt->getAddr(), pkt->isSecure() ? "s" : "ns");
        recvTimingResp(pkt);
        return;
    }

    // forwardLatency is set here because there is a response from an
    // upper level cache.
    // To pay the delay that occurs if the packet comes from the bus,
    // we also charge the headerDelay.
    Tick snoop_resp_time = clockEdge(forwardLatency) + pkt->headerDelay;
    // Reset the timing of the packet.
    pkt->headerDelay = pkt->payloadDelay = 0;
    memSidePort.schedTimingSnoopResp(pkt, snoop_resp_time);
}

void
Cache::promoteWholeLineWrites(PacketPtr pkt)
{
    // Cache line clearing instructions
    if (doFastWrites && (pkt->cmd == MemCmd::WriteReq) &&
        (pkt->getSize() == blkSize) && (pkt->getOffset(blkSize) == 0) &&
        !pkt->isMaskedWrite()) {
        pkt->cmd = MemCmd::WriteLineReq;
        DPRINTF(Cache, "packet promoted from Write to WriteLineReq\n");
    }
}
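
// For example, assuming a 64-byte block size, a WriteReq of 64 bytes
// whose address is 64-byte aligned (getOffset(blkSize) == 0) is
// promoted to a WriteLineReq, whereas a 32-byte write, a write that
// straddles a block boundary, or a masked write is left untouched.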

void
Cache::handleTimingReqHit(PacketPtr pkt, CacheBlk *blk, Tick request_time)
{
    // should never be satisfying an uncacheable access as we
    // flush and invalidate any existing block as part of the
    // lookup
    assert(!pkt->req->isUncacheable());

    BaseCache::handleTimingReqHit(pkt, blk, request_time);
}

void
Cache::handleTimingReqMiss(PacketPtr pkt, CacheBlk *blk, Tick forward_time,
                           Tick request_time)
{

    // These should always hit due to the earlier Locked Read
    assert(pkt->cmd != MemCmd::LockedRMWWriteReq);
    if (pkt->req->isUncacheable()) {
        // ignore any existing MSHR if we are dealing with an
        // uncacheable request

        // should have flushed and have no valid block
        assert(!blk || !blk->isValid());

        stats.cmdStats(pkt).mshrUncacheable[pkt->req->requestorId()]++;

        if (pkt->isWrite()) {
            allocateWriteBuffer(pkt, forward_time);
        } else {
            // uncacheable accesses always allocate a new MSHR

            // Here we are using forward_time, modelling the latency of
            // a miss (outbound) just as forwardLatency, neglecting the
            // lookupLatency component.

            // Here we allow allocating a miss buffer for read requests
            // and x86's clflush requests. A clflush request should be
            // propagated through all levels of the cache system.

            // Doing clflush in uncacheable regions might sound contradictory;
            // however, it is entirely possible due to how the Linux kernel
            // handles page property changes. When the kernel wants to change
            // a page property, it flushes the related cache lines, and it
            // might do so after already marking the region uncacheable, so
            // the clflush can occur in an uncacheable region. clflush
            // results in a CleanInvalidReq.
            assert(pkt->isRead() || pkt->isCleanInvalidateRequest());
            allocateMissBuffer(pkt, forward_time);
        }

        return;
    }

    Addr blk_addr = pkt->getBlockAddr(blkSize);

    MSHR *mshr = mshrQueue.findMatch(blk_addr, pkt->isSecure());

    // Software prefetch handling:
    // To keep the core from waiting on data it won't look at
    // anyway, send back a response with dummy data. Miss handling
    // will continue asynchronously. Unfortunately, the core will
    // insist upon freeing the original Packet/Request, so we have to
    // create a new pair with a different lifecycle. Note that this
    // processing happens before any MSHR munging on behalf of
    // this request because this new Request will be the one stored
    // into the MSHRs, not the original.
    if (pkt->cmd.isSWPrefetch()) {
        assert(pkt->needsResponse());
        assert(pkt->req->hasPaddr());
        assert(!pkt->req->isUncacheable());

        // There's no reason to add a prefetch as an additional target
        // to an existing MSHR. If an outstanding request is already
        // in progress, there is nothing for the prefetch to do.
        // If this is the case, we don't even create a request at all.
        PacketPtr pf = nullptr;

        if (!mshr) {
            // copy the request and create a new SoftPFReq packet
            RequestPtr req = std::make_shared<Request>(pkt->req->getPaddr(),
                                                       pkt->req->getSize(),
                                                       pkt->req->getFlags(),
                                                       pkt->req->requestorId());
            pf = new Packet(req, pkt->cmd);
            pf->allocate();
            assert(pf->matchAddr(pkt));
            assert(pf->getSize() == pkt->getSize());
        }

        pkt->makeTimingResponse();

        // request_time is used here, taking into account lat and the delay
        // charged if the packet comes from the xbar.
        cpuSidePort.schedTimingResp(pkt, request_time);

        // If an outstanding request is in progress (we found an
        // MSHR) this is set to null
        pkt = pf;
    }

    BaseCache::handleTimingReqMiss(pkt, mshr, blk, forward_time, request_time);
}
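
// The software-prefetch path above thus involves two packet lifetimes:
// the original packet is turned into a dummy response and returned to
// the core immediately, while the copy created here (only when no
// matching MSHR exists) is the packet that proceeds into the miss
// handling below.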

void
Cache::recvTimingReq(PacketPtr pkt)
{
    DPRINTF(CacheTags, "%s tags:\n%s\n", __func__, tags->print());

    promoteWholeLineWrites(pkt);

    if (pkt->cacheResponding()) {
        // a cache above us (but not where the packet came from) is
        // responding to the request, in other words it has the line
        // in Modified or Owned state
        DPRINTF(Cache, "Cache above responding to %s: not responding\n",
                pkt->print());

        // if the packet needs the block to be writable, and the cache
        // that has promised to respond (setting the cache responding
        // flag) is not providing writable (it is in Owned rather than
        // the Modified state), we know that there may be other Shared
        // copies in the system; go out and invalidate them all
        assert(pkt->needsWritable() && !pkt->responderHadWritable());

        // an upstream cache that had the line in Owned state
        // (dirty, but not writable), is responding and thus
        // transferring the dirty line from one branch of the
        // cache hierarchy to another

        // send out an express snoop and invalidate all other
        // copies (snooping a packet that needs writable is the
        // same as an invalidation), thus turning the Owned line
        // into a Modified line, note that we don't invalidate the
        // block in the current cache or any other cache on the
        // path to memory

        // create a downstream express snoop with cleared packet
        // flags, there is no need to allocate any data as the
        // packet is merely used to co-ordinate state transitions
        Packet *snoop_pkt = new Packet(pkt, true, false);

        // also reset the bus time that the original packet has
        // not yet paid for
        snoop_pkt->headerDelay = snoop_pkt->payloadDelay = 0;

        // make this an instantaneous express snoop, and let the
        // other caches in the system know that another cache
        // is responding, because we have found the authoritative
        // copy (Modified or Owned) that will supply the right
        // data
        snoop_pkt->setExpressSnoop();
        snoop_pkt->setCacheResponding();

        // this express snoop travels towards the memory, and at
        // every crossbar it is snooped upwards thus reaching
        // every cache in the system
        [[maybe_unused]] bool success = memSidePort.sendTimingReq(snoop_pkt);
        // express snoops always succeed
        assert(success);

        // main memory will delete the snoop packet

        // queue for deletion, as opposed to immediate deletion, as
        // the sending cache is still relying on the packet
        pendingDelete.reset(pkt);

        // no need to take any further action in this particular cache
        // as an upstream cache has already committed to responding,
        // and we have already sent out any express snoops in the
        // section above to ensure all other copies in the system are
        // invalidated
        return;
    }

    BaseCache::recvTimingReq(pkt);
}

PacketPtr
Cache::createMissPacket(PacketPtr cpu_pkt, CacheBlk *blk,
                        bool needsWritable,
                        bool is_whole_line_write) const
{
    // should never see evictions here
    assert(!cpu_pkt->isEviction());

    bool blkValid = blk && blk->isValid();

    if (cpu_pkt->req->isUncacheable() ||
        (!blkValid && cpu_pkt->isUpgrade()) ||
        cpu_pkt->cmd == MemCmd::InvalidateReq || cpu_pkt->isClean()) {
        // uncacheable requests and upgrades from upper-level caches
        // that missed completely just go through as is
        return nullptr;
    }

    assert(cpu_pkt->needsResponse());

    MemCmd cmd;
    // @TODO make useUpgrades a parameter.
    // Note that ownership protocols require upgrade, otherwise a
    // write miss on a shared owned block will generate a ReadExcl,
    // which will clobber the owned copy.
    const bool useUpgrades = true;
    assert(cpu_pkt->cmd != MemCmd::WriteLineReq || is_whole_line_write);
    if (is_whole_line_write) {
        assert(!blkValid || !blk->isSet(CacheBlk::WritableBit));
        // forward as invalidate to all other caches, this gives us
        // the line in Exclusive state, and invalidates all other
        // copies
        cmd = MemCmd::InvalidateReq;
    } else if (blkValid && useUpgrades) {
        // only reason to be here is that blk is read only and we need
        // it to be writable
        assert(needsWritable);
        assert(!blk->isSet(CacheBlk::WritableBit));
        cmd = cpu_pkt->isLLSC() ? MemCmd::SCUpgradeReq : MemCmd::UpgradeReq;
    } else if (cpu_pkt->cmd == MemCmd::SCUpgradeFailReq ||
               cpu_pkt->cmd == MemCmd::StoreCondFailReq) {
        // Even though this SC will fail, we still need to send out the
        // request and get the data to supply it to other snoopers in the case
        // where the determination that the StoreCond fails is delayed due to
        // all caches not being on the same local bus.
        cmd = MemCmd::SCUpgradeFailReq;
    } else {
        // block is invalid

        // If the request does not need a writable copy there are two
        // cases where we need to ensure the response will not fetch the
        // block in dirty state:
        // * this cache is read only and it does not perform
        //   writebacks,
        // * this cache is mostly exclusive and will not fill (since
        //   it does not fill it will have to writeback the dirty data
        //   immediately which generates unnecessary writebacks).
        bool force_clean_rsp = isReadOnly || clusivity == enums::mostly_excl;
        cmd = needsWritable ? MemCmd::ReadExReq :
            (force_clean_rsp ? MemCmd::ReadCleanReq : MemCmd::ReadSharedReq);
    }
    PacketPtr pkt = new Packet(cpu_pkt->req, cmd, blkSize);

    // if there are upstream caches that have already marked the
    // packet as having sharers (not passing writable), pass that info
    // downstream
    if (cpu_pkt->hasSharers() && !needsWritable) {
        // note that cpu_pkt may have spent a considerable time in the
        // MSHR queue and that the information could possibly be out
        // of date, however, there is no harm in conservatively
        // assuming the block has sharers
        pkt->setHasSharers();
        DPRINTF(Cache, "%s: passing hasSharers from %s to %s\n",
                __func__, cpu_pkt->print(), pkt->print());
    }

    // the packet should be block aligned
    assert(pkt->getAddr() == pkt->getBlockAddr(blkSize));

    pkt->allocate();
    DPRINTF(Cache, "%s: created %s from %s\n", __func__, pkt->print(),
            cpu_pkt->print());
    return pkt;
}
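
// The bus command selected above, as a function of request and block
// state: a whole-line write becomes an InvalidateReq; a valid but
// read-only block that needs to become writable yields an UpgradeReq
// (or SCUpgradeReq for LLSC); a failed store-conditional is forwarded
// as SCUpgradeFailReq; and an invalid block produces ReadExReq,
// ReadCleanReq or ReadSharedReq depending on whether a writable or a
// guaranteed-clean copy is required.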

Cycles
Cache::handleAtomicReqMiss(PacketPtr pkt, CacheBlk *&blk,
                           PacketList &writebacks)
{
    // deal with the packets that go through the write path of
    // the cache, i.e. any evictions and writes
    if (pkt->isEviction() || pkt->cmd == MemCmd::WriteClean ||
        (pkt->req->isUncacheable() && pkt->isWrite())) {
        Cycles latency = ticksToCycles(memSidePort.sendAtomic(pkt));

        // at this point, if the request was an uncacheable write
        // request, it has been satisfied by a memory below and the
        // packet carries the response back
        assert(!(pkt->req->isUncacheable() && pkt->isWrite()) ||
               pkt->isResponse());

        return latency;
    }

    // only misses left

    PacketPtr bus_pkt = createMissPacket(pkt, blk, pkt->needsWritable(),
                                         pkt->isWholeLineWrite(blkSize));

    bool is_forward = (bus_pkt == nullptr);

    if (is_forward) {
        // just forwarding the same request to the next level
        // no local cache operation involved
        bus_pkt = pkt;
    }

    DPRINTF(Cache, "%s: Sending an atomic %s\n", __func__,
            bus_pkt->print());

    const std::string old_state = blk ? blk->print() : "";

    Cycles latency = ticksToCycles(memSidePort.sendAtomic(bus_pkt));

    bool is_invalidate = bus_pkt->isInvalidate();

    // We are now dealing with the response handling
    DPRINTF(Cache, "%s: Receive response: %s for %s\n", __func__,
            bus_pkt->print(), old_state);

    // If packet was a forward, the response (if any) is already
    // in place in the bus_pkt == pkt structure, so we don't need
    // to do anything. Otherwise, use the separate bus_pkt to
    // generate response to pkt and then delete it.
    if (!is_forward) {
        if (pkt->needsResponse()) {
            assert(bus_pkt->isResponse());
            if (bus_pkt->isError()) {
                pkt->makeAtomicResponse();
                pkt->copyError(bus_pkt);
            } else if (pkt->isWholeLineWrite(blkSize)) {
                // note the use of pkt, not bus_pkt here.

                // write-line request to the cache that promoted
                // the write to a whole line
                const bool allocate = allocOnFill(pkt->cmd) &&
                    (!writeAllocator || writeAllocator->allocate());
                blk = handleFill(bus_pkt, blk, writebacks, allocate);
                assert(blk != nullptr);
                is_invalidate = false;
                satisfyRequest(pkt, blk);
            } else if (bus_pkt->isRead() ||
                       bus_pkt->cmd == MemCmd::UpgradeResp) {
                // we're updating cache state to allow us to
                // satisfy the upstream request from the cache
                blk = handleFill(bus_pkt, blk, writebacks,
                                 allocOnFill(pkt->cmd));
                satisfyRequest(pkt, blk);
                maintainClusivity(pkt->fromCache(), blk);
            } else {
                // we're satisfying the upstream request without
                // modifying cache state, e.g., a write-through
                pkt->makeAtomicResponse();
            }
        }
        delete bus_pkt;
    }

    if (is_invalidate && blk && blk->isValid()) {
        invalidateBlock(blk);
    }

    return latency;
}

Tick
Cache::recvAtomic(PacketPtr pkt)
{
    promoteWholeLineWrites(pkt);

    // follow the same flow as in recvTimingReq, and check if a cache
    // above us is responding
    if (pkt->cacheResponding()) {
        assert(!pkt->req->isCacheInvalidate());
        DPRINTF(Cache, "Cache above responding to %s: not responding\n",
                pkt->print());

        // if a cache is responding, and it had the line in Owned
        // rather than Modified state, we need to invalidate any
        // copies that are not on the same path to memory
        assert(pkt->needsWritable() && !pkt->responderHadWritable());

        return memSidePort.sendAtomic(pkt);
    }

    return BaseCache::recvAtomic(pkt);
}


/////////////////////////////////////////////////////
//
// Response handling: responses from the memory side
//
/////////////////////////////////////////////////////

void
Cache::serviceMSHRTargets(MSHR *mshr, const PacketPtr pkt, CacheBlk *blk)
{
    QueueEntry::Target *initial_tgt = mshr->getTarget();
    // First offset for critical word first calculations
    const int initial_offset = initial_tgt->pkt->getOffset(blkSize);

    const bool is_error = pkt->isError();
    // allow invalidation responses originating from write-line
    // requests to be discarded
    bool is_invalidate = pkt->isInvalidate() &&
        !mshr->wasWholeLineWrite;

    bool from_core = false;
    bool from_pref = false;

    if (pkt->cmd == MemCmd::LockedRMWWriteResp) {
        // This is the fake response generated by the write half of the RMW;
        // see comments in recvTimingReq(). The first target on the list
        // should be the LockedRMWReadReq which has already been satisfied,
        // either because it was a hit (and the MSHR was allocated in
        // recvTimingReq()) or because it was left there after the initial
        // response in extractServiceableTargets. In either case, we
        // don't need to respond now, so pop it off to prevent the loop
        // below from generating another response.
        assert(initial_tgt->pkt->cmd == MemCmd::LockedRMWReadReq);
        delete initial_tgt->pkt;
        mshr->popTarget();
        initial_tgt = nullptr;
    }

    MSHR::TargetList targets = mshr->extractServiceableTargets(pkt);
    for (auto &target: targets) {
        Packet *tgt_pkt = target.pkt;
        switch (target.source) {
          case MSHR::Target::FromCPU:
            from_core = true;

            Tick completion_time;
            // Here we charge on completion_time the delay of the xbar if the
            // packet comes from it, charged on headerDelay.
            completion_time = pkt->headerDelay;

            // Software prefetch handling for cache closest to core
            if (tgt_pkt->cmd.isSWPrefetch()) {
                if (tgt_pkt->needsWritable()) {
                    // All other copies of the block were invalidated and we
                    // have an exclusive copy.

                    // The coherence protocol assumes that if we fetched an
                    // exclusive copy of the block, we have the intention to
                    // modify it. Therefore the MSHR for the PrefetchExReq has
                    // been the point of ordering and this cache has committed
                    // to respond to snoops for the block.
                    //
                    // In most cases this is true anyway - a PrefetchExReq
                    // will be followed by a WriteReq. However, if that
                    // doesn't happen, the block is not marked as dirty and
                    // the cache doesn't respond to the snoops it has
                    // committed to respond to.
                    //
                    // To avoid deadlocks in cases where there is a snoop
                    // between the PrefetchExReq and the expected WriteReq, we
                    // proactively mark the block as Dirty.
                    assert(blk);
                    blk->setCoherenceBits(CacheBlk::DirtyBit);

                    panic_if(isReadOnly, "Prefetch exclusive requests from "
                             "read-only cache %s\n", name());
                }

                // a software prefetch would have already been ack'd
                // immediately with dummy data so the core would be able to
                // retire it. This request completes right here, so we
                // deallocate it.
                delete tgt_pkt;
                break; // skip response
            }

            // unlike the other packet flows, where data is found in other
            // caches or memory and brought back, write-line requests always
            // have the data right away, so the above check for "is fill?"
            // cannot actually be determined until examining the stored MSHR
            // state. We "catch up" with that logic here, which is duplicated
            // from above.
            if (tgt_pkt->cmd == MemCmd::WriteLineReq) {
                assert(!is_error);
                assert(blk);
                assert(blk->isSet(CacheBlk::WritableBit));
            }

            // Here we decide whether we will satisfy the target using
            // data from the block or from the response. We use the
            // block data to satisfy the request when the block is
            // present and valid and in addition the response is not
            // forwarding data to the cache above (we didn't fill
            // either); otherwise we use the packet data.
            if (blk && blk->isValid() &&
                (!mshr->isForward || !pkt->hasData())) {
                satisfyRequest(tgt_pkt, blk, true, mshr->hasPostDowngrade());

                // How many bytes past the first request is this one
                int transfer_offset =
                    tgt_pkt->getOffset(blkSize) - initial_offset;
                if (transfer_offset < 0) {
                    transfer_offset += blkSize;
                }

                // If not critical word (offset) return payloadDelay.
                // responseLatency is the latency of the return path
                // from lower level caches/memory to an upper level cache or
                // the core.
                completion_time += clockEdge(responseLatency) +
                    (transfer_offset ? pkt->payloadDelay : 0);

                assert(!tgt_pkt->req->isUncacheable());

                assert(tgt_pkt->req->requestorId() < system->maxRequestors());
                stats.cmdStats(tgt_pkt)
                    .missLatency[tgt_pkt->req->requestorId()] +=
                    completion_time - target.recvTime;

                if (tgt_pkt->cmd == MemCmd::LockedRMWReadReq) {
                    // We're going to leave a target in the MSHR until the
                    // write half of the RMW occurs (see comments above in
                    // recvTimingReq()). Since we'll be using the current
                    // request packet (which has the allocated data pointer)
                    // to form the response, we have to allocate a new dummy
                    // packet to save in the MSHR target.
                    mshr->updateLockedRMWReadTarget(tgt_pkt);
                    // skip the rest of target processing after we
                    // send the response
                    // Mark block inaccessible until write arrives
                    blk->clearCoherenceBits(CacheBlk::ReadableBit);
                    break;
                }
            } else if (pkt->cmd == MemCmd::UpgradeFailResp) {
                // failed StoreCond upgrade
                assert(tgt_pkt->cmd == MemCmd::StoreCondReq ||
                       tgt_pkt->cmd == MemCmd::StoreCondFailReq ||
                       tgt_pkt->cmd == MemCmd::SCUpgradeFailReq);
                // responseLatency is the latency of the return path
                // from lower level caches/memory to an upper level cache or
                // the core.
                completion_time += clockEdge(responseLatency) +
                    pkt->payloadDelay;
                tgt_pkt->req->setExtraData(0);
            } else if (pkt->cmd == MemCmd::LockedRMWWriteResp) {
                // Fake response on LockedRMW completion, see above.
                // Since the data is already in the cache, we just use
                // responseLatency with no extra penalties.
                completion_time = clockEdge(responseLatency);
            } else {
                if (is_invalidate && blk && blk->isValid()) {
                    // We are about to send a response to a cache above
                    // that asked for an invalidation; we need to
                    // invalidate our copy immediately as the most
                    // up-to-date copy of the block will now be in the
                    // cache above. It will also prevent this cache from
                    // responding (if the block was previously dirty) to
                    // snoops as they should snoop the caches above where
                    // they will get the response from.
                    invalidateBlock(blk);
                }
                // not a cache fill, just forwarding response
                // responseLatency is the latency of the return path
                // from lower level caches/memory to the core.
                completion_time += clockEdge(responseLatency) +
                    pkt->payloadDelay;
                if (!is_error) {
                    if (pkt->isRead()) {
                        // sanity check
                        assert(pkt->matchAddr(tgt_pkt));
                        assert(pkt->getSize() >= tgt_pkt->getSize());

                        tgt_pkt->setData(pkt->getConstPtr<uint8_t>());
                    } else {
                        // MSHR targets can read data either from the
                        // block or the response pkt. If we can't get data
                        // from the block (i.e., invalid or has old data)
                        // or the response (did not bring in any data)
                        // then make sure that the target didn't expect
                        // any.
                        assert(!tgt_pkt->hasRespData());
                    }
                }

                // this response did not allocate here and therefore
                // it was not consumed, make sure that any flags are
                // carried over to cache above
                tgt_pkt->copyResponderFlags(pkt);
            }
            tgt_pkt->makeTimingResponse();
            // if this packet is an error copy that to the new packet
            if (is_error)
                tgt_pkt->copyError(pkt);
            if (tgt_pkt->cmd == MemCmd::ReadResp &&
                (is_invalidate || mshr->hasPostInvalidate())) {
                // If intermediate cache got ReadRespWithInvalidate,
                // propagate that. Response should not have
                // isInvalidate() set otherwise.
                tgt_pkt->cmd = MemCmd::ReadRespWithInvalidate;
                DPRINTF(Cache, "%s: updated cmd to %s\n", __func__,
                        tgt_pkt->print());
            }
            // Reset the bus additional time as it is now accounted for
            tgt_pkt->headerDelay = tgt_pkt->payloadDelay = 0;
            cpuSidePort.schedTimingResp(tgt_pkt, completion_time);
            break;

          case MSHR::Target::FromPrefetcher:
            assert(tgt_pkt->cmd == MemCmd::HardPFReq);
            from_pref = true;

            delete tgt_pkt;
            break;

          case MSHR::Target::FromSnoop:
            // I don't believe that a snoop can be in an error state
            assert(!is_error);
            // response to snoop request
            DPRINTF(Cache, "processing deferred snoop...\n");
            // If the response is invalidating, a snooping target can
            // be satisfied if it is also invalidating. If the response is
            // not only invalidating, but more specifically an InvalidateResp
            // and the MSHR was created due to an InvalidateReq, then a cache
            // above is waiting to satisfy a WriteLineReq. In this case even a
            // non-invalidating snoop is added as a target here since this is
            // the ordering point. When the InvalidateResp reaches this cache,
            // the snooping target will further snoop the cache above with the
            // WriteLineReq.
            assert(!is_invalidate || pkt->cmd == MemCmd::InvalidateResp ||
                   pkt->req->isCacheMaintenance() ||
                   mshr->hasPostInvalidate());
            handleSnoop(tgt_pkt, blk, true, true, mshr->hasPostInvalidate());
            break;

          default:
            panic("Illegal target->source enum %d\n", target.source);
        }
    }

    if (blk && !from_core && from_pref) {
        blk->setPrefetched();
    }

    if (!mshr->hasLockedRMWReadTarget()) {
        maintainClusivity(targets.hasFromCache, blk);

        if (blk && blk->isValid()) {
            // an invalidate response stemming from a write line request
            // should not invalidate the block, so check if the
            // invalidation should be discarded
            if (is_invalidate || mshr->hasPostInvalidate()) {
                invalidateBlock(blk);
            } else if (mshr->hasPostDowngrade()) {
                blk->clearCoherenceBits(CacheBlk::WritableBit);
            }
        }
    }
}
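
// Each MSHR target is thus serviced according to its origin: FromCPU
// targets get a timing response (or are swallowed in the case of
// software prefetches and pending LockedRMW reads), FromPrefetcher
// targets only flag the block as prefetched, and FromSnoop targets
// replay the deferred snoop against the freshly filled block.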

PacketPtr
Cache::evictBlock(CacheBlk *blk)
{
    PacketPtr pkt = (blk->isSet(CacheBlk::DirtyBit) || writebackClean) ?
        writebackBlk(blk) : cleanEvictBlk(blk);

    invalidateBlock(blk);

    return pkt;
}

PacketPtr
Cache::cleanEvictBlk(CacheBlk *blk)
{
    assert(!writebackClean);
    assert(blk && blk->isValid() && !blk->isSet(CacheBlk::DirtyBit));

    // Creating a zero sized write, a message to the snoop filter
    RequestPtr req = std::make_shared<Request>(
        regenerateBlkAddr(blk), blkSize, 0, Request::wbRequestorId);

    if (blk->isSecure())
        req->setFlags(Request::SECURE);

    req->taskId(blk->getTaskId());

    PacketPtr pkt = new Packet(req, MemCmd::CleanEvict);
    pkt->allocate();
    DPRINTF(Cache, "Create CleanEvict %s\n", pkt->print());

    return pkt;
}

/////////////////////////////////////////////////////
//
// Snoop path: requests coming in from the memory side
//
/////////////////////////////////////////////////////

void
Cache::doTimingSupplyResponse(PacketPtr req_pkt, const uint8_t *blk_data,
                              bool already_copied, bool pending_inval)
{
    // sanity check
    assert(req_pkt->isRequest());
    assert(req_pkt->needsResponse());

    DPRINTF(Cache, "%s: for %s\n", __func__, req_pkt->print());
    // timing-mode snoop responses require a new packet, unless we
    // already made a copy...
    PacketPtr pkt = req_pkt;
    if (!already_copied)
        // do not clear flags, and allocate space for data if the
        // packet needs it (the only packets that carry data are read
        // responses)
        pkt = new Packet(req_pkt, false, req_pkt->isRead());

    assert(req_pkt->req->isUncacheable() || req_pkt->isInvalidate() ||
           pkt->hasSharers());
    pkt->makeTimingResponse();
    if (pkt->isRead()) {
        pkt->setDataFromBlock(blk_data, blkSize);
    }
    if (pkt->cmd == MemCmd::ReadResp && pending_inval) {
        // Assume we defer a response to a read from a far-away cache
        // A, then later defer a ReadExcl from a cache B on the same
        // bus as us. We'll assert cacheResponding in both cases, but
        // in the latter case cacheResponding will keep the
        // invalidation from reaching cache A. This special response
        // tells cache A that it gets the block to satisfy its read,
        // but must immediately invalidate it.
        pkt->cmd = MemCmd::ReadRespWithInvalidate;
    }
    // Here we consider forward_time, paying for just forward latency and
    // also charging the delay provided by the xbar.
    // forward_time is used as send_time in next allocateWriteBuffer().
    Tick forward_time = clockEdge(forwardLatency) + pkt->headerDelay;
    // Here we reset the timing of the packet.
    pkt->headerDelay = pkt->payloadDelay = 0;
    DPRINTF(CacheVerbose, "%s: created response: %s tick: %lu\n", __func__,
            pkt->print(), forward_time);
    memSidePort.schedTimingSnoopResp(pkt, forward_time);
}
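
// Note that the response constructed above leaves through the
// memory-side port as a snoop response rather than through the
// CPU-side port: it answers a snoop that arrived from below, on
// behalf of the crossbar's original requestor.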

uint32_t
Cache::handleSnoop(PacketPtr pkt, CacheBlk *blk, bool is_timing,
                   bool is_deferred, bool pending_inval)
{
    DPRINTF(CacheVerbose, "%s: for %s\n", __func__, pkt->print());
    // deferred snoops can only happen in timing mode
    assert(!(is_deferred && !is_timing));
    // pending_inval only makes sense on deferred snoops
    assert(!(pending_inval && !is_deferred));
    assert(pkt->isRequest());

    // the packet may get modified if we or a forwarded snooper
    // responds in atomic mode, so remember a few things about the
    // original packet up front
    bool invalidate = pkt->isInvalidate();
    [[maybe_unused]] bool needs_writable = pkt->needsWritable();

    // at the moment we could get an uncacheable write which does not
    // have the invalidate flag, and we need a suitable way of dealing
    // with this case
    panic_if(invalidate && pkt->req->isUncacheable(),
             "%s got an invalidating uncacheable snoop request %s",
             name(), pkt->print());

    uint32_t snoop_delay = 0;

    if (forwardSnoops) {
        // first propagate snoop upward to see if anyone above us wants to
        // handle it. save & restore packet src since it will get
        // rewritten to be relative to CPU-side bus (if any)
        if (is_timing) {
            // copy the packet so that we can clear any flags before
            // forwarding it upwards, we also allocate data (passing
            // the pointer along in case of static data), in case
            // there is a snoop hit in upper levels
            Packet snoopPkt(pkt, true, true);
            snoopPkt.setExpressSnoop();
            // the snoop packet does not need to wait any additional
            // time
            snoopPkt.headerDelay = snoopPkt.payloadDelay = 0;
            cpuSidePort.sendTimingSnoopReq(&snoopPkt);

            // add the header delay (including crossbar and snoop
            // delays) of the upward snoop to the snoop delay for this
            // cache
            snoop_delay += snoopPkt.headerDelay;

            // If this request is a prefetch or clean evict and an upper level
            // signals block present, make sure to propagate the block
            // presence to the requestor.
            if (snoopPkt.isBlockCached()) {
                pkt->setBlockCached();
            }
            // If the request was satisfied by snooping the cache
            // above, mark the original packet as satisfied too.
            if (snoopPkt.satisfied()) {
                pkt->setSatisfied();
            }

            // Copy over flags from the snoop response to make sure we
            // inform the final destination
            pkt->copyResponderFlags(&snoopPkt);
        } else {
            bool already_responded = pkt->cacheResponding();
            cpuSidePort.sendAtomicSnoop(pkt);
            if (!already_responded && pkt->cacheResponding()) {
                // cache-to-cache response from some upper cache:
                // forward response to original requestor
                assert(pkt->isResponse());
            }
        }
    }

    bool respond = false;
    bool blk_valid = blk && blk->isValid();
    if (pkt->isClean()) {
        if (blk_valid && blk->isSet(CacheBlk::DirtyBit)) {
            DPRINTF(CacheVerbose, "%s: packet (snoop) %s found block: %s\n",
                    __func__, pkt->print(), blk->print());
            PacketPtr wb_pkt =
                writecleanBlk(blk, pkt->req->getDest(), pkt->id);
            PacketList writebacks;
            writebacks.push_back(wb_pkt);

            if (is_timing) {
                // anything that is merely forwarded pays for the forward
                // latency and the delay provided by the crossbar
                Tick forward_time = clockEdge(forwardLatency) +
                    pkt->headerDelay;
                doWritebacks(writebacks, forward_time);
            } else {
                doWritebacksAtomic(writebacks);
            }
            pkt->setSatisfied();
        }
    } else if (!blk_valid) {
        DPRINTF(CacheVerbose, "%s: snoop miss for %s\n", __func__,
                pkt->print());
        if (is_deferred) {
            // we no longer have the block, and will not respond, but a
            // packet was allocated in MSHR::handleSnoop and we have
            // to delete it
            assert(pkt->needsResponse());

            // we have passed the block to a cache upstream, that
            // cache should be responding
            assert(pkt->cacheResponding());

            delete pkt;
        }
        return snoop_delay;
    } else {
        DPRINTF(Cache, "%s: snoop hit for %s, old state is %s\n", __func__,
                pkt->print(), blk->print());

        // We may end up modifying both the block state and the packet (if
        // we respond in atomic mode), so just figure out what to do now
        // and then do it later. We respond to all snoops that need
        // responses provided we have the block in dirty state. The
        // invalidation itself is taken care of below. We don't respond to
        // cache maintenance operations as this is done by the destination
        // xbar.
        respond = blk->isSet(CacheBlk::DirtyBit) && pkt->needsResponse();

        gem5_assert(!(isReadOnly && blk->isSet(CacheBlk::DirtyBit)),
                    "Should never have a dirty block in a read-only cache %s\n",
                    name());
    }

    // Invalidate any prefetches from below that would strip write permissions
    // MemCmd::HardPFReq is only observed by upstream caches. After missing
    // above and in its own cache, a new MemCmd::ReadReq is created that
    // downstream caches observe.
    if (pkt->mustCheckAbove()) {
        DPRINTF(Cache, "Found addr %#llx in upper level cache for snoop %s "
                "from lower cache\n", pkt->getAddr(), pkt->print());
        pkt->setBlockCached();
        return snoop_delay;
    }

    if (pkt->isRead() && !invalidate) {
        // reading without requiring the line in a writable state
        assert(!needs_writable);
        pkt->setHasSharers();

        // if the requesting packet is uncacheable, retain the line in
        // the current state, otherwise unset the writable flag,
        // which means we go from Modified to Owned (and will respond
        // below), remain in Owned (and will respond below), from
        // Exclusive to Shared, or remain in Shared
        if (!pkt->req->isUncacheable()) {
            blk->clearCoherenceBits(CacheBlk::WritableBit);
        }
        DPRINTF(Cache, "new state is %s\n", blk->print());
    }

    if (respond) {
        // prevent anyone else from responding, cache as well as
        // memory, and also prevent any memory from even seeing the
        // request
        pkt->setCacheResponding();
        if (!pkt->isClean() && blk->isSet(CacheBlk::WritableBit)) {
            // inform the cache hierarchy that this cache had the line
            // in the Modified state so that we avoid unnecessary
            // invalidations (see Packet::setResponderHadWritable)
            pkt->setResponderHadWritable();

            // in the case of an uncacheable request there is no point
            // in setting the responderHadWritable flag, but since the
            // recipient does not care there is no harm in doing so
        } else {
            // if the packet has needsWritable set we invalidate our
            // copy below and all other copies will be invalidated
            // through express snoops, and if needsWritable is not set
            // we already called setHasSharers above
        }

        // if we are returning a writable and dirty (Modified) line,
        // we should be invalidating the line
        panic_if(!invalidate && !pkt->hasSharers(),
                 "%s is passing a Modified line through %s, "
                 "but keeping the block", name(), pkt->print());

        if (is_timing) {
            doTimingSupplyResponse(pkt, blk->data, is_deferred, pending_inval);
        } else {
            pkt->makeAtomicResponse();
            // packets such as upgrades do not actually have any data
            // payload
            if (pkt->hasData())
                pkt->setDataFromBlock(blk->data, blkSize);
        }

        // When a block is compressed, it must first be decompressed before
        // being read, and this increases the snoop delay.
        if (compressor && pkt->isRead()) {
            snoop_delay += compressor->getDecompressionLatency(blk);
        }
    }

    if (!respond && is_deferred) {
        assert(pkt->needsResponse());
        delete pkt;
    }

    // Do this last in case it deallocates block data or something
    // like that
    if (blk_valid && invalidate) {
        invalidateBlock(blk);
        DPRINTF(Cache, "new state is %s\n", blk->print());
    }

    return snoop_delay;
}
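
// The net effect of a snoop hit above: we supply data only if the
// block is dirty and the snoop needs a response, hasSharers is set on
// any non-invalidating read so the requestor cannot assume a writable
// copy, and the block itself is invalidated last, after its data has
// been captured for the response.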

void
Cache::recvTimingSnoopReq(PacketPtr pkt)
{
    DPRINTF(CacheVerbose, "%s: for %s\n", __func__, pkt->print());

    // no need to snoop requests that are not in range
    if (!inRange(pkt->getAddr())) {
        return;
    }

    bool is_secure = pkt->isSecure();
    CacheBlk *blk = tags->findBlock({pkt->getAddr(), is_secure});

    Addr blk_addr = pkt->getBlockAddr(blkSize);
    MSHR *mshr = mshrQueue.findMatch(blk_addr, is_secure);

    // Update the latency cost of the snoop so that the crossbar can
    // account for it. Do not overwrite what other neighbouring caches
    // have already done, rather take the maximum. The update is
    // tentative, for cases where we return before an upward snoop
    // happens below.
    pkt->snoopDelay = std::max<uint32_t>(pkt->snoopDelay,
                                         lookupLatency * clockPeriod());

    // Inform requests (Prefetch, CleanEvict or Writeback) from below of
    // an MSHR hit by setting BLOCK_CACHED.
    if (mshr && pkt->mustCheckAbove()) {
        DPRINTF(Cache, "Setting block cached for %s from lower cache on "
                "mshr hit\n", pkt->print());
        pkt->setBlockCached();
        return;
    }

    // Let the MSHR itself track the snoop and decide whether we want
    // to go ahead and do the regular cache snoop
    if (mshr && mshr->handleSnoop(pkt, order++)) {
        DPRINTF(Cache, "Deferring snoop on in-service MSHR to blk %#llx (%s). "
                "mshrs: %s\n", blk_addr, is_secure ? "s" : "ns",
                mshr->print());

        if (mshr->getNumTargets() > numTarget)
            warn("allocating bonus target for snoop"); //handle later
        return;
    }

    // We also need to check the writeback buffers and handle those
    WriteQueueEntry *wb_entry = writeBuffer.findMatch(blk_addr, is_secure);
    if (wb_entry) {
        DPRINTF(Cache, "Snoop hit in writeback to addr %#llx (%s)\n",
                pkt->getAddr(), is_secure ? "s" : "ns");
        // Expect to see only Writebacks and/or CleanEvicts here, both of
        // which should not be generated for uncacheable data.
        assert(!wb_entry->isUncacheable());
        // There should only be a single request responsible for generating
        // Writebacks/CleanEvicts.
        assert(wb_entry->getNumTargets() == 1);
        PacketPtr wb_pkt = wb_entry->getTarget()->pkt;
        assert(wb_pkt->isEviction() || wb_pkt->cmd == MemCmd::WriteClean);

        if (pkt->isEviction()) {
            // if the block is found in the write queue, set the BLOCK_CACHED
            // flag for Writeback/CleanEvict snoop. On return the snoop will
            // propagate the BLOCK_CACHED flag in Writeback packets and prevent
            // any CleanEvicts from travelling down the memory hierarchy.
            pkt->setBlockCached();
            DPRINTF(Cache, "%s: Squashing %s from lower cache on writequeue "
                    "hit\n", __func__, pkt->print());
            return;
        }

        // conceptually writebacks are no different to other blocks in
        // this cache, so the behaviour is modelled after handleSnoop,
        // the difference being that instead of querying the block
        // state to determine if it is dirty and writable, we use the
        // command and fields of the writeback packet
        bool respond = wb_pkt->cmd == MemCmd::WritebackDirty &&
            pkt->needsResponse();
        bool have_writable = !wb_pkt->hasSharers();
        bool invalidate = pkt->isInvalidate();

        if (!pkt->req->isUncacheable() && pkt->isRead() && !invalidate) {
            assert(!pkt->needsWritable());
            pkt->setHasSharers();
            wb_pkt->setHasSharers();
        }

        if (respond) {
            pkt->setCacheResponding();

            if (have_writable) {
                pkt->setResponderHadWritable();
            }

            doTimingSupplyResponse(pkt, wb_pkt->getConstPtr<uint8_t>(),
                                   false, false);
        }

        if (invalidate && wb_pkt->cmd != MemCmd::WriteClean) {
            // Invalidation trumps our writeback... discard here
            // Note: markInService will remove entry from writeback buffer.
            markInService(wb_entry);
            delete wb_pkt;
        }
    }

    // If this was a shared writeback, there may still be
    // other shared copies above that require invalidation.
    // We could be more selective and return here if the
    // request is non-exclusive or if the writeback is
    // exclusive.
    uint32_t snoop_delay = handleSnoop(pkt, blk, true, false, false);

    // Override what we did when we first saw the snoop, as we now
    // also have the cost of the upwards snoops to account for
    pkt->snoopDelay = std::max<uint32_t>(pkt->snoopDelay, snoop_delay +
                                         lookupLatency * clockPeriod());
}

Tick
Cache::recvAtomicSnoop(PacketPtr pkt)
{
    // no need to snoop requests that are not in range.
    if (!inRange(pkt->getAddr())) {
        return 0;
    }

    CacheBlk *blk = tags->findBlock({pkt->getAddr(), pkt->isSecure()});
    uint32_t snoop_delay = handleSnoop(pkt, blk, false, false, false);
    return snoop_delay + lookupLatency * clockPeriod();
}
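
// The value returned above is in ticks: whatever delay the upward
// snoops accumulated in handleSnoop, plus this cache's own tag lookup
// converted from cycles via the clock period.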

bool
Cache::isCachedAbove(PacketPtr pkt, bool is_timing)
{
    if (!forwardSnoops)
        return false;
    // Mirroring the flow of HardPFReqs, the cache sends CleanEvict and
    // Writeback snoops into upper level caches to check for copies of the
    // same block. Using the BLOCK_CACHED flag with the Writeback/CleanEvict
    // packet, the cache can inform the crossbar below of presence or absence
    // of the block.
    if (is_timing) {
        Packet snoop_pkt(pkt, true, false);
        snoop_pkt.setExpressSnoop();
        // Assert that packet is either Writeback or CleanEvict and not a
        // prefetch request because prefetch requests need an MSHR and may
        // generate a snoop response.
        assert(pkt->isEviction() || pkt->cmd == MemCmd::WriteClean);
        snoop_pkt.senderState = nullptr;
        cpuSidePort.sendTimingSnoopReq(&snoop_pkt);
        // Writeback/CleanEvict snoops do not generate a snoop response.
        assert(!(snoop_pkt.cacheResponding()));
        return snoop_pkt.isBlockCached();
    } else {
        cpuSidePort.sendAtomicSnoop(pkt);
        return pkt->isBlockCached();
    }
}
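
// In timing mode the answer comes back on the copied express snoop
// packet, leaving the original untouched, while in atomic mode the
// caches above may mark the original packet itself as BLOCK_CACHED,
// which is why no copy is made on that path.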

bool
Cache::sendMSHRQueuePacket(MSHR *mshr)
{
    assert(mshr);

    // use request from 1st target
    PacketPtr tgt_pkt = mshr->getTarget()->pkt;

    if (tgt_pkt->cmd == MemCmd::HardPFReq && forwardSnoops) {
        DPRINTF(Cache, "%s: MSHR %s\n", __func__, tgt_pkt->print());

        // we should never have hardware prefetches to allocated
        // blocks
        assert(!tags->findBlock({mshr->blkAddr, mshr->isSecure}));

        // We need to check the caches above us to verify that
        // they don't have a copy of this block in the dirty state
        // at the moment. Without this check we could get a stale
        // copy from memory that might get used in place of the
        // dirty one.
        Packet snoop_pkt(tgt_pkt, true, false);
        snoop_pkt.setExpressSnoop();
        // We are sending this packet upwards, but if it hits we will
        // get a snoop response that we end up treating just like a
        // normal response, hence it needs the MSHR as its sender
        // state
        snoop_pkt.senderState = mshr;
        cpuSidePort.sendTimingSnoopReq(&snoop_pkt);

        // Check to see if the prefetch was squashed by an upper cache (to
        // prevent us from grabbing the line), or if a writeback arrived
        // between the time the prefetch was placed in the MSHRs and when
        // it was selected to be sent.

        // It is important to check cacheResponding before
        // prefetchSquashed. If another cache has committed to
        // responding, it will be sending a dirty response which will
        // arrive at the MSHR allocated for this request. Checking the
        // prefetchSquash first may result in the MSHR being
        // prematurely deallocated.
        if (snoop_pkt.cacheResponding()) {
            [[maybe_unused]] auto r = outstandingSnoop.insert(snoop_pkt.req);
            assert(r.second);

            // if we are getting a snoop response with no sharers it
            // will be allocated as Modified
            bool pending_modified_resp = !snoop_pkt.hasSharers();
            markInService(mshr, pending_modified_resp);

            DPRINTF(Cache, "Upward snoop of prefetch for addr"
                    " %#x (%s) hit\n",
                    tgt_pkt->getAddr(), tgt_pkt->isSecure() ? "s" : "ns");
            return false;
        }

        if (snoop_pkt.isBlockCached()) {
            DPRINTF(Cache, "Block present, prefetch squashed by cache. "
                    "Deallocating mshr target %#x.\n",
                    mshr->blkAddr);

            // Deallocate the mshr target
            if (mshrQueue.forceDeallocateTarget(mshr)) {
                // Clear block if this deallocation freed an MSHR
                // when all had previously been utilized
                clearBlocked(Blocked_NoMSHRs);
            }

            // given that no response is expected, delete Request and Packet
            delete tgt_pkt;

            return false;
        }
    }

    return BaseCache::sendMSHRQueuePacket(mshr);
}

} // namespace gem5
#define DPRINTF(x,...)
Definition trace.hh:209
Defines global host-dependent types: Counter, Tick, and (indirectly) {int,uint}{8,...
Describes a cache.
Definitions of a simple cache block class.
const bool isReadOnly
Is this cache read only, for example the instruction cache, or table-walker cache.
Definition base.hh:945
virtual void recvTimingReq(PacketPtr pkt)
Performs the access specified by the request.
Definition base.cc:454
MSHR * allocateMissBuffer(PacketPtr pkt, Tick time, bool sched_send=true)
Definition base.hh:1175
CpuSidePort cpuSidePort
Definition base.hh:321
void invalidateBlock(CacheBlk *blk)
Invalidate a cache block.
Definition base.cc:1727
const bool writebackClean
Determine if clean lines should be written back or not.
Definition base.hh:677
bool inRange(Addr addr) const
Determine if an address is in the ranges covered by this cache.
Definition base.cc:220
virtual void handleTimingReqMiss(PacketPtr pkt, CacheBlk *blk, Tick forward_time, Tick request_time)=0
bool allocOnFill(MemCmd cmd) const
Determine whether we should allocate on a fill or not.
Definition base.hh:443
bool forwardSnoops
Do we forward snoops from mem side port through to cpu side port?
Definition base.hh:930
uint64_t order
Increasing order number assigned to each incoming request.
Definition base.hh:970
virtual void satisfyRequest(PacketPtr pkt, CacheBlk *blk, bool deferred_response=false, bool pending_downgrade=false)
Perform any necessary updates to the block and perform any data exchange between the packet and the b...
Definition base.cc:1146
gem5::BaseCache::CacheStats stats
const Cycles lookupLatency
The latency of tag lookup of a cache.
Definition base.hh:896
MSHRQueue mshrQueue
Miss status registers.
Definition base.hh:351
const unsigned blkSize
Block size of this cache.
Definition base.hh:890
const Cycles forwardLatency
This is the forward latency of the cache.
Definition base.hh:909
compression::Base * compressor
Compression method being used.
Definition base.hh:360
const Cycles responseLatency
The latency of sending reponse to its upper level cache/core on a linefill.
Definition base.hh:919
PacketPtr writecleanBlk(CacheBlk *blk, Request::Flags dest, PacketId id)
Create a writeclean request for the given block.
Definition base.cc:1806
MemSidePort memSidePort
Definition base.hh:322
virtual void handleTimingReqHit(PacketPtr pkt, CacheBlk *blk, Tick request_time)
Definition base.cc:275
virtual Tick recvAtomic(PacketPtr pkt)
Performs the access specified by the request.
Definition base.cc:685
virtual void recvTimingResp(PacketPtr pkt)
Handles a response (cache line fill/write ack) from the bus.
Definition base.cc:539
virtual bool access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat, PacketList &writebacks)
Does all the processing necessary to perform the provided request.
Definition base.cc:1290
BaseCache(const BaseCacheParams &p, unsigned blk_size)
Definition base.cc:82
Addr regenerateBlkAddr(CacheBlk *blk)
Regenerate block address using tags.
Definition base.cc:189
std::unique_ptr< Packet > pendingDelete
Upstream caches need this packet until true is returned, so hold it for deletion until a subsequent c...
Definition base.hh:412
const int numTarget
The number of targets for each MSHR.
Definition base.hh:927
WriteAllocator *const writeAllocator
The writeAllocator drive optimizations for streaming writes.
Definition base.hh:398
void markInService(MSHR *mshr, bool pending_modified_resp)
Mark a request as in service (sent downstream in the memory system), effectively making this MSHR the...
Definition base.hh:418
void allocateWriteBuffer(PacketPtr pkt, Tick time)
Definition base.cc:231
CacheBlk * handleFill(PacketPtr pkt, CacheBlk *blk, PacketList &writebacks, bool allocate)
Handle a fill operation caused by a received packet.
Definition base.cc:1571
WriteQueue writeBuffer
Write/writeback buffer.
Definition base.hh:354
PacketPtr writebackBlk(CacheBlk *blk)
Create a writeback request for the given block.
Definition base.cc:1756
void clearBlocked(BlockedCause cause)
Marks the cache as unblocked for the given cause.
Definition base.hh:1227
virtual PacketPtr evictBlock(CacheBlk *blk)=0
Evict a cache block.
BaseTags * tags
Tag and data storage.
Definition base.hh:357
const enums::Clusivity clusivity
Clusivity with respect to the upstream cache, determining if we fill into both this cache and the cache above on a miss.
Definition base.hh:937
virtual bool sendMSHRQueuePacket(MSHR *mshr)
Take an MSHR, turn it into a suitable downstream packet, and send it out.
Definition base.cc:1930
void maintainClusivity(bool from_cache, CacheBlk *blk)
Maintain the clusivity of this cache by potentially invalidating a block.
Definition base.cc:1559
System * system
System we are currently operating in.
Definition base.hh:988
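Taken together, the BaseCache members above implement the timing-request path. A condensed, illustrative sketch of how they cooperate (simplified from recvTimingReq; uncacheable traffic and error handling omitted, `pkt` assumed to be the incoming request):

    // Look the block up, charging the tag/data lookup latency.
    CacheBlk *blk = nullptr;
    Cycles lat = lookupLatency;
    PacketList writebacks;
    bool satisfied = access(pkt, blk, lat, writebacks);

    // Hits respond after the lookup; misses are forwarded, folding in
    // the delay the packet has already accumulated.
    const Tick request_time = clockEdge(lat);
    const Tick forward_time = clockEdge(forwardLatency) + pkt->headerDelay;

    if (satisfied)
        handleTimingReqHit(pkt, blk, request_time);
    else
        handleTimingReqMiss(pkt, blk, forward_time, request_time);
    doWritebacks(writebacks, forward_time);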
A Basic Cache block.
Definition cache_blk.hh:72
@ ReadableBit
Read permission.
Definition cache_blk.hh:86
@ WritableBit
Write permission.
Definition cache_blk.hh:81
@ DirtyBit
Dirty (modified).
Definition cache_blk.hh:88
void setPrefetched()
Marks this block as a recently prefetched block.
Definition cache_blk.hh:261
std::string print() const override
Pretty-print tag, set and way, and interpret state bits to readable form including mapping to a MOESI state.
Definition cache_blk.hh:372
bool isSet(unsigned bits) const
Checks whether the given coherence bits are set.
Definition cache_blk.hh:242
void clearCoherenceBits(unsigned bits)
Clear the corresponding coherence bits.
Definition cache_blk.hh:234
uint32_t getTaskId() const
Get the task id associated with this block.
Definition cache_blk.hh:288
uint8_t * data
Contains a copy of the data in this block for easy access.
Definition cache_blk.hh:104
void setCoherenceBits(unsigned bits)
Sets the corresponding coherence bits.
Definition cache_blk.hh:223
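A minimal sketch of the coherence-bit accessors above, assuming `blk` points at a valid block:

    // Query and update the MOESI-related state bits; the bits form a mask.
    if (blk->isSet(CacheBlk::DirtyBit)) {
        // We hold the only up-to-date copy; drop ownership once the
        // data has been passed on or written back.
        blk->clearCoherenceBits(CacheBlk::DirtyBit);
    }
    // Grant read and write permission in one call.
    blk->setCoherenceBits(CacheBlk::ReadableBit | CacheBlk::WritableBit);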
PacketPtr cleanEvictBlk(CacheBlk *blk)
Create a CleanEvict request for the given block.
Definition cache.cc:973
Cache(const CacheParams &p)
Instantiates a basic cache object.
Definition cache.cc:70
void recvTimingSnoopReq(PacketPtr pkt) override
Snoops bus transactions to maintain coherence.
Definition cache.cc:1262
bool isCachedAbove(PacketPtr pkt, bool is_timing=true)
Send up a snoop request and find cached copies.
Definition cache.cc:1393
void promoteWholeLineWrites(PacketPtr pkt)
Turn line-sized writes into WriteInvalidate transactions.
Definition cache.cc:302
void serviceMSHRTargets(MSHR *mshr, const PacketPtr pkt, CacheBlk *blk) override
Service non-deferred MSHR targets using the received response.
Definition cache.cc:700
Cycles handleAtomicReqMiss(PacketPtr pkt, CacheBlk *&blk, PacketList &writebacks) override
Handle a request in atomic mode that missed in this cache.
Definition cache.cc:579
Tick recvAtomicSnoop(PacketPtr pkt) override
Snoop for the provided request in the cache and return the estimated time taken.
Definition cache.cc:1380
std::unordered_set< RequestPtr > outstandingSnoop
Store the outstanding requests that we are expecting snoop responses from so we can determine which snoop responses are still outstanding.
Definition cache.hh:80
void satisfyRequest(PacketPtr pkt, CacheBlk *blk, bool deferred_response=false, bool pending_downgrade=false) override
Perform any necessary updates to the block and perform any data exchange between the packet and the block.
Definition cache.cc:79
void recvTimingSnoopResp(PacketPtr pkt) override
Handle a snoop response.
Definition cache.cc:268
void recvTimingReq(PacketPtr pkt) override
Performs the access specified by the request.
Definition cache.cc:419
Tick recvAtomic(PacketPtr pkt) override
Performs the access specified by the request.
Definition cache.cc:669
void handleTimingReqMiss(PacketPtr pkt, CacheBlk *blk, Tick forward_time, Tick request_time) override
Definition cache.cc:325
bool sendMSHRQueuePacket(MSHR *mshr) override
Take an MSHR, turn it into a suitable downstream packet, and send it out.
Definition cache.cc:1421
void doTimingSupplyResponse(PacketPtr req_pkt, const uint8_t *blk_data, bool already_copied, bool pending_inval)
Definition cache.cc:1001
PacketPtr createMissPacket(PacketPtr cpu_pkt, CacheBlk *blk, bool needs_writable, bool is_whole_line_write) const override
Create an appropriate downstream bus request packet.
Definition cache.cc:493
void handleTimingReqHit(PacketPtr pkt, CacheBlk *blk, Tick request_time) override
Definition cache.cc:314
uint32_t handleSnoop(PacketPtr pkt, CacheBlk *blk, bool is_timing, bool is_deferred, bool pending_inval)
Perform an upward snoop if needed, and update the block state (possibly invalidating the block).
Definition cache.cc:1046
PacketPtr evictBlock(CacheBlk *blk) override
Evict a cache block.
Definition cache.cc:962
const bool doFastWrites
This cache should allocate a block on a line-sized write miss.
Definition cache.hh:73
void doWritebacks(PacketList &writebacks, Tick forward_time) override
Insert writebacks into the write buffer.
Definition cache.cc:191
bool access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat, PacketList &writebacks) override
Does all the processing necessary to perform the provided request.
Definition cache.cc:162
void doWritebacksAtomic(PacketList &writebacks) override
Send writebacks down the memory hierarchy in atomic mode.
Definition cache.cc:233
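The eviction-related members above reduce to a small decision: dirty blocks (or clean blocks when writebackClean is set) leave as writebacks, everything else as a CleanEvict notice. A sketch of that choice, with `blk` as the victim:

    // Pick the outgoing packet type, then drop the block locally.
    PacketPtr pkt = (blk->isSet(CacheBlk::DirtyBit) || writebackClean) ?
        writebackBlk(blk) : cleanEvictBlk(blk);
    invalidateBlock(blk);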
Tick clockEdge(Cycles cycles=Cycles(0)) const
Determine the tick when a cycle begins, by default the current one, but the argument also enables the start of a future cycle to be determined.
Tick clockPeriod() const
Cycles ticksToCycles(Tick t) const
Cycles is a wrapper class for representing cycle counts.
Definition types.hh:79
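A short example of how these clock helpers combine a cycle latency with a packet's accumulated delay to produce an absolute tick (`pkt` and forwardLatency as above):

    // Translate a cycle latency into the tick of a future clock edge,
    // then fold in the delay the packet has gathered so far.
    Tick forward_time = clockEdge(forwardLatency) + pkt->headerDelay;
    pkt->headerDelay = 0; // the delay is now accounted for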
bool hasFromCache
Determine whether there was at least one non-snooping target coming from another cache.
Definition mshr.hh:181
Miss Status and handling Register.
Definition mshr.hh:75
bool wasWholeLineWrite
Track if we sent this as a whole line write or not.
Definition mshr.hh:124
void updateLockedRMWReadTarget(PacketPtr pkt)
Replaces the matching packet in the Targets list with a dummy packet to ensure the MSHR remains allocated.
Definition mshr.cc:782
TargetList extractServiceableTargets(PacketPtr pkt)
Extracts the subset of the targets that can be serviced given a received response.
Definition mshr.cc:544
void popTarget()
Pop first target.
Definition mshr.hh:482
void print(std::ostream &os, int verbosity=0, const std::string &prefix="") const override
Prints the contents of this MSHR for debugging.
Definition mshr.cc:727
int getNumTargets() const
Returns the current number of allocated targets.
Definition mshr.hh:446
bool hasPostDowngrade() const
Definition mshr.hh:334
QueueEntry::Target * getTarget() override
Returns a reference to the first target.
Definition mshr.hh:473
bool handleSnoop(PacketPtr target, Counter order)
Definition mshr.cc:420
bool isForward
True if the entry is just a simple forward from an upper level.
Definition mshr.hh:127
bool hasLockedRMWReadTarget()
Determine if there are any LockedRMWReads in the Targets list.
Definition mshr.cc:790
bool hasPostInvalidate() const
Definition mshr.hh:330
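A minimal sketch of draining an MSHR's target list once a fill response has arrived, using only the accessors above (the real logic lives in serviceMSHRTargets and also schedules the responses; `blk` is the freshly filled block):

    // Answer every queued target out of the filled block's data.
    while (mshr->getNumTargets() > 0) {
        QueueEntry::Target *target = mshr->getTarget();
        PacketPtr tgt_pkt = target->pkt;
        tgt_pkt->makeTimingResponse();
        tgt_pkt->setDataFromBlock(blk->data, blkSize);
        mshr->popTarget();
    }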
bool isSWPrefetch() const
Definition packet.hh:253
@ ReadRespWithInvalidate
Definition packet.hh:89
@ WritebackDirty
Definition packet.hh:93
@ StoreCondFailReq
Definition packet.hh:114
@ LockedRMWReadReq
Definition packet.hh:116
@ LockedRMWWriteReq
Definition packet.hh:118
@ WritebackClean
Definition packet.hh:94
@ LockedRMWWriteResp
Definition packet.hh:119
@ SCUpgradeFailReq
Definition packet.hh:106
@ UpgradeFailResp
Definition packet.hh:107
virtual std::string name() const
Definition named.hh:60
A Packet is used to encapsulate a transfer between two objects in the memory system (e.g., the L1 and L2 cache).
Definition packet.hh:295
void setExpressSnoop()
The express snoop flag is used for two purposes.
Definition packet.hh:701
bool responderHadWritable() const
Definition packet.hh:719
bool isUpgrade() const
Definition packet.hh:596
bool isRead() const
Definition packet.hh:593
bool isSecure() const
Definition packet.hh:836
const PacketId id
Definition packet.hh:374
Addr getAddr() const
Definition packet.hh:807
bool isError() const
Definition packet.hh:622
bool isResponse() const
Definition packet.hh:598
uint32_t snoopDelay
Keep track of the extra delay incurred by snooping upwards before sending a request down the memory system.
Definition packet.hh:439
void makeTimingResponse()
Definition packet.hh:1080
bool needsWritable() const
Definition packet.hh:599
void print(std::ostream &o, int verbosity=0, const std::string &prefix="") const
Definition packet.cc:368
void copyError(Packet *pkt)
Definition packet.hh:805
bool satisfied() const
Definition packet.hh:755
bool needsResponse() const
Definition packet.hh:608
SenderState * senderState
This packet's sender state.
Definition packet.hh:545
uint32_t payloadDelay
The extra pipelining delay from seeing the packet until the end of payload is transmitted by the component that provided it.
Definition packet.hh:449
bool matchAddr(const Addr addr, const bool is_secure) const
Check if packet corresponds to a given address and address space.
Definition packet.cc:403
uint32_t headerDelay
The extra delay from seeing the packet until the header is transmitted.
Definition packet.hh:431
void setResponderHadWritable()
On responding to a snoop request (which only happens for Modified or Owned lines), record that the responder had a writable copy of the line.
Definition packet.hh:713
Addr getOffset(unsigned int blk_size) const
Definition packet.hh:826
bool mustCheckAbove() const
Does the request need to check for cached copies of the same block in the memory hierarchy above.
Definition packet.hh:1425
bool hasData() const
Definition packet.hh:614
void copyResponderFlags(const PacketPtr pkt)
Copy the response flags from an input packet to this packet.
Definition packet.cc:324
bool hasRespData() const
Definition packet.hh:615
bool fromCache() const
Definition packet.hh:612
void setData(const uint8_t *p)
Copy data into the packet from the provided pointer.
Definition packet.hh:1293
bool isWrite() const
Definition packet.hh:594
void setDataFromBlock(const uint8_t *blk_data, int blkSize)
Copy data into the packet from the provided block pointer, which is aligned to the given block size.
Definition packet.hh:1312
Addr getBlockAddr(unsigned int blk_size) const
Definition packet.hh:831
RequestPtr req
A pointer to the original request.
Definition packet.hh:377
unsigned getSize() const
Definition packet.hh:817
bool isCleanInvalidateRequest() const
Is this packet a clean invalidate request, e.g., clflush/clflushopt?
Definition packet.hh:1444
void setCacheResponding()
Snoop flags.
Definition packet.hh:653
bool isClean() const
Definition packet.hh:611
const T * getConstPtr() const
Definition packet.hh:1234
void setHasSharers()
On fills, the hasSharers flag is used by the caches in combination with the cacheResponding flag.
Definition packet.hh:685
bool isLLSC() const
Definition packet.hh:620
bool cacheResponding() const
Definition packet.hh:659
void makeAtomicResponse()
Definition packet.hh:1074
void setSatisfied()
Set when a request hits in a cache and the cache is not going to respond.
Definition packet.hh:749
MemCmd cmd
The command field of the packet.
Definition packet.hh:372
bool isMaskedWrite() const
Definition packet.hh:1450
bool isInvalidate() const
Definition packet.hh:609
bool isWholeLineWrite(unsigned blk_size)
Definition packet.hh:626
bool hasSharers() const
Definition packet.hh:686
bool isBlockCached() const
Definition packet.hh:760
void setBlockCached()
Definition packet.hh:759
void allocate()
Allocate memory for the packet.
Definition packet.hh:1367
bool isEviction() const
Definition packet.hh:610
bool isRequest() const
Definition packet.hh:597
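The snoop flags above carry the coherence handshake between caches. A sketch of how a cache holding the dirty copy might answer a snoop, simplified from the handleSnoop/satisfyRequest logic:

    // Tell the requester and the snoop originator what we hold.
    if (blk->isSet(CacheBlk::DirtyBit) && pkt->needsResponse()) {
        pkt->setCacheResponding();      // we, not memory, supply the data
        pkt->setResponderHadWritable(); // our copy was Modified/Owned
    }
    if (!pkt->needsWritable())
        pkt->setHasSharers();           // we retain a shared copy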
A queue entry holds packets that will be serviced as soon as resources are available.
PacketPtr pkt
Pending request packet.
bool isUncacheable() const
Addr blkAddr
Block aligned address.
@ SECURE
The request targets the secure memory space.
Definition request.hh:186
@ wbRequestorId
This requestor id is used for writeback requests by the caches.
Definition request.hh:277
virtual bool isValid() const
Checks if the entry is valid.
bool isSecure() const
Check if this block holds data from the secure memory space.
int getNumTargets() const
Returns the current number of allocated targets.
Target * getTarget() override
Returns a reference to the first target.
#define panic(...)
This implements a cprintf based panic() function.
Definition logging.hh:220
#define gem5_assert(cond,...)
The assert macro will function like a normal assert, but will use panic instead of straight abort().
Definition logging.hh:349
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true and allows the programmer to specify diagnostic printout.
Definition logging.hh:246
#define warn(...)
Definition logging.hh:288
Definition of a basic cache compressor.
Declaration of a common base class for cache tagstore objects.
Miss Status and Handling Register (MSHR) declaration.
std::shared_ptr< Request > RequestPtr
Definition request.hh:94
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Definition types.hh:147
uint64_t Tick
Tick count type.
Definition types.hh:58
Packet * PacketPtr
std::list< PacketPtr > PacketList
Definition packet.hh:73
Declaration of a Request, the overall memory request consisting of the parts of the request that are persistent throughout the transaction.
Write queue entry.
