gem5  v22.1.0.0
tlb.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #ifndef __GPU_TLB_HH__
33 #define __GPU_TLB_HH__
34 
35 #include <fstream>
36 #include <list>
37 #include <queue>
38 #include <string>
39 #include <vector>
40 
41 #include "arch/generic/tlb.hh"
42 #include "arch/x86/pagetable.hh"
44 #include "arch/x86/regs/segment.hh"
45 #include "base/callback.hh"
46 #include "base/logging.hh"
47 #include "base/statistics.hh"
48 #include "base/stats/group.hh"
50 #include "mem/port.hh"
51 #include "mem/request.hh"
52 #include "params/X86GPUTLB.hh"
53 #include "sim/clocked_object.hh"
54 #include "sim/sim_object.hh"
55 
56 namespace gem5
57 {
58 
59 class BaseTLB;
60 class Packet;
61 class ThreadContext;
62 
63 namespace X86ISA
64 {
65  class GpuTLB : public ClockedObject
66  {
67  protected:
68  friend class Walker;
69 
71 
72  uint32_t configAddress;
73 
74  public:
75  typedef X86GPUTLBParams Params;
76  GpuTLB(const Params &p);
78 
79  typedef enum BaseMMU::Mode Mode;
80 
82  {
83  public:
84  virtual ~Translation() { }
85 
90  virtual void markDelayed() = 0;
91 
97  virtual void finish(Fault fault, const RequestPtr &req,
98  ThreadContext *tc, Mode mode) = 0;
99  };
100 
101  void dumpAll();
102  TlbEntry *lookup(Addr va, bool update_lru=true);
103  void setConfigAddress(uint32_t addr);
104 
105  protected:
106  EntryList::iterator lookupIt(Addr va, bool update_lru=true);
108 
109  public:
110  Walker *getWalker();
111  void invalidateAll();
112  void invalidateNonGlobal();
113  void demapPage(Addr va, uint64_t asn);
114 
115  protected:
116  int size;
117  int assoc;
118  int numSets;
119 
123  bool FA;
125 
131 
136 
142 
144 
145  /*
146  * It's a per-set list. As long as we have not reached
147  * the full capacity of the given set, grab an entry from
148  * the freeList.
149  */
151 
160 
161  Fault translateInt(bool read, const RequestPtr &req,
162  ThreadContext *tc);
163 
164  Fault translate(const RequestPtr &req, ThreadContext *tc,
165  Translation *translation, Mode mode, bool &delayedResponse,
166  bool timing, int &latency);
167 
168  public:
169  // latencies for a TLB hit, miss and page fault
173 
174  void updatePageFootprint(Addr virt_page_addr);
176 
177 
179  Mode mode, int &latency);
180 
181  void translateTiming(const RequestPtr &req, ThreadContext *tc,
182  Translation *translation, Mode mode,
183  int &latency);
184 
187 
188  TlbEntry *insert(Addr vpn, TlbEntry &entry);
189 
190  // Checkpointing
191  virtual void serialize(CheckpointOut& cp) const override;
192  virtual void unserialize(CheckpointIn& cp) override;
195  bool tlbLookup(const RequestPtr &req,
196  ThreadContext *tc, bool update_stats);
197 
199  PacketPtr pkt);
200 
202 
204  TlbEntry *tlb_entry, Mode mode);
205 
206  void updatePhysAddresses(Addr virt_page_addr, TlbEntry *tlb_entry,
207  Addr phys_page_addr);
208 
209  void issueTLBLookup(PacketPtr pkt);
210 
211  // CpuSidePort is the TLB Port closer to the CPU/CU side
     // It derives from ResponsePort and handles incoming requests through
     // the recv* handlers below. Handlers declared without a body are
     // defined out of line.
212  class CpuSidePort : public ResponsePort
213  {
214  public:
     // Forwards the port name and owning TLB to ResponsePort, and records
     // the owning TLB and this port's index.
215  CpuSidePort(const std::string &_name, GpuTLB * gpu_TLB,
216  PortID _index)
217  : ResponsePort(_name, gpu_TLB), tlb(gpu_TLB), index(_index) { }
218 
219  protected:
     // NOTE(review): the `tlb` member initialised by the constructor is
     // not declared in the visible part of this listing -- confirm
     // against the full header.
221  int index; // port index supplied to the constructor
222 
     // Receives a coalesced timing request from a TLBCoalescer and calls
     // issueTLBLookup() (per the function's own brief).
223  virtual bool recvTimingReq(PacketPtr pkt);
     // Always returns 0; the packet is not examined.
224  virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
     // Receives a functional request packet from the peer.
225  virtual void recvFunctional(PacketPtr pkt);
     // No-op.
226  virtual void recvRangeChange() { }
227  virtual void recvReqRetry();
     // Panics with "recvRespRetry called" whenever it is invoked.
228  virtual void recvRespRetry() { panic("recvRespRetry called"); }
     // Returns the list of address ranges the owner is responsible for.
229  virtual AddrRangeList getAddrRanges() const;
230  };
231 
239  class MemSidePort : public RequestPort
240  {
     // MemSidePort is the TLB port closer to the memory side. It derives
     // from RequestPort; handlers declared without a body are defined
     // out of line.
241  public:
     // Forwards the port name and owning TLB to RequestPort, and records
     // the owning TLB and this port's index.
242  MemSidePort(const std::string &_name, GpuTLB * gpu_TLB,
243  PortID _index)
244  : RequestPort(_name, gpu_TLB), tlb(gpu_TLB), index(_index) { }
245 
     // NOTE(review): this listing omits some member declarations here
     // (a `std::deque<PacketPtr> retries` queue and the `tlb` member
     // used by the constructor) -- confirm against the full header.
247 
248  protected:
250  int index; // port index supplied to the constructor
251 
     // Called when the packet sent through this port comes back.
252  virtual bool recvTimingResp(PacketPtr pkt);
     // Always returns 0; the packet is not examined.
253  virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
     // No-op: the packet is ignored.
254  virtual void recvFunctional(PacketPtr pkt) { }
     // No-op.
255  virtual void recvRangeChange() { }
256  virtual void recvReqRetry();
257  };
258 
259  // TLB ports on the CPU side
261  // TLB ports on the memory side
263 
264  Port &getPort(const std::string &if_name,
265  PortID idx=InvalidPortID) override;
266 
267 
268  // maximum number of permitted coalesced requests per cycle
270 
271  // Current number of outstanding coalesced requests.
272  // Should be <= maxCoalescedReqs
274 
282  void translationReturn(Addr virtPageAddr, tlbOutcome outcome,
283  PacketPtr pkt);
284 
285  class TLBEvent : public Event
286  {
     // Event created for a GpuTLB together with an address, a tlbOutcome
     // and a packet. Pointers to these events are stored per Addr in
     // translationReturnEvent.
287  private:
     // NOTE(review): the private data members are not shown in this
     // listing; one of them is the `tlbOutcome outcome` field, which can
     // be TLB_HIT, TLB_MISS, or PAGE_WALK -- confirm the rest against the
     // full header.
295 
296  public:
     // _addr is presumably the virtual page address used as the key of
     // translationReturnEvent -- TODO confirm.
297  TLBEvent(GpuTLB *_tlb, Addr _addr, tlbOutcome outcome,
298  PacketPtr _pkt);
299 
     // Presumably the Event callback run when the event fires; its
     // definition is not visible here.
300  void process();
     // Returns a C string describing the event.
301  const char *description() const;
302 
303  // updateOutcome updates the tlbOutcome of a TLBEvent
304  void updateOutcome(tlbOutcome _outcome);
306  };
307 
308  std::unordered_map<Addr, TLBEvent*> translationReturnEvent;
309 
310  // this FIFO queue keeps track of the virt. page addresses
311  // that are pending cleanup
312  std::queue<Addr> cleanupQueue;
313 
314  // the cleanupEvent is scheduled after a TLBEvent triggers in order to
315  // free memory and do the required clean-up
316  void cleanup();
317 
319 
325  struct AccessInfo
326  {
     // Per-page access bookkeeping. This is the mapped type of
     // AccessPatternTable, an unordered_map keyed by Addr (the virtual
     // page address, per the table's own brief).
327  unsigned int lastTimeAccessed; // last access to this page
328  unsigned int accessesPerPage; // presumably the access count for this page -- confirm
329  // need to divide it by accessesPerPage at the end
330  unsigned int totalReuseDistance;
331 
     // NOTE(review): this listing omits a
     // `std::vector<unsigned int> localTLBAccesses` member declared
     // here, which is used to compute the access distance -- confirm
     // against the full header.
341  unsigned int sumDistance;
342  unsigned int meanDistance;
343  };
344 
345  typedef std::unordered_map<Addr, AccessInfo> AccessPatternTable;
347 
348  // Called at the end of simulation to dump page access stats.
349  void exitCallback();
350 
352 
353  protected:
355  {
357 
358  // local_stats are as seen from the TLB
359  // without taking into account coalescing
364 
365  // global_stats are as seen from the
366  // CU's perspective taking into account
367  // all coalesced requests.
372 
373  // from the CU perspective (global)
375  // from the CU perspective (global)
378  // from the perspective of this TLB
380  // from the perspective of this TLB
382  // I take the avg. per page and then
383  // the avg. over all pages.
385  } stats;
386  };
387 }
388 
389 } // namespace gem5
390 
391 #endif // __GPU_TLB_HH__
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
const std::string _name
Definition: named.hh:41
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:294
Ports are used to interface objects to each other.
Definition: port.hh:62
A RequestPort is a specialisation of a Port, which implements the default protocol for the three diff...
Definition: port.hh:79
A ResponsePort is a specialization of a port.
Definition: port.hh:270
ThreadContext is the external interface to all thread state for anything outside of the CPU.
virtual void recvRangeChange()
Definition: tlb.hh:226
virtual void recvReqRetry()
Definition: tlb.cc:1225
virtual void recvRespRetry()
Called by the peer if sendTimingResp was called on this protocol (causing recvTimingResp to be called...
Definition: tlb.hh:228
virtual void recvFunctional(PacketPtr pkt)
Receive a functional request packet from the peer.
Definition: tlb.cc:1113
virtual Tick recvAtomic(PacketPtr pkt)
Receive an atomic request packet from the peer.
Definition: tlb.hh:224
virtual bool recvTimingReq(PacketPtr pkt)
recvTiming receives a coalesced timing request from a TLBCoalescer and it calls issueTLBLookup() It o...
Definition: tlb.cc:1018
CpuSidePort(const std::string &_name, GpuTLB *gpu_TLB, PortID _index)
Definition: tlb.hh:215
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
Definition: tlb.cc:1233
MemSidePort is the TLB Port closer to the memory side If this is a last level TLB then this port will...
Definition: tlb.hh:240
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Definition: tlb.cc:1266
virtual bool recvTimingResp(PacketPtr pkt)
MemSidePort receives the packet back.
Definition: tlb.cc:1247
virtual Tick recvAtomic(PacketPtr pkt)
Definition: tlb.hh:253
virtual void recvRangeChange()
Called to receive an address range change from the peer response port.
Definition: tlb.hh:255
std::deque< PacketPtr > retries
Definition: tlb.hh:246
virtual void recvFunctional(PacketPtr pkt)
Definition: tlb.hh:254
MemSidePort(const std::string &_name, GpuTLB *gpu_TLB, PortID _index)
Definition: tlb.hh:242
tlbOutcome outcome
outcome can be TLB_HIT, TLB_MISS, or PAGE_WALK
Definition: tlb.hh:293
void updateOutcome(tlbOutcome _outcome)
Definition: tlb.cc:1000
TLBEvent(GpuTLB *_tlb, Addr _addr, tlbOutcome outcome, PacketPtr _pkt)
Definition: tlb.cc:741
const char * description() const
Return a C string describing the event.
Definition: tlb.cc:994
virtual void finish(Fault fault, const RequestPtr &req, ThreadContext *tc, Mode mode)=0
The memory for this object may be dynamically allocated, and it may be responsible for cleaning itsle...
virtual void markDelayed()=0
Signal that the translation has been delayed due to a hw page table walk.
std::unordered_map< Addr, AccessInfo > AccessPatternTable
Definition: tlb.hh:345
void exitCallback()
Definition: tlb.cc:1333
X86GPUTLBParams Params
Definition: tlb.hh:75
int maxCoalescedReqs
Definition: tlb.hh:269
void issueTLBLookup(PacketPtr pkt)
Do the TLB lookup for this coalesced request and schedule another event <TLB access latency> cycles l...
Definition: tlb.cc:661
std::unordered_map< Addr, TLBEvent * > translationReturnEvent
Definition: tlb.hh:308
void translationReturn(Addr virtPageAddr, tlbOutcome outcome, PacketPtr pkt)
A TLBEvent is scheduled after the TLB lookup and helps us take the appropriate actions: (e....
Definition: tlb.cc:874
void invalidateNonGlobal()
Definition: tlb.cc:249
enum BaseMMU::Mode Mode
Definition: tlb.hh:79
EventFunctionWrapper cleanupEvent
Definition: tlb.hh:318
Tick doMmuRegRead(ThreadContext *tc, Packet *pkt)
EntryList::iterator lookupIt(Addr va, bool update_lru=true)
Definition: tlb.cc:186
Fault translateInt(bool read, const RequestPtr &req, ThreadContext *tc)
Definition: tlb.cc:302
GpuTLB(const Params &p)
Definition: tlb.cc:67
std::list< TlbEntry * > EntryList
Definition: tlb.hh:70
Fault translate(const RequestPtr &req, ThreadContext *tc, Translation *translation, Mode mode, bool &delayedResponse, bool timing, int &latency)
Definition: tlb.cc:420
AccessPatternTable TLBFootprint
Definition: tlb.hh:346
void demapPage(Addr va, uint64_t asn)
Definition: tlb.cc:267
uint32_t configAddress
Definition: tlb.hh:72
virtual void unserialize(CheckpointIn &cp) override
Unserialize an object.
Definition: tlb.cc:651
TlbEntry * lookup(Addr va, bool update_lru=true)
Definition: tlb.cc:216
std::vector< TlbEntry > tlb
Definition: tlb.hh:143
std::vector< MemSidePort * > memSidePort
Definition: tlb.hh:262
bool accessDistance
Print out accessDistance stats.
Definition: tlb.hh:141
void invalidateAll()
Definition: tlb.cc:229
EventFunctionWrapper exitEvent
Definition: tlb.hh:351
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
Definition: tlb.cc:138
Walker * walker
Definition: tlb.hh:107
std::queue< Addr > cleanupQueue
Definition: tlb.hh:312
Tick doMmuRegWrite(ThreadContext *tc, Packet *pkt)
bool FA
true if this is a fully-associative TLB
Definition: tlb.hh:123
TlbEntry * insert(Addr vpn, TlbEntry &entry)
Definition: tlb.cc:160
void updatePhysAddresses(Addr virt_page_addr, TlbEntry *tlb_entry, Addr phys_page_addr)
gem5::X86ISA::GpuTLB::GpuTLBStats stats
void translateTiming(const RequestPtr &req, ThreadContext *tc, Translation *translation, Mode mode, int &latency)
Definition: tlb.cc:625
void handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome outcome)
handleFuncTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault retu...
Definition: tlb.cc:1041
std::vector< EntryList > freeList
Definition: tlb.hh:150
std::vector< EntryList > entryList
An entryList per set is the equivalent of an LRU stack; it's used to guide replacement decisions.
Definition: tlb.hh:159
virtual void serialize(CheckpointOut &cp) const override
Serialize an object.
Definition: tlb.cc:646
void setConfigAddress(uint32_t addr)
Definition: tlb.cc:243
bool hasMemSidePort
if true, then this is not the last level TLB
Definition: tlb.hh:135
bool allocationPolicy
Allocation Policy: true if we always allocate on a hit, false otherwise.
Definition: tlb.hh:130
void handleTranslationReturn(Addr addr, tlbOutcome outcome, PacketPtr pkt)
handleTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault returns.
Definition: tlb.cc:787
void updatePageFootprint(Addr virt_page_addr)
Definition: tlb.cc:1298
bool tlbLookup(const RequestPtr &req, ThreadContext *tc, bool update_stats)
TLB_lookup will only perform a TLB lookup returning true on a TLB hit and false on a TLB miss.
Definition: tlb.cc:371
Walker * getWalker()
Definition: tlb.cc:639
std::vector< CpuSidePort * > cpuSidePort
Definition: tlb.hh:260
Fault translateAtomic(const RequestPtr &req, ThreadContext *tc, Mode mode, int &latency)
Definition: tlb.cc:615
void pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt, TlbEntry *tlb_entry, Mode mode)
Do Paging protection checks.
Definition: tlb.cc:753
A formula for statistics that is calculated when printed.
Definition: statistics.hh:2540
Statistics container.
Definition: group.hh:94
This is a simple scalar statistic, like a counter.
Definition: statistics.hh:1931
STL deque class.
Definition: stl.hh:44
STL vector class.
Definition: stl.hh:37
ClockedObject declaration and implementation.
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:178
Port Object Declaration.
Bitfield< 8 > va
Definition: misc_types.hh:282
Bitfield< 3 > addr
Definition: types.hh:84
Bitfield< 3 > mode
Definition: types.hh:192
Bitfield< 0 > p
Definition: pagetable.hh:151
ProbePointArg< PacketInfo > Packet
Packet probe point.
Definition: mem.hh:109
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
std::shared_ptr< FaultBase > Fault
Definition: types.hh:248
std::shared_ptr< Request > RequestPtr
Definition: request.hh:92
const PortID InvalidPortID
Definition: types.hh:246
std::ostream CheckpointOut
Definition: serialize.hh:66
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:147
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Definition: types.hh:245
uint64_t Tick
Tick count type.
Definition: types.hh:58
Declaration of a request, the overall memory request consisting of the parts of the request that are ...
Declaration of Statistics objects.
This hash map will use the virtual page address as a key and will keep track of total number of acces...
Definition: tlb.hh:326
unsigned int accessesPerPage
Definition: tlb.hh:328
std::vector< unsigned int > localTLBAccesses
The field below will help us compute the access distance, that is the number of (coalesced) TLB acces...
Definition: tlb.hh:340
unsigned int totalReuseDistance
Definition: tlb.hh:330
unsigned int lastTimeAccessed
Definition: tlb.hh:327
GpuTLBStats(statistics::Group *parent)
Definition: tlb.cc:1425
statistics::Formula localTLBMissRate
Definition: tlb.hh:363
statistics::Scalar globalNumTLBMisses
Definition: tlb.hh:370
statistics::Scalar localCycles
Definition: tlb.hh:379
statistics::Formula globalTLBMissRate
Definition: tlb.hh:371
statistics::Scalar localNumTLBHits
Definition: tlb.hh:361
statistics::Scalar numUniquePages
Definition: tlb.hh:377
statistics::Scalar avgReuseDistance
Definition: tlb.hh:384
statistics::Scalar globalNumTLBHits
Definition: tlb.hh:369
statistics::Formula localLatency
Definition: tlb.hh:381
statistics::Scalar localNumTLBMisses
Definition: tlb.hh:362
statistics::Scalar globalNumTLBAccesses
Definition: tlb.hh:368
statistics::Scalar accessCycles
Definition: tlb.hh:374
statistics::Scalar localNumTLBAccesses
Definition: tlb.hh:360
statistics::Scalar pageTableCycles
Definition: tlb.hh:376

Generated on Wed Dec 21 2022 10:22:15 for gem5 by doxygen 1.9.1