gem5  v20.0.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
gpu_tlb.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #ifndef __GPU_TLB_HH__
35 #define __GPU_TLB_HH__
36 
37 #include <fstream>
38 #include <list>
39 #include <queue>
40 #include <string>
41 #include <vector>
42 
43 #include "arch/generic/tlb.hh"
44 #include "arch/x86/pagetable.hh"
46 #include "arch/x86/regs/segment.hh"
47 #include "base/callback.hh"
48 #include "base/logging.hh"
49 #include "base/statistics.hh"
51 #include "mem/port.hh"
52 #include "mem/request.hh"
53 #include "params/X86GPUTLB.hh"
54 #include "sim/clocked_object.hh"
55 #include "sim/sim_object.hh"
56 
57 class BaseTLB;
58 class Packet;
59 class ThreadContext;
60 
61 namespace X86ISA
62 {
63  class GpuTLB : public ClockedObject
64  {
65  protected:
66  friend class Walker;
67 
69 
70  uint32_t configAddress;
71 
72  // TLB clock: will inherit clock from shader's clock period in terms
73  // of number of ticks of curTime (aka global simulation clock).
74  // The assignment of TLB clock from shader clock is done in the python
75  // config files.
76  int clock;
77 
78  public:
79  // Clock-related functions: map to and from simulation ticks and
80  // object clock cycles.
81  Tick frequency() const { return SimClock::Frequency / clock; }
82 
83  Tick
84  ticks(int numCycles) const
85  {
86  return (Tick)clock * numCycles;
87  }
88 
89  Tick curCycle() const { return curTick() / clock; }
90  Tick tickToCycles(Tick val) const { return val / clock;}
91 
92  typedef X86GPUTLBParams Params;
93  GpuTLB(const Params *p);
94  ~GpuTLB();
95 
96  typedef enum BaseTLB::Mode Mode;
97 
99  {
100  public:
101  virtual ~Translation() { }
102 
107  virtual void markDelayed() = 0;
108 
114  virtual void finish(Fault fault, const RequestPtr &req,
115  ThreadContext *tc, Mode mode) = 0;
116  };
117 
118  void dumpAll();
119  TlbEntry *lookup(Addr va, bool update_lru=true);
120  void setConfigAddress(uint32_t addr);
121 
122  protected:
123  EntryList::iterator lookupIt(Addr va, bool update_lru=true);
125 
126  public:
127  Walker *getWalker();
128  void invalidateAll();
129  void invalidateNonGlobal();
130  void demapPage(Addr va, uint64_t asn);
131 
132  protected:
133  int size;
134  int assoc;
135  int numSets;
136 
140  bool FA;
142 
148 
153 
159 
161 
162  /*
163  * It's a per-set list. As long as we have not reached
164  * the full capacity of the given set, grab an entry from
165  * the freeList.
166  */
168 
177 
178  Fault translateInt(bool read, const RequestPtr &req,
179  ThreadContext *tc);
180 
181  Fault translate(const RequestPtr &req, ThreadContext *tc,
182  Translation *translation, Mode mode, bool &delayedResponse,
183  bool timing, int &latency);
184 
185  public:
186  // latencies for a TLB hit, miss and page fault
190 
191  // local_stats are as seen from the TLB
192  // without taking into account coalescing
197 
198  // global_stats are as seen from the
199  // CU's perspective taking into account
200  // all coalesced requests.
205 
206  // from the CU perspective (global)
208  // from the CU perspective (global)
211  // from the perspective of this TLB
213  // from the perspective of this TLB
215  // I take the avg. per page and then
216  // the avg. over all pages.
218 
219  void regStats() override;
220  void updatePageFootprint(Addr virt_page_addr);
221  void printAccessPattern();
222 
223 
225  Mode mode, int &latency);
226 
227  void translateTiming(const RequestPtr &req, ThreadContext *tc,
228  Translation *translation, Mode mode,
229  int &latency);
230 
233 
234  TlbEntry *insert(Addr vpn, TlbEntry &entry);
235 
236  // Checkpointing
237  virtual void serialize(CheckpointOut& cp) const override;
238  virtual void unserialize(CheckpointIn& cp) override;
239  void issueTranslation();
241  bool tlbLookup(const RequestPtr &req,
242  ThreadContext *tc, bool update_stats);
243 
244  void handleTranslationReturn(Addr addr, tlbOutcome outcome,
245  PacketPtr pkt);
246 
248 
250  TlbEntry *tlb_entry, Mode mode);
251 
252  void updatePhysAddresses(Addr virt_page_addr, TlbEntry *tlb_entry,
253  Addr phys_page_addr);
254 
255  void issueTLBLookup(PacketPtr pkt);
256 
257  // CpuSidePort is the TLB port closer to the CPU/CU side.
     // It is a SlavePort that remembers the owning GpuTLB and its own index.
258  class CpuSidePort : public SlavePort
259  {
260  public:
     // Forwards _name and gpu_TLB to SlavePort and records gpu_TLB and
     // _index in the tlb and index members.
261  CpuSidePort(const std::string &_name, GpuTLB * gpu_TLB,
262  PortID _index)
263  : SlavePort(_name, gpu_TLB), tlb(gpu_TLB), index(_index) { }
264 
265  protected:
     // NOTE(review): the declaration of the tlb member initialized above
     // is not visible in this listing.
     // Index given to this port at construction.
267  int index;
268 
     // Defined out of line.
269  virtual bool recvTimingReq(PacketPtr pkt);
     // Atomic accesses do no work here: always returns 0.
270  virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
     // Defined out of line.
271  virtual void recvFunctional(PacketPtr pkt);
     // Intentionally a no-op.
272  virtual void recvRangeChange() { }
     // Defined out of line.
273  virtual void recvReqRetry();
     // Calls panic() unconditionally; this port does not expect to be
     // asked to retry a response.
274  virtual void recvRespRetry() { panic("recvRespRetry called"); }
     // Defined out of line.
275  virtual AddrRangeList getAddrRanges() const;
276  };
277 
// MemSidePort is the TLB port closer to the memory side.
// It is a MasterPort that remembers the owning GpuTLB and its own index.
285  class MemSidePort : public MasterPort
286  {
287  public:
     // Forwards _name and gpu_TLB to MasterPort and records gpu_TLB and
     // _index in the tlb and index members.
288  MemSidePort(const std::string &_name, GpuTLB * gpu_TLB,
289  PortID _index)
290  : MasterPort(_name, gpu_TLB), tlb(gpu_TLB), index(_index) { }
291 
293 
294  protected:
     // NOTE(review): the declaration of the tlb member initialized above
     // is not visible in this listing.
     // Index given to this port at construction.
296  int index;
297 
     // Defined out of line.
298  virtual bool recvTimingResp(PacketPtr pkt);
     // Atomic accesses do no work here: always returns 0.
299  virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
     // Intentionally a no-op.
300  virtual void recvFunctional(PacketPtr pkt) { }
     // Intentionally a no-op.
301  virtual void recvRangeChange() { }
     // Defined out of line.
302  virtual void recvReqRetry();
303  };
304 
305  // TLB ports on the cpu Side
307  // TLB ports on the memory side
309 
310  Port &getPort(const std::string &if_name,
311  PortID idx=InvalidPortID) override;
312 
330  {
331  // TLB mode, read or write
332  Mode tlbMode;
333  // Thread context associated with this req
335 
336  /*
337  * TLB entry to be populated and passed back and filled in
338  * previous TLBs. Equivalent to the data cache concept of
339  * "data return."
340  */
342  // Is this a TLB prefetch request?
343  bool prefetch;
344  // When was the req for this translation issued
345  uint64_t issueTime;
346  // Remember where this came from
348 
349  // keep track of #uncoalesced reqs per packet per TLB level;
350  // reqCnt per level >= reqCnt higher level
352  // TLB level this packet hit in; 0 if it hit in the page table
353  int hitLevel;
355 
356  TranslationState(Mode tlb_mode, ThreadContext *_tc,
357  bool _prefetch=false,
358  Packet::SenderState *_saved=nullptr)
359  : tlbMode(tlb_mode), tc(_tc), tlbEntry(nullptr),
360  prefetch(_prefetch), issueTime(0),
361  hitLevel(0),saved(_saved) { }
362  };
363 
364  // maximum number of permitted coalesced requests per cycle
366 
367  // Current number of outstanding coalesced requests.
368  // Should be <= maxCoalescedReqs
370 
378  void translationReturn(Addr virtPageAddr, tlbOutcome outcome,
379  PacketPtr pkt);
380 
// Event type used by the TLB; built from the owning GpuTLB, an address,
// a tlbOutcome and a packet.
// NOTE(review): the private members are not visible in this listing.
381  class TLBEvent : public Event
382  {
383  private:
391 
392  public:
     // Defined out of line.
393  TLBEvent(GpuTLB *_tlb, Addr _addr, tlbOutcome outcome,
394  PacketPtr _pkt);
395 
     // Event hooks; both are defined out of line.
396  void process();
397  const char *description() const;
398 
399  // updateOutcome updates the tlbOutcome of a TLBEvent
400  void updateOutcome(tlbOutcome _outcome);
     // Presumably returns the virtual address this event was created
     // for -- TODO confirm against the definition.
401  Addr getTLBEventVaddr();
402  };
403 
404  std::unordered_map<Addr, TLBEvent*> translationReturnEvent;
405 
406  // this FIFO queue keeps track of the virt. page addresses
407  // that are pending cleanup
408  std::queue<Addr> cleanupQueue;
409 
410  // the cleanupEvent is scheduled after a TLBEvent triggers in order to
411  // free memory and do the required clean-up
412  void cleanup();
413 
415 
// Per-page access bookkeeping; this is the mapped type of
// AccessPatternTable, which is keyed by Addr.
421  struct AccessInfo
422  {
423  unsigned int lastTimeAccessed; // last access to this page
     // presumably the number of accesses seen for this page -- TODO confirm
424  unsigned int accessesPerPage;
425  // need to divide it by accessesPerPage at the end
426  unsigned int totalReuseDistance;
427 
     // NOTE(review): how sumDistance and meanDistance are computed is not
     // visible here; the names suggest a running sum and its mean.
437  unsigned int sumDistance;
438  unsigned int meanDistance;
439  };
440 
441  typedef std::unordered_map<Addr, AccessInfo> AccessPatternTable;
442  AccessPatternTable TLBFootprint;
443 
444  // Called at the end of simulation to dump page access stats.
445  void exitCallback();
446 
448  };
449 }
450 
451 #endif // __GPU_TLB_HH__
A MasterPort is a specialisation of a BaseMasterPort, which implements the default protocol for the t...
Definition: port.hh:71
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:163
AccessPatternTable TLBFootprint
Definition: gpu_tlb.hh:442
unsigned int accessesPerPage
Definition: gpu_tlb.hh:424
Stats::Formula globalTLBMissRate
Definition: gpu_tlb.hh:204
Ports are used to interface objects to each other.
Definition: port.hh:56
bool allocationPolicy
Allocation Policy: true if we always allocate on a hit, false otherwise.
Definition: gpu_tlb.hh:147
Stats::Scalar localCycles
Definition: gpu_tlb.hh:212
Bitfield< 5, 3 > index
Definition: types.hh:93
const PortID InvalidPortID
Definition: types.hh:236
virtual void serialize(CheckpointOut &cp) const override
Serialize an object.
Definition: gpu_tlb.cc:653
TLB TranslationState: this currently is a somewhat bastardization of the usage of SenderState...
Definition: gpu_tlb.hh:329
Stats::Scalar avgReuseDistance
Definition: gpu_tlb.hh:217
void translationReturn(Addr virtPageAddr, tlbOutcome outcome, PacketPtr pkt)
A TLBEvent is scheduled after the TLB lookup and helps us take the appropriate actions: (e...
Definition: gpu_tlb.cc:962
virtual void recvRespRetry()
Called by the peer if sendTimingResp was called on this protocol (causing recvTimingResp to be called...
Definition: gpu_tlb.hh:274
CpuSidePort(const std::string &_name, GpuTLB *gpu_TLB, PortID _index)
Definition: gpu_tlb.hh:261
void handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome outcome)
handleFuncTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault retu...
Definition: gpu_tlb.cc:1128
Stats::Scalar accessCycles
Definition: gpu_tlb.hh:207
Stats::Formula localTLBMissRate
Definition: gpu_tlb.hh:196
void pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt, TlbEntry *tlb_entry, Mode mode)
Do Paging protection checks.
Definition: gpu_tlb.cc:843
Declaration of a request, the overall memory request consisting of the parts of the request that are ...
void invalidateAll()
Definition: gpu_tlb.cc:228
std::shared_ptr< Request > RequestPtr
Definition: request.hh:81
unsigned int meanDistance
Definition: gpu_tlb.hh:438
void exitCallback()
Definition: gpu_tlb.cc:1419
void issueTranslation()
Walker * getWalker()
Definition: gpu_tlb.cc:646
EntryList::iterator lookupIt(Addr va, bool update_lru=true)
Definition: gpu_tlb.cc:185
std::vector< TlbEntry > tlb
Definition: gpu_tlb.hh:160
Fault translateAtomic(const RequestPtr &req, ThreadContext *tc, Mode mode, int &latency)
Definition: gpu_tlb.cc:622
MemSidePort is the TLB Port closer to the memory side If this is a last level TLB then this port will...
Definition: gpu_tlb.hh:285
bool hasMemSidePort
if true, then this is not the last level TLB
Definition: gpu_tlb.hh:152
unsigned int totalReuseDistance
Definition: gpu_tlb.hh:426
Fault translate(const RequestPtr &req, ThreadContext *tc, Translation *translation, Mode mode, bool &delayedResponse, bool timing, int &latency)
Definition: gpu_tlb.cc:419
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
Definition: gpu_tlb.cc:137
Tick doMmuRegRead(ThreadContext *tc, Packet *pkt)
unsigned int lastTimeAccessed
Definition: gpu_tlb.hh:423
A SlavePort is a specialisation of a port.
Definition: port.hh:254
Definition: cprintf.cc:40
unsigned int sumDistance
Definition: gpu_tlb.hh:437
Bitfield< 4, 0 > mode
Stats::Scalar localNumTLBMisses
Definition: gpu_tlb.hh:195
Tick Frequency
The simulated frequency of curTick(). (In ticks per second)
Definition: core.cc:46
int maxCoalescedReqs
Definition: gpu_tlb.hh:365
ThreadContext is the external interface to all thread state for anything outside of the CPU...
Declaration of Statistics objects.
This is a simple scalar statistic, like a counter.
Definition: statistics.hh:2505
enum BaseTLB::Mode Mode
Definition: gpu_tlb.hh:96
STL vector class.
Definition: stl.hh:37
TlbEntry * insert(Addr vpn, TlbEntry &entry)
Definition: gpu_tlb.cc:159
Bitfield< 63 > val
Definition: misc.hh:769
Walker * walker
Definition: gpu_tlb.hh:124
Stats::Scalar numUniquePages
Definition: gpu_tlb.hh:210
Definition: tlb.hh:50
Stats::Scalar globalNumTLBMisses
Definition: gpu_tlb.hh:203
Tick tickToCycles(Tick val) const
Definition: gpu_tlb.hh:90
std::queue< Addr > cleanupQueue
Definition: gpu_tlb.hh:408
std::vector< MemSidePort * > memSidePort
Definition: gpu_tlb.hh:308
bool accessDistance
Print out accessDistance stats.
Definition: gpu_tlb.hh:158
Tick curTick()
The current simulated tick.
Definition: core.hh:44
virtual void markDelayed()=0
Signal that the translation has been delayed due to a hw page table walk.
void translateTiming(const RequestPtr &req, ThreadContext *tc, Translation *translation, Mode mode, int &latency)
Definition: gpu_tlb.cc:632
std::deque< PacketPtr > retries
Definition: gpu_tlb.hh:292
virtual void finish(Fault fault, const RequestPtr &req, ThreadContext *tc, Mode mode)=0
The memory for this object may be dynamically allocated, and it may be responsible for cleaning itsle...
EventFunctionWrapper exitEvent
Definition: gpu_tlb.hh:447
Stats::Formula localLatency
Definition: gpu_tlb.hh:214
std::list< TlbEntry * > EntryList
Definition: gpu_tlb.hh:68
virtual Tick recvAtomic(PacketPtr pkt)
Definition: gpu_tlb.hh:299
int outstandingReqs
Definition: gpu_tlb.hh:369
std::vector< EntryList > entryList
An entryList per set is the equivalent of an LRU stack; it's used to guide replacement decisions...
Definition: gpu_tlb.hh:176
void invalidateNonGlobal()
Definition: gpu_tlb.cc:248
uint64_t Tick
Tick count type.
Definition: types.hh:61
void setConfigAddress(uint32_t addr)
Definition: gpu_tlb.cc:242
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
MemSidePort(const std::string &_name, GpuTLB *gpu_TLB, PortID _index)
Definition: gpu_tlb.hh:288
EventFunctionWrapper cleanupEvent
Definition: gpu_tlb.hh:414
This hash map will use the virtual page address as a key and will keep track of total number of acces...
Definition: gpu_tlb.hh:421
ClockedObject declaration and implementation.
X86GPUTLBParams Params
Definition: gpu_tlb.hh:92
void demapPage(Addr va, uint64_t asn)
Definition: gpu_tlb.cc:266
virtual void recvRangeChange()
Called to receive an address range change from the peer slave port.
Definition: gpu_tlb.hh:301
std::unordered_map< Addr, TLBEvent * > translationReturnEvent
Definition: gpu_tlb.hh:404
Port Object Declaration.
void handleTranslationReturn(Addr addr, tlbOutcome outcome, PacketPtr pkt)
handleTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault returns...
Definition: gpu_tlb.cc:877
virtual void recvFunctional(PacketPtr pkt)
Definition: gpu_tlb.hh:300
std::vector< CpuSidePort * > cpuSidePort
Definition: gpu_tlb.hh:306
TlbEntry * lookup(Addr va, bool update_lru=true)
Definition: gpu_tlb.cc:215
Stats::Scalar globalNumTLBAccesses
Definition: gpu_tlb.hh:201
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:140
Packet::SenderState * saved
Definition: gpu_tlb.hh:354
std::vector< EntryList > freeList
Definition: gpu_tlb.hh:167
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
Definition: packet.hh:249
Bitfield< 8 > va
A virtual base opaque structure used to hold state associated with the packet (e.g., an MSHR), specific to a SimObject that sees the packet.
Definition: packet.hh:397
STL deque class.
Definition: stl.hh:44
std::unordered_map< Addr, AccessInfo > AccessPatternTable
Definition: gpu_tlb.hh:441
void regStats() override
Callback to set stat parameters.
Definition: gpu_tlb.cc:663
A formula for statistics that is calculated when printed.
Definition: statistics.hh:3009
Fault translateInt(bool read, const RequestPtr &req, ThreadContext *tc)
Definition: gpu_tlb.cc:301
Mode
Definition: tlb.hh:57
virtual void unserialize(CheckpointIn &cp) override
Unserialize an object.
Definition: gpu_tlb.cc:658
virtual Tick recvAtomic(PacketPtr pkt)
Receive an atomic request packet from the peer.
Definition: gpu_tlb.hh:270
uint32_t configAddress
Definition: gpu_tlb.hh:70
std::vector< unsigned int > localTLBAccesses
The field below will help us compute the access distance, that is the number of (coalesced) TLB acces...
Definition: gpu_tlb.hh:436
std::ostream CheckpointOut
Definition: serialize.hh:63
This is exposed globally, independent of the ISA.
Definition: acpi.hh:55
int missLatency2
Definition: gpu_tlb.hh:189
std::vector< SlavePort * > ports
Definition: gpu_tlb.hh:347
Definition: eventq.hh:245
Tick curCycle() const
Definition: gpu_tlb.hh:89
void cleanup()
Definition: gpu_tlb.cc:1360
Tick frequency() const
Definition: gpu_tlb.hh:81
int missLatency1
Definition: gpu_tlb.hh:188
Stats::Scalar pageTableCycles
Definition: gpu_tlb.hh:209
Tick ticks(int numCycles) const
Definition: gpu_tlb.hh:84
Stats::Scalar localNumTLBHits
Definition: gpu_tlb.hh:194
TranslationState(Mode tlb_mode, ThreadContext *_tc, bool _prefetch=false, Packet::SenderState *_saved=nullptr)
Definition: gpu_tlb.hh:356
void updatePhysAddresses(Addr virt_page_addr, TlbEntry *tlb_entry, Addr phys_page_addr)
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Definition: types.hh:235
Bitfield< 0 > p
Definition: pagetable.hh:151
GpuTLB(const Params *p)
Definition: gpu_tlb.cc:65
Tick doMmuRegWrite(ThreadContext *tc, Packet *pkt)
virtual void recvRangeChange()
Definition: gpu_tlb.hh:272
bool FA
true if this is a fully-associative TLB
Definition: gpu_tlb.hh:140
Stats::Scalar localNumTLBAccesses
Definition: gpu_tlb.hh:193
Stats::Scalar globalNumTLBHits
Definition: gpu_tlb.hh:202
std::shared_ptr< FaultBase > Fault
Definition: types.hh:238
Bitfield< 3 > addr
Definition: types.hh:79
tlbOutcome outcome
outcome can be TLB_HIT, TLB_MISS, or PAGE_WALK
Definition: gpu_tlb.hh:389
std::vector< int > reqCnt
Definition: gpu_tlb.hh:351
void issueTLBLookup(PacketPtr pkt)
Do the TLB lookup for this coalesced request and schedule another event <TLB access latency> c...
Definition: gpu_tlb.cc:751
bool tlbLookup(const RequestPtr &req, ThreadContext *tc, bool update_stats)
TLB_lookup will only perform a TLB lookup returning true on a TLB hit and false on a TLB miss...
Definition: gpu_tlb.cc:370
void updatePageFootprint(Addr virt_page_addr)
Definition: gpu_tlb.cc:1384
void printAccessPattern()

Generated on Thu May 28 2020 16:21:33 for gem5 by doxygen 1.8.13