gem5  v19.0.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
gpu_tlb.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Authors: Lisa Hsu
34  */
35 
36 #ifndef __GPU_TLB_HH__
37 #define __GPU_TLB_HH__
38 
39 #include <fstream>
40 #include <list>
41 #include <queue>
42 #include <string>
43 #include <vector>
44 
45 #include "arch/generic/tlb.hh"
46 #include "arch/x86/pagetable.hh"
48 #include "arch/x86/regs/segment.hh"
49 #include "base/callback.hh"
50 #include "base/logging.hh"
51 #include "base/statistics.hh"
53 #include "mem/port.hh"
54 #include "mem/request.hh"
55 #include "params/X86GPUTLB.hh"
56 #include "sim/clocked_object.hh"
57 #include "sim/sim_object.hh"
58 
59 class BaseTLB;
60 class Packet;
61 class ThreadContext;
62 
63 namespace X86ISA
64 {
65  class GpuTLB : public ClockedObject
66  {
67  protected:
68  friend class Walker;
69 
71 
72  uint32_t configAddress;
73 
74  // TLB clock: will inherit clock from shader's clock period in terms
75  // of number of ticks of curTime (aka global simulation clock)
76  // The assignment of TLB clock from shader clock is done in the python
77  // config files.
78  int clock;
79 
80  public:
81  // clock related functions; maps to-and-from Simulation ticks and
82  // object clocks.
83  Tick frequency() const { return SimClock::Frequency / clock; }
84 
85  Tick
86  ticks(int numCycles) const
87  {
88  return (Tick)clock * numCycles;
89  }
90 
91  Tick curCycle() const { return curTick() / clock; }
92  Tick tickToCycles(Tick val) const { return val / clock;}
93 
94  typedef X86GPUTLBParams Params;
95  GpuTLB(const Params *p);
96  ~GpuTLB();
97 
98  typedef enum BaseTLB::Mode Mode;
99 
101  {
102  public:
103  virtual ~Translation() { }
104 
109  virtual void markDelayed() = 0;
110 
116  virtual void finish(Fault fault, const RequestPtr &req,
117  ThreadContext *tc, Mode mode) = 0;
118  };
119 
120  void dumpAll();
121  TlbEntry *lookup(Addr va, bool update_lru=true);
122  void setConfigAddress(uint32_t addr);
123 
124  protected:
125  EntryList::iterator lookupIt(Addr va, bool update_lru=true);
127 
128  public:
129  Walker *getWalker();
130  void invalidateAll();
131  void invalidateNonGlobal();
132  void demapPage(Addr va, uint64_t asn);
133 
134  protected:
135  int size;
136  int assoc;
137  int numSets;
138 
142  bool FA;
144 
150 
155 
161 
163 
164  /*
165  * It's a per-set list. As long as we have not reached
166  * the full capacity of the given set, grab an entry from
167  * the freeList.
168  */
170 
179 
180  Fault translateInt(const RequestPtr &req, ThreadContext *tc);
181 
182  Fault translate(const RequestPtr &req, ThreadContext *tc,
183  Translation *translation, Mode mode, bool &delayedResponse,
184  bool timing, int &latency);
185 
186  public:
187  // latencies for a TLB hit, miss and page fault
191 
192  // local_stats are as seen from the TLB
193  // without taking into account coalescing
198 
199  // global_stats are as seen from the
200  // CU's perspective taking into account
201  // all coalesced requests.
206 
207  // from the CU perspective (global)
209  // from the CU perspective (global)
212  // from the perspective of this TLB
214  // from the perspective of this TLB
216  // I take the avg. per page and then
217  // the avg. over all pages.
219 
220  void regStats() override;
221  void updatePageFootprint(Addr virt_page_addr);
222  void printAccessPattern();
223 
224 
226  Mode mode, int &latency);
227 
228  void translateTiming(const RequestPtr &req, ThreadContext *tc,
229  Translation *translation, Mode mode,
230  int &latency);
231 
234 
235  TlbEntry *insert(Addr vpn, TlbEntry &entry);
236 
237  // Checkpointing
238  virtual void serialize(CheckpointOut& cp) const override;
239  virtual void unserialize(CheckpointIn& cp) override;
240  void issueTranslation();
242  bool tlbLookup(const RequestPtr &req,
243  ThreadContext *tc, bool update_stats);
244 
245  void handleTranslationReturn(Addr addr, tlbOutcome outcome,
246  PacketPtr pkt);
247 
249 
251  TlbEntry *tlb_entry, Mode mode);
252 
253  void updatePhysAddresses(Addr virt_page_addr, TlbEntry *tlb_entry,
254  Addr phys_page_addr);
255 
256  void issueTLBLookup(PacketPtr pkt);
257 
258  // CpuSidePort is the TLB Port closer to the CPU/CU side
259  class CpuSidePort : public SlavePort
260  {
261  public:
262  CpuSidePort(const std::string &_name, GpuTLB * gpu_TLB,
263  PortID _index)
264  : SlavePort(_name, gpu_TLB), tlb(gpu_TLB), index(_index) { }
265 
266  protected:
268  int index;
269 
270  virtual bool recvTimingReq(PacketPtr pkt);
271  virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
272  virtual void recvFunctional(PacketPtr pkt);
273  virtual void recvRangeChange() { }
274  virtual void recvReqRetry();
275  virtual void recvRespRetry() { panic("recvRespRetry called"); }
276  virtual AddrRangeList getAddrRanges() const;
277  };
278 
286  class MemSidePort : public MasterPort
287  {
288  public:
289  MemSidePort(const std::string &_name, GpuTLB * gpu_TLB,
290  PortID _index)
291  : MasterPort(_name, gpu_TLB), tlb(gpu_TLB), index(_index) { }
292 
294 
295  protected:
297  int index;
298 
299  virtual bool recvTimingResp(PacketPtr pkt);
300  virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
301  virtual void recvFunctional(PacketPtr pkt) { }
302  virtual void recvRangeChange() { }
303  virtual void recvReqRetry();
304  };
305 
306  // TLB ports on the cpu Side
308  // TLB ports on the memory side
310 
311  Port &getPort(const std::string &if_name,
312  PortID idx=InvalidPortID) override;
313 
331  {
332  // TLB mode, read or write
333  Mode tlbMode;
334  // Thread context associated with this req
336 
337  /*
338  * TLB entry to be populated and passed back and filled in
339  * previous TLBs. Equivalent to the data cache concept of
340  * "data return."
341  */
343  // Is this a TLB prefetch request?
344  bool prefetch;
345  // When was the req for this translation issued
346  uint64_t issueTime;
347  // Remember where this came from
349 
350  // keep track of #uncoalesced reqs per packet per TLB level;
351  // reqCnt per level >= reqCnt higher level
353  // TLB level this packet hit in; 0 if it hit in the page table
354  int hitLevel;
356 
357  TranslationState(Mode tlb_mode, ThreadContext *_tc,
358  bool _prefetch=false,
359  Packet::SenderState *_saved=nullptr)
360  : tlbMode(tlb_mode), tc(_tc), tlbEntry(nullptr),
361  prefetch(_prefetch), issueTime(0),
362  hitLevel(0),saved(_saved) { }
363  };
364 
365  // maximum number of permitted coalesced requests per cycle
367 
368  // Current number of outstanding coalesced requests.
369  // Should be <= maxCoalescedReqs
371 
379  void translationReturn(Addr virtPageAddr, tlbOutcome outcome,
380  PacketPtr pkt);
381 
382  class TLBEvent : public Event
383  {
384  private:
392 
393  public:
394  TLBEvent(GpuTLB *_tlb, Addr _addr, tlbOutcome outcome,
395  PacketPtr _pkt);
396 
397  void process();
398  const char *description() const;
399 
400  // updateOutcome updates the tlbOutcome of a TLBEvent
401  void updateOutcome(tlbOutcome _outcome);
402  Addr getTLBEventVaddr();
403  };
404 
405  std::unordered_map<Addr, TLBEvent*> translationReturnEvent;
406 
407  // this FIFO queue keeps track of the virt. page addresses
408  // that are pending cleanup
409  std::queue<Addr> cleanupQueue;
410 
411  // the cleanupEvent is scheduled after a TLBEvent triggers in order to
412  // free memory and do the required clean-up
413  void cleanup();
414 
416 
422  struct AccessInfo
423  {
424  unsigned int lastTimeAccessed; // last access to this page
425  unsigned int accessesPerPage;
426  // need to divide it by accessesPerPage at the end
427  unsigned int totalReuseDistance;
428 
438  unsigned int sumDistance;
439  unsigned int meanDistance;
440  };
441 
442  typedef std::unordered_map<Addr, AccessInfo> AccessPatternTable;
443  AccessPatternTable TLBFootprint;
444 
445  // Called at the end of simulation to dump page access stats.
446  void exitCallback();
447 
449  };
450 }
451 
452 #endif // __GPU_TLB_HH__
A MasterPort is a specialisation of a BaseMasterPort, which implements the default protocol for the t...
Definition: port.hh:75
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:167
AccessPatternTable TLBFootprint
Definition: gpu_tlb.hh:443
unsigned int accessesPerPage
Definition: gpu_tlb.hh:425
Stats::Formula globalTLBMissRate
Definition: gpu_tlb.hh:205
Ports are used to interface objects to each other.
Definition: port.hh:60
bool allocationPolicy
Allocation Policy: true if we always allocate on a hit, false otherwise.
Definition: gpu_tlb.hh:149
Stats::Scalar localCycles
Definition: gpu_tlb.hh:213
Bitfield< 5, 3 > index
Definition: types.hh:95
const PortID InvalidPortID
Definition: types.hh:238
virtual void serialize(CheckpointOut &cp) const override
Serialize an object.
Definition: gpu_tlb.cc:935
TLB TranslationState: this currently is a somewhat bastardization of the usage of SenderState...
Definition: gpu_tlb.hh:330
Stats::Scalar avgReuseDistance
Definition: gpu_tlb.hh:218
void translationReturn(Addr virtPageAddr, tlbOutcome outcome, PacketPtr pkt)
A TLBEvent is scheduled after the TLB lookup and helps us take the appropriate actions: (e...
Definition: gpu_tlb.cc:1244
virtual void recvRespRetry()
Called by the peer if sendTimingResp was called on this protocol (causing recvTimingResp to be called...
Definition: gpu_tlb.hh:275
CpuSidePort(const std::string &_name, GpuTLB *gpu_TLB, PortID _index)
Definition: gpu_tlb.hh:262
void handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome outcome)
handleFuncTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault retu...
Definition: gpu_tlb.cc:1410
Stats::Scalar accessCycles
Definition: gpu_tlb.hh:208
Stats::Formula localTLBMissRate
Definition: gpu_tlb.hh:197
void pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt, TlbEntry *tlb_entry, Mode mode)
Do Paging protection checks.
Definition: gpu_tlb.cc:1125
Declaration of a request, the overall memory request consisting of the parts of the request that are ...
void invalidateAll()
Definition: gpu_tlb.cc:226
std::shared_ptr< Request > RequestPtr
Definition: request.hh:83
unsigned int meanDistance
Definition: gpu_tlb.hh:439
void exitCallback()
Definition: gpu_tlb.cc:1701
void issueTranslation()
Walker * getWalker()
Definition: gpu_tlb.cc:928
EntryList::iterator lookupIt(Addr va, bool update_lru=true)
Definition: gpu_tlb.cc:183
std::vector< TlbEntry > tlb
Definition: gpu_tlb.hh:162
Fault translateAtomic(const RequestPtr &req, ThreadContext *tc, Mode mode, int &latency)
Definition: gpu_tlb.cc:904
MemSidePort is the TLB Port closer to the memory side If this is a last level TLB then this port will...
Definition: gpu_tlb.hh:286
bool hasMemSidePort
if true, then this is not the last level TLB
Definition: gpu_tlb.hh:154
unsigned int totalReuseDistance
Definition: gpu_tlb.hh:427
Fault translate(const RequestPtr &req, ThreadContext *tc, Translation *translation, Mode mode, bool &delayedResponse, bool timing, int &latency)
Definition: gpu_tlb.cc:701
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
Definition: gpu_tlb.cc:135
Tick doMmuRegRead(ThreadContext *tc, Packet *pkt)
unsigned int lastTimeAccessed
Definition: gpu_tlb.hh:424
A SlavePort is a specialisation of a port.
Definition: port.hh:258
Definition: cprintf.cc:42
unsigned int sumDistance
Definition: gpu_tlb.hh:438
Bitfield< 4, 0 > mode
Fault translateInt(const RequestPtr &req, ThreadContext *tc)
Definition: gpu_tlb.cc:277
Stats::Scalar localNumTLBMisses
Definition: gpu_tlb.hh:196
Tick Frequency
The simulated frequency of curTick(). (In ticks per second)
Definition: core.cc:49
int maxCoalescedReqs
Definition: gpu_tlb.hh:366
ThreadContext is the external interface to all thread state for anything outside of the CPU...
Declaration of Statistics objects.
This is a simple scalar statistic, like a counter.
Definition: statistics.hh:2508
enum BaseTLB::Mode Mode
Definition: gpu_tlb.hh:98
STL vector class.
Definition: stl.hh:40
TlbEntry * insert(Addr vpn, TlbEntry &entry)
Definition: gpu_tlb.cc:157
Bitfield< 63 > val
Definition: misc.hh:771
Walker * walker
Definition: gpu_tlb.hh:126
Stats::Scalar numUniquePages
Definition: gpu_tlb.hh:211
Definition: tlb.hh:52
Stats::Scalar globalNumTLBMisses
Definition: gpu_tlb.hh:204
Tick tickToCycles(Tick val) const
Definition: gpu_tlb.hh:92
std::queue< Addr > cleanupQueue
Definition: gpu_tlb.hh:409
std::vector< MemSidePort * > memSidePort
Definition: gpu_tlb.hh:309
bool accessDistance
Print out accessDistance stats.
Definition: gpu_tlb.hh:160
Tick curTick()
The current simulated tick.
Definition: core.hh:47
virtual void markDelayed()=0
Signal that the translation has been delayed due to a hw page table walk.
void translateTiming(const RequestPtr &req, ThreadContext *tc, Translation *translation, Mode mode, int &latency)
Definition: gpu_tlb.cc:914
std::deque< PacketPtr > retries
Definition: gpu_tlb.hh:293
virtual void finish(Fault fault, const RequestPtr &req, ThreadContext *tc, Mode mode)=0
The memory for this object may be dynamically allocated, and it may be responsible for cleaning itself...
EventFunctionWrapper exitEvent
Definition: gpu_tlb.hh:448
Stats::Formula localLatency
Definition: gpu_tlb.hh:215
std::list< TlbEntry * > EntryList
Definition: gpu_tlb.hh:70
virtual Tick recvAtomic(PacketPtr pkt)
Definition: gpu_tlb.hh:300
int outstandingReqs
Definition: gpu_tlb.hh:370
std::vector< EntryList > entryList
An entryList per set is the equivalent of an LRU stack; it's used to guide replacement decisions...
Definition: gpu_tlb.hh:178
void invalidateNonGlobal()
Definition: gpu_tlb.cc:246
uint64_t Tick
Tick count type.
Definition: types.hh:63
void setConfigAddress(uint32_t addr)
Definition: gpu_tlb.cc:240
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
MemSidePort(const std::string &_name, GpuTLB *gpu_TLB, PortID _index)
Definition: gpu_tlb.hh:289
EventFunctionWrapper cleanupEvent
Definition: gpu_tlb.hh:415
This hash map will use the virtual page address as a key and will keep track of total number of acces...
Definition: gpu_tlb.hh:422
ClockedObject declaration and implementation.
X86GPUTLBParams Params
Definition: gpu_tlb.hh:94
void demapPage(Addr va, uint64_t asn)
Definition: gpu_tlb.cc:264
virtual void recvRangeChange()
Called to receive an address range change from the peer slave port.
Definition: gpu_tlb.hh:302
std::unordered_map< Addr, TLBEvent * > translationReturnEvent
Definition: gpu_tlb.hh:405
Port Object Declaration.
void handleTranslationReturn(Addr addr, tlbOutcome outcome, PacketPtr pkt)
handleTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault returns...
Definition: gpu_tlb.cc:1159
virtual void recvFunctional(PacketPtr pkt)
Definition: gpu_tlb.hh:301
std::vector< CpuSidePort * > cpuSidePort
Definition: gpu_tlb.hh:307
TlbEntry * lookup(Addr va, bool update_lru=true)
Definition: gpu_tlb.cc:213
Stats::Scalar globalNumTLBAccesses
Definition: gpu_tlb.hh:202
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:142
Packet::SenderState * saved
Definition: gpu_tlb.hh:355
std::vector< EntryList > freeList
Definition: gpu_tlb.hh:169
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
Definition: packet.hh:255
Bitfield< 8 > va
A virtual base opaque structure used to hold state associated with the packet (e.g., an MSHR), specific to a SimObject that sees the packet.
Definition: packet.hh:403
STL deque class.
Definition: stl.hh:47
std::unordered_map< Addr, AccessInfo > AccessPatternTable
Definition: gpu_tlb.hh:442
void regStats() override
Callback to set stat parameters.
Definition: gpu_tlb.cc:945
A formula for statistics that is calculated when printed.
Definition: statistics.hh:3012
Mode
Definition: tlb.hh:59
virtual void unserialize(CheckpointIn &cp) override
Unserialize an object.
Definition: gpu_tlb.cc:940
virtual Tick recvAtomic(PacketPtr pkt)
Receive an atomic request packet from the peer.
Definition: gpu_tlb.hh:271
uint32_t configAddress
Definition: gpu_tlb.hh:72
std::vector< unsigned int > localTLBAccesses
The field below will help us compute the access distance, that is the number of (coalesced) TLB acces...
Definition: gpu_tlb.hh:437
std::ostream CheckpointOut
Definition: serialize.hh:68
This is exposed globally, independent of the ISA.
Definition: acpi.hh:57
int missLatency2
Definition: gpu_tlb.hh:190
std::vector< SlavePort * > ports
Definition: gpu_tlb.hh:348
Definition: eventq.hh:189
Tick curCycle() const
Definition: gpu_tlb.hh:91
void cleanup()
Definition: gpu_tlb.cc:1642
Tick frequency() const
Definition: gpu_tlb.hh:83
int missLatency1
Definition: gpu_tlb.hh:189
Stats::Scalar pageTableCycles
Definition: gpu_tlb.hh:210
Tick ticks(int numCycles) const
Definition: gpu_tlb.hh:86
Stats::Scalar localNumTLBHits
Definition: gpu_tlb.hh:195
TranslationState(Mode tlb_mode, ThreadContext *_tc, bool _prefetch=false, Packet::SenderState *_saved=nullptr)
Definition: gpu_tlb.hh:357
void updatePhysAddresses(Addr virt_page_addr, TlbEntry *tlb_entry, Addr phys_page_addr)
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Definition: types.hh:237
Bitfield< 0 > p
Definition: pagetable.hh:152
GpuTLB(const Params *p)
Definition: gpu_tlb.cc:63
Tick doMmuRegWrite(ThreadContext *tc, Packet *pkt)
virtual void recvRangeChange()
Definition: gpu_tlb.hh:273
bool FA
true if this is a fully-associative TLB
Definition: gpu_tlb.hh:142
Stats::Scalar localNumTLBAccesses
Definition: gpu_tlb.hh:194
Stats::Scalar globalNumTLBHits
Definition: gpu_tlb.hh:203
std::shared_ptr< FaultBase > Fault
Definition: types.hh:240
Bitfield< 3 > addr
Definition: types.hh:81
tlbOutcome outcome
outcome can be TLB_HIT, TLB_MISS, or PAGE_WALK
Definition: gpu_tlb.hh:390
std::vector< int > reqCnt
Definition: gpu_tlb.hh:352
void issueTLBLookup(PacketPtr pkt)
Do the TLB lookup for this coalesced request and schedule another event <TLB access latency> c...
Definition: gpu_tlb.cc:1033
bool tlbLookup(const RequestPtr &req, ThreadContext *tc, bool update_stats)
TLB_lookup will only perform a TLB lookup returning true on a TLB hit and false on a TLB miss...
Definition: gpu_tlb.cc:652
void updatePageFootprint(Addr virt_page_addr)
Definition: gpu_tlb.cc:1666
void printAccessPattern()

Generated on Fri Feb 28 2020 16:27:01 for gem5 by doxygen 1.8.13