gem5  v19.0.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
tlb_coalescer.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Authors: Lisa Hsu
34  */
35 
37 
38 #include <cstring>
39 
40 #include "base/logging.hh"
41 #include "debug/GPUTLB.hh"
42 #include "sim/process.hh"
43 
// Constructor: member-initializer list and body.
// NOTE(review): the signature line (listing line 44) is missing from this
// extract; the symbol index further down shows TLBCoalescer(const Params *p)
// -- confirm against the original file.
45  : ClockedObject(p),
46  clock(p->clk_domain->clockPeriod()),
47  TLBProbesPerCycle(p->probesPerCycle),
48  coalescingWindow(p->coalescingWindow),
49  disableCoalescing(p->disableCoalescing),
 // probeTLBEvent runs processProbeTLBEvent() at CPU_Tick_Pri
50  probeTLBEvent([this]{ processProbeTLBEvent(); },
51  "Probe the TLB below",
52  false, Event::CPU_Tick_Pri),
 // cleanupEvent runs processCleanupEvent() at Maximum_Pri
53  cleanupEvent([this]{ processCleanupEvent(); },
54  "Cleanup issuedTranslationsTable hashmap",
55  false, Event::Maximum_Pri)
56 {
57  // create the slave ports based on the number of connected ports
 // Each port is named "<name()>-port<i>" and receives this object and its
 // index. NOTE(review): ports are allocated with new; no matching delete
 // is visible in this listing.
58  for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
59  cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d", name(), i),
60  this, i));
61  }
62 
63  // create the master ports based on the number of connected ports
64  for (size_t i = 0; i < p->port_master_connection_count; ++i) {
65  memSidePort.push_back(new MemSidePort(csprintf("%s-port%d", name(), i),
66  this, i));
67  }
68 }
69 
70 Port &
71 TLBCoalescer::getPort(const std::string &if_name, PortID idx)
72 {
73  if (if_name == "slave") {
74  if (idx >= static_cast<PortID>(cpuSidePort.size())) {
75  panic("TLBCoalescer::getPort: unknown index %d\n", idx);
76  }
77 
78  return *cpuSidePort[idx];
79  } else if (if_name == "master") {
80  if (idx >= static_cast<PortID>(memSidePort.size())) {
81  panic("TLBCoalescer::getPort: unknown index %d\n", idx);
82  }
83 
84  return *memSidePort[idx];
85  } else {
86  panic("TLBCoalescer::getPort: unknown port %s\n", if_name);
87  }
88 }
89 
90 /*
91  * This method returns true if the <incoming_pkt>
92  * can be coalesced with <coalesced_pkt> and false otherwise.
93  * A given set of rules is checked.
94  * The rules can potentially be modified based on the TLB level.
95  */
96 bool
97 TLBCoalescer::canCoalesce(PacketPtr incoming_pkt, PacketPtr coalesced_pkt)
98 {
 // NOTE(review): listing line 99 is missing from this extract. As shown,
 // the return below is unconditional and everything after it would be
 // unreachable; presumably the missing line is a guard (the constructor
 // initializes a disableCoalescing member) -- confirm against the original.
100  return false;
101 
 // Both packets are expected to carry a GpuTLB::TranslationState as their
 // senderState (enforced by safe_cast).
102  TheISA::GpuTLB::TranslationState *incoming_state =
103  safe_cast<TheISA::GpuTLB::TranslationState*>(incoming_pkt->senderState);
104 
105  TheISA::GpuTLB::TranslationState *coalesced_state =
106  safe_cast<TheISA::GpuTLB::TranslationState*>(coalesced_pkt->senderState);
107 
108  // Rule 1: Coalesce requests only if they
109  // fall within the same virtual page
 // NOTE(review): the second roundDown() argument (listing lines 111 and
 // 114) is missing from this extract; other functions in this file align
 // to TheISA::PageBytes -- confirm.
110  Addr incoming_virt_page_addr = roundDown(incoming_pkt->req->getVaddr(),
112 
113  Addr coalesced_virt_page_addr = roundDown(coalesced_pkt->req->getVaddr(),
115 
116  if (incoming_virt_page_addr != coalesced_virt_page_addr)
117  return false;
118 
119  // Rule 2: Coalesce requests only if they
120  // share a TLB Mode, i.e. they are both read
121  // or write requests.
122  BaseTLB::Mode incoming_mode = incoming_state->tlbMode;
123  BaseTLB::Mode coalesced_mode = coalesced_state->tlbMode;
124 
125  if (incoming_mode != coalesced_mode)
126  return false;
127 
128  // when we can coalesce a packet update the reqCnt
129  // that is the number of packets represented by
130  // this coalesced packet
 // Side effect: a successful check mutates coalesced_state, so this is
 // not a pure predicate. Prefetches do not contribute to the count.
131  if (!incoming_state->prefetch)
132  coalesced_state->reqCnt.back() += incoming_state->reqCnt.back();
133 
134  return true;
135 }
136 
137 /*
138  * We need to update the physical addresses of all the translation requests
139  * that were coalesced into the one that just returned.
140  */
// NOTE(review): the signature line (listing line 142) is missing from this
// extract; the symbol index shows void updatePhysAddresses(PacketPtr pkt).
// pkt is the translated packet; every packet stored under its virtual page
// in issuedTranslationsTable is completed and sent back here.
141 void
143 {
144  Addr virt_page_addr = roundDown(pkt->req->getVaddr(), TheISA::PageBytes);
145 
146  DPRINTF(GPUTLB, "Update phys. addr. for %d coalesced reqs for page %#x\n",
147  issuedTranslationsTable[virt_page_addr].size(), virt_page_addr);
148 
149  TheISA::GpuTLB::TranslationState *sender_state =
150  safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
151 
 // Snapshot the fields of the returned translation that get copied to
 // the other packets in the loop below.
152  TheISA::TlbEntry *tlb_entry = sender_state->tlbEntry;
153  assert(tlb_entry);
154  Addr first_entry_vaddr = tlb_entry->vaddr;
155  Addr first_entry_paddr = tlb_entry->paddr;
156  int page_size = tlb_entry->size();
157  bool uncacheable = tlb_entry->uncacheable;
158  int first_hit_level = sender_state->hitLevel;
159 
160  // Get the physical page address of the translated request
161  // Using the page_size specified in the TLBEntry allows us
162  // to support different page sizes.
163  Addr phys_page_paddr = pkt->req->getPaddr();
 // the mask arithmetic assumes page_size is a power of two
164  phys_page_paddr &= ~(page_size - 1);
165 
166  for (int i = 0; i < issuedTranslationsTable[virt_page_addr].size(); ++i) {
167  PacketPtr local_pkt = issuedTranslationsTable[virt_page_addr][i];
 // NOTE: this sender_state shadows the outer one and refers to
 // local_pkt, not to pkt.
168  TheISA::GpuTLB::TranslationState *sender_state =
169  safe_cast<TheISA::GpuTLB::TranslationState*>(
170  local_pkt->senderState);
171 
172  // we are sending the packet back, so pop the reqCnt associated
173  // with this level in the TLB hierarchy
174  if (!sender_state->prefetch)
175  sender_state->reqCnt.pop_back();
176 
177  /*
178  * Only the first packet from this coalesced request has been
179  * translated. Grab the translated phys. page addr and update the
180  * physical addresses of the remaining packets with the appropriate
181  * page offsets.
182  */
 // i == 0 is skipped: that entry is left exactly as it was returned
183  if (i) {
184  Addr paddr = phys_page_paddr;
185  paddr |= (local_pkt->req->getVaddr() & (page_size - 1));
186  local_pkt->req->setPaddr(paddr);
187 
188  if (uncacheable)
189  local_pkt->req->setFlags(Request::UNCACHEABLE);
190 
191  // update senderState->tlbEntry, so we can insert
192  // the correct TLBEntry in the TLBs above.
 // NOTE(review): allocated with new; who frees this entry is not
 // visible in this listing.
193  auto p = sender_state->tc->getProcessPtr();
194  sender_state->tlbEntry =
195  new TheISA::TlbEntry(p->pid(), first_entry_vaddr,
196  first_entry_paddr, false, false);
197 
198  // update the hitLevel for all uncoalesced reqs
199  // so that each packet knows where it hit
200  // (used for statistics in the CUs)
201  sender_state->hitLevel = first_hit_level;
202  }
203 
 // the most recently pushed port is the one to respond through
204  SlavePort *return_port = sender_state->ports.back();
205  sender_state->ports.pop_back();
206 
207  // Translation is done - Convert to a response pkt if necessary and
208  // send the translation back
209  if (local_pkt->isRequest()) {
210  local_pkt->makeTimingResponse();
211  }
212 
 // NOTE(review): the return value of sendTimingResp() is ignored
213  return_port->sendTimingResp(local_pkt);
214  }
215 
216  // schedule clean up for end of this cycle
217  // This is a maximum priority event and must be on
218  // the same cycle as GPUTLB cleanup event to prevent
219  // race conditions with an IssueProbeEvent caused by
220  // MemSidePort::recvReqRetry
 // The table entry is not erased here; its key is queued for
 // processCleanupEvent().
221  cleanupQueue.push(virt_page_addr);
222 
 // NOTE(review): the statement controlled by this if (listing line 224)
 // is missing from this extract; presumably it schedules cleanupEvent --
 // confirm.
223  if (!cleanupEvent.scheduled())
225 }
226 
227 // Receive translation requests, create a coalesced request,
228 // and send them to the TLB (TLBProbesPerCycle)
// NOTE(review): the signature line (listing line 230) is missing from this
// extract; the symbol index shows virtual bool recvTimingReq(PacketPtr pkt).
// The body pushes `this` as the return port and reaches the coalescer
// through a `coalescer` member, so this is presumably a CPU-side port
// method -- confirm. Always returns true (the packet is always buffered).
229 bool
231 {
232  // first packet of a coalesced request
233  PacketPtr first_packet = nullptr;
234  // true if we are able to do coalescing
235  bool didCoalesce = false;
236  // number of coalesced reqs for a given window
237  int coalescedReq_cnt = 0;
238 
239  TheISA::GpuTLB::TranslationState *sender_state =
240  safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
241 
242  // push back the port to remember the path back
243  sender_state->ports.push_back(this);
244 
 // prefetches are excluded from all statistics below
245  bool update_stats = !sender_state->prefetch;
246 
247  if (update_stats) {
248  // if reqCnt is empty then this packet does not represent
249  // multiple uncoalesced reqs(pkts) but just a single pkt.
250  // If it does though then the reqCnt for each level in the
251  // hierarchy accumulates the total number of reqs this packet
252  // represents
253  int req_cnt = 1;
254 
255  if (!sender_state->reqCnt.empty())
256  req_cnt = sender_state->reqCnt.back();
257 
258  sender_state->reqCnt.push_back(req_cnt);
259 
260  // update statistics
261  coalescer->uncoalescedAccesses++;
262  req_cnt = sender_state->reqCnt.back();
263  DPRINTF(GPUTLB, "receiving pkt w/ req_cnt %d\n", req_cnt);
 // Arrival time is subtracted here; processProbeTLBEvent() adds the
 // send time to the same counters, so they accumulate queuing delay.
264  coalescer->queuingCycles -= (curTick() * req_cnt);
265  coalescer->localqueuingCycles -= curTick();
266  }
267 
268  // FIXME if you want to coalesce not based on the issueTime
269  // of the packets (i.e., from the compute unit's perspective)
270  // but based on when they reached this coalescer then
271  // remove the following if statement and use curTick() or
272  // coalescingWindow for the tick_index.
273  if (!sender_state->issueTime)
274  sender_state->issueTime = curTick();
275 
276  // The tick index is used as a key to the coalescerFIFO hashmap.
277  // It is shared by all candidates that fall within the
278  // given coalescingWindow.
279  int64_t tick_index = sender_state->issueTime / coalescer->coalescingWindow;
280 
 // count() is checked first so that operator[] does not create an empty
 // entry for a tick_index that has none yet
281  if (coalescer->coalescerFIFO.count(tick_index)) {
282  coalescedReq_cnt = coalescer->coalescerFIFO[tick_index].size();
283  }
284 
285  // see if we can coalesce the incoming pkt with another
286  // coalesced request with the same tick_index
287  for (int i = 0; i < coalescedReq_cnt; ++i) {
 // element [0] of each group is the packet the others are compared to
288  first_packet = coalescer->coalescerFIFO[tick_index][i][0];
289 
290  if (coalescer->canCoalesce(pkt, first_packet)) {
291  coalescer->coalescerFIFO[tick_index][i].push_back(pkt);
292 
293  DPRINTF(GPUTLB, "Coalesced req %i w/ tick_index %d has %d reqs\n",
294  i, tick_index,
295  coalescer->coalescerFIFO[tick_index][i].size());
296 
297  didCoalesce = true;
298  break;
299  }
300  }
301 
302  // if this is the first request for this tick_index
303  // or we did not manage to coalesce, update stats
304  // and make necessary allocations.
305  if (!coalescedReq_cnt || !didCoalesce) {
306  if (update_stats)
307  coalescer->coalescedAccesses++;
308 
 // start a new group containing only this packet
309  std::vector<PacketPtr> new_array;
310  new_array.push_back(pkt);
311  coalescer->coalescerFIFO[tick_index].push_back(new_array);
312 
313  DPRINTF(GPUTLB, "coalescerFIFO[%d] now has %d coalesced reqs after "
314  "push\n", tick_index,
315  coalescer->coalescerFIFO[tick_index].size());
316  }
317 
318  //schedule probeTLBEvent next cycle to send the
319  //coalesced requests to the TLB
320  if (!coalescer->probeTLBEvent.scheduled()) {
321  coalescer->schedule(coalescer->probeTLBEvent,
322  curTick() + coalescer->ticks(1));
323  }
324 
325  return true;
326 }
327 
// NOTE(review): the signature line (listing line 329) is missing from this
// extract; judging by the panic message this is a recvReqRetry()
// implementation -- confirm which port class it belongs to.
// Unconditionally panics when called.
328 void
330 {
331  panic("recvReqRetry called");
332 }
333 
// NOTE(review): the signature line (listing line 335) is missing from this
// extract; the body takes a packet `pkt` and forwards it functionally, so
// this is presumably a CPU-side recvFunctional(PacketPtr pkt) -- confirm.
// Counts the access (unless it is a prefetch), logs if a timing translation
// for the same virtual page is outstanding, then forwards the packet.
334 void
336 {
337 
338  TheISA::GpuTLB::TranslationState *sender_state =
339  safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
340 
341  bool update_stats = !sender_state->prefetch;
342 
343  if (update_stats)
344  coalescer->uncoalescedAccesses++;
345 
346  // If there is a pending timing request for this virtual address
347  // print a warning message. This is a temporary caveat of
348  // the current simulator where atomic and timing requests can
349  // coexist. FIXME remove this check/warning in the future.
350  Addr virt_page_addr = roundDown(pkt->req->getVaddr(), TheISA::PageBytes);
351  int map_count = coalescer->issuedTranslationsTable.count(virt_page_addr);
352 
 // only a debug message is emitted; the access proceeds regardless
353  if (map_count) {
354  DPRINTF(GPUTLB, "Warning! Functional access to addr %#x sees timing "
355  "req. pending\n", virt_page_addr);
356  }
357 
 // functional accesses always go through memory-side port 0
358  coalescer->memSidePort[0]->sendFunctional(pkt);
359 }
360 
// NOTE(review): the return type and signature (listing lines 361-362) are
// missing from this extract; the symbol index shows
// virtual AddrRangeList getAddrRanges() const -- confirm.
// Returns a default-constructed (empty) AddrRangeList.
363 {
364  // currently not checked by the master
365  AddrRangeList ranges;
366 
367  return ranges;
368 }
369 
// NOTE(review): the signature line (listing line 371) is missing from this
// extract; the symbol index shows virtual bool recvTimingResp(PacketPtr pkt)
// -- confirm.
// Hands the returned translation to the coalescer and always reports the
// response as accepted.
370 bool
372 {
373  // a translation completed and returned
374  coalescer->updatePhysAddresses(pkt);
375 
376  return true;
377 }
378 
// NOTE(review): the signature line (listing line 380) is missing from this
// extract; from the comment below this is presumably the memory-side
// recvReqRetry() -- confirm.
// Ensures a probeTLBEvent is pending one coalescer tick from now.
379 void
381 {
382  // we've received a retry. Schedule a probeTLBEvent
383  if (!coalescer->probeTLBEvent.scheduled())
384  coalescer->schedule(coalescer->probeTLBEvent,
385  curTick() + coalescer->ticks(1));
386 }
387 
// NOTE(review): the signature line (listing line 389) is missing from this
// extract; per the message below this is the memory-side recvFunctional()
// -- confirm.
// Not supported: terminates via fatal() when called.
388 void
390 {
391  fatal("Memory side recvFunctional() not implemented in TLB coalescer.\n");
392 }
393 
394 /*
395  * Here we scan the coalescer FIFO and issue the max
396  * number of permitted probes to the TLB below. We
397  * permit bypassing of coalesced requests for the same
398  * tick_index.
399  *
400  * We do not access the next tick_index unless we've
401  * drained the previous one. The coalesced requests
402  * that are successfully sent are moved to the
403  * issuedTranslationsTable table (the table which keeps
404  * track of the outstanding reqs)
405  */
// NOTE(review): the signature line (listing line 407) is missing from this
// extract; the symbol index shows void processProbeTLBEvent() -- confirm.
406 void
408 {
409  // number of TLB probes sent so far
410  int sent_probes = 0;
411  // rejected denotes a blocking event
412  bool rejected = false;
413 
414  // It is set to true either when the recvTiming of the TLB below
415  // returns false or when there is another outstanding request for the
416  // same virt. page.
417 
418  DPRINTF(GPUTLB, "triggered TLBCoalescer %s\n", __func__);
419 
 // The iterator is advanced at the bottom of the body because the
 // current entry may be erased there. Once rejected is set, the current
 // tick_index is still scanned to the end, but no further tick_index is
 // visited.
420  for (auto iter = coalescerFIFO.begin();
421  iter != coalescerFIFO.end() && !rejected; ) {
422  int coalescedReq_cnt = iter->second.size();
 // i counts groups examined; vector_index is the position of the next
 // group to examine and only advances past groups that stay queued.
423  int i = 0;
424  int vector_index = 0;
425 
426  DPRINTF(GPUTLB, "coalescedReq_cnt is %d for tick_index %d\n",
427  coalescedReq_cnt, iter->first);
428 
429  while (i < coalescedReq_cnt) {
430  ++i;
431  PacketPtr first_packet = iter->second[vector_index][0];
432 
433  // compute virtual page address for this request
 // NOTE(review): the second roundDown() argument (listing line 435) is
 // missing from this extract; other functions in this file align to
 // TheISA::PageBytes -- confirm.
434  Addr virt_page_addr = roundDown(first_packet->req->getVaddr(),
436 
437  // is there another outstanding request for the same page addr?
438  int pending_reqs =
439  issuedTranslationsTable.count(virt_page_addr);
440 
441  if (pending_reqs) {
442  DPRINTF(GPUTLB, "Cannot issue - There are pending reqs for "
443  "page %#x\n", virt_page_addr);
444 
 // leave this group queued and move on to the next one
445  ++vector_index;
446  rejected = true;
447 
448  continue;
449  }
450 
451  // send the coalesced request for virt_page_addr
 // only the first packet of the group is sent to the TLB below
452  if (!memSidePort[0]->sendTimingReq(first_packet)) {
453  DPRINTF(GPUTLB, "Failed to send TLB request for page %#x",
454  virt_page_addr);
455 
456  // No need for a retries queue since we are already buffering
457  // the coalesced request in coalescerFIFO.
458  rejected = true;
459  ++vector_index;
460  } else {
461  TheISA::GpuTLB::TranslationState *tmp_sender_state =
462  safe_cast<TheISA::GpuTLB::TranslationState*>
463  (first_packet->senderState);
464 
465  bool update_stats = !tmp_sender_state->prefetch;
466 
467  if (update_stats) {
468  // req_cnt is total number of packets represented
469  // by the one we just sent counting all the way from
470  // the top of TLB hierarchy (i.e., from the CU)
471  int req_cnt = tmp_sender_state->reqCnt.back();
472  queuingCycles += (curTick() * req_cnt);
473 
474  DPRINTF(GPUTLB, "%s sending pkt w/ req_cnt %d\n",
475  name(), req_cnt);
476 
477  // pkt_cnt is number of packets we coalesced into the one
478  // we just sent but only at this coalescer level
479  int pkt_cnt = iter->second[vector_index].size();
480  localqueuingCycles += (curTick() * pkt_cnt);
481  }
482 
483  DPRINTF(GPUTLB, "Successfully sent TLB request for page %#x",
484  virt_page_addr);
485 
486  //copy coalescedReq to issuedTranslationsTable
487  issuedTranslationsTable[virt_page_addr]
488  = iter->second[vector_index];
489 
490  //erase the entry of this coalesced req
 // vector_index is not advanced: after the erase it already
 // refers to the next group
491  iter->second.erase(iter->second.begin() + vector_index);
492 
493  if (iter->second.empty())
494  assert(i == coalescedReq_cnt);
495 
496  sent_probes++;
 // NOTE(review): this early return skips the empty-entry cleanup
 // below, and no rescheduling of the event is visible in this
 // function -- confirm that leftover work is picked up elsewhere.
497  if (sent_probes == TLBProbesPerCycle)
498  return;
499  }
500  }
501 
502  //if there are no more coalesced reqs for this tick_index
503  //erase the hash_map with the first iterator
 // post-increment hands erase() the old position while iter moves on
504  if (iter->second.empty()) {
505  coalescerFIFO.erase(iter++);
506  } else {
507  ++iter;
508  }
509  }
510 }
511 
// NOTE(review): the signature line (listing line 513) is missing from this
// extract; the symbol index shows void processCleanupEvent() -- confirm.
// Drains cleanupQueue, erasing each queued page address from
// issuedTranslationsTable.
512 void
514 {
515  while (!cleanupQueue.empty()) {
516  Addr cleanup_addr = cleanupQueue.front();
517  cleanupQueue.pop();
518  issuedTranslationsTable.erase(cleanup_addr);
519 
520  DPRINTF(GPUTLB, "Cleanup - Delete coalescer entry with key %#x\n",
521  cleanup_addr);
522  }
523 }
524 
// NOTE(review): several lines are missing from this extract: the signature
// (listing line 526; the symbol index shows void regStats() override), the
// statement at line 528, the stat object named before each .name() chain
// (lines 530, 535, 540, 545, 550) and the statement at line 555. Only the
// names and descriptions below are visible; presumably the targets are the
// uncoalescedAccesses, coalescedAccesses, queuingCycles, localqueuingCycles
// and localLatency members listed in the symbol index -- confirm.
525 void
527 {
529 
531  .name(name() + ".uncoalesced_accesses")
532  .desc("Number of uncoalesced TLB accesses")
533  ;
534 
536  .name(name() + ".coalesced_accesses")
537  .desc("Number of coalesced TLB accesses")
538  ;
539 
541  .name(name() + ".queuing_cycles")
542  .desc("Number of cycles spent in queue")
543  ;
544 
546  .name(name() + ".local_queuing_cycles")
547  .desc("Number of cycles spent in queue for all incoming reqs")
548  ;
549 
551  .name(name() + ".local_latency")
552  .desc("Avg. latency over all incoming pkts")
553  ;
554 
556 }
558 
// Factory: builds a TLBCoalescer from this parameter object.
// NOTE(review): the return-type line (listing line 559) is missing from this
// extract; presumably a pointer to TLBCoalescer -- confirm.
560 TLBCoalescerParams::create()
561 {
562  return new TLBCoalescer(this);
563 }
564 
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:167
#define DPRINTF(x,...)
Definition: trace.hh:229
The request is to an uncacheable address.
Definition: request.hh:115
Ports are used to interface objects to each other.
Definition: port.hh:60
TLBCoalescer(const Params *p)
#define fatal(...)
This implements a cprintf based fatal() function.
Definition: logging.hh:175
virtual void recvFunctional(PacketPtr pkt)
CoalescingFIFO coalescerFIFO
void updatePhysAddresses(PacketPtr pkt)
Bitfield< 7 > i
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
static const Priority CPU_Tick_Pri
CPU ticks must come after other associated CPU events (such as writebacks).
Definition: eventq.hh:162
void makeTimingResponse()
Definition: packet.hh:955
The TLBCoalescer is a ClockedObject sitting on the front side (CPUSide) of each TLB.
CoalescingTable issuedTranslationsTable
virtual void regStats()
Callback to set stat parameters.
Definition: group.cc:66
A SlavePort is a specialisation of a port.
Definition: port.hh:258
bool disableCoalescing
STL vector class.
Definition: stl.hh:40
Stats::Scalar localqueuingCycles
RequestPtr req
A pointer to the original request.
Definition: packet.hh:327
std::queue< Addr > cleanupQueue
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
bool isRequest() const
Definition: packet.hh:531
bool sendTimingResp(PacketPtr pkt)
Attempt to send a timing response to the master port by calling its corresponding receive function...
Definition: port.hh:357
void processCleanupEvent()
Tick curTick()
The current simulated tick.
Definition: core.hh:47
std::string csprintf(const char *format, const Args &...args)
Definition: cprintf.hh:162
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:385
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
TLBCoalescerParams Params
Stats::Formula localLatency
T roundDown(const T &val, const U &align)
This function is used to align addresses in memory.
Definition: intmath.hh:185
Stats::Scalar coalescedAccesses
EventFunctionWrapper cleanupEvent
The cleanupEvent is scheduled after a TLBEvent triggers in order to free memory and do the required c...
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:142
virtual const std::string name() const
Definition: sim_object.hh:120
T safe_cast(U ptr)
Definition: cast.hh:61
std::vector< CpuSidePort * > cpuSidePort
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
Definition: packet.hh:255
const Addr PageBytes
Definition: isa_traits.hh:47
virtual void recvFunctional(PacketPtr pkt)
Receive a functional request packet from the peer.
Mode
Definition: tlb.hh:59
Derived & name(const std::string &name)
Set the name and marks this stat to print at the end of simulation.
Definition: statistics.hh:279
std::vector< MemSidePort * > memSidePort
SenderState * senderState
This packet's sender state.
Definition: packet.hh:480
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
void regStats() override
Callback to set stat parameters.
Stats::Scalar uncoalescedAccesses
void schedule(Event &event, Tick when)
Definition: eventq.hh:744
Stats::Scalar queuingCycles
Derived & desc(const std::string &_desc)
Set the description and marks this stat to print at the end of simulation.
Definition: statistics.hh:312
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Definition: types.hh:237
virtual bool recvTimingReq(PacketPtr pkt)
Receive a timing request from the peer.
void processProbeTLBEvent()
bool canCoalesce(PacketPtr pkt1, PacketPtr pkt2)
Bitfield< 0 > p
static const Priority Maximum_Pri
Maximum priority.
Definition: eventq.hh:179
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.

Generated on Fri Feb 28 2020 16:27:01 for gem5 by doxygen 1.8.13