tlb_coalescer.cc
/*
 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "gpu-compute/tlb_coalescer.hh"

#include <cstring>

#include "base/logging.hh"
#include "debug/GPUTLB.hh"
#include "sim/process.hh"

TLBCoalescer::TLBCoalescer(const Params *p)
    : ClockedObject(p),
      clock(p->clk_domain->clockPeriod()),
      TLBProbesPerCycle(p->probesPerCycle),
      coalescingWindow(p->coalescingWindow),
      disableCoalescing(p->disableCoalescing),
      probeTLBEvent([this]{ processProbeTLBEvent(); },
                    "Probe the TLB below",
                    false, Event::CPU_Tick_Pri),
      cleanupEvent([this]{ processCleanupEvent(); },
                   "Cleanup issuedTranslationsTable hashmap",
                   false, Event::Maximum_Pri)
{
    // create the slave ports based on the number of connected ports
    for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
        cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d", name(), i),
                                              this, i));
    }

    // create the master ports based on the number of connected ports
    for (size_t i = 0; i < p->port_master_connection_count; ++i) {
        memSidePort.push_back(new MemSidePort(csprintf("%s-port%d", name(), i),
                                              this, i));
    }
}

Port &
TLBCoalescer::getPort(const std::string &if_name, PortID idx)
{
    if (if_name == "slave") {
        if (idx >= static_cast<PortID>(cpuSidePort.size())) {
            panic("TLBCoalescer::getPort: unknown index %d\n", idx);
        }

        return *cpuSidePort[idx];
    } else if (if_name == "master") {
        if (idx >= static_cast<PortID>(memSidePort.size())) {
            panic("TLBCoalescer::getPort: unknown index %d\n", idx);
        }

        return *memSidePort[idx];
    } else {
        panic("TLBCoalescer::getPort: unknown port %s\n", if_name);
    }
}

/*
 * This method returns true if the <incoming_pkt>
 * can be coalesced with <coalesced_pkt> and false otherwise.
 * A fixed set of rules is checked; the rules could potentially
 * be adapted based on the TLB level.
 */
bool
TLBCoalescer::canCoalesce(PacketPtr incoming_pkt, PacketPtr coalesced_pkt)
{
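    // If coalescing is disabled (via the disableCoalescing parameter),
    // every translation request is treated as unique and goes out on its
    // own, so nothing can ever be merged.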
    if (disableCoalescing)
        return false;

    TheISA::GpuTLB::TranslationState *incoming_state =
        safe_cast<TheISA::GpuTLB::TranslationState*>(incoming_pkt->senderState);

    TheISA::GpuTLB::TranslationState *coalesced_state =
        safe_cast<TheISA::GpuTLB::TranslationState*>(coalesced_pkt->senderState);

    // Rule 1: Coalesce requests only if they
    // fall within the same virtual page
    Addr incoming_virt_page_addr = roundDown(incoming_pkt->req->getVaddr(),
                                             TheISA::PageBytes);

    Addr coalesced_virt_page_addr = roundDown(coalesced_pkt->req->getVaddr(),
                                              TheISA::PageBytes);

    if (incoming_virt_page_addr != coalesced_virt_page_addr)
        return false;

    // Rule 2: Coalesce requests only if they
    // share a TLB Mode, i.e., they are both read
    // or write requests.
    BaseTLB::Mode incoming_mode = incoming_state->tlbMode;
    BaseTLB::Mode coalesced_mode = coalesced_state->tlbMode;

    if (incoming_mode != coalesced_mode)
        return false;

    // When we can coalesce a packet, update the reqCnt, i.e., the number
    // of uncoalesced packets represented by this coalesced packet.
    if (!incoming_state->prefetch)
        coalesced_state->reqCnt.back() += incoming_state->reqCnt.back();

    return true;
}

/*
 * We need to update the physical addresses of all the translation requests
 * that were coalesced into the one that just returned.
 */
void
TLBCoalescer::updatePhysAddresses(PacketPtr pkt)
{
    Addr virt_page_addr = roundDown(pkt->req->getVaddr(), TheISA::PageBytes);

    DPRINTF(GPUTLB, "Update phys. addr. for %d coalesced reqs for page %#x\n",
            issuedTranslationsTable[virt_page_addr].size(), virt_page_addr);

    TheISA::GpuTLB::TranslationState *sender_state =
        safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);

    TheISA::TlbEntry *tlb_entry = sender_state->tlbEntry;
    assert(tlb_entry);
    Addr first_entry_vaddr = tlb_entry->vaddr;
    Addr first_entry_paddr = tlb_entry->paddr;
    int page_size = tlb_entry->size();
    bool uncacheable = tlb_entry->uncacheable;
    int first_hit_level = sender_state->hitLevel;

    // Get the physical page address of the translated request.
    // Using the page_size specified in the TLBEntry allows us
    // to support different page sizes.
    Addr phys_page_paddr = pkt->req->getPaddr();
    phys_page_paddr &= ~(page_size - 1);

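    // Every packet that was coalesced under this virtual page receives the
    // translation of the first (probed) packet; only the page offset and
    // per-packet bookkeeping differ.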
    for (int i = 0; i < issuedTranslationsTable[virt_page_addr].size(); ++i) {
        PacketPtr local_pkt = issuedTranslationsTable[virt_page_addr][i];
        TheISA::GpuTLB::TranslationState *sender_state =
            safe_cast<TheISA::GpuTLB::TranslationState*>(
                    local_pkt->senderState);

        // We are sending the packet back, so pop the reqCnt associated
        // with this level in the TLB hierarchy.
        if (!sender_state->prefetch)
            sender_state->reqCnt.pop_back();

        /*
         * Only the first packet from this coalesced request has been
         * translated. Grab the translated phys. page addr and update the
         * physical addresses of the remaining packets with the appropriate
         * page offsets.
         */
        if (i) {
            Addr paddr = phys_page_paddr;
            paddr |= (local_pkt->req->getVaddr() & (page_size - 1));
            local_pkt->req->setPaddr(paddr);

            if (uncacheable)
                local_pkt->req->setFlags(Request::UNCACHEABLE);

            // Update senderState->tlbEntry, so we can insert
            // the correct TLBEntry in the TLBs above.
            auto p = sender_state->tc->getProcessPtr();
            sender_state->tlbEntry =
                new TheISA::TlbEntry(p->pid(), first_entry_vaddr,
                                     first_entry_paddr, false, false);

            // Update the hitLevel for all uncoalesced reqs
            // so that each packet knows where it hit
            // (used for statistics in the CUs).
            sender_state->hitLevel = first_hit_level;
        }

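        // The CPU-side port each packet arrived on was recorded in
        // senderState->ports by CpuSidePort::recvTimingReq; pop it here to
        // retrace the path back towards the requestor.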
        SlavePort *return_port = sender_state->ports.back();
        sender_state->ports.pop_back();

        // Translation is done - convert to a response pkt if necessary and
        // send the translation back.
        if (local_pkt->isRequest()) {
            local_pkt->makeTimingResponse();
        }

        return_port->sendTimingResp(local_pkt);
    }

    // Schedule the cleanup for the end of this cycle.
    // This is a maximum priority event and must be on
    // the same cycle as the GPUTLB cleanup event to prevent
    // race conditions with an IssueProbeEvent caused by
    // MemSidePort::recvReqRetry.
    cleanupQueue.push(virt_page_addr);

    if (!cleanupEvent.scheduled())
        schedule(cleanupEvent, curTick());
}

// Receive translation requests, create a coalesced request,
// and send them to the TLB (TLBProbesPerCycle).
bool
TLBCoalescer::CpuSidePort::recvTimingReq(PacketPtr pkt)
{
    // first packet of a coalesced request
    PacketPtr first_packet = nullptr;
    // true if we are able to do coalescing
    bool didCoalesce = false;
    // number of coalesced reqs for a given window
    int coalescedReq_cnt = 0;

    TheISA::GpuTLB::TranslationState *sender_state =
        safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);

    // push back the port to remember the path back
    sender_state->ports.push_back(this);

    bool update_stats = !sender_state->prefetch;

    if (update_stats) {
        // If reqCnt is empty then this packet does not represent
        // multiple uncoalesced reqs (pkts) but just a single pkt.
        // If it does, then the reqCnt for each level in the
        // hierarchy accumulates the total number of reqs this
        // packet represents.
        int req_cnt = 1;

        if (!sender_state->reqCnt.empty())
            req_cnt = sender_state->reqCnt.back();

        sender_state->reqCnt.push_back(req_cnt);

        // update statistics
        coalescer->uncoalescedAccesses++;
        req_cnt = sender_state->reqCnt.back();
        DPRINTF(GPUTLB, "receiving pkt w/ req_cnt %d\n", req_cnt);
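        // Subtracting the arrival tick here is balanced by the addition of
        // the send tick in processProbeTLBEvent, so these stats end up
        // accumulating the ticks each request spends queued at this level.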
        coalescer->queuingCycles -= (curTick() * req_cnt);
        coalescer->localqueuingCycles -= curTick();
    }

    // FIXME if you want to coalesce not based on the issueTime
    // of the packets (i.e., from the compute unit's perspective)
    // but based on when they reached this coalescer then
    // remove the following if statement and use curTick() or
    // coalescingWindow for the tick_index.
    if (!sender_state->issueTime)
        sender_state->issueTime = curTick();

    // The tick index is used as a key to the coalescerFIFO hashmap.
    // It is shared by all candidates that fall within the
    // given coalescingWindow.
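    // For example, with coalescingWindow == 4, packets whose issueTime is
    // anywhere in ticks 0 through 3 all map to tick_index 0 and are
    // candidates for coalescing with one another.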
    int64_t tick_index = sender_state->issueTime / coalescer->coalescingWindow;

    if (coalescer->coalescerFIFO.count(tick_index)) {
        coalescedReq_cnt = coalescer->coalescerFIFO[tick_index].size();
    }

    // See if we can coalesce the incoming pkt with another
    // coalesced request with the same tick_index.
    for (int i = 0; i < coalescedReq_cnt; ++i) {
        first_packet = coalescer->coalescerFIFO[tick_index][i][0];

        if (coalescer->canCoalesce(pkt, first_packet)) {
            coalescer->coalescerFIFO[tick_index][i].push_back(pkt);

            DPRINTF(GPUTLB, "Coalesced req %i w/ tick_index %d has %d reqs\n",
                    i, tick_index,
                    coalescer->coalescerFIFO[tick_index][i].size());

            didCoalesce = true;
            break;
        }
    }

    // If this is the first request for this tick_index,
    // or we did not manage to coalesce, update stats
    // and make the necessary allocations.
    if (!coalescedReq_cnt || !didCoalesce) {
        if (update_stats)
            coalescer->coalescedAccesses++;

        std::vector<PacketPtr> new_array;
        new_array.push_back(pkt);
        coalescer->coalescerFIFO[tick_index].push_back(new_array);

        DPRINTF(GPUTLB, "coalescerFIFO[%d] now has %d coalesced reqs after "
                "push\n", tick_index,
                coalescer->coalescerFIFO[tick_index].size());
    }

    // Schedule probeTLBEvent next cycle to send the
    // coalesced requests to the TLB.
    if (!coalescer->probeTLBEvent.scheduled()) {
        coalescer->schedule(coalescer->probeTLBEvent,
                            curTick() + coalescer->ticks(1));
    }

    return true;
}

void
TLBCoalescer::CpuSidePort::recvReqRetry()
{
    panic("recvReqRetry called");
}

void
TLBCoalescer::CpuSidePort::recvFunctional(PacketPtr pkt)
{
    TheISA::GpuTLB::TranslationState *sender_state =
        safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);

    bool update_stats = !sender_state->prefetch;

    if (update_stats)
        coalescer->uncoalescedAccesses++;

    // If there is a pending timing request for this virtual address,
    // print a warning message. This is a temporary caveat of
    // the current simulator where atomic and timing requests can
    // coexist. FIXME remove this check/warning in the future.
    Addr virt_page_addr = roundDown(pkt->req->getVaddr(), TheISA::PageBytes);
    int map_count = coalescer->issuedTranslationsTable.count(virt_page_addr);

    if (map_count) {
        DPRINTF(GPUTLB, "Warning! Functional access to addr %#x sees timing "
                "req. pending\n", virt_page_addr);
    }

    coalescer->memSidePort[0]->sendFunctional(pkt);
}

AddrRangeList
TLBCoalescer::CpuSidePort::getAddrRanges() const
{
    // currently not checked by the master
    AddrRangeList ranges;

    return ranges;
}

bool
TLBCoalescer::MemSidePort::recvTimingResp(PacketPtr pkt)
{
    // A translation completed and returned.
    coalescer->updatePhysAddresses(pkt);

    return true;
}

void
TLBCoalescer::MemSidePort::recvReqRetry()
{
    // We've received a retry. Schedule a probeTLBEvent.
    if (!coalescer->probeTLBEvent.scheduled())
        coalescer->schedule(coalescer->probeTLBEvent,
                            curTick() + coalescer->ticks(1));
}

void
TLBCoalescer::MemSidePort::recvFunctional(PacketPtr pkt)
{
    fatal("Memory side recvFunctional() not implemented in TLB coalescer.\n");
}

/*
 * Here we scan the coalescer FIFO and issue the max
 * number of permitted probes to the TLB below. We
 * permit bypassing of coalesced requests for the same
 * tick_index.
 *
 * We do not access the next tick_index unless we've
 * drained the previous one. The coalesced requests
 * that are successfully sent are moved to the
 * issuedTranslationsTable (the table which keeps
 * track of the outstanding reqs).
 */
void
TLBCoalescer::processProbeTLBEvent()
{
    // number of TLB probes sent so far
    int sent_probes = 0;
    // rejected denotes a blocking event
    bool rejected = false;

    // It is set to true either when the recvTiming of the TLB below
    // returns false or when there is another outstanding request for the
    // same virt. page.

    DPRINTF(GPUTLB, "triggered TLBCoalescer %s\n", __func__);

    for (auto iter = coalescerFIFO.begin();
         iter != coalescerFIFO.end() && !rejected; ) {
        int coalescedReq_cnt = iter->second.size();
        int i = 0;
        int vector_index = 0;

        DPRINTF(GPUTLB, "coalescedReq_cnt is %d for tick_index %d\n",
                coalescedReq_cnt, iter->first);

        while (i < coalescedReq_cnt) {
            ++i;
            PacketPtr first_packet = iter->second[vector_index][0];

            // compute virtual page address for this request
            Addr virt_page_addr = roundDown(first_packet->req->getVaddr(),
                                            TheISA::PageBytes);

            // Is there another outstanding request for the same page addr?
            int pending_reqs =
                issuedTranslationsTable.count(virt_page_addr);

            if (pending_reqs) {
                DPRINTF(GPUTLB, "Cannot issue - There are pending reqs for "
                        "page %#x\n", virt_page_addr);

                ++vector_index;
                rejected = true;

                continue;
            }

            // send the coalesced request for virt_page_addr
            if (!memSidePort[0]->sendTimingReq(first_packet)) {
                DPRINTF(GPUTLB, "Failed to send TLB request for page %#x",
                        virt_page_addr);

                // No need for a retries queue since we are already buffering
                // the coalesced request in coalescerFIFO.
                rejected = true;
                ++vector_index;
            } else {
                TheISA::GpuTLB::TranslationState *tmp_sender_state =
                    safe_cast<TheISA::GpuTLB::TranslationState*>
                    (first_packet->senderState);

                bool update_stats = !tmp_sender_state->prefetch;

                if (update_stats) {
                    // req_cnt is the total number of packets represented
                    // by the one we just sent, counting all the way from
                    // the top of the TLB hierarchy (i.e., from the CU).
                    int req_cnt = tmp_sender_state->reqCnt.back();
                    queuingCycles += (curTick() * req_cnt);

                    DPRINTF(GPUTLB, "%s sending pkt w/ req_cnt %d\n",
                            name(), req_cnt);

                    // pkt_cnt is the number of packets we coalesced into
                    // the one we just sent, but only at this coalescer
                    // level.
                    int pkt_cnt = iter->second[vector_index].size();
                    localqueuingCycles += (curTick() * pkt_cnt);
                }

                DPRINTF(GPUTLB, "Successfully sent TLB request for page %#x",
                        virt_page_addr);

                // copy coalescedReq to issuedTranslationsTable
                issuedTranslationsTable[virt_page_addr]
                    = iter->second[vector_index];

                // erase the entry of this coalesced req
                iter->second.erase(iter->second.begin() + vector_index);

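                // If that was the last coalesced request for this
                // tick_index, the while loop must have walked the whole
                // vector, so the two counters have to agree.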
                if (iter->second.empty())
                    assert(i == coalescedReq_cnt);

                sent_probes++;
                if (sent_probes == TLBProbesPerCycle)
                    return;
            }
        }

        // If there are no more coalesced reqs for this tick_index,
        // erase this hash_map entry using the current iterator.
        if (iter->second.empty()) {
            coalescerFIFO.erase(iter++);
        } else {
            ++iter;
        }
    }
}

void
TLBCoalescer::processCleanupEvent()
{
    while (!cleanupQueue.empty()) {
        Addr cleanup_addr = cleanupQueue.front();
        cleanupQueue.pop();
        issuedTranslationsTable.erase(cleanup_addr);

        DPRINTF(GPUTLB, "Cleanup - Delete coalescer entry with key %#x\n",
                cleanup_addr);
    }
}

void
TLBCoalescer::regStats()
{
    ClockedObject::regStats();

    uncoalescedAccesses
        .name(name() + ".uncoalesced_accesses")
        .desc("Number of uncoalesced TLB accesses")
        ;

    coalescedAccesses
        .name(name() + ".coalesced_accesses")
        .desc("Number of coalesced TLB accesses")
        ;

    queuingCycles
        .name(name() + ".queuing_cycles")
        .desc("Number of cycles spent in queue")
        ;

    localqueuingCycles
        .name(name() + ".local_queuing_cycles")
        .desc("Number of cycles spent in queue for all incoming reqs")
        ;

    localLatency
        .name(name() + ".local_latency")
        .desc("Avg. latency over all incoming pkts")
        ;

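    // The average local latency is derived from the accumulated local
    // queueing cycles over the number of uncoalesced accesses seen here.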
    localLatency = localqueuingCycles / uncoalescedAccesses;
}


TLBCoalescer*
TLBCoalescerParams::create()
{
    return new TLBCoalescer(this);
}