39 #include "debug/GPUTLB.hh" 
   47       TLBProbesPerCycle(
p.probesPerCycle),
 
   48       coalescingWindow(
p.coalescingWindow),
 
   49       disableCoalescing(
p.disableCoalescing),
 
   51                     "Probe the TLB below",
 
   53       cleanupEvent([
this]{ processCleanupEvent(); },
 
   54                    "Cleanup issuedTranslationsTable hashmap",
 
   59     for (
size_t i = 0; 
i < 
p.port_cpu_side_ports_connection_count; ++
i) {
 
   65     for (
size_t i = 0; 
i < 
p.port_mem_side_ports_connection_count; ++
i) {
 
   74     if (if_name == 
"cpu_side_ports") {
 
   76             panic(
"TLBCoalescer::getPort: unknown index %d\n", idx);
 
   80     } 
else  if (if_name == 
"mem_side_ports") {
 
   82             panic(
"TLBCoalescer::getPort: unknown index %d\n", idx);
 
   87         panic(
"TLBCoalescer::getPort: unknown port %s\n", if_name);
 
  104       safe_cast<GpuTranslationState*>(incoming_pkt->
senderState);
 
  107      safe_cast<GpuTranslationState*>(coalesced_pkt->
senderState);
 
  117     if (incoming_virt_page_addr != coalesced_virt_page_addr)
 
  126     if (incoming_mode != coalesced_mode)
 
  133         coalesced_state->
reqCnt.back() += incoming_state->
reqCnt.back();
 
  147     DPRINTF(GPUTLB, 
"Update phys. addr. for %d coalesced reqs for page %#x\n",
 
  154         safe_cast<X86ISA::TlbEntry *>(sender_state->
tlbEntry);
 
  156     Addr first_entry_vaddr = tlb_entry->
vaddr;
 
  157     Addr first_entry_paddr = tlb_entry->
paddr;
 
  158     int page_size = tlb_entry->
size();
 
  160     int first_hit_level = sender_state->
hitLevel;
 
  165     Addr phys_page_paddr = pkt->
req->getPaddr();
 
  166     phys_page_paddr &= ~(page_size - 1);
 
  171             safe_cast<GpuTranslationState*>(
 
  177             sender_state->
reqCnt.pop_back();
 
  186             Addr paddr = phys_page_paddr;
 
  187             paddr |= (local_pkt->
req->getVaddr() & (page_size - 1));
 
  188             local_pkt->
req->setPaddr(paddr);
 
  198                     first_entry_paddr, 
false, 
false);
 
  203             sender_state->
hitLevel = first_hit_level;
 
  207         sender_state->
ports.pop_back();
 
  237     bool didCoalesce = 
false;
 
  239     int coalescedReq_cnt = 0;
 
  245     sender_state->
ports.push_back(
this);
 
  247     bool update_stats = !sender_state->
isPrefetch;
 
  257         if (!sender_state->
reqCnt.empty())
 
  258             req_cnt = sender_state->
reqCnt.back();
 
  260         sender_state->
reqCnt.push_back(req_cnt);
 
  264         req_cnt = sender_state->
reqCnt.back();
 
  265         DPRINTF(GPUTLB, 
"receiving pkt w/ req_cnt %d\n", req_cnt);
 
  289     for (
int i = 0; 
i < coalescedReq_cnt; ++
i) {
 
  295             DPRINTF(GPUTLB, 
"Coalesced req %i w/ tick_index %d has %d reqs\n",
 
  307     if (!coalescedReq_cnt || !didCoalesce) {
 
  312         new_array.push_back(pkt);
 
  315         DPRINTF(GPUTLB, 
"coalescerFIFO[%d] now has %d coalesced reqs after " 
  316                 "push\n", tick_index,
 
  333     panic(
"recvReqRetry called");
 
  343     bool update_stats = !sender_state->
isPrefetch;
 
  346         coalescer->stats.uncoalescedAccesses++;
 
  353     int map_count = coalescer->issuedTranslationsTable.count(virt_page_addr);
 
  356         DPRINTF(GPUTLB, 
"Warning! Functional access to addr %#x sees timing " 
  357                 "req. pending\n", virt_page_addr);
 
  360     coalescer->memSidePort[0]->sendFunctional(pkt);
 
  376     coalescer->updatePhysAddresses(pkt);
 
  385     if (!coalescer->probeTLBEvent.scheduled())
 
  386         coalescer->schedule(coalescer->probeTLBEvent,
 
  387                 curTick() + coalescer->clockPeriod());
 
  393     fatal(
"Memory side recvFunctional() not implemented in TLB coalescer.\n");
 
  414     bool rejected = 
false;
 
  420     DPRINTF(GPUTLB, 
"triggered TLBCoalescer %s\n", __func__);
 
  424         int coalescedReq_cnt = iter->second.size();
 
  426         int vector_index = 0;
 
  428         DPRINTF(GPUTLB, 
"coalescedReq_cnt is %d for tick_index %d\n",
 
  429                coalescedReq_cnt, iter->first);
 
  431         while (
i < coalescedReq_cnt) {
 
  433             PacketPtr first_packet = iter->second[vector_index][0];
 
  444                 DPRINTF(GPUTLB, 
"Cannot issue - There are pending reqs for " 
  445                         "page %#x\n", virt_page_addr);
 
  454             if (!
memSidePort[0]->sendTimingReq(first_packet)) {
 
  455                 DPRINTF(GPUTLB, 
"Failed to send TLB request for page %#x\n",
 
  464                     safe_cast<GpuTranslationState*>
 
  467                 bool update_stats = !tmp_sender_state->
isPrefetch;
 
  473                     int req_cnt = tmp_sender_state->
reqCnt.back();
 
  476                     DPRINTF(GPUTLB, 
"%s sending pkt w/ req_cnt %d\n",
 
  481                     int pkt_cnt = iter->second[vector_index].size();
 
  485                 DPRINTF(GPUTLB, 
"Successfully sent TLB request for page %#x",
 
  490                     = iter->second[vector_index];
 
  493                 iter->second.erase(iter->second.begin() + vector_index);
 
  495                 if (iter->second.empty())
 
  496                     assert(
i == coalescedReq_cnt);
 
  506         if (iter->second.empty()) {
 
  522         DPRINTF(GPUTLB, 
"Cleanup - Delete coalescer entry with key %#x\n",
 
  528     : statistics::
Group(parent),
 
  529       ADD_STAT(uncoalescedAccesses, 
"Number of uncoalesced TLB accesses"),
 
  530       ADD_STAT(coalescedAccesses, 
"Number of coalesced TLB accesses"),
 
  531       ADD_STAT(queuingCycles, 
"Number of cycles spent in queue"),
 
  533                "Number of cycles spent in queue for all incoming reqs"),
 
  534       ADD_STAT(localLatency, 
"Avg. latency over all incoming pkts")
 
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
virtual std::string name() const
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
void makeTimingResponse()
SenderState * senderState
This packet's sender state.
RequestPtr req
A pointer to the original request.
@ UNCACHEABLE
The request is to an uncacheable address.
A ResponsePort is a specialization of a port.
bool sendTimingResp(PacketPtr pkt)
Attempt to send a timing response to the request port by calling its corresponding receive function.
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
virtual void recvReqRetry()
virtual void recvFunctional(PacketPtr pkt)
Receive a functional request packet from the peer.
virtual bool recvTimingReq(PacketPtr pkt)
Receive a timing request from the peer.
virtual void recvFunctional(PacketPtr pkt)
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
TLBCoalescerParams Params
std::queue< Addr > cleanupQueue
gem5::TLBCoalescer::TLBCoalescerStats stats
CoalescingTable issuedTranslationsTable
std::vector< MemSidePort * > memSidePort
void updatePhysAddresses(PacketPtr pkt)
EventFunctionWrapper cleanupEvent
The cleanupEvent is scheduled after a TLBEvent triggers in order to free memory and do the required c...
void processCleanupEvent()
EventFunctionWrapper probeTLBEvent
This event issues the TLB probes.
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
CoalescingFIFO coalescerFIFO
TLBCoalescer(const Params &p)
bool canCoalesce(PacketPtr pkt1, PacketPtr pkt2)
void processProbeTLBEvent()
std::vector< CpuSidePort * > cpuSidePort
virtual Process * getProcessPtr()=0
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
static constexpr T roundDown(const T &val, const U &align)
This function is used to align addresses in memory.
static const Priority Maximum_Pri
Maximum priority.
bool scheduled() const
Determine if the current event is scheduled.
void schedule(Event &event, Tick when)
static const Priority CPU_Tick_Pri
CPU ticks must come after other associated CPU events (such as writebacks).
#define panic(...)
This implements a cprintf based panic() function.
#define fatal(...)
This implements a cprintf based fatal() function.
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Tick curTick()
The universal simulation clock.
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
std::string csprintf(const char *format, const Args &...args)
GPU TranslationState: this is currently somewhat of a bastardization of the usage of SenderState,...
std::vector< ResponsePort * > ports
std::vector< int > reqCnt
statistics::Scalar queuingCycles
statistics::Scalar localqueuingCycles
statistics::Formula localLatency
statistics::Scalar coalescedAccesses
statistics::Scalar uncoalescedAccesses
TLBCoalescerStats(statistics::Group *parent)