40#include "debug/GPUTLB.hh" 
   48      TLBProbesPerCycle(
p.probesPerCycle),
 
   49      coalescingWindow(
p.coalescingWindow),
 
   50      disableCoalescing(
p.disableCoalescing),
 
   52                    "Probe the TLB below",
 
   54      cleanupEvent([
this]{ processCleanupEvent(); },
 
   55                   "Cleanup issuedTranslationsTable hashmap",
 
   57      tlb_level(
p.tlb_level),
 
   58      maxDownstream(
p.maxDownstream),
 
   62    for (
size_t i = 0; 
i < 
p.port_cpu_side_ports_connection_count; ++
i) {
 
   63        cpuSidePort.push_back(
new CpuSidePort(
csprintf(
"%s-port%d", 
name(), 
i),
 
   68    for (
size_t i = 0; 
i < 
p.port_mem_side_ports_connection_count; ++
i) {
 
   69        memSidePort.push_back(
new MemSidePort(
csprintf(
"%s-port%d", 
name(), 
i),
 
   77    if (if_name == 
"cpu_side_ports") {
 
   79            panic(
"VegaTLBCoalescer::getPort: unknown index %d\n", idx);
 
   83    } 
else  if (if_name == 
"mem_side_ports") {
 
   85            panic(
"VegaTLBCoalescer::getPort: unknown index %d\n", idx);
 
   90        panic(
"VegaTLBCoalescer::getPort: unknown port %s\n", if_name);
 
  107      safe_cast<GpuTranslationState*>(incoming_pkt->
senderState);
 
  110     safe_cast<GpuTranslationState*>(coalesced_pkt->
senderState);
 
  120    if (incoming_virt_page_addr != coalesced_virt_page_addr)
 
  129    if (incoming_mode != coalesced_mode)
 
  136        coalesced_state->
reqCnt.back() += incoming_state->
reqCnt.back();
 
  150    DPRINTF(GPUTLB, 
"Update phys. addr. for %d coalesced reqs for page %#x\n",
 
  158    VegaISA::VegaTlbEntry tlb_entry =
 
  159        *safe_cast<VegaISA::VegaTlbEntry *>(sender_state->
tlbEntry);
 
  160    Addr first_entry_vaddr = tlb_entry.vaddr;
 
  161    Addr first_entry_paddr = tlb_entry.paddr;
 
  162    int page_size = tlb_entry.size();
 
  163    bool uncacheable = tlb_entry.uncacheable();
 
  164    int first_hit_level = sender_state->
hitLevel;
 
  165    bool is_system = pkt->
req->systemReq();
 
  170            safe_cast<GpuTranslationState*>(local_pkt->
senderState);
 
  175            sender_state->
reqCnt.pop_back();
 
  186            Addr paddr = first_entry_paddr
 
  187                       + (local_pkt->
req->getVaddr() & (page_size - 1));
 
  188            local_pkt->
req->setPaddr(paddr);
 
  197            if (sender_state->
tlbEntry == NULL) {
 
  200                    new VegaISA::VegaTlbEntry(1 ,
 
  210            sender_state->
hitLevel = first_hit_level;
 
  214        local_pkt->
req->setSystemReq(is_system);
 
  217        sender_state->
ports.pop_back();
 
  247    bool didCoalesce = 
false;
 
  249    int coalescedReq_cnt = 0;
 
  254    bool update_stats = !sender_state->
isPrefetch;
 
  260    sender_state->
ports.push_back(
this);
 
  270        if (!sender_state->
reqCnt.empty())
 
  271            req_cnt = sender_state->
reqCnt.back();
 
  273        sender_state->
reqCnt.push_back(req_cnt);
 
  277        req_cnt = sender_state->
reqCnt.back();
 
  278        DPRINTF(GPUTLB, 
"receiving pkt w/ req_cnt %d\n", req_cnt);
 
  299    for (
int i = 0; 
i < coalescedReq_cnt; ++
i) {
 
  305            DPRINTF(GPUTLB, 
"Coalesced req %i w/ tick_index %d has %d reqs\n",
 
  317    if (!coalescedReq_cnt || !didCoalesce) {
 
  322        new_array.push_back(pkt);
 
  325        DPRINTF(GPUTLB, 
"coalescerFIFO[%d] now has %d coalesced reqs after " 
  326                "push\n", tick_index,
 
  343    panic(
"recvReqRetry called");
 
  353    bool update_stats = !sender_state->
isPrefetch;
 
  356        coalescer->uncoalescedAccesses++;
 
  359    int map_count = coalescer->issuedTranslationsTable.count(virt_page_addr);
 
  362        DPRINTF(GPUTLB, 
"Warning! Functional access to addr %#x sees timing " 
  363                "req. pending\n", virt_page_addr);
 
  366    coalescer->memSidePort[0]->sendFunctional(pkt);
 
  384    coalescer->updatePhysAddresses(pkt);
 
  386    if (coalescer->tlb_level != 1)
 
  390    coalescer->decrementNumDownstream();
 
  393            "recvTimingReq: clscr = %p, numDownstream = %d, max = %d\n",
 
  394            coalescer, coalescer->numDownstream, coalescer->maxDownstream);
 
  396    coalescer->unstallPorts();
 
  404    if (!coalescer->probeTLBEvent.scheduled())
 
  405        coalescer->schedule(coalescer->probeTLBEvent,
 
  406                curTick() + coalescer->clockPeriod());
 
  412    fatal(
"Memory side recvFunctional() not implemented in TLB coalescer.\n");
 
  437    DPRINTF(GPUTLB, 
"triggered VegaTLBCoalescer %s\n", __func__);
 
  441        DPRINTF(GPUTLB, 
"IssueProbeEvent - no downstream slots, bail out\n");
 
  447        int coalescedReq_cnt = iter->second.size();
 
  449        int vector_index = 0;
 
  451        DPRINTF(GPUTLB, 
"coalescedReq_cnt is %d for tick_index %d\n",
 
  452               coalescedReq_cnt, iter->first);
 
  454        while (
i < coalescedReq_cnt) {
 
  456            PacketPtr first_packet = iter->second[vector_index][0];
 
  474                DPRINTF(GPUTLB, 
"Cannot issue - There are pending reqs for " 
  475                        "page %#x\n", virt_page_addr);
 
  482            if (!
memSidePort[0]->sendTimingReq(first_packet)) {
 
  484                        "Failed to send TLB request for page %#x",
 
  499                    safe_cast<GpuTranslationState*>(first_packet->
senderState);
 
  501                bool update_stats = !tmp_sender_state->
isPrefetch;
 
  507                    int req_cnt = tmp_sender_state->
reqCnt.back();
 
  510                    DPRINTF(GPUTLB, 
"%s sending pkt w/ req_cnt %d\n",
 
  515                    int pkt_cnt = iter->second[vector_index].size();
 
  519                DPRINTF(GPUTLB, 
"Successfully sent TLB request for page %#x\n",
 
  524                    = iter->second[vector_index];
 
  527                iter->second.erase(iter->second.begin() + vector_index);
 
  529                if (iter->second.empty())
 
  530                    assert( 
i == coalescedReq_cnt );
 
  539                    if (iter->second.empty())
 
  555        if (iter->second.empty()) {
 
  571        DPRINTF(GPUTLB, 
"Cleanup - Delete coalescer entry with key %#x\n",
 
  582        .
name(
name() + 
".uncoalesced_accesses")
 
  583        .desc(
"Number of uncoalesced TLB accesses")
 
  587        .
name(
name() + 
".coalesced_accesses")
 
  588        .desc(
"Number of coalesced TLB accesses")
 
  593        .desc(
"Number of cycles spent in queue")
 
  597        .
name(
name() + 
".local_queuing_cycles")
 
  598        .desc(
"Number of cycles spent in queue for all incoming reqs")
 
  603        .desc(
"Number of cycles spent in queue for all incoming reqs")
 
  608        .desc(
"Avg. latency over all incoming pkts")
 
  613        .desc(
"Avg. latency over all incoming pkts")
 
  630            "insertStalledPortIfNotMapped: port %p, mapSz = %d, qsz = %d\n",
 
  639    DPRINTF(GPUTLB, 
"mustStallCUPort: downstream = %d, max = %d\n",
 
  643        warn(
"RED ALERT - VegaTLBCoalescer::mustStallCUPort\n");
 
  658    DPRINTF(GPUTLB, 
"unstallPorts()\n");
 
  681    DPRINTF(GPUTLB, 
"sending retry for port = %p(%s)\n", port, port->name());
 
  686    port->sendRetryReq(); 
 
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to the cycles of the object.
Cycles curCycle() const
Determine the current cycle, corresponding to a tick aligned to a clock edge.
Tick cyclesToTicks(Cycles c) const
Cycles is a wrapper class for representing cycle counts, i.e. a relative difference between two points in time, expressed in a number of clock cycles.
virtual std::string name() const
A Packet is used to encapsulate a transfer between two objects in the memory system (e.g., the L1 and L2 cache).
void makeTimingResponse()
SenderState * senderState
This packet's sender state.
RequestPtr req
A pointer to the original request.
@ UNCACHEABLE
The request is to an uncacheable address.
A ResponsePort is a specialization of a port.
bool sendTimingResp(PacketPtr pkt)
Attempt to send a timing response to the request port by calling its corresponding receive function.
virtual void recvReqRetry()
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
VegaTLBCoalescer * coalescer
virtual void recvFunctional(PacketPtr pkt)
Receive a functional request packet from the peer.
virtual bool recvTimingReq(PacketPtr pkt)
Receive a timing request from the peer.
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on the peer) and was unsuccessful.
virtual void recvFunctional(PacketPtr pkt)
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.
statistics::Scalar localqueuingCycles
statistics::Scalar coalescedAccesses
void processCleanupEvent()
CoalescingTable issuedTranslationsTable
void updatePhysAddresses(PacketPtr pkt)
statistics::Scalar queuingCycles
EventFunctionWrapper cleanupEvent
The cleanupEvent is scheduled after a TLBEvent triggers in order to free memory and do the required clean-up.
std::queue< CpuSidePort * > stalledPortsQueue
void incrementNumDownstream()
statistics::Scalar uncoalescedAccesses
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
std::vector< CpuSidePort * > cpuSidePort
std::vector< MemSidePort * > memSidePort
EventFunctionWrapper probeTLBEvent
This event issues the TLB probes.
VegaTLBCoalescer(const VegaTLBCoalescerParams &p)
void insertStalledPortIfNotMapped(CpuSidePort *)
CoalescingFIFO coalescerFIFO
std::map< CpuSidePort *, CpuSidePort * > stalledPortsMap
bool canCoalesce(PacketPtr pkt1, PacketPtr pkt2)
void regStats() override
Callback to set stat parameters.
statistics::Formula localLatency
unsigned int availDownstreamSlots()
std::queue< Addr > cleanupQueue
bool mustStallCUPort(CpuSidePort *)
statistics::Scalar localCycles
unsigned int numDownstream
void processProbeTLBEvent()
statistics::Formula latency
Derived & name(const std::string &name)
Set the name and marks this stat to print at the end of simulation.
static constexpr T roundDown(const T &val, const U &align)
This function is used to align addresses in memory.
static const Priority Maximum_Pri
Maximum priority.
bool scheduled() const
Determine if the current event is scheduled.
void schedule(Event &event, Tick when)
static const Priority CPU_Tick_Pri
CPU ticks must come after other associated CPU events (such as writebacks).
#define panic(...)
This implements a cprintf based panic() function.
#define fatal(...)
This implements a cprintf based fatal() function.
virtual void regStats()
Callback to set stat parameters.
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Tick curTick()
The universal simulation clock.
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
uint64_t Tick
Tick count type.
std::string csprintf(const char *format, const Args &...args)
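GPU TranslationState: this currently is a somewhat bastardization of the usage of SenderState, whereby the receiver of a packet is not usually supposed to need to look at the contents of the senderState.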
std::vector< ResponsePort * > ports
std::vector< int > reqCnt
const std::string & name()