40 #include "debug/GPUTLB.hh" 
   48       TLBProbesPerCycle(
p.probesPerCycle),
 
   49       coalescingWindow(
p.coalescingWindow),
 
   50       disableCoalescing(
p.disableCoalescing),
 
   52                     "Probe the TLB below",
 
   54       cleanupEvent([
this]{ processCleanupEvent(); },
 
   55                    "Cleanup issuedTranslationsTable hashmap",
 
   57       tlb_level(
p.tlb_level),
 
   58       maxDownstream(
p.maxDownstream),
 
   62     for (
size_t i = 0; 
i < 
p.port_cpu_side_ports_connection_count; ++
i) {
 
   63         cpuSidePort.push_back(
new CpuSidePort(
csprintf(
"%s-port%d", 
name(), 
i),
 
   68     for (
size_t i = 0; 
i < 
p.port_mem_side_ports_connection_count; ++
i) {
 
   69         memSidePort.push_back(
new MemSidePort(
csprintf(
"%s-port%d", 
name(), 
i),
 
   77     if (if_name == 
"cpu_side_ports") {
 
   79             panic(
"VegaTLBCoalescer::getPort: unknown index %d\n", idx);
 
   83     } 
else  if (if_name == 
"mem_side_ports") {
 
   85             panic(
"VegaTLBCoalescer::getPort: unknown index %d\n", idx);
 
   90         panic(
"VegaTLBCoalescer::getPort: unknown port %s\n", if_name);
 
  107       safe_cast<GpuTranslationState*>(incoming_pkt->
senderState);
 
  110      safe_cast<GpuTranslationState*>(coalesced_pkt->
senderState);
 
  120     if (incoming_virt_page_addr != coalesced_virt_page_addr)
 
  129     if (incoming_mode != coalesced_mode)
 
  136         coalesced_state->
reqCnt.back() += incoming_state->
reqCnt.back();
 
  150     DPRINTF(GPUTLB, 
"Update phys. addr. for %d coalesced reqs for page %#x\n",
 
  158     VegaISA::VegaTlbEntry tlb_entry =
 
  159         *safe_cast<VegaISA::VegaTlbEntry *>(sender_state->
tlbEntry);
 
  160     Addr first_entry_vaddr = tlb_entry.vaddr;
 
  161     Addr first_entry_paddr = tlb_entry.paddr;
 
  162     int page_size = tlb_entry.size();
 
  163     bool uncacheable = tlb_entry.uncacheable();
 
  164     int first_hit_level = sender_state->
hitLevel;
 
  165     bool is_system = pkt->
req->systemReq();
 
  170             safe_cast<GpuTranslationState*>(local_pkt->
senderState);
 
  175             sender_state->
reqCnt.pop_back();
 
  186             Addr paddr = first_entry_paddr
 
  187                        + (local_pkt->
req->getVaddr() & (page_size - 1));
 
  188             local_pkt->
req->setPaddr(paddr);
 
  197             if (sender_state->
tlbEntry == NULL) {
 
  200                     new VegaISA::VegaTlbEntry(1 ,
 
  210             sender_state->
hitLevel = first_hit_level;
 
  214         local_pkt->
req->setSystemReq(is_system);
 
  217         sender_state->
ports.pop_back();
 
  247     bool didCoalesce = 
false;
 
  249     int coalescedReq_cnt = 0;
 
  254     bool update_stats = !sender_state->
isPrefetch;
 
  260     sender_state->
ports.push_back(
this);
 
  270         if (!sender_state->
reqCnt.empty())
 
  271             req_cnt = sender_state->
reqCnt.back();
 
  273         sender_state->
reqCnt.push_back(req_cnt);
 
  277         req_cnt = sender_state->
reqCnt.back();
 
  278         DPRINTF(GPUTLB, 
"receiving pkt w/ req_cnt %d\n", req_cnt);
 
  299     for (
int i = 0; 
i < coalescedReq_cnt; ++
i) {
 
  305             DPRINTF(GPUTLB, 
"Coalesced req %i w/ tick_index %d has %d reqs\n",
 
  317     if (!coalescedReq_cnt || !didCoalesce) {
 
  322         new_array.push_back(pkt);
 
  325         DPRINTF(GPUTLB, 
"coalescerFIFO[%d] now has %d coalesced reqs after " 
  326                 "push\n", tick_index,
 
  343     panic(
"recvReqRetry called");
 
  353     bool update_stats = !sender_state->
isPrefetch;
 
  356         coalescer->uncoalescedAccesses++;
 
  359     int map_count = coalescer->issuedTranslationsTable.count(virt_page_addr);
 
  362         DPRINTF(GPUTLB, 
"Warning! Functional access to addr %#x sees timing " 
  363                 "req. pending\n", virt_page_addr);
 
  366     coalescer->memSidePort[0]->sendFunctional(pkt);
 
  384     coalescer->updatePhysAddresses(pkt);
 
  386     if (coalescer->tlb_level != 1)
 
  390     coalescer->decrementNumDownstream();
 
  393             "recvTimingReq: clscr = %p, numDownstream = %d, max = %d\n",
 
  394             coalescer, coalescer->numDownstream, coalescer->maxDownstream);
 
  396     coalescer->unstallPorts();
 
  404     if (!coalescer->probeTLBEvent.scheduled())
 
  405         coalescer->schedule(coalescer->probeTLBEvent,
 
  406                 curTick() + coalescer->clockPeriod());
 
  412     fatal(
"Memory side recvFunctional() not implemented in TLB coalescer.\n");
 
  437     DPRINTF(GPUTLB, 
"triggered VegaTLBCoalescer %s\n", __func__);
 
  441         DPRINTF(GPUTLB, 
"IssueProbeEvent - no downstream slots, bail out\n");
 
  447         int coalescedReq_cnt = iter->second.size();
 
  449         int vector_index = 0;
 
  451         DPRINTF(GPUTLB, 
"coalescedReq_cnt is %d for tick_index %d\n",
 
  452                coalescedReq_cnt, iter->first);
 
  454         while (
i < coalescedReq_cnt) {
 
  456             PacketPtr first_packet = iter->second[vector_index][0];
 
  474                 DPRINTF(GPUTLB, 
"Cannot issue - There are pending reqs for " 
  475                         "page %#x\n", virt_page_addr);
 
  482             if (!
memSidePort[0]->sendTimingReq(first_packet)) {
 
  484                         "Failed to send TLB request for page %#x",
 
  499                     safe_cast<GpuTranslationState*>(first_packet->
senderState);
 
  501                 bool update_stats = !tmp_sender_state->
isPrefetch;
 
  507                     int req_cnt = tmp_sender_state->
reqCnt.back();
 
  510                     DPRINTF(GPUTLB, 
"%s sending pkt w/ req_cnt %d\n",
 
  515                     int pkt_cnt = iter->second[vector_index].size();
 
  519                 DPRINTF(GPUTLB, 
"Successfully sent TLB request for page %#x\n",
 
  524                     = iter->second[vector_index];
 
  527                 iter->second.erase(iter->second.begin() + vector_index);
 
  529                 if (iter->second.empty())
 
  530                     assert( 
i == coalescedReq_cnt );
 
  539                     if (iter->second.empty())
 
  555         if (iter->second.empty()) {
 
  571         DPRINTF(GPUTLB, 
"Cleanup - Delete coalescer entry with key %#x\n",
 
  582         .
name(
name() + 
".uncoalesced_accesses")
 
  583         .desc(
"Number of uncoalesced TLB accesses")
 
  587         .
name(
name() + 
".coalesced_accesses")
 
  588         .desc(
"Number of coalesced TLB accesses")
 
  593         .desc(
"Number of cycles spent in queue")
 
  597         .
name(
name() + 
".local_queuing_cycles")
 
  598         .desc(
"Number of cycles spent in queue for all incoming reqs")
 
  603         .desc(
"Number of cycles spent in queue for all incoming reqs")
 
  608         .desc(
"Avg. latency over all incoming pkts")
 
  613         .desc(
"Avg. latency over all incoming pkts")
 
  630             "insertStalledPortIfNotMapped: port %p, mapSz = %d, qsz = %d\n",
 
  639     DPRINTF(GPUTLB, 
"mustStallCUPort: downstream = %d, max = %d\n",
 
  643         warn(
"RED ALERT - VegaTLBCoalescer::mustStallCUPort\n");
 
  658     DPRINTF(GPUTLB, 
"unstallPorts()\n");
 
  681     DPRINTF(GPUTLB, 
"sending retry for port = %p(%s)\n", port, port->name());
 
  686     port->sendRetryReq(); 
 
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
Cycles curCycle() const
Determine the current cycle, corresponding to a tick aligned to a clock edge.
Tick cyclesToTicks(Cycles c) const
Cycles is a wrapper class for representing cycle counts, i.e.
virtual std::string name() const
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
void makeTimingResponse()
SenderState * senderState
This packet's sender state.
RequestPtr req
A pointer to the original request.
@ UNCACHEABLE
The request is to an uncacheable address.
A ResponsePort is a specialization of a port.
bool sendTimingResp(PacketPtr pkt)
Attempt to send a timing response to the request port by calling its corresponding receive function.
virtual void recvReqRetry()
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
VegaTLBCoalescer * coalescer
virtual void recvFunctional(PacketPtr pkt)
Receive a functional request packet from the peer.
virtual bool recvTimingReq(PacketPtr pkt)
Receive a timing request from the peer.
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
virtual void recvFunctional(PacketPtr pkt)
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.
statistics::Scalar localqueuingCycles
statistics::Scalar coalescedAccesses
void processCleanupEvent()
CoalescingTable issuedTranslationsTable
void updatePhysAddresses(PacketPtr pkt)
statistics::Scalar queuingCycles
EventFunctionWrapper cleanupEvent
The cleanupEvent is scheduled after a TLBEvent triggers in order to free memory and do the required c...
std::queue< CpuSidePort * > stalledPortsQueue
void incrementNumDownstream()
statistics::Scalar uncoalescedAccesses
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
std::vector< CpuSidePort * > cpuSidePort
std::vector< MemSidePort * > memSidePort
EventFunctionWrapper probeTLBEvent
This event issues the TLB probes.
VegaTLBCoalescer(const VegaTLBCoalescerParams &p)
void insertStalledPortIfNotMapped(CpuSidePort *)
CoalescingFIFO coalescerFIFO
std::map< CpuSidePort *, CpuSidePort * > stalledPortsMap
bool canCoalesce(PacketPtr pkt1, PacketPtr pkt2)
void regStats() override
Callback to set stat parameters.
statistics::Formula localLatency
unsigned int availDownstreamSlots()
std::queue< Addr > cleanupQueue
bool mustStallCUPort(CpuSidePort *)
statistics::Scalar localCycles
unsigned int numDownstream
void processProbeTLBEvent()
statistics::Formula latency
Derived & name(const std::string &name)
Set the name and marks this stat to print at the end of simulation.
static constexpr T roundDown(const T &val, const U &align)
This function is used to align addresses in memory.
static const Priority Maximum_Pri
Maximum priority.
bool scheduled() const
Determine if the current event is scheduled.
void schedule(Event &event, Tick when)
static const Priority CPU_Tick_Pri
CPU ticks must come after other associated CPU events (such as writebacks).
#define panic(...)
This implements a cprintf based panic() function.
#define fatal(...)
This implements a cprintf based fatal() function.
virtual void regStats()
Callback to set stat parameters.
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Tick curTick()
The universal simulation clock.
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
uint64_t Tick
Tick count type.
std::string csprintf(const char *format, const Args &...args)
GPU TranslationState: this currently is a somewhat bastardization of the usage of SenderState,...
std::vector< ResponsePort * > ports
std::vector< int > reqCnt
const std::string & name()