// (preceding license header and other includes elided)
#include "debug/GPUTLB.hh"

// VegaTLBCoalescer constructor: copy the parameters, set up the probe and
// cleanup events, and build one CPU-side / memory-side port per connection.
VegaTLBCoalescer::VegaTLBCoalescer(const VegaTLBCoalescerParams &p)
    : ClockedObject(p),
      TLBProbesPerCycle(p.probesPerCycle),
      coalescingWindow(p.coalescingWindow),
      disableCoalescing(p.disableCoalescing),
      probeTLBEvent([this]{ processProbeTLBEvent(); },
                    "Probe the TLB below",
                    false, Event::CPU_Tick_Pri),
      cleanupEvent([this]{ processCleanupEvent(); },
                   "Cleanup issuedTranslationsTable hashmap",
                   false, Event::Maximum_Pri),
      tlb_level(p.tlb_level),
      maxDownstream(p.maxDownstream),
      // ... remaining member initializers elided ...
{
    for (size_t i = 0; i < p.port_cpu_side_ports_connection_count; ++i) {
        cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d", name(), i),
                                              this, i));
    }

    for (size_t i = 0; i < p.port_mem_side_ports_connection_count; ++i) {
        memSidePort.push_back(new MemSidePort(csprintf("%s-port%d", name(), i),
                                              this, i));
    }
}
// VegaTLBCoalescer::getPort: map a port name and index onto the matching
// element of cpuSidePort or memSidePort.
Port &
VegaTLBCoalescer::getPort(const std::string &if_name, PortID idx)
{
    if (if_name == "cpu_side_ports") {
        if (idx >= static_cast<PortID>(cpuSidePort.size())) {
            panic("VegaTLBCoalescer::getPort: unknown index %d\n", idx);
        }

        return *cpuSidePort[idx];
    } else if (if_name == "mem_side_ports") {
        if (idx >= static_cast<PortID>(memSidePort.size())) {
            panic("VegaTLBCoalescer::getPort: unknown index %d\n", idx);
        }

        return *memSidePort[idx];
    } else {
        panic("VegaTLBCoalescer::getPort: unknown port %s\n", if_name);
    }
}
// VegaTLBCoalescer::canCoalesce: two translation requests can be coalesced
// only if they target the same virtual page and use the same access mode.
    GpuTranslationState *incoming_state =
        safe_cast<GpuTranslationState*>(incoming_pkt->senderState);
    GpuTranslationState *coalesced_state =
        safe_cast<GpuTranslationState*>(coalesced_pkt->senderState);

    // ... compute the virtual page address of each request ...
    if (incoming_virt_page_addr != coalesced_virt_page_addr)
        return false;

    if (incoming_mode != coalesced_mode)
        return false;

    // the coalesced packet now also represents the incoming request
    coalesced_state->reqCnt.back() += incoming_state->reqCnt.back();
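To make the condition concrete, here is a minimal, self-contained sketch of the same predicate. This is illustrative only: the simplified VirtReq struct, Mode enum and PAGE_BYTES constant are assumptions, not gem5 types.

#include <cstdint>
#include <cassert>

// Hypothetical stand-ins for the gem5 request and translation-mode types.
enum class Mode { Read, Write, Execute };
struct VirtReq { uint64_t vaddr; Mode mode; };

constexpr uint64_t PAGE_BYTES = 4096;   // assumed 4 KiB pages

// Same idea as canCoalesce(): identical virtual page and identical mode.
bool canCoalesceSketch(const VirtReq &a, const VirtReq &b)
{
    const uint64_t page_a = a.vaddr & ~(PAGE_BYTES - 1);
    const uint64_t page_b = b.vaddr & ~(PAGE_BYTES - 1);
    return page_a == page_b && a.mode == b.mode;
}

int main()
{
    assert(canCoalesceSketch({0x1000, Mode::Read}, {0x1ff8, Mode::Read}));
    assert(!canCoalesceSketch({0x1000, Mode::Read}, {0x2000, Mode::Read}));
    assert(!canCoalesceSketch({0x1000, Mode::Read}, {0x1008, Mode::Write}));
    return 0;
}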
// VegaTLBCoalescer::updatePhysAddresses: the lead packet of a coalesced
// group has been translated; propagate the physical page to every packet
// that was coalesced for the same virtual page.
    DPRINTF(GPUTLB,
            "Update phys. addr. for %d coalesced reqs for page %#x\n",
            issuedTranslationsTable[virt_page_addr].size(), virt_page_addr);

    // copy the fields of the first (already translated) entry
    VegaISA::VegaTlbEntry tlb_entry =
        *safe_cast<VegaISA::VegaTlbEntry *>(sender_state->tlbEntry);
    Addr first_entry_vaddr = tlb_entry.vaddr;
    Addr first_entry_paddr = tlb_entry.paddr;
    int page_size = tlb_entry.size();
    bool uncacheable = tlb_entry.uncacheable();
    int first_hit_level = sender_state->hitLevel;
    bool is_system = pkt->req->systemReq();

    // for every packet coalesced for this page:
        GpuTranslationState *sender_state =
            safe_cast<GpuTranslationState*>(local_pkt->senderState);

        // on the way back up the hierarchy, pop this level's request count
        sender_state->reqCnt.pop_back();

        // lead translation's page frame plus this packet's own page offset
        Addr paddr = first_entry_paddr
            + (local_pkt->req->getVaddr() & (page_size - 1));
        local_pkt->req->setPaddr(paddr);
        // (uncacheable translations also set Request::UNCACHEABLE here)

        // if the level below did not attach a TLB entry, create one from
        // the lead translation
        if (sender_state->tlbEntry == NULL) {
            // ... new VegaISA::VegaTlbEntry(1, first_entry_vaddr,
            //                               first_entry_paddr, ...) ...
        }

        // record the level at which the translation was serviced
        sender_state->hitLevel = first_hit_level;

        local_pkt->req->setSystemReq(is_system);

        // reply through the CPU-side port the packet arrived on
        sender_state->ports.pop_back();
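The offset arithmetic above is the core of the fan-out: every coalesced packet reuses the lead packet's translated page frame, and only its own page offset differs. A small self-contained example of that arithmetic follows; the concrete addresses and the 4 KiB page size are assumptions for illustration.

#include <cstdint>
#include <cstdio>

int main()
{
    const uint64_t page_size = 4096;               // assumed page size
    const uint64_t first_entry_paddr = 0x80000000; // example translated page frame

    // Virtual addresses of three requests coalesced onto the same page.
    const uint64_t vaddrs[] = {0x7fff0000, 0x7fff0040, 0x7fff0ff8};

    for (uint64_t vaddr : vaddrs) {
        // paddr = translated page base + offset within the page
        uint64_t paddr = first_entry_paddr + (vaddr & (page_size - 1));
        std::printf("vaddr %#lx -> paddr %#lx\n",
                    (unsigned long)vaddr, (unsigned long)paddr);
    }
    return 0;
}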
// VegaTLBCoalescer::CpuSidePort::recvTimingReq: an incoming translation
// request either joins an existing coalesced group for the current
// coalescing window or starts a new group in coalescerFIFO.
    bool didCoalesce = false;
    int coalescedReq_cnt = 0;

    GpuTranslationState *sender_state =
        safe_cast<GpuTranslationState*>(pkt->senderState);
    bool update_stats = !sender_state->isPrefetch;

    // remember the return path for the eventual response
    sender_state->ports.push_back(this);

    if (update_stats) {
        // reqCnt records, per TLB level, how many uncoalesced requests
        // this packet represents
        int req_cnt = 1;
        if (!sender_state->reqCnt.empty())
            req_cnt = sender_state->reqCnt.back();
        sender_state->reqCnt.push_back(req_cnt);

        req_cnt = sender_state->reqCnt.back();
        DPRINTF(GPUTLB, "receiving pkt w/ req_cnt %d\n", req_cnt);
        // ... queuing-cycle statistics are charged here ...
    }

    // coalescedReq_cnt = number of groups already queued for this
    // coalescing-window index (tick_index); try to merge into one of them
    for (int i = 0; i < coalescedReq_cnt; ++i) {
        PacketPtr first_packet = coalescer->coalescerFIFO[tick_index][i][0];
        if (coalescer->canCoalesce(pkt, first_packet)) {
            coalescer->coalescerFIFO[tick_index][i].push_back(pkt);
            DPRINTF(GPUTLB,
                    "Coalesced req %i w/ tick_index %d has %d reqs\n",
                    i, tick_index,
                    coalescer->coalescerFIFO[tick_index][i].size());
            didCoalesce = true;
            break;
        }
    }

    // otherwise start a new group containing just this packet
    if (!coalescedReq_cnt || !didCoalesce) {
        std::vector<PacketPtr> new_array;
        new_array.push_back(pkt);
        coalescer->coalescerFIFO[tick_index].push_back(new_array);
    }

    DPRINTF(GPUTLB, "coalescerFIFO[%d] now has %d coalesced reqs after "
            "push\n", tick_index,
            coalescer->coalescerFIFO[tick_index].size());

// VegaTLBCoalescer::CpuSidePort::recvReqRetry is never expected to be called.
    panic("recvReqRetry called");
// VegaTLBCoalescer::CpuSidePort::recvFunctional: functional translations
// bypass the coalescing logic and go straight to the TLB below.
    bool update_stats = !sender_state->isPrefetch;
    if (update_stats)
        coalescer->uncoalescedAccesses++;

    // warn if a timing request for the same page is still in flight
    int map_count = coalescer->issuedTranslationsTable.count(virt_page_addr);
    if (map_count) {
        DPRINTF(GPUTLB,
                "Warning! Functional access to addr %#x sees timing "
                "req. pending\n", virt_page_addr);
    }

    coalescer->memSidePort[0]->sendFunctional(pkt);
// MemSidePort::recvTimingResp: a translation returned from the TLB below.
    coalescer->updatePhysAddresses(pkt);

    if (coalescer->tlb_level != 1)
        return true;

    // L1 coalescer: free a downstream slot and retry stalled CU ports
    coalescer->decrementNumDownstream();

    DPRINTF(GPUTLB,
            "recvTimingReq: clscr = %p, numDownstream = %d, max = %d\n",
            coalescer, coalescer->numDownstream, coalescer->maxDownstream);

    coalescer->unstallPorts();

// MemSidePort::recvReqRetry: schedule a probe to re-issue pending requests.
    if (!coalescer->probeTLBEvent.scheduled())
        coalescer->schedule(coalescer->probeTLBEvent,
                curTick() + coalescer->clockPeriod());

// MemSidePort::recvFunctional is not supported on the memory side.
    fatal("Memory side recvFunctional() not implemented in TLB coalescer.\n");
// VegaTLBCoalescer::processProbeTLBEvent: walk coalescerFIFO in tick order
// and issue the lead packet of each coalesced group to the TLB below.
    DPRINTF(GPUTLB, "triggered VegaTLBCoalescer %s\n", __func__);

    // the L1 coalescer bails out when it has no downstream slots left:
    //     DPRINTF(GPUTLB, "IssueProbeEvent - no downstream slots, bail out\n");

    // for each tick_index entry in coalescerFIFO:
        int coalescedReq_cnt = iter->second.size();
        int i = 0;
        int vector_index = 0;

        DPRINTF(GPUTLB, "coalescedReq_cnt is %d for tick_index %d\n",
                coalescedReq_cnt, iter->first);

        while (i < coalescedReq_cnt) {
            ++i;
            PacketPtr first_packet = iter->second[vector_index][0];
            // virt_page_addr: virtual page of first_packet (computation elided)

            // is a request for the same page already in flight?
            if (issuedTranslationsTable.count(virt_page_addr)) {
                DPRINTF(GPUTLB, "Cannot issue - There are pending reqs for "
                        "page %#x\n", virt_page_addr);
                ++vector_index;
                continue;
            }

            if (!memSidePort[0]->sendTimingReq(first_packet)) {
                // "Failed to send TLB request for page %#x": the group stays
                // buffered in coalescerFIFO and is retried later
                ++vector_index;
            } else {
                GpuTranslationState *tmp_sender_state =
                    safe_cast<GpuTranslationState*>(first_packet->senderState);

                bool update_stats = !tmp_sender_state->isPrefetch;

                if (update_stats) {
                    // requests represented by this packet across the
                    // whole hierarchy ...
                    int req_cnt = tmp_sender_state->reqCnt.back();
                    DPRINTF(GPUTLB, "%s sending pkt w/ req_cnt %d\n",
                            name(), req_cnt);

                    // ... and at this coalescer level only
                    int pkt_cnt = iter->second[vector_index].size();
                }

                DPRINTF(GPUTLB,
                        "Successfully sent TLB request for page %#x\n",
                        virt_page_addr);

                // park the group so the response can be fanned out, then
                // drop it from the FIFO
                issuedTranslationsTable[virt_page_addr]
                    = iter->second[vector_index];
                iter->second.erase(iter->second.begin() + vector_index);

                if (iter->second.empty())
                    assert(i == coalescedReq_cnt);
            }
        }

        // a tick_index entry with no remaining groups is erased:
        //     if (iter->second.empty()) { ... }
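The pending-page bookkeeping around sendTimingReq is what lets a single downstream probe serve a whole group: the group is parked in issuedTranslationsTable until the response arrives, and the cleanup pass shown next erases the entry. A compact, self-contained sketch of that life cycle follows; the types and function names here are assumptions for illustration, not the gem5 ones.

#include <cstdint>
#include <queue>
#include <unordered_map>
#include <vector>
#include <cstdio>

struct Pkt { uint64_t vaddr; };                        // hypothetical packet
using Group = std::vector<Pkt*>;

std::unordered_map<uint64_t, Group> issuedTable;       // page -> in-flight group
std::queue<uint64_t> cleanupQueue;                     // pages to erase later

// Issue one probe per page: refuse if a probe for that page is in flight.
bool tryIssue(uint64_t page, Group group)
{
    if (issuedTable.count(page))
        return false;                                  // the "Cannot issue" case
    issuedTable[page] = std::move(group);              // park the group
    // ... send the lead packet downstream here ...
    return true;
}

// When the translation returns, fan it out and defer the table cleanup.
void onResponse(uint64_t page)
{
    for (Pkt *p : issuedTable[page]) {
        (void)p;                                       // update paddr, reply, ...
    }
    cleanupQueue.push(page);                           // erased by the cleanup pass
}

void processCleanup()
{
    while (!cleanupQueue.empty()) {
        issuedTable.erase(cleanupQueue.front());
        cleanupQueue.pop();
    }
}

int main()
{
    Pkt a{0x1000}, b{0x1040};
    std::printf("issued: %d\n", tryIssue(0x1000, {&a, &b}));    // 1
    std::printf("issued again: %d\n", tryIssue(0x1000, {&a}));  // 0, still pending
    onResponse(0x1000);
    processCleanup();
    std::printf("issued after cleanup: %d\n", tryIssue(0x1000, {&a})); // 1
    return 0;
}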
// VegaTLBCoalescer::processCleanupEvent: for each page address popped from
// cleanupQueue, erase its issuedTranslationsTable entry.
        DPRINTF(GPUTLB,
                "Cleanup - Delete coalescer entry with key %#x\n",
                cleanup_addr);
// VegaTLBCoalescer::regStats (excerpt): statistic names and descriptions.
// Where the .name() call was not captured, the owning stat is inferred from
// the member list below.
    uncoalescedAccesses
        .name(name() + ".uncoalesced_accesses")
        .desc("Number of uncoalesced TLB accesses")
        ;

    coalescedAccesses
        .name(name() + ".coalesced_accesses")
        .desc("Number of coalesced TLB accesses")
        ;

    queuingCycles
        // .name(...) elided in this excerpt
        .desc("Number of cycles spent in queue")
        ;

    localqueuingCycles
        .name(name() + ".local_queuing_cycles")
        .desc("Number of cycles spent in queue for all incoming reqs")
        ;

    localCycles
        // .name(...) elided in this excerpt
        .desc("Number of cycles spent in queue for all incoming reqs")
        ;

    latency
        // .name(...) elided in this excerpt
        .desc("Avg. latency over all incoming pkts")
        ;

    localLatency
        // .name(...) elided in this excerpt
        .desc("Avg. latency over all incoming pkts")
        ;
630 "insertStalledPortIfNotMapped: port %p, mapSz = %d, qsz = %d\n",
639 DPRINTF(GPUTLB,
"mustStallCUPort: downstream = %d, max = %d\n",
643 warn(
"RED ALERT - VegaTLBCoalescer::mustStallCUPort\n");
658 DPRINTF(GPUTLB,
"unstallPorts()\n");
681 DPRINTF(GPUTLB,
"sending retry for port = %p(%s)\n", port, port->name());
686 port->sendRetryReq();
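The stall/unstall pair implements a simple credit scheme on the path to the TLB below: at most maxDownstream probes may be outstanding, each issue consumes a slot, each response returns one, and CU-side ports that were refused get a retry once a slot is free. A self-contained sketch of that scheme follows; the Port type, its callback and the class name are assumptions for illustration.

#include <cstdio>
#include <queue>
#include <set>

struct Port
{
    int id;
    void sendRetryReq() { std::printf("retry port %d\n", id); }
};

class DownstreamCredits
{
    unsigned maxDownstream;
    unsigned numDownstream = 0;
    std::queue<Port*> stalledQueue;     // retry order
    std::set<Port*> stalledSet;         // avoid queuing a port twice

  public:
    explicit DownstreamCredits(unsigned max) : maxDownstream(max) {}

    unsigned availSlots() const { return maxDownstream - numDownstream; }

    // mustStall: true (and remember the port) when no slots are left.
    bool mustStall(Port *p)
    {
        if (availSlots() > 0)
            return false;
        if (stalledSet.insert(p).second)
            stalledQueue.push(p);
        return true;
    }

    void issue()    { ++numDownstream; }              // probe sent downstream
    void complete() { --numDownstream; unstall(); }   // response returned

    // unstall: while slots are free, retry stalled ports in FIFO order.
    void unstall()
    {
        while (availSlots() > 0 && !stalledQueue.empty()) {
            Port *p = stalledQueue.front();
            stalledQueue.pop();
            stalledSet.erase(p);
            p->sendRetryReq();
        }
    }
};

int main()
{
    DownstreamCredits credits(1);
    Port cu0{0}, cu1{1};

    if (!credits.mustStall(&cu0)) credits.issue();  // slot taken by cu0
    if (credits.mustStall(&cu1))                    // cu1 must wait
        std::printf("port 1 stalled\n");

    credits.complete();                             // response: retry port 1
    return 0;
}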

Symbols cross-referenced in this listing (declaration, followed by the brief description doxygen attaches to it):

bool mustStallCUPort(CpuSidePort *)
void regStats() override
Callback to set stat parameters.
Tick curTick()
The universal simulation clock.
#define fatal(...)
This implements a cprintf based fatal() function.
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
statistics::Scalar queuingCycles
std::map< CpuSidePort *, CpuSidePort * > stalledPortsMap
Cycles curCycle() const
Determine the current cycle, corresponding to a tick aligned to a clock edge.
GpuTranslationState
GPU TranslationState: this currently is a somewhat bastardization of the usage of SenderState,...
std::vector< ResponsePort * > ports
VegaTLBCoalescer(const VegaTLBCoalescerParams &p)
void processCleanupEvent()
RequestPtr req
A pointer to the original request.
statistics::Scalar coalescedAccesses
CoalescingTable issuedTranslationsTable
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
std::queue< CpuSidePort * > stalledPortsQueue
statistics::Scalar localqueuingCycles
void schedule(Event &event, Tick when)
std::string csprintf(const char *format, const Args &...args)
VegaTLBCoalescer * coalescer
Cycles
Cycles is a wrapper class for representing cycle counts, i.e.
void incrementNumDownstream()
unsigned int availDownstreamSlots()
Derived & name(const std::string &name)
Set the name and mark this stat to print at the end of simulation.
virtual std::string name() const
Tick cyclesToTicks(Cycles c) const
Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
uint64_t Tick
Tick count type.
static const Priority Maximum_Pri
Maximum priority.
void insertStalledPortIfNotMapped(CpuSidePort *)
virtual void recvFunctional(PacketPtr pkt)
bool sendTimingResp(PacketPtr pkt)
Attempt to send a timing response to the request port by calling its corresponding receive function.
EventFunctionWrapper probeTLBEvent
This event issues the TLB probes.
CoalescingFIFO coalescerFIFO
virtual void recvReqRetry()
static constexpr T roundDown(const T &val, const U &align)
This function is used to align addresses in memory.
UNCACHEABLE (enum value)
The request is to an uncacheable address.
std::vector< int > reqCnt
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
SenderState * senderState
This packet's sender state.
const std::string & name()
virtual void regStats()
Callback to set stat parameters.
ClockedObject
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
bool canCoalesce(PacketPtr pkt1, PacketPtr pkt2)
ResponsePort
A ResponsePort is a specialization of a port.
statistics::Scalar uncoalescedAccesses
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
void makeTimingResponse()
Derived & desc(const std::string &_desc)
Set the description and mark this stat to print at the end of simulation.
statistics::Scalar localCycles
unsigned int numDownstream
std::queue< Addr > cleanupQueue
EventFunctionWrapper cleanupEvent
The cleanupEvent is scheduled after a TLBEvent triggers in order to free memory and do the required c...
virtual void recvFunctional(PacketPtr pkt)
Receive a functional request packet from the peer.
static const Priority CPU_Tick_Pri
CPU ticks must come after other associated CPU events (such as writebacks).
statistics::Formula localLatency
statistics::Formula latency
std::vector< CpuSidePort * > cpuSidePort
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.
void processProbeTLBEvent()
virtual bool recvTimingReq(PacketPtr pkt)
Receive a timing request from the peer.
bool scheduled() const
Determine if the current event is scheduled.
void updatePhysAddresses(PacketPtr pkt)
#define panic(...)
This implements a cprintf based panic() function.
std::vector< MemSidePort * > memSidePort