tlb.cc
/*
 * Copyright (c) 2021 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "arch/amdgpu/vega/tlb.hh"

#include <cmath>
#include <cstring>

#include "arch/amdgpu/common/gpu_translation_state.hh"
#include "arch/amdgpu/vega/faults.hh"
#include "arch/amdgpu/vega/pagetable_walker.hh"
#include "debug/GPUPrefetch.hh"
#include "debug/GPUTLB.hh"
#include "dev/amdgpu/amdgpu_device.hh"

namespace gem5
{
namespace VegaISA
{

// We have no limit on the number of translations we send downstream,
// as we rely on the limit imposed by the coalescer above us.
GpuTLB::GpuTLB(const VegaGPUTLBParams &p)
    : ClockedObject(p), walker(p.walker),
      gpuDevice(p.gpu_device), size(p.size), stats(this),
      cleanupEvent([this]{ cleanup(); }, name(), false,
                   Event::Maximum_Pri)
{
    assoc = p.assoc;
    assert(assoc <= size);
    numSets = size / assoc;
    allocationPolicy = p.allocationPolicy;
    hasMemSidePort = false;

    tlb.assign(size, VegaTlbEntry());

    freeList.resize(numSets);
    entryList.resize(numSets);

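    // The backing store is a flat vector carved into sets: the entry for
    // (set, way) lives at index set * assoc + way. For example, with
    // size = 64 and assoc = 8 there are 8 sets, and set 3 owns indices
    // 24..31. Every entry starts out on its set's free list.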
    for (int set = 0; set < numSets; ++set) {
        for (int way = 0; way < assoc; ++way) {
            int x = set * assoc + way;
            freeList[set].push_back(&tlb.at(x));
        }
    }

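    // A fully-associative TLB degenerates to a single set. The set index
    // is computed as (vaddr >> PageShift) & setMask, which is a valid
    // modulus only if numSets is a power of two (e.g., numSets = 8 gives
    // setMask = 0b111); the configuration is assumed to guarantee this.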
    FA = (size == assoc);
    setMask = numSets - 1;

    maxCoalescedReqs = p.maxOutstandingReqs;

    outstandingReqs = 0;
    hitLatency = p.hitLatency;
    missLatency1 = p.missLatency1;
    missLatency2 = p.missLatency2;

    // create the response ports based on the number of connected ports
    for (size_t i = 0; i < p.port_cpu_side_ports_connection_count; ++i) {
        cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
                              name(), i), this, i));
    }

    // create the requestor ports based on the number of connected ports
    for (size_t i = 0; i < p.port_mem_side_ports_connection_count; ++i) {
        memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
                              name(), i), this, i));
    }

    // Assuming one walker per TLB, set our walker's TLB to this TLB.
    walker->setTLB(this);

    // gpuDevice should be non-null in full system only and is set by GpuTLB
    // params from the config file.
    if (gpuDevice) {
        gpuDevice->getVM().registerTLB(this);
    }
}

GpuTLB::~GpuTLB()
{
}

Port &
GpuTLB::getPort(const std::string &if_name, PortID idx)
{
    if (if_name == "cpu_side_ports") {
        if (idx >= static_cast<PortID>(cpuSidePort.size())) {
            panic("GpuTLB::getPort: unknown index %d\n", idx);
        }

        return *cpuSidePort[idx];
    } else if (if_name == "mem_side_ports") {
        if (idx >= static_cast<PortID>(memSidePort.size())) {
            panic("GpuTLB::getPort: unknown index %d\n", idx);
        }

        hasMemSidePort = true;

        return *memSidePort[idx];
    } else {
        panic("GpuTLB::getPort: unknown port %s\n", if_name);
    }
}

Fault
GpuTLB::createPagefault(Addr vaddr, Mode mode)
{
    DPRINTF(GPUTLB, "GPUTLB: Raising page fault.\n");
    ExceptionCode code;
    if (mode == BaseMMU::Read)
        code = ExceptionCode::LOAD_PAGE;
    else if (mode == BaseMMU::Write)
        code = ExceptionCode::STORE_PAGE;
    else
        code = ExceptionCode::INST_PAGE;
    return std::make_shared<PageFault>(vaddr, code, true, mode, true);
}

Addr
GpuTLB::pageAlign(Addr vaddr)
{
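    // mask(PageShift) has the low PageShift bits set, i.e., it is the
    // in-page offset mask; clearing those bits rounds vaddr down to the
    // base of its page. For example, with 4 KiB pages (PageShift == 12),
    // pageAlign(0x12345) == 0x12000.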
    Addr pageMask = mask(VegaISA::PageShift);
    return (vaddr & ~pageMask);
}

VegaTlbEntry*
GpuTLB::insert(Addr vpn, VegaTlbEntry &entry)
{
    VegaTlbEntry *newEntry = nullptr;

    int set = (entry.vaddr >> VegaISA::PageShift) & setMask;

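    // Take a free slot if one exists; otherwise evict the least recently
    // used entry, which lives at the back of this set's entryList.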
    if (!freeList[set].empty()) {
        newEntry = freeList[set].front();
        freeList[set].pop_front();
    } else {
        newEntry = entryList[set].back();
        entryList[set].pop_back();
    }

    *newEntry = entry;
    entryList[set].push_front(newEntry);

    DPRINTF(GPUTLB, "Inserted %#lx -> %#lx of size %#lx into set %d\n",
            newEntry->vaddr, newEntry->paddr, entry.size(), set);

    return newEntry;
}

GpuTLB::EntryList::iterator
GpuTLB::lookupIt(Addr va, bool update_lru)
{
    int set = (va >> VegaISA::PageShift) & setMask;

    if (FA) {
        assert(!set);
    }

    auto entry = entryList[set].begin();
    for (; entry != entryList[set].end(); ++entry) {
        int page_size = (*entry)->size();

        if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
            DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
                    "with size %#x.\n", va, (*entry)->vaddr, page_size);

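            // entryList is kept as an LRU stack: most recently used at
            // the front, eviction victim at the back. Splicing the
            // matched entry to the front preserves that invariant.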
            if (update_lru) {
                entryList[set].push_front(*entry);
                entryList[set].erase(entry);
                entry = entryList[set].begin();
            }

            break;
        }
    }

    return entry;
}

VegaTlbEntry*
GpuTLB::lookup(Addr va, bool update_lru)
{
    int set = (va >> VegaISA::PageShift) & setMask;

    auto entry = lookupIt(va, update_lru);

    if (entry == entryList[set].end())
        return nullptr;
    else
        return *entry;
}

void
GpuTLB::invalidateAll()
{
    DPRINTF(GPUTLB, "Invalidating all entries.\n");

    for (int i = 0; i < numSets; ++i) {
        while (!entryList[i].empty()) {
            VegaTlbEntry *entry = entryList[i].front();
            entryList[i].pop_front();
            freeList[i].push_back(entry);
        }
    }
}

void
GpuTLB::demapPage(Addr va, uint64_t asn)
{
    int set = (va >> VegaISA::PageShift) & setMask;
    auto entry = lookupIt(va, false);

    if (entry != entryList[set].end()) {
        freeList[set].push_back(*entry);
        entryList[set].erase(entry);
    }
}

/**
 * TLB_lookup will only perform a TLB lookup returning the TLB entry
 * on a TLB hit and nullptr on a TLB miss.
 */
VegaTlbEntry *
GpuTLB::tlbLookup(const RequestPtr &req, bool update_stats)
{
    if (req->hasNoAddr()) {
        return nullptr;
    }
    Addr vaddr = req->getVaddr();
    Addr alignedVaddr = pageAlign(vaddr);
    DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);

    // update LRU stack on a hit
    VegaTlbEntry *entry = lookup(alignedVaddr, true);

    if (!update_stats) {
        // Functional TLB access for memory initialization, i.e., memory
        // seeding or instruction seeding -> don't update TLB and stats.
        return entry;
    }

    stats.localNumTLBAccesses++;

    if (!entry) {
        stats.localNumTLBMisses++;
    } else {
        stats.localNumTLBHits++;
    }

    return entry;
}

Walker*
GpuTLB::getWalker()
{
    return walker;
}

void
GpuTLB::serialize(CheckpointOut &cp) const
{
}

void
GpuTLB::unserialize(CheckpointIn &cp)
{
}

/**
 * Do the TLB lookup for this coalesced request and schedule another
 * event <TLB access latency> cycles later.
 */

void
GpuTLB::issueTLBLookup(PacketPtr pkt)
{
    assert(pkt);
    assert(pkt->senderState);

    Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                    VegaISA::PageBytes);

    GpuTranslationState *sender_state =
        safe_cast<GpuTranslationState*>(pkt->senderState);

    bool update_stats = !sender_state->isPrefetch;

    DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
            virt_page_addr);

    int req_cnt = sender_state->reqCnt.back();

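    // Latency bookkeeping: charge -curCycle() per request now and
    // +curCycle() when the translation completes, so the stat ends up
    // accumulating the cycles each request spent in the TLB without
    // recording a per-request start time.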
    if (update_stats) {
        stats.accessCycles -= (curCycle() * req_cnt);
        stats.localCycles -= curCycle();
        stats.globalNumTLBAccesses += req_cnt;
    }

    tlbOutcome lookup_outcome = TLB_MISS;
    const RequestPtr &tmp_req = pkt->req;

    // Access the TLB and figure out if it's a hit or a miss.
    auto entry = tlbLookup(tmp_req, update_stats);
    if (entry || pkt->req->hasNoAddr()) {
        // Put the entry in SenderState
        lookup_outcome = TLB_HIT;
        if (pkt->req->hasNoAddr()) {
            sender_state->tlbEntry =
                new VegaTlbEntry(1 /* VMID */, 0, 0, 0, 0);
            // set false because we shouldn't go to
            // host memory for a memtime request
            pkt->req->setSystemReq(false);
        } else {
            VegaTlbEntry *entry = lookup(virt_page_addr, false);
            assert(entry);

            // Set if this is a system request
            pkt->req->setSystemReq(entry->pte.s);

            sender_state->tlbEntry =
                new VegaTlbEntry(*entry);
        }

        if (update_stats) {
            // the reqCnt has an entry per level, so its size tells us
            // which level we are in
            sender_state->hitLevel = sender_state->reqCnt.size();
            stats.globalNumTLBHits += req_cnt;
        }
    } else {
        if (update_stats)
            stats.globalNumTLBMisses += req_cnt;
    }

    /*
     * We now know the TLB lookup outcome (hit or miss), as well as the
     * TLB access latency.
     *
     * We create and schedule a new TLBEvent which will help us take the
     * appropriate actions (e.g., update the TLB on a hit, send the
     * request to the lower-level TLB on a miss, or start a page walk if
     * this was the last-level TLB).
     */
    TLBEvent *tlb_event =
        new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);

    if (translationReturnEvent.count(virt_page_addr)) {
        panic("Virtual Page Address %#x already has a return event\n",
              virt_page_addr);
    }

    translationReturnEvent[virt_page_addr] = tlb_event;
    assert(tlb_event);

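    // The recorded outcome is acted upon hitLatency cycles from now,
    // when the event's process() routine calls translationReturn().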
    DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
            curTick() + cyclesToTicks(Cycles(hitLatency)));

    schedule(tlb_event, curTick() + cyclesToTicks(Cycles(hitLatency)));
}

GpuTLB::TLBEvent::TLBEvent(GpuTLB *_tlb, Addr _addr,
                           tlbOutcome tlb_outcome, PacketPtr _pkt)
    : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
      outcome(tlb_outcome), pkt(_pkt)
{
}

/**
 * Do Paging protection checks.
 */
void
GpuTLB::pagingProtectionChecks(PacketPtr pkt, VegaTlbEntry * tlb_entry,
                               Mode mode)
{
    // Do paging protection checks.
    bool badWrite = (!tlb_entry->writable());

    if (mode == BaseMMU::Write && badWrite) {
        // The page must have been present to get into the TLB in
        // the first place. We'll assume the reserved bits are
        // fine even though we're not checking them.
        fatal("Page fault on addr %lx PTE=%#lx", pkt->req->getVaddr(),
              (uint64_t)tlb_entry->pte);
    }
}

void
GpuTLB::walkerResponse(VegaTlbEntry& entry, PacketPtr pkt)
{
    DPRINTF(GPUTLB, "WalkerResponse for %#lx. Entry: (%#lx, %#lx, %#lx)\n",
            pkt->req->getVaddr(), entry.vaddr, entry.paddr, entry.size());

    Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                    VegaISA::PageBytes);

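    // Rebuild the physical address from the walker's PTE: pte.ppn names
    // the physical frame, and logBytes (which can exceed PageShift for
    // large fragments) selects how many low vaddr bits form the offset.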
    Addr page_addr = entry.pte.ppn << VegaISA::PageShift;
    Addr paddr = page_addr + (entry.vaddr & mask(entry.logBytes));
    pkt->req->setPaddr(paddr);
    pkt->req->setSystemReq(entry.pte.s);

    GpuTranslationState *sender_state =
        safe_cast<GpuTranslationState*>(pkt->senderState);
    sender_state->tlbEntry = new VegaTlbEntry(entry);

    handleTranslationReturn(virt_page_addr, TLB_MISS, pkt);
}

/**
 * handleTranslationReturn is called on a TLB hit, when a TLB miss
 * returns, or when a page fault returns.
 */
void
GpuTLB::handleTranslationReturn(Addr virt_page_addr,
                                tlbOutcome tlb_outcome, PacketPtr pkt)
{
    assert(pkt);
    Addr vaddr = pkt->req->getVaddr();

    GpuTranslationState *sender_state =
        safe_cast<GpuTranslationState*>(pkt->senderState);

    Mode mode = sender_state->tlbMode;

    VegaTlbEntry *local_entry, *new_entry;

    int req_cnt = sender_state->reqCnt.back();
    bool update_stats = !sender_state->isPrefetch;

    if (update_stats) {
        stats.accessCycles += (req_cnt * curCycle());
        stats.localCycles += curCycle();
    }

    if (tlb_outcome == TLB_HIT) {
        DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n",
                vaddr);
        local_entry = safe_cast<VegaTlbEntry *>(sender_state->tlbEntry);
    } else {
        DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
                vaddr);

        /**
         * We are returning either from a page walk or from a hit at a
         * lower TLB level; the new entry was stashed in SenderState.
         */
        new_entry = safe_cast<VegaTlbEntry *>(sender_state->tlbEntry);
        assert(new_entry);
        local_entry = new_entry;

        if (allocationPolicy) {
            assert(new_entry->pte);
            DPRINTF(GPUTLB, "allocating entry w/ addr %#lx of size %#lx\n",
                    virt_page_addr, new_entry->size());

            local_entry = insert(virt_page_addr, *new_entry);
        }

        assert(local_entry);
    }

    DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
            "while paddr was %#x.\n", local_entry->vaddr,
            local_entry->paddr);

    pagingProtectionChecks(pkt, local_entry, mode);
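    // page_size is a power of two, so (page_size - 1) is the in-page
    // offset mask; adding the offset to the entry's base physical
    // address yields the final translated address.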
    int page_size = local_entry->size();
    Addr paddr = local_entry->paddr + (vaddr & (page_size - 1));
    DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);

    // Since this packet will be sent through the cpu side port, it must be
    // converted to a response pkt if it is not one already
    if (pkt->isRequest()) {
        pkt->makeTimingResponse();
    }

    pkt->req->setPaddr(paddr);

    if (local_entry->uncacheable()) {
        pkt->req->setFlags(Request::UNCACHEABLE);
    }

    // send packet back to coalescer
    cpuSidePort[0]->sendTimingResp(pkt);
    // schedule cleanup event
    cleanupQueue.push(virt_page_addr);

    DPRINTF(GPUTLB, "Scheduled %#lx for cleanup\n", virt_page_addr);

    // Schedule this only once per cycle; the check is required because
    // multiple translations may return in the same cycle. This is a
    // maximum-priority event and must be on the same cycle as the
    // cleanup event in the TLBCoalescer to avoid a race with
    // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry.
    if (!cleanupEvent.scheduled())
        schedule(cleanupEvent, curTick());
}

/**
 * A TLBEvent is scheduled after the TLB lookup and helps us take the
 * appropriate actions (e.g., update the TLB on a hit, send the request
 * to the lower-level TLB on a miss, or start a page walk if this was
 * the last-level TLB).
 */
void
GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome,
                          PacketPtr pkt)
{
    DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);

    assert(translationReturnEvent[virtPageAddr]);
    assert(pkt);

    GpuTranslationState *tmp_sender_state =
        safe_cast<GpuTranslationState*>(pkt->senderState);

    int req_cnt = tmp_sender_state->reqCnt.back();
    bool update_stats = !tmp_sender_state->isPrefetch;

    if (outcome == TLB_HIT) {
        handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);

    } else if (outcome == TLB_MISS) {

        DPRINTF(GPUTLB, "This is a TLB miss\n");
        if (hasMemSidePort) {
            // The one cycle added here represents the delay from when we
            // get the reply back until we propagate it to the coalescer
            // above.

            tmp_sender_state->deviceId = 1;
            tmp_sender_state->pasId = 0;

            if (!memSidePort[0]->sendTimingReq(pkt)) {
                DPRINTF(GPUTLB, "Failed sending translation request to "
                        "lower level TLB for addr %#x\n", virtPageAddr);

                memSidePort[0]->retries.push_back(pkt);
            } else {
                DPRINTF(GPUTLB, "Sent translation request to lower level "
                        "TLB for addr %#x\n", virtPageAddr);
            }
        } else {
            // this is the last level TLB. Start a page walk
            DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
                    "addr %#x\n", virtPageAddr);

            if (update_stats)
                stats.pageTableCycles -= (req_cnt * curCycle());

            TLBEvent *tlb_event = translationReturnEvent[virtPageAddr];
            assert(tlb_event);
            tlb_event->updateOutcome(PAGE_WALK);
            schedule(tlb_event,
                     curTick() + cyclesToTicks(Cycles(missLatency2)));
        }
    } else if (outcome == PAGE_WALK) {
        if (update_stats)
            stats.pageTableCycles += (req_cnt * curCycle());

        // Need to access the page table and update the TLB
        DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
                virtPageAddr);

        Addr base = gpuDevice->getVM().getPageTableBase(1);
        Addr vaddr = pkt->req->getVaddr();
        walker->setDevRequestor(gpuDevice->vramRequestorId());

        // Do page table walk
        walker->startTiming(pkt, base, vaddr, BaseMMU::Mode::Read);
    } else if (outcome == MISS_RETURN) {
        /** we add an extra cycle in the return path of the translation
         * requests in between the various TLB levels.
         */
        handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
    } else {
        panic("Unexpected TLB outcome %d", outcome);
    }
}

void
GpuTLB::TLBEvent::process()
{
    tlb->translationReturn(virtPageAddr, outcome, pkt);
}

const char*
GpuTLB::TLBEvent::description() const
{
    return "trigger translationDoneEvent";
}

void
GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome)
{
    outcome = _outcome;
}

Addr
GpuTLB::TLBEvent::getTLBEventVaddr()
{
    return virtPageAddr;
}

/**
 * recvTiming receives a coalesced timing request from a TLBCoalescer
 * and it calls issueTLBLookup(). It only rejects the packet if we have
 * exceeded the max outstanding number of requests for the TLB.
 */
bool
GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt)
{
    bool ret = false;
    [[maybe_unused]] Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                                     VegaISA::PageBytes);

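    // Flow control: admit the request only while there is room for
    // another outstanding translation; otherwise the coalescer above
    // must retry after cleanup() issues a retry request.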
    if (tlb->outstandingReqs < tlb->maxCoalescedReqs) {
        assert(!tlb->translationReturnEvent.count(virt_page_addr));
        tlb->issueTLBLookup(pkt);
        // update number of outstanding translation requests
        tlb->outstandingReqs++;
        ret = true;
    } else {
        DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
                tlb->outstandingReqs);
        tlb->stats.maxDownstreamReached++;
        ret = false;
    }

    if (tlb->outstandingReqs > tlb->stats.outstandingReqsMax.value())
        tlb->stats.outstandingReqsMax = tlb->outstandingReqs;

    return ret;
}

/**
 * handleFuncTranslationReturn is called on a TLB hit, when a TLB miss
 * returns, or when a page fault returns. It updates the LRU state,
 * inserts the TLB entry on a miss depending on the allocation policy,
 * and performs the required protection checks.
 */
void
GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome)
{
    GpuTranslationState *sender_state =
        safe_cast<GpuTranslationState*>(pkt->senderState);

    Mode mode = sender_state->tlbMode;
    Addr vaddr = pkt->req->getVaddr();

    VegaTlbEntry *local_entry, *new_entry;

    if (tlb_outcome == TLB_HIT) {
        DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
                "%#x\n", vaddr);

        local_entry = safe_cast<VegaTlbEntry *>(sender_state->tlbEntry);
    } else {
        DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
                "%#x\n", vaddr);

        /**
         * We are returning either from a page walk or from a hit at a
         * lower TLB level; the new entry was stashed in SenderState.
         */
        new_entry = safe_cast<VegaTlbEntry *>(sender_state->tlbEntry);
        assert(new_entry);
        local_entry = new_entry;

        if (allocationPolicy) {
            Addr virt_page_addr = roundDown(vaddr, VegaISA::PageBytes);

            DPRINTF(GPUTLB, "allocating entry w/ addr %#lx\n",
                    virt_page_addr);

            local_entry = insert(virt_page_addr, *new_entry);
        }

        assert(local_entry);
    }

    DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
            "while paddr was %#x.\n", local_entry->vaddr,
            local_entry->paddr);

    /**
     * Do paging checks if it's a normal functional access. If it's for
     * a prefetch, then sometimes you can try to prefetch something that
     * won't pass protection. We don't actually want to fault because
     * there is no demand access to deem this a violation. Just put it
     * in the TLB and it will fault if indeed a future demand access
     * touches it in violation.
     */
    if (!sender_state->isPrefetch && sender_state->tlbEntry)
        pagingProtectionChecks(pkt, local_entry, mode);

    int page_size = local_entry->size();
    Addr paddr = local_entry->paddr + (vaddr & (page_size - 1));
    DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);

    pkt->req->setPaddr(paddr);

    if (local_entry->uncacheable())
        pkt->req->setFlags(Request::UNCACHEABLE);
}

// This is used for atomic translations. Everything must happen within
// the same cycle.
void
GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt)
{
    GpuTranslationState *sender_state =
        safe_cast<GpuTranslationState*>(pkt->senderState);

    bool update_stats = !sender_state->isPrefetch;

    Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                    VegaISA::PageBytes);

    // Do the TLB lookup; stats are updated only for non-prefetch accesses.
    bool success = tlb->tlbLookup(pkt->req, update_stats);
    tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;

    // Functional mode means no coalescing, so the global metrics are
    // the same as the local metrics.
    if (update_stats) {
        tlb->stats.globalNumTLBAccesses++;

        if (success) {
            sender_state->hitLevel = sender_state->reqCnt.size();
            tlb->stats.globalNumTLBHits++;
        } else {
            tlb->stats.globalNumTLBMisses++;
        }
    }

    if (!success) {
        if (tlb->hasMemSidePort) {
            // there is a TLB below -> propagate down the TLB hierarchy
            tlb->memSidePort[0]->sendFunctional(pkt);
            // If no valid translation from a prefetch, then just return
            if (sender_state->isPrefetch && !pkt->req->hasPaddr())
                return;
        } else {
            // Need to access the page table and update the TLB
            DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
                    virt_page_addr);

            Addr vaddr = pkt->req->getVaddr();
            [[maybe_unused]] Addr alignedVaddr =
                tlb->pageAlign(virt_page_addr);
            assert(alignedVaddr == virt_page_addr);

            unsigned logBytes;
            PageTableEntry pte;

            // Initialize walker state for VMID
            Addr base = tlb->gpuDevice->getVM().getPageTableBase(1);
            tlb->walker->setDevRequestor(tlb->gpuDevice->vramRequestorId());

            // Do page table walk
            Fault fault = tlb->walker->startFunctional(base, vaddr, pte,
                                                       logBytes,
                                                       BaseMMU::Mode::Read);
            if (fault != NoFault) {
                fatal("Translation fault in TLB at %d!", __LINE__);
            }

            // PPN is already shifted by fragment so we only shift by native
            // page size. Fragment is still used via logBytes to select lower
            // bits from vaddr.
            Addr page_addr = pte.ppn << PageShift;
            Addr paddr = page_addr + (vaddr & mask(logBytes));
            Addr alignedPaddr = tlb->pageAlign(paddr);
            pkt->req->setPaddr(paddr);
            pkt->req->setSystemReq(pte.s);

            if (!sender_state->isPrefetch) {
                assert(paddr);

                DPRINTF(GPUTLB, "Mapping %#x to %#x\n", vaddr, paddr);

                sender_state->tlbEntry =
                    new VegaTlbEntry(1 /* VMID */, vaddr & (~mask(logBytes)),
                                     alignedPaddr, logBytes, pte);
            } else {
                // If this was a prefetch, then do the normal thing if it
                // was a successful translation. Otherwise, send an empty
                // TLB entry back so that it can be figured out as empty
                // and handled accordingly.
                if (paddr) {
                    DPRINTF(GPUTLB, "Mapping %#x to %#x\n", vaddr, paddr);

                    sender_state->tlbEntry =
                        new VegaTlbEntry(1 /* VMID */,
                                         vaddr & (~mask(logBytes)),
                                         alignedPaddr, logBytes, pte);
                } else {
                    DPRINTF(GPUPrefetch, "Prefetch failed %#x\n", vaddr);

                    sender_state->tlbEntry = nullptr;

                    return;
                }
            }
        }
    } else {
        VegaTlbEntry *entry = tlb->lookup(virt_page_addr, update_stats);
        assert(entry);

        if (sender_state->isPrefetch) {
            DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
                    entry->vaddr);
        }

        sender_state->tlbEntry = new VegaTlbEntry(*entry);
    }

    // This is the function that populates pkt->req with the paddr of
    // the translation. If no translation happened (i.e., a prefetch
    // failed), the early returns above keep it from executing.
    tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
}

void
GpuTLB::CpuSidePort::recvReqRetry()
{
    // The CpuSidePort never sends anything but replies. No retries
    // expected.
    panic("recvReqRetry called");
}

AddrRangeList
GpuTLB::CpuSidePort::getAddrRanges() const
{
    // currently not checked by the requestor
    AddrRangeList ranges;

    return ranges;
}

/**
 * MemSidePort receives the packet back; we then schedule the TLBEvent
 * for this virtual page so the response propagates up the hierarchy.
 */
bool
GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt)
{
    Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                    VegaISA::PageBytes);

    DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
            virt_page_addr);

    TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr];
    assert(tlb_event);
    assert(virt_page_addr == tlb_event->getTLBEventVaddr());

    tlb_event->updateOutcome(MISS_RETURN);
    tlb->schedule(tlb_event, curTick() + tlb->clockPeriod());

    return true;
}

void
GpuTLB::MemSidePort::recvReqRetry()
{
    // No retries should reach the TLB. The retries
    // should only reach the TLBCoalescer.
    panic("recvReqRetry called");
}

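// Deferred cleanup: completed translations only queue their virtual page
// address; the TLBEvent objects are reclaimed here, outside the events'
// own process() methods, and the coalescer is then told to retry.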
void
GpuTLB::cleanup()
{
    while (!cleanupQueue.empty()) {
        Addr cleanup_addr = cleanupQueue.front();
        cleanupQueue.pop();

        DPRINTF(GPUTLB, "Deleting return event for %#lx\n", cleanup_addr);

        // delete TLBEvent
        TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr];
        delete old_tlb_event;
        translationReturnEvent.erase(cleanup_addr);

        // update number of outstanding requests
        outstandingReqs--;
    }

    /** the higher level coalescer should retry if it
     * has any pending requests.
     */
    for (int i = 0; i < cpuSidePort.size(); ++i) {
        cpuSidePort[i]->sendRetryReq();
    }
}

GpuTLB::VegaTLBStats::VegaTLBStats(statistics::Group *parent)
    : statistics::Group(parent),
      ADD_STAT(maxDownstreamReached, "Number of refused translation requests"),
      ADD_STAT(outstandingReqsMax, "Maximum count in coalesced request queue"),
      ADD_STAT(localNumTLBAccesses, "Number of TLB accesses"),
      ADD_STAT(localNumTLBHits, "Number of TLB hits"),
      ADD_STAT(localNumTLBMisses, "Number of TLB misses"),
      ADD_STAT(localTLBMissRate, "TLB miss rate"),
      ADD_STAT(globalNumTLBAccesses, "Number of TLB accesses"),
      ADD_STAT(globalNumTLBHits, "Number of TLB hits"),
      ADD_STAT(globalNumTLBMisses, "Number of TLB misses"),
      ADD_STAT(globalTLBMissRate, "TLB miss rate"),
      ADD_STAT(accessCycles, "Cycles spent accessing this TLB level"),
      ADD_STAT(pageTableCycles, "Cycles spent accessing the page table"),
      ADD_STAT(localCycles, "Number of cycles spent in queue for all "
               "incoming reqs"),
      ADD_STAT(localLatency, "Avg. latency over incoming coalesced reqs")
{
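    // Derived statistics: average local latency per access, and miss
    // rates expressed as percentages of the corresponding accesses.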
    localLatency = localCycles / localNumTLBAccesses;
    localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses;

    globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses;
}
988
989} // namespace VegaISA
990} // namespace gem5