release/current/amdgpu__device_8cc_source.html

/*

 * Copyright (c) 2021 Advanced Micro Devices, Inc.

 * All rights reserved.

 *

 * Redistribution and use in source and binary forms, with or without

 * modification, are permitted provided that the following conditions are met:

 *

 * 1. Redistributions of source code must retain the above copyright notice,

 * this list of conditions and the following disclaimer.

 *

 * 2. Redistributions in binary form must reproduce the above copyright notice,

 * this list of conditions and the following disclaimer in the documentation

 * and/or other materials provided with the distribution.

 *

 * 3. Neither the name of the copyright holder nor the names of its

 * contributors may be used to endorse or promote products derived from this

 * software without specific prior written permission.

 *

 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"

 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE

 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR

 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF

 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS

 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN

 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)

 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE

 * POSSIBILITY OF SUCH DAMAGE.

 */


#include "dev/amdgpu/amdgpu_device.hh"


#include <fstream>


#include "debug/AMDGPUDevice.hh"

#include "dev/amdgpu/amdgpu_nbio.hh"

#include "dev/amdgpu/amdgpu_vm.hh"

#include "dev/amdgpu/interrupt_handler.hh"

#include "dev/amdgpu/pm4_packet_processor.hh"

#include "dev/amdgpu/sdma_engine.hh"

#include "dev/hsa/hw_scheduler.hh"

#include "gpu-compute/gpu_command_processor.hh"

#include "gpu-compute/shader.hh"

#include "mem/abstract_mem.hh"

#include "mem/packet.hh"

#include "mem/packet_access.hh"

#include "params/AMDGPUDevice.hh"

#include "sim/byteswap.hh"

#include "sim/sim_exit.hh"


namespace gem5

{


/*
 * Build the GPU device model from its parameters. In order, the constructor:
 * registers each device memory with the system, picks the ROM address range,
 * maps the device name to a gfx version, assigns IDs and MMIO ranges to the
 * SDMA engines, fills the SDMA MMIO dispatch table, registers the PM4 packet
 * processors, calls setGPUDevice(this) on the sub-components, sets up the
 * MMHUB and MMIO apertures, and finally writes a few register values.
 * An unrecognized device name panics.
 */
AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)

    : PciDevice(p), gpuMemMgr(p.memory_manager), deviceIH(p.device_ih),

      cp(p.cp), checkpoint_before_mmios(p.checkpoint_before_mmios),

      init_interrupt_count(0), _lastVMID(0),

      deviceMem(name() + ".deviceMem", p.memories, false, "", false)

{

    // System pointer needs to be explicitly set for device memory since

    // DRAMCtrl uses it to get (1) cache line size and (2) the mem mode.

    // Note this means the cache line size is system wide.

    for (auto& m : p.memories) {

        m->system(p.system);


        // Add to system's device memory map.

        p.system->addDeviceMemory(gpuMemMgr->getRequestorID(), m);

    }


    // Use the configured expansion ROM address when it is non-zero,
    // otherwise fall back to VGA_ROM_DEFAULT. Either way the range spans
    // ROM_SIZE bytes.
    if (config.expansionROM) {

        romRange = RangeSize(config.expansionROM, ROM_SIZE);

    } else {

        romRange = RangeSize(VGA_ROM_DEFAULT, ROM_SIZE);

    }


    // Translate the user-supplied device name into a gfx version.
    if (p.device_name == "Vega10") {

        gfx_version = GfxVersion::gfx900;

    } else if (p.device_name == "MI100") {

        gfx_version = GfxVersion::gfx908;

    } else if (p.device_name == "MI200") {

        gfx_version = GfxVersion::gfx90a;

    } else if (p.device_name == "MI300X") {

        gfx_version = GfxVersion::gfx942;

    } else {

        panic("Unknown GPU device %s\n", p.device_name);

    }


    // Give each SDMA engine a sequential ID (starting at 0) and remember
    // both the engine and its MMIO range under that ID.
    int sdma_id = 0;

    for (auto& s : p.sdmas) {

        s->setGPUDevice(this);

        s->setId(sdma_id);

        sdmaIds.insert({sdma_id, s});

        sdmaMmios.insert({sdma_id,

                          RangeSize(s->getMmioBase(), s->getMmioSize())});

        DPRINTF(AMDGPUDevice, "SDMA%d has MMIO range %s\n", sdma_id,

                sdmaMmios[sdma_id].to_string().c_str());

        sdma_id++;

    }


    // Map SDMA MMIO addresses to functions

    // The keys are looked up in writeMMIO() using the byte offset within
    // the engine's MMIO range shifted right by two.
    sdmaFunc.insert({0x81, &SDMAEngine::setGfxBaseLo});

    sdmaFunc.insert({0x82, &SDMAEngine::setGfxBaseHi});

    sdmaFunc.insert({0x88, &SDMAEngine::setGfxRptrHi});

    sdmaFunc.insert({0x89, &SDMAEngine::setGfxRptrLo});

    sdmaFunc.insert({0x92, &SDMAEngine::setGfxDoorbellLo});

    sdmaFunc.insert({0xab, &SDMAEngine::setGfxDoorbellOffsetLo});

    sdmaFunc.insert({0x80, &SDMAEngine::setGfxSize});

    sdmaFunc.insert({0xb2, &SDMAEngine::setGfxWptrLo});

    sdmaFunc.insert({0xb3, &SDMAEngine::setGfxWptrHi});

    // The page-queue handlers are registered at different offsets for
    // Vega10 than for the MI-series devices.
    if (p.device_name == "Vega10") {

        sdmaFunc.insert({0xe1, &SDMAEngine::setPageBaseLo});

        sdmaFunc.insert({0xe9, &SDMAEngine::setPageRptrLo});

        sdmaFunc.insert({0xe8, &SDMAEngine::setPageRptrHi});

        sdmaFunc.insert({0xf2, &SDMAEngine::setPageDoorbellLo});

        sdmaFunc.insert({0x10b, &SDMAEngine::setPageDoorbellOffsetLo});

        sdmaFunc.insert({0xe0, &SDMAEngine::setPageSize});

        sdmaFunc.insert({0x113, &SDMAEngine::setPageWptrLo});

    } else if (p.device_name == "MI100" || p.device_name == "MI200"

            || p.device_name == "MI300X") {

        // NOTE(review): unlike the Vega10 branch, no setPageSize handler is
        // registered here -- confirm this is intentional.
        sdmaFunc.insert({0xd9, &SDMAEngine::setPageBaseLo});

        sdmaFunc.insert({0xe1, &SDMAEngine::setPageRptrLo});

        sdmaFunc.insert({0xe0, &SDMAEngine::setPageRptrHi});

        sdmaFunc.insert({0xea, &SDMAEngine::setPageDoorbellLo});

        sdmaFunc.insert({0xd8, &SDMAEngine::setPageDoorbellOffsetLo});

        sdmaFunc.insert({0x10b, &SDMAEngine::setPageWptrLo});

    } else {

        panic("Unknown GPU device %s\n", p.device_name);

    }


    // Setup PM4 packet processors and sanity check IDs

    std::set<int> pm4_ids;

    for (auto& pm4 : p.pm4_pkt_procs) {

        pm4->setGPUDevice(this);

        fatal_if(pm4_ids.count(pm4->getIpId()),

                "Two PM4s with same IP IDs is not allowed");

        pm4_ids.insert(pm4->getIpId());

        pm4PktProcs.insert({pm4->getIpId(), pm4});


        // Also index the processor by its MMIO range so writeMMIO() can
        // route writes to it.
        pm4Ranges.insert({pm4->getMMIORange(), pm4});

    }


    // There should be at least one PM4 packet processor with ID 0

    fatal_if(!pm4PktProcs.count(0), "No default PM4 processor found");


    // Hand this device to the remaining sub-components.
    deviceIH->setGPUDevice(this);

    cp->hsaPacketProc().setGPUDevice(this);

    cp->setGPUDevice(this);

    nbio.setGPUDevice(this);


    // Address aperture for device memory. We tell this to the driver and

    // could possibly be anything, but these are the values used by hardware.

    uint64_t mmhubBase = 0x8000ULL << 24;

    uint64_t mmhubTop = 0x83ffULL << 24;

    uint64_t mem_size = 0x3ff0; // 16 GB of memory


    gpuvm.setMMHUBBase(mmhubBase);

    gpuvm.setMMHUBTop(mmhubTop);


    // Map other MMIO apertures based on gfx version. This must be done before

    // any calls to get/setRegVal.

    // NBIO               0x0     - 0x4280

    // IH                 0x4280  - 0x4980

    // GRBM               0x8000  - 0xC000

    // GFX                0x28000 - 0x3F000

    // MMHUB              0x68000 - 0x6a120

    gpuvm.setMMIOAperture(NBIO_MMIO_RANGE, AddrRange(0x0, 0x4280));

    gpuvm.setMMIOAperture(IH_MMIO_RANGE,   AddrRange(0x4280, 0x4980));

    gpuvm.setMMIOAperture(GRBM_MMIO_RANGE, AddrRange(0x8000, 0xC000));

    gpuvm.setMMIOAperture(GFX_MMIO_RANGE,  AddrRange(0x28000, 0x3F000));

    gpuvm.setMMIOAperture(MMHUB_MMIO_RANGE,  AddrRange(0x68000, 0x6A120));


    // These are hardcoded register values to return what the driver expects

    setRegVal(AMDGPU_MP0_SMN_C2PMSG_33, 0x80000000);


    // There are different registers for different GPUs, so we set the value

    // based on the GPU type specified by the user.

    // The base/top registers receive the MMHUB addresses shifted right by
    // 24 bits. Vega10 is the only branch that does not write a memory size
    // register.
    if (p.device_name == "Vega10") {

        setRegVal(VEGA10_FB_LOCATION_BASE, mmhubBase >> 24);

        setRegVal(VEGA10_FB_LOCATION_TOP, mmhubTop >> 24);

    } else if (p.device_name == "MI100") {

        setRegVal(MI100_FB_LOCATION_BASE, mmhubBase >> 24);

        setRegVal(MI100_FB_LOCATION_TOP, mmhubTop >> 24);

        setRegVal(MI100_MEM_SIZE_REG, mem_size);

    } else if (p.device_name == "MI200") {

        // This device can have either 64GB or 128GB of device memory.

        // This limits to 16GB for simulation.

        setRegVal(MI200_FB_LOCATION_BASE, mmhubBase >> 24);

        setRegVal(MI200_FB_LOCATION_TOP, mmhubTop >> 24);

        setRegVal(MI200_MEM_SIZE_REG, mem_size);

    } else if (p.device_name == "MI300X") {

        // MI300X is programmed through the MI200_* register definitions.
        setRegVal(MI200_FB_LOCATION_BASE, mmhubBase >> 24);

        setRegVal(MI200_FB_LOCATION_TOP, mmhubTop >> 24);

        setRegVal(MI200_MEM_SIZE_REG, mem_size);

    } else {

        panic("Unknown GPU device %s\n", p.device_name);

    }

}

AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p) {…}


/*
 * Service a read that targets the ROM range.
 *
 * The offset into the ROM image is the packet address masked with
 * (ROM_SIZE - 1). The requested bytes are copied out of the ROM backing
 * store into a 64-bit staging value, which is then stored into the packet
 * in little-endian order.
 *
 * @param pkt Read packet; its data is overwritten with the ROM contents.
 */
void
AMDGPUDevice::readROM(PacketPtr pkt)
{
    Addr rom_offset = pkt->getAddr() & (ROM_SIZE - 1);
    uint64_t rom_data = 0;

    // The bytes are staged in an 8-byte local, so a larger access would
    // overrun it. Check before copying rather than relying on the size
    // validation that only happens later when the packet data is set.
    assert(pkt->getSize() <= sizeof(rom_data));

    memcpy(&rom_data, rom.data() + rom_offset, pkt->getSize());
    pkt->setUintX(rom_data, ByteOrder::little);

    DPRINTF(AMDGPUDevice, "Read from addr %#x on ROM offset %#x data: %#x\n",
            pkt->getAddr(), rom_offset, rom_data);
}

AMDGPUDevice::readROM(PacketPtr pkt) {…}


/*
 * Service a write that targets the ROM range.
 *
 * The packet's little-endian value is copied into the ROM backing store at
 * the packet address's distance from the start of romRange.
 *
 * @param pkt Write packet whose address must satisfy isROM().
 */
void
AMDGPUDevice::writeROM(PacketPtr pkt)
{
    const Addr pkt_addr = pkt->getAddr();
    assert(isROM(pkt_addr));

    // Offset is relative to wherever the ROM range was placed.
    const Addr offset_in_rom = pkt_addr - romRange.start();
    uint64_t value = pkt->getUintX(ByteOrder::little);

    memcpy(rom.data() + offset_in_rom, &value, pkt->getSize());

    DPRINTF(AMDGPUDevice, "Write to addr %#x on ROM offset %#x data: %#x\n",
            pkt_addr, offset_in_rom, value);
}

AMDGPUDevice::writeROM(PacketPtr pkt) {…}


/*
 * Report the address ranges this device responds to.
 *
 * The result always starts with the ROM range, followed by every range
 * reported by PciDevice::getAddrRanges() whose start address is non-zero.
 *
 * @return List of address ranges for this device.
 */
AddrRangeList
AMDGPUDevice::getAddrRanges() const
{
    AddrRangeList pci_ranges = PciDevice::getAddrRanges();

    AddrRangeList advertised;
    advertised.push_back(romRange);

    // A range that starts at zero is assumed to be one the OS has not
    // assigned yet. Such ranges are left out because they would overlap
    // with another range and make the I/O crossbar fatal.
    for (auto &candidate : pci_ranges) {
        if (candidate.start() == 0) {
            continue;
        }

        advertised.push_back(candidate);
    }

    return advertised;
}

AMDGPUDevice::getAddrRanges() const {…}


/*
 * Handle a PCI configuration space read.
 *
 * Offsets below PCI_DEVICE_SPECIFIC are forwarded to PciDevice. Offsets
 * inside the PXCAP structure are served from the local pxcap copy for 1, 2
 * and 4 byte accesses; any other size panics. Remaining device specific
 * offsets only produce a warning.
 *
 * When checkpoint_before_mmios is set, three consecutive reads of
 * PCI0_INTERRUPT_PIN schedule a "checkpoint" simulation exit.
 *
 * @param pkt Configuration read packet.
 * @return configDelay.
 */
Tick
AMDGPUDevice::readConfig(PacketPtr pkt)
{
    int offset = pkt->getAddr() & PCI_CONFIG_SIZE;

    if (offset < PCI_DEVICE_SPECIFIC) {
        PciDevice::readConfig(pkt);
    } else {
        if (offset >= PXCAP_BASE && offset < (PXCAP_BASE + sizeof(PXCAP))) {
            int pxcap_offset = offset - PXCAP_BASE;

            // The capability structure is stored as raw bytes. Multi-byte
            // fields are copied out with memcpy instead of dereferencing a
            // casted pointer, since the byte offset need not be aligned for
            // the wider type.
            switch (pkt->getSize()) {
                case sizeof(uint8_t):
                    pkt->setLE<uint8_t>(pxcap.data[pxcap_offset]);
                    DPRINTF(AMDGPUDevice,
                        "Read PXCAP:  dev %#x func %#x reg %#x 1 bytes: data "
                        "= %#x\n", _busAddr.dev, _busAddr.func, pxcap_offset,
                        (uint32_t)pkt->getLE<uint8_t>());
                    break;
                case sizeof(uint16_t): {
                    uint16_t half_word = 0;
                    memcpy(&half_word, &pxcap.data[pxcap_offset],
                           sizeof(half_word));
                    pkt->setLE<uint16_t>(half_word);
                    DPRINTF(AMDGPUDevice,
                        "Read PXCAP:  dev %#x func %#x reg %#x 2 bytes: data "
                        "= %#x\n", _busAddr.dev, _busAddr.func, pxcap_offset,
                        (uint32_t)pkt->getLE<uint16_t>());
                  } break;
                case sizeof(uint32_t): {
                    uint32_t word = 0;
                    memcpy(&word, &pxcap.data[pxcap_offset], sizeof(word));
                    pkt->setLE<uint32_t>(word);
                    DPRINTF(AMDGPUDevice,
                        "Read PXCAP:  dev %#x func %#x reg %#x 4 bytes: data "
                        "= %#x\n",_busAddr.dev, _busAddr.func, pxcap_offset,
                        (uint32_t)pkt->getLE<uint32_t>());
                  } break;
                default:
                    panic("Invalid access size (%d) for amdgpu PXCAP %#x\n",
                          pkt->getSize(), pxcap_offset);
            }
            pkt->makeAtomicResponse();
        } else {
            // NOTE(review): this path does not turn the packet into a
            // response, unlike writeConfig() -- confirm that is intended.
            warn("Device specific offset %d not implemented!\n", offset);
        }
    }

    // Before sending MMIOs the driver sends three interrupts in a row.
    // Use this to trigger creating a checkpoint to restore in timing mode.
    // This is only necessary until we can create a "hole" in the KVM VM
    // around the VGA ROM region such that KVM exits and sends requests to
    // this device rather than the KVM VM.
    if (checkpoint_before_mmios) {
        if (offset == PCI0_INTERRUPT_PIN) {
            if (++init_interrupt_count == 3) {
                DPRINTF(AMDGPUDevice, "Checkpointing before first MMIO\n");
                exitSimLoop("checkpoint", 0, curTick() + configDelay + 1);
            }
        } else {
            // Any other offset breaks the run of consecutive reads.
            init_interrupt_count = 0;
        }
    }

    return configDelay;
}

AMDGPUDevice::readConfig(PacketPtr pkt) {…}


/*
 * Handle a PCI configuration space write.
 *
 * Offsets below PCI_DEVICE_SPECIFIC are forwarded to PciDevice and its
 * result is returned. Writes that fall inside the PXCAP structure are
 * copied into the local pxcap bytes. Every device specific write is turned
 * into an atomic response.
 *
 * @param pkt Configuration write packet.
 * @return Delay reported by PciDevice for standard offsets, configDelay
 *         otherwise.
 */
Tick
AMDGPUDevice::writeConfig(PacketPtr pkt)
{
    [[maybe_unused]] int offset = pkt->getAddr() & PCI_CONFIG_SIZE;
    DPRINTF(AMDGPUDevice, "Write Config: from offset: %#x size: %#x "
            "data: %#x\n", offset, pkt->getSize(),
            pkt->getUintX(ByteOrder::little));

    // Standard configuration registers are owned by the base class.
    if (offset < PCI_DEVICE_SPECIFIC) {
        return PciDevice::writeConfig(pkt);
    }

    const bool hits_pxcap =
        offset >= PXCAP_BASE && offset < (PXCAP_BASE + sizeof(PXCAP));

    if (hits_pxcap) {
        int pxcap_offset = offset - PXCAP_BASE;

        DPRINTF(AMDGPUDevice, "Writing PXCAP offset %d size %d\n",
                pxcap_offset, pkt->getSize());

        // Store the raw packet bytes at the matching PXCAP position.
        uint8_t *dest = &(pxcap.data[0]) + pxcap_offset;
        memcpy(dest, pkt->getConstPtr<void>(), pkt->getSize());
    }

    pkt->makeAtomicResponse();

    return configDelay;
}

AMDGPUDevice::writeConfig(PacketPtr pkt) {…}


/*
 * Trace an access and turn the packet into an atomic response.
 *
 * @param pkt  Packet being completed.
 * @param read True when the access is a read; only affects the trace text.
 */
void
AMDGPUDevice::dispatchAccess(PacketPtr pkt, bool read)
{
    [[maybe_unused]] const char *direction = "Write";
    if (read) {
        direction = "Read";
    }

    DPRINTF(AMDGPUDevice, "%s from addr %#x size: %#x data: %#x\n",
            direction, pkt->getAddr(), pkt->getSize(),
            pkt->getUintX(ByteOrder::little));

    pkt->makeAtomicResponse();
}

AMDGPUDevice::dispatchAccess(PacketPtr pkt, bool read) {…}


/*
 * Service a read from the framebuffer BAR.
 *
 * Data is returned in priority order: (1) special addresses handled by
 * nbio.readFrame(), ignoring any writes from the driver; (2) a functional
 * read sent through the first compute unit's memory port; (3) if that
 * reports a functional read error, a direct access to the device memory
 * obtained from the system.
 *
 * @param pkt    Packet to fill with the data that was read.
 * @param offset Offset of the access within the framebuffer BAR.
 */
void
AMDGPUDevice::readFrame(PacketPtr pkt, Addr offset)
{
    DPRINTF(AMDGPUDevice, "Read framebuffer address %#lx\n", offset);

    if (nbio.readFrame(pkt, offset)) {
        return;
    }

    /*
     * Read the value from device memory. This must be done functionally
     * because this method is called by the PCIDevice::read method which
     * is a non-timing read.
     */
    RequestPtr req = std::make_shared<Request>(
            offset, pkt->getSize(), 0, vramRequestorId());

    PacketPtr readPkt = new Packet(req, MemCmd::ReadReq);
    uint8_t *dataPtr = new uint8_t[pkt->getSize()];
    // The buffer is handed to the packet as dynamic data, so the packet
    // releases it when it is deleted.
    readPkt->dataDynamic(dataPtr);
    readPkt->req->setGPUFuncAccess(true);
    readPkt->setSuppressFuncError();
    cp->shader()->cuList[0]->memPort[0].sendFunctional(readPkt);

    if (readPkt->cmd == MemCmd::FunctionalReadError) {
        // Drop the failed packet (and with it the data buffer) and retry
        // against device memory. The same variables are reused so that the
        // code after this block reads from, and frees, the retry packet
        // rather than the one that was just deleted.
        delete readPkt;

        req = std::make_shared<Request>(offset, pkt->getSize(), 0,
                                        vramRequestorId());
        readPkt = Packet::createRead(req);
        dataPtr = new uint8_t[pkt->getSize()];
        readPkt->dataDynamic(dataPtr);

        auto system = cp->shader()->gpuCmdProc.system();
        system->getDeviceMemory(readPkt)->access(readPkt);
    }

    pkt->setUintX(readPkt->getUintX(ByteOrder::little), ByteOrder::little);
    delete readPkt;
}

AMDGPUDevice::readFrame(PacketPtr pkt, Addr offset) {…}


/*
 * Service a read from the doorbell BAR by replaying the value recorded in
 * the MMIO trace reader.
 *
 * @param pkt    Packet to fill.
 * @param offset Offset of the access within the doorbell BAR.
 */
void
AMDGPUDevice::readDoorbell(PacketPtr pkt, Addr offset)
{
    DPRINTF(AMDGPUDevice, "Read doorbell %#lx\n", offset);

    // Doorbell reads have no handling beyond the trace lookup.
    mmioReader.readFromTrace(pkt, DOORBELL_BAR, offset);
}

AMDGPUDevice::readDoorbell(PacketPtr pkt, Addr offset) {…}


/*
 * Service a read from the MMIO BAR.
 *
 * The packet is first filled from the MMIO trace. If the offset falls in
 * the NBIO, GRBM, GFX or MMHUB aperture, the matching component is then
 * asked to read the register, using the offset relative to the start of the
 * aperture (shifted for GRBM and MMHUB).
 *
 * @param pkt    Packet to fill.
 * @param offset Offset of the access within the MMIO BAR.
 */
void
AMDGPUDevice::readMMIO(PacketPtr pkt, Addr offset)
{
    AddrRange hit = gpuvm.getMMIOAperture(offset);
    const Addr rel_offset = offset - hit.start();

    // The trace value is the default answer; the handlers below may
    // replace it for the more dynamic registers.
    DPRINTF(AMDGPUDevice, "Read MMIO %#lx\n", offset);
    mmioReader.readFromTrace(pkt, MMIO_BAR, offset);

    if (hit == gpuvm.getMMIORange(NBIO_MMIO_RANGE)) {
        DPRINTF(AMDGPUDevice, "NBIO base\n");
        nbio.readMMIO(pkt, rel_offset);
        return;
    }

    if (hit == gpuvm.getMMIORange(GRBM_MMIO_RANGE)) {
        DPRINTF(AMDGPUDevice, "GRBM base\n");
        gpuvm.readMMIO(pkt, rel_offset >> GRBM_OFFSET_SHIFT);
        return;
    }

    if (hit == gpuvm.getMMIORange(GFX_MMIO_RANGE)) {
        DPRINTF(AMDGPUDevice, "GFX base\n");
        gfx.readMMIO(pkt, rel_offset);
        return;
    }

    if (hit == gpuvm.getMMIORange(MMHUB_MMIO_RANGE)) {
        DPRINTF(AMDGPUDevice, "MMHUB base\n");
        gpuvm.readMMIO(pkt, rel_offset >> MMHUB_OFFSET_SHIFT);
        return;
    }

    DPRINTF(AMDGPUDevice, "Unknown MMIO aperture for read %#x\n", offset);
}

AMDGPUDevice::readMMIO(PacketPtr pkt, Addr offset) {…}


/*
 * Service a write to the framebuffer BAR.
 *
 * Every compute unit is sent an L2 invalidate for the cache line holding
 * the offset. Writes that land in the GART aperture are recorded in the
 * GART table. The write is then passed to nbio and finally applied to
 * device memory.
 *
 * @param pkt    Packet carrying the data to write.
 * @param offset Offset of the access within the framebuffer BAR.
 */
void
AMDGPUDevice::writeFrame(PacketPtr pkt, Addr offset)
{
    DPRINTF(AMDGPUDevice, "Wrote framebuffer address %#lx\n", offset);

    for (auto& cu: CP()->shader()->cuList) {
        // Round the offset down to the start of its cache line.
        Addr line_addr = offset & ~(gpuMemMgr->getCacheLineSize() - 1);
        cu->sendInvL2(line_addr);
    }

    const Addr frame_base = gpuvm.getFrameAperture(offset);
    const Addr frame_offset = offset - frame_base;

    // Keep a copy of the value when it is a GART table entry.
    if (frame_base == gpuvm.gartBase()) {
        gpuvm.gartTable[frame_offset] = pkt->getUintX(ByteOrder::little);
        DPRINTF(AMDGPUDevice, "GART translation %p -> %p\n", frame_offset,
                gpuvm.gartTable[frame_offset]);
    }

    nbio.writeFrame(pkt, offset);

    /*
     * The value is written to device memory directly because this method
     * is called by the PCIDevice::write method, which is a non-timing
     * write.
     */
    RequestPtr mem_req = std::make_shared<Request>(
            offset, pkt->getSize(), 0, vramRequestorId());
    PacketPtr mem_pkt = Packet::createWrite(mem_req);

    // The copy is handed to the packet as dynamic data.
    uint8_t *payload = new uint8_t[pkt->getSize()];
    std::memcpy(payload, pkt->getPtr<uint8_t>(),
                pkt->getSize() * sizeof(uint8_t));
    mem_pkt->dataDynamic(payload);

    auto system = cp->shader()->gpuCmdProc.system();
    system->getDeviceMemory(mem_pkt)->access(mem_pkt);

    delete mem_pkt;
}

AMDGPUDevice::writeFrame(PacketPtr pkt, Addr offset) {…}


/*
 * Service a write to the doorbell BAR.
 *
 * If the doorbell offset has a registered queue type, the written value is
 * forwarded to the component that owns that queue (a PM4 packet processor,
 * an SDMA engine, the HSA hardware scheduler or the interrupt handler).
 * An unregistered queue type panics.
 *
 * If the offset is not registered yet, a copy of the packet is stored in
 * pendingDoorbellPkts so it can be replayed by processPendingDoorbells().
 * Only one packet is kept per offset; a later write replaces the earlier
 * one.
 *
 * @param pkt    Packet carrying the doorbell value.
 * @param offset Offset of the doorbell within the doorbell BAR.
 */
void
AMDGPUDevice::writeDoorbell(PacketPtr pkt, Addr offset)
{
    DPRINTF(AMDGPUDevice, "Wrote doorbell %#lx\n", offset);

    if (doorbells.find(offset) != doorbells.end()) {
        QueueType q_type = doorbells[offset].qtype;
        int ip_id = doorbells[offset].ip_id;
        DPRINTF(AMDGPUDevice, "Doorbell offset %p queue: %d\n",
                              offset, q_type);
        switch (q_type) {
          case Compute:
            assert(pm4PktProcs.count(ip_id));
            pm4PktProcs[ip_id]->process(
                pm4PktProcs[ip_id]->getQueue(offset),
                pkt->getLE<uint64_t>());
          break;
          case Gfx:
            assert(pm4PktProcs.count(ip_id));
            pm4PktProcs[ip_id]->process(
                pm4PktProcs[ip_id]->getQueue(offset, true),
                pkt->getLE<uint64_t>());
          break;
          case SDMAGfx: {
            SDMAEngine *sdmaEng = getSDMAEngine(offset);
            sdmaEng->processGfx(pkt->getLE<uint64_t>());
          } break;
          case SDMAPage: {
            SDMAEngine *sdmaEng = getSDMAEngine(offset);
            sdmaEng->processPage(pkt->getLE<uint64_t>());
          } break;
          case ComputeAQL: {
            assert(pm4PktProcs.count(ip_id));
            // Both consumers receive the written value plus one.
            cp->hsaPacketProc().hwScheduler()->write(offset,
                pkt->getLE<uint64_t>() + 1);
            pm4PktProcs[ip_id]->updateReadIndex(offset,
                pkt->getLE<uint64_t>() + 1);
          } break;
          case InterruptHandler:
            deviceIH->updateRptr(pkt->getLE<uint32_t>());
            break;
          case RLC: {
            SDMAEngine *sdmaEng = getSDMAEngine(offset);
            sdmaEng->processRLC(offset, pkt->getLE<uint64_t>());
          } break;
          default:
            panic("Write to unkown queue type!");
        }
    } else {
        warn("Unknown doorbell offset: %lx. Saving to pending doorbells.\n",
             offset);

        // We have to ACK the PCI packet immediately, so create a copy of the
        // packet here to send again. The packet data contains the value of
        // the doorbell to write so we need to copy that as the original
        // packet gets deleted after the PCI write() method returns.
        RequestPtr pending_req(pkt->req);
        PacketPtr pending_pkt = Packet::createWrite(pending_req);
        uint8_t *pending_data = new uint8_t[pkt->getSize()];
        memcpy(pending_data, pkt->getPtr<uint8_t>(), pkt->getSize());
        pending_pkt->dataDynamic(pending_data);

        // emplace() leaves the map untouched when the offset already has a
        // pending packet, which would leak the copy made above and drop the
        // newer doorbell value. In that case replace the stored packet. The
        // old packet is only freed here, after the copy has been made.
        auto [slot, inserted] =
            pendingDoorbellPkts.emplace(offset, pending_pkt);
        if (!inserted) {
            delete slot->second;
            slot->second = pending_pkt;
        }
    }
}

AMDGPUDevice::writeDoorbell(PacketPtr pkt, Addr offset) {…}


/*
 * Service a write to the MMIO BAR.
 *
 * Targets are tried in order and the first match handles the write:
 * SDMA engine MMIO ranges, PM4 packet processor MMIO ranges, then the GRBM,
 * IH, NBIO and GFX apertures.
 *
 * @param pkt    Packet carrying the value to write.
 * @param offset Offset of the access within the MMIO BAR.
 */
void
AMDGPUDevice::writeMMIO(PacketPtr pkt, Addr offset)
{
    AddrRange hit = gpuvm.getMMIOAperture(offset);
    const Addr rel_offset = offset - hit.start();

    DPRINTF(AMDGPUDevice, "Wrote MMIO %#lx\n", offset);

    // SDMA engines are checked first.
    for (int idx = 0; idx < sdmaIds.size(); ++idx) {
        if (!sdmaMmios[idx].contains(offset)) {
            continue;
        }

        // The handler table is keyed by the byte offset within the engine's
        // range shifted right by two.
        Addr sdma_offset = (offset - sdmaMmios[idx].start()) >> 2;
        if (!sdmaFunc.count(sdma_offset)) {
            DPRINTF(AMDGPUDevice, "Unknown SDMA%d MMIO: %#lx\n", idx,
                    sdma_offset);
        } else {
            DPRINTF(AMDGPUDevice, "Calling SDMA%d MMIO function %lx\n",
                    idx, sdma_offset);
            sdmaFuncPtr handler = sdmaFunc[sdma_offset];
            (getSDMAById(idx)->*handler)(pkt->getLE<uint32_t>());
        }

        // The write belonged to this engine whether or not it was handled.
        return;
    }

    // PM4 processors come next; returning avoids a duplicate write below.
    for (auto& [range, pm4_proc] : pm4Ranges) {
        if (!range.contains(offset)) {
            continue;
        }

        // PM4 MMIOs are relative to the start of the processor's range.
        Addr ip_offset = offset - range.start();
        pm4_proc->writeMMIO(pkt, ip_offset >> GRBM_OFFSET_SHIFT);

        return;
    }

    // Otherwise dispatch on the aperture that contains the offset.
    if (hit == gpuvm.getMMIORange(GRBM_MMIO_RANGE)) {
        DPRINTF(AMDGPUDevice, "GRBM base\n");
        gpuvm.writeMMIO(pkt, rel_offset >> GRBM_OFFSET_SHIFT);
        return;
    }

    if (hit == gpuvm.getMMIORange(IH_MMIO_RANGE)) {
        DPRINTF(AMDGPUDevice, "IH base\n");
        deviceIH->writeMMIO(pkt, rel_offset >> IH_OFFSET_SHIFT);
        return;
    }

    if (hit == gpuvm.getMMIORange(NBIO_MMIO_RANGE)) {
        DPRINTF(AMDGPUDevice, "NBIO base\n");
        nbio.writeMMIO(pkt, rel_offset);
        return;
    }

    if (hit == gpuvm.getMMIORange(GFX_MMIO_RANGE)) {
        DPRINTF(AMDGPUDevice, "GFX base\n");
        gfx.writeMMIO(pkt, rel_offset);
        return;
    }

    DPRINTF(AMDGPUDevice, "Unknown MMIO aperture for write %#x\n", offset);
}

AMDGPUDevice::writeMMIO(PacketPtr pkt, Addr offset) {…}


/*
 * Entry point for reads to this device.
 *
 * ROM addresses are served by readROM(). All other addresses are resolved
 * to a BAR and handed to the framebuffer, doorbell or MMIO read handler; an
 * address that maps to none of those BARs panics. The packet is completed
 * through dispatchAccess().
 *
 * @param pkt Read packet.
 * @return pioDelay.
 */
Tick
AMDGPUDevice::read(PacketPtr pkt)
{
    if (!isROM(pkt->getAddr())) {
        int bar = -1;
        Addr bar_offset = 0;
        getBAR(pkt->getAddr(), bar, bar_offset);

        switch (bar) {
          case FRAMEBUFFER_BAR:
            readFrame(pkt, bar_offset);
            break;
          case DOORBELL_BAR:
            readDoorbell(pkt, bar_offset);
            break;
          case MMIO_BAR:
            readMMIO(pkt, bar_offset);
            break;
          default:
            panic("Request with address out of mapped range!");
        }
    } else {
        readROM(pkt);
    }

    dispatchAccess(pkt, true);
    return pioDelay;
}

AMDGPUDevice::read(PacketPtr pkt) {…}


/*
 * Entry point for writes to this device.
 *
 * ROM addresses are served by writeROM(). All other addresses are resolved
 * to a BAR and handed to the framebuffer, doorbell or MMIO write handler;
 * an address that maps to none of those BARs panics. The packet is
 * completed through dispatchAccess().
 *
 * @param pkt Write packet.
 * @return pioDelay.
 */
Tick
AMDGPUDevice::write(PacketPtr pkt)
{
    if (isROM(pkt->getAddr())) {
        writeROM(pkt);
    } else {
        int bar = -1;
        Addr bar_offset = 0;
        getBAR(pkt->getAddr(), bar, bar_offset);

        switch (bar) {
          case FRAMEBUFFER_BAR:
            writeFrame(pkt, bar_offset);
            break;
          case DOORBELL_BAR:
            writeDoorbell(pkt, bar_offset);
            break;
          case MMIO_BAR:
            writeMMIO(pkt, bar_offset);
            break;
          default:
            panic("Request with address out of mapped range!");
        }

        // The written value is fetched only for the trace message below.
        uint64_t data = pkt->getUintX(ByteOrder::little);

        DPRINTF(AMDGPUDevice, "PCI Write to %#lx data %#lx\n",
                                pkt->getAddr(), data);
    }

    dispatchAccess(pkt, false);

    return pioDelay;
}

AMDGPUDevice::write(PacketPtr pkt) {…}


/*
 * Replay a doorbell write that was stored because its offset was not
 * registered when it arrived. Does nothing when no packet is pending for
 * the offset.
 *
 * @param offset Doorbell offset to replay.
 */
void
AMDGPUDevice::processPendingDoorbells(uint32_t offset)
{
    if (!pendingDoorbellPkts.count(offset)) {
        return;
    }

    DPRINTF(AMDGPUDevice, "Sending pending doorbell %x\n", offset);
    writeDoorbell(pendingDoorbellPkts[offset], offset);

    // Look the entry up again after the replay before freeing and
    // removing it.
    delete pendingDoorbellPkts[offset];
    pendingDoorbellPkts.erase(offset);
}

AMDGPUDevice::processPendingDoorbells(uint32_t offset) {…}


uint32_t


AMDGPUDevice::getRegVal(uint64_t addr)

{

    // This is somewhat of a guess based on amdgpu_device_mm_access

    // in amdgpu_device.c in the ROCk driver. If bit 32 is 1 then

    // assume VRAM and use full address, otherwise assume register

    // address and only user lower 31 bits.

    Addr fixup_addr = bits(addr, 31, 31) ? addr : addr & 0x7fffffff;


    uint32_t pkt_data = 0;

    RequestPtr request = std::make_shared<Request>(fixup_addr,

            sizeof(uint32_t), 0 /* flags */, vramRequestorId());

    PacketPtr pkt = Packet::createRead(request);

    pkt->dataStatic((uint8_t *)&pkt_data);

    readMMIO(pkt, addr);

    DPRINTF(AMDGPUDevice, "Getting register 0x%lx = %x\n",

            fixup_addr, pkt->getLE<uint32_t>());


    pkt_data = pkt->getLE<uint32_t>();

    delete pkt;


    return pkt_data;

}

AMDGPUDevice::getRegVal(uint64_t addr) {…}


void


AMDGPUDevice::setRegVal(uint64_t addr, uint32_t value)

{

    DPRINTF(AMDGPUDevice, "Setting register 0x%lx to %x\n",

            addr, value);


    uint32_t pkt_data = value;

    RequestPtr request = std::make_shared<Request>(addr,

            sizeof(uint32_t), 0 /* flags */, vramRequestorId());

    PacketPtr pkt = Packet::createWrite(request);

    pkt->dataStatic((uint8_t *)&pkt_data);

    writeMMIO(pkt, addr);

    delete pkt;

}

AMDGPUDevice::setRegVal(uint64_t addr, uint32_t value) {…}


/*
 * Register (or update) the queue type and IP id for a doorbell offset.
 *
 * @param offset Doorbell offset.
 * @param qt     Queue type to associate with the doorbell.
 * @param ip_id  IP id to associate with the doorbell.
 */
void
AMDGPUDevice::setDoorbellType(uint32_t offset, QueueType qt, int ip_id)
{
    DPRINTF(AMDGPUDevice, "Setting doorbell type for %x\n", offset);

    // operator[] creates the entry on first use.
    auto &entry = doorbells[offset];
    entry.qtype = qt;
    entry.ip_id = ip_id;
}

AMDGPUDevice::setDoorbellType(uint32_t offset, QueueType qt, int ip_id) {…}


/*
 * Remove the registration for a doorbell offset, if there is one.
 *
 * @param offset Doorbell offset to forget.
 */
void
AMDGPUDevice::unsetDoorbell(uint32_t offset)
{
    auto entry = doorbells.find(offset);
    if (entry != doorbells.end()) {
        doorbells.erase(entry);
    }
}

AMDGPUDevice::unsetDoorbell(uint32_t offset) {…}


/*
 * Associate an SDMA engine with an offset, replacing any engine already
 * stored for it.
 *
 * @param offset Key under which the engine is stored.
 * @param eng    Engine to store.
 */
void
AMDGPUDevice::setSDMAEngine(Addr offset, SDMAEngine *eng)
{
    sdmaEngs.insert_or_assign(offset, eng);
}

AMDGPUDevice::setSDMAEngine(Addr offset, SDMAEngine *eng) {…}


/*
 * Look up an SDMA engine by its ID.
 *
 * @param id Engine ID; asserted to be present in sdmaIds.
 * @return The engine stored under that ID.
 */
SDMAEngine*
AMDGPUDevice::getSDMAById(int id)
{
    assert(sdmaIds.find(id) != sdmaIds.end());

    SDMAEngine *engine = sdmaIds[id];
    return engine;
}

AMDGPUDevice::getSDMAById(int id) {…}


/*
 * Look up the SDMA engine stored for an offset.
 *
 * The lookup does not modify sdmaEngs. Querying an unknown offset therefore
 * leaves no null entry behind for code that later walks the map and
 * dereferences each engine (e.g. serialize()).
 *
 * @param offset Key the engine was stored under with setSDMAEngine().
 * @return The engine, or nullptr when nothing is stored for the offset.
 */
SDMAEngine*
AMDGPUDevice::getSDMAEngine(Addr offset)
{
    auto entry = sdmaEngs.find(offset);
    if (entry == sdmaEngs.end()) {
        return nullptr;
    }

    return entry->second;
}

AMDGPUDevice::getSDMAEngine(Addr offset) {…}


void


AMDGPUDevice::intrPost()

{

    PciDevice::intrPost();

}

AMDGPUDevice::intrPost() {…}


/*
 * Write the device state to a checkpoint.
 *
 * After the PciDevice state, this saves three tables flattened into
 * parallel arrays: the doorbell table (offset, queue type, IP id), the SDMA
 * engine map (offset, engine ID) and the usedVMIDs map (vmid, number of
 * queue ids, and all queue ids concatenated). The deviceMem and gpuvm
 * objects are then serialized as their own sections.
 *
 * @param cp Checkpoint to write to.
 */
void


AMDGPUDevice::serialize(CheckpointOut &cp) const

{

    // Serialize the PciDevice base class

    PciDevice::serialize(cp);


    uint64_t doorbells_size = doorbells.size();

    uint64_t sdma_engs_size = sdmaEngs.size();

    uint64_t used_vmid_map_size = usedVMIDs.size();


    SERIALIZE_SCALAR(doorbells_size);

    SERIALIZE_SCALAR(sdma_engs_size);

    // Save the number of vmids used

    SERIALIZE_SCALAR(used_vmid_map_size);


    // Make a c-style array of the regs to serialize

    // NOTE(review): these arrays are sized by runtime values, which relies
    // on the compiler's variable-length array extension and gives
    // zero-length arrays when a table is empty.
    uint32_t doorbells_offset[doorbells_size];

    QueueType doorbells_queues[doorbells_size];

    int doorbells_ip_ids[doorbells_size];

    uint32_t sdma_engs_offset[sdma_engs_size];

    int sdma_engs[sdma_engs_size];

    int used_vmids[used_vmid_map_size];

    int used_queue_id_sizes[used_vmid_map_size];

    std::vector<int> used_vmid_sets;


    // Flatten the doorbell table into three parallel arrays.
    int idx = 0;

    for (auto & it : doorbells) {

        doorbells_offset[idx] = it.first;

        doorbells_queues[idx] = it.second.qtype;

        doorbells_ip_ids[idx] = it.second.ip_id;

        ++idx;

    }


    // Engines are saved by ID rather than by pointer.
    idx = 0;

    for (auto & it : sdmaEngs) {

        sdma_engs_offset[idx] = it.first;

        sdma_engs[idx] = it.second->getId();

        ++idx;

    }


    // For each vmid record how many queue ids it has and append those ids
    // to one combined list.
    idx = 0;

    for (auto & it : usedVMIDs) {

        used_vmids[idx] = it.first;

        used_queue_id_sizes[idx] = it.second.size();

        std::vector<int> set_vector(it.second.begin(), it.second.end());

        used_vmid_sets.insert(used_vmid_sets.end(),

                set_vector.begin(), set_vector.end());

        ++idx;

    }


    // Copy the combined queue id list into a plain array for the
    // SERIALIZE_ARRAY call below; it is freed at the end of this function.
    int num_queue_id = used_vmid_sets.size();

    int* vmid_array = new int[num_queue_id];

    std::copy(used_vmid_sets.begin(), used_vmid_sets.end(), vmid_array);


    SERIALIZE_ARRAY(doorbells_offset, sizeof(doorbells_offset)/

        sizeof(doorbells_offset[0]));

    SERIALIZE_ARRAY(doorbells_queues, sizeof(doorbells_queues)/

        sizeof(doorbells_queues[0]));

    SERIALIZE_ARRAY(doorbells_ip_ids, sizeof(doorbells_ip_ids)/

        sizeof(doorbells_ip_ids[0]));

    SERIALIZE_ARRAY(sdma_engs_offset, sizeof(sdma_engs_offset)/

        sizeof(sdma_engs_offset[0]));

    SERIALIZE_ARRAY(sdma_engs, sizeof(sdma_engs)/sizeof(sdma_engs[0]));

    // Save the vmids used in an array

    SERIALIZE_ARRAY(used_vmids, sizeof(used_vmids)/sizeof(used_vmids[0]));

    // Save the size of the set of queue ids mapped to each vmid

    SERIALIZE_ARRAY(used_queue_id_sizes,

            sizeof(used_queue_id_sizes)/sizeof(used_queue_id_sizes[0]));

    // Save all the queue ids used for all the vmids

    SERIALIZE_ARRAY(vmid_array, num_queue_id);

    // Save the total number of queue ids used

    SERIALIZE_SCALAR(num_queue_id);


    // Serialize the device memory

    deviceMem.serializeSection(cp, "deviceMem");

    gpuvm.serializeSection(cp, "GPUVM");


    delete[] vmid_array;

}

AMDGPUDevice::serialize(CheckpointOut &cp) const {…}


/**
 * Restore the device state from a checkpoint.
 *
 * Reads, in order: the PciDevice base state, the doorbell table, the
 * doorbell-offset to SDMA engine table, the VMID to queue-id sets, and
 * finally the "deviceMem" and "GPUVM" sections.
 *
 * Scratch buffers are heap-backed std::vectors instead of variable-length
 * arrays: their sizes come straight from the checkpoint, VLAs are not
 * standard C++, and a large count would overflow the stack.
 *
 * gem5's UNSERIALIZE_ARRAY macro derives the checkpoint key from the
 * spelling of its first argument, so each buffer is accessed through a raw
 * pointer that carries the key name expected in the checkpoint.
 *
 * @param cp Checkpoint to read from.
 */
void
AMDGPUDevice::unserialize(CheckpointIn &cp)
{
    // Unserialize the PciDevice base class
    PciDevice::unserialize(cp);

    uint64_t doorbells_size = 0;
    uint64_t sdma_engs_size = 0;
    uint64_t used_vmid_map_size = 0;

    UNSERIALIZE_SCALAR(doorbells_size);
    UNSERIALIZE_SCALAR(sdma_engs_size);
    UNSERIALIZE_SCALAR(used_vmid_map_size);

    if (doorbells_size > 0) {
        std::vector<uint32_t> offset_buf(doorbells_size);
        std::vector<QueueType> queue_buf(doorbells_size);
        std::vector<int> ip_id_buf(doorbells_size);

        // Pointer names double as the checkpoint keys (see header comment).
        uint32_t *doorbells_offset = offset_buf.data();
        QueueType *doorbells_queues = queue_buf.data();
        int *doorbells_ip_ids = ip_id_buf.data();

        UNSERIALIZE_ARRAY(doorbells_offset, doorbells_size);
        UNSERIALIZE_ARRAY(doorbells_queues, doorbells_size);
        UNSERIALIZE_ARRAY(doorbells_ip_ids, doorbells_size);

        for (uint64_t idx = 0; idx < doorbells_size; ++idx) {
            auto &info = doorbells[doorbells_offset[idx]];
            info.qtype = doorbells_queues[idx];
            info.ip_id = doorbells_ip_ids[idx];
        }
    }

    if (sdma_engs_size > 0) {
        std::vector<uint32_t> offset_buf(sdma_engs_size);
        std::vector<int> id_buf(sdma_engs_size);

        uint32_t *sdma_engs_offset = offset_buf.data();
        int *sdma_engs = id_buf.data();

        UNSERIALIZE_ARRAY(sdma_engs_offset, sdma_engs_size);
        UNSERIALIZE_ARRAY(sdma_engs, sdma_engs_size);

        for (uint64_t idx = 0; idx < sdma_engs_size; ++idx) {
            const int sdma_id = sdma_engs[idx];

            // Use find() rather than operator[]: with asserts compiled out,
            // operator[] would silently insert a null engine for an id this
            // device does not have.
            auto eng = sdmaIds.find(sdma_id);
            fatal_if(eng == sdmaIds.end(),
                     "Checkpoint references unknown SDMA engine id %d",
                     sdma_id);

            sdmaEngs.insert(std::make_pair(sdma_engs_offset[idx],
                                           eng->second));
        }
    }

    if (used_vmid_map_size > 0) {
        int num_queue_id = 0;

        // Extract the total number of queue ids used
        UNSERIALIZE_SCALAR(num_queue_id);
        fatal_if(num_queue_id < 0,
                 "Checkpoint has a negative queue id count (%d)",
                 num_queue_id);

        std::vector<int> vmid_buf(used_vmid_map_size);
        std::vector<int> set_size_buf(used_vmid_map_size);
        std::vector<int> queue_id_buf(num_queue_id);

        int *used_vmids = vmid_buf.data();
        int *used_queue_id_sizes = set_size_buf.data();
        int *vmid_array = queue_id_buf.data();

        // Extract the vmids used
        UNSERIALIZE_ARRAY(used_vmids, used_vmid_map_size);
        // Extract the size of the queue id set for each vmid
        UNSERIALIZE_ARRAY(used_queue_id_sizes, used_vmid_map_size);
        // Extract all the queue ids used
        UNSERIALIZE_ARRAY(vmid_array, num_queue_id);

        // Populate the usedVMIDs map with the queue ids per vmid. The queue
        // ids of all vmids are stored back to back in vmid_array; `next` is
        // the start of the current vmid's slice.
        size_t next = 0;
        for (uint64_t it = 0; it < used_vmid_map_size; ++it) {
            const int vmid = used_vmids[it];
            const int vmid_set_size = used_queue_id_sizes[it];

            // Never read past the end of the flattened queue id array.
            fatal_if(vmid_set_size < 0 ||
                     static_cast<size_t>(vmid_set_size) >
                         queue_id_buf.size() - next,
                     "Checkpoint queue id set for VMID %d is out of range",
                     vmid);

            // Touch the entry even when its set is empty: allocateVMID()
            // treats presence in usedVMIDs as "already allocated".
            auto &queue_ids = usedVMIDs[vmid];
            queue_ids.insert(queue_id_buf.begin() + next,
                             queue_id_buf.begin() + next + vmid_set_size);

            next += vmid_set_size;
        }
    }

    // Unserialize the device memory
    deviceMem.unserializeSection(cp, "deviceMem");
    gpuvm.unserializeSection(cp, "GPUVM");
}

AMDGPUDevice::unserialize(CheckpointIn &cp) {…}


/**
 * Hand out the lowest VMID in [1, AMDGPU_VM_COUNT) that is not yet present
 * in usedVMIDs, record the pasid -> vmid association, and remember it as
 * the last VMID allocated. Panics when every VMID is taken.
 *
 * @param pasid Process address space id to associate with the new VMID.
 * @return The newly allocated VMID.
 */
uint16_t
AMDGPUDevice::allocateVMID(uint16_t pasid)
{
    for (uint16_t candidate = 1; candidate < AMDGPU_VM_COUNT; ++candidate) {
        // Skip VMIDs that already have an entry.
        if (usedVMIDs.count(candidate) != 0) {
            continue;
        }

        // Like insert(), emplace() keeps an existing pasid mapping intact.
        idMap.emplace(pasid, candidate);
        // Mark the VMID as used with an empty queue id set.
        usedVMIDs.try_emplace(candidate);
        _lastVMID = candidate;

        return candidate;
    }

    panic("All VMIDs have been assigned");
}

AMDGPUDevice::allocateVMID(uint16_t pasid) {…}


/**
 * Drop the usedVMIDs entry for the given VMID, if there is one.
 *
 * @param vmid VMID whose entry should be removed.
 */
void
AMDGPUDevice::deallocateVmid(uint16_t vmid)
{
    const auto entry = usedVMIDs.find(vmid);
    if (entry != usedVMIDs.end()) {
        usedVMIDs.erase(entry);
    }
}

AMDGPUDevice::deallocateVmid(uint16_t vmid) {…}


/**
 * Remove the pasid -> vmid association and the usedVMIDs entry of the VMID
 * it pointed to. An unknown pasid trips an assertion; when assertions are
 * compiled out the call is a no-op.
 *
 * @param pasid Process address space id to release.
 */
void
AMDGPUDevice::deallocatePasid(uint16_t pasid)
{
    const auto mapping = idMap.find(pasid);

    assert(mapping != idMap.end());
    if (mapping == idMap.end()) {
        return;
    }

    // Copy the VMID out before the iterator is invalidated by erase().
    const uint16_t mapped_vmid = mapping->second;

    idMap.erase(mapping);
    usedVMIDs.erase(mapped_vmid);
}

AMDGPUDevice::deallocatePasid(uint16_t pasid) {…}


/**
 * Forget every pasid/VMID allocation, ask each SDMA engine to deallocate
 * its RLC queues, and unmap the doorbells of all user queues.
 *
 * @param unmap_static Forwarded unchanged to
 *                     SDMAEngine::deallocateRLCQueues().
 */
void
AMDGPUDevice::deallocateAllQueues(bool unmap_static)
{
    idMap.clear();
    usedVMIDs.clear();

    for (auto &engine : sdmaEngs) {
        engine.second->deallocateRLCQueues(unmap_static);
    }

    // "All" queues implicitly refers to all user queues. User queues begin at
    // doorbell address 0x4000, so only doorbells at or above that address are
    // unmapped; anything below is left alone.
    for (const auto &mapping : doorbellVMIDMap) {
        const auto db_offset = mapping.first;
        if (db_offset < 0x4000) {
            continue;
        }
        doorbells.erase(db_offset);
    }
}

AMDGPUDevice::deallocateAllQueues(bool unmap_static) {…}


/**
 * Record which VMID a doorbell address belongs to, replacing any VMID that
 * was previously recorded for that doorbell.
 *
 * @param doorbell Doorbell address used as the key.
 * @param vmid     VMID to associate with the doorbell.
 */
void
AMDGPUDevice::mapDoorbellToVMID(Addr doorbell, uint16_t vmid)
{
    doorbellVMIDMap.insert_or_assign(doorbell, vmid);
}

AMDGPUDevice::mapDoorbellToVMID(Addr doorbell, uint16_t vmid) {…}


/**
 * Accessor for the usedVMIDs map, keyed by VMID with a set of ints as the
 * value (insertQId() adds ids to these sets).
 *
 * @return A non-const reference to the member itself, so callers can read
 *         or modify the map in place.
 */
std::unordered_map<uint16_t, std::set<int>>&


AMDGPUDevice::getUsedVMIDs()

{

    return usedVMIDs;

}

AMDGPUDevice::getUsedVMIDs() {…}


/**
 * Add an id to the set stored in usedVMIDs for the given VMID.
 *
 * @param vmid VMID whose set receives the id.
 * @param id   Id to add; inserting an id that is already present has no
 *             effect because the container is a std::set.
 */
void
AMDGPUDevice::insertQId(uint16_t vmid, int id)
{
    // operator[] creates an empty set the first time a VMID is seen.
    std::set<int> &queue_ids = usedVMIDs[vmid];
    queue_ids.insert(id);
}

AMDGPUDevice::insertQId(uint16_t vmid, int id) {…}


} // namespace gem5

abstract_mem.hh
AbstractMemory declaration.

amdgpu_device.hh

amdgpu_nbio.hh

AMDGPU_MP0_SMN_C2PMSG_33
#define AMDGPU_MP0_SMN_C2PMSG_33
Definition amdgpu_nbio.hh:66

amdgpu_vm.hh

VEGA10_FB_LOCATION_BASE
#define VEGA10_FB_LOCATION_BASE
Definition amdgpu_vm.hh:77

VEGA10_FB_LOCATION_TOP
#define VEGA10_FB_LOCATION_TOP
Definition amdgpu_vm.hh:78

MI200_MEM_SIZE_REG
#define MI200_MEM_SIZE_REG
Definition amdgpu_vm.hh:84

MI200_FB_LOCATION_TOP
#define MI200_FB_LOCATION_TOP
Definition amdgpu_vm.hh:86

MI100_FB_LOCATION_BASE
#define MI100_FB_LOCATION_BASE
Definition amdgpu_vm.hh:81

MI200_FB_LOCATION_BASE
#define MI200_FB_LOCATION_BASE
Definition amdgpu_vm.hh:85

MI100_FB_LOCATION_TOP
#define MI100_FB_LOCATION_TOP
Definition amdgpu_vm.hh:82

MI100_MEM_SIZE_REG
#define MI100_MEM_SIZE_REG
Definition amdgpu_vm.hh:80

DPRINTF
#define DPRINTF(x,...)
Definition trace.hh:209

byteswap.hh

data
const char data[]
Definition circlebuf.test.cc:48

gem5::AMDGPUDevice
Device model for an AMD GPU.
Definition amdgpu_device.hh:64

gem5::AMDGPUDevice::_lastVMID
uint16_t _lastVMID
Definition amdgpu_device.hh:153

gem5::AMDGPUDevice::insertQId
void insertQId(uint16_t vmid, int id)
Definition amdgpu_device.cc:979

gem5::AMDGPUDevice::pm4Ranges
std::unordered_map< AddrRange, PM4PacketProcessor *, AddrRangeHasher > pm4Ranges
Definition amdgpu_device.hh:127

gem5::AMDGPUDevice::deallocateAllQueues
void deallocateAllQueues(bool unmap_static)
Definition amdgpu_device.cc:948

gem5::AMDGPUDevice::doorbellVMIDMap
std::unordered_map< Addr, uint16_t > doorbellVMIDMap
Definition amdgpu_device.hh:149

gem5::AMDGPUDevice::idMap
std::unordered_map< uint16_t, uint16_t > idMap
Definition amdgpu_device.hh:147

gem5::AMDGPUDevice::readMMIO
void readMMIO(PacketPtr pkt, Addr offset)
Definition amdgpu_device.cc:401

gem5::AMDGPUDevice::serialize
void serialize(CheckpointOut &cp) const override
Checkpoint support.
Definition amdgpu_device.cc:754

gem5::AMDGPUDevice::processPendingDoorbells
void processPendingDoorbells(uint32_t offset)
Definition amdgpu_device.cc:660

gem5::AMDGPUDevice::getAddrRanges
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
Definition amdgpu_device.cc:228

gem5::AMDGPUDevice::unserialize
void unserialize(CheckpointIn &cp) override
Unserialize an object.
Definition amdgpu_device.cc:834

gem5::AMDGPUDevice::writeMMIO
void writeMMIO(PacketPtr pkt, Addr offset)
Definition amdgpu_device.cc:536

gem5::AMDGPUDevice::cp
GPUCommandProcessor * cp
Definition amdgpu_device.hh:116

gem5::AMDGPUDevice::setDoorbellType
void setDoorbellType(uint32_t offset, QueueType qt, int ip_id=0)
Set handles to GPU blocks.
Definition amdgpu_device.cc:710

gem5::AMDGPUDevice::write
Tick write(PacketPtr pkt) override
Pure virtual function that the device must implement.
Definition amdgpu_device.cc:619

gem5::AMDGPUDevice::gpuvm
AMDGPUVM gpuvm
Definition amdgpu_device.hh:115

gem5::AMDGPUDevice::gfx
AMDGPUGfx gfx
Definition amdgpu_device.hh:112

gem5::AMDGPUDevice::readROM
void readROM(PacketPtr pkt)
Definition amdgpu_device.cc:201

gem5::AMDGPUDevice::romRange
AddrRange romRange
VGA ROM methods.
Definition amdgpu_device.hh:96

gem5::AMDGPUDevice::doorbells
std::unordered_map< uint32_t, DoorbellInfo > doorbells
Structures to hold registers, doorbells, and some frame memory.
Definition amdgpu_device.hh:90

gem5::AMDGPUDevice::getUsedVMIDs
std::unordered_map< uint16_t, std::set< int > > & getUsedVMIDs()
Definition amdgpu_device.cc:973

gem5::AMDGPUDevice::rom
std::array< uint8_t, ROM_SIZE > rom
Definition amdgpu_device.hh:101

gem5::AMDGPUDevice::isROM
bool isROM(Addr addr) const
Definition amdgpu_device.hh:97

gem5::AMDGPUDevice::unsetDoorbell
void unsetDoorbell(uint32_t offset)
Definition amdgpu_device.cc:718

gem5::AMDGPUDevice::pendingDoorbellPkts
std::unordered_map< uint32_t, PacketPtr > pendingDoorbellPkts
Definition amdgpu_device.hh:91

gem5::AMDGPUDevice::setRegVal
void setRegVal(uint64_t addr, uint32_t value)
Definition amdgpu_device.cc:695

gem5::AMDGPUDevice::sdmaMmios
std::unordered_map< uint32_t, AddrRange > sdmaMmios
Definition amdgpu_device.hh:134

gem5::AMDGPUDevice::gfx_version
GfxVersion gfx_version
Definition amdgpu_device.hh:161

gem5::AMDGPUDevice::sdmaFuncPtr
void(SDMAEngine::* sdmaFuncPtr)(uint32_t)
Definition amdgpu_device.hh:136

gem5::AMDGPUDevice::getSDMAEngine
SDMAEngine * getSDMAEngine(Addr offset)
Definition amdgpu_device.cc:742

gem5::AMDGPUDevice::gpuMemMgr
AMDGPUMemoryManager * gpuMemMgr
Definition amdgpu_device.hh:113

gem5::AMDGPUDevice::AMDGPUDevice
AMDGPUDevice(const AMDGPUDeviceParams &p)
Definition amdgpu_device.cc:55

gem5::AMDGPUDevice::readDoorbell
void readDoorbell(PacketPtr pkt, Addr offset)
Definition amdgpu_device.cc:394

gem5::AMDGPUDevice::nbio
AMDGPUNbio nbio
Blocks of the GPU.
Definition amdgpu_device.hh:111

gem5::AMDGPUDevice::readConfig
Tick readConfig(PacketPtr pkt) override
Read from the PCI config space data that is stored locally.
Definition amdgpu_device.cc:247

gem5::AMDGPUDevice::sdmaFunc
std::unordered_map< uint32_t, sdmaFuncPtr > sdmaFunc
Definition amdgpu_device.hh:137

gem5::AMDGPUDevice::usedVMIDs
std::unordered_map< uint16_t, std::set< int > > usedVMIDs
Definition amdgpu_device.hh:151

gem5::AMDGPUDevice::deviceIH
AMDGPUInterruptHandler * deviceIH
Definition amdgpu_device.hh:114

gem5::AMDGPUDevice::writeConfig
Tick writeConfig(PacketPtr pkt) override
Write to the PCI config space data that is stored locally.
Definition amdgpu_device.cc:311

gem5::AMDGPUDevice::mmioReader
AMDMMIOReader mmioReader
MMIO reader to populate device registers map.
Definition amdgpu_device.hh:106

gem5::AMDGPUDevice::read
Tick read(PacketPtr pkt) override
Pure virtual function that the device must implement.
Definition amdgpu_device.cc:590

gem5::AMDGPUDevice::checkpoint_before_mmios
bool checkpoint_before_mmios
Initial checkpoint support variables.
Definition amdgpu_device.hh:142

gem5::AMDGPUDevice::dispatchAccess
void dispatchAccess(PacketPtr pkt, bool read)
Convert a PCI packet into a response.
Definition amdgpu_device.cc:339

gem5::AMDGPUDevice::getRegVal
uint32_t getRegVal(uint64_t addr)
Register value getter/setter.
Definition amdgpu_device.cc:671

gem5::AMDGPUDevice::deallocateVmid
void deallocateVmid(uint16_t vmid)
Definition amdgpu_device.cc:930

gem5::AMDGPUDevice::mapDoorbellToVMID
void mapDoorbellToVMID(Addr doorbell, uint16_t vmid)
Definition amdgpu_device.cc:967

gem5::AMDGPUDevice::intrPost
void intrPost()
Methods inherited from PciDevice.
Definition amdgpu_device.cc:748

gem5::AMDGPUDevice::readFrame
void readFrame(PacketPtr pkt, Addr offset)
Helper methods to handle specific BAR read/writes.
Definition amdgpu_device.cc:349

gem5::AMDGPUDevice::writeROM
void writeROM(PacketPtr pkt)
Definition amdgpu_device.cc:214

gem5::AMDGPUDevice::writeDoorbell
void writeDoorbell(PacketPtr pkt, Addr offset)
Definition amdgpu_device.cc:470

gem5::AMDGPUDevice::vramRequestorId
RequestorID vramRequestorId()
Methods related to translations and system/device memory.
Definition amdgpu_device.hh:213

gem5::AMDGPUDevice::sdmaIds
std::unordered_map< uint32_t, SDMAEngine * > sdmaIds
Definition amdgpu_device.hh:132

gem5::AMDGPUDevice::allocateVMID
uint16_t allocateVMID(uint16_t pasid)
Definition amdgpu_device.cc:915

gem5::AMDGPUDevice::pm4PktProcs
std::unordered_map< int, PM4PacketProcessor * > pm4PktProcs
Definition amdgpu_device.hh:125

gem5::AMDGPUDevice::deallocatePasid
void deallocatePasid(uint16_t pasid)
Definition amdgpu_device.cc:936

gem5::AMDGPUDevice::init_interrupt_count
int init_interrupt_count
Definition amdgpu_device.hh:143

gem5::AMDGPUDevice::getSDMAById
SDMAEngine * getSDMAById(int id)
Definition amdgpu_device.cc:730

gem5::AMDGPUDevice::writeFrame
void writeFrame(PacketPtr pkt, Addr offset)
Definition amdgpu_device.cc:429

gem5::AMDGPUDevice::setSDMAEngine
void setSDMAEngine(Addr offset, SDMAEngine *eng)
Definition amdgpu_device.cc:724

gem5::AMDGPUDevice::deviceMem
memory::PhysicalMemory deviceMem
Definition amdgpu_device.hh:158

gem5::AMDGPUDevice::sdmaEngs
std::unordered_map< uint32_t, SDMAEngine * > sdmaEngs
Definition amdgpu_device.hh:130

gem5::AMDGPUDevice::CP
GPUCommandProcessor * CP()
Definition amdgpu_device.hh:193

gem5::AMDGPUGfx::readMMIO
void readMMIO(PacketPtr pkt, Addr offset)
Definition amdgpu_gfx.cc:48

gem5::AMDGPUGfx::writeMMIO
void writeMMIO(PacketPtr pkt, Addr offset)
Definition amdgpu_gfx.cc:66

gem5::AMDGPUInterruptHandler::setGPUDevice
void setGPUDevice(AMDGPUDevice *gpu_device)
Definition interrupt_handler.hh:175

gem5::AMDGPUInterruptHandler::updateRptr
void updateRptr(const uint32_t &data)
Definition interrupt_handler.cc:260

gem5::AMDGPUInterruptHandler::writeMMIO
void writeMMIO(PacketPtr pkt, Addr mmio_offset)
Methods for setting the values of interrupt handler MMIO registers.
Definition interrupt_handler.cc:171

gem5::AMDGPUMemoryManager::getRequestorID
RequestorID getRequestorID() const
Get the requestorID for the memory manager.
Definition memory_manager.hh:126

gem5::AMDGPUMemoryManager::getCacheLineSize
Addr getCacheLineSize() const
Definition memory_manager.hh:128

gem5::AMDGPUNbio::readMMIO
void readMMIO(PacketPtr pkt, Addr offset)
Definition amdgpu_nbio.cc:54

gem5::AMDGPUNbio::writeMMIO
void writeMMIO(PacketPtr pkt, Addr offset)
Definition amdgpu_nbio.cc:134

gem5::AMDGPUNbio::readFrame
bool readFrame(PacketPtr pkt, Addr offset)
Definition amdgpu_nbio.cc:199

gem5::AMDGPUNbio::writeFrame
void writeFrame(PacketPtr pkt, Addr offset)
Definition amdgpu_nbio.cc:212

gem5::AMDGPUNbio::setGPUDevice
void setGPUDevice(AMDGPUDevice *gpu_device)
Definition amdgpu_nbio.cc:48

gem5::AMDGPUVM::setMMIOAperture
void setMMIOAperture(mmio_range_t mmio_aperture, AddrRange range)
Definition amdgpu_vm.cc:62

gem5::AMDGPUVM::setMMHUBBase
void setMMHUBBase(Addr base)
Definition amdgpu_vm.hh:230

gem5::AMDGPUVM::getMMIORange
AddrRange getMMIORange(mmio_range_t mmio_aperture)
Definition amdgpu_vm.cc:68

gem5::AMDGPUVM::gartTable
std::unordered_map< uint64_t, uint64_t > gartTable
Copy of GART table.
Definition amdgpu_vm.hh:203

gem5::AMDGPUVM::readMMIO
void readMMIO(PacketPtr pkt, Addr offset)
Definition amdgpu_vm.cc:99

gem5::AMDGPUVM::getMMIOAperture
const AddrRange & getMMIOAperture(Addr addr)
Definition amdgpu_vm.cc:74

gem5::AMDGPUVM::writeMMIO
void writeMMIO(PacketPtr pkt, Addr offset)
Definition amdgpu_vm.cc:135

gem5::AMDGPUVM::getFrameAperture
Addr getFrameAperture(Addr addr)
Definition amdgpu_vm.hh:259

gem5::AMDGPUVM::gartBase
Addr gartBase()
Return base address of GART table in framebuffer.
Definition amdgpu_vm.cc:87

gem5::AMDGPUVM::setMMHUBTop
void setMMHUBTop(Addr top)
Definition amdgpu_vm.hh:231

gem5::AMDMMIOReader::readFromTrace
void readFromTrace(PacketPtr pkt, int barnum, Addr offset)
Get the next MMIO read from the trace file to an offset in a BAR and write the value to the packet pr...
Definition mmio_reader.cc:76

gem5::AddrRange
The AddrRange class encapsulates an address range, and supports a number of tests to check if two ran...
Definition addr_range.hh:82

gem5::CheckpointIn
Definition serialize.hh:69

gem5::GPUCommandProcessor::setGPUDevice
void setGPUDevice(AMDGPUDevice *gpu_device)
Definition gpu_command_processor.cc:836

gem5::GPUCommandProcessor::hsaPacketProc
HSAPacketProcessor & hsaPacketProc()
Definition gpu_command_processor.cc:71

gem5::GPUCommandProcessor::shader
Shader * shader()
Definition gpu_command_processor.cc:849

gem5::GPUCommandProcessor::system
System * system()
Definition gpu_command_processor.cc:823

gem5::HSAPacketProcessor::hwScheduler
HWScheduler * hwScheduler()
Definition hsa_packet_processor.hh:353

gem5::HSAPacketProcessor::setGPUDevice
void setGPUDevice(AMDGPUDevice *gpu_device)
Definition hsa_packet_processor.cc:97

gem5::HWScheduler::write
void write(Addr db_addr, uint64_t doorbell_reg)
Definition hw_scheduler.cc:331

gem5::MemCmd::FunctionalReadError
@ FunctionalReadError
Definition packet.hh:139

gem5::MemCmd::ReadReq
@ ReadReq
Definition packet.hh:87

gem5::Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition packet.hh:295

gem5::Packet::getAddr
Addr getAddr() const
Definition packet.hh:807

gem5::Packet::setUintX
void setUintX(uint64_t w, ByteOrder endian)
Set the value in the word w after truncating it to the length of the packet and then byteswapping it ...
Definition packet.cc:361

gem5::Packet::setLE
void setLE(T v)
Set the value in the data pointer to v as little endian.
Definition packet_access.hh:108

gem5::Packet::createWrite
static PacketPtr createWrite(const RequestPtr &req)
Definition packet.hh:1044

gem5::Packet::dataStatic
void dataStatic(T *p)
Set the data pointer to the following value that should not be freed.
Definition packet.hh:1175

gem5::Packet::getPtr
T * getPtr()
get a pointer to the data ptr.
Definition packet.hh:1225

gem5::Packet::createRead
static PacketPtr createRead(const RequestPtr &req)
Constructor-like methods that return Packets based on Request objects.
Definition packet.hh:1038

gem5::Packet::req
RequestPtr req
A pointer to the original request.
Definition packet.hh:377

gem5::Packet::getSize
unsigned getSize() const
Definition packet.hh:817

gem5::Packet::getUintX
uint64_t getUintX(ByteOrder endian) const
Get the data in the packet byte swapped from the specified endianness and zero-extended to 64 bits.
Definition packet.cc:352

gem5::Packet::getConstPtr
const T * getConstPtr() const
Definition packet.hh:1234

gem5::Packet::dataDynamic
void dataDynamic(T *p)
Set the data pointer to a value that should have delete [] called on it.
Definition packet.hh:1213

gem5::Packet::makeAtomicResponse
void makeAtomicResponse()
Definition packet.hh:1074

gem5::Packet::cmd
MemCmd cmd
The command field of the packet.
Definition packet.hh:372

gem5::Packet::getLE
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
Definition packet_access.hh:78

gem5::Packet::setSuppressFuncError
void setSuppressFuncError()
Definition packet.hh:757

gem5::PciDevice
PCI device, base implementation is only config space.
Definition device.hh:270

gem5::PciDevice::pxcap
PXCAP pxcap
Definition device.hh:301

gem5::PciDevice::config
PCIConfig config
The current config space.
Definition device.hh:275

gem5::PciDevice::unserialize
void unserialize(CheckpointIn &cp) override
Reconstruct the state of this object from a checkpoint.
Definition device.cc:464

gem5::PciDevice::serialize
void serialize(CheckpointOut &cp) const override
Serialize this object to the given output stream.
Definition device.cc:401

gem5::PciDevice::getBAR
bool getBAR(Addr addr, int &num, Addr &offs)
Which base address register (if any) maps the given address?
Definition device.hh:320

gem5::PciDevice::getAddrRanges
AddrRangeList getAddrRanges() const override
Determine the address ranges that this device responds to.
Definition device.cc:269

gem5::PciDevice::pioDelay
Tick pioDelay
Definition device.hh:354

gem5::PciDevice::_busAddr
const PciBusAddr _busAddr
Definition device.hh:272

gem5::PciDevice::readConfig
virtual Tick readConfig(PacketPtr pkt)
Read from the PCI config space data that is stored locally.
Definition device.cc:212

gem5::PciDevice::writeConfig
virtual Tick writeConfig(PacketPtr pkt)
Write to the PCI config space data that is stored locally.
Definition device.cc:283

gem5::PciDevice::intrPost
void intrPost()
Definition device.hh:364

gem5::PciDevice::PXCAP_BASE
const int PXCAP_BASE
Definition device.hh:300

gem5::PciDevice::configDelay
Tick configDelay
Definition device.hh:355

gem5::SDMAEngine
System DMA Engine class for AMD dGPU.
Definition sdma_engine.hh:49

gem5::SDMAEngine::setPageRptrLo
void setPageRptrLo(uint32_t data)
Definition sdma_engine.cc:1495

gem5::SDMAEngine::setGfxRptrLo
void setGfxRptrLo(uint32_t data)
Definition sdma_engine.cc:1409

gem5::SDMAEngine::setGfxWptrLo
void setGfxWptrLo(uint32_t data)
Definition sdma_engine.cc:1465

gem5::SDMAEngine::setGfxRptrHi
void setGfxRptrHi(uint32_t data)
Definition sdma_engine.cc:1417

gem5::SDMAEngine::processRLC
void processRLC(Addr doorbellOffset, Addr wptrOffset)
Definition sdma_engine.cc:316

gem5::SDMAEngine::setGfxSize
void setGfxSize(uint32_t data)
Definition sdma_engine.cc:1457

gem5::SDMAEngine::setGfxBaseLo
void setGfxBaseLo(uint32_t data)
Definition sdma_engine.cc:1393

gem5::SDMAEngine::processGfx
void processGfx(Addr wptrOffset)
Given a new write ptr offset, communicated to the GPU through a doorbell write, the SDMA engine proce...
Definition sdma_engine.cc:294

gem5::SDMAEngine::setGfxWptrHi
void setGfxWptrHi(uint32_t data)
Definition sdma_engine.cc:1472

gem5::SDMAEngine::setGfxDoorbellOffsetLo
void setGfxDoorbellOffsetLo(uint32_t data)
Definition sdma_engine.cc:1439

gem5::SDMAEngine::processPage
void processPage(Addr wptrOffset)
Definition sdma_engine.cc:305

gem5::SDMAEngine::setPageDoorbellOffsetLo
void setPageDoorbellOffsetLo(uint32_t data)
Definition sdma_engine.cc:1525

gem5::SDMAEngine::setPageWptrLo
void setPageWptrLo(uint32_t data)
Definition sdma_engine.cc:1551

gem5::SDMAEngine::setGfxDoorbellLo
void setGfxDoorbellLo(uint32_t data)
Definition sdma_engine.cc:1425

gem5::SDMAEngine::setPageDoorbellLo
void setPageDoorbellLo(uint32_t data)
Definition sdma_engine.cc:1511

gem5::SDMAEngine::setPageSize
void setPageSize(uint32_t data)
Definition sdma_engine.cc:1543

gem5::SDMAEngine::setPageBaseLo
void setPageBaseLo(uint32_t data)
Definition sdma_engine.cc:1479

gem5::SDMAEngine::setGfxBaseHi
void setGfxBaseHi(uint32_t data)
Definition sdma_engine.cc:1401

gem5::SDMAEngine::setPageRptrHi
void setPageRptrHi(uint32_t data)
Definition sdma_engine.cc:1503

gem5::Shader::cuList
std::vector< ComputeUnit * > cuList
Definition shader.hh:268

gem5::Shader::gpuCmdProc
GPUCommandProcessor & gpuCmdProc
Definition shader.hh:270

gem5::System::getDeviceMemory
memory::AbstractMemory * getDeviceMemory(const PacketPtr &pkt) const
Return a pointer to the device memory.
Definition system.cc:311

gem5::memory::AbstractMemory::access
void access(PacketPtr pkt)
Perform an untimed memory access and update all the state (e.g.
Definition abstract_mem.cc:380

std::list< AddrRange >

std::vector
STL vector class.
Definition stl.hh:37

gpu_command_processor.hh
The GPUCommandProcessor (CP) is responsible for accepting commands, in the form of HSA AQL packets,...

gem5::RangeSize
AddrRange RangeSize(Addr start, Addr size)
Definition addr_range.hh:849

gem5::AddrRange::start
Addr start() const
Get the start address of the range.
Definition addr_range.hh:343

gem5::bits
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Definition bitfield.hh:79

panic
#define panic(...)
This implements a cprintf based panic() function.
Definition logging.hh:188

fatal_if
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition logging.hh:236

gem5::Serializable::serializeSection
void serializeSection(CheckpointOut &cp, const char *name) const
Serialize an object into a new section.
Definition serialize.cc:74

UNSERIALIZE_ARRAY
#define UNSERIALIZE_ARRAY(member, size)
Definition serialize.hh:618

SERIALIZE_ARRAY
#define SERIALIZE_ARRAY(member, size)
Definition serialize.hh:610

gem5::Serializable::unserializeSection
void unserializeSection(CheckpointIn &cp, const char *name)
Unserialize an a child object.
Definition serialize.cc:81

hw_scheduler.hh

interrupt_handler.hh

warn
#define warn(...)
Definition logging.hh:256

gem5::ArmISA::s
Bitfield< 4 > s
Definition misc_types.hh:647

gem5::ArmISA::offset
Bitfield< 23, 0 > offset
Definition types.hh:144

gem5::ArmISA::id
Bitfield< 33 > id
Definition misc_types.hh:332

gem5::ArmISA::m
Bitfield< 0 > m
Definition misc_types.hh:479

gem5::MipsISA::r
r
Definition pra_constants.hh:98

gem5::MipsISA::p
Bitfield< 0 > p
Definition pra_constants.hh:326

gem5::X86ISA::system
Bitfield< 15 > system
Definition misc.hh:1032

gem5::X86ISA::addr
Bitfield< 3 > addr
Definition types.hh:84

gem5
Copyright (c) 2024 Arm Limited All rights reserved.
Definition binary32.hh:36

gem5::RequestPtr
std::shared_ptr< Request > RequestPtr
Definition request.hh:94

gem5::curTick
Tick curTick()
The universal simulation clock.
Definition cur_tick.hh:46

gem5::MMIO_BAR
constexpr int MMIO_BAR
Definition amdgpu_defines.hh:68

gem5::CheckpointOut
std::ostream CheckpointOut
Definition serialize.hh:66

gem5::QueueType
QueueType
Definition amdgpu_defines.hh:42

gem5::SDMAGfx
@ SDMAGfx
Definition amdgpu_defines.hh:45

gem5::Compute
@ Compute
Definition amdgpu_defines.hh:43

gem5::RLC
@ RLC
Definition amdgpu_defines.hh:49

gem5::InterruptHandler
@ InterruptHandler
Definition amdgpu_defines.hh:48

gem5::Gfx
@ Gfx
Definition amdgpu_defines.hh:44

gem5::ComputeAQL
@ ComputeAQL
Definition amdgpu_defines.hh:47

gem5::SDMAPage
@ SDMAPage
Definition amdgpu_defines.hh:46

gem5::Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition types.hh:147

gem5::pasid
Bitfield< 10 > pasid
Definition x86_cpu.cc:129

gem5::Tick
uint64_t Tick
Tick count type.
Definition types.hh:58

gem5::ROM_SIZE
constexpr uint32_t ROM_SIZE
Definition amdgpu_defines.hh:72

gem5::exitSimLoop
void exitSimLoop(const std::string &message, int exit_code, Tick when, Tick repeat, bool serialize)
Schedule an event to exit the simulation loop (returning to Python) at the end of the current cycle (...
Definition sim_events.cc:88

gem5::IH_OFFSET_SHIFT
static constexpr uint32_t IH_OFFSET_SHIFT
Definition amdgpu_defines.hh:75

gem5::MMHUB_OFFSET_SHIFT
static constexpr uint32_t MMHUB_OFFSET_SHIFT
Definition amdgpu_defines.hh:77

gem5::AMDGPU_VM_COUNT
static constexpr int AMDGPU_VM_COUNT
Definition amdgpu_defines.hh:63

gem5::FRAMEBUFFER_BAR
constexpr int FRAMEBUFFER_BAR
Definition amdgpu_defines.hh:66

gem5::GRBM_MMIO_RANGE
@ GRBM_MMIO_RANGE
Definition amdgpu_vm.hh:107

gem5::GFX_MMIO_RANGE
@ GFX_MMIO_RANGE
Definition amdgpu_vm.hh:106

gem5::IH_MMIO_RANGE
@ IH_MMIO_RANGE
Definition amdgpu_vm.hh:108

gem5::MMHUB_MMIO_RANGE
@ MMHUB_MMIO_RANGE
Definition amdgpu_vm.hh:105

gem5::NBIO_MMIO_RANGE
@ NBIO_MMIO_RANGE
Definition amdgpu_vm.hh:104

gem5::DOORBELL_BAR
constexpr int DOORBELL_BAR
Definition amdgpu_defines.hh:67

gem5::VGA_ROM_DEFAULT
constexpr uint32_t VGA_ROM_DEFAULT
Definition amdgpu_defines.hh:71

gem5::GRBM_OFFSET_SHIFT
static constexpr uint32_t GRBM_OFFSET_SHIFT
Definition amdgpu_defines.hh:76

packet.hh
Declaration of the Packet class.

packet_access.hh

PCI0_INTERRUPT_PIN
#define PCI0_INTERRUPT_PIN
Definition pcireg.h:135

PCI_DEVICE_SPECIFIC
#define PCI_DEVICE_SPECIFIC
Definition pcireg.h:164

PCI_CONFIG_SIZE
#define PCI_CONFIG_SIZE
Definition pcireg.h:165

pm4_packet_processor.hh

sdma_engine.hh

UNSERIALIZE_SCALAR
#define UNSERIALIZE_SCALAR(scalar)
Definition serialize.hh:575

SERIALIZE_SCALAR
#define SERIALIZE_SCALAR(scalar)
Definition serialize.hh:568

shader.hh

sim_exit.hh

gem5::PciBusAddr::func
uint8_t func
Definition types.hh:58

gem5::PciBusAddr::dev
uint8_t dev
Definition types.hh:57

name
const std::string & name()
Definition trace.cc:48

PXCAP
Defines the PCI Express capability register and its associated bitfields for a PCIe device.
Definition pcireg.h:330

PXCAP::data
uint8_t data[48]
Definition pcireg.h:331