gem5 v23.0.0.1
gpu_compute_driver.cc
/*
 * Copyright (c) 2015-2018 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "gpu-compute/gpu_compute_driver.hh"

#include <memory>

#include "arch/x86/page_size.hh"
#include "base/compiler.hh"
#include "base/logging.hh"
#include "base/trace.hh"
#include "cpu/thread_context.hh"
#include "debug/GPUDriver.hh"
#include "debug/GPUShader.hh"
#include "dev/hsa/hsa_packet_processor.hh"
#include "dev/hsa/kfd_event_defines.h"
#include "dev/hsa/kfd_ioctl.h"
#include "gpu-compute/gpu_command_processor.hh"
#include "gpu-compute/shader.hh"
#include "mem/port_proxy.hh"
#include "mem/se_translating_port_proxy.hh"
#include "mem/translating_port_proxy.hh"
#include "params/GPUComputeDriver.hh"
#include "sim/full_system.hh"
#include "sim/process.hh"
#include "sim/se_workload.hh"
#include "sim/syscall_emul_buf.hh"

namespace gem5
{

GPUComputeDriver::GPUComputeDriver(const Params &p)
    : EmulatedDriver(p), device(p.device), queueId(0),
      isdGPU(p.isdGPU), gfxVersion(p.gfxVersion), dGPUPoolID(p.dGPUPoolID),
      eventPage(0), eventSlotIndex(0)
{
    device->attachDriver(this);
    DPRINTF(GPUDriver, "Constructing KFD: device\n");

    // Convert the 3-bit mtype specified in Shader.py to the proper flags
    // used for requests.
    std::bitset<MtypeFlags::NUM_MTYPE_BITS> mtype(p.m_type);
    if (mtype.test(MtypeFlags::SHARED)) {
        defaultMtype.set(Request::SHARED);
    }

    if (mtype.test(MtypeFlags::READ_WRITE)) {
        defaultMtype.set(Request::READ_WRITE);
    }

    if (mtype.test(MtypeFlags::CACHED)) {
        defaultMtype.set(Request::CACHED);
    }
}
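
// The defaultMtype flags assembled above serve two roles later in this file:
// APU requests receive them verbatim in setMtype(), while dGPU allocations
// start from them in AMDKFD_IOC_ALLOC_MEMORY_OF_GPU (where COHERENT requests
// clear the flags again).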

const char*
GPUComputeDriver::DriverWakeupEvent::description() const
{
    return "DriverWakeupEvent";
}

/**
 * Create an FD entry for the KFD inside of the owning process.
 */
int
GPUComputeDriver::open(ThreadContext *tc, int mode, int flags)
{
    DPRINTF(GPUDriver, "Opened %s\n", filename);
    auto process = tc->getProcessPtr();
    auto device_fd_entry = std::make_shared<DeviceFDEntry>(this, filename);
    int tgt_fd = process->fds->allocFD(device_fd_entry);
    return tgt_fd;
}

/**
 * Currently, mmap() will simply setup a mapping for the associated
 * device's packet processor's doorbells and creates the event page.
 */
Addr
GPUComputeDriver::mmap(ThreadContext *tc, Addr start, uint64_t length,
                       int prot, int tgt_flags, int tgt_fd, off_t offset)
{
    auto process = tc->getProcessPtr();
    auto mem_state = process->memState;

    Addr pg_off = offset >> PAGE_SHIFT;
    Addr mmap_type = pg_off & KFD_MMAP_TYPE_MASK;
    DPRINTF(GPUDriver, "amdkfd mmap (start: %p, length: 0x%x, "
            "offset: 0x%x)\n", start, length, offset);

    switch (mmap_type) {
      case KFD_MMAP_TYPE_DOORBELL:
        DPRINTF(GPUDriver, "amdkfd mmap type DOORBELL offset\n");
        start = mem_state->extendMmap(length);
        process->pTable->map(start, device->hsaPacketProc().pioAddr,
                             length, false);
        break;
      case KFD_MMAP_TYPE_EVENTS:
        DPRINTF(GPUDriver, "amdkfd mmap type EVENTS offset\n");
        panic_if(start != 0,
                 "Start address should be provided by KFD\n");
        panic_if(length != 8 * KFD_SIGNAL_EVENT_LIMIT,
                 "Requested length %d, expected length %d; length "
                 "mismatch\n", length, 8 * KFD_SIGNAL_EVENT_LIMIT);
        // Reserve a VA region for the event page on first use; the slots
        // handed out by AMDKFD_IOC_CREATE_EVENT live in this page.
        if (!eventPage) {
            eventPage = mem_state->extendMmap(length);
            start = eventPage;
        }
        break;
      default:
        warn_once("Unrecognized kfd mmap type %llx\n", mmap_type);
        break;
    }

    return start;
}
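
// Note: each signal slot in the event page is 8 bytes, which is why the
// EVENTS mmap above insists on a length of exactly
// 8 * KFD_SIGNAL_EVENT_LIMIT. AMDKFD_IOC_CREATE_EVENT below hands out slot
// indices into this page.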

/**
 * Forward relevant parameters to packet processor; queueId is used to
 * link the doorbell. The queueIDs are not re-used in the current
 * implementation, and we allocate only one page (4096 bytes) for
 * doorbells, so check if this queue ID can be mapped into that page.
 */
void
GPUComputeDriver::allocateQueue(PortProxy &mem_proxy, Addr ioc_buf_addr)
{
    TypedBufferArg<kfd_ioctl_create_queue_args> args(ioc_buf_addr);
    args.copyIn(mem_proxy);

    if ((doorbellSize() * queueId) > 4096) {
        fatal("%s: Exceeded maximum number of HSA queues allowed\n", name());
    }

    args->doorbell_offset = (KFD_MMAP_TYPE_DOORBELL |
        KFD_MMAP_GPU_ID(args->gpu_id)) << PAGE_SHIFT;

    // For Vega, the offset needs to include the exact doorbell location
    if (doorbellSize())
        args->doorbell_offset += queueId * doorbellSize();

    args->queue_id = queueId++;
    auto &hsa_pp = device->hsaPacketProc();
    hsa_pp.setDeviceQueueDesc(args->read_pointer_address,
                              args->ring_base_address, args->queue_id,
                              args->ring_size, doorbellSize(), gfxVersion);
    args.copyOut(mem_proxy);
}
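
// Round trip of doorbell_offset (assuming 4 KiB pages, PAGE_SHIFT == 12):
// the KFD_MMAP_TYPE_DOORBELL and KFD_MMAP_GPU_ID bits are packed above
// PAGE_SHIFT, so when the runtime hands the offset back to mmap(), the
// (offset >> PAGE_SHIFT) & KFD_MMAP_TYPE_MASK test above recognizes it and
// maps the packet processor's doorbell PIO range.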

void
GPUComputeDriver::DriverWakeupEvent::scheduleWakeup(Tick wakeup_delay)
{
    assert(driver);
    driver->schedule(this, curTick() + wakeup_delay);
}

void
GPUComputeDriver::signalWakeupEvent(uint32_t event_id)
{
    panic_if(event_id >= eventSlotIndex,
             "Trying wakeup on an event that is not yet created\n");
    if (ETable[event_id].threadWaiting) {
        panic_if(!ETable[event_id].tc,
                 "No thread context to wake up\n");
        ThreadContext *tc = ETable[event_id].tc;
        DPRINTF(GPUDriver,
                "Signal event: Waking up CPU %d\n", tc->cpuId());
        // Remove events that can wake up this thread
        TCEvents[tc].clearEvents();
        // Now wake up this thread
        tc->activate();
    } else {
        // There may be a race between an ioctl call asking to wait on
        // this event and this signalWakeupEvent. We handle that race by
        // setting the event here; the ioctl should take the necessary
        // action when waiting on an already-set event. This may also be a
        // genuine case in which the runtime decided not to wait on this
        // event, but since we cannot distinguish that case from the race,
        // we set the event anyway.
        ETable[event_id].setEvent = true;
    }
}

void
GPUComputeDriver::DriverWakeupEvent::process()
{
    DPRINTF(GPUDriver,
            "Timer event: Waking up CPU %d\n", tc->cpuId());
    // Remove events that can wake up this thread
    driver->TCEvents[tc].clearEvents();
    // Now wake up this thread
    tc->activate();
}

int
GPUComputeDriver::ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf)
{
    TranslatingPortProxy fs_proxy(tc);
    SETranslatingPortProxy se_proxy(tc);
    PortProxy &virt_proxy = FullSystem ? fs_proxy : se_proxy;
    auto process = tc->getProcessPtr();
    auto mem_state = process->memState;

    switch (req) {
      case AMDKFD_IOC_GET_VERSION:
        {
            DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_GET_VERSION\n");

            TypedBufferArg<kfd_ioctl_get_version_args> args(ioc_buf);
            args->major_version = KFD_IOCTL_MAJOR_VERSION;
            args->minor_version = KFD_IOCTL_MINOR_VERSION;

            args.copyOut(virt_proxy);
        }
        break;
      case AMDKFD_IOC_CREATE_QUEUE:
        {
            DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_CREATE_QUEUE\n");

            allocateQueue(virt_proxy, ioc_buf);

            DPRINTF(GPUDriver, "Creating queue %d\n", queueId);
        }
        break;
      case AMDKFD_IOC_DESTROY_QUEUE:
        {
            TypedBufferArg<kfd_ioctl_destroy_queue_args> args(ioc_buf);
            args.copyIn(virt_proxy);
            DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_DESTROY_QUEUE; "
                    "queue id %d\n", args->queue_id);
            device->hsaPacketProc().unsetDeviceQueueDesc(args->queue_id,
                                                         doorbellSize());
        }
        break;
      case AMDKFD_IOC_SET_MEMORY_POLICY:
        {
            warn("unimplemented ioctl: AMDKFD_IOC_SET_MEMORY_POLICY\n");
        }
        break;
      case AMDKFD_IOC_GET_CLOCK_COUNTERS:
        {
            DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_GET_CLOCK_COUNTERS\n");

            TypedBufferArg<kfd_ioctl_get_clock_counters_args> args(ioc_buf);
            args.copyIn(virt_proxy);

            // Set nanosecond resolution
            args->system_clock_freq = 1000000000;

            // Derive all clock counters from the current tick; in this
            // model the GPU, CPU, and system counters are identical and
            // perfectly in sync.
            uint64_t elapsed_nsec = curTick() / sim_clock::as_int::ns;
            args->gpu_clock_counter = elapsed_nsec;
            args->cpu_clock_counter = elapsed_nsec;
            args->system_clock_counter = elapsed_nsec;

            args.copyOut(virt_proxy);
        }
        break;
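
      // Worked example: gem5's default tick rate is 1 THz (1 tick = 1 ps),
      // so sim_clock::as_int::ns is 1000 ticks. After 1 ms of simulated
      // time (10^9 ticks), all three counters above read 10^6, i.e. 1 ms
      // at the advertised 1 GHz (nanosecond-resolution) clock.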
      case AMDKFD_IOC_GET_PROCESS_APERTURES:
        {
            DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_GET_PROCESS_APERTURES\n");

            TypedBufferArg<kfd_ioctl_get_process_apertures_args>
                args(ioc_buf);
            args->num_of_nodes = 1;

            // Set the GPUVM/LDS/scratch apertures the same way the real
            // KFD does; see drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
            // in the Linux kernel.
            for (int i = 0; i < args->num_of_nodes; ++i) {
                // GFX8 and GFX9 devices set their aperture bases
                // differently.
                switch (gfxVersion) {
                  case GfxVersion::gfx801:
                  case GfxVersion::gfx803:
                    args->process_apertures[i].scratch_base =
                        scratchApeBase(i + 1);
                    args->process_apertures[i].lds_base =
                        ldsApeBase(i + 1);
                    break;
                  case GfxVersion::gfx900:
                  case GfxVersion::gfx902:
                    args->process_apertures[i].scratch_base =
                        scratchApeBaseV9();
                    args->process_apertures[i].lds_base =
                        ldsApeBaseV9();
                    break;
                  default:
                    fatal("Invalid gfx version\n");
                }

                // GFX8 and GFX9 set lds and scratch limits the same way
                args->process_apertures[i].scratch_limit =
                    scratchApeLimit(args->process_apertures[i].scratch_base);

                args->process_apertures[i].lds_limit =
                    ldsApeLimit(args->process_apertures[i].lds_base);

                switch (gfxVersion) {
                  case GfxVersion::gfx801:
                    args->process_apertures[i].gpuvm_base =
                        gpuVmApeBase(i + 1);
                    args->process_apertures[i].gpuvm_limit =
                        gpuVmApeLimit(args->process_apertures[i].gpuvm_base);
                    break;
                  case GfxVersion::gfx803:
                  case GfxVersion::gfx900:
                  case GfxVersion::gfx902:
                    // Taken from SVM_USE_BASE in the Linux kernel
                    args->process_apertures[i].gpuvm_base = 0x1000000ull;
                    // Taken from AMDGPU_GMC_HOLE_START in the Linux kernel
                    args->process_apertures[i].gpuvm_limit =
                        0x0000800000000000ULL - 1;
                    break;
                  default:
                    fatal("Invalid gfx version\n");
                }

                // NOTE: Must match ID populated by hsaTopology.py
                //
                // https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/
                // blob/6a986c0943e9acd8c4c0cf2a9d510ff42167b43f/include/uapi/
                // linux/kfd_ioctl.h#L564
                //
                // The gpu_id is a device identifier used by the driver for
                // ioctls that allocate arguments. Each device has a unique
                // id composed out of a non-zero base and an offset.
                if (isdGPU) {
                    switch (gfxVersion) {
                      case GfxVersion::gfx803:
                        args->process_apertures[i].gpu_id = 50156;
                        break;
                      case GfxVersion::gfx900:
                        args->process_apertures[i].gpu_id = 22124;
                        break;
                      default:
                        fatal("Invalid gfx version for dGPU\n");
                    }
                } else {
                    switch (gfxVersion) {
                      case GfxVersion::gfx801:
                      case GfxVersion::gfx902:
                        args->process_apertures[i].gpu_id = 2765;
                        break;
                      default:
                        fatal("Invalid gfx version for APU\n");
                    }
                }

                DPRINTF(GPUDriver, "GPUVM base for node[%i] = %#x\n", i,
                        args->process_apertures[i].gpuvm_base);
                DPRINTF(GPUDriver, "GPUVM limit for node[%i] = %#x\n", i,
                        args->process_apertures[i].gpuvm_limit);

                DPRINTF(GPUDriver, "LDS base for node[%i] = %#x\n", i,
                        args->process_apertures[i].lds_base);
                DPRINTF(GPUDriver, "LDS limit for node[%i] = %#x\n", i,
                        args->process_apertures[i].lds_limit);

                DPRINTF(GPUDriver, "Scratch base for node[%i] = %#x\n", i,
                        args->process_apertures[i].scratch_base);
                DPRINTF(GPUDriver, "Scratch limit for node[%i] = %#x\n", i,
                        args->process_apertures[i].scratch_limit);

                // The apertures must lie in the non-canonical hole of the
                // x86-64 VA space: bits 63:47 may be neither all ones nor
                // all zeros.
                assert(bits<Addr>(args->process_apertures[i].scratch_base, 63,
                       47) != 0x1ffff);
                assert(bits<Addr>(args->process_apertures[i].scratch_base, 63,
                       47) != 0);
                assert(bits<Addr>(args->process_apertures[i].scratch_limit, 63,
                       47) != 0x1ffff);
                assert(bits<Addr>(args->process_apertures[i].scratch_limit, 63,
                       47) != 0);
                assert(bits<Addr>(args->process_apertures[i].lds_base, 63,
                       47) != 0x1ffff);
                assert(bits<Addr>(args->process_apertures[i].lds_base, 63,
                       47) != 0);
                assert(bits<Addr>(args->process_apertures[i].lds_limit, 63,
                       47) != 0x1ffff);
                assert(bits<Addr>(args->process_apertures[i].lds_limit, 63,
                       47) != 0);
            }

            args.copyOut(virt_proxy);
        }
        break;
      case AMDKFD_IOC_UPDATE_QUEUE:
        {
            warn("unimplemented ioctl: AMDKFD_IOC_UPDATE_QUEUE\n");
        }
        break;
      case AMDKFD_IOC_CREATE_EVENT:
        {
            DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_CREATE_EVENT\n");

            TypedBufferArg<kfd_ioctl_create_event_args> args(ioc_buf);
            args.copyIn(virt_proxy);
            if (args->event_type != KFD_IOC_EVENT_SIGNAL) {
                warn("Currently only signal events are supported\n");
            } else if (eventSlotIndex == SLOTS_PER_PAGE) {
                fatal("Signal event wasn't created; signal limit reached\n");
            }
            // Currently, we allocate only one signal page for events.
            // Note that this signal page is of size
            // 8 * KFD_SIGNAL_EVENT_LIMIT.
            uint64_t page_index = 0;
            args->event_page_offset = (page_index | KFD_MMAP_TYPE_EVENTS);
            args->event_page_offset <<= PAGE_SHIFT;
            // TODO: Currently we support only signal events, hence using
            // the same ID for both signal slot and event slot
            args->event_slot_index = eventSlotIndex;
            args->event_id = eventSlotIndex++;
            args->event_trigger_data = args->event_id;
            DPRINTF(GPUDriver, "amdkfd create events "
                    "(event_id: 0x%x, offset: 0x%x)\n",
                    args->event_id, args->event_page_offset);
            // Since eventSlotIndex is increased every time a new event is
            // created, the ETable entry at eventSlotIndex (event_id) is
            // guaranteed to be empty. A future implementation that reuses
            // deleted event_ids should check that the event table entry at
            // this eventSlotIndex (event_id) is empty before inserting a
            // new one.
            ETable.emplace(std::pair<uint32_t, ETEntry>(args->event_id, {}));
            args.copyOut(virt_proxy);
        }
        break;
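
      // The event_page_offset returned above round-trips like the doorbell
      // offset: KFD_MMAP_TYPE_EVENTS sits above PAGE_SHIFT, so a later
      // mmap() of this offset reserves (or returns) the event page handled
      // in GPUComputeDriver::mmap().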
      case AMDKFD_IOC_DESTROY_EVENT:
        {
            DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_DESTROY_EVENT\n");
            TypedBufferArg<kfd_ioctl_destroy_event_args> args(ioc_buf);
            args.copyIn(virt_proxy);
            DPRINTF(GPUDriver, "amdkfd destroying event %d\n",
                    args->event_id);
            fatal_if(ETable.count(args->event_id) == 0,
                     "Event ID invalid, cannot destroy this event\n");
            ETable.erase(args->event_id);
        }
        break;
      case AMDKFD_IOC_SET_EVENT:
        {
            DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_SET_EVENT\n");
            TypedBufferArg<kfd_ioctl_set_event_args> args(ioc_buf);
            args.copyIn(virt_proxy);
            DPRINTF(GPUDriver, "amdkfd set event %d\n", args->event_id);
            fatal_if(ETable.count(args->event_id) == 0,
                     "Event ID invalid, cannot set this event\n");
            ETable[args->event_id].setEvent = true;
            signalWakeupEvent(args->event_id);
        }
        break;
      case AMDKFD_IOC_RESET_EVENT:
        {
            warn("unimplemented ioctl: AMDKFD_IOC_RESET_EVENT\n");
        }
        break;
      case AMDKFD_IOC_WAIT_EVENTS:
        {
            DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_WAIT_EVENTS\n");
            TypedBufferArg<kfd_ioctl_wait_events_args> args(ioc_buf);
            args.copyIn(virt_proxy);
            kfd_event_data *events =
                (kfd_event_data *)args->events_ptr;
            DPRINTF(GPUDriver, "amdkfd wait for events "
                    "(wait on all: %d, timeout: %d, num_events: %d)\n",
                    args->wait_for_all, args->timeout, args->num_events);
            panic_if(args->wait_for_all != 0 && args->num_events > 1,
                     "Wait for all events not supported\n");
            bool should_sleep = true;
            if (TCEvents.count(tc) == 0) {
                // This thread context is trying to wait on an event for
                // the first time, initialize it.
                TCEvents.emplace(std::piecewise_construct,
                                 std::make_tuple(tc),
                                 std::make_tuple(this, tc));
                DPRINTF(GPUDriver, "\tamdkfd creating event list"
                        " for thread %d\n", tc->cpuId());
            }
            panic_if(TCEvents[tc].signalEvents.size() != 0,
                     "There are %d events that put this thread to sleep,"
                     " this thread should not be running\n",
                     TCEvents[tc].signalEvents.size());
            for (int i = 0; i < args->num_events; i++) {
                panic_if(!events,
                         "Event pointer invalid\n");
                Addr eventDataAddr = (Addr)(events + i);
                TypedBufferArg<kfd_event_data> EventData(
                    eventDataAddr, sizeof(kfd_event_data));
                EventData.copyIn(virt_proxy);
                DPRINTF(GPUDriver,
                        "\tamdkfd wait for event %d\n", EventData->event_id);
                panic_if(ETable.count(EventData->event_id) == 0,
                         "Event ID invalid, cannot wait on this event\n");
                if (ETable[EventData->event_id].threadWaiting)
                    warn("Multiple threads waiting on the same event\n");
                if (ETable[EventData->event_id].setEvent) {
                    // The event is already set, i.e. it has already
                    // happened. Just unset the event and don't put this
                    // thread to sleep.
                    ETable[EventData->event_id].setEvent = false;
                    should_sleep = false;
                }
                if (should_sleep) {
                    // Put this thread to sleep
                    ETable[EventData->event_id].threadWaiting = true;
                    ETable[EventData->event_id].tc = tc;
                    TCEvents[tc].signalEvents.insert(EventData->event_id);
                }
            }

            // TODO: Return the correct wait_result back. Currently, returning
            // success for both KFD_WAIT_TIMEOUT and KFD_WAIT_COMPLETE.
            // Ideally, this needs to be done after the event is triggered and
            // after the thread is woken up.
            args->wait_result = 0;
            args.copyOut(virt_proxy);
            if (should_sleep) {
                // Put this thread to sleep
                sleepCPU(tc, args->timeout);
            } else {
                // Remove events that tried to put this thread to sleep
                TCEvents[tc].clearEvents();
            }
        }
        break;
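
      // Wake-up protocol: either signalWakeupEvent() (via
      // AMDKFD_IOC_SET_EVENT) or the DriverWakeupEvent timer fires first;
      // both clear this thread's signalEvents list before re-activating the
      // context, so a thread never wakes with stale wait entries.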
      case AMDKFD_IOC_DBG_REGISTER:
        {
            warn("unimplemented ioctl: AMDKFD_IOC_DBG_REGISTER\n");
        }
        break;
      case AMDKFD_IOC_DBG_UNREGISTER:
        {
            warn("unimplemented ioctl: AMDKFD_IOC_DBG_UNREGISTER\n");
        }
        break;
      case AMDKFD_IOC_DBG_ADDRESS_WATCH:
        {
            warn("unimplemented ioctl: AMDKFD_IOC_DBG_ADDRESS_WATCH\n");
        }
        break;
      case AMDKFD_IOC_DBG_WAVE_CONTROL:
        {
            warn("unimplemented ioctl: AMDKFD_IOC_DBG_WAVE_CONTROL\n");
        }
        break;
      case AMDKFD_IOC_SET_SCRATCH_BACKING_VA:
        {
            warn("unimplemented ioctl: AMDKFD_IOC_SET_SCRATCH_BACKING_VA\n");
        }
        break;
      case AMDKFD_IOC_GET_TILE_CONFIG:
        {
            warn("unimplemented ioctl: AMDKFD_IOC_GET_TILE_CONFIG\n");
        }
        break;
      case AMDKFD_IOC_SET_TRAP_HANDLER:
        {
            warn("unimplemented ioctl: AMDKFD_IOC_SET_TRAP_HANDLER\n");
        }
        break;
      case AMDKFD_IOC_GET_PROCESS_APERTURES_NEW:
        {
            DPRINTF(GPUDriver,
                    "ioctl: AMDKFD_IOC_GET_PROCESS_APERTURES_NEW\n");

            TypedBufferArg<kfd_ioctl_get_process_apertures_new_args>
                ioc_args(ioc_buf);

            ioc_args.copyIn(virt_proxy);
            ioc_args->num_of_nodes = 1;

            for (int i = 0; i < ioc_args->num_of_nodes; ++i) {
                TypedBufferArg<kfd_process_device_apertures> ape_args
                    (ioc_args->kfd_process_device_apertures_ptr);

                switch (gfxVersion) {
                  case GfxVersion::gfx801:
                  case GfxVersion::gfx803:
                    ape_args->scratch_base = scratchApeBase(i + 1);
                    ape_args->lds_base = ldsApeBase(i + 1);
                    break;
                  case GfxVersion::gfx900:
                  case GfxVersion::gfx902:
                    ape_args->scratch_base = scratchApeBaseV9();
                    ape_args->lds_base = ldsApeBaseV9();
                    break;
                  default:
                    fatal("Invalid gfx version\n");
                }

                // GFX8 and GFX9 set lds and scratch limits the same way
                ape_args->scratch_limit =
                    scratchApeLimit(ape_args->scratch_base);
                ape_args->lds_limit = ldsApeLimit(ape_args->lds_base);

                switch (gfxVersion) {
                  case GfxVersion::gfx801:
                    ape_args->gpuvm_base = gpuVmApeBase(i + 1);
                    ape_args->gpuvm_limit =
                        gpuVmApeLimit(ape_args->gpuvm_base);
                    break;
                  case GfxVersion::gfx803:
                  case GfxVersion::gfx900:
                  case GfxVersion::gfx902:
                    // Taken from SVM_USE_BASE in the Linux kernel
                    ape_args->gpuvm_base = 0x1000000ull;
                    // Taken from AMDGPU_GMC_HOLE_START in the Linux kernel
                    ape_args->gpuvm_limit = 0x0000800000000000ULL - 1;
                    break;
                  default:
                    fatal("Invalid gfx version\n");
                }

                // NOTE: Must match ID populated by hsaTopology.py
                if (isdGPU) {
                    switch (gfxVersion) {
                      case GfxVersion::gfx803:
                        ape_args->gpu_id = 50156;
                        break;
                      case GfxVersion::gfx900:
                        ape_args->gpu_id = 22124;
                        break;
                      default:
                        fatal("Invalid gfx version for dGPU\n");
                    }
                } else {
                    switch (gfxVersion) {
                      case GfxVersion::gfx801:
                      case GfxVersion::gfx902:
                        ape_args->gpu_id = 2765;
                        break;
                      default:
                        fatal("Invalid gfx version for APU\n");
                    }
                }

                // The apertures must lie in the non-canonical hole of the
                // x86-64 VA space: bits 63:47 may be neither all ones nor
                // all zeros.
                assert(bits<Addr>(ape_args->scratch_base, 63, 47) != 0x1ffff);
                assert(bits<Addr>(ape_args->scratch_base, 63, 47) != 0);
                assert(bits<Addr>(ape_args->scratch_limit, 63, 47) != 0x1ffff);
                assert(bits<Addr>(ape_args->scratch_limit, 63, 47) != 0);
                assert(bits<Addr>(ape_args->lds_base, 63, 47) != 0x1ffff);
                assert(bits<Addr>(ape_args->lds_base, 63, 47) != 0);
                assert(bits<Addr>(ape_args->lds_limit, 63, 47) != 0x1ffff);
                assert(bits<Addr>(ape_args->lds_limit, 63, 47) != 0);

                ape_args.copyOut(virt_proxy);
            }

            ioc_args.copyOut(virt_proxy);
        }
        break;
      case AMDKFD_IOC_ACQUIRE_VM:
        {
            warn("unimplemented ioctl: AMDKFD_IOC_ACQUIRE_VM\n");
        }
        break;
      // On real hardware, this ioctl is what carves BOs/VMAs out of the
      // shared SVM region and binds them to physical memory or doorbells.
      // gem5's SE mode uses the host page tables on the GPU directly, so
      // here we only need to pick the physical backing and install the
      // mapping. Each memory type is commented on separately below.
      case AMDKFD_IOC_ALLOC_MEMORY_OF_GPU:
        {
            DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_ALLOC_MEMORY_OF_GPU\n");
            TypedBufferArg<kfd_ioctl_alloc_memory_of_gpu_args> args(ioc_buf);
            args.copyIn(virt_proxy);

            assert(isdGPU || gfxVersion == GfxVersion::gfx902);
            assert((args->va_addr % X86ISA::PageBytes) == 0);
            [[maybe_unused]] Addr mmap_offset = 0;

            Request::CacheCoherenceFlags mtype = defaultMtype;
            Addr pa_addr = 0;

            int npages = divCeil(args->size, (int64_t)X86ISA::PageBytes);
            bool cacheable = true;

            if (KFD_IOC_ALLOC_MEM_FLAGS_VRAM & args->flags) {
                DPRINTF(GPUDriver, "amdkfd allocation type: VRAM\n");
                args->mmap_offset = args->va_addr;
                // VRAM allocations are device memory mapped into GPUVM
                // space.
                //
                // We can't rely on the lazy host allocator (fixupFault) to
                // handle this mapping since it needs to be placed in dGPU
                // framebuffer memory. The lazy allocator will try to place
                // this in host memory.
                //
                // TODO: We don't have the appropriate bifurcation of the
                // physical address space with different memory controllers
                // yet. This is where we will explicitly add the PT maps to
                // dGPU memory in the future.
                //
                // Bind the VA space to the dGPU physical memory pool. Mark
                // this region as Uncacheable. The Uncacheable flag is only
                // really used by the CPU and is ignored by the GPU. We mark
                // this as uncacheable from the CPU so that we can implement
                // direct CPU framebuffer access similar to what we currently
                // offer in real HW through the so-called Large BAR feature.
                pa_addr = process->seWorkload->allocPhysPages(
                        npages, dGPUPoolID);
                //
                // TODO: Uncacheable accesses need to be supported by the
                // CPU-side protocol for this to work correctly. I believe
                // it only works right now if the physical memory is MMIO.
                cacheable = false;

                DPRINTF(GPUDriver, "Mapping VA %p to framebuffer PA %p size "
                        "%d\n", args->va_addr, pa_addr, args->size);

            } else if (KFD_IOC_ALLOC_MEM_FLAGS_USERPTR & args->flags) {
                DPRINTF(GPUDriver, "amdkfd allocation type: USERPTR\n");
                mmap_offset = args->mmap_offset;
                // USERPTR allocations are system memory mapped into GPUVM
                // space. The user provides the driver with the pointer.
                pa_addr = process->seWorkload->allocPhysPages(npages);

                DPRINTF(GPUDriver, "Mapping VA %p to host PA %p size "
                        "%d\n", args->va_addr, pa_addr, args->size);

                // If the HSA runtime requests system coherent memory, then
                // we need to explicitly mark this region as uncacheable
                // from the perspective of the GPU.
                if (args->flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT)
                    mtype.clear();

            } else if (KFD_IOC_ALLOC_MEM_FLAGS_GTT & args->flags) {
                DPRINTF(GPUDriver, "amdkfd allocation type: GTT\n");
                args->mmap_offset = args->va_addr;
                // GTT allocations are system memory mapped into GPUVM space.
                // It's different from a USERPTR allocation since the driver
                // itself allocates the physical memory on the host.
                //
                // We will lazily map it into host memory on first touch. The
                // fixupFault will find the original SVM aperture mapped to
                // the host.
                pa_addr = process->seWorkload->allocPhysPages(npages);

                DPRINTF(GPUDriver, "Mapping VA %p to host PA %p size "
                        "%d\n", args->va_addr, pa_addr, args->size);

                // If the HSA runtime requests system coherent memory, then
                // we need to explicitly mark this region as uncacheable
                // from the perspective of the GPU.
                if (args->flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT)
                    mtype.clear();

                // Note that for GTT the thunk layer needs to call mmap on the
                // driver FD later if it wants the host to have access to this
                // memory (which it probably does). This will be ignored.
            } else if (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL & args->flags) {
                DPRINTF(GPUDriver, "amdkfd allocation type: DOORBELL\n");
                // DOORBELL allocations are the queue doorbells that are
                // memory mapped into GPUVM space.
                //
                // Explicitly map this virtual address to our PIO doorbell
                // interface in the page tables (non-cacheable).
                pa_addr = device->hsaPacketProc().pioAddr;
                cacheable = false;
            }

            DPRINTF(GPUDriver, "amdkfd allocation arguments: va_addr %p "
                    "size %lu, mmap_offset %p, gpu_id %d\n",
                    args->va_addr, args->size, mmap_offset, args->gpu_id);

            // Bind the selected physical memory to the provided virtual
            // address range in the x86 page tables.
            process->pTable->map(args->va_addr, pa_addr, args->size,
                                 cacheable);

            // We keep track of allocated regions of GPU mapped memory,
            // just like the driver would. This allows us to provide the
            // user with a unique handle for a given allocation. The user
            // will only provide us with a handle after allocation and expect
            // us to be able to use said handle to extract all the properties
            // of the region.
            //
            // This is a simplified version of regular system VMAs, but for
            // GPUVM space (none of the clobber/remap nonsense we find in
            // real OS-managed memory).
            allocateGpuVma(mtype, args->va_addr, args->size);

            // Used by the runtime to uniquely identify this allocation.
            // We can just use the starting address of the VMA region.
            args->handle = args->va_addr;
            args.copyOut(virt_proxy);
        }
        break;
      case AMDKFD_IOC_FREE_MEMORY_OF_GPU:
        {
            DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_FREE_MEMORY_OF_GPU\n");
            TypedBufferArg<kfd_ioctl_free_memory_of_gpu_args> args(ioc_buf);
            args.copyIn(virt_proxy);

            assert(isdGPU);
            DPRINTF(GPUDriver, "amdkfd free arguments: handle %p\n",
                    args->handle);

            // We don't recycle physical pages in SE mode
            Addr size = deallocateGpuVma(args->handle);
            process->pTable->unmap(args->handle, size);

            // TODO: The IOMMU and GPU TLBs do not seem to correctly support
            // shootdown. This is also a potential issue for APU systems
            // that perform unmap or remap with system memory.
            tc->getMMUPtr()->flushAll();

            args.copyOut(virt_proxy);
        }
        break;
      case AMDKFD_IOC_MAP_MEMORY_TO_GPU:
        {
            warn("unimplemented ioctl: AMDKFD_IOC_MAP_MEMORY_TO_GPU\n");
        }
        break;
      case AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU:
        {
            warn("unimplemented ioctl: AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU\n");
        }
        break;
      case AMDKFD_IOC_SET_CU_MASK:
        {
            warn("unimplemented ioctl: AMDKFD_IOC_SET_CU_MASK\n");
        }
        break;
      case AMDKFD_IOC_GET_QUEUE_WAVE_STATE:
        {
            warn("unimplemented ioctl: AMDKFD_IOC_GET_QUEUE_WAVE_STATE\n");
        }
        break;
      case AMDKFD_IOC_GET_DMABUF_INFO:
        {
            warn("unimplemented ioctl: AMDKFD_IOC_GET_DMABUF_INFO\n");
        }
        break;
      case AMDKFD_IOC_IMPORT_DMABUF:
        {
            warn("unimplemented ioctl: AMDKFD_IOC_IMPORT_DMABUF\n");
        }
        break;
      case AMDKFD_IOC_ALLOC_QUEUE_GWS:
        {
            warn("unimplemented ioctl: AMDKFD_IOC_ALLOC_QUEUE_GWS\n");
        }
        break;
      case AMDKFD_IOC_SMI_EVENTS:
        {
            warn("unimplemented ioctl: AMDKFD_IOC_SMI_EVENTS\n");
        }
        break;
      default:
        fatal("%s: bad ioctl %d\n", name(), req);
        break;
    }
    return 0;
}

void
GPUComputeDriver::sleepCPU(ThreadContext *tc, uint32_t milliSecTimeout)
{
    // Convert milliseconds to ticks
    Tick wakeup_delay((uint64_t)milliSecTimeout * 1000000000);
    assert(TCEvents.count(tc) == 1);
    TCEvents[tc].timerEvent.scheduleWakeup(wakeup_delay);
    tc->suspend();
    DPRINTF(GPUDriver,
            "CPU %d is put to sleep\n", tc->cpuId());
}
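
// e.g., a 10 ms timeout becomes 10 * 10^9 = 10^10 ticks: the multiplier
// above converts milliseconds to picosecond ticks at gem5's default 1 THz
// tick rate.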

/**
 * The aperture (APE) base/limit pairs are set statically at startup by
 * the real KFD.
 */
Addr
GPUComputeDriver::gpuVmApeBase(int gpuNum) const
{
    return ((Addr)gpuNum << 61) + 0x1000000000000L;
}

Addr
GPUComputeDriver::gpuVmApeLimit(Addr apeBase) const
{
    return (apeBase & 0xFFFFFF0000000000UL) | 0xFFFFFFFFFFL;
}
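
// Worked example: for gpuNum == 1, gpuVmApeBase() returns
// 0x2001000000000000, and gpuVmApeLimit() fills in the low 40 bits to give
// 0x200100ffffffffff, i.e. a 1 TiB GPUVM aperture.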

Addr
GPUComputeDriver::scratchApeBase(int gpuNum) const
{
    return ((Addr)gpuNum << 61) + 0x100000000L;
}

// Used for GFX9 devices
// From drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c in the Linux kernel
Addr
GPUComputeDriver::scratchApeBaseV9() const
{
    return ((Addr)0x1 << 48);
}

Addr
GPUComputeDriver::scratchApeLimit(Addr apeBase) const
{
    return (apeBase & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF;
}
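
// Worked example: on GFX9, scratchApeBaseV9() returns 0x0001000000000000
// and scratchApeLimit() fills the low 32 bits, yielding a 4 GiB scratch
// aperture that ends at 0x00010000ffffffff.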

Addr
GPUComputeDriver::ldsApeBase(int gpuNum) const
{
    return ((Addr)gpuNum << 61) + 0x0;
}

// Used for GFX9 devices
// From drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c in the Linux kernel
Addr
GPUComputeDriver::ldsApeBaseV9() const
{
    return ((Addr)0x2 << 48);
}

Addr
GPUComputeDriver::ldsApeLimit(Addr apeBase) const
{
    return (apeBase & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF;
}
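
// Worked example: the GFX9 LDS aperture sits one 2^48 slot above scratch:
// ldsApeBaseV9() returns 0x0002000000000000, with limit 0x00020000ffffffff.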

/**
 * Allocate/deallocate GPUVM VMAs for tracking virtual address allocations
 * and properties on dGPUs.
 */
void
GPUComputeDriver::allocateGpuVma(Request::CacheCoherenceFlags mtype,
                                 Addr start, Addr length)
{
    AddrRange range = AddrRange(start, start + length);
    DPRINTF(GPUDriver, "Registering [%p - %p] with MTYPE %d\n",
            range.start(), range.end(), mtype);
    fatal_if(gpuVmas.insert(range, mtype) == gpuVmas.end(),
             "Attempted to double register Mtypes for [%p - %p]\n",
             range.start(), range.end());
}

Addr
GPUComputeDriver::deallocateGpuVma(Addr start)
{
    auto vma = gpuVmas.contains(start);
    assert(vma != gpuVmas.end());
    assert((vma->first.start() == start));
    Addr size = vma->first.size();
    DPRINTF(GPUDriver, "Unregistering [%p - %p]\n", vma->first.start(),
            vma->first.end());
    gpuVmas.erase(vma);
    return size;
}

/**
 * Called by the compute units right before a request is issued to Ruby.
 */
void
GPUComputeDriver::setMtype(RequestPtr req)
{
    // If we are a dGPU then set the MTYPE from our VMAs.
    if (isdGPU) {
        assert(!FullSystem);
        AddrRange range = RangeSize(req->getVaddr(), req->getSize());
        auto vma = gpuVmas.contains(range);
        assert(vma != gpuVmas.end());
        DPRINTF(GPUShader, "Setting req from [%p - %p] to MTYPE %d\n",
                range.start(), range.end(), vma->second);
        req->setCacheCoherenceFlags(vma->second);
    // APUs always get the default MTYPE
    } else {
        req->setCacheCoherenceFlags(defaultMtype);
    }
}

} // namespace gem5