develop/dispatcher_8cc_source.html

/*

 * Copyright (c) 2011-2015,2018 Advanced Micro Devices, Inc.

 * All rights reserved.

 *

 * Redistribution and use in source and binary forms, with or without

 * modification, are permitted provided that the following conditions are met:

 *

 * 1. Redistributions of source code must retain the above copyright notice,

 * this list of conditions and the following disclaimer.

 *

 * 2. Redistributions in binary form must reproduce the above copyright notice,

 * this list of conditions and the following disclaimer in the documentation

 * and/or other materials provided with the distribution.

 *

 * 3. Neither the name of the copyright holder nor the names of its

 * contributors may be used to endorse or promote products derived from this

 * software without specific prior written permission.

 *

 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"

 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE

 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR

 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF

 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS

 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN

 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)

 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE

 * POSSIBILITY OF SUCH DAMAGE.

 */


#include "gpu-compute/dispatcher.hh"


#include "debug/GPUAgentDisp.hh"

#include "debug/GPUDisp.hh"

#include "debug/GPUKernelInfo.hh"

#include "debug/GPUWgLatency.hh"

#include "gpu-compute/gpu_command_processor.hh"

#include "gpu-compute/hsa_queue_entry.hh"

#include "gpu-compute/shader.hh"

#include "gpu-compute/wavefront.hh"

#include "sim/sim_exit.hh"

#include "sim/syscall_emul_buf.hh"

#include "sim/system.hh"


namespace gem5

{


/**
 * Build a dispatcher from its configuration parameters.
 *
 * The shader and command-processor pointers start out null; they are
 * filled in afterwards through setShader() and setCommandProcessor().
 * The tick event, whose callback is exec(), is scheduled for tick 0.
 *
 * @param p configuration parameters; kernel_exit_events is copied into
 *          kernelExitEvents.
 */
GPUDispatcher::GPUDispatcher(const Params &p)
    : SimObject(p),
      shader(nullptr),
      gpuCmdProc(nullptr),
      tickEvent([this]{ exec(); }, "GPU Dispatcher tick", false,
                Event::CPU_Tick_Pri),
      dispatchActive(false),
      kernelExitEvents(p.kernel_exit_events),
      stats(this)
{
    // First invocation of exec() happens at the very start of simulation.
    const Tick first_tick = 0;
    schedule(&tickEvent, first_tick);
}


/** Destructor; the dispatcher performs no explicit cleanup here. */
GPUDispatcher::~GPUDispatcher() = default;


/**
 * Fetch the queue entry registered under a dispatch ID.
 *
 * The ID is asserted to be present in hsaQueueEntries.
 *
 * @param disp_id dispatch ID to look up.
 * @return the entry stored in hsaQueueEntries for disp_id.
 */
HSAQueueEntry*
GPUDispatcher::hsaTask(int disp_id)
{
    assert(hsaQueueEntries.count(disp_id) != 0);

    HSAQueueEntry *entry = hsaQueueEntries[disp_id];
    return entry;
}


/**
 * Record the command processor this dispatcher reports to.
 *
 * @param gpu_cmd_proc pointer stored in gpuCmdProc; no ownership
 *                     handling is performed here.
 */
void
GPUDispatcher::setCommandProcessor(GPUCommandProcessor *gpu_cmd_proc)
{
    this->gpuCmdProc = gpu_cmd_proc;
}


/**
 * Record the shader this dispatcher issues work to.
 *
 * @param new_shader pointer stored in shader; no ownership handling is
 *                   performed here.
 */
void
GPUDispatcher::setShader(Shader *new_shader)
{
    this->shader = new_shader;
}


/**
 * Write the dispatcher's checkpoint state.
 *
 * Only the tick at which tickEvent is due is saved; 0 is written when
 * the event is not scheduled.
 *
 * @param cp checkpoint output stream.
 */
void
GPUDispatcher::serialize(CheckpointOut &cp) const
{
    // NOTE: the variable name doubles as the checkpoint key, so it must
    // stay "event_tick" to match unserialize().
    const Tick event_tick = tickEvent.scheduled() ? tickEvent.when() : 0;

    SERIALIZE_SCALAR(event_tick);
}


/**
 * Restore the dispatcher's checkpoint state.
 *
 * Any currently pending tick event is cancelled, then the event is
 * re-scheduled at the saved tick unless the saved value is 0.
 *
 * @param cp checkpoint input to read from.
 */
void
GPUDispatcher::unserialize(CheckpointIn &cp)
{
    // Drop whatever was scheduled before the checkpoint was loaded.
    if (tickEvent.scheduled()) {
        deschedule(&tickEvent);
    }

    // NOTE: the variable name doubles as the checkpoint key, so it must
    // stay "event_tick" to match serialize().
    Tick event_tick = 0;
    UNSERIALIZE_SCALAR(event_tick);

    // A saved value of 0 means no tick event was pending.
    if (event_tick == 0) {
        return;
    }

    schedule(&tickEvent, event_tick);
}


/**
 * Accept a kernel for execution.
 *
 * The task's dispatch ID is appended to execIds, the task is registered
 * in hsaQueueEntries under that ID, dispatchActive is set, and the tick
 * event is scheduled if it is not already pending.
 *
 * @param task queue entry describing the kernel to launch.
 */
void
GPUDispatcher::dispatch(HSAQueueEntry *task)
{
    ++stats.numKernelLaunched;

    DPRINTF(GPUDisp, "launching kernel: %s, dispatch ID: %d\n",
            task->kernelName(), task->dispatchId());
    DPRINTF(GPUAgentDisp, "launching kernel: %s, dispatch ID: %d\n",
            task->kernelName(), task->dispatchId());

    const int disp_id = task->dispatchId();

    execIds.push(disp_id);
    dispatchActive = true;
    hsaQueueEntries.emplace(disp_id, task);

    // Make sure exec() runs one shader clock period from now unless a
    // tick is already pending.
    scheduleDispatch();
}


/**
 * Tick handler: try to launch every kernel currently queued in execIds.
 *
 * Each queued kernel ID is visited at most once per call:
 *  - if the shader requires an acquire at kernel launch
 *    (impl_kern_launch_acq), an invalidate is prepared for the task;
 *    otherwise the task is marked as already invalidated;
 *  - a task whose invalidate is not done is moved to the back of the
 *    queue and counted as a failure;
 *  - otherwise workgroups are dispatched until the task reports
 *    dispComplete() or the shader refuses a dispatch, in which case the
 *    ID is re-queued at the back and counted as a failure.
 * The loop ends once the number of failures equals the queue length,
 * i.e. every remaining kernel has been tried. Finally doneIds is
 * drained, emitting one trace message per entry.
 */
void
GPUDispatcher::exec()
{
    // Same type as execIds.size() so the loop condition below does not
    // mix signed and unsigned operands.
    decltype(execIds.size()) fail_count = 0;
    int disp_count(0);

    DPRINTF(GPUDisp, "Launching %d Kernels\n", execIds.size());
    DPRINTF(GPUAgentDisp, "Launching %d Kernels\n", execIds.size());

    if (execIds.size() > 0) {
        ++stats.cyclesWaitingForDispatch;
    }

    while (execIds.size() > fail_count) {
        int exec_id = execIds.front();

        // Look the task up without operator[], which would silently
        // insert a null entry for an unknown ID.
        auto task_it = hsaQueueEntries.find(exec_id);
        assert(task_it != hsaQueueEntries.end());
        auto task = task_it->second;
        bool launched(false);

        // acq is needed before starting dispatch
        if (shader->impl_kern_launch_acq) {
            // try to invalidate cache
            shader->prepareInvalidate(task);
        } else {
            // kern launch acquire is not set, skip invalidate
            task->markInvDone();
        }

        if (!task->isInvDone()){
            // rotate this kernel to the back of the queue
            execIds.push(exec_id);
            ++fail_count;

            DPRINTF(GPUDisp, "kernel %d failed to launch, due to [%d] pending"
                " invalidate requests\n", exec_id, task->outstandingInvs());

            // try the next kernel_id
            execIds.pop();
            continue;
        }

        // kernel invalidate is done, start workgroup dispatch
        while (!task->dispComplete()) {
            // update the thread context
            shader->updateContext(task->contextId());

            // attempt to dispatch workgroup
            DPRINTF(GPUWgLatency, "Attempt Kernel Launch cycle:%d kernel:%d\n",
                curTick(), exec_id);

            if (!shader->dispatchWorkgroups(task)) {
                DPRINTF(GPUDisp, "kernel %d failed to launch\n", exec_id);
                // re-queue at the back; the front copy is popped below
                execIds.push(exec_id);
                ++fail_count;
                break;
            } else if (!launched) {
                // count each kernel only once, on its first successful
                // workgroup dispatch in this call
                launched = true;
                disp_count++;
                DPRINTF(GPUKernelInfo, "Launched kernel %d for WG %d\n",
                            exec_id, disp_count);
            }
        }

        // try the next kernel_id
        execIds.pop();
    }

    DPRINTF(GPUDisp, "Returning %d Kernels\n", doneIds.size());
    DPRINTF(GPUWgLatency, "Kernel Wgs dispatched: %d | %d failures\n",
            disp_count, fail_count);

    while (doneIds.size()) {
        DPRINTF(GPUDisp, "Kernel %d completed\n", doneIds.front());
        doneIds.pop();
    }
}


/**
 * Tell whether exactly one more workgroup completion would finish the
 * kernel a wavefront belongs to.
 *
 * The wavefront's kernel ID is asserted to be registered in
 * hsaQueueEntries and to match the entry's dispatch ID.
 *
 * @param wf wavefront whose kernId selects the kernel.
 * @return true when numWgCompleted() + 1 equals numWgTotal().
 */
bool
GPUDispatcher::isReachingKernelEnd(Wavefront *wf)
{
    const int kernel_id = wf->kernId;
    assert(hsaQueueEntries.count(kernel_id) != 0);

    HSAQueueEntry *entry = hsaQueueEntries[kernel_id];
    assert(entry->dispatchId() == kernel_id);

    const auto completed_after_next = entry->numWgCompleted() + 1;
    return completed_after_next == entry->numWgTotal();
}


/**
 * Adjust the number of outstanding invalidate requests of a kernel.
 *
 * Once the kernel reports its invalidate as done, the tick event is
 * scheduled one shader clock period from now (unless already pending)
 * so that dispatch can proceed.
 *
 * @param kern_id ID of the kernel; asserted to be registered in
 *                hsaQueueEntries.
 * @param val     adjustment to apply, must be +1 or -1.
 */
void
GPUDispatcher::updateInvCounter(int kern_id, int val)
{
    assert(val == -1 || val == 1);

    // Use find() rather than operator[]: the latter would insert a null
    // entry for an unknown kernel and then dereference it.
    auto task_it = hsaQueueEntries.find(kern_id);
    assert(task_it != hsaQueueEntries.end());
    auto task = task_it->second;

    task->updateOutstandingInvs(val);

    // kernel invalidate is done, schedule dispatch work
    if (task->isInvDone() && !tickEvent.scheduled()) {
        schedule(&tickEvent, curTick() + shader->clockPeriod());
    }
}


/**
 * Adjust the number of outstanding writeback requests of a kernel.
 *
 * @param kern_id ID of the kernel; asserted to be registered in
 *                hsaQueueEntries.
 * @param val     adjustment to apply, must be +1 or -1.
 * @return true when no writebacks remain outstanding after the update,
 *         false while writeback is still ongoing.
 */
bool
GPUDispatcher::updateWbCounter(int kern_id, int val)
{
    assert(val == -1 || val == 1);

    // Use find() rather than operator[]: the latter would insert a null
    // entry for an unknown kernel and then dereference it.
    auto task_it = hsaQueueEntries.find(kern_id);
    assert(task_it != hsaQueueEntries.end());
    auto task = task_it->second;

    task->updateOutstandingWbs(val);

    // true: WB is done, false: WB is still ongoing
    return (task->outstandingWbs() == 0);
}


/**
 * Report how many writeback requests a kernel still has outstanding.
 *
 * @param kernId ID of the kernel; asserted to be registered in
 *               hsaQueueEntries.
 * @return the value of the entry's outstandingWbs().
 */
int
GPUDispatcher::getOutstandingWbs(int kernId)
{
    // A query must not modify the map, so look up with find() instead
    // of operator[], which inserts a null entry for an unknown kernel.
    auto task_it = hsaQueueEntries.find(kernId);
    assert(task_it != hsaQueueEntries.end());

    return task_it->second->outstandingWbs();
}


/**
 * Record that a workgroup of the wavefront's kernel has completed.
 *
 * When the number of completed workgroups reaches the kernel's total,
 * the HSA packet processor is told to finish the packet, the completion
 * signal (if the task has one) is sent through the command processor,
 * and, if kernelExitEvents is set, a kernel exit event is requested
 * from the shader. In every case the tick event is scheduled one shader
 * clock period from now unless it is already pending.
 *
 * @param wf wavefront whose kernId selects the kernel; the ID is
 *           asserted to be registered in hsaQueueEntries.
 */
void
GPUDispatcher::notifyWgCompl(Wavefront *wf)
{
    int kern_id = wf->kernId;
    DPRINTF(GPUDisp, "notify WgCompl %d\n", wf->wgId);

    // Use find() rather than operator[]: the latter would insert a null
    // entry for an unknown kernel and then dereference it.
    auto task_it = hsaQueueEntries.find(kern_id);
    assert(task_it != hsaQueueEntries.end());
    auto task = task_it->second;
    assert(task->dispatchId() == kern_id);
    task->notifyWgCompleted();

    DPRINTF(GPUWgLatency, "WG Complete cycle:%d wg:%d kernel:%d cu:%d\n",
        curTick(), wf->wgId, kern_id, wf->computeUnit->cu_id);

    if (task->numWgCompleted() == task->numWgTotal()) {
        // Notify the HSA PP that this kernel is complete
        gpuCmdProc->hsaPacketProc()
            .finishPkt(task->dispPktPtr(), task->queueId());
        if (task->completionSignal()) {
            DPRINTF(GPUDisp, "HSA AQL Kernel Complete with completion "
                    "signal! Addr: %d\n", task->completionSignal());

            gpuCmdProc->sendCompletionSignal(task->completionSignal());
        } else {
            DPRINTF(GPUDisp, "HSA AQL Kernel Complete! No completion "
                "signal\n");
        }

        DPRINTF(GPUWgLatency, "Kernel Complete ticks:%d kernel:%d\n",
                curTick(), kern_id);
        DPRINTF(GPUKernelInfo, "Completed kernel %d\n", kern_id);

        if (kernelExitEvents) {
            shader->requestKernelExitEvent(task->completionSignal());
        }
    }

    // Give exec() another chance to dispatch work on the next cycle.
    if (!tickEvent.scheduled()) {
        schedule(&tickEvent, curTick() + shader->clockPeriod());
    }
}


void


GPUDispatcher::scheduleDispatch()

{

    if (!tickEvent.scheduled()) {

        schedule(&tickEvent, curTick() + shader->clockPeriod());

    }

}


/**
 * Register the dispatcher's statistics under the given parent group.
 *
 * @param parent statistics group the two counters are attached to.
 */
GPUDispatcher::GPUDispatcherStats::GPUDispatcherStats(
    statistics::Group *parent)
    : statistics::Group(parent),
      // incremented once per call to dispatch()
      ADD_STAT(numKernelLaunched,
               "number of kernel launched"),
      // incremented once per exec() call that finds queued kernels
      ADD_STAT(cyclesWaitingForDispatch,
               "number of cycles with outstanding "
               "wavefronts that are waiting to be dispatched")
{
}


} // namespace gem5

DPRINTF
#define DPRINTF(x,...)
Definition trace.hh:209

gem5::CheckpointIn
Definition serialize.hh:69

gem5::ComputeUnit::cu_id
int cu_id
Definition compute_unit.hh:294

gem5::GPUCommandProcessor
Definition gpu_command_processor.hh:72

gem5::GPUDispatcher::serialize
void serialize(CheckpointOut &cp) const override
Serialize an object.
Definition dispatcher.cc:84

gem5::GPUDispatcher::dispatch
void dispatch(HSAQueueEntry *task)
After all relevant HSA data structures have been traversed/extracted from memory by the CP,...
Definition dispatcher.cc:116

gem5::GPUDispatcher::updateInvCounter
void updateInvCounter(int kern_id, int val=-1)
update the counter of outstanding inv requests for the kernel kern_id: kernel id val: +1/-1,...
Definition dispatcher.cc:249

gem5::GPUDispatcher::dispatchActive
bool dispatchActive
Definition dispatcher.hh:94

gem5::GPUDispatcher::tickEvent
EventFunctionWrapper tickEvent
Definition dispatcher.hh:87

gem5::GPUDispatcher::isReachingKernelEnd
bool isReachingKernelEnd(Wavefront *wf)
Definition dispatcher.cc:229

gem5::GPUDispatcher::Params
GPUDispatcherParams Params
Definition dispatcher.hh:65

gem5::GPUDispatcher::scheduleDispatch
void scheduleDispatch()
Definition dispatcher.cc:338

gem5::GPUDispatcher::getOutstandingWbs
int getOutstandingWbs(int kern_id)
get kernel's outstanding cache writeback requests
Definition dispatcher.cc:283

gem5::GPUDispatcher::hsaQueueEntries
std::unordered_map< int, HSAQueueEntry * > hsaQueueEntries
Definition dispatcher.hh:88

gem5::GPUDispatcher::stats
gem5::GPUDispatcher::GPUDispatcherStats stats

gem5::GPUDispatcher::updateWbCounter
bool updateWbCounter(int kern_id, int val=-1)
update the counter of outstanding wb requests for the kernel kern_id: kernel id val: +1/-1,...
Definition dispatcher.cc:269

gem5::GPUDispatcher::hsaTask
HSAQueueEntry * hsaTask(int disp_id)
Definition dispatcher.cc:65

gem5::GPUDispatcher::~GPUDispatcher
~GPUDispatcher()
Definition dispatcher.cc:60

gem5::GPUDispatcher::unserialize
void unserialize(CheckpointIn &cp) override
Unserialize an object.
Definition dispatcher.cc:95

gem5::GPUDispatcher::gpuCmdProc
GPUCommandProcessor * gpuCmdProc
Definition dispatcher.hh:86

gem5::GPUDispatcher::execIds
std::queue< int > execIds
Definition dispatcher.hh:90

gem5::GPUDispatcher::shader
Shader * shader
Definition dispatcher.hh:85

gem5::GPUDispatcher::GPUDispatcher
GPUDispatcher(const Params &p)
Definition dispatcher.cc:50

gem5::GPUDispatcher::notifyWgCompl
void notifyWgCompl(Wavefront *wf)
When an end program instruction detects that the last WF in a WG has completed it will call this meth...
Definition dispatcher.cc:298

gem5::GPUDispatcher::setCommandProcessor
void setCommandProcessor(GPUCommandProcessor *gpu_cmd_proc)
Definition dispatcher.cc:72

gem5::GPUDispatcher::kernelExitEvents
bool kernelExitEvents
Definition dispatcher.hh:96

gem5::GPUDispatcher::exec
void exec()
Definition dispatcher.cc:135

gem5::GPUDispatcher::setShader
void setShader(Shader *new_shader)
Definition dispatcher.cc:78

gem5::GPUDispatcher::doneIds
std::queue< int > doneIds
Definition dispatcher.hh:92

gem5::HSAQueueEntry
Definition hsa_queue_entry.hh:61

gem5::HSAQueueEntry::kernelName
const std::string & kernelName() const
Definition hsa_queue_entry.hh:143

gem5::HSAQueueEntry::dispatchId
int dispatchId() const
Definition hsa_queue_entry.hh:181

gem5::Shader
Definition shader.hh:84

gem5::Wavefront
Definition wavefront.hh:62

gem5::Wavefront::wgId
uint32_t wgId
Definition wavefront.hh:166

gem5::Wavefront::kernId
int kernId
Definition wavefront.hh:100

gem5::Wavefront::computeUnit
ComputeUnit * computeUnit
Definition wavefront.hh:109

gem5::statistics::Group
Statistics container.
Definition group.hh:93

dispatcher.hh
The GPUDispatcher is the component of the shader that is responsible for creating and dispatching WGs...

gpu_command_processor.hh
The GPUCommandProcessor (CP) is responsible for accepting commands, in the form of HSA AQL packets,...

ADD_STAT
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition group.hh:75

gem5::EventManager::deschedule
void deschedule(Event &event)
Definition eventq.hh:1021

gem5::EventManager::schedule
void schedule(Event &event, Tick when)
Definition eventq.hh:1012

gem5::EventBase::CPU_Tick_Pri
static const Priority CPU_Tick_Pri
CPU ticks must come after other associated CPU events (such as writebacks).
Definition eventq.hh:207

gem5::SimObject::SimObject
SimObject(const Params &p)
Definition sim_object.cc:58

gem5::statistics::Group::Group
Group()=delete

hsa_queue_entry.hh
HSAQueueEntry is the simulator's internal representation of an AQL queue entry (task).

gem5::MipsISA::p
Bitfield< 0 > p
Definition pra_constants.hh:326

gem5::X86ISA::val
Bitfield< 63 > val
Definition misc.hh:804

gem5::cp
Definition cprintf.cc:41

gem5::statistics
Definition statistics.cc:57

gem5
Copyright (c) 2024 Arm Limited All rights reserved.
Definition binary32.hh:36

gem5::curTick
Tick curTick()
The universal simulation clock.
Definition cur_tick.hh:46

gem5::CheckpointOut
std::ostream CheckpointOut
Definition serialize.hh:66

gem5::Tick
uint64_t Tick
Tick count type.
Definition types.hh:58

UNSERIALIZE_SCALAR
#define UNSERIALIZE_SCALAR(scalar)
Definition serialize.hh:575

SERIALIZE_SCALAR
#define SERIALIZE_SCALAR(scalar)
Definition serialize.hh:568

shader.hh

system.hh

sim_exit.hh

gem5::GPUDispatcher::GPUDispatcherStats::numKernelLaunched
statistics::Scalar numKernelLaunched
Definition dispatcher.hh:103

gem5::GPUDispatcher::GPUDispatcherStats::GPUDispatcherStats
GPUDispatcherStats(statistics::Group *parent)
Definition dispatcher.cc:345

gem5::GPUDispatcher::GPUDispatcherStats::cyclesWaitingForDispatch
statistics::Scalar cyclesWaitingForDispatch
Definition dispatcher.hh:104

syscall_emul_buf.hh
This file defines buffer classes used to handle pointer arguments in emulated syscalls.

wavefront.hh