42 #include "debug/GPUDisp.hh" 53 pioAddr(p->pio_addr), pioSize(4096), pioDelay(p->pio_latency),
54 dispatchCount(0), dispatchActive(false), cpu(p->cpu),
55 shader(p->shader_pointer), driver(p->cl_driver),
56 tickEvent([this]{
exec(); },
"GPU Dispatcher tick",
71 .
name(
name() +
".num_kernel_launched")
72 .
desc(
"number of kernel launched")
90 fatal(
"Checkpointing not supported during active workgroup execution");
118 DPRINTF(GPUDisp,
"dispatcher registering addr range at %#x size %#x\n",
135 DPRINTF(GPUDisp,
" read register %#x size=%d\n", offset, pkt->
getSize());
146 char *curTaskPtr = (
char*)&
curTask;
148 memcpy(pkt->
getPtr<
const void*>(), curTaskPtr + offset, pkt->
getSize());
165 uint64_t data_val = 0;
169 data_val = pkt->
getLE<uint8_t>();
172 data_val = pkt->
getLE<uint16_t>();
175 data_val = pkt->
getLE<uint32_t>();
178 data_val = pkt->
getLE<uint64_t>();
184 DPRINTF(GPUDisp,
"write register %#x value %#x size=%d\n", offset, data_val,
188 static int nextId = 0;
198 uint64_t start =
curTick() / 1000;
201 &start,
sizeof(uint64_t), 0);
216 for (
int i = 0;
i < 3; ++
i) {
230 DPRINTF(GPUDisp,
"launching kernel %d\n",nextId);
244 char *curTaskPtr = (
char*)&
curTask;
245 memcpy(curTaskPtr + offset, pkt->
getPtr<
const void*>(), pkt->
getSize());
257 if (if_name ==
"translation_port") {
274 while (
execIds.size() > fail_count) {
286 DPRINTF(GPUDisp,
"kernel %d failed to launch\n", execId);
313 DPRINTF(GPUDisp,
"notify WgCompl %d\n",kern_id);
314 assert(
ndRangeMap[kern_id].dispatchId == kern_id);
333 sizeof(uint64_t), 0);
335 uint64_t end =
curTick() / 1000;
338 sizeof(uint64_t), 0);
366 panic(
"Cannot find host");
392 return shader->
cuList[0]->wfList[0][0]->getStaticContextSize();
#define panic(...)
This implements a cprintf based panic() function.
void AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id, MemCmd cmd, bool suppress_func_errors)
AddrRange RangeSize(Addr start, Addr size)
virtual void serialize(CheckpointOut &cp) const override
Serialize an object.
Ports are used to interface objects to each other.
Tick write(PacketPtr pkt) override
Pure virtual function that the device must implement.
std::vector< ComputeUnit * > cuList
#define fatal(...)
This implements a cprintf based fatal() function.
uint32_t getStaticContextSize() const
Returns the size of the static hardware context of a wavefront.
Tick when() const
Get the time that the event is scheduled.
static const Priority CPU_Tick_Pri
CPU ticks must come after other associated CPU events (such as writebacks).
std::queue< int > execIds
void updateContext(int cid)
void handshake(GpuDispatcher *_dispatcher)
void setFuncargsSize(int funcargs_size)
void accessUserVar(BaseCPU *cpu, uint64_t addr, int val, int off)
T * getPtr()
get a pointer to the data ptr.
void deschedule(Event &event)
void setLE(T v)
Set the value in the data pointer to v as little endian.
RequestPtr req
A pointer to the original request.
#define UNSERIALIZE_SCALAR(scalar)
void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id)
Tick curTick()
The current simulated tick.
std::string csprintf(const char *format, const Args &...args)
bool scheduled() const
Determine if the current event is scheduled.
void notifyWgCompl(Wavefront *w)
void makeAtomicResponse()
uint64_t Tick
Tick count type.
virtual void unserialize(CheckpointIn &cp) override
Unserialize an object.
volatile uint32_t * numDispLeft
static void setInstance(GpuDispatcher *_instance)
std::queue< int > doneIds
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
Stats::Scalar num_kernelLaunched
virtual const std::string name() const
A Packet is used to encapsulate a transfer between two objects in the memory system (e.g., the L1 and L2 cache).
static GpuDispatcher * getInstance()
#define SERIALIZE_SCALAR(scalar)
Derived & name(const std::string &name)
Set the name and marks this stat to print at the end of simulation.
EventFunctionWrapper tickEvent
std::ostream CheckpointOut
Tick ticks(int numCycles) const
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device responds to.
volatile bool * addrToNotify
T divCeil(const T &a, const U &b)
void schedule(Event &event, Tick when)
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
Derived & desc(const std::string &_desc)
Set the description and marks this stat to print at the end of simulation.
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Tick read(PacketPtr pkt) override
Pure virtual function that the device must implement.
std::unordered_map< int, NDRange > ndRangeMap
static GpuDispatcher * instance
bool dispatch_workgroups(NDRange *ndr)
GpuDispatcher(const Params *p)
void hostWakeUp(BaseCPU *cpu)
void allocate()
Allocate memory for the packet.
void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id)
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
void handshake(GpuDispatcher *dispatcher)