38 #include "debug/GPUDisp.hh" 49 pioAddr(p->pio_addr), pioSize(4096), pioDelay(p->pio_latency),
50 dispatchCount(0), dispatchActive(false), cpu(p->cpu),
51 shader(p->shader_pointer), driver(p->cl_driver),
52 tickEvent([this]{
exec(); },
"GPU Dispatcher tick",
67 .
name(
name() +
".num_kernel_launched")
68 .
desc(
"number of kernel launched")
86 fatal(
"Checkpointing not supported during active workgroup execution");
114 DPRINTF(GPUDisp,
"dispatcher registering addr range at %#x size %#x\n",
131 DPRINTF(GPUDisp,
" read register %#x size=%d\n", offset, pkt->
getSize());
142 char *curTaskPtr = (
char*)&
curTask;
144 memcpy(pkt->
getPtr<
const void*>(), curTaskPtr + offset, pkt->
getSize());
161 uint64_t data_val = 0;
165 data_val = pkt->
getLE<uint8_t>();
168 data_val = pkt->
getLE<uint16_t>();
171 data_val = pkt->
getLE<uint32_t>();
174 data_val = pkt->
getLE<uint64_t>();
180 DPRINTF(GPUDisp,
"write register %#x value %#x size=%d\n", offset, data_val,
184 static int nextId = 0;
194 uint64_t start =
curTick() / 1000;
197 &start,
sizeof(uint64_t), 0);
212 for (
int i = 0;
i < 3; ++
i) {
226 DPRINTF(GPUDisp,
"launching kernel %d\n",nextId);
240 char *curTaskPtr = (
char*)&
curTask;
241 memcpy(curTaskPtr + offset, pkt->
getPtr<
const void*>(), pkt->
getSize());
253 if (if_name ==
"translation_port") {
270 while (
execIds.size() > fail_count) {
282 DPRINTF(GPUDisp,
"kernel %d failed to launch\n", execId);
309 DPRINTF(GPUDisp,
"notify WgCompl %d\n",kern_id);
310 assert(
ndRangeMap[kern_id].dispatchId == kern_id);
329 sizeof(uint64_t), 0);
331 uint64_t end =
curTick() / 1000;
334 sizeof(uint64_t), 0);
362 panic(
"Cannot find host");
388 return shader->
cuList[0]->wfList[0][0]->getStaticContextSize();
#define panic(...)
This implements a cprintf based panic() function.
void AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id, MemCmd cmd, bool suppress_func_errors)
AddrRange RangeSize(Addr start, Addr size)
virtual void serialize(CheckpointOut &cp) const override
Serialize an object.
Ports are used to interface objects to each other.
Tick write(PacketPtr pkt) override
Pure virtual function that the device must implement.
std::vector< ComputeUnit * > cuList
#define fatal(...)
This implements a cprintf based fatal() function.
uint32_t getStaticContextSize() const
Returns the size of the static hardware context of a wavefront.
std::queue< int > execIds
void updateContext(int cid)
void handshake(GpuDispatcher *_dispatcher)
void setFuncargsSize(int funcargs_size)
void accessUserVar(BaseCPU *cpu, uint64_t addr, int val, int off)
T * getPtr()
get a pointer to the data ptr.
void setLE(T v)
Set the value in the data pointer to v as little endian.
RequestPtr req
A pointer to the original request.
#define UNSERIALIZE_SCALAR(scalar)
void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id)
Tick curTick()
The current simulated tick.
std::string csprintf(const char *format, const Args &...args)
void notifyWgCompl(Wavefront *w)
void makeAtomicResponse()
uint64_t Tick
Tick count type.
virtual void unserialize(CheckpointIn &cp) override
Unserialize an object.
void deschedule(Event &event)
volatile uint32_t * numDispLeft
static const Priority CPU_Tick_Pri
CPU ticks must come after other associated CPU events (such as writebacks).
static void setInstance(GpuDispatcher *_instance)
void schedule(Event &event, Tick when)
std::queue< int > doneIds
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
Stats::Scalar num_kernelLaunched
A Packet is used to encapsulate a transfer between two objects in the memory system (e.g., the L1 and L2 cache).
static GpuDispatcher * getInstance()
#define SERIALIZE_SCALAR(scalar)
bool scheduled() const
Determine if the current event is scheduled.
Derived & name(const std::string &name)
Set the name and marks this stat to print at the end of simulation.
virtual const std::string name() const
EventFunctionWrapper tickEvent
std::ostream CheckpointOut
Tick ticks(int numCycles) const
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device responds to.
volatile bool * addrToNotify
T divCeil(const T &a, const U &b)
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
Derived & desc(const std::string &_desc)
Set the description and marks this stat to print at the end of simulation.
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Tick read(PacketPtr pkt) override
Pure virtual function that the device must implement.
std::unordered_map< int, NDRange > ndRangeMap
static GpuDispatcher * instance
bool dispatch_workgroups(NDRange *ndr)
GpuDispatcher(const Params *p)
Tick when() const
Get the time that the event is scheduled.
void hostWakeUp(BaseCPU *cpu)
void allocate()
Allocate memory for the packet.
void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id)
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
void handshake(GpuDispatcher *dispatcher)