#include "debug/GPUDisp.hh"
#include "debug/GPUMem.hh"
#include "debug/HSAIL.hh"

// From the Shader constructor's member-initializer list:
    cpuThread(nullptr), gpuTc(nullptr), cpuPointer(p->cpu_pointer),
    for (int i = 0; i < n_cu; ++i) {
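        // A sketch of the loop body the excerpt omits (p->CUs as the
        // param-supplied CU list is an assumption): register each
        // compute unit and point it back at this shader.
        cuList[i] = p->CUs[i];
        cuList[i]->shader = this;
    }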
// From Shader::mmap(): carve 'length' bytes out of the process's mmap
// region, which grows down on most platforms and up on a few.
    if (proc->mmapGrowsDown()) {
        start = mem_state->getMmapEnd() - length;
        mem_state->setMmapEnd(start);
    } else {
        start = mem_state->getMmapEnd();
        mem_state->setMmapEnd(start + length);

        // the stack grows down toward the mmap region; make sure the
        // two cannot collide
        assert(mem_state->getStackBase() - mem_state->getMaxStackSize() >
               mem_state->getMmapEnd());
    }

    DPRINTF(HSAIL, "Shader::mmap start = %#x, length = %#x\n",
            start, length);
// From Shader::hostWakeUp():
    panic("Dispatcher wants to wake up a different host");
Shader*
ShaderParams::create()
{
    return new Shader(this);
}
    // drain the pending schedule-add entries (sa_n of them; see the
    // ScheduleAdd() sketch below)
    for (int i = 0; i < sa_n; ++i) {
        // ...
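// A minimal sketch of the mechanism that loop drains, assuming the
// parallel vectors sa_val / sa_when / sa_x hold "add x to *val once
// tick 'when' arrives" entries and sa_n counts them. Whether the delay
// is relative to curTick() and whether ScheduleAdd() itself arms the
// tick event are both assumptions.
void
Shader::ScheduleAdd(uint32_t *val, Tick when, int x)
{
    sa_val.push_back(val);
    sa_when.push_back(curTick() + when); // assumed time base
    sa_x.push_back(x);
    ++sa_n;

    if (!tickEvent.scheduled())                   // assumed arming step
        schedule(tickEvent, curTick() + ticks(1));
}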
bool
Shader::dispatch_workgroups(NDRange *ndr)
{
    bool scheduledSomething = false;
    // ...
    // try every compute unit once, round-robin (a sketch of the elided
    // bookkeeping follows this function)
    while (cuCount < n_cu) {
        // ...
        scheduledSomething = true;
        DPRINTF(GPUDisp, "Dispatching a workgroup to CU %d\n", curCu);
        // ...
        cuList[curCu]->StartWorkgroup(ndr);
        // ...
    }
    // ...
    return scheduledSomething;
}
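// A sketch of the loop bookkeeping elided above, under stated
// assumptions: ReadyWorkgroup() is a hypothetical readiness check, and
// curCu is assumed to advance round-robin so no CU is starved:
//
//     if (cuList[curCu]->ReadyWorkgroup(ndr)) {
//         // dispatch path shown above: DPRINTF + StartWorkgroup
//     }
//     curCu = (curCu + 1) % n_cu;
//     ++cuCount;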
void
Shader::doFunctionalAccess(const RequestPtr &req, MemCmd cmd, void *data,
                           bool suppress_func_errors, int cu_id)
{
    int block_size = cuList.at(cu_id)->cacheLineSize();
    unsigned size = req->getSize();
    // ...
    // decode the command into a TLB mode; anything else is fatal
        fatal("unexpected MemCmd\n");
    // ...
    tmp_addr = req->getVaddr();
    // find the cache-line boundary the access would cross, if any
    Addr split_addr = roundDown(tmp_addr + size - 1, block_size);

    // a request may span at most two cache lines
    assert(split_addr <= tmp_addr || split_addr - tmp_addr < block_size);

    if (split_addr > tmp_addr) {
        // the access straddles a line boundary: split it in two
        // ...
        req->splitOnVaddr(split_addr, req1, req2);
        // ...
        new_pkt2->dataStatic((uint8_t*)data + req1->getSize());
        // ...
        if (suppress_func_errors) {
            // ...
        }
        cuList[0]->memPort[0]->sendFunctional(new_pkt1);
        cuList[0]->memPort[0]->sendFunctional(new_pkt2);
    } else {
        // the access fits in a single line: one packet suffices
        // ...
        if (suppress_func_errors) {
            // ...
        }
        cuList[0]->memPort[0]->sendFunctional(new_pkt);
    }
}
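// A sketch of what the suppress_func_errors branches above are assumed
// to contain: each packet is marked so that a failed functional access
// does not abort simulation. setSuppressFuncError() is part of the
// Packet interface; the exact branch bodies are an assumption:
//
//     new_pkt1->setSuppressFuncError();
//     new_pkt2->setSuppressFuncError();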
    for (int i_cu = 0; i_cu < n_cu; ++i_cu) {
        if (!cuList[i_cu]->isDone()) {
            // ...
void
Shader::AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id,
                  MemCmd cmd, bool suppress_func_errors)
{
    uint8_t *data_buf = (uint8_t*)ptr;

    // walk the region one cache-line-sized chunk at a time
    for (ChunkGenerator gen(address, size,
                            cuList.at(cu_id)->cacheLineSize());
         !gen.done(); gen.next()) {
        RequestPtr req = std::make_shared<Request>(
            gen.addr(), gen.size(), 0,
            cuList[0]->masterId(), 0, 0, nullptr);

        doFunctionalAccess(req, cmd, data_buf, suppress_func_errors, cu_id);
        data_buf += gen.size();
    }
}
void
Shader::ReadMem(uint64_t address, void *ptr, uint32_t size, int cu_id,
                bool suppress_func_errors)
{
    AccessMem(address, ptr, size, cu_id, MemCmd::ReadReq,
              suppress_func_errors);
}

void
Shader::WriteMem(uint64_t address, void *ptr, uint32_t size, int cu_id,
                 bool suppress_func_errors)
{
    AccessMem(address, ptr, size, cu_id, MemCmd::WriteReq,
              suppress_func_errors);
}
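// Hypothetical usage (the helper and its names are illustrative, not
// from the source): functionally read a 64-bit value through CU 0 with
// error suppression enabled.
static uint64_t
readU64(Shader *shader, uint64_t addr)
{
    uint64_t val = 0;
    shader->ReadMem(addr, &val, sizeof(val), 0, true);
    return val;
}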
void
Shader::functionalTLBAccess(PacketPtr pkt, int cu_id, BaseTLB::Mode mode)
{
    // attach the translation mode and the GPU thread context; lane 0
    // suffices since functional translations are lane-agnostic
    pkt->senderState =
        new TheISA::GpuTLB::TranslationState(mode, gpuTc, false);
    cuList[cu_id]->tlbPort[0]->sendFunctional(pkt);

    // reclaim the sender state the TLB filled in
    TheISA::GpuTLB::TranslationState *sender_state =
        safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);

    delete sender_state->tlbEntry;
    delete pkt->senderState;
}
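// Hypothetical usage sketch (the request/packet construction is an
// assumption, not part of the excerpt): translate a virtual address
// functionally by wrapping it in a read packet and handing it to the
// CU's TLB port.
//
//     RequestPtr req = std::make_shared<Request>(
//         vaddr, sizeof(uint32_t), 0, cuList[0]->masterId(), 0, 0,
//         nullptr);
//     PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
//     functionalTLBAccess(pkt, 0, BaseTLB::Read);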