#include "debug/GPUDisp.hh"
#include "debug/GPUMem.hh"
#include "debug/HSAIL.hh"

// From the Shader constructor's initializer list:
    cpuThread(nullptr), gpuTc(nullptr), cpuPointer(p->cpu_pointer),
    for (int i = 0; i < n_cu; ++i) {
// Shader::mmap(): reserve 'length' bytes in the process's mmap region.
    if (proc->mmapGrowsDown()) {
        // mmap region grows downward: the new block sits just below the
        // old end, and the end pointer moves down to its start
        start = mem_state->getMmapEnd() - length;
        mem_state->setMmapEnd(start);
    } else {
        // mmap region grows upward: the new block starts at the old end
        start = mem_state->getMmapEnd();
        mem_state->setMmapEnd(start + length);

        // make sure we don't grow into the downward-growing stack
        assert(mem_state->getStackBase() - mem_state->getMaxStackSize() >
               mem_state->getMmapEnd());
    }

    DPRINTF(HSAIL, "Shader::mmap start= %#x, %#x\n", start, length);
    panic("Dispatcher wants to wakeup a different host");
// Standard gem5 params-to-SimObject factory:
Shader *
ShaderParams::create()
{
    return new Shader(this);
}
    // walk the pending scheduled-add entries
    for (int i = 0; i < sa_n; ++i) {
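The sa_val/sa_when/sa_x vectors form a deferred-add queue filled by ScheduleAdd(): each entry means "add x to *val at tick when", and the loop above drains whatever is due. A self-contained sketch of that idea, with hypothetical names and simplified bookkeeping rather than gem5's exact code:

#include <cstdint>
#include <vector>

using Tick = std::uint64_t;

// Deferred adds: scheduleAdd(val, when, x) queues "*val += x at tick when";
// drain(now) applies and removes every entry whose time has come.
struct ScheduledAdds {
    std::vector<std::uint32_t *> sa_val;
    std::vector<Tick> sa_when;
    std::vector<std::int32_t> sa_x;

    void scheduleAdd(std::uint32_t *val, Tick when, std::int32_t x) {
        sa_val.push_back(val);
        sa_when.push_back(when);
        sa_x.push_back(x);
    }

    void drain(Tick now) {
        for (std::size_t i = 0; i < sa_val.size(); ) {
            if (sa_when[i] <= now) {
                *sa_val[i] += sa_x[i];
                sa_val.erase(sa_val.begin() + i);
                sa_when.erase(sa_when.begin() + i);
                sa_x.erase(sa_x.begin() + i);
            } else {
                ++i;
            }
        }
    }
};

int main() {
    ScheduledAdds q;
    std::uint32_t counter = 0;
    q.scheduleAdd(&counter, 100, 5);
    q.drain(99);    // too early: counter is still 0
    q.drain(100);   // due now: counter becomes 5
    return counter == 5 ? 0 : 1;
}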
// Shader::dispatch_workgroups(): offer the workgroup to each CU in turn,
// round-robin, and report whether anything was scheduled.
    bool scheduledSomething = false;
    // ...
    while (cuCount < n_cu) {
        // ... pick curCu round-robin; if it has room for this workgroup:
            scheduledSomething = true;
            DPRINTF(GPUDisp, "Dispatching a workgroup to CU %d\n", curCu);
            // ...
            cuList[curCu]->StartWorkgroup(ndr);
        // ...
    }
    // ...
    return scheduledSomething;
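A compact sketch of the same round-robin shape, with a hypothetical tryStartOnCu callback standing in for the CU readiness check and StartWorkgroup():

#include <functional>

// Round-robin dispatch sketch: try each compute unit at most once per call,
// starting where the previous call left off so no CU is starved.
struct MiniScheduler {
    int nCu;
    int nextSchedCu = 0;

    bool dispatchOne(const std::function<bool(int)> &tryStartOnCu) {
        bool scheduledSomething = false;
        for (int cuCount = 0; cuCount < nCu; ++cuCount) {
            int curCu = nextSchedCu;
            nextSchedCu = (nextSchedCu + 1) % nCu;
            if (tryStartOnCu(curCu)) {
                scheduledSomething = true;
                break;
            }
        }
        return scheduledSomething;
    }
};

int main() {
    MiniScheduler s{4};
    // Pretend only CU 2 has free slots.
    bool ok = s.dispatchOne([](int cu) { return cu == 2; });
    return ok ? 0 : 1;
}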
// Shader::doFunctionalAccess(): functionally read or write 'data',
// splitting the request if it straddles a cache-line boundary.
void
Shader::doFunctionalAccess(const RequestPtr &req, MemCmd cmd, void *data,
                           bool suppress_func_errors, int cu_id)
{
    int block_size = cuList.at(cu_id)->cacheLineSize();
    unsigned size = req->getSize();

    // ... derive the TLB mode from cmd; any other command is an error:
        fatal("unexpected MemCmd\n");
    // ...
    tmp_addr = req->getVaddr();
    // start of the cache line that holds the access's last byte
    Addr split_addr = roundDown(tmp_addr + size - 1, block_size);

    // an access may span at most two cache lines
    assert(split_addr <= tmp_addr || split_addr - tmp_addr < block_size);

    if (split_addr > tmp_addr) {
        // the access straddles two lines: issue two functional packets
        // ...
        req->splitOnVaddr(split_addr, req1, req2);
        // ...
        new_pkt2->dataStatic((uint8_t*)data + req1->getSize());
        // ...
        if (suppress_func_errors) {
            // ...
        }
        cuList[0]->memPort[0]->sendFunctional(new_pkt1);
        cuList[0]->memPort[0]->sendFunctional(new_pkt2);
    } else {
        // ...
        if (suppress_func_errors) {
            // ...
        }
        cuList[0]->memPort[0]->sendFunctional(new_pkt);
    }
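The split test is pure roundDown arithmetic: the access straddles two lines exactly when the line holding its last byte starts above its first byte. A standalone check of both cases, with a hand-rolled roundDown (gem5's own lives in base/intmath.hh):

#include <cassert>
#include <cstdint>

using Addr = std::uint64_t;

// Round addr down to the nearest multiple of align (align a power of two).
static Addr roundDown(Addr addr, Addr align) { return addr & ~(align - 1); }

int main() {
    const Addr block_size = 64;                        // cache line size

    // Case 1: access fits in one line, so no split is needed.
    Addr vaddr = 0x100, size = 32;                     // 0x100..0x11f
    Addr split_addr = roundDown(vaddr + size - 1, block_size);
    assert(split_addr <= vaddr);                       // same line

    // Case 2: access straddles a line boundary, so split at split_addr.
    vaddr = 0x130, size = 32;                          // 0x130..0x14f
    split_addr = roundDown(vaddr + size - 1, block_size);
    assert(split_addr == 0x140 && split_addr > vaddr); // second line's start

    // The shader's invariant: a request never spans more than two lines.
    assert(split_addr - vaddr < block_size);
    return 0;
}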
    for (int i_cu = 0; i_cu < n_cu; ++i_cu) {
        if (!cuList[i_cu]->isDone()) {
// Shader::AccessMem(): break the access into cache-line-sized chunks and
// perform a functional access for each one.
void
Shader::AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id,
                  MemCmd cmd, bool suppress_func_errors)
{
    uint8_t *data_buf = (uint8_t*)ptr;

    for (ChunkGenerator gen(address, size, cuList.at(cu_id)->cacheLineSize());
         !gen.done(); gen.next()) {
        RequestPtr req = std::make_shared<Request>(
            0, gen.addr(), gen.size(), 0,
            cuList[0]->masterId(), 0, 0, nullptr);

        doFunctionalAccess(req, cmd, data_buf, suppress_func_errors, cu_id);
        data_buf += gen.size();
    }
}
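ChunkGenerator's job is to cut an (address, size) range into pieces that never cross an alignment boundary. A hand-rolled equivalent of that loop, for illustration only (not gem5's ChunkGenerator):

#include <algorithm>
#include <cstdint>
#include <iostream>

using Addr = std::uint64_t;

// Walk [address, address + size) in pieces that never cross a
// chunk_size-aligned boundary, mirroring ChunkGenerator's contract.
void forEachChunk(Addr address, std::uint32_t size, Addr chunk_size) {
    Addr addr = address;
    Addr end = address + size;
    while (addr < end) {
        Addr next_boundary = (addr / chunk_size + 1) * chunk_size;
        Addr chunk_end = std::min(end, next_boundary);
        std::cout << "chunk at 0x" << std::hex << addr
                  << " size " << std::dec << (chunk_end - addr) << '\n';
        addr = chunk_end;   // the equivalent of gen.next()
    }
}

int main() {
    // 0x1f0..0x21f with 64-byte lines: splits at the 0x200 boundary
    // into a 16-byte chunk and a 32-byte chunk.
    forEachChunk(0x1f0, 0x30, 64);
    return 0;
}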
// ReadMem() and WriteMem() are thin wrappers around AccessMem(); WriteMem()
// is identical to ReadMem() except that it passes MemCmd::WriteReq.
void
Shader::ReadMem(uint64_t address, void *ptr, uint32_t size, int cu_id,
                bool suppress_func_errors)
{
    AccessMem(address, ptr, size, cu_id, MemCmd::ReadReq,
              suppress_func_errors);
}
// Shader::functionalTLBAccess(): translate a packet's virtual address
// through the CU's TLB port, functionally.
    pkt->senderState =
        new TheISA::GpuTLB::TranslationState(mode, gpuTc, false);

    // functional translations can all go through lane 0's port
    cuList[cu_id]->tlbPort[0]->sendFunctional(pkt);

    // retrieve the state the TLB filled in, then release it
    TheISA::GpuTLB::TranslationState *sender_state =
        safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);

    delete sender_state->tlbEntry;
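The function relies on gem5's sender-state idiom: the requester hangs private context off the packet, the responder fills it in, and the requester casts it back afterwards. A generic, self-contained sketch of that pattern with hypothetical stand-in types, not gem5's real classes:

#include <cassert>

// Generic sender-state pattern: context travels with the packet.
struct SenderState { virtual ~SenderState() = default; };

struct Packet { SenderState *senderState = nullptr; };

struct TranslationState : SenderState {
    int mode = 0;             // e.g. read vs. write
    bool delayed = false;
    int *tlbEntry = nullptr;  // owned; filled in by the "TLB"
};

// Stand-in responder: deposits a translation into the sender state.
void fakeTlbSendFunctional(Packet *pkt) {
    auto *st = static_cast<TranslationState *>(pkt->senderState);
    st->tlbEntry = new int(42);
}

int main() {
    Packet pkt;
    pkt.senderState = new TranslationState();   // attach context
    fakeTlbSendFunctional(&pkt);                // "send" through the port

    auto *sender_state = static_cast<TranslationState *>(pkt.senderState);
    assert(*sender_state->tlbEntry == 42);
    delete sender_state->tlbEntry;              // clean up, as shader.cc does
    delete pkt.senderState;
    return 0;
}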