34 #ifndef __WAVEFRONT_HH__ 35 #define __WAVEFRONT_HH__ 43 #include "arch/gpu_isa.hh" 46 #include "config/the_gpu_isa.hh" 51 #include "params/Wavefront.hh" 112 template<
typename CType>
116 return addr * wfSize +
sizeof(CType) * lane;
120 : wfSize(wf_size), funcArgsSizePerItem(func_args_size_per_item)
122 mem = (uint8_t*)malloc(funcArgsSizePerItem * wfSize);
130 template<
typename CType>
134 return mem + getLaneOffset<CType>(lane,
addr);
137 template<
typename CType>
141 *((CType*)(mem + getLaneOffset<CType>(lane, addr))) =
val;
148 enum itype_e {I_ALU,I_GLOBAL,I_SHARED,I_FLAT,I_PRIVATE};
179 uint32_t remap(uint32_t vgprIndex, uint32_t size, uint8_t
mode=0);
180 void resizeRegFiles(
int num_cregs,
int num_sregs,
int num_dregs);
183 bool isOldestInstGMem();
184 bool isOldestInstLMem();
185 bool isOldestInstPrivMem();
186 bool isOldestInstFlatMem();
187 bool isOldestInstALU();
188 bool isOldestInstBarrier();
194 uint32_t workGroupId[3];
195 uint32_t workGroupSz[3];
200 uint32_t actualWgSz[3];
202 void computeActualWgSz(
NDRange *ndr);
302 callArgMem =
new CallArgMem(func_args_size_per_item, wf_size);
305 template<
typename CType>
309 return *((CType*)(callArgMem->
getLaneAddr<CType>(lane, addr)));
312 template<
typename CType>
330 void start(uint64_t _wfDynId, uint64_t _base_ptr);
332 void updateResources();
334 bool instructionBufferHasBranch();
338 bool waitingAtBarrier(
int lane);
340 void pushToReconvergenceStack(uint32_t
pc, uint32_t
rpc,
343 void popFromReconvergenceStack();
347 uint32_t
rpc()
const;
353 void pc(uint32_t new_pc);
361 uint32_t getStaticContextSize()
const;
367 void getContext(
const void *out);
373 void setContext(
const void *in);
393 #endif // __WAVEFRONT_HH__
std::vector< uint32_t > oldVgpr
Stats::Scalar numTimesBlockedDueRAWDependencies
CallArgMem(int func_args_size_per_item, int wf_size)
Stats::Scalar numTimesBlockedDueVrfPortAvail
std::deque< std::unique_ptr< ReconvergenceStackEntry > > reconvergenceStack
Stack containing Control Flow Graph nodes (i.e., kernel instructions) to be visited by the wavefront, and the associated execution masks per instruction.
std::bitset< std::numeric_limits< unsigned long long >::digits > VectorMask
class ConditionRegisterState * condRegState
This represents a slice of the overall LDS, intended to be associated with an individual workgroup.
Stats::Scalar numTimesBlockedDueWAXDependencies
This is a simple scalar statistic, like a counter.
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Stats::Distribution srcRegOpDist
uint32_t pc
PC of current instruction.
std::deque< GPUDynInstPtr > instructionBuffer
void initCallArgMem(int func_args_size_per_item, int wf_size)
std::vector< uint32_t > workItemFlatId
void writeCallArgMem(int lane, int addr, CType val)
void exec(GPUDynInstPtr ii, Wavefront *w)
A simple distribution stat.
CType readCallArgMem(int lane, int addr)
std::vector< int > barCnt
void setParent(ComputeUnit *cu)
ComputeUnit * computeUnit
Stats::Distribution dstRegOpDist
uint32_t outstandingReqsWrLm
uint32_t outstandingReqsRdGm
Defines global host-dependent types: Counter, Tick, and (indirectly) {int,uint}{8,16,32,64}_t.
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
uint32_t outstandingReqsRdLm
int getLaneOffset(int lane, int addr)
TheGpuISA::GPUISA & gpuISA()
uint32_t outstandingReqsWrGm
std::vector< Addr > lastAddr
TheGpuISA::GPUISA _gpuISA
A reconvergence stack entry conveys the necessary state to implement control flow divergence and reconvergence.
void setLaneAddr(int lane, int addr, CType val)
uint8_t * getLaneAddr(int lane, int addr)
uint32_t spillSizePerItem
static const int MAX_NUM_INSTS_PER_WF
std::vector< uint64_t > oldDgpr
Abstract superclass for simulation objects.
const FlagsType init
This Stat is Initialized.
uint32_t rpc
PC of the immediate post-dominator instruction, i.e., the value of pc for the first instruction that will be executed by the wavefront when it reconverges.
VectorMask execMask
Execution mask.