34 #ifndef __COMPUTE_UNIT_HH__ 35 #define __COMPUTE_UNIT_HH__ 39 #include <unordered_map> 45 #include "enums/PrefetchType.hh" 63 struct ComputeUnitParams;
221 uint32_t operandSize,
224 regIdxVec.push_back(std::make_pair(simdId, regIdx));
225 timestampVec.push_back(when);
226 statusVec.push_back(newStatus);
227 if (operandSize > 4) {
228 regIdxVec.push_back(std::make_pair(simdId,
230 numVecRegsPerSimd)));
231 timestampVec.push_back(when);
232 statusVec.push_back(newStatus);
276 int AllAtBarrier(uint32_t _barrier_id, uint32_t bcnt, uint32_t bslots);
277 bool cedeSIMD(
int simdId,
int wfSlotId);
280 virtual void init()
override;
284 bool kernelLaunch=
true,
389 getRefCounter(
const uint32_t dispatchId,
const uint32_t wgId)
const;
436 : _gpuDynInst(gpuDynInst),
437 port_index(_port_index),
438 saved(sender_state) { }
453 virtual bool recvTimingResp(
PacketPtr pkt);
457 virtual void recvReqRetry();
484 *sender_state=
nullptr)
485 : wavefront(_wavefront), saved(sender_state) { }
494 virtual bool recvTimingResp(
PacketPtr pkt);
498 virtual void recvReqRetry();
514 index(_index), stalled(false)
541 : _gpuDynInst(gpuDynInst), portIndex(port_index) { }
550 virtual bool recvTimingResp(
PacketPtr pkt);
554 virtual void recvReqRetry();
589 virtual bool recvTimingResp(
PacketPtr pkt);
593 virtual void recvReqRetry();
629 _gpuDynInst(gpuDynInst)
645 bool stalled =
false;
694 if (if_name ==
"memory_port") {
697 return *memPort[idx];
698 }
else if (if_name ==
"translation_port") {
701 return *tlbPort[idx];
702 }
else if (if_name ==
"sqc_port") {
706 }
else if (if_name ==
"sqc_tlb_port") {
709 }
else if (if_name ==
"ldsPort") {
711 fatal(
"an LDS port was already allocated");
716 panic(
"incorrect port name");
726 : simdId(_simdId), wfSlotId(_wfSlotId) { }
748 #endif // __COMPUTE_UNIT_HH__ uint32_t numVecRegsPerSimd
A MasterPort is a specialisation of a BaseMasterPort, which implements the default protocol for the t...
#define panic(...)
This implements a cprintf based panic() function.
void updatePageDivergenceDist(Addr addr)
Stats::Formula tlbLatency
GPUDynInstPtr _gpuDynInst
virtual void recvRangeChange()
Called to receive an address range change from the peer slave port.
virtual void recvRangeChange()
Called to receive an address range change from the peer slave port.
SenderState(Wavefront *_wavefront, Packet::SenderState *sender_state=nullptr)
Ports are used to interface objects to each other.
Stats::Scalar flatLDSInsts
std::deque< PacketPtr > retries
here we queue all the translation requests that were not successfully sent.
std::vector< bool > vectorAluInstAvail
void injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelLaunch=true, RequestPtr req=nullptr)
void handleMemPacket(PacketPtr pkt, int memport_index)
#define fatal(...)
This implements a cprintf based fatal() function.
DTLBPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
uint32_t numCyclesPerLoadTransfer
Packet::SenderState * saved
static const int MAX_REGS_FOR_NON_VEC_MEM_INST
std::map< unsigned, waveQueue > xactCasLoadMap
static const int MAX_WIDTH_FOR_MEM_INST
std::vector< std::vector< std::pair< Wavefront *, WAVE_STATUS > > > waveStatusList
void fillKernelState(Wavefront *w, NDRange *ndr)
Stats::Vector hitsPerTLBLevel
Stats::Scalar dynamicGMemInstrCnt
ScheduleStage scheduleStage
Stats::Formula flatLDSInstsPerWF
const char * __attribute__((weak)) m5MainCommands[]
Stats::Distribution controlFlowDivergenceDist
std::vector< std::vector< Wavefront * > > readyList
std::shared_ptr< Request > RequestPtr
GPUDynInstPtr _gpuDynInst
Stats::Scalar vectorMemWrites
virtual void getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
SenderState(Wavefront *_wavefront)
virtual void init() override
init() is called after all C++ SimObjects have been created and all ports are connected.
CUExitCallback * cuExitCallback
A vector of scalar stats.
std::vector< DTLBPort * > tlbPort
std::vector< std::vector< Wavefront * > > wfList
this represents a slice of the overall LDS, intended to be associated with an individual workgroup ...
virtual void recvRangeChange()
Called to receive an address range change from the peer slave port.
Stats::Scalar dynamicLMemInstrCnt
SenderState is information carried along with the packet throughout the TLB hierarchy.
Stats::Formula numALUInstsExecuted
Declaration of Statistics objects.
GPUStaticInst * kernelLaunchInst
Stats::Scalar numInstrExecuted
void initiateFetch(Wavefront *wavefront)
This is a simple scalar statistic, like a counter.
SenderState(GPUDynInstPtr gpuDynInst)
virtual void recvFunctional(PacketPtr pkt)
Stats::Distribution ldsBankConflictDist
SenderState is information carried along with the packet throughout the TLB hierarchy.
std::vector< WaitClass > vrfToLocalMemPipeBus
Stats::Formula vectorMemWritesPerWF
std::deque< std::pair< PacketPtr, GPUDynInstPtr > > retries
Stats::Scalar wgBlockedDueLdsAllocation
Packet::SenderState * saved
virtual void recvFunctional(PacketPtr pkt)
std::vector< std::vector< std::vector< Addr > > > lastVaddrWF
virtual Tick recvAtomic(PacketPtr pkt)
std::vector< WaitClass > aluPipe
uint32_t numCyclesPerStoreTransfer
void startWavefront(Wavefront *w, int waveId, LdsChunk *ldsChunk, NDRange *ndr)
ComputeUnit(const Params *p)
std::deque< std::pair< PacketPtr, Wavefront * > > retries
GlobalMemPipeline globalMemoryPipe
uint32_t coalescerToVrfBusWidth
Stats::Formula vALUUtilization
std::shared_ptr< GPUDynInst > GPUDynInstPtr
virtual void recvRangeChange()
Called to receive an address range change from the peer slave port.
Stats::Distribution activeLanesPerLMemInstrDist
ITLBPort(const std::string &_name, ComputeUnit *_cu)
Stats::Formula scalarMemWritesPerWF
Stats::Scalar numTimesWgBlockedDueVgprAlloc
CUExitCallback(ComputeUnit *_cu)
Stats::Distribution execRateDist
Stats::Formula vectorMemReadsPerWF
void sendSyncRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt)
Port & getPort(const std::string &if_name, PortID idx) override
Get a port with a given name and index.
bool isShrMem(int unitId)
std::vector< std::pair< uint32_t, uint32_t > > regIdxVec
std::string csprintf(const char *format, const Args &...args)
virtual Tick recvAtomic(PacketPtr pkt)
SenderState(GPUDynInstPtr gpuDynInst, PortID port_index)
std::deque< PacketPtr > retries
here we queue all the translation requests that were not successfully sent.
The port intended to communicate between the CU and its LDS.
std::list< waveIdentifier > waveIDQueue
Stats::Distribution pageDivergenceDist
LdsState & getLds() const
uint64_t Tick
Tick count type.
Stats::Scalar tlbRequests
ComputeUnit * computeUnit
virtual void recvFunctional(PacketPtr pkt)
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
A simple distribution stat.
ComputeUnit * computeUnit
std::vector< WaitClass > vrfToGlobalMemPipeBus
void updateInstStats(GPUDynInstPtr gpuDynInst)
ClockedObject declaration and implementation.
Stats::Scalar flatVMemInsts
GPUDynInstPtr getMemInst() const
ComputeUnit * computeUnit
std::vector< DataPort * > memPort
The memory port for SIMD data accesses.
std::vector< std::vector< Addr > > lastVaddrSimd
uint32_t vrfToCoalescerBusWidth
int AllAtBarrier(uint32_t _barrier_id, uint32_t bcnt, uint32_t bslots)
void StartWorkgroup(NDRange *ndr)
Stats::Formula sALUInstsPerWF
virtual Tick recvAtomic(PacketPtr pkt)
bool isGlbMem(int unitId)
Stats::Scalar scalarMemWrites
std::unordered_map< Addr, std::pair< int, int > > pageDataStruct
Stats::Scalar scalarMemReads
Defines global host-dependent types: Counter, Tick, and (indirectly) {int,uint}{8,16,32,64}_t.
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
SQCPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
Stats::Scalar ldsNoFlatInsts
std::vector< std::pair< Wavefront *, DISPATCH_STATUS > > dispatchList
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
ComputeUnit * computeUnit
bool sendToLds(GPUDynInstPtr gpuDynInst) __attribute__((warn_unused_result))
Send a general request to the LDS. Make sure to look at the return value here, as your request might be...
bool cedeSIMD(int simdId, int wfSlotId)
Stats::Scalar instCyclesVALU
A virtual base opaque structure used to hold state associated with the packet (e.g., an MSHR), specific to a SimObject that sees the packet.
Stats::Scalar completedWfs
SenderState(GPUDynInstPtr gpuDynInst, PortID _port_index, Packet::SenderState *sender_state=nullptr)
Stats::Formula scalarMemReadsPerWF
Stats::Formula vALUInstsPerWF
virtual const std::string name() const
int32_t getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
Stats::Distribution activeLanesPerGMemInstrDist
void doSmReturn(GPUDynInstPtr gpuDynInst)
ComputeUnit * computeUnit
SenderState is information carried along with the packet, esp.
std::queue< PacketPtr > retries
here we queue all the requests that were not successfully sent.
Stats::Scalar numVecOpsExecuted
std::vector< VectorRegisterFile * > vrf
virtual void getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
virtual void recvFunctional(PacketPtr pkt)
void resizeRegFiles(int num_cregs, int num_sregs, int num_dregs)
Stats::Scalar numFailedCASOps
int ReadyWorkgroup(NDRange *ndr)
LDSPort * getLdsPort() const
std::map< Addr, int > pagesTouched
Stats::Scalar instCyclesSALU
virtual void process()
Virtual process function that is invoked when the callback queue is executed.
void fetch(PacketPtr pkt, Wavefront *wavefront)
Stats::Formula flatVMemInstsPerWF
std::vector< uint8_t > statusVec
virtual ~CUExitCallback()
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
std::vector< uint64_t > lastExecCycle
void registerEvent(uint32_t simdId, uint32_t regIdx, uint32_t operandSize, uint64_t when, uint8_t newStatus)
std::vector< WaitClass > wfWait
LocalMemPipeline localMemoryPipe
void processFetchReturn(PacketPtr pkt)
pageDataStruct pageAccesses
bool processTimingPacket(PacketPtr pkt)
Enums::PrefetchType prefetchType
Stats::Scalar ldsBankAccesses
Stats::Scalar totalCycles
LDSPort * ldsPort
The port to access the Local Data Store. Can be connected to an LDS object.
waveIdentifier(int _simdId, int _wfSlotId)
std::vector< uint64_t > timestampVec
Stats::Scalar vectorMemReads
int cacheLineSize() const
std::vector< Addr > lastVaddrCU
void regStats() override
Callback to set stat parameters.
Stats::Formula ldsNoFlatInstsPerWF
virtual Tick recvAtomic(PacketPtr pkt)
virtual Tick recvAtomic(PacketPtr pkt)
ComputeUnit * computeUnit
bool isSimdDone(uint32_t) const
uint64_t getAndIncSeqNum()
Stats::Scalar threadCyclesVALU
virtual void recvRangeChange()
Called to receive an address range change from the peer slave port.
void sendRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt)
std::vector< int > vectorRegsReserved
bool isVecAlu(int unitId)
LDSPort(const std::string &_name, ComputeUnit *_cu, PortID _id)
ScoreboardCheckStage scoreboardCheckStage
virtual void recvFunctional(PacketPtr pkt)
DataPort(const std::string &_name, ComputeUnit *_cu, PortID _index)