Go to the documentation of this file.
34 #ifndef __COMPUTE_UNIT_HH__
35 #define __COMPUTE_UNIT_HH__
39 #include <unordered_set>
47 #include "config/the_gpu_isa.hh"
48 #include "enums/PrefetchType.hh"
69 struct ComputeUnitParams;
402 bool fetchContext=
false);
428 int bar_id = *free_bar_id;
445 template<
typename c0,
typename c1>
448 virtual void init()
override;
477 getRefCounter(
const uint32_t dispatchId,
const uint32_t wgId)
const;
525 saved(sender_state) { }
618 *sender_state=
nullptr,
int _kernId=-1)
859 if (if_name ==
"memory_port" && idx <
memPort.size()) {
861 }
else if (if_name ==
"translation_port" && idx <
tlbPort.size()) {
863 }
else if (if_name ==
"scalar_port") {
865 }
else if (if_name ==
"scalar_tlb_port") {
867 }
else if (if_name ==
"sqc_port") {
869 }
else if (if_name ==
"sqc_tlb_port") {
871 }
else if (if_name ==
"ldsPort") {
873 }
else if (if_name ==
"gmTokenPort") {
1091 #endif // __COMPUTE_UNIT_HH__
EventFunctionWrapper * createMemReqEvent(PacketPtr pkt)
std::vector< WaitClass > vectorALUs
WaitClass vectorSharedMemUnit
Stats::Scalar completedWGs
std::deque< std::pair< PacketPtr, Wavefront * > > retries
virtual void recvRangeChange()
Called to receive an address range change from the peer response port.
ComputeUnit * computeUnit
SenderState(GPUDynInstPtr gpuDynInst, PortID port_index)
Stats::Distribution pageDivergenceDist
MemReqEvent(ScalarDataPort &_scalar_data_port, PacketPtr _pkt)
static const FlagsType AutoDelete
Stats::Scalar instCyclesVALU
void recvReqRetry() override
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Enums::PrefetchType prefetchType
Stats::Formula vectorMemWritesPerWF
Stats::Scalar kernargReads
const char * description() const
Return a C string describing the event.
Stats::Formula spillMemInsts
ScalarDataPort & scalarDataPort
void resetBarrier(int bar_id)
Stats::Vector hitsPerTLBLevel
void deleteFromPipeMap(Wavefront *w)
int simdUnitWidth() const
void initiateFetch(Wavefront *wavefront)
std::map< Addr, int > pagesTouched
virtual void init() override
init() is called after all C++ SimObjects have been created and all ports are connected.
WaitClass vrfToGlobalMemPipeBus
int loadBusLength() const
int mapWaveToScalarAluGlobalIdx(Wavefront *w) const
Stats::Scalar numVecOpsExecutedMAC64
int maxBarrierCnt() const
void incNumAtBarrier()
Mark that a WF has reached the barrier.
std::queue< PacketPtr > retries
Here we queue all the requests that were not successfully sent.
ComputeUnit * computeUnit
int numYetToReachBarrier() const
Number of WFs that have not yet reached the barrier.
void dispWorkgroup(HSAQueueEntry *task, int num_wfs_in_wg)
virtual Tick recvAtomic(PacketPtr pkt)
ScalarDataPort(const std::string &_name, ComputeUnit *_cu)
SenderState(Wavefront *_wavefront, Packet::SenderState *sender_state=nullptr, int _kernId=-1)
DTLBPort(const std::string &_name, ComputeUnit *_cu, PortID id)
virtual void recvRangeChange()
Called to receive an address range change from the peer response port.
const PortID InvalidPortID
LocalMemPipeline localMemoryPipe
int mapWaveToGlobalMem(Wavefront *w) const
LDSPort(const std::string &_name, ComputeUnit *_cu)
Cycles srf_scm_bus_latency
DataPort(const std::string &_name, ComputeUnit *_cu, PortID id)
Stats::Formula flatLDSInstsPerWF
void incNumAtBarrier(int bar_id)
Cycles vrf_gm_bus_latency
Stats::Scalar numVecOpsExecutedTwoOpFP
Stats::Formula privMemInsts
Stats::Formula readonlyMemInsts
ScalarDTLBPort scalarDTLBPort
int mapWaveToScalarMem(Wavefront *w) const
int mapWaveToLocalMem(Wavefront *w) const
Stats::Scalar numVecOpsExecutedF64
int numCyclesPerLoadTransfer
virtual void recvFunctional(PacketPtr pkt)
virtual void recvRangeChange()
Called to receive an address range change from the peer response port.
std::unordered_map< GPUDynInstPtr, Tick > headTailMap
int numYetToReachBarrier(int bar_id)
std::deque< PacketPtr > retries
Here we queue all the translation requests that were not successfully sent.
void doFlush(GPUDynInstPtr gpuDynInst)
Trigger a flush operation in the CU.
Stats::Scalar numVecOpsExecutedFMA64
TokenManager * memPortTokens
virtual void getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
Stats::Formula scalarMemInstsPerKiloInst
Stats::Scalar globalReads
uint64_t Tick
Tick count type.
Stats::Scalar numVecOpsExecutedMAD64
Stats::Formula sALUInstsPerWF
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
std::shared_ptr< Request > RequestPtr
Stats::Formula flatVMemInstsPerWF
SenderState(GPUDynInstPtr gpuDynInst, Packet::SenderState *sender_state=nullptr)
ComputeUnit::ComputeUnitStats stats
WaitClass vectorGlobalMemUnit
ITLBPort(const std::string &_name, ComputeUnit *_cu)
RegisterManager * registerManager
Stats::Scalar dynamicGMemInstrCnt
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Cycles vrf_lm_bus_latency
virtual bool sendTimingReq(PacketPtr pkt)
attempt to send this packet, either the port is already stalled, the request is nack'd and must stall...
ComputeUnit * computeUnit
int scalarPipeLength() const
Stats::Scalar numVecOpsExecutedF32
ScheduleStage scheduleStage
bool allAtBarrier(int bar_id)
Stats::Formula scalarMemReadsPerWF
Stats::Scalar numTimesWgBlockedDueSgprAlloc
A vector of scalar stats.
LdsState & getLds() const
void insertInPipeMap(Wavefront *w)
int _maxBarrierCnt
The maximum number of WFs that can reach this barrier.
A vector of distributions.
ComputeUnit(const Params &p)
Stats::Scalar numVecOpsExecutedF16
virtual void recvFunctional(PacketPtr pkt)
ComputeUnit * computeUnit
void handleMemPacket(PacketPtr pkt, int memport_index)
int numVectorGlobalMemUnits
virtual void getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
Stats::Vector instCyclesLdsPerSimd
SenderState is information carried along with the packet, esp.
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
std::vector< WaitClass > scalarALUs
SenderState(GPUDynInstPtr gpuDynInst, PortID _port_index, Packet::SenderState *sender_state=nullptr)
bool recvTimingResp(PacketPtr pkt) override
Receive a timing response from the peer.
Stats::Formula vectorMemWritesPerKiloInst
virtual void recvFunctional(PacketPtr pkt)
ComputeUnit * computeUnit
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Stats::Formula scalarMemWritesPerKiloInst
Stats::Scalar groupWrites
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
This is a simple scalar statistic, like a counter.
EventFunctionWrapper tickEvent
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.
GPUDynInstPtr _gpuDynInst
Stats::Distribution ldsBankConflictDist
Stats::Scalar spillWrites
SenderState(GPUDynInstPtr gpuDynInst)
bool stalled
whether or not it is stalled
void decMaxBarrierCnt(int bar_id)
std::vector< ScalarRegisterFile * > srf
Stats::Scalar wgBlockedDueBarrierAllocation
Stats::Scalar totalCycles
int numVectorSharedMemUnits
Communication interface between Schedule and Execute stages.
A virtual base opaque structure used to hold state associated with the packet (e.g....
virtual void recvRangeChange()
Called to receive an address range change from the peer response port.
int spBypassLength() const
std::vector< DataPort > memPort
The memory port for SIMD data accesses.
int32_t getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
int numBarrierSlots() const
virtual Port & getPort(const std::string &if_name, PortID idx=InvalidPortID)
Get a port with a given name and index.
std::vector< WFBarrier > wfBarrierSlots
The barrier slots for this CU.
SenderState(GPUDynInstPtr gpuDynInst)
int maxBarrierCnt(int bar_id)
Stats::Scalar kernargWrites
Stats::Formula tlbLatency
virtual Tick recvAtomic(PacketPtr pkt)
void sendRequest(GPUDynInstPtr gpuDynInst, PortID index, PacketPtr pkt)
Stats::Scalar flatLDSInsts
void injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelMemSync, RequestPtr req=nullptr)
Stats::Scalar numFailedCASOps
SenderState is information carried along with the packet throughout the TLB hierarchy.
const int _numBarrierSlots
Packet::SenderState * saved
Stats::Formula ldsNoFlatInstsPerWF
std::deque< std::pair< PacketPtr, GPUDynInstPtr > > retries
Ports are used to interface objects to each other.
std::vector< VectorRegisterFile * > vrf
std::vector< uint64_t > instExecPerSimd
void reset()
Reset the barrier.
Stats::Scalar globalWrites
void decMaxBarrierCnt()
Decrement the number of WFs that are participating in this barrier.
void setFlags(Flags _flags)
Stats::Scalar numTimesWgBlockedDueVgprAlloc
std::vector< std::vector< Wavefront * > > wfList
int storeBusLength() const
Stats::Scalar numVecOpsExecutedMAC16
ScalarDataPort scalarDataPort
Stats::Scalar tlbRequests
Stats::Formula scalarMemReadsPerKiloInst
GPUDynInstPtr _gpuDynInst
ScoreboardCheckToSchedule scoreboardCheckToSchedule
TODO: Update these comments once the pipe stage interface has been fully refactored.
virtual void recvRangeChange()
Called to receive an address range change from the peer response port.
void releaseBarrier(int bar_id)
ScalarDTLBPort(const std::string &_name, ComputeUnit *_cu)
ComputeUnit * computeUnit
Stats::Distribution controlFlowDivergenceDist
ComputeUnitStats(Stats::Group *parent, int n_wf)
bool allAtBarrier() const
Have all WFs participating in this barrier reached the barrier? If so, then the barrier is satisfied ...
WaitClass vrfToLocalMemPipeBus
bool isVectorAluIdle(uint32_t simdId) const
void startWavefront(Wavefront *w, int waveId, LdsChunk *ldsChunk, HSAQueueEntry *task, int bar_id, bool fetchContext=false)
const PortID id
A numeric identifier to distinguish ports in a vector, and set to InvalidPortID in case this port is ...
Packet::SenderState * saved
std::deque< PacketPtr > retries
Here we queue all the translation requests that were not successfully sent.
Stats::Vector instCyclesVMemPerSimd
A RequestPort is a specialisation of a Port, which implements the default protocol for the three diff...
Stats::Formula vALUInstsPerWF
ComputeUnit * computeUnit
Stats::Scalar ldsBankAccesses
std::vector< uint64_t > lastExecCycle
ScalarMemPipeline scalarMemoryPipe
GPUDynInstPtr _gpuDynInst
Stats::Formula vectorMemInstsPerKiloInst
void sendScalarRequest(GPUDynInstPtr gpuDynInst, PacketPtr pkt)
std::vector< std::vector< std::vector< Addr > > > lastVaddrWF
Stats::Scalar vectorMemReads
Stats::Scalar numVecOpsExecutedFMA16
Stats::Distribution activeLanesPerLMemInstrDist
M5_NODISCARD bool sendToLds(GPUDynInstPtr gpuDynInst)
Send a general request to the LDS. Make sure to look at the return value here, as your request might be...
int coalescerToVrfBusWidth
WaitClass srfToScalarMemPipeBus
InstSeqNum getAndIncSeqNum()
Stats::Scalar numVecOpsExecutedFMA32
SenderState is information carried along with the packet throughout the TLB hierarchy.
virtual void recvReqRetry()
the bus is telling the port that there is now space so retrying stalled requests should work now this...
void processFetchReturn(PacketPtr pkt)
bool recvTimingResp(PacketPtr pkt) override
Receive a timing response from the peer.
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Packet::SenderState * saved
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.
const std::string name() const
Return port name (for DPRINTF).
A simple distribution stat.
std::vector< int > scalarRegsReserved
Stats::Formula argMemInsts
bool processTimingPacket(PacketPtr pkt)
GlobalMemPipeline globalMemoryPipe
Stats::Formula vectorMemReadsPerWF
int numAtBarrier(int bar_id)
bool hasDispResources(HSAQueueEntry *task, int &num_wfs_in_wg)
int cacheLineSize() const
TokenManager * getTokenManager()
LDSPort ldsPort
The port to access the Local Data Store. Can be connected to an LDS object.
std::unordered_set< int > freeBarrierIds
A set used to easily retrieve a free barrier ID.
std::vector< Addr > lastVaddrCU
Stats::Formula scalarMemWritesPerWF
Port & getPort(const std::string &if_name, PortID idx) override
Get a port with a given name and index.
GPUDynInstPtr _gpuDynInst
virtual Tick recvAtomic(PacketPtr pkt)
void release()
Release this barrier resource so it can be used by other WGs.
Stats::Formula globalMemInsts
std::vector< int > numWfsToSched
Number of WFs to schedule to each SIMD.
virtual void recvFunctional(PacketPtr pkt)
int oprNetPipeLength() const
Stats::Scalar dynamicFlatMemInstrCnt
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.
Stats::Scalar vectorMemWrites
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.
virtual Tick recvAtomic(PacketPtr pkt)
Stats::Scalar readonlyWrites
WaitClass scalarMemToSrfBus
Stats::Distribution waveLevelParallelism
pageDataStruct pageAccesses
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Communication interface between ScoreboardCheck and Schedule stages.
Stats::Formula kernargMemInsts
std::unordered_map< Addr, std::pair< int, int > > pageDataStruct
void updateInstStats(GPUDynInstPtr gpuDynInst)
void updatePageDivergenceDist(Addr addr)
GMTokenPort(const std::string &name, SimObject *owner, PortID id=InvalidPortID)
void fillKernelState(Wavefront *w, HSAQueueEntry *task)
Stats::Distribution activeLanesPerGMemInstrDist
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Stats::Formula groupMemInsts
void setMaxBarrierCnt(int max_barrier_cnt)
Set the maximum barrier count (i.e., the number of WFs that are participating in the barrier).
GPUDynInstPtr getMemInst() const
void recvReqRetry() override
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
bool recvTimingResp(PacketPtr)
Receive a timing response from the peer.
int vrfToCoalescerBusWidth
int numCyclesPerStoreTransfer
std::vector< std::vector< Addr > > lastVaddrSimd
Stats::Scalar ldsNoFlatInsts
void processMemRespEvent(PacketPtr pkt)
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Stats::Scalar threadCyclesVALU
void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
SQCPort(const std::string &_name, ComputeUnit *_cu)
Cycles is a wrapper class for representing cycle counts, i.e.
Stats::Scalar completedWfs
std::vector< int > vectorRegsReserved
This represents a slice of the overall LDS, intended to be associated with an individual workgroup.
Stats::Formula numALUInstsExecuted
Stats::Scalar instCyclesSALU
ScoreboardCheckStage scoreboardCheckStage
int dpBypassLength() const
Stats::Distribution execRateDist
std::deque< PacketPtr > retries
Stats::Vector instCyclesScMemPerSimd
int getCacheLineBits() const
void releaseWFsFromBarrier(int bar_id)
EventFunctionWrapper * createMemRespEvent(PacketPtr pkt)
void doSmReturn(GPUDynInstPtr gpuDynInst)
void processMemReqEvent(PacketPtr pkt)
std::vector< DTLBPort > tlbPort
Stats::Scalar numVecOpsExecutedMAD16
Stats::Scalar wgBlockedDueLdsAllocation
Stats::Scalar scalarMemWrites
Stats::Scalar numVecOpsExecutedMAC32
Stats::Scalar numVecOpsExecuted
virtual bool recvTimingResp(PacketPtr pkt)
Get the result of packets sent to the LDS when they return.
Stats::Scalar numVecOpsExecutedMAD32
std::unordered_set< uint64_t > pipeMap
virtual Tick recvAtomic(PacketPtr pkt)
Stats::Scalar numInstrExecuted
virtual void recvFunctional(PacketPtr pkt)
Stats::Scalar readonlyReads
void doInvalidate(RequestPtr req, int kernId)
Trigger an invalidate operation in the CU.
Stats::Formula vALUUtilization
Stats::Scalar scalarMemReads
SenderState(Wavefront *_wavefront)
Stats::Distribution headTailLatency
Stats::VectorDistribution instInterleave
RequestorID requestorId()
Stats::Scalar flatVMemInsts
std::deque< PacketPtr > retries
int mapWaveToScalarAlu(Wavefront *w) const
Stats::Formula vectorMemReadsPerKiloInst
static const int InvalidID
Stats::Scalar dynamicLMemInstrCnt
GPUDynInstPtr _gpuDynInst
WFBarrier & barrierSlot(int bar_id)
The port intended to communicate between the CU and its LDS.
ScheduleToExecute scheduleToExecute
int _numAtBarrier
The number of WFs in the WG that have reached the barrier.
void fetch(PacketPtr pkt, Wavefront *wavefront)
Abstract superclass for simulation objects.
Generated on Tue Jun 22 2021 15:28:28 for gem5 by doxygen 1.8.17