Go to the documentation of this file.
   34 #ifndef __COMPUTE_UNIT_HH__ 
   35 #define __COMPUTE_UNIT_HH__ 
   39 #include <unordered_set> 
   45 #include "config/the_gpu_isa.hh" 
   46 #include "enums/PrefetchType.hh" 
   67 struct ComputeUnitParams;
 
  406                         bool fetchContext=
false);
 
  430         int bar_id = *free_bar_id;
 
  447     template<
typename c0, 
typename c1>
 
  450     virtual void init() 
override;
 
  621     getRefCounter(
const uint32_t dispatchId, 
const uint32_t wgId) 
const;
 
  670                   saved(sender_state) { }
 
  763                     *sender_state=
nullptr, 
int _kernId=-1)
 
 1004         if (if_name == 
"memory_port" && idx < 
memPort.size()) {
 
 1006         } 
else if (if_name == 
"translation_port" && idx < 
tlbPort.size()) {
 
 1008         } 
else if (if_name == 
"scalar_port") {
 
 1010         } 
else if (if_name == 
"scalar_tlb_port") {
 
 1012         } 
else if (if_name == 
"sqc_port") {
 
 1014         } 
else if (if_name == 
"sqc_tlb_port") {
 
 1016         } 
else if (if_name == 
"ldsPort") {
 
 1082 #endif // __COMPUTE_UNIT_HH__ 
  
Stats::Distribution controlFlowDivergenceDist
EventFunctionWrapper * createMemReqEvent(PacketPtr pkt)
std::vector< WaitClass > vectorALUs
WaitClass vectorSharedMemUnit
std::deque< std::pair< PacketPtr, Wavefront * > > retries
virtual void recvRangeChange()
Called to receive an address range change from the peer response port.
Stats::Scalar globalWrites
ComputeUnit * computeUnit
SenderState(GPUDynInstPtr gpuDynInst, PortID port_index)
Stats::Scalar vectorMemWrites
MemReqEvent(ScalarDataPort &_scalar_data_port, PacketPtr _pkt)
static const FlagsType AutoDelete
void recvReqRetry() override
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Enums::PrefetchType prefetchType
const char * description() const
Return a C string describing the event.
ScalarDataPort & scalarDataPort
void resetBarrier(int bar_id)
void deleteFromPipeMap(Wavefront *w)
Stats::Scalar numInstrExecuted
int simdUnitWidth() const
void initiateFetch(Wavefront *wavefront)
std::map< Addr, int > pagesTouched
virtual void init() override
init() is called after all C++ SimObjects have been created and all ports are connected.
WaitClass vrfToGlobalMemPipeBus
int loadBusLength() const
int mapWaveToScalarAluGlobalIdx(Wavefront *w) const
int maxBarrierCnt() const
Stats::Distribution ldsBankConflictDist
void incNumAtBarrier()
Mark that a WF has reached the barrier.
std::queue< PacketPtr > retries
here we queue all the requests that were not successfully sent.
ComputeUnit * computeUnit
Stats::Formula vectorMemWritesPerWF
int numYetToReachBarrier() const
Number of WFs that have not yet reached the barrier.
Stats::Scalar numVecOpsExecutedF16
void dispWorkgroup(HSAQueueEntry *task, int num_wfs_in_wg)
virtual Tick recvAtomic(PacketPtr pkt)
Stats::Formula ldsNoFlatInstsPerWF
ScalarDataPort(const std::string &_name, ComputeUnit *_cu)
SenderState(Wavefront *_wavefront, Packet::SenderState *sender_state=nullptr, int _kernId=-1)
DTLBPort(const std::string &_name, ComputeUnit *_cu, PortID id)
Stats::Scalar globalReads
virtual void recvRangeChange()
Called to receive an address range change from the peer response port.
const PortID InvalidPortID
LocalMemPipeline localMemoryPipe
Stats::Scalar completedWGs
int mapWaveToGlobalMem(Wavefront *w) const
LDSPort(const std::string &_name, ComputeUnit *_cu)
Stats::Scalar dynamicLMemInstrCnt
Cycles srf_scm_bus_latency
DataPort(const std::string &_name, ComputeUnit *_cu, PortID id)
Stats::Vector instCyclesLdsPerSimd
void incNumAtBarrier(int bar_id)
Cycles vrf_gm_bus_latency
ScalarDTLBPort scalarDTLBPort
int mapWaveToScalarMem(Wavefront *w) const
int mapWaveToLocalMem(Wavefront *w) const
Stats::Scalar ldsBankAccesses
Stats::Formula tlbLatency
Stats::Scalar totalCycles
int numCyclesPerLoadTransfer
Stats::Formula readonlyMemInsts
virtual void recvFunctional(PacketPtr pkt)
virtual void recvRangeChange()
Called to receive an address range change from the peer response port.
std::unordered_map< GPUDynInstPtr, Tick > headTailMap
Stats::Scalar numVecOpsExecutedMAD16
int numYetToReachBarrier(int bar_id)
std::deque< PacketPtr > retries
here we queue all the translation requests that were not successfully sent.
void doFlush(GPUDynInstPtr gpuDynInst)
trigger flush operation in the cu
Stats::Scalar kernargReads
TokenManager * memPortTokens
virtual void getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
Stats::Scalar readonlyWrites
Stats::Scalar dynamicFlatMemInstrCnt
uint64_t Tick
Tick count type.
Stats::Scalar tlbRequests
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Stats::Formula kernargMemInsts
ComputeUnit(const Params *p)
std::shared_ptr< Request > RequestPtr
Stats::Scalar numVecOpsExecutedFMA32
SenderState(GPUDynInstPtr gpuDynInst, Packet::SenderState *sender_state=nullptr)
Stats::Distribution activeLanesPerLMemInstrDist
WaitClass vectorGlobalMemUnit
Stats::Formula scalarMemWritesPerWF
ITLBPort(const std::string &_name, ComputeUnit *_cu)
RegisterManager * registerManager
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Stats::Scalar readonlyReads
Cycles vrf_lm_bus_latency
virtual bool sendTimingReq(PacketPtr pkt)
Attempt to send this packet; either the port is already stalled, the request is nack'd and must stall...
Stats::Scalar spillWrites
ComputeUnit * computeUnit
Stats::Vector hitsPerTLBLevel
int scalarPipeLength() const
Stats::Scalar numVecOpsExecutedF64
ScheduleStage scheduleStage
Stats::Formula flatLDSInstsPerWF
bool allAtBarrier(int bar_id)
Stats::Distribution pageDivergenceDist
A vector of scalar stats.
LdsState & getLds() const
void insertInPipeMap(Wavefront *w)
Stats::Formula sALUInstsPerWF
Stats::Formula argMemInsts
int _maxBarrierCnt
The maximum number of WFs that can reach this barrier.
A vector of distributions.
Stats::Scalar groupWrites
virtual void recvFunctional(PacketPtr pkt)
Stats::Formula vectorMemInstsPerKiloInst
ComputeUnit * computeUnit
void handleMemPacket(PacketPtr pkt, int memport_index)
int numVectorGlobalMemUnits
Stats::Scalar scalarMemWrites
virtual void getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
Stats::Scalar numVecOpsExecutedMAD64
SenderState is information carried along with the packet, esp.
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
Stats::Scalar scalarMemReads
std::vector< WaitClass > scalarALUs
SenderState(GPUDynInstPtr gpuDynInst, PortID _port_index, Packet::SenderState *sender_state=nullptr)
bool recvTimingResp(PacketPtr pkt) override
Receive a timing response from the peer.
Stats::Formula scalarMemReadsPerKiloInst
virtual void recvFunctional(PacketPtr pkt)
ComputeUnit * computeUnit
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
bool sendToLds(GPUDynInstPtr gpuDynInst) __attribute__((warn_unused_result))
Send a general request to the LDS. Make sure to look at the return value here, as your request might be...
Stats::Scalar numTimesWgBlockedDueSgprAlloc
This is a simple scalar statistic, like a counter.
EventFunctionWrapper tickEvent
Stats::Scalar instCyclesVALU
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.
Stats::Vector instCyclesScMemPerSimd
Stats::Scalar completedWfs
GPUDynInstPtr _gpuDynInst
SenderState(GPUDynInstPtr gpuDynInst)
Stats::Scalar numVecOpsExecutedFMA64
bool stalled
whether or not it is stalled
void decMaxBarrierCnt(int bar_id)
std::vector< ScalarRegisterFile * > srf
Stats::Scalar numVecOpsExecutedF32
int numVectorSharedMemUnits
Communication interface between Schedule and Execute stages.
A virtual base opaque structure used to hold state associated with the packet (e.g....
virtual void recvRangeChange()
Called to receive an address range change from the peer response port.
int spBypassLength() const
Stats::Scalar kernargWrites
std::vector< DataPort > memPort
The memory port for SIMD data accesses.
int32_t getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
int numBarrierSlots() const
virtual Port & getPort(const std::string &if_name, PortID idx=InvalidPortID)
Get a port with a given name and index.
std::vector< WFBarrier > wfBarrierSlots
The barrier slots for this CU.
SenderState(GPUDynInstPtr gpuDynInst)
int maxBarrierCnt(int bar_id)
virtual Tick recvAtomic(PacketPtr pkt)
void sendRequest(GPUDynInstPtr gpuDynInst, PortID index, PacketPtr pkt)
void injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelMemSync, RequestPtr req=nullptr)
SenderState is information carried along with the packet throughout the TLB hierarchy.
const int _numBarrierSlots
Packet::SenderState * saved
std::deque< std::pair< PacketPtr, GPUDynInstPtr > > retries
Stats::Scalar wgBlockedDueLdsAllocation
Ports are used to interface objects to each other.
std::vector< VectorRegisterFile * > vrf
std::vector< uint64_t > instExecPerSimd
void reset()
Reset the barrier.
void decMaxBarrierCnt()
Decrement the number of WFs that are participating in this barrier.
void setFlags(Flags _flags)
std::vector< std::vector< Wavefront * > > wfList
int storeBusLength() const
ScalarDataPort scalarDataPort
Stats::Scalar numFailedCASOps
Stats::VectorDistribution instInterleave
Stats::Scalar numVecOpsExecutedMAC32
Stats::Formula vALUUtilization
GPUDynInstPtr _gpuDynInst
ScoreboardCheckToSchedule scoreboardCheckToSchedule
TODO: Update these comments once the pipe stage interface has been fully refactored.
virtual void recvRangeChange()
Called to receive an address range change from the peer response port.
void releaseBarrier(int bar_id)
Stats::Formula numALUInstsExecuted
Stats::Scalar numTimesWgBlockedDueVgprAlloc
ScalarDTLBPort(const std::string &_name, ComputeUnit *_cu)
ComputeUnit * computeUnit
Stats::Distribution execRateDist
bool allAtBarrier() const
Have all WFs participating in this barrier reached the barrier? If so, then the barrier is satisfied ...
WaitClass vrfToLocalMemPipeBus
bool isVectorAluIdle(uint32_t simdId) const
void startWavefront(Wavefront *w, int waveId, LdsChunk *ldsChunk, HSAQueueEntry *task, int bar_id, bool fetchContext=false)
const PortID id
A numeric identifier to distinguish ports in a vector, and set to InvalidPortID in case this port is ...
Packet::SenderState * saved
std::deque< PacketPtr > retries
here we queue all the translation requests that were not successfully sent.
A RequestPort is a specialisation of a Port, which implements the default protocol for the three diff...
Stats::Scalar numVecOpsExecuted
ComputeUnit * computeUnit
Stats::Formula groupMemInsts
std::vector< uint64_t > lastExecCycle
ScalarMemPipeline scalarMemoryPipe
GPUDynInstPtr _gpuDynInst
void sendScalarRequest(GPUDynInstPtr gpuDynInst, PacketPtr pkt)
std::vector< std::vector< std::vector< Addr > > > lastVaddrWF
int coalescerToVrfBusWidth
WaitClass srfToScalarMemPipeBus
InstSeqNum getAndIncSeqNum()
Stats::Scalar numVecOpsExecutedMAD32
SenderState is information carried along with the packet throughout the TLB hierarchy.
virtual void recvReqRetry()
The bus is telling the port that there is now space, so retrying stalled requests should work now. This...
void processFetchReturn(PacketPtr pkt)
bool recvTimingResp(PacketPtr pkt) override
Receive a timing response from the peer.
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Packet::SenderState * saved
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.
const std::string name() const
Return port name (for DPRINTF).
A simple distribution stat.
std::vector< int > scalarRegsReserved
Stats::Formula vectorMemReadsPerKiloInst
bool processTimingPacket(PacketPtr pkt)
GlobalMemPipeline globalMemoryPipe
int numAtBarrier(int bar_id)
bool hasDispResources(HSAQueueEntry *task, int &num_wfs_in_wg)
int cacheLineSize() const
TokenManager * getTokenManager()
LDSPort ldsPort
The port to access the Local Data Store. Can be connected to an LDS object.
std::unordered_set< int > freeBarrierIds
A set used to easily retrieve a free barrier ID.
Stats::Scalar flatVMemInsts
std::vector< Addr > lastVaddrCU
Stats::Distribution headTailLatency
Stats::Formula vectorMemReadsPerWF
Port & getPort(const std::string &if_name, PortID idx) override
Get a port with a given name and index.
Stats::Formula privMemInsts
Stats::Formula flatVMemInstsPerWF
GPUDynInstPtr _gpuDynInst
Stats::Scalar numVecOpsExecutedMAC64
virtual Tick recvAtomic(PacketPtr pkt)
void release()
Release this barrier resource so it can be used by other WGs.
std::vector< int > numWfsToSched
Number of WFs to schedule to each SIMD.
virtual void recvFunctional(PacketPtr pkt)
int oprNetPipeLength() const
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.
virtual Tick recvAtomic(PacketPtr pkt)
WaitClass scalarMemToSrfBus
Stats::Formula globalMemInsts
Stats::Distribution waveLevelParallelism
pageDataStruct pageAccesses
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Communication interface between ScoreboardCheck and Schedule stages.
Stats::Formula scalarMemWritesPerKiloInst
Stats::Scalar wgBlockedDueBarrierAllocation
std::unordered_map< Addr, std::pair< int, int > > pageDataStruct
void updateInstStats(GPUDynInstPtr gpuDynInst)
void updatePageDivergenceDist(Addr addr)
Stats::Scalar numVecOpsExecutedTwoOpFP
Stats::Scalar vectorMemReads
GMTokenPort(const std::string &name, SimObject *owner, PortID id=InvalidPortID)
void fillKernelState(Wavefront *w, HSAQueueEntry *task)
Stats::Scalar numVecOpsExecutedMAC16
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
void regStats() override
Callback to set stat parameters.
void setMaxBarrierCnt(int max_barrier_cnt)
Set the maximum barrier count (i.e., the number of WFs that are participating in the barrier).
Stats::Scalar flatLDSInsts
GPUDynInstPtr getMemInst() const
void recvReqRetry() override
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
bool recvTimingResp(PacketPtr)
Receive a timing response from the peer.
int vrfToCoalescerBusWidth
int numCyclesPerStoreTransfer
std::vector< std::vector< Addr > > lastVaddrSimd
void processMemRespEvent(PacketPtr pkt)
Stats::Scalar threadCyclesVALU
std::shared_ptr< GPUDynInst > GPUDynInstPtr
void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
SQCPort(const std::string &_name, ComputeUnit *_cu)
Cycles is a wrapper class for representing cycle counts, i.e.
std::vector< int > vectorRegsReserved
this represents a slice of the overall LDS, intended to be associated with an individual workgroup
Stats::Formula scalarMemReadsPerWF
ScoreboardCheckStage scoreboardCheckStage
int dpBypassLength() const
std::deque< PacketPtr > retries
int getCacheLineBits() const
void releaseWFsFromBarrier(int bar_id)
EventFunctionWrapper * createMemRespEvent(PacketPtr pkt)
void doSmReturn(GPUDynInstPtr gpuDynInst)
Stats::Scalar ldsNoFlatInsts
void processMemReqEvent(PacketPtr pkt)
std::vector< DTLBPort > tlbPort
virtual bool recvTimingResp(PacketPtr pkt)
get the result of packets sent to the LDS when they return
std::unordered_set< uint64_t > pipeMap
Stats::Vector instCyclesVMemPerSimd
virtual Tick recvAtomic(PacketPtr pkt)
virtual void recvFunctional(PacketPtr pkt)
Stats::Formula spillMemInsts
void doInvalidate(RequestPtr req, int kernId)
trigger invalidate operation in the cu
const char * __attribute__((weak)) m5MainCommands[]
SenderState(Wavefront *_wavefront)
RequestorID requestorId()
std::deque< PacketPtr > retries
Stats::Formula vALUInstsPerWF
int mapWaveToScalarAlu(Wavefront *w) const
static const int InvalidID
Stats::Formula vectorMemWritesPerKiloInst
Stats::Scalar instCyclesSALU
Stats::Distribution activeLanesPerGMemInstrDist
Stats::Scalar dynamicGMemInstrCnt
Stats::Scalar numVecOpsExecutedFMA16
GPUDynInstPtr _gpuDynInst
WFBarrier & barrierSlot(int bar_id)
the port intended to communicate between the CU and its LDS
ScheduleToExecute scheduleToExecute
int _numAtBarrier
The number of WFs in the WG that have reached the barrier.
Stats::Formula scalarMemInstsPerKiloInst
void fetch(PacketPtr pkt, Wavefront *wavefront)
Abstract superclass for simulation objects.
Generated on Thu Mar 18 2021 12:09:17 for gem5 by  doxygen 1.8.17