Go to the documentation of this file.
32 #ifndef __COMPUTE_UNIT_HH__
33 #define __COMPUTE_UNIT_HH__
37 #include <unordered_set>
45 #include "config/the_gpu_isa.hh"
46 #include "enums/PrefetchType.hh"
66 class ScalarRegisterFile;
68 class VectorRegisterFile;
70 struct ComputeUnitParams;
405 bool fetchContext=
false);
431 int bar_id = *free_bar_id;
448 template<
typename c0,
typename c1>
451 virtual void init()
override;
483 getRefCounter(
const uint32_t dispatchId,
const uint32_t wgId)
const;
531 saved(sender_state) { }
672 *sender_state=
nullptr,
int _kernId=-1)
913 if (if_name ==
"memory_port" && idx <
memPort.size()) {
915 }
else if (if_name ==
"translation_port" && idx <
tlbPort.size()) {
917 }
else if (if_name ==
"scalar_port") {
919 }
else if (if_name ==
"scalar_tlb_port") {
921 }
else if (if_name ==
"sqc_port") {
923 }
else if (if_name ==
"sqc_tlb_port") {
925 }
else if (if_name ==
"ldsPort") {
927 }
else if (if_name ==
"gmTokenPort") {
1147 #endif // __COMPUTE_UNIT_HH__
statistics::Scalar tlbRequests
This is a simple scalar statistic, like a counter.
bool recvTimingResp(PacketPtr)
Receive a timing response from the peer.
GMTokenPort(const std::string &name, SimObject *owner, PortID id=InvalidPortID)
statistics::Formula sALUInstsPerWF
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
statistics::Formula vALUUtilization
InstSeqNum getAndIncSeqNum()
virtual Port & getPort(const std::string &if_name, PortID idx=InvalidPortID)
Get a port with a given name and index.
Packet::SenderState * saved
std::vector< std::vector< Wavefront * > > wfList
ComputeUnit(const Params &p)
statistics::Formula scalarMemReadsPerWF
std::vector< WFBarrier > wfBarrierSlots
The barrier slots for this CU.
void doSmReturn(GPUDynInstPtr gpuDynInst)
statistics::Scalar instCyclesSALU
ScoreboardCheckToSchedule scoreboardCheckToSchedule
TODO: Update these comments once the pipe stage interface has been fully refactored.
statistics::VectorDistribution instInterleave
statistics::Scalar flatVMemInsts
const std::string name() const
Return port name (for DPRINTF).
statistics::Scalar numVecOpsExecutedTwoOpFP
std::deque< PacketPtr > retries
void sendRequest(GPUDynInstPtr gpuDynInst, PortID index, PacketPtr pkt)
A simple distribution stat.
int coalescerToVrfBusWidth
std::vector< uint64_t > lastExecCycle
virtual void recvRangeChange()
Called to receive an address range change from the peer response port.
void processMemReqEvent(PacketPtr pkt)
LocalMemPipeline localMemoryPipe
statistics::Scalar privWrites
statistics::Scalar kernargWrites
SenderState(GPUDynInstPtr gpuDynInst, PortID _port_index, Packet::SenderState *sender_state=nullptr)
std::vector< ScalarRegisterFile * > srf
int numYetToReachBarrier() const
Number of WFs that have not yet reached the barrier.
statistics::Formula scalarMemWritesPerWF
SenderState(GPUDynInstPtr gpuDynInst)
statistics::Formula argMemInsts
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.
statistics::Scalar spillWrites
statistics::Formula spillMemInsts
statistics::Formula scalarMemWritesPerKiloInst
void recvReqRetry() override
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
bool handleResponse(PacketPtr pkt)
statistics::Scalar readonlyReads
void handleSQCReturn(PacketPtr pkt)
Communication interface between Schedule and Execute stages.
SenderState is information carried along with the packet, esp.
statistics::Scalar wgBlockedDueBarrierAllocation
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.
statistics::Scalar completedWGs
statistics::Scalar vectorMemReads
virtual void recvFunctional(PacketPtr pkt)
std::vector< std::vector< Addr > > lastVaddrSimd
statistics::Scalar numVecOpsExecutedF64
void fetch(PacketPtr pkt, Wavefront *wavefront)
void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
statistics::Scalar dynamicGMemInstrCnt
int numCyclesPerStoreTransfer
int numCyclesPerLoadTransfer
const PortID id
A numeric identifier to distinguish ports in a vector, and set to InvalidPortID in case this port is ...
std::deque< PacketPtr > retries
here we queue all the translation requests that were not successfully sent.
std::map< Addr, int > pagesTouched
virtual Tick recvAtomic(PacketPtr pkt)
bool stalled
whether or not it is stalled
ScoreboardCheckStage scoreboardCheckStage
gem5::ComputeUnit::ComputeUnitStats stats
ComputeUnit * computeUnit
std::unordered_map< GPUDynInstPtr, Tick > headTailMap
statistics::Formula vpc_f16
statistics::Formula tlbLatency
void processMemRespEvent(PacketPtr pkt)
std::vector< Addr > lastVaddrCU
void release()
Release this barrier resource so it can be used by other WGs.
bool recvTimingResp(PacketPtr pkt) override
Receive a timing response from the peer.
statistics::Scalar sALUInsts
SenderState(GPUDynInstPtr gpuDynInst, PortID port_index)
WaitClass srfToScalarMemPipeBus
void releaseBarrier(int bar_id)
statistics::Vector instCyclesScMemPerSimd
int _maxBarrierCnt
The maximum number of WFs that can reach this barrier.
int spBypassLength() const
int numYetToReachBarrier(int bar_id)
A vector of scalar stats.
statistics::Distribution ldsBankConflictDist
int32_t getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
bool handleResponse(PacketPtr pkt)
virtual Tick recvAtomic(PacketPtr pkt)
int scalarPipeLength() const
statistics::Scalar kernargReads
statistics::Scalar numVecOpsExecutedFMA32
WaitClass vrfToGlobalMemPipeBus
statistics::Scalar ldsNoFlatInsts
void resetBarrier(int bar_id)
ComputeUnit * computeUnit
statistics::Scalar globalReads
int storeBusLength() const
std::deque< std::pair< PacketPtr, Wavefront * > > retries
statistics::Formula groupMemInsts
statistics::Distribution activeLanesPerLMemInstrDist
TokenManager * memPortTokens
void decMaxBarrierCnt()
Decrement the number of WFs that are participating in this barrier.
LdsState & getLds() const
const PortID InvalidPortID
int numVectorSharedMemUnits
statistics::Formula vectorMemWritesPerWF
statistics::Scalar numVecOpsExecutedMAD64
statistics::Distribution headTailLatency
ScalarDataPort scalarDataPort
statistics::Scalar threadCyclesVALU
statistics::Scalar numVecOpsExecutedMAC32
statistics::Scalar numVecOpsExecutedMAD32
statistics::Formula vectorMemReadsPerWF
std::vector< VectorRegisterFile * > vrf
statistics::Vector instCyclesVMemPerSimd
statistics::Scalar dynamicFlatMemInstrCnt
SystemHubEvent(PacketPtr pkt, DataPort *_dataPort)
void setFlags(Flags _flags)
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
statistics::Scalar groupReads
statistics::Formula vpc_f64
A RequestPort is a specialisation of a Port, which implements the default protocol for the three diff...
void injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelMemSync, RequestPtr req=nullptr)
ComputeUnit * computeUnit
Cycles is a wrapper class for representing cycle counts, i.e.
static const FlagsType AutoDelete
virtual bool sendTimingReq(PacketPtr pkt)
Attempt to send this packet: either the port is already stalled, the request is nack'd and must stall...
statistics::Formula kernargMemInsts
statistics::Formula flatVMemInstsPerWF
virtual void recvRangeChange()
Called to receive an address range change from the peer response port.
int loadBusLength() const
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
bool processTimingPacket(PacketPtr pkt)
SenderState is information carried along with the packet throughout the TLB hierarchy.
pageDataStruct pageAccesses
std::deque< PacketPtr > retries
GPUDynInstPtr _gpuDynInst
statistics::Scalar flatLDSInsts
DataPort(const std::string &_name, ComputeUnit *_cu, PortID id)
void startWavefront(Wavefront *w, int waveId, LdsChunk *ldsChunk, HSAQueueEntry *task, int bar_id, bool fetchContext=false)
statistics::Scalar privReads
int numAtBarrier(int bar_id)
void incNumAtBarrier(int bar_id)
ScalarDataPort & scalarDataPort
statistics::Scalar completedWfs
virtual Tick recvAtomic(PacketPtr pkt)
statistics::Scalar numVecOpsExecuted
int oprNetPipeLength() const
static const int InvalidID
ComputeUnit * computeUnit
virtual void recvFunctional(PacketPtr pkt)
void decMaxBarrierCnt(int bar_id)
WaitClass vectorSharedMemUnit
void releaseWFsFromBarrier(int bar_id)
statistics::Distribution activeLanesPerGMemInstrDist
ScalarDTLBPort(const std::string &_name, ComputeUnit *_cu)
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
void initiateFetch(Wavefront *wavefront)
const char * description() const
Return a C string describing the event.
statistics::Scalar vALUInsts
statistics::Scalar instCyclesVALU
statistics::Scalar numVecOpsExecutedMAC64
void handleMemPacket(PacketPtr pkt, int memport_index)
GPUDynInstPtr _gpuDynInst
uint64_t Tick
Tick count type.
std::shared_ptr< Request > RequestPtr
EventFunctionWrapper tickEvent
ScheduleToExecute scheduleToExecute
RequestorID vramRequestorId()
Forward the VRAM requestor ID needed for device memory from shader.
GlobalMemPipeline globalMemoryPipe
ComputeUnit * computeUnit
RegisterManager * registerManager
statistics::Scalar numInstrExecuted
bool recvTimingResp(PacketPtr pkt) override
Receive a timing response from the peer.
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
statistics::Scalar numCASOps
EventFunctionWrapper * createMemReqEvent(PacketPtr pkt)
statistics::Formula vectorMemInstsPerKiloInst
std::deque< PacketPtr > retries
here we queue all the translation requests that were not successfully sent.
statistics::Scalar ldsBankAccesses
Cycles vrf_lm_bus_latency
GPUDynInstPtr _gpuDynInst
ScalarDataPort * dataPort
statistics::Scalar numTimesWgBlockedDueVgprAlloc
void processFetchReturn(PacketPtr pkt)
virtual void getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
virtual Tick recvAtomic(PacketPtr pkt)
This represents a slice of the overall LDS, intended to be associated with an individual workgroup.
int cacheLineSize() const
virtual void recvRangeChange()
Called to receive an address range change from the peer response port.
int mapWaveToScalarMem(Wavefront *w) const
int mapWaveToGlobalMem(Wavefront *w) const
void deleteFromPipeMap(Wavefront *w)
virtual Tick recvAtomic(PacketPtr pkt)
The port intended to communicate between the CU and its LDS.
void doFlush(GPUDynInstPtr gpuDynInst)
Trigger a flush operation in the CU.
Abstract superclass for simulation objects.
virtual void init() override
init() is called after all C++ SimObjects have been created and all ports are connected.
std::vector< WaitClass > scalarALUs
GPUDynInstPtr _gpuDynInst
int dpBypassLength() const
std::vector< DataPort > memPort
The memory port for SIMD data accesses.
statistics::Formula scalarMemReadsPerKiloInst
A virtual base opaque structure used to hold state associated with the packet (e.g....
statistics::Formula vectorMemReadsPerKiloInst
std::vector< uint64_t > instExecPerSimd
SenderState(Wavefront *_wavefront)
std::vector< std::vector< std::vector< Addr > > > lastVaddrWF
GPUDynInstPtr _gpuDynInst
statistics::Scalar groupWrites
LDSPort(const std::string &_name, ComputeUnit *_cu)
int numBarrierSlots() const
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
statistics::Scalar globalWrites
statistics::Formula vALUInstsPerWF
TokenManager * getTokenManager()
MemReqEvent(ScalarDataPort &_scalar_data_port, PacketPtr _pkt)
statistics::Scalar numTimesWgBlockedDueSgprAlloc
statistics::Scalar numVecOpsExecutedF16
WFBarrier & barrierSlot(int bar_id)
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.
statistics::Formula privMemInsts
int mapWaveToScalarAlu(Wavefront *w) const
std::shared_ptr< GPUDynInst > GPUDynInstPtr
bool hasDispResources(HSAQueueEntry *task, int &num_wfs_in_wg)
ComputeUnit * computeUnit
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
std::unordered_set< uint64_t > pipeMap
virtual void recvReqRetry()
The bus is telling the port that there is now space, so retrying stalled requests should work now; this...
virtual void recvFunctional(PacketPtr pkt)
Packet::SenderState * saved
SystemHubEvent(PacketPtr pkt, ScalarDataPort *_dataPort)
std::deque< std::pair< PacketPtr, GPUDynInstPtr > > retries
virtual void getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
SenderState(Wavefront *_wavefront, Packet::SenderState *sender_state=nullptr, int _kernId=-1)
void updateInstStats(GPUDynInstPtr gpuDynInst)
statistics::Formula numALUInstsExecuted
statistics::Vector instCyclesLdsPerSimd
statistics::Scalar argReads
int getCacheLineBits() const
std::unordered_map< Addr, std::pair< int, int > > pageDataStruct
statistics::Formula globalMemInsts
statistics::Scalar wgBlockedDueLdsAllocation
virtual bool recvTimingResp(PacketPtr pkt)
Get the result of packets sent to the LDS when they return.
int numVectorGlobalMemUnits
statistics::Scalar numVecOpsExecutedMAD16
void updatePageDivergenceDist(Addr addr)
Communication interface between ScoreboardCheck and Schedule stages.
std::vector< int > vectorRegsReserved
statistics::Scalar readonlyWrites
statistics::Scalar numVecOpsExecutedMAC16
statistics::Distribution waveLevelParallelism
statistics::Scalar scalarMemWrites
Cycles vrf_gm_bus_latency
int vrfToCoalescerBusWidth
statistics::Distribution controlFlowDivergenceDist
Ports are used to interface objects to each other.
statistics::Scalar vectorMemWrites
void insertInPipeMap(Wavefront *w)
int _numAtBarrier
The number of WFs in the WG that have reached the barrier.
int mapWaveToLocalMem(Wavefront *w) const
virtual void recvRangeChange()
Called to receive an address range change from the peer response port.
LDSPort ldsPort
The port to access the Local Data Store. Can be connected to an LDS object.
ComputeUnit * computeUnit
statistics::Scalar numVecOpsExecutedFMA64
statistics::Formula flatLDSInstsPerWF
Packet::SenderState * saved
void makeResponse()
Take a request packet and modify it in place to be suitable for returning as a response to that reque...
GPUDynInstPtr getMemInst() const
statistics::Vector hitsPerTLBLevel
int maxBarrierCnt(int bar_id)
std::vector< int > scalarRegsReserved
void fillKernelState(Wavefront *w, HSAQueueEntry *task)
virtual void recvFunctional(PacketPtr pkt)
SenderState is information carried along with the packet throughout the TLB hierarchy.
SQCPort(const std::string &_name, ComputeUnit *_cu)
WaitClass vrfToLocalMemPipeBus
statistics::Distribution execRateDist
std::vector< DTLBPort > tlbPort
statistics::Scalar numVecOpsExecutedF32
bool isVectorAluIdle(uint32_t simdId) const
std::vector< WaitClass > vectorALUs
bool allAtBarrier() const
Have all WFs participating in this barrier reached the barrier? If so, then the barrier is satisfied ...
void sendScalarRequest(GPUDynInstPtr gpuDynInst, PacketPtr pkt)
std::unordered_set< int > freeBarrierIds
A set used to easily retrieve a free barrier ID.
ComputeUnitStats(statistics::Group *parent, int n_wf)
Tick scalar_req_tick_latency
std::queue< PacketPtr > retries
here we queue all the requests that were not successfully sent.
WaitClass scalarMemToSrfBus
ScalarDTLBPort scalarDTLBPort
statistics::Distribution pageDivergenceDist
virtual void recvRangeChange()
Called to receive an address range change from the peer response port.
void setMaxBarrierCnt(int max_barrier_cnt)
Set the maximum barrier count (i.e., the number of WFs that are participating in the barrier).
A vector of distributions.
ScalarDataPort(const std::string &_name, ComputeUnit *_cu)
statistics::Scalar argWrites
void reset()
Reset the barrier.
statistics::Formula vpc_f32
void incNumAtBarrier()
Mark that a WF has reached the barrier.
int maxBarrierCnt() const
bool sendToLds(GPUDynInstPtr gpuDynInst)
Send a general request to the LDS. Make sure to look at the return value here, as your request might be...
statistics::Scalar dynamicLMemInstrCnt
statistics::Scalar numFailedCASOps
ScalarMemPipeline scalarMemoryPipe
virtual void recvFunctional(PacketPtr pkt)
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.
WaitClass vectorGlobalMemUnit
int simdUnitWidth() const
statistics::Formula readonlyMemInsts
void recvReqRetry() override
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
statistics::Scalar scalarMemReads
statistics::Scalar totalCycles
void dispWorkgroup(HSAQueueEntry *task, int num_wfs_in_wg)
enums::PrefetchType prefetchType
statistics::Scalar tlbCycles
int mapWaveToScalarAluGlobalIdx(Wavefront *w) const
Port & getPort(const std::string &if_name, PortID idx) override
Get a port with a given name and index.
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
void doInvalidate(RequestPtr req, int kernId)
Trigger an invalidate operation in the CU.
statistics::Scalar spillReads
ITLBPort(const std::string &_name, ComputeUnit *_cu)
bool allAtBarrier(int bar_id)
std::vector< int > numWfsToSched
Number of WFs to schedule to each SIMD.
statistics::Formula ldsNoFlatInstsPerWF
SenderState(GPUDynInstPtr gpuDynInst)
Cycles srf_scm_bus_latency
statistics::Scalar numVecOpsExecutedFMA16
DTLBPort(const std::string &_name, ComputeUnit *_cu, PortID id)
ScheduleStage scheduleStage
const int _numBarrierSlots
SenderState(GPUDynInstPtr gpuDynInst, Packet::SenderState *sender_state=nullptr)
EventFunctionWrapper * createMemRespEvent(PacketPtr pkt)
RequestorID requestorId()
statistics::Formula scalarMemInstsPerKiloInst
statistics::Formula vectorMemWritesPerKiloInst
Tick scalar_resp_tick_latency
Generated on Sun Jul 30 2023 01:56:56 for gem5 by doxygen 1.8.17