37 #ifndef __COMPUTE_UNIT_HH__ 38 #define __COMPUTE_UNIT_HH__ 42 #include <unordered_map> 48 #include "enums/PrefetchType.hh" 66 struct ComputeUnitParams;
224 uint32_t operandSize,
227 regIdxVec.push_back(std::make_pair(simdId, regIdx));
228 timestampVec.push_back(when);
229 statusVec.push_back(newStatus);
230 if (operandSize > 4) {
231 regIdxVec.push_back(std::make_pair(simdId,
233 numVecRegsPerSimd)));
234 timestampVec.push_back(when);
235 statusVec.push_back(newStatus);
279 int AllAtBarrier(uint32_t _barrier_id, uint32_t bcnt, uint32_t bslots);
280 bool cedeSIMD(
int simdId,
int wfSlotId);
283 virtual void init()
override;
287 bool kernelLaunch=
true,
392 getRefCounter(
const uint32_t dispatchId,
const uint32_t wgId)
const;
439 : _gpuDynInst(gpuDynInst),
440 port_index(_port_index),
441 saved(sender_state) { }
456 virtual bool recvTimingResp(
PacketPtr pkt);
460 virtual void recvReqRetry();
487 *sender_state=
nullptr)
488 : wavefront(_wavefront), saved(sender_state) { }
497 virtual bool recvTimingResp(
PacketPtr pkt);
501 virtual void recvReqRetry();
517 index(_index), stalled(false)
544 : _gpuDynInst(gpuDynInst), portIndex(port_index) { }
553 virtual bool recvTimingResp(
PacketPtr pkt);
557 virtual void recvReqRetry();
592 virtual bool recvTimingResp(
PacketPtr pkt);
596 virtual void recvReqRetry();
632 _gpuDynInst(gpuDynInst)
648 bool stalled =
false;
697 if (if_name ==
"memory_port") {
700 return *memPort[idx];
701 }
else if (if_name ==
"translation_port") {
704 return *tlbPort[idx];
705 }
else if (if_name ==
"sqc_port") {
709 }
else if (if_name ==
"sqc_tlb_port") {
712 }
else if (if_name ==
"ldsPort") {
714 fatal(
"an LDS port was already allocated");
719 panic(
"incorrect port name");
729 : simdId(_simdId), wfSlotId(_wfSlotId) { }
751 #endif // __COMPUTE_UNIT_HH__ uint32_t numVecRegsPerSimd
A MasterPort is a specialisation of a BaseMasterPort, which implements the default protocol for the t...
#define panic(...)
This implements a cprintf based panic() function.
void updatePageDivergenceDist(Addr addr)
Stats::Formula tlbLatency
GPUDynInstPtr _gpuDynInst
virtual void recvRangeChange()
Called to receive an address range change from the peer slave port.
virtual void recvRangeChange()
Called to receive an address range change from the peer slave port.
SenderState(Wavefront *_wavefront, Packet::SenderState *sender_state=nullptr)
Ports are used to interface objects to each other.
Stats::Scalar flatLDSInsts
std::deque< PacketPtr > retries
here we queue all the translation requests that were not successfully sent.
std::vector< bool > vectorAluInstAvail
void injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelLaunch=true, RequestPtr req=nullptr)
void handleMemPacket(PacketPtr pkt, int memport_index)
#define fatal(...)
This implements a cprintf based fatal() function.
DTLBPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
uint32_t numCyclesPerLoadTransfer
Packet::SenderState * saved
static const int MAX_REGS_FOR_NON_VEC_MEM_INST
std::map< unsigned, waveQueue > xactCasLoadMap
static const int MAX_WIDTH_FOR_MEM_INST
std::vector< std::vector< std::pair< Wavefront *, WAVE_STATUS > > > waveStatusList
void fillKernelState(Wavefront *w, NDRange *ndr)
Stats::Vector hitsPerTLBLevel
Stats::Scalar dynamicGMemInstrCnt
ScheduleStage scheduleStage
Stats::Formula flatLDSInstsPerWF
const char * __attribute__((weak)) m5MainCommands[]
Stats::Distribution controlFlowDivergenceDist
std::vector< std::vector< Wavefront * > > readyList
std::shared_ptr< Request > RequestPtr
GPUDynInstPtr _gpuDynInst
Stats::Scalar vectorMemWrites
virtual void getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
SenderState(Wavefront *_wavefront)
virtual void init() override
init() is called after all C++ SimObjects have been created and all ports are connected.
CUExitCallback * cuExitCallback
A vector of scalar stats.
std::vector< DTLBPort * > tlbPort
std::vector< std::vector< Wavefront * > > wfList
this represents a slice of the overall LDS, intended to be associated with an individual workgroup ...
virtual void recvRangeChange()
Called to receive an address range change from the peer slave port.
Stats::Scalar dynamicLMemInstrCnt
SenderState is information carried along with the packet throughout the TLB hierarchy.
Stats::Formula numALUInstsExecuted
Declaration of Statistics objects.
GPUStaticInst * kernelLaunchInst
Stats::Scalar numInstrExecuted
void initiateFetch(Wavefront *wavefront)
This is a simple scalar statistic, like a counter.
SenderState(GPUDynInstPtr gpuDynInst)
virtual void recvFunctional(PacketPtr pkt)
Stats::Distribution ldsBankConflictDist
SenderState is information carried along with the packet throughout the TLB hierarchy.
std::vector< WaitClass > vrfToLocalMemPipeBus
Stats::Formula vectorMemWritesPerWF
std::deque< std::pair< PacketPtr, GPUDynInstPtr > > retries
Stats::Scalar wgBlockedDueLdsAllocation
Packet::SenderState * saved
virtual void recvFunctional(PacketPtr pkt)
std::vector< std::vector< std::vector< Addr > > > lastVaddrWF
virtual Tick recvAtomic(PacketPtr pkt)
std::vector< WaitClass > aluPipe
uint32_t numCyclesPerStoreTransfer
void startWavefront(Wavefront *w, int waveId, LdsChunk *ldsChunk, NDRange *ndr)
ComputeUnit(const Params *p)
std::deque< std::pair< PacketPtr, Wavefront * > > retries
GlobalMemPipeline globalMemoryPipe
uint32_t coalescerToVrfBusWidth
Stats::Formula vALUUtilization
std::shared_ptr< GPUDynInst > GPUDynInstPtr
virtual void recvRangeChange()
Called to receive an address range change from the peer slave port.
Stats::Distribution activeLanesPerLMemInstrDist
ITLBPort(const std::string &_name, ComputeUnit *_cu)
Stats::Formula scalarMemWritesPerWF
Stats::Scalar numTimesWgBlockedDueVgprAlloc
CUExitCallback(ComputeUnit *_cu)
Stats::Distribution execRateDist
Stats::Formula vectorMemReadsPerWF
void sendSyncRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt)
Port & getPort(const std::string &if_name, PortID idx) override
Get a port with a given name and index.
bool isShrMem(int unitId)
std::vector< std::pair< uint32_t, uint32_t > > regIdxVec
std::string csprintf(const char *format, const Args &...args)
virtual Tick recvAtomic(PacketPtr pkt)
SenderState(GPUDynInstPtr gpuDynInst, PortID port_index)
std::deque< PacketPtr > retries
here we queue all the translation requests that were not successfully sent.
The port intended for communication between the CU and its LDS.
std::list< waveIdentifier > waveIDQueue
Stats::Distribution pageDivergenceDist
LdsState & getLds() const
uint64_t Tick
Tick count type.
Stats::Scalar tlbRequests
ComputeUnit * computeUnit
virtual void recvFunctional(PacketPtr pkt)
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
A simple distribution stat.
ComputeUnit * computeUnit
std::vector< WaitClass > vrfToGlobalMemPipeBus
void updateInstStats(GPUDynInstPtr gpuDynInst)
ClockedObject declaration and implementation.
Stats::Scalar flatVMemInsts
GPUDynInstPtr getMemInst() const
ComputeUnit * computeUnit
std::vector< DataPort * > memPort
The memory port for SIMD data accesses.
std::vector< std::vector< Addr > > lastVaddrSimd
uint32_t vrfToCoalescerBusWidth
int AllAtBarrier(uint32_t _barrier_id, uint32_t bcnt, uint32_t bslots)
void StartWorkgroup(NDRange *ndr)
Stats::Formula sALUInstsPerWF
virtual Tick recvAtomic(PacketPtr pkt)
bool isGlbMem(int unitId)
Stats::Scalar scalarMemWrites
std::unordered_map< Addr, std::pair< int, int > > pageDataStruct
Stats::Scalar scalarMemReads
Defines global host-dependent types: Counter, Tick, and (indirectly) {int,uint}{8,16,32,64}_t.
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
SQCPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
virtual const std::string name() const
Stats::Scalar ldsNoFlatInsts
std::vector< std::pair< Wavefront *, DISPATCH_STATUS > > dispatchList
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
ComputeUnit * computeUnit
bool sendToLds(GPUDynInstPtr gpuDynInst) __attribute__((warn_unused_result))
Send a general request to the LDS. Make sure to look at the return value here, as your request might be...
bool cedeSIMD(int simdId, int wfSlotId)
Stats::Scalar instCyclesVALU
A virtual base opaque structure used to hold state associated with the packet (e.g., an MSHR), specific to a SimObject that sees the packet.
Stats::Scalar completedWfs
SenderState(GPUDynInstPtr gpuDynInst, PortID _port_index, Packet::SenderState *sender_state=nullptr)
Stats::Formula scalarMemReadsPerWF
Stats::Formula vALUInstsPerWF
int32_t getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
Stats::Distribution activeLanesPerGMemInstrDist
void doSmReturn(GPUDynInstPtr gpuDynInst)
ComputeUnit * computeUnit
SenderState is information carried along with the packet, esp.
std::queue< PacketPtr > retries
here we queue all the requests that were not successfully sent.
Stats::Scalar numVecOpsExecuted
std::vector< VectorRegisterFile * > vrf
virtual void getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
virtual void recvFunctional(PacketPtr pkt)
void resizeRegFiles(int num_cregs, int num_sregs, int num_dregs)
Stats::Scalar numFailedCASOps
int ReadyWorkgroup(NDRange *ndr)
LDSPort * getLdsPort() const
std::map< Addr, int > pagesTouched
Stats::Scalar instCyclesSALU
virtual void process()
virtual process function that is invoked when the callback queue is executed.
void fetch(PacketPtr pkt, Wavefront *wavefront)
Stats::Formula flatVMemInstsPerWF
std::vector< uint8_t > statusVec
virtual ~CUExitCallback()
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
std::vector< uint64_t > lastExecCycle
void registerEvent(uint32_t simdId, uint32_t regIdx, uint32_t operandSize, uint64_t when, uint8_t newStatus)
std::vector< WaitClass > wfWait
LocalMemPipeline localMemoryPipe
void processFetchReturn(PacketPtr pkt)
pageDataStruct pageAccesses
bool processTimingPacket(PacketPtr pkt)
Enums::PrefetchType prefetchType
Stats::Scalar ldsBankAccesses
Stats::Scalar totalCycles
LDSPort * ldsPort
The port to access the Local Data Store. Can be connected to an LDS object.
waveIdentifier(int _simdId, int _wfSlotId)
std::vector< uint64_t > timestampVec
Stats::Scalar vectorMemReads
int cacheLineSize() const
std::vector< Addr > lastVaddrCU
void regStats() override
Callback to set stat parameters.
Stats::Formula ldsNoFlatInstsPerWF
virtual Tick recvAtomic(PacketPtr pkt)
virtual Tick recvAtomic(PacketPtr pkt)
ComputeUnit * computeUnit
bool isSimdDone(uint32_t) const
uint64_t getAndIncSeqNum()
Stats::Scalar threadCyclesVALU
virtual void recvRangeChange()
Called to receive an address range change from the peer slave port.
void sendRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt)
std::vector< int > vectorRegsReserved
bool isVecAlu(int unitId)
LDSPort(const std::string &_name, ComputeUnit *_cu, PortID _id)
ScoreboardCheckStage scoreboardCheckStage
virtual void recvFunctional(PacketPtr pkt)
DataPort(const std::string &_name, ComputeUnit *_cu, PortID _index)