gem5 v22.1.0.0 — gpu-compute/compute_unit.hh
/*
 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __COMPUTE_UNIT_HH__
#define __COMPUTE_UNIT_HH__

#include <deque>
#include <map>
#include <unordered_set>
#include <vector>

#include "base/callback.hh"
#include "base/compiler.hh"
#include "base/statistics.hh"
#include "base/stats/group.hh"
#include "base/types.hh"
#include "config/the_gpu_isa.hh"
#include "enums/PrefetchType.hh"
#include "gpu-compute/comm.hh"
#include "gpu-compute/exec_stage.hh"
#include "gpu-compute/fetch_stage.hh"
#include "gpu-compute/global_memory_pipeline.hh"
#include "gpu-compute/local_memory_pipeline.hh"
#include "gpu-compute/misc.hh"
#include "gpu-compute/register_manager.hh"
#include "gpu-compute/scalar_memory_pipeline.hh"
#include "gpu-compute/schedule_stage.hh"
#include "gpu-compute/scoreboard_check_stage.hh"
#include "mem/port.hh"
#include "mem/token_port.hh"
#include "sim/clocked_object.hh"

namespace gem5
{

class HSAQueueEntry;
class LdsChunk;
class ScalarRegisterFile;
class Shader;
class VectorRegisterFile;

struct ComputeUnitParams;

enum EXEC_POLICY
{
    OLDEST = 0,
    RR
};

enum TLB_CACHE
{
    TLB_MISS_CACHE_MISS = 0,
    TLB_MISS_CACHE_HIT,
    TLB_HIT_CACHE_MISS,
    TLB_HIT_CACHE_HIT
};

/**
 * WF barrier slots. This represents the barrier resource for
 * WF-level barriers (i.e., barriers to sync WFs within a WG).
 */
class WFBarrier
{
  public:
    WFBarrier() : _numAtBarrier(0), _maxBarrierCnt(0)
    {
    }

    static const int InvalidID = -1;

    int
    numAtBarrier() const
    {
        return _numAtBarrier;
    }

    /**
     * Number of WFs that have not yet reached the barrier.
     */
    int
    numYetToReachBarrier() const
    {
        return _maxBarrierCnt - _numAtBarrier;
    }

    int
    maxBarrierCnt() const
    {
        return _maxBarrierCnt;
    }

    /**
     * Set the maximum barrier count (i.e., the number of WFs that are
     * participating in the barrier).
     */
    void
    setMaxBarrierCnt(int max_barrier_cnt)
    {
        _maxBarrierCnt = max_barrier_cnt;
    }

    /**
     * Mark that a WF has reached the barrier.
     */
    void
    incNumAtBarrier()
    {
        assert(_numAtBarrier < _maxBarrierCnt);
        ++_numAtBarrier;
    }

    /**
     * Have all WFs participating in this barrier reached the barrier?
     * If so, then the barrier is satisfied.
     */
    bool
    allAtBarrier() const
    {
        return _numAtBarrier == _maxBarrierCnt;
    }

    /**
     * Decrement the number of WFs that are participating in this
     * barrier.
     */
    void
    decMaxBarrierCnt()
    {
        assert(_maxBarrierCnt > 0);
        --_maxBarrierCnt;
    }

    /**
     * Release this barrier resource so it can be used by other WGs.
     */
    void
    release()
    {
        _numAtBarrier = 0;
        _maxBarrierCnt = 0;
    }

    /**
     * Reset the barrier.
     */
    void
    reset()
    {
        _numAtBarrier = 0;
    }

  private:
    /**
     * The number of WFs in the WG that have reached the barrier.
     */
    int _numAtBarrier;

    /**
     * The maximum number of WFs that can reach this barrier.
     */
    int _maxBarrierCnt;
};
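
/*
 * Illustrative sketch (not part of the original header): a WG with three
 * participating WFs would drive a WFBarrier slot roughly as follows,
 * assuming the CU invokes these hooks as each WF arrives:
 *
 *     WFBarrier bar;
 *     bar.setMaxBarrierCnt(3);      // three WFs participate
 *     bar.incNumAtBarrier();        // WF 0 arrives
 *     bar.incNumAtBarrier();        // WF 1 arrives
 *     assert(!bar.allAtBarrier());  // still waiting on WF 2
 *     bar.incNumAtBarrier();        // WF 2 arrives
 *     assert(bar.allAtBarrier());   // satisfied; WFs may proceed
 *     bar.reset();                  // re-arm for the WG's next barrier
 */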

class ComputeUnit : public ClockedObject
{
  public:

    // Execution resources
    //
    // The ordering of units is:
    // Vector ALUs
    // Scalar ALUs
    // GM Pipe
    // LM Pipe
    // Scalar Mem Pipe
    //
    // Note: the ordering of units is important and the code assumes the
    // above ordering. However, there may be more than one resource of
    // each type (e.g., 4 VALUs or 2 SALUs).

    // Resource control for global memory to VRF data/address bus
    WaitClass glbMemToVrfBus;
    // Resource control for Vector Register File->Global Memory pipe buses
    WaitClass vrfToGlobalMemPipeBus;
    // Resource control for Vector Global Memory execution unit
    WaitClass vectorGlobalMemUnit;

    // Resource control for local memory to VRF data/address bus
    WaitClass locMemToVrfBus;
    // Resource control for Vector Register File->Local Memory pipe buses
    WaitClass vrfToLocalMemPipeBus;
    // Resource control for Vector Shared/Local Memory execution unit
    WaitClass vectorSharedMemUnit;

    // Resource control for scalar memory to SRF data/address bus
    WaitClass scalarMemToSrfBus;
    // Resource control for Scalar Register File->Scalar Memory pipe buses
    WaitClass srfToScalarMemPipeBus;
    // Resource control for Scalar Memory execution unit
    WaitClass scalarMemUnit;

    // vector ALU execution resources
    int numVectorALUs;
    std::vector<WaitClass> vectorALUs;

    // scalar ALU execution resources
    int numScalarALUs;
    std::vector<WaitClass> scalarALUs;

    // Return total number of execution units on this CU
    int numExeUnits() const;
    // index into readyList of the first memory unit
    int firstMemUnit() const;
    // index into readyList of the last memory unit
    int lastMemUnit() const;
    // index into scalarALUs vector of SALU used by the wavefront
    int mapWaveToScalarAlu(Wavefront *w) const;
    // index into readyList of SALU used by wavefront
    int mapWaveToScalarAluGlobalIdx(Wavefront *w) const;
    // index into readyList of Global Memory unit used by wavefront
    int mapWaveToGlobalMem(Wavefront *w) const;
    // index into readyList of Local Memory unit used by wavefront
    int mapWaveToLocalMem(Wavefront *w) const;
    // index into readyList of Scalar Memory unit used by wavefront
    int mapWaveToScalarMem(Wavefront *w) const;
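
    // Illustrative sketch (not part of the original header): given the unit
    // ordering documented above, these mappings are presumably simple
    // offset/modulo computations, along the lines of:
    //
    //     firstMemUnit()        ~ numVectorALUs + numScalarALUs   // GM pipe
    //     lastMemUnit()         ~ firstMemUnit() + 2              // scalar mem
    //     numExeUnits()         ~ numVectorALUs + numScalarALUs + 3
    //     mapWaveToScalarAlu(w) ~ w->simdId % numScalarALUs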

    int vrfToCoalescerBusWidth; // VRF->Coalescer data bus width in bytes
    int coalescerToVrfBusWidth; // Coalescer->VRF data bus width in bytes
    int numCyclesPerStoreTransfer; // number of cycles per vector store
    int numCyclesPerLoadTransfer; // number of cycles per vector load

    // track presence of dynamic instructions in the Schedule pipeline
    // stage. This is used to check the readiness of the oldest,
    // non-dispatched instruction of every WF in the Scoreboard stage.
    std::unordered_set<uint64_t> pipeMap;

    RegisterManager *registerManager;

    FetchStage fetchStage;
    ScoreboardCheckStage scoreboardCheckStage;
    ScheduleStage scheduleStage;
    ExecStage execStage;
    GlobalMemPipeline globalMemoryPipe;
    LocalMemPipeline localMemoryPipe;
    ScalarMemPipeline scalarMemoryPipe;

    EventFunctionWrapper tickEvent;

    typedef ComputeUnitParams Params;
    std::vector<std::vector<Wavefront*>> wfList;
    int cu_id;

    // array of vector register files, one per SIMD
    std::vector<VectorRegisterFile*> vrf;
    // array of scalar register files, one per SIMD
    std::vector<ScalarRegisterFile*> srf;

    // Width per VALU/SIMD unit: number of work items that can be executed
    // on the vector ALU simultaneously in a SIMD unit
    int simdWidth;
    // number of pipe stages for bypassing data to next dependent single
    // precision vector instruction inside the vector ALU pipeline
    int spBypassPipeLength;
    // number of pipe stages for bypassing data to next dependent double
    // precision vector instruction inside the vector ALU pipeline
    int dpBypassPipeLength;
    // number of pipe stages for scalar ALU
    int scalarPipeStages;
    // number of pipe stages for operand collection & distribution network
    int operandNetworkLength;
    // number of cycles per instruction issue period
    Cycles issuePeriod;

    // VRF to GM Bus latency
    Cycles vrf_gm_bus_latency;
    // SRF to Scalar Mem Bus latency
    Cycles srf_scm_bus_latency;
    // VRF to LM Bus latency
    Cycles vrf_lm_bus_latency;

    // tracks the last cycle a vector instruction was executed on a SIMD
    std::vector<uint64_t> lastExecCycle;

    // tracks the number of dyn insts executed per SIMD
    std::vector<uint64_t> instExecPerSimd;

    // true if we allow a separate TLB per lane
    bool perLaneTLB;
    // if 0, TLB prefetching is off.
    int prefetchDepth;
    // if fixed-stride prefetching, this is the stride.
    int prefetchStride;

    std::vector<Addr> lastVaddrCU;
    std::vector<std::vector<Addr>> lastVaddrSimd;
    std::vector<std::vector<std::vector<Addr>>> lastVaddrWF;
    enums::PrefetchType prefetchType;
    EXEC_POLICY exec_policy;

    bool debugSegFault;
    // Idle CU timeout in ticks
    Tick idleCUTimeout;
    int idleWfs;
    bool functionalTLB;
    bool localMemBarrier;

    /*
     * for counting page accesses
     */
    bool countPages;

    Shader *shader;

    Tick req_tick_latency;
    Tick resp_tick_latency;

    /**
     * Number of WFs to schedule to each SIMD.
     */
    std::vector<int> numWfsToSched;

    // number of currently reserved vector registers per SIMD unit
    std::vector<int> vectorRegsReserved;
    // number of currently reserved scalar registers per SIMD unit
    std::vector<int> scalarRegsReserved;
    // number of vector registers per SIMD unit
    int numVecRegsPerSimd;
    // number of available scalar registers per SIMD unit
    int numScalarRegsPerSimd;

    // this hash map will keep track of page divergence
    // per memory instruction per wavefront. The hash map
    // is cleared in GPUDynInst::updateStats() in gpu_dyn_inst.cc.
    std::map<Addr, int> pagesTouched;
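
    // Illustrative sketch (not part of the original header): for a vector
    // memory instruction whose 64 lanes straddle two pages,
    // updatePageDivergenceDist() would presumably leave pagesTouched
    // holding something like { 0x1000 -> 32, 0x2000 -> 32 }, i.e. a
    // per-page lane count that feeds the pageDivergenceDist stat.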

    void insertInPipeMap(Wavefront *w);
    void deleteFromPipeMap(Wavefront *w);

    ComputeUnit(const Params &p);
    ~ComputeUnit();

    // Timing Functions
    int oprNetPipeLength() const { return operandNetworkLength; }
    int simdUnitWidth() const { return simdWidth; }
    int spBypassLength() const { return spBypassPipeLength; }
    int dpBypassLength() const { return dpBypassPipeLength; }
    int scalarPipeLength() const { return scalarPipeStages; }
    int storeBusLength() const { return numCyclesPerStoreTransfer; }
    int loadBusLength() const { return numCyclesPerLoadTransfer; }
    int wfSize() const { return wavefrontSize; }

    void exec();
    void initiateFetch(Wavefront *wavefront);
    void fetch(PacketPtr pkt, Wavefront *wavefront);
    void fillKernelState(Wavefront *w, HSAQueueEntry *task);

    void startWavefront(Wavefront *w, int waveId, LdsChunk *ldsChunk,
                        HSAQueueEntry *task, int bar_id,
                        bool fetchContext=false);

    /** trigger invalidate operation in the CU */
    void doInvalidate(RequestPtr req, int kernId);
    /** trigger flush operation in the CU */
    void doFlush(GPUDynInstPtr gpuDynInst);

    void dispWorkgroup(HSAQueueEntry *task, int num_wfs_in_wg);
    bool hasDispResources(HSAQueueEntry *task, int &num_wfs_in_wg);

    int cacheLineSize() const { return _cacheLineSize; }
    int getCacheLineBits() const { return cacheLineBits; }

    void resetRegisterPool();

  private:
    WFBarrier&
    barrierSlot(int bar_id)
    {
        assert(bar_id > WFBarrier::InvalidID);
        return wfBarrierSlots.at(bar_id);
    }

    int
    getFreeBarrierId()
    {
        assert(freeBarrierIds.size());
        auto free_bar_id = freeBarrierIds.begin();
        int bar_id = *free_bar_id;
        freeBarrierIds.erase(free_bar_id);
        return bar_id;
    }

  public:
    int numYetToReachBarrier(int bar_id);
    bool allAtBarrier(int bar_id);
    void incNumAtBarrier(int bar_id);
    int numAtBarrier(int bar_id);
    int maxBarrierCnt(int bar_id);
    void resetBarrier(int bar_id);
    void decMaxBarrierCnt(int bar_id);
    void releaseBarrier(int bar_id);
    void releaseWFsFromBarrier(int bar_id);
    int numBarrierSlots() const { return _numBarrierSlots; }
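
    // Illustrative note (not part of the original header): these per-CU
    // barrier methods are presumably thin wrappers over the corresponding
    // WFBarrier slot, e.g. incNumAtBarrier(bar_id) amounts to
    //
    //     barrierSlot(bar_id).incNumAtBarrier();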

    template<typename c0, typename c1>
    void doSmReturn(GPUDynInstPtr gpuDynInst);

    virtual void init() override;
    void sendRequest(GPUDynInstPtr gpuDynInst, PortID index, PacketPtr pkt);
    void sendScalarRequest(GPUDynInstPtr gpuDynInst, PacketPtr pkt);
    void injectGlobalMemFence(GPUDynInstPtr gpuDynInst,
                              bool kernelMemSync,
                              RequestPtr req=nullptr);
    void handleMemPacket(PacketPtr pkt, int memport_index);
    bool processTimingPacket(PacketPtr pkt);
    void processFetchReturn(PacketPtr pkt);
    void updatePageDivergenceDist(Addr addr);

    RequestorID requestorId() { return _requestorId; }
    /** Forward the VRAM requestor ID needed for device memory from shader */
    RequestorID vramRequestorId();

    bool isDone() const;
    bool isVectorAluIdle(uint32_t simdId) const;

    void handleSQCReturn(PacketPtr pkt);

  protected:
    RequestorID _requestorId;

    LdsState &lds;

  public:
    LdsState &
    getLds() const
    {
        return lds;
    }

    int32_t
    getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const;

    /**
     * send a general request to the LDS; make sure to look at the return
     * value here as your request might be NACK'd
     */
    [[nodiscard]] bool sendToLds(GPUDynInstPtr gpuDynInst);

    typedef std::unordered_map<Addr, std::pair<int, int>> pageDataStruct;
    pageDataStruct pageAccesses;

    void exitCallback();

    class GMTokenPort : public TokenRequestPort
    {
      public:
        GMTokenPort(const std::string& name, SimObject *owner,
                    PortID id = InvalidPortID)
            : TokenRequestPort(name, owner, id)
        { }
        ~GMTokenPort() { }

      protected:
        bool recvTimingResp(PacketPtr) { return false; }
        void recvReqRetry() { }
    };

    // Manager for the number of tokens available to this compute unit to
    // send global memory request packets to the coalescer. This is only
    // used between the global memory pipe and the TCP coalescer.
    TokenManager *memPortTokens;
    GMTokenPort gmTokenPort;
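
    // Illustrative sketch (not part of the original header):
    // TokenRequestPort (mem/token_port.hh) exposes haveTokens() and
    // acquireTokens(), so the GM pipe would typically gate an outgoing
    // request on something like:
    //
    //     if (gmTokenPort.haveTokens(token_count)) {
    //         gmTokenPort.acquireTokens(token_count);
    //         // ... issue the request to the coalescer ...
    //     }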

    class DataPort : public RequestPort
    {
      public:
        DataPort(const std::string &_name, ComputeUnit *_cu, PortID id)
            : RequestPort(_name, _cu, id), computeUnit(_cu) { }

        bool snoopRangeSent;

        struct SenderState : public Packet::SenderState
        {
            GPUDynInstPtr _gpuDynInst;
            PortID port_index;
            Packet::SenderState *saved;

            SenderState(GPUDynInstPtr gpuDynInst, PortID _port_index,
                        Packet::SenderState *sender_state=nullptr)
                : _gpuDynInst(gpuDynInst),
                  port_index(_port_index),
                  saved(sender_state) { }
        };

        class SystemHubEvent : public Event
        {
            DataPort *dataPort;
            PacketPtr reqPkt;

          public:
            SystemHubEvent(PacketPtr pkt, DataPort *_dataPort)
                : dataPort(_dataPort), reqPkt(pkt)
            {
                setFlags(Event::AutoDelete);
            }

            void
            process()
            {
                // DMAs do not operate on packets and therefore do not
                // convert to a response. Do that here instead.
                reqPkt->makeResponse();
                dataPort->handleResponse(reqPkt);
            }
        };

        void processMemReqEvent(PacketPtr pkt);
        EventFunctionWrapper *createMemReqEvent(PacketPtr pkt);

        void processMemRespEvent(PacketPtr pkt);
        EventFunctionWrapper *createMemRespEvent(PacketPtr pkt);

        std::deque<std::pair<PacketPtr, GPUDynInstPtr>> retries;

        bool handleResponse(PacketPtr pkt);

      protected:
        ComputeUnit *computeUnit;

        virtual bool recvTimingResp(PacketPtr pkt);
        virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
        virtual void recvFunctional(PacketPtr pkt) { }
        virtual void recvRangeChange() { }
        virtual void recvReqRetry();

        virtual void
        getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
        {
            resp.clear();
            snoop = true;
        }
    };

    // Scalar data cache access port
    class ScalarDataPort : public RequestPort
    {
      public:
        ScalarDataPort(const std::string &_name, ComputeUnit *_cu)
            : RequestPort(_name, _cu), computeUnit(_cu)
        {
        }

        bool recvTimingResp(PacketPtr pkt) override;
        void recvReqRetry() override;

        struct SenderState : public Packet::SenderState
        {
            SenderState(GPUDynInstPtr gpuDynInst,
                        Packet::SenderState *sender_state=nullptr)
                : _gpuDynInst(gpuDynInst), saved(sender_state)
            {
            }

            GPUDynInstPtr _gpuDynInst;
            Packet::SenderState *saved;
        };

        class MemReqEvent : public Event
        {
          private:
            ScalarDataPort &scalarDataPort;
            PacketPtr pkt;

          public:
            MemReqEvent(ScalarDataPort &_scalar_data_port, PacketPtr _pkt)
                : Event(), scalarDataPort(_scalar_data_port), pkt(_pkt)
            {
                setFlags(Event::AutoDelete);
            }

            void process();
            const char *description() const;
        };

        class SystemHubEvent : public Event
        {
            ScalarDataPort *dataPort;
            PacketPtr reqPkt;

          public:
            SystemHubEvent(PacketPtr pkt, ScalarDataPort *_dataPort)
                : dataPort(_dataPort), reqPkt(pkt)
            {
                setFlags(Event::AutoDelete);
            }

            void
            process()
            {
                // DMAs do not operate on packets and therefore do not
                // convert to a response. Do that here instead.
                reqPkt->makeResponse();
                dataPort->handleResponse(reqPkt);
            }
        };

        bool handleResponse(PacketPtr pkt);

        std::deque<PacketPtr> retries;

      private:
        ComputeUnit *computeUnit;
    };

    // Instruction cache access port
    class SQCPort : public RequestPort
    {
      public:
        SQCPort(const std::string &_name, ComputeUnit *_cu)
            : RequestPort(_name, _cu), computeUnit(_cu) { }

        bool snoopRangeSent;

        struct SenderState : public Packet::SenderState
        {
            Wavefront *wavefront;
            Packet::SenderState *saved;
            // kernel id to be used in handling I-Cache invalidate response
            int kernId;

            SenderState(Wavefront *_wavefront, Packet::SenderState
                        *sender_state=nullptr, int _kernId=-1)
                : wavefront(_wavefront), saved(sender_state),
                  kernId(_kernId){ }
        };

        std::deque<std::pair<PacketPtr, Wavefront*>> retries;

      protected:
        ComputeUnit *computeUnit;

        virtual bool recvTimingResp(PacketPtr pkt);
        virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
        virtual void recvFunctional(PacketPtr pkt) { }
        virtual void recvRangeChange() { }
        virtual void recvReqRetry();

        virtual void
        getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
        {
            resp.clear();
            snoop = true;
        }
    };

    class DTLBPort : public RequestPort
    {
      public:
        DTLBPort(const std::string &_name, ComputeUnit *_cu, PortID id)
            : RequestPort(_name, _cu, id), computeUnit(_cu),
              stalled(false)
        { }

        bool isStalled() { return stalled; }
        void stallPort() { stalled = true; }
        void unstallPort() { stalled = false; }

        /**
         * here we queue all the translation requests that were
         * not successfully sent.
         */
        std::deque<PacketPtr> retries;

        /** SenderState is information carried along with the packet
         * throughout the TLB hierarchy
         */
        struct SenderState: public Packet::SenderState
        {
            // the memInst that this is associated with
            GPUDynInstPtr _gpuDynInst;

            // the lane in the memInst this is associated with, so we send
            // the memory request down the right port
            PortID portIndex;

            // constructor used for packets involved in timing accesses
            SenderState(GPUDynInstPtr gpuDynInst, PortID port_index)
                : _gpuDynInst(gpuDynInst), portIndex(port_index) { }
        };

      protected:
        ComputeUnit *computeUnit;
        bool stalled;

        virtual bool recvTimingResp(PacketPtr pkt);
        virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
        virtual void recvFunctional(PacketPtr pkt) { }
        virtual void recvRangeChange() { }
        virtual void recvReqRetry();
    };

    class ScalarDTLBPort : public RequestPort
    {
      public:
        ScalarDTLBPort(const std::string &_name, ComputeUnit *_cu)
            : RequestPort(_name, _cu), computeUnit(_cu), stalled(false)
        {
        }

        struct SenderState : public Packet::SenderState
        {
            SenderState(GPUDynInstPtr gpuDynInst) : _gpuDynInst(gpuDynInst) { }
            GPUDynInstPtr _gpuDynInst;
        };

        bool recvTimingResp(PacketPtr pkt) override;
        void recvReqRetry() override { assert(false); }

        bool isStalled() const { return stalled; }
        void stallPort() { stalled = true; }
        void unstallPort() { stalled = false; }

        std::deque<PacketPtr> retries;

      private:
        ComputeUnit *computeUnit;
        bool stalled;
    };

    class ITLBPort : public RequestPort
    {
      public:
        ITLBPort(const std::string &_name, ComputeUnit *_cu)
            : RequestPort(_name, _cu), computeUnit(_cu), stalled(false) { }

        bool isStalled() { return stalled; }
        void stallPort() { stalled = true; }
        void unstallPort() { stalled = false; }

        /**
         * here we queue all the translation requests that were
         * not successfully sent.
         */
        std::deque<PacketPtr> retries;

        /** SenderState is information carried along with the packet
         * throughout the TLB hierarchy
         */
        struct SenderState: public Packet::SenderState
        {
            // The wavefront associated with this request
            Wavefront *wavefront;

            SenderState(Wavefront *_wavefront) : wavefront(_wavefront) { }
        };

      protected:
        ComputeUnit *computeUnit;
        bool stalled;

        virtual bool recvTimingResp(PacketPtr pkt);
        virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
        virtual void recvFunctional(PacketPtr pkt) { }
        virtual void recvRangeChange() { }
        virtual void recvReqRetry();
    };

    /**
     * the port intended to communicate between the CU and its LDS
     */
    class LDSPort : public RequestPort
    {
      public:
        LDSPort(const std::string &_name, ComputeUnit *_cu)
            : RequestPort(_name, _cu), computeUnit(_cu)
        {
        }

        bool isStalled() const { return stalled; }
        void stallPort() { stalled = true; }
        void unstallPort() { stalled = false; }

        /**
         * here we queue all the requests that were
         * not successfully sent.
         */
        std::queue<PacketPtr> retries;

        /**
         * SenderState is information carried along with the packet, esp.
         * the GPUDynInstPtr
         */
        class SenderState : public Packet::SenderState
        {
          protected:
            // The actual read/write/atomic request that goes with this command
            GPUDynInstPtr _gpuDynInst = nullptr;

          public:
            SenderState(GPUDynInstPtr gpuDynInst):
                _gpuDynInst(gpuDynInst)
            {
            }

            GPUDynInstPtr
            getMemInst() const
            {
                return _gpuDynInst;
            }
        };

        /**
         * attempt to send this packet: either the port is already stalled,
         * the request is nack'd and must stall, or the request goes through
         */
        virtual bool
        sendTimingReq(PacketPtr pkt);

      protected:

        bool stalled = false; // whether or not it is stalled

        ComputeUnit *computeUnit;

        /** get the result of packets sent to the LDS when they return */
        virtual bool
        recvTimingResp(PacketPtr pkt);

        virtual Tick
        recvAtomic(PacketPtr pkt) { return 0; }

        virtual void
        recvFunctional(PacketPtr pkt)
        {
        }

        virtual void
        recvRangeChange()
        {
        }

        /**
         * the bus is telling the port that there is now space, so retrying
         * stalled requests should work now
         */
        virtual void
        recvReqRetry();
    };
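
    // Illustrative sketch (not part of the original header): the
    // sendTimingReq() override presumably implements the stall/retry
    // protocol described above along these lines:
    //
    //     if (stalled || !RequestPort::sendTimingReq(pkt)) {
    //         stallPort();
    //         retries.push(pkt);  // drained later from recvReqRetry()
    //         return false;
    //     }
    //     return true;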

    /**
     * The port to access the Local Data Store.
     * Can be connected to an LDS object.
     */
    LDSPort ldsPort;

    TokenManager *
    getTokenManager()
    {
        return memPortTokens;
    }

    /** The memory port for SIMD data accesses. */
    std::vector<DataPort> memPort;
    // port to the TLB hierarchy (i.e., the L1 TLB)
    std::vector<DTLBPort> tlbPort;
    // port to the scalar data cache
    ScalarDataPort scalarDataPort;
    // port to the scalar data TLB
    ScalarDTLBPort scalarDTLBPort;
    // port to the SQC (i.e. the I-cache)
    SQCPort sqcPort;
    // port to the SQC TLB (there's a separate TLB for each I-cache)
    ITLBPort sqcTLBPort;

    Port &
    getPort(const std::string &if_name, PortID idx) override
    {
        if (if_name == "memory_port" && idx < memPort.size()) {
            return memPort[idx];
        } else if (if_name == "translation_port" && idx < tlbPort.size()) {
            return tlbPort[idx];
        } else if (if_name == "scalar_port") {
            return scalarDataPort;
        } else if (if_name == "scalar_tlb_port") {
            return scalarDTLBPort;
        } else if (if_name == "sqc_port") {
            return sqcPort;
        } else if (if_name == "sqc_tlb_port") {
            return sqcTLBPort;
        } else if (if_name == "ldsPort") {
            return ldsPort;
        } else if (if_name == "gmTokenPort") {
            return gmTokenPort;
        } else {
            return ClockedObject::getPort(if_name, idx);
        }
    }
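
    // Illustrative note (not part of the original header): the names
    // checked above ("memory_port", "translation_port", "scalar_port",
    // "sqc_port", "ldsPort", "gmTokenPort", ...) are the Python-side port
    // names on the ComputeUnit SimObject; a config binds, e.g.,
    // cu.memory_port[i] to the cache hierarchy, and getPort() resolves
    // that binding to the corresponding C++ port instance.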

    InstSeqNum getAndIncSeqNum() { return globalSeqNum++; }

  private:
    const int _cacheLineSize;
    const int _numBarrierSlots;
    int cacheLineBits;
    InstSeqNum globalSeqNum;
    int wavefrontSize;

    /**
     * TODO: Update these comments once the pipe stage interface has
     * been fully refactored.
     *
     * Buffers used to communicate between the pipeline stages:
     * scoreboardCheckToSchedule is the communication interface between
     * the ScoreboardCheck and Schedule stages, and scheduleToExecute is
     * the communication interface between the Schedule and Execute
     * stages (see comm.hh).
     */
    ScoreboardCheckToSchedule scoreboardCheckToSchedule;
    ScheduleToExecute scheduleToExecute;

    /**
     * The barrier slots for this CU.
     */
    std::vector<WFBarrier> wfBarrierSlots;
    /**
     * A set used to easily retrieve a free barrier ID.
     */
    std::unordered_set<int> freeBarrierIds;

    // hold the time of the arrival of the first cache block related to
    // a particular GPUDynInst. This is used to calculate the difference
    // between the first and last cache block arrival times.
    std::unordered_map<GPUDynInstPtr, Tick> headTailMap;

  public:
    void updateInstStats(GPUDynInstPtr gpuDynInst);
    int activeWaves;

    struct ComputeUnitStats : public statistics::Group
    {
        ComputeUnitStats(statistics::Group *parent, int n_wf);

        statistics::Scalar vALUInsts;
        statistics::Formula vALUInstsPerWF;
        statistics::Scalar sALUInsts;
        statistics::Formula sALUInstsPerWF;
        statistics::Scalar instCyclesVALU;
        statistics::Scalar instCyclesSALU;
        statistics::Scalar threadCyclesVALU;
        statistics::Formula vALUUtilization;
        statistics::Scalar ldsNoFlatInsts;
        statistics::Formula ldsNoFlatInstsPerWF;
        statistics::Scalar flatVMemInsts;
        statistics::Formula flatVMemInstsPerWF;
        statistics::Scalar flatLDSInsts;
        statistics::Formula flatLDSInstsPerWF;
        statistics::Scalar vectorMemWrites;
        statistics::Formula vectorMemWritesPerWF;
        statistics::Scalar vectorMemReads;
        statistics::Formula vectorMemReadsPerWF;
        statistics::Scalar scalarMemWrites;
        statistics::Formula scalarMemWritesPerWF;
        statistics::Scalar scalarMemReads;
        statistics::Formula scalarMemReadsPerWF;

        statistics::Formula vectorMemReadsPerKiloInst;
        statistics::Formula vectorMemWritesPerKiloInst;
        statistics::Formula vectorMemInstsPerKiloInst;
        statistics::Formula scalarMemReadsPerKiloInst;
        statistics::Formula scalarMemWritesPerKiloInst;
        statistics::Formula scalarMemInstsPerKiloInst;

        // Cycles required to send register source (addr and data) from
        // register files to memory pipeline, per SIMD.
        statistics::Vector instCyclesVMemPerSimd;
        statistics::Vector instCyclesScMemPerSimd;
        statistics::Vector instCyclesLdsPerSimd;

        statistics::Scalar globalReads;
        statistics::Scalar globalWrites;
        statistics::Formula globalMemInsts;
        statistics::Scalar argReads;
        statistics::Scalar argWrites;
        statistics::Formula argMemInsts;
        statistics::Scalar spillReads;
        statistics::Scalar spillWrites;
        statistics::Formula spillMemInsts;
        statistics::Scalar groupReads;
        statistics::Scalar groupWrites;
        statistics::Formula groupMemInsts;
        statistics::Scalar privReads;
        statistics::Scalar privWrites;
        statistics::Formula privMemInsts;
        statistics::Scalar readonlyReads;
        statistics::Scalar readonlyWrites;
        statistics::Formula readonlyMemInsts;
        statistics::Scalar kernargReads;
        statistics::Scalar kernargWrites;
        statistics::Formula kernargMemInsts;

        statistics::Distribution waveLevelParallelism;

        // the following stats compute the avg. TLB access latency per
        // uncoalesced request (only for data)
        statistics::Scalar tlbRequests;
        statistics::Scalar tlbCycles;
        statistics::Formula tlbLatency;
        // hitsPerTLBLevel[x] are the hits in Level x TLB.
        // x = 0 is the page table.
        statistics::Vector hitsPerTLBLevel;

        statistics::Scalar ldsBankAccesses;
        statistics::Distribution ldsBankConflictDist;

        // over all memory instructions executed over all wavefronts
        // how many touched 0-4 pages, 4-8, ..., 60-64 pages
        statistics::Distribution pageDivergenceDist;
        // count of non-flat global memory vector instructions executed
        statistics::Scalar dynamicGMemInstrCnt;
        // count of flat global memory vector instructions executed
        statistics::Scalar dynamicFlatMemInstrCnt;
        statistics::Scalar dynamicLMemInstrCnt;

        statistics::Scalar wgBlockedDueBarrierAllocation;
        statistics::Scalar wgBlockedDueLdsAllocation;
        // Number of instructions executed, i.e. if 64 (or 32 or 7) lanes are
        // active when the instruction is committed, this number is still
        // incremented by 1
        statistics::Scalar numInstrExecuted;
        // Number of cycles among successive instruction executions across all
        // wavefronts of the same CU
        statistics::Distribution execRateDist;
        // number of individual vector operations executed
        statistics::Scalar numVecOpsExecuted;
        // number of individual f16 vector operations executed
        statistics::Scalar numVecOpsExecutedF16;
        // number of individual f32 vector operations executed
        statistics::Scalar numVecOpsExecutedF32;
        // number of individual f64 vector operations executed
        statistics::Scalar numVecOpsExecutedF64;
        // number of individual FMA 16,32,64 vector operations executed
        statistics::Scalar numVecOpsExecutedFMA16;
        statistics::Scalar numVecOpsExecutedFMA32;
        statistics::Scalar numVecOpsExecutedFMA64;
        // number of individual MAC 16,32,64 vector operations executed
        statistics::Scalar numVecOpsExecutedMAC16;
        statistics::Scalar numVecOpsExecutedMAC32;
        statistics::Scalar numVecOpsExecutedMAC64;
        // number of individual MAD 16,32,64 vector operations executed
        statistics::Scalar numVecOpsExecutedMAD16;
        statistics::Scalar numVecOpsExecutedMAD32;
        statistics::Scalar numVecOpsExecutedMAD64;
        // total number of two op FP vector operations executed
        statistics::Scalar numVecOpsExecutedTwoOpFP;
        // Total cycles that something is running on the GPU
        statistics::Scalar totalCycles;
        statistics::Formula vpc; // vector ops per cycle
        statistics::Formula vpc_f16; // vector ops per cycle
        statistics::Formula vpc_f32; // vector ops per cycle
        statistics::Formula vpc_f64; // vector ops per cycle
        statistics::Formula ipc; // vector instructions per cycle
        statistics::Distribution controlFlowDivergenceDist;
        statistics::Distribution activeLanesPerGMemInstrDist;
        statistics::Distribution activeLanesPerLMemInstrDist;
        // number of vector ALU instructions received
        statistics::Formula numALUInstsExecuted;
        // number of times a WG cannot start due to lack of free VGPRs in SIMDs
        statistics::Scalar numTimesWgBlockedDueVgprAlloc;
        // number of times a WG cannot start due to lack of free SGPRs in SIMDs
        statistics::Scalar numTimesWgBlockedDueSgprAlloc;
        statistics::Scalar numCASOps;
        statistics::Scalar numFailedCASOps;
        statistics::Scalar completedWfs;
        statistics::Scalar completedWGs;

        // distribution in latency difference between first and last cache
        // block arrival ticks
        statistics::Distribution headTailLatency;

        // Track the amount of interleaving between wavefronts on each SIMD.
        // This stat is sampled using instExecPerSimd to compute the number
        // of instructions that have been executed on a SIMD between a WF
        // executing two successive instructions.
        statistics::VectorDistribution instInterleave;
    } stats;
};
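
// Illustrative note (not part of the original header): the per-cycle
// formulas in ComputeUnitStats are presumably wired up in the stats
// constructor roughly as:
//
//     vpc = numVecOpsExecuted / totalCycles;
//     ipc = numInstrExecuted / totalCycles;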

} // namespace gem5

#endif // __COMPUTE_UNIT_HH__