gem5
v20.1.0.0
|
#include <compute_unit.hh>
Classes | |
class | DataPort |
Data access Port. More... | |
class | DTLBPort |
Data TLB port. More... | |
class | GMTokenPort |
class | ITLBPort |
class | LDSPort |
the port intended to communicate between the CU and its LDS More... | |
class | ScalarDataPort |
class | ScalarDTLBPort |
class | SQCPort |
Public Types | |
typedef ComputeUnitParams | Params |
typedef std::unordered_map< Addr, std::pair< int, int > > | pageDataStruct |
Public Types inherited from ClockedObject | |
typedef ClockedObjectParams | Params |
Parameters of ClockedObject. More... | |
Public Types inherited from SimObject | |
typedef SimObjectParams | Params |
Public Member Functions | |
int | numExeUnits () const |
int | firstMemUnit () const |
int | lastMemUnit () const |
int | mapWaveToScalarAlu (Wavefront *w) const |
int | mapWaveToScalarAluGlobalIdx (Wavefront *w) const |
int | mapWaveToGlobalMem (Wavefront *w) const |
int | mapWaveToLocalMem (Wavefront *w) const |
int | mapWaveToScalarMem (Wavefront *w) const |
void | insertInPipeMap (Wavefront *w) |
void | deleteFromPipeMap (Wavefront *w) |
ComputeUnit (const Params *p) | |
~ComputeUnit () | |
int | oprNetPipeLength () const |
int | simdUnitWidth () const |
int | spBypassLength () const |
int | dpBypassLength () const |
int | scalarPipeLength () const |
int | storeBusLength () const |
int | loadBusLength () const |
int | wfSize () const |
void | exec () |
void | initiateFetch (Wavefront *wavefront) |
void | fetch (PacketPtr pkt, Wavefront *wavefront) |
void | fillKernelState (Wavefront *w, HSAQueueEntry *task) |
void | startWavefront (Wavefront *w, int waveId, LdsChunk *ldsChunk, HSAQueueEntry *task, int bar_id, bool fetchContext=false) |
void | doInvalidate (RequestPtr req, int kernId) |
trigger invalidate operation in the cu More... | |
void | doFlush (GPUDynInstPtr gpuDynInst) |
trigger flush operation in the cu More... | |
void | dispWorkgroup (HSAQueueEntry *task, int num_wfs_in_wg) |
bool | hasDispResources (HSAQueueEntry *task, int &num_wfs_in_wg) |
int | cacheLineSize () const |
int | getCacheLineBits () const |
int | numYetToReachBarrier (int bar_id) |
bool | allAtBarrier (int bar_id) |
void | incNumAtBarrier (int bar_id) |
int | numAtBarrier (int bar_id) |
int | maxBarrierCnt (int bar_id) |
void | resetBarrier (int bar_id) |
void | decMaxBarrierCnt (int bar_id) |
void | releaseBarrier (int bar_id) |
void | releaseWFsFromBarrier (int bar_id) |
int | numBarrierSlots () const |
template<typename c0 , typename c1 > | |
void | doSmReturn (GPUDynInstPtr gpuDynInst) |
virtual void | init () override |
init() is called after all C++ SimObjects have been created and all ports are connected. More... | |
void | sendRequest (GPUDynInstPtr gpuDynInst, PortID index, PacketPtr pkt) |
void | sendScalarRequest (GPUDynInstPtr gpuDynInst, PacketPtr pkt) |
void | injectGlobalMemFence (GPUDynInstPtr gpuDynInst, bool kernelMemSync, RequestPtr req=nullptr) |
void | handleMemPacket (PacketPtr pkt, int memport_index) |
bool | processTimingPacket (PacketPtr pkt) |
void | processFetchReturn (PacketPtr pkt) |
void | updatePageDivergenceDist (Addr addr) |
RequestorID | requestorId () |
bool | isDone () const |
bool | isVectorAluIdle (uint32_t simdId) const |
void | updateInstStats (GPUDynInstPtr gpuDynInst) |
void | regStats () override |
Callback to set stat parameters. More... | |
LdsState & | getLds () const |
int32_t | getRefCounter (const uint32_t dispatchId, const uint32_t wgId) const |
bool | sendToLds (GPUDynInstPtr gpuDynInst) __attribute__((warn_unused_result)) |
Send a general request to the LDS. Make sure to look at the return value here, as your request might be NACK'd; returning false means that you have to have some backup plan. More... | |
void | exitCallback () |
TokenManager * | getTokenManager () |
Port & | getPort (const std::string &if_name, PortID idx) override |
Get a port with a given name and index. More... | |
InstSeqNum | getAndIncSeqNum () |
Public Member Functions inherited from ClockedObject | |
ClockedObject (const ClockedObjectParams *p) | |
const Params * | params () const |
void | serialize (CheckpointOut &cp) const override |
Serialize an object. More... | |
void | unserialize (CheckpointIn &cp) override |
Unserialize an object. More... | |
Public Member Functions inherited from SimObject | |
const Params * | params () const |
SimObject (const Params *_params) | |
virtual | ~SimObject () |
virtual const std::string | name () const |
virtual void | loadState (CheckpointIn &cp) |
loadState() is called on each SimObject when restoring from a checkpoint. More... | |
virtual void | initState () |
initState() is called on each SimObject when not restoring from a checkpoint. More... | |
virtual void | regProbePoints () |
Register probe points for this object. More... | |
virtual void | regProbeListeners () |
Register probe listeners for this object. More... | |
ProbeManager * | getProbeManager () |
Get the probe manager for this object. More... | |
virtual void | startup () |
startup() is the final initialization call before simulation. More... | |
DrainState | drain () override |
Provide a default implementation of the drain interface for objects that don't need draining. More... | |
virtual void | memWriteback () |
Write back dirty buffers to memory using functional writes. More... | |
virtual void | memInvalidate () |
Invalidate the contents of memory buffers. More... | |
void | serialize (CheckpointOut &cp) const override |
Serialize an object. More... | |
void | unserialize (CheckpointIn &cp) override |
Unserialize an object. More... | |
Public Member Functions inherited from EventManager | |
EventQueue * | eventQueue () const |
void | schedule (Event &event, Tick when) |
void | deschedule (Event &event) |
void | reschedule (Event &event, Tick when, bool always=false) |
void | schedule (Event *event, Tick when) |
void | deschedule (Event *event) |
void | reschedule (Event *event, Tick when, bool always=false) |
void | wakeupEventQueue (Tick when=(Tick) -1) |
This function is not needed by the usual gem5 event loop but may be necessary in derived EventQueues which host gem5 on other schedulers. More... | |
void | setCurTick (Tick newVal) |
EventManager (EventManager &em) | |
Event manager manages events in the event queue. More... | |
EventManager (EventManager *em) | |
EventManager (EventQueue *eq) | |
Public Member Functions inherited from Serializable | |
Serializable () | |
virtual | ~Serializable () |
void | serializeSection (CheckpointOut &cp, const char *name) const |
Serialize an object into a new section. More... | |
void | serializeSection (CheckpointOut &cp, const std::string &name) const |
void | unserializeSection (CheckpointIn &cp, const char *name) |
Unserialize a child object. More... | |
void | unserializeSection (CheckpointIn &cp, const std::string &name) |
Public Member Functions inherited from Drainable | |
DrainState | drainState () const |
Return the current drain state of an object. More... | |
virtual void | notifyFork () |
Notify a child process of a fork. More... | |
Public Member Functions inherited from Stats::Group | |
Group (Group *parent, const char *name=nullptr) | |
Construct a new statistics group. More... | |
virtual | ~Group () |
virtual void | resetStats () |
Callback to reset stats. More... | |
virtual void | preDumpStats () |
Callback before stats are dumped. More... | |
void | addStat (Stats::Info *info) |
Register a stat with this group. More... | |
const std::map< std::string, Group * > & | getStatGroups () const |
Get all child groups associated with this object. More... | |
const std::vector< Info * > & | getStats () const |
Get all stats associated with this object. More... | |
void | addStatGroup (const char *name, Group *block) |
Add a stat block as a child of this block. More... | |
const Info * | resolveStat (std::string name) const |
Resolve a stat by its name within this group. More... | |
Group ()=delete | |
Group (const Group &)=delete | |
Group & | operator= (const Group &)=delete |
Public Member Functions inherited from Clocked | |
void | updateClockPeriod () |
Update the tick to the current tick. More... | |
Tick | clockEdge (Cycles cycles=Cycles(0)) const |
Determine the tick when a cycle begins, by default the current one, but the argument also enables the caller to determine a future cycle. More... | |
Cycles | curCycle () const |
Determine the current cycle, corresponding to a tick aligned to a clock edge. More... | |
Tick | nextCycle () const |
Based on the clock of the object, determine the start tick of the first cycle that is at least one cycle in the future. More... | |
uint64_t | frequency () const |
Tick | clockPeriod () const |
double | voltage () const |
Cycles | ticksToCycles (Tick t) const |
Tick | cyclesToTicks (Cycles c) const |
Protected Attributes | |
RequestorID | _requestorId |
LdsState & | lds |
Protected Attributes inherited from SimObject | |
const SimObjectParams * | _params |
Cached copy of the object parameters. More... | |
Protected Attributes inherited from EventManager | |
EventQueue * | eventq |
A pointer to this object's event queue. More... | |
Private Member Functions | |
WFBarrier & | barrierSlot (int bar_id) |
int | getFreeBarrierId () |
Private Attributes | |
const int | _cacheLineSize |
const int | _numBarrierSlots |
int | cacheLineBits |
InstSeqNum | globalSeqNum |
int | wavefrontSize |
ScoreboardCheckToSchedule | scoreboardCheckToSchedule |
TODO: Update these comments once the pipe stage interface has been fully refactored. More... | |
ScheduleToExecute | scheduleToExecute |
std::vector< WFBarrier > | wfBarrierSlots |
The barrier slots for this CU. More... | |
std::unordered_set< int > | freeBarrierIds |
A set used to easily retrieve a free barrier ID. More... | |
std::unordered_map< GPUDynInstPtr, Tick > | headTailMap |
Additional Inherited Members | |
Static Public Member Functions inherited from SimObject | |
static void | serializeAll (CheckpointOut &cp) |
Serialize all SimObjects in the system. More... | |
static SimObject * | find (const char *name) |
Find the SimObject with the given name and return a pointer to it. More... | |
Static Public Member Functions inherited from Serializable | |
static const std::string & | currentSection () |
Gets the fully-qualified name of the active section. More... | |
static void | serializeAll (const std::string &cpt_dir) |
Serializes all the SimObjects. More... | |
static void | unserializeGlobals (CheckpointIn &cp) |
Protected Member Functions inherited from Drainable | |
Drainable () | |
virtual | ~Drainable () |
virtual void | drainResume () |
Resume execution after a successful drain. More... | |
void | signalDrainDone () const |
Signal that an object is drained. More... | |
Protected Member Functions inherited from Clocked | |
Clocked (ClockDomain &clk_domain) | |
Create a clocked object and set the clock domain based on the parameters. More... | |
Clocked (Clocked &)=delete | |
Clocked & | operator= (Clocked &)=delete |
virtual | ~Clocked () |
Virtual destructor due to inheritance. More... | |
void | resetClock () const |
Reset the object's clock using the current global tick value. More... | |
virtual void | clockPeriodUpdated () |
A hook subclasses can implement so they can do any extra work that's needed when the clock rate is changed. More... | |
Definition at line 198 of file compute_unit.hh.
typedef std::unordered_map<Addr, std::pair<int, int> > ComputeUnit::pageDataStruct |
Definition at line 626 of file compute_unit.hh.
typedef ComputeUnitParams ComputeUnit::Params |
Definition at line 287 of file compute_unit.hh.
ComputeUnit::ComputeUnit | ( | const Params * | p | ) |
This check is necessary because std::bitset only provides conversion to unsigned long or unsigned long long via to_ulong() or to_ullong(). There are a few places in the code where to_ullong() is used; however, if wavefrontSize is larger than a value the host can support, then bitset will throw a runtime exception. We should remove all use of to_ulong() or to_ullong() so we can have wavefrontSize greater than 64b; however, until that is done this assert is required.
Definition at line 62 of file compute_unit.cc.
References exec().
ComputeUnit::~ComputeUnit | ( | ) |
Definition at line 215 of file compute_unit.cc.
References ArmISA::i, ArmISA::j, lastVaddrCU, lastVaddrSimd, Shader::n_wf, numVectorALUs, shader, and wfList.
bool ComputeUnit::allAtBarrier | ( | int | bar_id | ) |
Definition at line 638 of file compute_unit.cc.
References barrierSlot().
Referenced by ScoreboardCheckStage::ready().
|
inlineprivate |
Definition at line 419 of file compute_unit.hh.
References WFBarrier::InvalidID, and wfBarrierSlots.
Referenced by allAtBarrier(), decMaxBarrierCnt(), dispWorkgroup(), incNumAtBarrier(), maxBarrierCnt(), numAtBarrier(), numYetToReachBarrier(), releaseBarrier(), and resetBarrier().
|
inline |
Definition at line 414 of file compute_unit.hh.
References _cacheLineSize.
Referenced by FetchUnit::init(), and FetchUnit::initiateFetch().
void ComputeUnit::decMaxBarrierCnt | ( | int | bar_id | ) |
Definition at line 673 of file compute_unit.cc.
References barrierSlot().
Referenced by Gcn3ISA::Inst_SOPP__S_ENDPGM::execute().
void ComputeUnit::deleteFromPipeMap | ( | Wavefront * | w | ) |
Definition at line 491 of file compute_unit.cc.
References panic_if, pipeMap, and MipsISA::w.
Referenced by Wavefront::exec().
void ComputeUnit::dispWorkgroup | ( | HSAQueueEntry * | task, |
int | num_wfs_in_wg | ||
) |
If this WG only has one WF it will not consume any barrier resources because it has no need of them.
Find a free barrier slot for this WG. Each WF in the WG will receive the same barrier ID.
Definition at line 404 of file compute_unit.cc.
References RegisterManager::allocateRegisters(), barrierSlot(), cu_id, HSAQueueEntry::dispatchId(), DPRINTF, fillKernelState(), getFreeBarrierId(), HSAQueueEntry::globalWgId(), ArmISA::i, WFBarrier::InvalidID, HSAQueueEntry::isInvDone(), ArmISA::j, lds, HSAQueueEntry::ldsSize(), Shader::n_wf, Clocked::nextCycle(), HSAQueueEntry::numScalarRegs(), numVectorALUs, HSAQueueEntry::numVectorRegs(), numWfsToSched, panic_if, registerManager, LdsState::reserveSpace(), Wavefront::S_STOPPED, EventManager::schedule(), Event::scheduled(), shader, startWavefront(), tickEvent, MipsISA::w, and wfList.
void ComputeUnit::doFlush | ( | GPUDynInstPtr | gpuDynInst | ) |
trigger flush operation in the cu
gpuDynInst: inst passed to the request
Definition at line 399 of file compute_unit.cc.
References injectGlobalMemFence().
void ComputeUnit::doInvalidate | ( | RequestPtr | req, |
int | kernId | ||
) |
trigger invalidate operation in the cu
req: request initialized in shader, carrying the invalidate flags
Definition at line 380 of file compute_unit.cc.
References getAndIncSeqNum(), and injectGlobalMemFence().
void ComputeUnit::doSmReturn | ( | GPUDynInstPtr | gpuDynInst | ) |
|
inline |
Definition at line 393 of file compute_unit.hh.
References dpBypassPipeLength.
Referenced by VectorRegisterFile::waveExecuteInst().
void ComputeUnit::exec | ( | ) |
Definition at line 703 of file compute_unit.cc.
References cu_id, DPRINTF, FetchStage::exec(), LocalMemPipeline::exec(), GlobalMemPipeline::exec(), ScalarMemPipeline::exec(), ScheduleStage::exec(), ScoreboardCheckStage::exec(), ExecStage::exec(), execStage, fetchStage, globalMemoryPipe, isDone(), localMemoryPipe, Clocked::nextCycle(), Shader::notifyCuSleep(), scalarMemoryPipe, EventManager::schedule(), scheduleStage, scoreboardCheckStage, shader, srf, tickEvent, totalCycles, and vrf.
Referenced by ComputeUnit().
void ComputeUnit::exitCallback | ( | ) |
Definition at line 2466 of file compute_unit.cc.
References countPages, OutputDirectory::create(), SimObject::name(), pageAccesses, simout, and OutputStream::stream().
void ComputeUnit::fillKernelState | ( | Wavefront * | w, |
HSAQueueEntry * | task | ||
) |
Definition at line 292 of file compute_unit.cc.
References HSAQueueEntry::gridSize(), HSAQueueEntry::numScalarRegs(), HSAQueueEntry::numVectorRegs(), MipsISA::w, and HSAQueueEntry::wgSize().
Referenced by dispWorkgroup().
int ComputeUnit::firstMemUnit | ( | ) | const |
Definition at line 236 of file compute_unit.cc.
References numScalarALUs, and numVectorALUs.
Referenced by ScheduleStage::arbitrateVrfToLdsBus(), and ScheduleStage::exec().
|
inline |
Definition at line 1023 of file compute_unit.hh.
References globalSeqNum.
Referenced by doInvalidate().
|
inline |
Definition at line 415 of file compute_unit.hh.
References cacheLineBits.
Referenced by FetchUnit::initiateFetch().
|
inlineprivate |
Definition at line 426 of file compute_unit.hh.
References freeBarrierIds.
Referenced by dispWorkgroup().
|
inline |
Definition at line 615 of file compute_unit.hh.
References lds.
Referenced by Gcn3ISA::Inst_SOPP__S_ENDPGM::execute().
Get a port with a given name and index.
This is used at binding time and returns a reference to a protocol-agnostic port.
gem5 has a request and response port interface. All memory objects are connected together via ports. These ports provide a rigid interface between these memory objects. These ports implement three different memory system modes: timing, atomic, and functional. The most important mode is the timing mode, which is used for conducting cycle-level timing experiments. The other modes are only used in special circumstances and should not be used to conduct cycle-level timing experiments. These ports allow SimObjects to communicate with each other.
if_name | Port name |
idx | Index in the case of a VectorPort |
Reimplemented from SimObject.
Definition at line 1002 of file compute_unit.hh.
References SimObject::getPort(), ldsPort, memPort, scalarDataPort, scalarDTLBPort, sqcPort, sqcTLBPort, and tlbPort.
int32_t ComputeUnit::getRefCounter | ( | const uint32_t | dispatchId, |
const uint32_t | wgId | ||
) | const |
Definition at line 2513 of file compute_unit.cc.
References LdsState::getRefCounter(), and lds.
|
inline |
Definition at line 981 of file compute_unit.hh.
References memPortTokens.
Referenced by GlobalMemPipeline::exec(), and Wavefront::exec().
void ComputeUnit::handleMemPacket | ( | PacketPtr | pkt, |
int | memport_index | ||
) |
bool ComputeUnit::hasDispResources | ( | HSAQueueEntry * | task, |
int & | num_wfs_in_wg | ||
) |
Definition at line 503 of file compute_unit.cc.
References RegisterManager::canAllocateSgprs(), RegisterManager::canAllocateVgprs(), LdsState::canReserve(), ArmISA::d, DPRINTF, freeBarrierIds, HSAQueueEntry::gridSize(), ArmISA::i, ArmISA::j, lds, HSAQueueEntry::ldsSize(), HSAQueueEntry::MAX_DIM, Shader::n_wf, HSAQueueEntry::numScalarRegs(), numScalarRegsPerSimd, numTimesWgBlockedDueSgprAlloc, numTimesWgBlockedDueVgprAlloc, numVecRegsPerSimd, numVectorALUs, HSAQueueEntry::numVectorRegs(), numWfsToSched, panic_if, registerManager, Wavefront::S_STOPPED, shader, wfList, wfSize(), wgBlockedDueBarrierAllocation, wgBlockedDueLdsAllocation, HSAQueueEntry::wgId(), and HSAQueueEntry::wgSize().
void ComputeUnit::incNumAtBarrier | ( | int | bar_id | ) |
Definition at line 645 of file compute_unit.cc.
References barrierSlot().
Referenced by Gcn3ISA::Inst_SOPP__S_BARRIER::execute().
|
overridevirtual |
init() is called after all C++ SimObjects have been created and all ports are connected.
Initializations that are independent of unserialization but rely on a fully instantiated and connected SimObject graph should be done here.
Reimplemented from SimObject.
Definition at line 736 of file compute_unit.cc.
References Clocked::clockPeriod(), execStage, fatal_if, fetchStage, glbMemToVrfBus, globalMemoryPipe, gmTokenPort, ArmISA::i, FetchStage::init(), GlobalMemPipeline::init(), ScheduleStage::init(), WaitClass::init(), ExecStage::init(), locMemToVrfBus, memPortTokens, numScalarALUs, numScalarMemUnits, numVectorALUs, numVectorGlobalMemUnits, numVectorSharedMemUnits, scalarALUs, scalarMemToSrfBus, scalarMemUnit, scalarRegsReserved, scheduleStage, TokenRequestPort::setTokenManager(), srfToScalarMemPipeBus, vectorALUs, vectorGlobalMemUnit, vectorRegsReserved, vectorSharedMemUnit, vrfToGlobalMemPipeBus, and vrfToLocalMemPipeBus.
void ComputeUnit::initiateFetch | ( | Wavefront * | wavefront | ) |
void ComputeUnit::injectGlobalMemFence | ( | GPUDynInstPtr | gpuDynInst, |
bool | kernelMemSync, | ||
RequestPtr | req = nullptr |
||
) |
Definition at line 1229 of file compute_unit.cc.
References Request::ACQUIRE, cu_id, curTick(), DPRINTF, Shader::impl_kern_end_rel, Request::KERNEL, memPort, MemCmd::MemSyncReq, Packet::pushSenderState(), Packet::req, req_tick_latency, requestorId(), EventManager::schedule(), shader, and Request::WB_L2.
Referenced by doFlush(), and doInvalidate().
void ComputeUnit::insertInPipeMap | ( | Wavefront * | w | ) |
Definition at line 482 of file compute_unit.cc.
References panic_if, pipeMap, and MipsISA::w.
Referenced by ScheduleStage::addToSchList().
bool ComputeUnit::isDone | ( | ) | const |
Definition at line 2483 of file compute_unit.cc.
References glbMemToVrfBus, globalMemoryPipe, ArmISA::i, GlobalMemPipeline::isGMReqFIFOWrRdy(), LocalMemPipeline::isLMReqFIFOWrRdy(), LocalMemPipeline::isLMRespFIFOWrRdy(), isVectorAluIdle(), localMemoryPipe, locMemToVrfBus, numVectorALUs, WaitClass::rdy(), scalarMemToSrfBus, srfToScalarMemPipeBus, vrfToGlobalMemPipeBus, and vrfToLocalMemPipeBus.
Referenced by exec().
bool ComputeUnit::isVectorAluIdle | ( | uint32_t | simdId | ) | const |
Definition at line 2520 of file compute_unit.cc.
References Shader::n_wf, numVectorALUs, Wavefront::S_STOPPED, shader, and wfList.
Referenced by isDone().
int ComputeUnit::lastMemUnit | ( | ) | const |
Definition at line 243 of file compute_unit.cc.
References numExeUnits().
Referenced by ScheduleStage::exec().
|
inline |
Definition at line 396 of file compute_unit.hh.
References numCyclesPerLoadTransfer.
Referenced by LdsState::processPacket().
int ComputeUnit::mapWaveToGlobalMem | ( | Wavefront * | w | ) | const |
Definition at line 268 of file compute_unit.cc.
References numScalarALUs, and numVectorALUs.
Referenced by Wavefront::init().
int ComputeUnit::mapWaveToLocalMem | ( | Wavefront * | w | ) | const |
Definition at line 276 of file compute_unit.cc.
References numScalarALUs, numVectorALUs, and numVectorGlobalMemUnits.
Referenced by Wavefront::init().
int ComputeUnit::mapWaveToScalarAlu | ( | Wavefront * | w | ) | const |
Definition at line 250 of file compute_unit.cc.
References numScalarALUs, and MipsISA::w.
Referenced by Wavefront::init(), and mapWaveToScalarAluGlobalIdx().
int ComputeUnit::mapWaveToScalarAluGlobalIdx | ( | Wavefront * | w | ) | const |
Definition at line 261 of file compute_unit.cc.
References mapWaveToScalarAlu(), numVectorALUs, and MipsISA::w.
Referenced by Wavefront::init().
int ComputeUnit::mapWaveToScalarMem | ( | Wavefront * | w | ) | const |
Definition at line 284 of file compute_unit.cc.
References numScalarALUs, numVectorALUs, numVectorGlobalMemUnits, and numVectorSharedMemUnits.
Referenced by Wavefront::init().
int ComputeUnit::maxBarrierCnt | ( | int | bar_id | ) |
Definition at line 659 of file compute_unit.cc.
References barrierSlot().
Referenced by Gcn3ISA::Inst_SOPP__S_ENDPGM::execute().
int ComputeUnit::numAtBarrier | ( | int | bar_id | ) |
Definition at line 652 of file compute_unit.cc.
References barrierSlot().
Referenced by Gcn3ISA::Inst_SOPP__S_BARRIER::execute().
|
inline |
Definition at line 445 of file compute_unit.hh.
References _numBarrierSlots.
int ComputeUnit::numExeUnits | ( | ) | const |
Definition at line 228 of file compute_unit.cc.
References numScalarALUs, numScalarMemUnits, numVectorALUs, numVectorGlobalMemUnits, and numVectorSharedMemUnits.
Referenced by ScheduleStage::checkRfOperandReadComplete(), ExecStage::dumpDispList(), ScheduleStage::exec(), ExecStage::exec(), ScheduleStage::fillDispatchList(), ScheduleStage::init(), lastMemUnit(), ScoreboardCheckStage::mapWaveToExeUnit(), ExecStage::regStats(), ScheduleStage::regStats(), ScheduleStage::reserveResources(), ScheduleStage::scheduleRfDestOperands(), and ScheduleStage::ScheduleStage().
int ComputeUnit::numYetToReachBarrier | ( | int | bar_id | ) |
Definition at line 631 of file compute_unit.cc.
References barrierSlot().
Referenced by Gcn3ISA::Inst_SOPP__S_BARRIER::execute().
|
inline |
Definition at line 390 of file compute_unit.hh.
References operandNetworkLength.
void ComputeUnit::processFetchReturn | ( | PacketPtr | pkt | ) |
bool ComputeUnit::processTimingPacket | ( | PacketPtr | pkt | ) |
|
overridevirtual |
Callback to set stat parameters.
This callback is typically used for complex stats (e.g., distributions) that need parameters in addition to a name and a description. Stat names and descriptions should typically be set from the constructor using the ADD_STAT macro.
Reimplemented from Stats::Group.
Definition at line 1806 of file compute_unit.cc.
References activeLanesPerGMemInstrDist, activeLanesPerLMemInstrDist, argMemInsts, argReads, argWrites, completedWfs, completedWGs, controlFlowDivergenceDist, csprintf(), Stats::DataWrap< Derived, InfoProxyType >::desc(), dynamicFlatMemInstrCnt, dynamicGMemInstrCnt, dynamicLMemInstrCnt, execRateDist, execStage, fetchStage, Stats::DataWrap< Derived, InfoProxyType >::flags(), flatLDSInsts, flatLDSInstsPerWF, flatVMemInsts, flatVMemInstsPerWF, globalMemInsts, globalMemoryPipe, globalReads, globalWrites, groupMemInsts, groupReads, groupWrites, headTailLatency, hitsPerTLBLevel, ArmISA::i, Stats::VectorBase< Derived, Stor >::init(), Stats::Distribution::init(), Stats::VectorDistribution::init(), instCyclesLdsPerSimd, instCyclesSALU, instCyclesScMemPerSimd, instCyclesVALU, instCyclesVMemPerSimd, instInterleave, ipc, kernargMemInsts, kernargReads, kernargWrites, ldsBankAccesses, ldsBankConflictDist, ldsNoFlatInsts, ldsNoFlatInstsPerWF, localMemoryPipe, Shader::n_wf, SimObject::name(), Stats::DataWrap< Derived, InfoProxyType >::name(), numALUInstsExecuted, numCASOps, numFailedCASOps, numInstrExecuted, numTimesWgBlockedDueSgprAlloc, numTimesWgBlockedDueVgprAlloc, numVecOpsExecuted, numVecOpsExecutedF16, numVecOpsExecutedF32, numVecOpsExecutedF64, numVecOpsExecutedFMA16, numVecOpsExecutedFMA32, numVecOpsExecutedFMA64, numVecOpsExecutedMAC16, numVecOpsExecutedMAC32, numVecOpsExecutedMAC64, numVecOpsExecutedMAD16, numVecOpsExecutedMAD32, numVecOpsExecutedMAD64, numVecOpsExecutedTwoOpFP, numVectorALUs, Stats::oneline, pageDivergenceDist, Stats::pdf, privMemInsts, privReads, privWrites, readonlyMemInsts, readonlyReads, readonlyWrites, registerManager, FetchStage::regStats(), RegisterManager::regStats(), LocalMemPipeline::regStats(), ScoreboardCheckStage::regStats(), ExecStage::regStats(), ScalarMemPipeline::regStats(), GlobalMemPipeline::regStats(), ScheduleStage::regStats(), Stats::Group::regStats(), sALUInsts, sALUInstsPerWF, scalarMemInstsPerKiloInst, scalarMemoryPipe, 
scalarMemReads, scalarMemReadsPerKiloInst, scalarMemReadsPerWF, scalarMemWrites, scalarMemWritesPerKiloInst, scalarMemWritesPerWF, scheduleStage, scoreboardCheckStage, shader, spillMemInsts, spillReads, spillWrites, Stats::DataWrapVec< Derived, InfoProxyType >::subname(), threadCyclesVALU, tlbCycles, tlbLatency, tlbRequests, totalCycles, vALUInsts, vALUInstsPerWF, vALUUtilization, vectorMemInstsPerKiloInst, vectorMemReads, vectorMemReadsPerKiloInst, vectorMemReadsPerWF, vectorMemWrites, vectorMemWritesPerKiloInst, vectorMemWritesPerWF, vpc, vpc_f16, vpc_f32, vpc_f64, waveLevelParallelism, wfSize(), wgBlockedDueBarrierAllocation, and wgBlockedDueLdsAllocation.
void ComputeUnit::releaseBarrier | ( | int | bar_id | ) |
Definition at line 680 of file compute_unit.cc.
References barrierSlot(), and freeBarrierIds.
Referenced by Gcn3ISA::Inst_SOPP__S_ENDPGM::execute().
void ComputeUnit::releaseWFsFromBarrier | ( | int | bar_id | ) |
Definition at line 688 of file compute_unit.cc.
References Wavefront::barrierId(), Wavefront::getStatus(), ArmISA::i, ArmISA::j, Shader::n_wf, numVectorALUs, Wavefront::S_BARRIER, Wavefront::S_RUNNING, Wavefront::setStatus(), shader, and wfList.
Referenced by ScoreboardCheckStage::ready().
|
inline |
Definition at line 461 of file compute_unit.hh.
References _requestorId.
Referenced by FetchUnit::initiateFetch(), and injectGlobalMemFence().
void ComputeUnit::resetBarrier | ( | int | bar_id | ) |
Definition at line 666 of file compute_unit.cc.
References barrierSlot().
Referenced by ScoreboardCheckStage::ready().
|
inline |
Definition at line 394 of file compute_unit.hh.
References scalarPipeStages.
Referenced by ScalarRegisterFile::waveExecuteInst().
void ComputeUnit::sendRequest | ( | GPUDynInstPtr | gpuDynInst, |
PortID | index, | ||
PacketPtr | pkt | ||
) |
Definition at line 1017 of file compute_unit.cc.
References Packet::cmd, cu_id, curTick(), Packet::dataStatic(), debugSegFault, DPRINTF, fatal, functionalTLB, Packet::getAddr(), ThreadContext::getProcessPtr(), Packet::getPtr(), Packet::getSize(), Shader::gpuTc, hitsPerTLBLevel, MipsISA::index, Packet::isRead(), Packet::isWrite(), memPort, MemCmd::MemSyncReq, MipsISA::p, panic, perLaneTLB, BaseTLB::Read, Packet::req, req_tick_latency, X86ISA::GpuTLB::TranslationState::saved, EventManager::schedule(), Packet::senderState, shader, Shader::timingSim, tlbCycles, X86ISA::GpuTLB::TranslationState::tlbEntry, tlbPort, tlbRequests, updatePageDivergenceDist(), MipsISA::vaddr, and BaseTLB::Write.
void ComputeUnit::sendScalarRequest | ( | GPUDynInstPtr | gpuDynInst, |
PacketPtr | pkt | ||
) |
Definition at line 1202 of file compute_unit.cc.
References DPRINTF, Shader::gpuTc, Packet::isRead(), ComputeUnit::ScalarDTLBPort::isStalled(), Packet::isWrite(), BaseTLB::Read, Packet::req, ComputeUnit::ScalarDTLBPort::retries, scalarDTLBPort, Packet::senderState, RequestPort::sendTimingReq(), shader, ComputeUnit::ScalarDTLBPort::stallPort(), and BaseTLB::Write.
bool ComputeUnit::sendToLds | ( | GPUDynInstPtr | gpuDynInst | ) |
Send a general request to the LDS. Make sure to look at the return value here, as your request might be NACK'd; returning false means that you have to have some backup plan.
Definition at line 2539 of file compute_unit.cc.
References ldsPort, MemCmd::ReadReq, Packet::senderState, and ComputeUnit::LDSPort::sendTimingReq().
Referenced by LocalMemPipeline::exec().
|
inline |
Definition at line 391 of file compute_unit.hh.
References simdWidth.
|
inline |
Definition at line 392 of file compute_unit.hh.
References spBypassPipeLength.
void ComputeUnit::startWavefront | ( | Wavefront * | w, |
int | waveId, | ||
LdsChunk * | ldsChunk, | ||
HSAQueueEntry * | task, | ||
int | bar_id, | ||
bool | fetchContext = false |
||
) |
Definition at line 306 of file compute_unit.cc.
References activeWaves, HSAQueueEntry::codeAddr(), cu_id, HSAQueueEntry::dispatchId(), DPRINTF, HSAQueueEntry::globalWgId(), LdsState::increaseRefCounter(), WFBarrier::InvalidID, MipsISA::k, lds, HSAQueueEntry::numWg(), Stats::DistBase< Derived, Stor >::sample(), MipsISA::w, waveLevelParallelism, and wfSize().
Referenced by dispWorkgroup().
|
inline |
Definition at line 395 of file compute_unit.hh.
References numCyclesPerStoreTransfer.
Referenced by LdsState::processPacket().
void ComputeUnit::updateInstStats | ( | GPUDynInstPtr | gpuDynInst | ) |
this case can occur for flat mem insts who execute with EXEC = 0
this case can occur for flat mem insts who execute with EXEC = 0
Definition at line 2350 of file compute_unit.cc.
References argReads, argWrites, exitSimLoop(), fatal, flatLDSInsts, flatVMemInsts, globalReads, globalWrites, groupReads, groupWrites, instCyclesSALU, instCyclesVALU, kernargReads, kernargWrites, ldsNoFlatInsts, Shader::max_valu_insts, privReads, privWrites, readonlyReads, readonlyWrites, sALUInsts, sc_core::SC_NONE, scalarMemReads, scalarMemWrites, shader, spillReads, spillWrites, threadCyclesVALU, Shader::total_valu_insts, vALUInsts, vectorMemReads, and vectorMemWrites.
Referenced by Wavefront::exec().
void ComputeUnit::updatePageDivergenceDist | ( | Addr | addr | ) |
Definition at line 2455 of file compute_unit.cc.
References addr, ArmISA::PageBytes, pagesTouched, and roundDown().
Referenced by sendRequest().
|
inline |
Definition at line 397 of file compute_unit.hh.
References wavefrontSize.
Referenced by LdsState::countBankConflicts(), GPUDynInst::doApertureCheck(), GPUDynInst::GPUDynInst(), hasDispResources(), Wavefront::initRegState(), regStats(), GPUDynInst::resolveFlatSegment(), and startWavefront().
|
private |
Definition at line 1026 of file compute_unit.hh.
Referenced by cacheLineSize().
|
private |
Definition at line 1027 of file compute_unit.hh.
Referenced by numBarrierSlots().
|
protected |
Definition at line 467 of file compute_unit.hh.
Referenced by requestorId().
Stats::Distribution ComputeUnit::activeLanesPerGMemInstrDist |
Definition at line 594 of file compute_unit.hh.
Referenced by Wavefront::exec(), and regStats().
Stats::Distribution ComputeUnit::activeLanesPerLMemInstrDist |
Definition at line 595 of file compute_unit.hh.
Referenced by Wavefront::exec(), and regStats().
int ComputeUnit::activeWaves |
Definition at line 530 of file compute_unit.hh.
Referenced by Gcn3ISA::Inst_SOPP__S_ENDPGM::execute(), and startWavefront().
Stats::Formula ComputeUnit::argMemInsts |
Definition at line 513 of file compute_unit.hh.
Referenced by regStats().
Stats::Scalar ComputeUnit::argReads |
Definition at line 511 of file compute_unit.hh.
Referenced by regStats(), and updateInstStats().
Stats::Scalar ComputeUnit::argWrites |
Definition at line 512 of file compute_unit.hh.
Referenced by regStats(), and updateInstStats().
|
private |
Definition at line 1028 of file compute_unit.hh.
Referenced by getCacheLineBits().
int ComputeUnit::coalescerToVrfBusWidth |
Definition at line 266 of file compute_unit.hh.
Stats::Scalar ComputeUnit::completedWfs |
Definition at line 604 of file compute_unit.hh.
Referenced by Gcn3ISA::Inst_SOPP__S_ENDPGM::execute(), and regStats().
Stats::Scalar ComputeUnit::completedWGs |
Definition at line 605 of file compute_unit.hh.
Referenced by Gcn3ISA::Inst_SOPP__S_ENDPGM::execute(), and regStats().
Stats::Distribution ComputeUnit::controlFlowDivergenceDist |
Definition at line 593 of file compute_unit.hh.
Referenced by Wavefront::exec(), and regStats().
bool ComputeUnit::countPages |
Definition at line 354 of file compute_unit.hh.
Referenced by exitCallback().
int ComputeUnit::cu_id |
Definition at line 289 of file compute_unit.hh.
Referenced by GPUDynInst::completeAcc(), dispWorkgroup(), GlobalMemPipeline::exec(), ScalarMemPipeline::exec(), Wavefront::exec(), exec(), Gcn3ISA::Inst_SOPP__S_ENDPGM::execute(), Gcn3ISA::Inst_SOPP__S_BARRIER::execute(), FetchUnit::fetch(), GPUDynInst::GPUDynInst(), GPUDynInst::initiateAcc(), FetchUnit::initiateFetch(), Wavefront::initRegState(), injectGlobalMemFence(), GPUDispatcher::notifyWgCompl(), FetchUnit::processFetchReturn(), ComputeUnit::DataPort::processMemRespEvent(), ScoreboardCheckStage::ready(), ComputeUnit::DataPort::recvTimingResp(), sendRequest(), Wavefront::setStatus(), and startWavefront().
bool ComputeUnit::debugSegFault |
Definition at line 344 of file compute_unit.hh.
Referenced by sendRequest().
int ComputeUnit::dpBypassPipeLength |
Definition at line 304 of file compute_unit.hh.
Referenced by dpBypassLength().
Stats::Scalar ComputeUnit::dynamicFlatMemInstrCnt |
Definition at line 552 of file compute_unit.hh.
Referenced by regStats(), and GPUDynInst::updateStats().
Stats::Scalar ComputeUnit::dynamicGMemInstrCnt |
Definition at line 550 of file compute_unit.hh.
Referenced by regStats(), and GPUDynInst::updateStats().
Stats::Scalar ComputeUnit::dynamicLMemInstrCnt |
Definition at line 553 of file compute_unit.hh.
Referenced by regStats(), and GPUDynInst::updateStats().
EXEC_POLICY ComputeUnit::exec_policy |
Definition at line 342 of file compute_unit.hh.
Stats::Distribution ComputeUnit::execRateDist |
Definition at line 563 of file compute_unit.hh.
Referenced by Wavefront::exec(), and regStats().
ExecStage ComputeUnit::execStage |
Definition at line 280 of file compute_unit.hh.
Referenced by exec(), init(), and regStats().
FetchStage ComputeUnit::fetchStage |
Definition at line 277 of file compute_unit.hh.
Referenced by Wavefront::discardFetch(), exec(), Gcn3ISA::Inst_SOPP__S_ENDPGM::execute(), init(), and regStats().
Stats::Scalar ComputeUnit::flatLDSInsts |
Definition at line 484 of file compute_unit.hh.
Referenced by regStats(), and updateInstStats().
Stats::Formula ComputeUnit::flatLDSInstsPerWF |
Definition at line 485 of file compute_unit.hh.
Referenced by regStats().
Stats::Scalar ComputeUnit::flatVMemInsts |
Definition at line 482 of file compute_unit.hh.
Referenced by regStats(), and updateInstStats().
Stats::Formula ComputeUnit::flatVMemInstsPerWF |
Definition at line 483 of file compute_unit.hh.
Referenced by regStats().
|
private |
A set used to easily retrieve a free barrier ID.
Definition at line 1074 of file compute_unit.hh.
Referenced by getFreeBarrierId(), hasDispResources(), and releaseBarrier().
bool ComputeUnit::functionalTLB |
Definition at line 348 of file compute_unit.hh.
Referenced by sendRequest().
WaitClass ComputeUnit::glbMemToVrfBus |
Definition at line 218 of file compute_unit.hh.
Referenced by GlobalMemPipeline::exec(), init(), and isDone().
Stats::Formula ComputeUnit::globalMemInsts |
Definition at line 510 of file compute_unit.hh.
Referenced by regStats().
GlobalMemPipeline ComputeUnit::globalMemoryPipe |
Definition at line 281 of file compute_unit.hh.
Referenced by ScheduleStage::dispatchReady(), exec(), ScheduleStage::fillDispatchList(), init(), isDone(), ComputeUnit::DataPort::processMemRespEvent(), ComputeUnit::DataPort::recvTimingResp(), and regStats().
Stats::Scalar ComputeUnit::globalReads |
Definition at line 508 of file compute_unit.hh.
Referenced by regStats(), and updateInstStats().
|
private |
Definition at line 1029 of file compute_unit.hh.
Referenced by getAndIncSeqNum().
Stats::Scalar ComputeUnit::globalWrites |
Definition at line 509 of file compute_unit.hh.
Referenced by regStats(), and updateInstStats().
GMTokenPort ComputeUnit::gmTokenPort |
Definition at line 649 of file compute_unit.hh.
Referenced by init().
Stats::Formula ComputeUnit::groupMemInsts |
Definition at line 519 of file compute_unit.hh.
Referenced by regStats().
Stats::Scalar ComputeUnit::groupReads |
Definition at line 517 of file compute_unit.hh.
Referenced by regStats(), and updateInstStats().
Stats::Scalar ComputeUnit::groupWrites |
Definition at line 518 of file compute_unit.hh.
Referenced by regStats(), and updateInstStats().
Stats::Distribution ComputeUnit::headTailLatency |
Definition at line 609 of file compute_unit.hh.
Referenced by ComputeUnit::DataPort::processMemRespEvent(), and regStats().
|
private |
Definition at line 1079 of file compute_unit.hh.
Referenced by ComputeUnit::DataPort::processMemRespEvent().
Stats::Vector ComputeUnit::hitsPerTLBLevel |
Definition at line 541 of file compute_unit.hh.
Referenced by regStats(), and sendRequest().
Tick ComputeUnit::idleCUTimeout |
Definition at line 346 of file compute_unit.hh.
Referenced by Wavefront::setStatus().
int ComputeUnit::idleWfs |
Definition at line 347 of file compute_unit.hh.
Referenced by Wavefront::setStatus().
Stats::Vector ComputeUnit::instCyclesLdsPerSimd |
Definition at line 506 of file compute_unit.hh.
Referenced by Wavefront::exec(), and regStats().
Stats::Scalar ComputeUnit::instCyclesSALU |
Definition at line 477 of file compute_unit.hh.
Referenced by regStats(), and updateInstStats().
Stats::Vector ComputeUnit::instCyclesScMemPerSimd |
Definition at line 505 of file compute_unit.hh.
Referenced by Wavefront::exec(), and regStats().
Stats::Scalar ComputeUnit::instCyclesVALU |
Definition at line 476 of file compute_unit.hh.
Referenced by regStats(), and updateInstStats().
Stats::Vector ComputeUnit::instCyclesVMemPerSimd |
Definition at line 504 of file compute_unit.hh.
Referenced by Wavefront::exec(), and regStats().
std::vector<uint64_t> ComputeUnit::instExecPerSimd |
Definition at line 329 of file compute_unit.hh.
Referenced by Wavefront::exec().
Stats::VectorDistribution ComputeUnit::instInterleave |
Definition at line 326 of file compute_unit.hh.
Referenced by Wavefront::exec(), and regStats().
Stats::Formula ComputeUnit::ipc |
Definition at line 592 of file compute_unit.hh.
Referenced by regStats().
Cycles ComputeUnit::issuePeriod |
Definition at line 310 of file compute_unit.hh.
Referenced by Wavefront::exec().
Stats::Formula ComputeUnit::kernargMemInsts |
Definition at line 528 of file compute_unit.hh.
Referenced by regStats().
Stats::Scalar ComputeUnit::kernargReads |
Definition at line 526 of file compute_unit.hh.
Referenced by regStats(), and updateInstStats().
Stats::Scalar ComputeUnit::kernargWrites |
Definition at line 527 of file compute_unit.hh.
Referenced by regStats(), and updateInstStats().
std::vector<uint64_t> ComputeUnit::lastExecCycle |
Definition at line 320 of file compute_unit.hh.
Referenced by Wavefront::exec().
std::vector<Addr> ComputeUnit::lastVaddrCU |
Definition at line 338 of file compute_unit.hh.
Referenced by ~ComputeUnit().
std::vector<std::vector<Addr> > ComputeUnit::lastVaddrSimd |
Definition at line 339 of file compute_unit.hh.
Referenced by ~ComputeUnit().
std::vector<std::vector<std::vector<Addr> > > ComputeUnit::lastVaddrWF |
Definition at line 340 of file compute_unit.hh.
|
protected |
Definition at line 469 of file compute_unit.hh.
Referenced by dispWorkgroup(), getLds(), getRefCounter(), hasDispResources(), and startWavefront().
Stats::Scalar ComputeUnit::ldsBankAccesses |
Definition at line 543 of file compute_unit.hh.
Referenced by LdsState::processPacket(), and regStats().
Stats::Distribution ComputeUnit::ldsBankConflictDist |
Definition at line 544 of file compute_unit.hh.
Referenced by LdsState::processPacket(), and regStats().
Stats::Scalar ComputeUnit::ldsNoFlatInsts |
Definition at line 480 of file compute_unit.hh.
Referenced by regStats(), and updateInstStats().
Stats::Formula ComputeUnit::ldsNoFlatInstsPerWF |
Definition at line 481 of file compute_unit.hh.
Referenced by regStats().
LDSPort ComputeUnit::ldsPort |
The port to access the Local Data Store. Can be connected to an LDS object.
Definition at line 978 of file compute_unit.hh.
Referenced by getPort(), and sendToLds().
bool ComputeUnit::localMemBarrier |
Definition at line 349 of file compute_unit.hh.
LocalMemPipeline ComputeUnit::localMemoryPipe |
Definition at line 282 of file compute_unit.hh.
Referenced by ScheduleStage::dispatchReady(), exec(), isDone(), and regStats().
WaitClass ComputeUnit::locMemToVrfBus |
Definition at line 226 of file compute_unit.hh.
Referenced by LocalMemPipeline::exec(), init(), and isDone().
std::vector<DataPort> ComputeUnit::memPort |
The memory port for SIMD data accesses.
Can be connected to PhysMem for Ruby for timing simulations
Definition at line 989 of file compute_unit.hh.
Referenced by getPort(), injectGlobalMemFence(), ComputeUnit::DataPort::recvTimingResp(), and sendRequest().
TokenManager* ComputeUnit::memPortTokens |
Definition at line 648 of file compute_unit.hh.
Referenced by getTokenManager(), and init().
Stats::Formula ComputeUnit::numALUInstsExecuted |
Definition at line 597 of file compute_unit.hh.
Referenced by regStats().
Stats::Scalar ComputeUnit::numCASOps |
Definition at line 602 of file compute_unit.hh.
Referenced by AtomicOpCAS< T >::execute(), and regStats().
int ComputeUnit::numCyclesPerLoadTransfer |
Definition at line 268 of file compute_unit.hh.
Referenced by loadBusLength().
int ComputeUnit::numCyclesPerStoreTransfer |
Definition at line 267 of file compute_unit.hh.
Referenced by storeBusLength().
Stats::Scalar ComputeUnit::numFailedCASOps |
Definition at line 603 of file compute_unit.hh.
Referenced by AtomicOpCAS< T >::execute(), and regStats().
Stats::Scalar ComputeUnit::numInstrExecuted |
Definition at line 560 of file compute_unit.hh.
Referenced by Wavefront::exec(), and regStats().
int ComputeUnit::numScalarALUs |
Definition at line 245 of file compute_unit.hh.
Referenced by firstMemUnit(), init(), mapWaveToGlobalMem(), mapWaveToLocalMem(), mapWaveToScalarAlu(), mapWaveToScalarMem(), numExeUnits(), and ExecStage::regStats().
int ComputeUnit::numScalarMemUnits |
Definition at line 232 of file compute_unit.hh.
Referenced by init(), and numExeUnits().
int ComputeUnit::numScalarRegsPerSimd |
Definition at line 376 of file compute_unit.hh.
Referenced by StaticRegisterManagerPolicy::allocateRegisters(), and hasDispResources().
Stats::Scalar ComputeUnit::numTimesWgBlockedDueSgprAlloc |
Definition at line 601 of file compute_unit.hh.
Referenced by hasDispResources(), and regStats().
Stats::Scalar ComputeUnit::numTimesWgBlockedDueVgprAlloc |
Definition at line 599 of file compute_unit.hh.
Referenced by hasDispResources(), and regStats().
Stats::Scalar ComputeUnit::numVecOpsExecuted |
Definition at line 565 of file compute_unit.hh.
Referenced by Wavefront::exec(), and regStats().
Stats::Scalar ComputeUnit::numVecOpsExecutedF16 |
Definition at line 567 of file compute_unit.hh.
Referenced by Wavefront::exec(), and regStats().
Stats::Scalar ComputeUnit::numVecOpsExecutedF32 |
Definition at line 569 of file compute_unit.hh.
Referenced by Wavefront::exec(), and regStats().
Stats::Scalar ComputeUnit::numVecOpsExecutedF64 |
Definition at line 571 of file compute_unit.hh.
Referenced by Wavefront::exec(), and regStats().
Stats::Scalar ComputeUnit::numVecOpsExecutedFMA16 |
Definition at line 573 of file compute_unit.hh.
Referenced by Wavefront::exec(), and regStats().
Stats::Scalar ComputeUnit::numVecOpsExecutedFMA32 |
Definition at line 574 of file compute_unit.hh.
Referenced by Wavefront::exec(), and regStats().
Stats::Scalar ComputeUnit::numVecOpsExecutedFMA64 |
Definition at line 575 of file compute_unit.hh.
Referenced by Wavefront::exec(), and regStats().
Stats::Scalar ComputeUnit::numVecOpsExecutedMAC16 |
Definition at line 577 of file compute_unit.hh.
Referenced by Wavefront::exec(), and regStats().
Stats::Scalar ComputeUnit::numVecOpsExecutedMAC32 |
Definition at line 578 of file compute_unit.hh.
Referenced by Wavefront::exec(), and regStats().
Stats::Scalar ComputeUnit::numVecOpsExecutedMAC64 |
Definition at line 579 of file compute_unit.hh.
Referenced by Wavefront::exec(), and regStats().
Stats::Scalar ComputeUnit::numVecOpsExecutedMAD16 |
Definition at line 581 of file compute_unit.hh.
Referenced by Wavefront::exec(), and regStats().
Stats::Scalar ComputeUnit::numVecOpsExecutedMAD32 |
Definition at line 582 of file compute_unit.hh.
Referenced by Wavefront::exec(), and regStats().
Stats::Scalar ComputeUnit::numVecOpsExecutedMAD64 |
Definition at line 583 of file compute_unit.hh.
Referenced by Wavefront::exec(), and regStats().
Stats::Scalar ComputeUnit::numVecOpsExecutedTwoOpFP |
Definition at line 585 of file compute_unit.hh.
Referenced by Wavefront::exec(), and regStats().
int ComputeUnit::numVecRegsPerSimd |
Definition at line 374 of file compute_unit.hh.
Referenced by StaticRegisterManagerPolicy::allocateRegisters(), and hasDispResources().
int ComputeUnit::numVectorALUs |
Definition at line 241 of file compute_unit.hh.
Referenced by dispWorkgroup(), ScoreboardCheckStage::exec(), firstMemUnit(), hasDispResources(), init(), isDone(), isVectorAluIdle(), mapWaveToGlobalMem(), mapWaveToLocalMem(), mapWaveToScalarAluGlobalIdx(), mapWaveToScalarMem(), numExeUnits(), ExecStage::regStats(), regStats(), releaseWFsFromBarrier(), Wavefront::setStatus(), and ~ComputeUnit().
int ComputeUnit::numVectorGlobalMemUnits |
Definition at line 216 of file compute_unit.hh.
Referenced by ScheduleStage::arbitrateVrfToLdsBus(), ScheduleStage::init(), init(), mapWaveToLocalMem(), mapWaveToScalarMem(), and numExeUnits().
int ComputeUnit::numVectorSharedMemUnits |
Definition at line 224 of file compute_unit.hh.
Referenced by ScheduleStage::init(), init(), mapWaveToScalarMem(), and numExeUnits().
std::vector<int> ComputeUnit::numWfsToSched |
Number of WFs to schedule to each SIMD.
This vector is populated by hasDispResources(), and consumed by the subsequent call to dispWorkgroup(), to schedule the specified number of WFs to the SIMD units. Entry I provides the number of WFs to schedule to SIMD I.
Definition at line 367 of file compute_unit.hh.
Referenced by dispWorkgroup(), and hasDispResources().
int ComputeUnit::operandNetworkLength |
Definition at line 308 of file compute_unit.hh.
Referenced by oprNetPipeLength().
pageDataStruct ComputeUnit::pageAccesses |
Definition at line 627 of file compute_unit.hh.
Referenced by exitCallback(), and GPUDynInst::updateStats().
Stats::Distribution ComputeUnit::pageDivergenceDist |
Definition at line 548 of file compute_unit.hh.
Referenced by regStats(), and GPUDynInst::updateStats().
std::map<Addr, int> ComputeUnit::pagesTouched |
Definition at line 381 of file compute_unit.hh.
Referenced by updatePageDivergenceDist(), and GPUDynInst::updateStats().
bool ComputeUnit::perLaneTLB |
Definition at line 332 of file compute_unit.hh.
Referenced by sendRequest().
std::unordered_set<uint64_t> ComputeUnit::pipeMap |
Definition at line 273 of file compute_unit.hh.
Referenced by deleteFromPipeMap(), insertInPipeMap(), and Wavefront::nextInstr().
int ComputeUnit::prefetchDepth |
Definition at line 334 of file compute_unit.hh.
int ComputeUnit::prefetchStride |
Definition at line 336 of file compute_unit.hh.
Enums::PrefetchType ComputeUnit::prefetchType |
Definition at line 341 of file compute_unit.hh.
Stats::Formula ComputeUnit::privMemInsts |
Definition at line 522 of file compute_unit.hh.
Referenced by regStats().
Stats::Scalar ComputeUnit::privReads |
Definition at line 520 of file compute_unit.hh.
Referenced by regStats(), and updateInstStats().
Stats::Scalar ComputeUnit::privWrites |
Definition at line 521 of file compute_unit.hh.
Referenced by regStats(), and updateInstStats().
Stats::Formula ComputeUnit::readonlyMemInsts |
Definition at line 525 of file compute_unit.hh.
Referenced by regStats().
Stats::Scalar ComputeUnit::readonlyReads |
Definition at line 523 of file compute_unit.hh.
Referenced by regStats(), and updateInstStats().
Stats::Scalar ComputeUnit::readonlyWrites |
Definition at line 524 of file compute_unit.hh.
Referenced by regStats(), and updateInstStats().
RegisterManager* ComputeUnit::registerManager |
Definition at line 275 of file compute_unit.hh.
Referenced by StaticRegisterManagerPolicy::allocateRegisters(), StaticRegisterManagerPolicy::canAllocateSgprs(), StaticRegisterManagerPolicy::canAllocateVgprs(), dispWorkgroup(), Gcn3ISA::Inst_SOPP__S_ENDPGM::execute(), Wavefront::freeRegisterFile(), hasDispResources(), Wavefront::initRegState(), VectorRegisterFile::operandsReady(), ScalarRegisterFile::operandsReady(), Gcn3ISA::VecOperand< DataType, Const, NumDwords >::read(), Gcn3ISA::ScalarOperand< DataType, Const, sizeof(DataType)/sizeof(VecElemU32) >::regIdx(), regStats(), GPUDynInst::resolveFlatSegment(), VectorRegisterFile::scheduleWriteOperands(), ScalarRegisterFile::scheduleWriteOperands(), VectorRegisterFile::scheduleWriteOperandsFromLoad(), ScalarRegisterFile::scheduleWriteOperandsFromLoad(), VectorRegisterFile::waveExecuteInst(), ScalarRegisterFile::waveExecuteInst(), and Gcn3ISA::VecOperand< DataType, Const, NumDwords >::write().
Tick ComputeUnit::req_tick_latency |
Definition at line 358 of file compute_unit.hh.
Referenced by injectGlobalMemFence(), and sendRequest().
Tick ComputeUnit::resp_tick_latency |
Definition at line 359 of file compute_unit.hh.
Referenced by ComputeUnit::DataPort::recvTimingResp().
Stats::Scalar ComputeUnit::sALUInsts |
Definition at line 474 of file compute_unit.hh.
Referenced by regStats(), and updateInstStats().
Stats::Formula ComputeUnit::sALUInstsPerWF |
Definition at line 475 of file compute_unit.hh.
Referenced by regStats().
std::vector<WaitClass> ComputeUnit::scalarALUs |
Definition at line 246 of file compute_unit.hh.
Referenced by ScheduleStage::dispatchReady(), Wavefront::exec(), and init().
ScalarDataPort ComputeUnit::scalarDataPort |
Definition at line 993 of file compute_unit.hh.
Referenced by getPort(), and ComputeUnit::ScalarDataPort::MemReqEvent::process().
ScalarDTLBPort ComputeUnit::scalarDTLBPort |
Definition at line 995 of file compute_unit.hh.
Referenced by getPort(), and sendScalarRequest().
Stats::Formula ComputeUnit::scalarMemInstsPerKiloInst |
Definition at line 500 of file compute_unit.hh.
Referenced by regStats().
ScalarMemPipeline ComputeUnit::scalarMemoryPipe |
Definition at line 283 of file compute_unit.hh.
Referenced by ScheduleStage::dispatchReady(), exec(), and regStats().
Stats::Scalar ComputeUnit::scalarMemReads |
Definition at line 492 of file compute_unit.hh.
Referenced by regStats(), and updateInstStats().
Stats::Formula ComputeUnit::scalarMemReadsPerKiloInst |
Definition at line 498 of file compute_unit.hh.
Referenced by regStats().
Stats::Formula ComputeUnit::scalarMemReadsPerWF |
Definition at line 493 of file compute_unit.hh.
Referenced by regStats().
WaitClass ComputeUnit::scalarMemToSrfBus |
Definition at line 234 of file compute_unit.hh.
Referenced by ScalarMemPipeline::exec(), init(), and isDone().
WaitClass ComputeUnit::scalarMemUnit |
Definition at line 238 of file compute_unit.hh.
Referenced by ScheduleStage::checkMemResources(), ScalarMemPipeline::exec(), Wavefront::exec(), and init().
Stats::Scalar ComputeUnit::scalarMemWrites |
Definition at line 490 of file compute_unit.hh.
Referenced by regStats(), and updateInstStats().
Stats::Formula ComputeUnit::scalarMemWritesPerKiloInst |
Definition at line 499 of file compute_unit.hh.
Referenced by regStats().
Stats::Formula ComputeUnit::scalarMemWritesPerWF |
Definition at line 491 of file compute_unit.hh.
Referenced by regStats().
int ComputeUnit::scalarPipeStages |
Definition at line 306 of file compute_unit.hh.
Referenced by scalarPipeLength().
std::vector<int> ComputeUnit::scalarRegsReserved |
Definition at line 372 of file compute_unit.hh.
Referenced by StaticRegisterManagerPolicy::allocateRegisters(), and init().
ScheduleStage ComputeUnit::scheduleStage |
Definition at line 279 of file compute_unit.hh.
Referenced by ExecStage::exec(), exec(), init(), and regStats().
|
private |
Definition at line 1065 of file compute_unit.hh.
ScoreboardCheckStage ComputeUnit::scoreboardCheckStage |
Definition at line 278 of file compute_unit.hh.
Referenced by exec(), and regStats().
|
private |
TODO: Update these comments once the pipe stage interface has been fully refactored.
Pipeline stage interfaces.
Buffers used to communicate between various pipeline stages. List of waves which will be dispatched to each execution resource. An EXREADY implies the dispatch list is non-empty and the execution unit has something to execute this cycle. Currently, the dispatch list of an execution resource can hold only one wave because an execution resource can execute only one wave in a cycle. dispatchList is used to communicate between the schedule and exec stages.
At a high level, the following intra-/inter-stage communication occurs: SCB to SCH: readyList provides per exec resource list of waves that passed dependency and readiness checks. If selected by scheduler, attempt to add wave to schList conditional on RF support. SCH: schList holds waves that are gathering operands or waiting for execution resource availability. Once ready, waves are placed on the dispatchList as candidates for execution. A wave may spend multiple cycles in SCH stage, on the schList due to RF access conflicts or execution resource contention. SCH to EX: dispatchList holds waves that are ready to be executed. LM/FLAT arbitration may remove an LM wave and place it back on the schList. RF model may also force a wave back to the schList if using the detailed model.
Definition at line 1064 of file compute_unit.hh.
Shader* ComputeUnit::shader |
Definition at line 356 of file compute_unit.hh.
Referenced by dispWorkgroup(), GPUDynInst::doApertureCheck(), FetchUnit::exec(), LocalMemPipeline::exec(), GlobalMemPipeline::exec(), ScalarMemPipeline::exec(), ScoreboardCheckStage::exec(), Wavefront::exec(), exec(), Gcn3ISA::Inst_SOPP__S_ENDPGM::execute(), hasDispResources(), FetchUnit::init(), GlobalMemPipeline::init(), FetchUnit::initiateFetch(), Wavefront::initRegState(), injectGlobalMemFence(), isVectorAluIdle(), ComputeUnit::DataPort::recvTimingResp(), regStats(), releaseWFsFromBarrier(), GPUDynInst::resolveFlatSegment(), sendRequest(), sendScalarRequest(), Wavefront::setStatus(), updateInstStats(), and ~ComputeUnit().
int ComputeUnit::simdWidth |
Definition at line 298 of file compute_unit.hh.
Referenced by simdUnitWidth().
int ComputeUnit::spBypassPipeLength |
Definition at line 301 of file compute_unit.hh.
Referenced by spBypassLength().
Stats::Formula ComputeUnit::spillMemInsts |
Definition at line 516 of file compute_unit.hh.
Referenced by regStats().
Stats::Scalar ComputeUnit::spillReads |
Definition at line 514 of file compute_unit.hh.
Referenced by regStats(), and updateInstStats().
Stats::Scalar ComputeUnit::spillWrites |
Definition at line 515 of file compute_unit.hh.
Referenced by regStats(), and updateInstStats().
SQCPort ComputeUnit::sqcPort |
Definition at line 997 of file compute_unit.hh.
Referenced by FetchUnit::fetch(), and getPort().
ITLBPort ComputeUnit::sqcTLBPort |
Definition at line 999 of file compute_unit.hh.
Referenced by getPort(), and FetchUnit::initiateFetch().
std::vector<ScalarRegisterFile*> ComputeUnit::srf |
Definition at line 294 of file compute_unit.hh.
Referenced by ScheduleStage::addToSchList(), ScheduleStage::checkRfOperandReadComplete(), Wavefront::exec(), exec(), Wavefront::initRegState(), Gcn3ISA::ScalarOperand< DataType, Const, sizeof(DataType)/sizeof(VecElemU32) >::read(), ScoreboardCheckStage::ready(), ScheduleStage::reserveResources(), GPUDynInst::resolveFlatSegment(), ScheduleStage::schedRfWrites(), RegisterManager::setParent(), and Gcn3ISA::ScalarOperand< DataType, Const, sizeof(DataType)/sizeof(VecElemU32) >::write().
Cycles ComputeUnit::srf_scm_bus_latency |
Definition at line 315 of file compute_unit.hh.
Referenced by Wavefront::exec().
WaitClass ComputeUnit::srfToScalarMemPipeBus |
Definition at line 236 of file compute_unit.hh.
Referenced by ScheduleStage::checkMemResources(), Wavefront::exec(), init(), and isDone().
Stats::Scalar ComputeUnit::threadCyclesVALU |
Definition at line 478 of file compute_unit.hh.
Referenced by regStats(), and updateInstStats().
EventFunctionWrapper ComputeUnit::tickEvent |
Definition at line 285 of file compute_unit.hh.
Referenced by dispWorkgroup(), and exec().
Stats::Scalar ComputeUnit::tlbCycles |
Definition at line 538 of file compute_unit.hh.
Referenced by regStats(), and sendRequest().
Stats::Formula ComputeUnit::tlbLatency |
Definition at line 539 of file compute_unit.hh.
Referenced by regStats().
std::vector<DTLBPort> ComputeUnit::tlbPort |
Definition at line 991 of file compute_unit.hh.
Referenced by getPort(), and sendRequest().
Stats::Scalar ComputeUnit::tlbRequests |
Definition at line 537 of file compute_unit.hh.
Referenced by regStats(), and sendRequest().
Stats::Scalar ComputeUnit::totalCycles |
Definition at line 587 of file compute_unit.hh.
Referenced by Wavefront::exec(), exec(), and regStats().
Stats::Scalar ComputeUnit::vALUInsts |
Definition at line 472 of file compute_unit.hh.
Referenced by regStats(), and updateInstStats().
Stats::Formula ComputeUnit::vALUInstsPerWF |
Definition at line 473 of file compute_unit.hh.
Referenced by regStats().
Stats::Formula ComputeUnit::vALUUtilization |
Definition at line 479 of file compute_unit.hh.
Referenced by regStats().
std::vector<WaitClass> ComputeUnit::vectorALUs |
Definition at line 242 of file compute_unit.hh.
Referenced by ScheduleStage::dispatchReady(), Wavefront::exec(), and init().
WaitClass ComputeUnit::vectorGlobalMemUnit |
Definition at line 222 of file compute_unit.hh.
Referenced by ScheduleStage::checkMemResources(), GlobalMemPipeline::exec(), Wavefront::exec(), and init().
Stats::Formula ComputeUnit::vectorMemInstsPerKiloInst |
Definition at line 497 of file compute_unit.hh.
Referenced by regStats().
Stats::Scalar ComputeUnit::vectorMemReads |
Definition at line 488 of file compute_unit.hh.
Referenced by regStats(), and updateInstStats().
Stats::Formula ComputeUnit::vectorMemReadsPerKiloInst |
Definition at line 495 of file compute_unit.hh.
Referenced by regStats().
Stats::Formula ComputeUnit::vectorMemReadsPerWF |
Definition at line 489 of file compute_unit.hh.
Referenced by regStats().
Stats::Scalar ComputeUnit::vectorMemWrites |
Definition at line 486 of file compute_unit.hh.
Referenced by regStats(), and updateInstStats().
Stats::Formula ComputeUnit::vectorMemWritesPerKiloInst |
Definition at line 496 of file compute_unit.hh.
Referenced by regStats().
Stats::Formula ComputeUnit::vectorMemWritesPerWF |
Definition at line 487 of file compute_unit.hh.
Referenced by regStats().
std::vector<int> ComputeUnit::vectorRegsReserved |
Definition at line 370 of file compute_unit.hh.
Referenced by StaticRegisterManagerPolicy::allocateRegisters(), and init().
WaitClass ComputeUnit::vectorSharedMemUnit |
Definition at line 230 of file compute_unit.hh.
Referenced by ScheduleStage::checkMemResources(), LocalMemPipeline::exec(), Wavefront::exec(), and init().
Stats::Formula ComputeUnit::vpc |
Definition at line 588 of file compute_unit.hh.
Referenced by regStats().
Stats::Formula ComputeUnit::vpc_f16 |
Definition at line 589 of file compute_unit.hh.
Referenced by regStats().
Stats::Formula ComputeUnit::vpc_f32 |
Definition at line 590 of file compute_unit.hh.
Referenced by regStats().
Stats::Formula ComputeUnit::vpc_f64 |
Definition at line 591 of file compute_unit.hh.
Referenced by regStats().
std::vector<VectorRegisterFile*> ComputeUnit::vrf |
Definition at line 292 of file compute_unit.hh.
Referenced by ScheduleStage::addToSchList(), ScheduleStage::checkRfOperandReadComplete(), Wavefront::exec(), exec(), Gcn3ISA::Inst_FLAT__FLAT_LOAD_UBYTE::execute(), Gcn3ISA::Inst_FLAT__FLAT_LOAD_USHORT::execute(), Gcn3ISA::Inst_FLAT__FLAT_LOAD_DWORD::execute(), Gcn3ISA::Inst_FLAT__FLAT_LOAD_DWORDX2::execute(), Gcn3ISA::Inst_FLAT__FLAT_LOAD_DWORDX3::execute(), Gcn3ISA::Inst_FLAT__FLAT_LOAD_DWORDX4::execute(), Gcn3ISA::Inst_FLAT__FLAT_ATOMIC_SWAP::execute(), Gcn3ISA::Inst_FLAT__FLAT_ATOMIC_CMPSWAP::execute(), Gcn3ISA::Inst_FLAT__FLAT_ATOMIC_ADD::execute(), Gcn3ISA::Inst_FLAT__FLAT_ATOMIC_SUB::execute(), Gcn3ISA::Inst_FLAT__FLAT_ATOMIC_INC::execute(), Gcn3ISA::Inst_FLAT__FLAT_ATOMIC_DEC::execute(), Gcn3ISA::Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::execute(), Gcn3ISA::Inst_FLAT__FLAT_ATOMIC_ADD_X2::execute(), Gcn3ISA::Inst_FLAT__FLAT_ATOMIC_SUB_X2::execute(), Gcn3ISA::Inst_FLAT__FLAT_ATOMIC_INC_X2::execute(), Gcn3ISA::Inst_FLAT__FLAT_ATOMIC_DEC_X2::execute(), Wavefront::freeRegisterFile(), Wavefront::initRegState(), Gcn3ISA::VecOperand< DataType, Const, NumDwords >::read(), ScoreboardCheckStage::ready(), ScheduleStage::reserveResources(), ScheduleStage::schedRfWrites(), RegisterManager::setParent(), and Gcn3ISA::VecOperand< DataType, Const, NumDwords >::write().
Cycles ComputeUnit::vrf_gm_bus_latency |
Definition at line 313 of file compute_unit.hh.
Referenced by Wavefront::exec().
Cycles ComputeUnit::vrf_lm_bus_latency |
Definition at line 317 of file compute_unit.hh.
Referenced by Wavefront::exec().
int ComputeUnit::vrfToCoalescerBusWidth |
Definition at line 265 of file compute_unit.hh.
WaitClass ComputeUnit::vrfToGlobalMemPipeBus |
Definition at line 220 of file compute_unit.hh.
Referenced by ScheduleStage::checkMemResources(), Wavefront::exec(), init(), and isDone().
WaitClass ComputeUnit::vrfToLocalMemPipeBus |
Definition at line 228 of file compute_unit.hh.
Referenced by ScheduleStage::checkMemResources(), Wavefront::exec(), init(), and isDone().
|
private |
Definition at line 1030 of file compute_unit.hh.
Referenced by wfSize().
Stats::Distribution ComputeUnit::waveLevelParallelism |
Definition at line 531 of file compute_unit.hh.
Referenced by regStats(), and startWavefront().
|
private |
The barrier slots for this CU.
Definition at line 1070 of file compute_unit.hh.
Referenced by barrierSlot().
std::vector<std::vector<Wavefront*> > ComputeUnit::wfList |
Definition at line 288 of file compute_unit.hh.
Referenced by dispWorkgroup(), ScoreboardCheckStage::exec(), hasDispResources(), FetchStage::init(), isVectorAluIdle(), ComputeUnit::DataPort::recvTimingResp(), releaseWFsFromBarrier(), and ~ComputeUnit().
Stats::Scalar ComputeUnit::wgBlockedDueBarrierAllocation |
Definition at line 555 of file compute_unit.hh.
Referenced by hasDispResources(), and regStats().
Stats::Scalar ComputeUnit::wgBlockedDueLdsAllocation |
Definition at line 556 of file compute_unit.hh.
Referenced by hasDispResources(), and regStats().