gem5  v20.0.0.2
compute_unit.hh
/*
 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __COMPUTE_UNIT_HH__
#define __COMPUTE_UNIT_HH__

#include <deque>
#include <map>
#include <unordered_map>
#include <vector>

#include "base/callback.hh"
#include "base/statistics.hh"
#include "base/types.hh"
#include "enums/PrefetchType.hh"
#include "gpu-compute/exec_stage.hh"
#include "gpu-compute/fetch_stage.hh"
#include "gpu-compute/global_memory_pipeline.hh"
#include "gpu-compute/local_memory_pipeline.hh"
#include "gpu-compute/qstruct.hh"
#include "gpu-compute/schedule_stage.hh"
#include "gpu-compute/scoreboard_check_stage.hh"
#include "mem/port.hh"
#include "mem/token_port.hh"
#include "sim/clocked_object.hh"

static const int MAX_REGS_FOR_NON_VEC_MEM_INST = 1;
static const int MAX_WIDTH_FOR_MEM_INST = 32;

class NDRange;
class Shader;
class VectorRegisterFile;

struct ComputeUnitParams;

enum EXEC_POLICY
{
    OLDEST = 0,
    RR
};

// List of execution units
enum EXEC_UNIT
{
    SIMD0 = 0,
    SIMD1,
    SIMD2,
    SIMD3,
    GLBMEM_PIPE,
    LDSMEM_PIPE,
    NUM_UNITS
};

enum TLB_CACHE
{
    TLB_MISS_CACHE_MISS = 0,
    TLB_MISS_CACHE_HIT,
    TLB_HIT_CACHE_MISS,
    TLB_HIT_CACHE_HIT
};

class ComputeUnit : public ClockedObject
{
  public:
    FetchStage fetchStage;
    ScoreboardCheckStage scoreboardCheckStage;
    ScheduleStage scheduleStage;
    ExecStage execStage;
    GlobalMemPipeline globalMemoryPipe;
    LocalMemPipeline localMemoryPipe;

    // Buffers used to communicate between various pipeline stages

    // List of waves which are ready to be scheduled.
    // Each execution resource has a ready list. readyList is
    // used to communicate between scoreboardCheck stage and
    // schedule stage
    // TODO: make enum to index readyList
    std::vector<std::vector<Wavefront*>> readyList;

    // Stores the status of waves. A READY implies the
    // wave is ready to be scheduled this cycle and
    // is already present in the readyList. waveStatusList is
    // used to communicate between scoreboardCheck stage and
    // schedule stage
    // TODO: convert std::pair to a class to increase readability
    std::vector<std::vector<std::pair<Wavefront*, WAVE_STATUS>>> waveStatusList;

    // List of waves which will be dispatched to
    // each execution resource. A FILLED implies
    // dispatch list is non-empty and
    // execution unit has something to execute
    // this cycle. Currently, the dispatch list of
    // an execution resource can hold only one wave because
    // an execution resource can execute only one wave in a cycle.
    // dispatchList is used to communicate between schedule
    // and exec stage
    // TODO: convert std::pair to a class to increase readability
    std::vector<std::pair<Wavefront*, DISPATCH_STATUS>> dispatchList;

    int rrNextMemID; // used by RR WF exec policy to cycle through WF's
    int rrNextALUWp; // used by RR WF exec policy to cycle through WF's
    typedef ComputeUnitParams Params;
    std::vector<std::vector<Wavefront*>> wfList;
    int cu_id;

    // array of vector register files, one per SIMD
    std::vector<VectorRegisterFile*> vrf;
    // Number of vector ALU units (SIMDs) in CU
    int numSIMDs;
    // number of pipe stages for bypassing data to next dependent single
    // precision vector instruction inside the vector ALU pipeline
    int spBypassPipeLength;
    // number of pipe stages for bypassing data to next dependent double
    // precision vector instruction inside the vector ALU pipeline
    int dpBypassPipeLength;
    // number of cycles per issue period
    int issuePeriod;

    // Number of global and local memory execution resources in CU
    int numGlbMemUnits;
    int numLocMemUnits;
    // tracks the last cycle a vector instruction was executed on a SIMD
    std::vector<uint64_t> lastExecCycle;

    // true if we allow a separate TLB per lane
    bool perLaneTLB;
    // if 0, TLB prefetching is off.
    int prefetchDepth;
    // if fixed-stride prefetching, this is the stride.
    int prefetchStride;

    std::vector<Addr> lastVaddrCU;
    std::vector<std::vector<Addr>> lastVaddrSimd;
    std::vector<std::vector<std::vector<Addr>>> lastVaddrWF;
    Enums::PrefetchType prefetchType;
    EXEC_POLICY exec_policy;

    bool xact_cas_mode;
    bool debugSegFault;
    bool functionalTLB;
    bool localMemBarrier;

    /*
     * for Counting page accesses
     *
     * cuExitCallback inherits from Callback. When you register a callback
     * function as an exit callback, it will get added to an exit callback
     * queue, such that on simulation exit, all callbacks in the callback
     * queue will have their process() function called.
     */
    bool countPages;

    Shader *shader;
    uint32_t barrier_id;
    // vector of Vector ALU (MACC) pipelines
    std::vector<WaitClass> aluPipe;
    // minimum issue period per SIMD unit (in cycles)
    std::vector<WaitClass> wfWait;

    // Resource control for Vector Register File->Global Memory pipe buses
    std::vector<WaitClass> vrfToGlobalMemPipeBus;
    // Resource control for Vector Register File->Local Memory pipe buses
    std::vector<WaitClass> vrfToLocalMemPipeBus;
    int nextGlbMemBus;
    int nextLocMemBus;
    // Resource control for global memory to VRF data/address bus
    WaitClass glbMemToVrfBus;
    // Resource control for local memory to VRF data/address bus
    WaitClass locMemToVrfBus;

    uint32_t vrfToCoalescerBusWidth; // VRF->Coalescer data bus width in bytes
    uint32_t coalescerToVrfBusWidth; // Coalescer->VRF data bus width in bytes
    uint32_t numCyclesPerStoreTransfer; // number of cycles per vector store
    uint32_t numCyclesPerLoadTransfer; // number of cycles per vector load

    Tick req_tick_latency;
    Tick resp_tick_latency;

    // number of vector registers being reserved for each SIMD unit
    std::vector<int> vectorRegsReserved;
    // number of vector registers per SIMD unit
    uint32_t numVecRegsPerSimd;
    // Support for scheduling VGPR status update events
    std::vector<std::pair<uint32_t, uint32_t>> regIdxVec;
    std::vector<uint64_t> timestampVec;
    std::vector<uint8_t> statusVec;

    void
    registerEvent(uint32_t simdId,
                  uint32_t regIdx,
                  uint32_t operandSize,
                  uint64_t when,
                  uint8_t newStatus) {
        regIdxVec.push_back(std::make_pair(simdId, regIdx));
        timestampVec.push_back(when);
        statusVec.push_back(newStatus);
        if (operandSize > 4) {
            regIdxVec.push_back(std::make_pair(simdId,
                                               ((regIdx + 1) %
                                                numVecRegsPerSimd)));
            timestampVec.push_back(when);
            statusVec.push_back(newStatus);
        }
    }
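    // Illustrative sketch (not part of the original header): a caller that
    // just wrote an 8-byte (double-precision) result into VGPR `dstIdx` on
    // SIMD `simdId` could queue a status change for both halves of the
    // register pair at tick `when`; updateEvents() below then applies the
    // queued changes once their timestamps are reached:
    //
    //     registerEvent(simdId, dstIdx, /*operandSize=*/8, when, newStatus);
    //     ...
    //     updateEvents();
    //
    // `dstIdx`, `when` and `newStatus` are hypothetical values here; the
    // status encoding itself belongs to the vector register file, not to
    // this class.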

    void updateEvents();

    // this hash map will keep track of page divergence
    // per memory instruction per wavefront. The hash map
    // is cleared in GPUDynInst::updateStats() in gpu_dyn_inst.cc.
    std::map<Addr, int> pagesTouched;
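    // A minimal sketch (with assumed GPUDynInst field names) of how the
    // per-page counts above might be accumulated for one memory instruction:
    // round each active lane's address down to its page and bump that
    // page's entry.
    //
    //     for (int lane = 0; lane < wfSize(); ++lane) {
    //         if (gpuDynInst->exec_mask[lane]) {
    //             Addr page = roundDown(gpuDynInst->addr[lane],
    //                                   TheISA::PageBytes);
    //             pagesTouched[page]++;
    //         }
    //     }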

    ComputeUnit(const Params *p);
    ~ComputeUnit();
    int spBypassLength() { return spBypassPipeLength; };
    int dpBypassLength() { return dpBypassPipeLength; };
    int storeBusLength() { return numCyclesPerStoreTransfer; };
    int loadBusLength() { return numCyclesPerLoadTransfer; };
    int wfSize() const { return wavefrontSize; };

    void resizeRegFiles(int num_cregs, int num_sregs, int num_dregs);
    void exec();
    void initiateFetch(Wavefront *wavefront);
    void fetch(PacketPtr pkt, Wavefront *wavefront);
    void fillKernelState(Wavefront *w, NDRange *ndr);

    void startWavefront(Wavefront *w, int waveId, LdsChunk *ldsChunk,
                        NDRange *ndr);

    void StartWorkgroup(NDRange *ndr);
    int ReadyWorkgroup(NDRange *ndr);

    bool isVecAlu(int unitId) { return unitId >= SIMD0 && unitId <= SIMD3; }
    bool isGlbMem(int unitId) { return unitId == GLBMEM_PIPE; }
    bool isShrMem(int unitId) { return unitId == LDSMEM_PIPE; }
    int GlbMemUnitId() { return GLBMEM_PIPE; }
    int ShrMemUnitId() { return LDSMEM_PIPE; }
    int nextGlbRdBus() { return (++nextGlbMemBus) % numGlbMemUnits; }
    int nextLocRdBus() { return (++nextLocMemBus) % numLocMemUnits; }
    /* This function cycles through all the wavefronts in all the phases to see
     * if all of the wavefronts which should be associated with one barrier
     * (denoted with _barrier_id), are all at the same barrier in the program
     * (denoted by bcnt). When the number at the barrier matches bslots, then
     * return true.
     */
    int AllAtBarrier(uint32_t _barrier_id, uint32_t bcnt, uint32_t bslots);
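    // Conceptually, the check described above amounts to counting the
    // wavefronts that carry this barrier id and have reached barrier number
    // bcnt, and releasing once that count equals bslots. A sketch, with
    // assumed Wavefront field names (barrierId, barrierCnt):
    //
    //     int count = 0;
    //     for (auto &simd : wfList)
    //         for (Wavefront *w : simd)
    //             if (w->barrierId == _barrier_id && w->barrierCnt == bcnt)
    //                 ++count;
    //     return count == bslots;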
    bool cedeSIMD(int simdId, int wfSlotId);

    template<typename c0, typename c1> void doSmReturn(GPUDynInstPtr gpuDynInst);
    virtual void init() override;
    void sendRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt);
    void sendSyncRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt);
    void injectGlobalMemFence(GPUDynInstPtr gpuDynInst,
                              bool kernelLaunch=true,
                              RequestPtr req=nullptr);
    void handleMemPacket(PacketPtr pkt, int memport_index);
    bool processTimingPacket(PacketPtr pkt);
    void processFetchReturn(PacketPtr pkt);
    void updatePageDivergenceDist(Addr addr);

    MasterID masterId() { return _masterId; }

    bool isDone() const;
    bool isSimdDone(uint32_t) const;

  protected:
    MasterID _masterId;

    LdsState &lds;

  public:
    Stats::Scalar vALUInsts;
    Stats::Formula vALUInstsPerWF;
    Stats::Scalar sALUInsts;
    Stats::Formula sALUInstsPerWF;
    Stats::Scalar instCyclesVALU;
    Stats::Scalar instCyclesSALU;
    Stats::Scalar threadCyclesVALU;
    Stats::Formula vALUUtilization;
    Stats::Scalar ldsNoFlatInsts;
    Stats::Formula ldsNoFlatInstsPerWF;
    Stats::Scalar flatVMemInsts;
    Stats::Formula flatVMemInstsPerWF;
    Stats::Scalar flatLDSInsts;
    Stats::Formula flatLDSInstsPerWF;
    Stats::Scalar vectorMemWrites;
    Stats::Formula vectorMemWritesPerWF;
    Stats::Scalar vectorMemReads;
    Stats::Formula vectorMemReadsPerWF;
    Stats::Scalar scalarMemWrites;
    Stats::Formula scalarMemWritesPerWF;
    Stats::Scalar scalarMemReads;
    Stats::Formula scalarMemReadsPerWF;

    void updateInstStats(GPUDynInstPtr gpuDynInst);

    // the following stats compute the avg. TLB access latency per
    // uncoalesced request (only for data)
    Stats::Scalar tlbRequests;
    Stats::Scalar tlbCycles;
    Stats::Formula tlbLatency;
    // hitsPerTLBLevel[x] are the hits in Level x TLB. x = 0 is the page table.
    Stats::Vector hitsPerTLBLevel;

    Stats::Scalar ldsBankAccesses;
    Stats::Distribution ldsBankConflictDist;

    // over all memory instructions executed over all wavefronts
    // how many touched 0-4 pages, 4-8, ..., 60-64 pages
    Stats::Distribution pageDivergenceDist;
    Stats::Scalar dynamicGMemInstrCnt;
    Stats::Scalar dynamicLMemInstrCnt;

    Stats::Scalar wgBlockedDueLdsAllocation;
    // Number of instructions executed, i.e. if 64 (or 32 or 7) lanes are active
    // when the instruction is committed, this number is still incremented by 1
    Stats::Scalar numInstrExecuted;
    // Number of cycles among successive instruction executions across all
    // wavefronts of the same CU
    Stats::Distribution execRateDist;
    // number of individual vector operations executed
    Stats::Scalar numVecOpsExecuted;
    // Total cycles that something is running on the GPU
    Stats::Scalar totalCycles;
    Stats::Formula vpc; // vector ops per cycle
    Stats::Formula ipc; // vector instructions per cycle
    Stats::Distribution controlFlowDivergenceDist;
    Stats::Distribution activeLanesPerGMemInstrDist;
    Stats::Distribution activeLanesPerLMemInstrDist;
    // number of vector ALU instructions received
    Stats::Formula numALUInstsExecuted;
    // number of times a WG can not start due to lack of free VGPRs in SIMDs
    Stats::Scalar numTimesWgBlockedDueVgprAlloc;
    Stats::Scalar numCASOps;
    Stats::Scalar numFailedCASOps;
    Stats::Scalar completedWfs;
    // flag per vector SIMD unit that is set when there is at least one
    // WV that has a vector ALU instruction as the oldest in its
    // Instruction Buffer: Defined in the Scoreboard stage, consumed
    // by the Execute stage.
    std::vector<bool> vectorAluInstAvail;
    // number of available (oldest) LDS instructions that could have
    // been issued to the LDS at a specific issue slot
    int shrMemInstAvail;
    // number of available Global memory instructions that could have
    // been issued to TCP at a specific issue slot
    int glbMemInstAvail;

    void
    regStats() override;

    LdsState &
    getLds() const
    {
        return lds;
    }

    int32_t
    getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const;

    int cacheLineSize() const { return _cacheLineSize; }

    bool
    sendToLds(GPUDynInstPtr gpuDynInst) __attribute__((warn_unused_result));

    typedef std::unordered_map<Addr, std::pair<int, int>> pageDataStruct;
    pageDataStruct pageAccesses;

    class CUExitCallback : public Callback
    {
      private:
        ComputeUnit *computeUnit;

      public:
        virtual ~CUExitCallback() { }

        CUExitCallback(ComputeUnit *_cu)
        {
            computeUnit = _cu;
        }

        virtual void
        process();
    };

    CUExitCallback *cuExitCallback;

    class GMTokenPort : public TokenMasterPort
    {
      public:
        GMTokenPort(const std::string& name, SimObject *owner,
                    PortID id = InvalidPortID)
            : TokenMasterPort(name, owner, id)
        { }
        ~GMTokenPort() { }

      protected:
        bool recvTimingResp(PacketPtr) { return false; }
        void recvReqRetry() { }
    };

    // Manager for the number of tokens available to this compute unit to
    // send global memory request packets to the coalescer this is only used
    // between global memory pipe and TCP coalescer.
    TokenManager *memPortTokens;
    GMTokenPort gmTokenPort;

    /** Data access Port **/
    class DataPort : public MasterPort
    {
      public:
        DataPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
            : MasterPort(_name, _cu), computeUnit(_cu),
              index(_index) { }

        bool snoopRangeSent;

        struct SenderState : public Packet::SenderState
        {
            GPUDynInstPtr _gpuDynInst;
            int port_index;
            Packet::SenderState *saved;

            SenderState(GPUDynInstPtr gpuDynInst, PortID _port_index,
                        Packet::SenderState *sender_state=nullptr)
                : _gpuDynInst(gpuDynInst),
                  port_index(_port_index),
                  saved(sender_state) { }
        };

        void processMemReqEvent(PacketPtr pkt);
        EventFunctionWrapper *createMemReqEvent(PacketPtr pkt);

        void processMemRespEvent(PacketPtr pkt);
        EventFunctionWrapper *createMemRespEvent(PacketPtr pkt);

        std::deque<std::pair<PacketPtr, GPUDynInstPtr>> retries;

      protected:
        ComputeUnit *computeUnit;
        int index;

        virtual bool recvTimingResp(PacketPtr pkt);
        virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
        virtual void recvFunctional(PacketPtr pkt) { }
        virtual void recvRangeChange() { }
        virtual void recvReqRetry();

        virtual void
        getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
        {
            resp.clear();
            snoop = true;
        }

    };

    // Instruction cache access port
    class SQCPort : public MasterPort
    {
      public:
        SQCPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
            : MasterPort(_name, _cu), computeUnit(_cu),
              index(_index) { }

        bool snoopRangeSent;

        struct SenderState : public Packet::SenderState
        {
            Wavefront *wavefront;
            Packet::SenderState *saved;

            SenderState(Wavefront *_wavefront, Packet::SenderState
                        *sender_state=nullptr)
                : wavefront(_wavefront), saved(sender_state) { }
        };

        std::deque<std::pair<PacketPtr, Wavefront*>> retries;

      protected:
        ComputeUnit *computeUnit;
        int index;

        virtual bool recvTimingResp(PacketPtr pkt);
        virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
        virtual void recvFunctional(PacketPtr pkt) { }
        virtual void recvRangeChange() { }
        virtual void recvReqRetry();

        virtual void
        getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
        {
            resp.clear();
            snoop = true;
        }
    };

    /** Data TLB port **/
    class DTLBPort : public MasterPort
    {
      public:
        DTLBPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
            : MasterPort(_name, _cu), computeUnit(_cu),
              index(_index), stalled(false)
        { }

        bool isStalled() { return stalled; }
        void stallPort() { stalled = true; }
        void unstallPort() { stalled = false; }

        /**
         * here we queue all the translation requests that were
         * not successfully sent.
         */
        std::deque<PacketPtr> retries;

        /** SenderState is information carried along with the packet
         * throughout the TLB hierarchy
         */
        struct SenderState: public Packet::SenderState
        {
            // the memInst that this is associated with
            GPUDynInstPtr _gpuDynInst;

            // the lane in the memInst this is associated with, so we send
            // the memory request down the right port
            PortID portIndex;

            // constructor used for packets involved in timing accesses
            SenderState(GPUDynInstPtr gpuDynInst, PortID port_index)
                : _gpuDynInst(gpuDynInst), portIndex(port_index) { }

        };

      protected:
        ComputeUnit *computeUnit;
        int index;
        bool stalled;

        virtual bool recvTimingResp(PacketPtr pkt);
        virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
        virtual void recvFunctional(PacketPtr pkt) { }
        virtual void recvRangeChange() { }
        virtual void recvReqRetry();
    };

    class ITLBPort : public MasterPort
    {
      public:
        ITLBPort(const std::string &_name, ComputeUnit *_cu)
            : MasterPort(_name, _cu), computeUnit(_cu), stalled(false) { }

        bool isStalled() { return stalled; }
        void stallPort() { stalled = true; }
        void unstallPort() { stalled = false; }

        /**
         * here we queue all the translation requests that were
         * not successfully sent.
         */
        std::deque<PacketPtr> retries;

        /** SenderState is information carried along with the packet
         * throughout the TLB hierarchy
         */
        struct SenderState: public Packet::SenderState
        {
            // The wavefront associated with this request
            Wavefront *wavefront;

            SenderState(Wavefront *_wavefront) : wavefront(_wavefront) { }
        };

      protected:
        ComputeUnit *computeUnit;
        bool stalled;

        virtual bool recvTimingResp(PacketPtr pkt);
        virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
        virtual void recvFunctional(PacketPtr pkt) { }
        virtual void recvRangeChange() { }
        virtual void recvReqRetry();
    };

    /**
     * the port intended to communicate between the CU and its LDS
     */
    class LDSPort : public MasterPort
    {
      public:
        LDSPort(const std::string &_name, ComputeUnit *_cu, PortID _id)
            : MasterPort(_name, _cu, _id), computeUnit(_cu)
        {
        }

        bool isStalled() const { return stalled; }
        void stallPort() { stalled = true; }
        void unstallPort() { stalled = false; }

        /**
         * here we queue all the requests that were
         * not successfully sent.
         */
        std::queue<PacketPtr> retries;

        /**
         *  SenderState is information carried along with the packet, esp. the
         *  GPUDynInstPtr
         */
        class SenderState: public Packet::SenderState
        {
          protected:
            // The actual read/write/atomic request that goes with this command
            GPUDynInstPtr _gpuDynInst = nullptr;

          public:
            SenderState(GPUDynInstPtr gpuDynInst):
                _gpuDynInst(gpuDynInst)
            {
            }

            GPUDynInstPtr
            getMemInst() const
            {
                return _gpuDynInst;
            }
        };

        virtual bool
        sendTimingReq(PacketPtr pkt);

      protected:

        bool stalled = false;

        ComputeUnit *computeUnit;

        virtual bool
        recvTimingResp(PacketPtr pkt);

        virtual Tick
        recvAtomic(PacketPtr pkt) { return 0; }

        virtual void
        recvFunctional(PacketPtr pkt)
        {
        }

        virtual void
        recvRangeChange()
        {
        }

        virtual void
        recvReqRetry();
    };

    /** The port to access the Local Data Store
     *  Can be connected to a LDS object
     */
    LDSPort *ldsPort = nullptr;

    LDSPort *
    getLdsPort() const
    {
        return ldsPort;
    }

    TokenManager *
    getTokenManager()
    {
        return memPortTokens;
    }

    /** The memory port for SIMD data accesses. */
    std::vector<DataPort*> memPort;
    // port to the TLB hierarchy (i.e., the L1 TLB)
    std::vector<DTLBPort*> tlbPort;
    // port to the SQC (i.e. the I-cache)
    SQCPort *sqcPort;
    // port to the SQC TLB (there's a separate TLB for each I-cache)
    ITLBPort *sqcTLBPort;

    Port &
    getPort(const std::string &if_name, PortID idx) override
    {
        if (if_name == "memory_port") {
            memPort[idx] = new DataPort(csprintf("%s-port%d", name(), idx),
                                        this, idx);
            return *memPort[idx];
        } else if (if_name == "translation_port") {
            tlbPort[idx] = new DTLBPort(csprintf("%s-port%d", name(), idx),
                                        this, idx);
            return *tlbPort[idx];
        } else if (if_name == "sqc_port") {
            sqcPort = new SQCPort(csprintf("%s-port%d", name(), idx),
                                  this, idx);
            return *sqcPort;
        } else if (if_name == "sqc_tlb_port") {
            sqcTLBPort = new ITLBPort(csprintf("%s-port", name()), this);
            return *sqcTLBPort;
        } else if (if_name == "ldsPort") {
            if (ldsPort) {
                fatal("an LDS port was already allocated");
            }
            ldsPort = new LDSPort(csprintf("%s-port", name()), this, idx);
            return *ldsPort;
        } else if (if_name == "gmTokenPort") {
            return gmTokenPort;
        } else {
            panic("incorrect port name");
        }
    }
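    // For illustration only: a caller holding a ComputeUnit pointer `cu`
    // (hypothetical here) could fetch the first SIMD data port by name, e.g.
    //
    //     Port &dport = cu->getPort("memory_port", 0);
    //
    // In practice the ports are bound from the Python configuration scripts
    // rather than requested directly like this.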

    // xact_cas_load()
    class waveIdentifier
    {
      public:
        waveIdentifier() { }
        waveIdentifier(int _simdId, int _wfSlotId)
            : simdId(_simdId), wfSlotId(_wfSlotId) { }

        int simdId;
        int wfSlotId;
    };

    class waveQueue
    {
      public:
        std::list<waveIdentifier> waveIDQueue;
    };
    std::map<unsigned, waveQueue> xactCasLoadMap;

    uint64_t getAndIncSeqNum() { return globalSeqNum++; }

  private:
    const int _cacheLineSize;
    uint64_t globalSeqNum;
    int wavefrontSize;
    GPUStaticInst *kernelLaunchInst;
};

#endif // __COMPUTE_UNIT_HH__