gem5  v20.0.0.3
compute_unit.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #ifndef __COMPUTE_UNIT_HH__
35 #define __COMPUTE_UNIT_HH__
36 
37 #include <deque>
38 #include <map>
39 #include <unordered_map>
40 #include <vector>
41 
42 #include "base/callback.hh"
43 #include "base/statistics.hh"
44 #include "base/types.hh"
45 #include "enums/PrefetchType.hh"
50 #include "gpu-compute/qstruct.hh"
53 #include "mem/port.hh"
54 #include "sim/clocked_object.hh"
55 
56 static const int MAX_REGS_FOR_NON_VEC_MEM_INST = 1;
57 static const int MAX_WIDTH_FOR_MEM_INST = 32;
58 
59 class NDRange;
60 class Shader;
61 class VectorRegisterFile;
62 
63 struct ComputeUnitParams;
64 
66 {
67  OLDEST = 0,
69 };
70 
71 // List of execution units
73 {
74  SIMD0 = 0,
81 };
82 
84 {
89 };
90 
91 class ComputeUnit : public ClockedObject
92 {
93  public:
100 
101  // Buffers used to communicate between various pipeline stages
102 
103  // List of waves which are ready to be scheduled.
104  // Each execution resource has a ready list. readyList is
105  // used to communicate between scoreboardCheck stage and
106  // schedule stage
107  // TODO: make enum to index readyList
109 
110  // Stores the status of waves. A READY implies the
111  // wave is ready to be scheduled this cycle and
112  // is already present in the readyList. waveStatusList is
113  // used to communicate between scoreboardCheck stage and
114  // schedule stage
115  // TODO: convert std::pair to a class to increase readability
117 
118  // List of waves which will be dispatched to
119  // each execution resource. A FILLED implies
120  // dispatch list is non-empty and
121  // execution unit has something to execute
122  // this cycle. Currently, the dispatch list of
123  // an execution resource can hold only one wave because
124  // an execution resource can execute only one wave in a cycle.
125  // dispatchList is used to communicate between schedule
126  // and exec stage
127  // TODO: convert std::pair to a class to increase readability
129 
130  int rrNextMemID; // used by RR WF exec policy to cycle through WF's
132  typedef ComputeUnitParams Params;
134  int cu_id;
135 
136  // array of vector register files, one per SIMD
138  // Number of vector ALU units (SIMDs) in CU
139  int numSIMDs;
140  // number of pipe stages for bypassing data to next dependent single
141  // precision vector instruction inside the vector ALU pipeline
143  // number of pipe stages for bypassing data to next dependent double
144  // precision vector instruction inside the vector ALU pipeline
146  // number of cycles per issue period
148 
149  // Number of global and local memory execution resources in CU
152  // tracks the last cycle a vector instruction was executed on a SIMD
154 
155  // true if we allow a separate TLB per lane
157  // if 0, TLB prefetching is off.
159  // if fixed-stride prefetching, this is the stride.
161 
165  Enums::PrefetchType prefetchType;
167 
172 
173  /*
174  * for Counting page accesses
175  *
176  * cuExitCallback inherits from Callback. When you register a callback
177  * function as an exit callback, it will get added to an exit callback
178  * queue, such that on simulation exit, all callbacks in the callback
179  * queue will have their process() function called.
180  */
182 
184  uint32_t barrier_id;
185  // vector of Vector ALU (MACC) pipelines
187  // minimum issue period per SIMD unit (in cycles)
189 
190  // Resource control for Vector Register File->Global Memory pipe buses
192  // Resource control for Vector Register File->Local Memory pipe buses
196  // Resource control for global memory to VRF data/address bus
198  // Resource control for local memory to VRF data/address bus
200 
201  uint32_t vrfToCoalescerBusWidth; // VRF->Coalescer data bus width in bytes
202  uint32_t coalescerToVrfBusWidth; // Coalescer->VRF data bus width in bytes
203  uint32_t numCyclesPerStoreTransfer; // number of cycles per vector store
204  uint32_t numCyclesPerLoadTransfer; // number of cycles per vector load
205 
208 
209  // number of vector registers being reserved for each SIMD unit
211  // number of vector registers per SIMD unit
213  // Support for scheduling VGPR status update events
217 
// Queue a pending vector-register status update.
//
// Appends one entry to each of the three parallel vectors regIdxVec,
// timestampVec and statusVec, so that index i across them describes one
// event: the (simdId, regIdx) pair, the time `when`, and `newStatus`.
//
// When operandSize > 4 a second event with the same `when` and `newStatus`
// is queued for the following register, whose index wraps around via
// (regIdx + 1) % numVecRegsPerSimd.
// NOTE(review): operandSize is presumably in bytes, i.e. > 4 means the
// operand spans two 32-bit registers -- confirm against callers.
//
// Nothing is consumed here; the queued entries are presumably drained by
// updateEvents() -- verify in compute_unit.cc.
218  void
219  registerEvent(uint32_t simdId,
220  uint32_t regIdx,
221  uint32_t operandSize,
222  uint64_t when,
223  uint8_t newStatus) {
224  regIdxVec.push_back(std::make_pair(simdId, regIdx));
225  timestampVec.push_back(when);
226  statusVec.push_back(newStatus);
227  if (operandSize > 4) {
228  regIdxVec.push_back(std::make_pair(simdId,
229  ((regIdx + 1) %
230  numVecRegsPerSimd)));
231  timestampVec.push_back(when);
232  statusVec.push_back(newStatus);
233  }
234  }
235 
236  void updateEvents();
237 
238  // this hash map will keep track of page divergence
239  // per memory instruction per wavefront. The hash map
240  // is cleared in GPUDynInst::updateStats() in gpu_dyn_inst.cc.
241  std::map<Addr, int> pagesTouched;
242 
243  ComputeUnit(const Params *p);
244  ~ComputeUnit();
// Returns the value of the wavefrontSize member.
// (The trailing ';' after the body is a redundant empty declaration.)
249  int wfSize() const { return wavefrontSize; };
250 
251  void resizeRegFiles(int num_cregs, int num_sregs, int num_dregs);
252  void exec();
253  void initiateFetch(Wavefront *wavefront);
254  void fetch(PacketPtr pkt, Wavefront *wavefront);
255  void fillKernelState(Wavefront *w, NDRange *ndr);
256 
257  void startWavefront(Wavefront *w, int waveId, LdsChunk *ldsChunk,
258  NDRange *ndr);
259 
260  void StartWorkgroup(NDRange *ndr);
261  int ReadyWorkgroup(NDRange *ndr);
262 
// True when unitId lies in the inclusive range [SIMD0, SIMD3].
263  bool isVecAlu(int unitId) { return unitId >= SIMD0 && unitId <= SIMD3; }
// True when unitId equals GLBMEM_PIPE.
264  bool isGlbMem(int unitId) { return unitId == GLBMEM_PIPE; }
// True when unitId equals LDSMEM_PIPE.
265  bool isShrMem(int unitId) { return unitId == LDSMEM_PIPE; }
// Returns the constant GLBMEM_PIPE.
266  int GlbMemUnitId() { return GLBMEM_PIPE; }
// Returns the constant LDSMEM_PIPE.
267  int ShrMemUnitId() { return LDSMEM_PIPE; }
// Pre-increments nextGlbMemBus and returns the new value modulo
// numGlbMemUnits. Only the returned value is reduced; the stored counter
// itself is never wrapped and keeps growing with every call.
// NOTE(review): assumes numGlbMemUnits is non-zero -- confirm it is
// validated where it is set.
268  int nextGlbRdBus() { return (++nextGlbMemBus) % numGlbMemUnits; }
// Pre-increments nextLocMemBus and returns the new value modulo
// numLocMemUnits. Only the returned value is reduced; the stored counter
// itself is never wrapped and keeps growing with every call.
// NOTE(review): assumes numLocMemUnits is non-zero -- confirm it is
// validated where it is set.
269  int nextLocRdBus() { return (++nextLocMemBus) % numLocMemUnits; }
270  /* This function cycles through all the wavefronts in all the phases to see
271  * if all of the wavefronts which should be associated with one barrier
272  * (denoted with _barrier_id), are all at the same barrier in the program
273  * (denoted by bcnt). When the number at the barrier matches bslots, then
274  * return true.
275  */
276  int AllAtBarrier(uint32_t _barrier_id, uint32_t bcnt, uint32_t bslots);
277  bool cedeSIMD(int simdId, int wfSlotId);
278 
279  template<typename c0, typename c1> void doSmReturn(GPUDynInstPtr gpuDynInst);
280  virtual void init() override;
281  void sendRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt);
282  void sendSyncRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt);
283  void injectGlobalMemFence(GPUDynInstPtr gpuDynInst,
284  bool kernelLaunch=true,
285  RequestPtr req=nullptr);
286  void handleMemPacket(PacketPtr pkt, int memport_index);
287  bool processTimingPacket(PacketPtr pkt);
288  void processFetchReturn(PacketPtr pkt);
290 
292 
293  bool isDone() const;
294  bool isSimdDone(uint32_t) const;
295 
296  protected:
298 
300 
301  public:
324 
325  void updateInstStats(GPUDynInstPtr gpuDynInst);
326 
327  // the following stats compute the avg. TLB access latency per
328  // uncoalesced request (only for data)
332  // hitsPerTLBLevel[x] are the hits in Level x TLB. x = 0 is the page table.
334 
337 
338  // over all memory instructions executed over all wavefronts
339  // how many touched 0-4 pages, 4-8, ..., 60-64 pages
343 
345  // Number of instructions executed, i.e. if 64 (or 32 or 7) lanes are active
346  // when the instruction is committed, this number is still incremented by 1
348  // Number of cycles among successive instruction executions across all
349  // wavefronts of the same CU
351  // number of individual vector operations executed
353  // Total cycles that something is running on the GPU
355  Stats::Formula vpc; // vector ops per cycle
356  Stats::Formula ipc; // vector instructions per cycle
360  // number of vector ALU instructions received
362  // number of times a WG can not start due to lack of free VGPRs in SIMDs
367  // flag per vector SIMD unit that is set when there is at least one
368  // WV that has a vector ALU instruction as the oldest in its
369  // Instruction Buffer: Defined in the Scoreboard stage, consumed
370  // by the Execute stage.
372  // number of available (oldest) LDS instructions that could have
373  // been issued to the LDS at a specific issue slot
375  // number of available Global memory instructions that could have
376  // been issued to TCP at a specific issue slot
378 
379  void
380  regStats() override;
381 
// Returns the lds member as a non-const reference. The accessor itself is
// const-qualified, so callers can modify the LdsState object through a
// const ComputeUnit.
382  LdsState &
383  getLds() const
384  {
385  return lds;
386  }
387 
388  int32_t
389  getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const;
390 
// Returns the value of the const member _cacheLineSize.
391  int cacheLineSize() const { return _cacheLineSize; }
392 
393  bool
394  sendToLds(GPUDynInstPtr gpuDynInst) __attribute__((warn_unused_result));
395 
396  typedef std::unordered_map<Addr, std::pair<int, int>> pageDataStruct;
397  pageDataStruct pageAccesses;
398 
399  class CUExitCallback : public Callback
400  {
401  private:
403 
404  public:
405  virtual ~CUExitCallback() { }
406 
408  {
409  computeUnit = _cu;
410  }
411 
412  virtual void
413  process();
414  };
415 
417 
419  class DataPort : public MasterPort
420  {
421  public:
422  DataPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
423  : MasterPort(_name, _cu), computeUnit(_cu),
424  index(_index) { }
425 
427 
429  {
433 
434  SenderState(GPUDynInstPtr gpuDynInst, PortID _port_index,
435  Packet::SenderState *sender_state=nullptr)
436  : _gpuDynInst(gpuDynInst),
437  port_index(_port_index),
438  saved(sender_state) { }
439  };
440 
441  void processMemReqEvent(PacketPtr pkt);
442  EventFunctionWrapper *createMemReqEvent(PacketPtr pkt);
443 
444  void processMemRespEvent(PacketPtr pkt);
445  EventFunctionWrapper *createMemRespEvent(PacketPtr pkt);
446 
448 
449  protected:
451  int index;
452 
453  virtual bool recvTimingResp(PacketPtr pkt);
454  virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
455  virtual void recvFunctional(PacketPtr pkt) { }
456  virtual void recvRangeChange() { }
457  virtual void recvReqRetry();
458 
459  virtual void
461  {
462  resp.clear();
463  snoop = true;
464  }
465 
466  };
467 
468  // Instruction cache access port
469  class SQCPort : public MasterPort
470  {
471  public:
472  SQCPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
473  : MasterPort(_name, _cu), computeUnit(_cu),
474  index(_index) { }
475 
477 
479  {
482 
484  *sender_state=nullptr)
485  : wavefront(_wavefront), saved(sender_state) { }
486  };
487 
489 
490  protected:
492  int index;
493 
494  virtual bool recvTimingResp(PacketPtr pkt);
495  virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
496  virtual void recvFunctional(PacketPtr pkt) { }
497  virtual void recvRangeChange() { }
498  virtual void recvReqRetry();
499 
500  virtual void
502  {
503  resp.clear();
504  snoop = true;
505  }
506  };
507 
509  class DTLBPort : public MasterPort
510  {
511  public:
512  DTLBPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
513  : MasterPort(_name, _cu), computeUnit(_cu),
514  index(_index), stalled(false)
515  { }
516 
517  bool isStalled() { return stalled; }
518  void stallPort() { stalled = true; }
519  void unstallPort() { stalled = false; }
520 
526 
531  {
532  // the memInst that this is associated with
534 
535  // the lane in the memInst this is associated with, so we send
536  // the memory request down the right port
538 
539  // constructor used for packets involved in timing accesses
540  SenderState(GPUDynInstPtr gpuDynInst, PortID port_index)
541  : _gpuDynInst(gpuDynInst), portIndex(port_index) { }
542 
543  };
544 
545  protected:
547  int index;
548  bool stalled;
549 
550  virtual bool recvTimingResp(PacketPtr pkt);
551  virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
552  virtual void recvFunctional(PacketPtr pkt) { }
553  virtual void recvRangeChange() { }
554  virtual void recvReqRetry();
555  };
556 
557  class ITLBPort : public MasterPort
558  {
559  public:
560  ITLBPort(const std::string &_name, ComputeUnit *_cu)
561  : MasterPort(_name, _cu), computeUnit(_cu), stalled(false) { }
562 
563 
564  bool isStalled() { return stalled; }
565  void stallPort() { stalled = true; }
566  void unstallPort() { stalled = false; }
567 
573 
578  {
579  // The wavefront associated with this request
581 
582  SenderState(Wavefront *_wavefront) : wavefront(_wavefront) { }
583  };
584 
585  protected:
587  bool stalled;
588 
589  virtual bool recvTimingResp(PacketPtr pkt);
590  virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
591  virtual void recvFunctional(PacketPtr pkt) { }
592  virtual void recvRangeChange() { }
593  virtual void recvReqRetry();
594  };
595 
599  class LDSPort : public MasterPort
600  {
601  public:
602  LDSPort(const std::string &_name, ComputeUnit *_cu, PortID _id)
603  : MasterPort(_name, _cu, _id), computeUnit(_cu)
604  {
605  }
606 
607  bool isStalled() const { return stalled; }
608  void stallPort() { stalled = true; }
609  void unstallPort() { stalled = false; }
610 
615  std::queue<PacketPtr> retries;
616 
622  {
623  protected:
624  // The actual read/write/atomic request that goes with this command
625  GPUDynInstPtr _gpuDynInst = nullptr;
626 
627  public:
629  _gpuDynInst(gpuDynInst)
630  {
631  }
632 
634  getMemInst() const
635  {
636  return _gpuDynInst;
637  }
638  };
639 
640  virtual bool
641  sendTimingReq(PacketPtr pkt);
642 
643  protected:
644 
645  bool stalled = false;
646 
648 
649  virtual bool
650  recvTimingResp(PacketPtr pkt);
651 
652  virtual Tick
653  recvAtomic(PacketPtr pkt) { return 0; }
654 
655  virtual void
657  {
658  }
659 
660  virtual void
662  {
663  }
664 
665  virtual void
666  recvReqRetry();
667  };
668 
672  LDSPort *ldsPort = nullptr;
673 
// Returns the ldsPort pointer. The member is initialised to nullptr and is
// only assigned in getPort() for the "ldsPort" interface name, so the
// result is nullptr until that has happened.
674  LDSPort *
675  getLdsPort() const
676  {
677  return ldsPort;
678  }
679 
684  // port to the TLB hierarchy (i.e., the L1 TLB)
686  // port to the SQC (i.e. the I-cache)
688  // port to the SQC TLB (there's a separate TLB for each I-cache)
690 
// Allocates the port object for the given interface name and returns a
// reference to it.
//
//   if_name              object created   stored in        port name
//   "memory_port"        DataPort         memPort[idx]     "<name>-port<idx>"
//   "translation_port"   DTLBPort         tlbPort[idx]     "<name>-port<idx>"
//   "sqc_port"           SQCPort          sqcPort          "<name>-port<idx>"
//   "sqc_tlb_port"       ITLBPort         sqcTLBPort       "<name>-port"
//   "ldsPort"            LDSPort          ldsPort          "<name>-port"
//
// Any other if_name ends in panic("incorrect port name").
//
// Every call allocates with `new`; this is not a lookup of an existing
// port. Only the "ldsPort" branch checks for an earlier allocation, and it
// calls fatal() if one exists. The other branches overwrite whatever
// pointer was stored before without releasing it here.
// NOTE(review): memPort[idx] and tlbPort[idx] are indexed without a range
// check in this function -- confirm both containers are sized before the
// ports are connected.
// NOTE(review): several ports share the same name string (e.g. memory and
// translation ports with equal idx) -- confirm this is intended.
691  Port &
692  getPort(const std::string &if_name, PortID idx) override
693  {
694  if (if_name == "memory_port") {
695  memPort[idx] = new DataPort(csprintf("%s-port%d", name(), idx),
696  this, idx);
697  return *memPort[idx];
698  } else if (if_name == "translation_port") {
699  tlbPort[idx] = new DTLBPort(csprintf("%s-port%d", name(), idx),
700  this, idx);
701  return *tlbPort[idx];
702  } else if (if_name == "sqc_port") {
703  sqcPort = new SQCPort(csprintf("%s-port%d", name(), idx),
704  this, idx);
705  return *sqcPort;
706  } else if (if_name == "sqc_tlb_port") {
707  sqcTLBPort = new ITLBPort(csprintf("%s-port", name()), this);
708  return *sqcTLBPort;
709  } else if (if_name == "ldsPort") {
710  if (ldsPort) {
711  fatal("an LDS port was already allocated");
712  }
713  ldsPort = new LDSPort(csprintf("%s-port", name()), this, idx);
714  return *ldsPort;
715  } else {
716  panic("incorrect port name");
717  }
718  }
719 
720  // xact_cas_load()
722  {
723  public:
725  waveIdentifier(int _simdId, int _wfSlotId)
726  : simdId(_simdId), wfSlotId(_wfSlotId) { }
727 
728  int simdId;
729  int wfSlotId;
730  };
731 
732  class waveQueue
733  {
734  public:
736  };
737  std::map<unsigned, waveQueue> xactCasLoadMap;
738 
// Returns the current value of globalSeqNum and then increments the member
// (post-increment), so successive calls return consecutive values.
739  uint64_t getAndIncSeqNum() { return globalSeqNum++; }
740 
741  private:
742  const int _cacheLineSize;
743  uint64_t globalSeqNum;
746 };
747 
748 #endif // __COMPUTE_UNIT_HH__
uint32_t numVecRegsPerSimd
A MasterPort is a specialisation of a BaseMasterPort, which implements the default protocol for the t...
Definition: port.hh:71
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:163
void updatePageDivergenceDist(Addr addr)
Stats::Formula tlbLatency
virtual void recvRangeChange()
Called to receive an address range change from the peer slave port.
virtual void recvRangeChange()
Called to receive an address range change from the peer slave port.
Stats::Formula vpc
SenderState(Wavefront *_wavefront, Packet::SenderState *sender_state=nullptr)
Ports are used to interface objects to each other.
Definition: port.hh:56
Stats::Scalar flatLDSInsts
std::deque< PacketPtr > retries
here we queue all the translation requests that were not successfully sent.
Bitfield< 30, 0 > index
std::vector< bool > vectorAluInstAvail
void injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelLaunch=true, RequestPtr req=nullptr)
void handleMemPacket(PacketPtr pkt, int memport_index)
#define fatal(...)
This implements a cprintf based fatal() function.
Definition: logging.hh:171
DTLBPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
Generic callback class.
Definition: callback.hh:39
uint32_t numCyclesPerLoadTransfer
Stats::Formula ipc
Data TLB port.
WaitClass glbMemToVrfBus
static const int MAX_REGS_FOR_NON_VEC_MEM_INST
Definition: compute_unit.hh:56
std::map< unsigned, waveQueue > xactCasLoadMap
bool debugSegFault
LdsState & lds
static const int MAX_WIDTH_FOR_MEM_INST
Definition: compute_unit.hh:57
std::vector< std::vector< std::pair< Wavefront *, WAVE_STATUS > > > waveStatusList
void fillKernelState(Wavefront *w, NDRange *ndr)
Stats::Vector hitsPerTLBLevel
Stats::Scalar dynamicGMemInstrCnt
ScheduleStage scheduleStage
Definition: compute_unit.hh:96
Stats::Formula flatLDSInstsPerWF
int storeBusLength()
const char * __attribute__((weak)) m5MainCommands[]
int dpBypassLength()
Stats::Distribution controlFlowDivergenceDist
ITLBPort * sqcTLBPort
std::vector< std::vector< Wavefront * > > readyList
std::shared_ptr< Request > RequestPtr
Definition: request.hh:81
Stats::Scalar vectorMemWrites
virtual void getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
ip6_addr_t addr
Definition: inet.hh:330
int dpBypassPipeLength
uint64_t globalSeqNum
SenderState(Wavefront *_wavefront)
virtual void init() override
init() is called after all C++ SimObjects have been created and all ports are connected.
int spBypassPipeLength
CUExitCallback * cuExitCallback
Definition: shader.hh:76
A vector of scalar stats.
Definition: statistics.hh:2547
std::vector< DTLBPort * > tlbPort
std::vector< std::vector< Wavefront * > > wfList
this represents a slice of the overall LDS, intended to be associated with an individual workgroup ...
Definition: lds_state.hh:56
void updateEvents()
virtual void recvRangeChange()
Called to receive an address range change from the peer slave port.
Stats::Scalar dynamicLMemInstrCnt
SenderState is information carried along with the packet throughout the TLB hierarchy.
Stats::Formula numALUInstsExecuted
Declaration of Statistics objects.
int spBypassLength()
GPUStaticInst * kernelLaunchInst
Stats::Scalar numInstrExecuted
void initiateFetch(Wavefront *wavefront)
This is a simple scalar statistic, like a counter.
Definition: statistics.hh:2505
SenderState(GPUDynInstPtr gpuDynInst)
virtual void recvFunctional(PacketPtr pkt)
Stats::Scalar vALUInsts
STL vector class.
Definition: stl.hh:37
Stats::Distribution ldsBankConflictDist
SenderState is information carried along with the packet throughout the TLB hierarchy.
std::vector< WaitClass > vrfToLocalMemPipeBus
Stats::Formula vectorMemWritesPerWF
std::deque< std::pair< PacketPtr, GPUDynInstPtr > > retries
Stats::Scalar wgBlockedDueLdsAllocation
SQCPort * sqcPort
virtual void recvFunctional(PacketPtr pkt)
std::vector< std::vector< std::vector< Addr > > > lastVaddrWF
virtual Tick recvAtomic(PacketPtr pkt)
std::vector< WaitClass > aluPipe
uint32_t numCyclesPerStoreTransfer
void startWavefront(Wavefront *w, int waveId, LdsChunk *ldsChunk, NDRange *ndr)
ComputeUnit(const Params *p)
Definition: compute_unit.cc:58
bool localMemBarrier
std::deque< std::pair< PacketPtr, Wavefront * > > retries
GlobalMemPipeline globalMemoryPipe
Definition: compute_unit.hh:98
uint32_t coalescerToVrfBusWidth
Stats::Formula vALUUtilization
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:46
virtual void recvRangeChange()
Called to receive an address range change from the peer slave port.
Stats::Distribution activeLanesPerLMemInstrDist
ITLBPort(const std::string &_name, ComputeUnit *_cu)
Stats::Formula scalarMemWritesPerWF
Stats::Scalar numTimesWgBlockedDueVgprAlloc
bool functionalTLB
CUExitCallback(ComputeUnit *_cu)
Stats::Distribution execRateDist
Stats::Formula vectorMemReadsPerWF
int ShrMemUnitId()
void sendSyncRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt)
Port & getPort(const std::string &if_name, PortID idx) override
Get a port with a given name and index.
Data access Port.
bool isShrMem(int unitId)
std::vector< std::pair< uint32_t, uint32_t > > regIdxVec
std::string csprintf(const char *format, const Args &...args)
Definition: cprintf.hh:158
virtual Tick recvAtomic(PacketPtr pkt)
SenderState(GPUDynInstPtr gpuDynInst, PortID port_index)
std::deque< PacketPtr > retries
here we queue all the translation requests that were not successfully sent.
the port intended to communicate between the CU and its LDS
std::list< waveIdentifier > waveIDQueue
Stats::Distribution pageDivergenceDist
LdsState & getLds() const
ExecStage execStage
Definition: compute_unit.hh:97
uint64_t Tick
Tick count type.
Definition: types.hh:61
Stats::Scalar tlbRequests
ComputeUnit * computeUnit
virtual void recvFunctional(PacketPtr pkt)
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
EXEC_UNIT
Definition: compute_unit.hh:72
A simple distribution stat.
Definition: statistics.hh:2589
ComputeUnit * computeUnit
std::vector< WaitClass > vrfToGlobalMemPipeBus
bool isDone() const
void updateInstStats(GPUDynInstPtr gpuDynInst)
ClockedObject declaration and implementation.
Stats::Scalar flatVMemInsts
Stats::Scalar numCASOps
GPUDynInstPtr getMemInst() const
ComputeUnit * computeUnit
MasterID _masterId
int wfSize() const
std::vector< DataPort * > memPort
The memory port for SIMD data accesses.
std::vector< std::vector< Addr > > lastVaddrSimd
uint32_t vrfToCoalescerBusWidth
int AllAtBarrier(uint32_t _barrier_id, uint32_t bcnt, uint32_t bslots)
void StartWorkgroup(NDRange *ndr)
Port Object Declaration.
Stats::Formula sALUInstsPerWF
virtual Tick recvAtomic(PacketPtr pkt)
bool isGlbMem(int unitId)
Stats::Scalar scalarMemWrites
std::unordered_map< Addr, std::pair< int, int > > pageDataStruct
Stats::Scalar scalarMemReads
Bitfield< 0 > w
Defines global host-dependent types: Counter, Tick, and (indirectly) {int,uint}{8,16,32,64}_t.
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Definition: types.hh:140
uint16_t MasterID
Definition: request.hh:84
SQCPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
Stats::Scalar ldsNoFlatInsts
std::vector< std::pair< Wavefront *, DISPATCH_STATUS > > dispatchList
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
Definition: packet.hh:249
bool sendToLds(GPUDynInstPtr gpuDynInst) __attribute__((warn_unused_result))
send a general request to the LDS make sure to look at the return value here as your request might be...
bool cedeSIMD(int simdId, int wfSlotId)
Stats::Scalar instCyclesVALU
Tick resp_tick_latency
A virtual base opaque structure used to hold state associated with the packet (e.g., an MSHR), specific to a SimObject that sees the packet.
Definition: packet.hh:397
Stats::Scalar completedWfs
bool xact_cas_mode
STL deque class.
Definition: stl.hh:44
A formula for statistics that is calculated when printed.
Definition: statistics.hh:3009
SenderState(GPUDynInstPtr gpuDynInst, PortID _port_index, Packet::SenderState *sender_state=nullptr)
Stats::Formula scalarMemReadsPerWF
Stats::Formula vALUInstsPerWF
virtual const std::string name() const
Definition: sim_object.hh:129
Shader * shader
int32_t getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
Stats::Distribution activeLanesPerGMemInstrDist
void doSmReturn(GPUDynInstPtr gpuDynInst)
ComputeUnit * computeUnit
Stats::Scalar tlbCycles
SenderState is information carried along with the packet, esp.
std::queue< PacketPtr > retries
here we queue all the requests that were not successfully sent.
Stats::Scalar numVecOpsExecuted
std::vector< VectorRegisterFile * > vrf
virtual void getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
int loadBusLength()
virtual void recvFunctional(PacketPtr pkt)
void resizeRegFiles(int num_cregs, int num_sregs, int num_dregs)
EXEC_POLICY
Definition: compute_unit.hh:65
int nextLocRdBus()
Stats::Scalar numFailedCASOps
int ReadyWorkgroup(NDRange *ndr)
LDSPort * getLdsPort() const
std::map< Addr, int > pagesTouched
Stats::Scalar instCyclesSALU
virtual void process()
virtual process function that is invoked when the callback queue is executed.
WaitClass locMemToVrfBus
void fetch(PacketPtr pkt, Wavefront *wavefront)
TLB_CACHE
Definition: compute_unit.hh:83
FetchStage fetchStage
Definition: compute_unit.hh:94
Stats::Formula flatVMemInstsPerWF
std::vector< uint8_t > statusVec
uint32_t barrier_id
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Definition: types.hh:235
std::vector< uint64_t > lastExecCycle
void registerEvent(uint32_t simdId, uint32_t regIdx, uint32_t operandSize, uint64_t when, uint8_t newStatus)
std::vector< WaitClass > wfWait
LocalMemPipeline localMemoryPipe
Definition: compute_unit.hh:99
void processFetchReturn(PacketPtr pkt)
int GlbMemUnitId()
pageDataStruct pageAccesses
bool processTimingPacket(PacketPtr pkt)
Enums::PrefetchType prefetchType
Stats::Scalar sALUInsts
Stats::Scalar ldsBankAccesses
Tick req_tick_latency
Stats::Scalar totalCycles
LDSPort * ldsPort
The port to access the Local Data Store. Can be connected to an LDS object.
waveIdentifier(int _simdId, int _wfSlotId)
std::vector< uint64_t > timestampVec
Stats::Scalar vectorMemReads
int cacheLineSize() const
Bitfield< 0 > p
std::vector< Addr > lastVaddrCU
int nextGlbRdBus()
void regStats() override
Callback to set stat parameters.
ComputeUnitParams Params
Stats::Formula ldsNoFlatInstsPerWF
virtual Tick recvAtomic(PacketPtr pkt)
virtual Tick recvAtomic(PacketPtr pkt)
ComputeUnit * computeUnit
bool isSimdDone(uint32_t) const
uint64_t getAndIncSeqNum()
Stats::Scalar threadCyclesVALU
virtual void recvRangeChange()
Called to receive an address range change from the peer slave port.
void sendRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt)
MasterID masterId()
bool isStalled() const
std::vector< int > vectorRegsReserved
bool isVecAlu(int unitId)
EXEC_POLICY exec_policy
LDSPort(const std::string &_name, ComputeUnit *_cu, PortID _id)
ScoreboardCheckStage scoreboardCheckStage
Definition: compute_unit.hh:95
virtual void recvFunctional(PacketPtr pkt)
DataPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
const int _cacheLineSize

Generated on Fri Jul 3 2020 15:53:02 for gem5 by doxygen 1.8.13