compute_unit.hh (gem5 v19.0.0.0)
1 /*
2  * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Authors: John Kalamatianos,
34  * Anthony Gutierrez
35  */
36 
37 #ifndef __COMPUTE_UNIT_HH__
38 #define __COMPUTE_UNIT_HH__
39 
40 #include <deque>
41 #include <map>
42 #include <unordered_map>
43 #include <vector>
44 
45 #include "base/callback.hh"
46 #include "base/statistics.hh"
47 #include "base/types.hh"
48 #include "enums/PrefetchType.hh"
49 #include "gpu-compute/exec_stage.hh"
50 #include "gpu-compute/fetch_stage.hh"
51 #include "gpu-compute/global_memory_pipeline.hh"
52 #include "gpu-compute/local_memory_pipeline.hh"
53 #include "gpu-compute/qstruct.hh"
54 #include "gpu-compute/schedule_stage.hh"
55 #include "gpu-compute/scoreboard_check_stage.hh"
56 #include "mem/port.hh"
57 #include "sim/clocked_object.hh"
58 
59 static const int MAX_REGS_FOR_NON_VEC_MEM_INST = 1;
60 static const int MAX_WIDTH_FOR_MEM_INST = 32;
61 
62 class NDRange;
63 class Shader;
64 class VectorRegisterFile;
65 
66 struct ComputeUnitParams;
67 
68 enum EXEC_POLICY
69 {
70     OLDEST = 0,
71     RR
72 };
73 
74 // List of execution units
75 enum EXEC_UNIT
76 {
77     SIMD0 = 0,
78     SIMD1,
79     SIMD2,
80     SIMD3,
81     GLBMEM_PIPE,
82     LDSMEM_PIPE,
83     NUM_UNITS
84 };
85 
86 enum TLB_CACHE
87 {
88     TLB_MISS_CACHE_MISS = 0,
89     TLB_MISS_CACHE_HIT,
90     TLB_HIT_CACHE_MISS,
91     TLB_HIT_CACHE_HIT
92 };
93 
94 class ComputeUnit : public ClockedObject
95 {
96  public:
97     FetchStage fetchStage;
98     ScoreboardCheckStage scoreboardCheckStage;
99     ScheduleStage scheduleStage;
100     ExecStage execStage;
101     GlobalMemPipeline globalMemoryPipe;
102     LocalMemPipeline localMemoryPipe;
103 
104  // Buffers used to communicate between various pipeline stages
105 
106  // List of waves which are ready to be scheduled.
107  // Each execution resource has a ready list. readyList is
108  // used to communicate between scoreboardCheck stage and
109  // schedule stage
110  // TODO: make enum to index readyList
111     std::vector<std::vector<Wavefront*>> readyList;
112 
113  // Stores the status of waves. A READY implies the
114  // wave is ready to be scheduled this cycle and
115  // is already present in the readyList. waveStatusList is
116  // used to communicate between scoreboardCheck stage and
117  // schedule stage
118  // TODO: convert std::pair to a class to increase readability
119     std::vector<std::vector<std::pair<Wavefront*, WAVE_STATUS>>> waveStatusList;
120 
121  // List of waves which will be dispatched to
122  // each execution resource. A FILLED implies
123  // dispatch list is non-empty and
124  // execution unit has something to execute
125  // this cycle. Currently, the dispatch list of
126  // an execution resource can hold only one wave because
127  // an execution resource can execute only one wave in a cycle.
128  // dispatchList is used to communicate between schedule
129  // and exec stage
130  // TODO: convert std::pair to a class to increase readability
131     std::vector<std::pair<Wavefront*, DISPATCH_STATUS>> dispatchList;
132 
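    // Taken together these buffers form the per-cycle hand-off between the
    // front-end stages: scoreboardCheck marks schedulable waves in
    // waveStatusList and appends them to readyList, schedule then picks at
    // most one wave per execution resource and records it in dispatchList,
    // and exec consumes dispatchList and issues the chosen wave to its unit.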
133  int rrNextMemID; // used by RR WF exec policy to cycle through WF's
135  typedef ComputeUnitParams Params;
137  int cu_id;
138 
139  // array of vector register files, one per SIMD
140     std::vector<VectorRegisterFile*> vrf;
141     // Number of vector ALU units (SIMDs) in CU
142  int numSIMDs;
143  // number of pipe stages for bypassing data to next dependent single
144     // precision vector instruction inside the vector ALU pipeline
145     int spBypassPipeLength;
146     // number of pipe stages for bypassing data to next dependent double
147     // precision vector instruction inside the vector ALU pipeline
148     int dpBypassPipeLength;
149     // number of cycles per issue period
151 
152  // Number of global and local memory execution resources in CU
153     int numGlbMemUnits;
154     int numLocMemUnits;
155     // tracks the last cycle a vector instruction was executed on a SIMD
156     std::vector<uint64_t> lastExecCycle;
157 
158  // true if we allow a separate TLB per lane
159     bool perLaneTLB;
160     // if 0, TLB prefetching is off.
161     int prefetchDepth;
162     // if fixed-stride prefetching, this is the stride.
163     int prefetchStride;
164 
165     std::vector<Addr> lastVaddrCU;
166     std::vector<std::vector<Addr>> lastVaddrSimd;
167     std::vector<std::vector<std::vector<Addr>>> lastVaddrWF;
168     Enums::PrefetchType prefetchType;
169     EXEC_POLICY exec_policy;
170 
171     bool xact_cas_mode;
172     bool debugSegFault;
173     bool functionalTLB;
174     bool localMemBarrier;
175 
176  /*
177  * for Counting page accesses
178  *
179  * cuExitCallback inherits from Callback. When you register a callback
180  * function as an exit callback, it will get added to an exit callback
181  * queue, such that on simulation exit, all callbacks in the callback
182  * queue will have their process() function called.
183  */
184     bool countPages;
185 
186     Shader *shader;
187     uint32_t barrier_id;
188  // vector of Vector ALU (MACC) pipelines
189     std::vector<WaitClass> aluPipe;
190     // minimum issue period per SIMD unit (in cycles)
191     std::vector<WaitClass> wfWait;
192 
193     // Resource control for Vector Register File->Global Memory pipe buses
194     std::vector<WaitClass> vrfToGlobalMemPipeBus;
195     // Resource control for Vector Register File->Local Memory pipe buses
196     std::vector<WaitClass> vrfToLocalMemPipeBus;
199     // Resource control for global memory to VRF data/address bus
200     WaitClass glbMemToVrfBus;
201     // Resource control for local memory to VRF data/address bus
202     WaitClass locMemToVrfBus;
203 
204  uint32_t vrfToCoalescerBusWidth; // VRF->Coalescer data bus width in bytes
205  uint32_t coalescerToVrfBusWidth; // Coalescer->VRF data bus width in bytes
206  uint32_t numCyclesPerStoreTransfer; // number of cycles per vector store
207  uint32_t numCyclesPerLoadTransfer; // number of cycles per vector load
208 
211 
212     // number of vector registers being reserved for each SIMD unit
213     std::vector<int> vectorRegsReserved;
214     // number of vector registers per SIMD unit
215     uint32_t numVecRegsPerSimd;
216     // Support for scheduling VGPR status update events
217     std::vector<std::pair<uint32_t, uint32_t>> regIdxVec;
218     std::vector<uint64_t> timestampVec;
219     std::vector<uint8_t> statusVec;
220 
221  void
222  registerEvent(uint32_t simdId,
223  uint32_t regIdx,
224  uint32_t operandSize,
225  uint64_t when,
226  uint8_t newStatus) {
227  regIdxVec.push_back(std::make_pair(simdId, regIdx));
228  timestampVec.push_back(when);
229  statusVec.push_back(newStatus);
230  if (operandSize > 4) {
231  regIdxVec.push_back(std::make_pair(simdId,
232  ((regIdx + 1) %
233  numVecRegsPerSimd)));
234  timestampVec.push_back(when);
235  statusVec.push_back(newStatus);
236  }
237  }
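    // Example (hypothetical values): registering a write-back of an 8-byte
    // (double precision) operand on SIMD 1, register 6,
    //     registerEvent(1, 6, 8, when, newStatus);
    // queues a status update for both v6 and v7 at cycle `when`, because
    // operandSize > 4 also covers the adjacent register, wrapping modulo
    // numVecRegsPerSimd.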
238 
239  void updateEvents();
240 
241  // this hash map will keep track of page divergence
242  // per memory instruction per wavefront. The hash map
243  // is cleared in GPUDynInst::updateStats() in gpu_dyn_inst.cc.
244  std::map<Addr, int> pagesTouched;
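    // For example, a vector load whose lanes fall on three distinct pages
    // leaves three entries here (roughly: page address -> number of accesses
    // to that page) until GPUDynInst::updateStats() consumes and clears them.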
245 
246  ComputeUnit(const Params *p);
247  ~ComputeUnit();
252  int wfSize() const { return wavefrontSize; };
253 
254  void resizeRegFiles(int num_cregs, int num_sregs, int num_dregs);
255  void exec();
256  void initiateFetch(Wavefront *wavefront);
257  void fetch(PacketPtr pkt, Wavefront *wavefront);
258  void fillKernelState(Wavefront *w, NDRange *ndr);
259 
260  void startWavefront(Wavefront *w, int waveId, LdsChunk *ldsChunk,
261  NDRange *ndr);
262 
263  void StartWorkgroup(NDRange *ndr);
264  int ReadyWorkgroup(NDRange *ndr);
265 
266  bool isVecAlu(int unitId) { return unitId >= SIMD0 && unitId <= SIMD3; }
267  bool isGlbMem(int unitId) { return unitId == GLBMEM_PIPE; }
268  bool isShrMem(int unitId) { return unitId == LDSMEM_PIPE; }
269  int GlbMemUnitId() { return GLBMEM_PIPE; }
270  int ShrMemUnitId() { return LDSMEM_PIPE; }
271  int nextGlbRdBus() { return (++nextGlbMemBus) % numGlbMemUnits; }
272  int nextLocRdBus() { return (++nextLocMemBus) % numLocMemUnits; }
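    // Each call pre-increments the corresponding bus pointer and wraps it
    // with a modulo, so successive global (or local) memory operations are
    // spread across the available buses in round-robin order.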
273  /* This function cycles through all the wavefronts in all the phases to see
274  * if all of the wavefronts which should be associated with one barrier
275  * (denoted with _barrier_id), are all at the same barrier in the program
276  * (denoted by bcnt). When the number at the barrier matches bslots, then
277  * return true.
278  */
279  int AllAtBarrier(uint32_t _barrier_id, uint32_t bcnt, uint32_t bslots);
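    // Example: a 256 work-item work-group with wfSize() == 64 occupies four
    // wavefront slots, so bslots would be 4 and AllAtBarrier() only signals
    // completion once all four wavefronts have reached barrier count bcnt
    // for _barrier_id.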
280  bool cedeSIMD(int simdId, int wfSlotId);
281 
282  template<typename c0, typename c1> void doSmReturn(GPUDynInstPtr gpuDynInst);
283  virtual void init() override;
284  void sendRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt);
285  void sendSyncRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt);
286  void injectGlobalMemFence(GPUDynInstPtr gpuDynInst,
287  bool kernelLaunch=true,
288  RequestPtr req=nullptr);
289  void handleMemPacket(PacketPtr pkt, int memport_index);
290  bool processTimingPacket(PacketPtr pkt);
291  void processFetchReturn(PacketPtr pkt);
293 
295 
296  bool isDone() const;
297  bool isSimdDone(uint32_t) const;
298 
299  protected:
300     MasterID _masterId;
301 
302     LdsState &lds;
303 
304  public:
327 
328  void updateInstStats(GPUDynInstPtr gpuDynInst);
329 
330     // the following stats compute the avg. TLB access latency per
331  // uncoalesced request (only for data)
335  // hitsPerTLBLevel[x] are the hits in Level x TLB. x = 0 is the page table.
337 
340 
341  // over all memory instructions executed over all wavefronts
342  // how many touched 0-4 pages, 4-8, ..., 60-64 pages
346 
348  // Number of instructions executed, i.e. if 64 (or 32 or 7) lanes are active
349  // when the instruction is committed, this number is still incremented by 1
351  // Number of cycles among successive instruction executions across all
352  // wavefronts of the same CU
354  // number of individual vector operations executed
356  // Total cycles that something is running on the GPU
358  Stats::Formula vpc; // vector ops per cycle
359  Stats::Formula ipc; // vector instructions per cycle
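    // These formulas are set up in regStats(); roughly,
    //     vpc = numVecOpsExecuted / totalCycles
    //     ipc = numInstrExecuted / totalCycles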
363  // number of vector ALU instructions received
365  // number of times a WG can not start due to lack of free VGPRs in SIMDs
370  // flag per vector SIMD unit that is set when there is at least one
371  // WV that has a vector ALU instruction as the oldest in its
372  // Instruction Buffer: Defined in the Scoreboard stage, consumed
373  // by the Execute stage.
375  // number of available (oldest) LDS instructions that could have
376  // been issued to the LDS at a specific issue slot
378  // number of available Global memory instructions that could have
379  // been issued to TCP at a specific issue slot
381 
382  void
383  regStats() override;
384 
385  LdsState &
386  getLds() const
387  {
388  return lds;
389  }
390 
391  int32_t
392  getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const;
393 
394  int cacheLineSize() const { return _cacheLineSize; }
395 
396  bool
397  sendToLds(GPUDynInstPtr gpuDynInst) __attribute__((warn_unused_result));
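    // The warn_unused_result attribute forces callers to check the result; a
    // false return means the LDS did not accept the request this cycle, so a
    // caller is expected to do something like (illustrative only):
    //     if (!sendToLds(gpuDynInst)) { /* keep the request queued, retry */ }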
398 
399  typedef std::unordered_map<Addr, std::pair<int, int>> pageDataStruct;
400  pageDataStruct pageAccesses;
401 
402  class CUExitCallback : public Callback
403  {
404  private:
405         ComputeUnit *computeUnit;
406 
407  public:
408  virtual ~CUExitCallback() { }
409 
410         CUExitCallback(ComputeUnit *_cu)
411         {
412  computeUnit = _cu;
413  }
414 
415  virtual void
416  process();
417  };
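    // This callback is meant to be handed to the simulator's exit-callback
    // queue (e.g. via registerExitCallback()), so that process() runs at
    // simulation exit and can report the per-page counts accumulated in
    // pageAccesses.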
418 
419     CUExitCallback *cuExitCallback;
420 
422  class DataPort : public MasterPort
423  {
424  public:
425  DataPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
426  : MasterPort(_name, _cu), computeUnit(_cu),
427  index(_index) { }
428 
430 
431         struct SenderState : public Packet::SenderState
432         {
433             GPUDynInstPtr _gpuDynInst;
434             PortID port_index;
435             Packet::SenderState *saved;
436 
437  SenderState(GPUDynInstPtr gpuDynInst, PortID _port_index,
438  Packet::SenderState *sender_state=nullptr)
439  : _gpuDynInst(gpuDynInst),
440  port_index(_port_index),
441  saved(sender_state) { }
442  };
443 
444  void processMemReqEvent(PacketPtr pkt);
445  EventFunctionWrapper *createMemReqEvent(PacketPtr pkt);
446 
447  void processMemRespEvent(PacketPtr pkt);
448  EventFunctionWrapper *createMemRespEvent(PacketPtr pkt);
449 
450         std::deque<std::pair<PacketPtr, GPUDynInstPtr>> retries;
451 
452  protected:
453         ComputeUnit *computeUnit;
454         int index;
455 
456  virtual bool recvTimingResp(PacketPtr pkt);
457  virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
458  virtual void recvFunctional(PacketPtr pkt) { }
459  virtual void recvRangeChange() { }
460  virtual void recvReqRetry();
461 
462  virtual void
463         getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
464         {
465  resp.clear();
466  snoop = true;
467  }
468 
469  };
470 
471  // Instruction cache access port
472  class SQCPort : public MasterPort
473  {
474  public:
475  SQCPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
476  : MasterPort(_name, _cu), computeUnit(_cu),
477  index(_index) { }
478 
480 
481         struct SenderState : public Packet::SenderState
482         {
483             Wavefront *wavefront;
484             Packet::SenderState *saved;
485 
486             SenderState(Wavefront *_wavefront, Packet::SenderState
487                         *sender_state=nullptr)
488  : wavefront(_wavefront), saved(sender_state) { }
489  };
490 
491         std::deque<std::pair<PacketPtr, Wavefront*>> retries;
492 
493  protected:
494         ComputeUnit *computeUnit;
495         int index;
496 
497  virtual bool recvTimingResp(PacketPtr pkt);
498  virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
499  virtual void recvFunctional(PacketPtr pkt) { }
500  virtual void recvRangeChange() { }
501  virtual void recvReqRetry();
502 
503  virtual void
504         getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
505         {
506  resp.clear();
507  snoop = true;
508  }
509  };
510 
512  class DTLBPort : public MasterPort
513  {
514  public:
515  DTLBPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
516  : MasterPort(_name, _cu), computeUnit(_cu),
517  index(_index), stalled(false)
518  { }
519 
520  bool isStalled() { return stalled; }
521  void stallPort() { stalled = true; }
522  void unstallPort() { stalled = false; }
523 
528         std::deque<PacketPtr> retries;
529 
533         struct SenderState : public Packet::SenderState
534         {
535  // the memInst that this is associated with
536             GPUDynInstPtr _gpuDynInst;
537 
538  // the lane in the memInst this is associated with, so we send
539  // the memory request down the right port
540             PortID portIndex;
541 
542  // constructor used for packets involved in timing accesses
543  SenderState(GPUDynInstPtr gpuDynInst, PortID port_index)
544  : _gpuDynInst(gpuDynInst), portIndex(port_index) { }
545 
546  };
547 
548  protected:
549         ComputeUnit *computeUnit;
550         int index;
551  bool stalled;
552 
553  virtual bool recvTimingResp(PacketPtr pkt);
554  virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
555  virtual void recvFunctional(PacketPtr pkt) { }
556  virtual void recvRangeChange() { }
557  virtual void recvReqRetry();
558  };
559 
560  class ITLBPort : public MasterPort
561  {
562  public:
563  ITLBPort(const std::string &_name, ComputeUnit *_cu)
564  : MasterPort(_name, _cu), computeUnit(_cu), stalled(false) { }
565 
566 
567  bool isStalled() { return stalled; }
568  void stallPort() { stalled = true; }
569  void unstallPort() { stalled = false; }
570 
575         std::deque<PacketPtr> retries;
576 
580         struct SenderState : public Packet::SenderState
581         {
582  // The wavefront associated with this request
583             Wavefront *wavefront;
584 
585  SenderState(Wavefront *_wavefront) : wavefront(_wavefront) { }
586  };
587 
588  protected:
589         ComputeUnit *computeUnit;
590         bool stalled;
591 
592  virtual bool recvTimingResp(PacketPtr pkt);
593  virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
594  virtual void recvFunctional(PacketPtr pkt) { }
595  virtual void recvRangeChange() { }
596  virtual void recvReqRetry();
597  };
598 
602  class LDSPort : public MasterPort
603  {
604  public:
605  LDSPort(const std::string &_name, ComputeUnit *_cu, PortID _id)
606  : MasterPort(_name, _cu, _id), computeUnit(_cu)
607  {
608  }
609 
610  bool isStalled() const { return stalled; }
611  void stallPort() { stalled = true; }
612  void unstallPort() { stalled = false; }
613 
618  std::queue<PacketPtr> retries;
619 
624         class SenderState : public Packet::SenderState
625         {
626  protected:
627  // The actual read/write/atomic request that goes with this command
628  GPUDynInstPtr _gpuDynInst = nullptr;
629 
630  public:
631             SenderState(GPUDynInstPtr gpuDynInst):
632               _gpuDynInst(gpuDynInst)
633  {
634  }
635 
636             GPUDynInstPtr
637             getMemInst() const
638  {
639  return _gpuDynInst;
640  }
641  };
642 
643  virtual bool
644  sendTimingReq(PacketPtr pkt);
645 
646  protected:
647 
648  bool stalled = false;
649 
650         ComputeUnit *computeUnit;
651 
652  virtual bool
653  recvTimingResp(PacketPtr pkt);
654 
655  virtual Tick
656  recvAtomic(PacketPtr pkt) { return 0; }
657 
658  virtual void
659         recvFunctional(PacketPtr pkt)
660         {
661  }
662 
663  virtual void
664         recvRangeChange()
665         {
666  }
667 
668  virtual void
669  recvReqRetry();
670  };
671 
675  LDSPort *ldsPort = nullptr;
676 
677  LDSPort *
678  getLdsPort() const
679  {
680  return ldsPort;
681  }
682 
683     /** The memory port for SIMD data accesses
684      *  Can be connected to PhysMem for Ruby for timing simulations
685      */
686     std::vector<DataPort*> memPort;
687     // port to the TLB hierarchy (i.e., the L1 TLB)
688     std::vector<DTLBPort*> tlbPort;
689     // port to the SQC (i.e. the I-cache)
690     SQCPort *sqcPort;
691     // port to the SQC TLB (there's a separate TLB for each I-cache)
692     ITLBPort *sqcTLBPort;
693 
694  Port &
695  getPort(const std::string &if_name, PortID idx) override
696  {
697  if (if_name == "memory_port") {
698  memPort[idx] = new DataPort(csprintf("%s-port%d", name(), idx),
699  this, idx);
700  return *memPort[idx];
701  } else if (if_name == "translation_port") {
702  tlbPort[idx] = new DTLBPort(csprintf("%s-port%d", name(), idx),
703  this, idx);
704  return *tlbPort[idx];
705  } else if (if_name == "sqc_port") {
706  sqcPort = new SQCPort(csprintf("%s-port%d", name(), idx),
707  this, idx);
708  return *sqcPort;
709  } else if (if_name == "sqc_tlb_port") {
710  sqcTLBPort = new ITLBPort(csprintf("%s-port", name()), this);
711  return *sqcTLBPort;
712  } else if (if_name == "ldsPort") {
713  if (ldsPort) {
714  fatal("an LDS port was already allocated");
715  }
716  ldsPort = new LDSPort(csprintf("%s-port", name()), this, idx);
717  return *ldsPort;
718  } else {
719  panic("incorrect port name");
720  }
721  }
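    // The names matched above ("memory_port", "translation_port", "sqc_port",
    // "sqc_tlb_port", "ldsPort") are what a configuration script binds to;
    // memory_port and translation_port are vector ports (note the idx),
    // while the SQC, SQC TLB, and LDS ports are single connections.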
722 
723  // xact_cas_load()
724     class waveIdentifier
725     {
726  public:
728  waveIdentifier(int _simdId, int _wfSlotId)
729  : simdId(_simdId), wfSlotId(_wfSlotId) { }
730 
731  int simdId;
732  int wfSlotId;
733  };
734 
735  class waveQueue
736  {
737  public:
738         std::list<waveIdentifier> waveIDQueue;
739     };
740  std::map<unsigned, waveQueue> xactCasLoadMap;
741 
742  uint64_t getAndIncSeqNum() { return globalSeqNum++; }
743 
744  private:
745  const int _cacheLineSize;
746  uint64_t globalSeqNum;
747     int wavefrontSize;
748     GPUStaticInst *kernelLaunchInst;
749 };
750 
751 #endif // __COMPUTE_UNIT_HH__