gem5  v20.1.0.0
compute_unit.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #ifndef __COMPUTE_UNIT_HH__
35 #define __COMPUTE_UNIT_HH__
36 
37 #include <deque>
38 #include <map>
39 #include <unordered_set>
40 #include <vector>
41 
42 #include "base/callback.hh"
43 #include "base/statistics.hh"
44 #include "base/types.hh"
45 #include "config/the_gpu_isa.hh"
46 #include "enums/PrefetchType.hh"
47 #include "gpu-compute/comm.hh"
57 #include "mem/port.hh"
58 #include "mem/token_port.hh"
59 #include "sim/clocked_object.hh"
60 
61 class HSAQueueEntry;
62 class LdsChunk;
63 class ScalarRegisterFile;
64 class Shader;
65 class VectorRegisterFile;
66 
67 struct ComputeUnitParams;
68 
70 {
71  OLDEST = 0,
73 };
74 
76 {
81 };
82 
87 class WFBarrier
88 {
89  public:
91  {
92  }
93 
94  static const int InvalidID = -1;
95 
96  int
97  numAtBarrier() const
98  {
99  return _numAtBarrier;
100  }
101 
105  int
107  {
108  return _maxBarrierCnt - _numAtBarrier;
109  }
110 
111  int
113  {
114  return _maxBarrierCnt;
115  }
116 
121  void
122  setMaxBarrierCnt(int max_barrier_cnt)
123  {
124  _maxBarrierCnt = max_barrier_cnt;
125  }
126 
130  void
132  {
133  assert(_numAtBarrier < _maxBarrierCnt);
134  ++_numAtBarrier;
135  }
136 
142  bool
143  allAtBarrier() const
144  {
145  return _numAtBarrier == _maxBarrierCnt;
146  }
147 
152  void
154  {
155  assert(_maxBarrierCnt > 0);
156  --_maxBarrierCnt;
157  }
158 
163  void
165  {
166  _numAtBarrier = 0;
167  _maxBarrierCnt = 0;
168  }
169 
174  void
176  {
177  _numAtBarrier = 0;
178  }
179 
180  private:
187 
196 };
197 
199 {
200  public:
201 
202 
203  // Execution resources
204  //
205  // The ordering of units is:
206  // Vector ALUs
207  // Scalar ALUs
208  // GM Pipe
209  // LM Pipe
210  // Scalar Mem Pipe
211  //
212  // Note: the ordering of units is important and the code assumes the
213  // above ordering. However, there may be more than one resource of
214  // each type (e.g., 4 VALUs or 2 SALUs)
215 
217  // Resource control for global memory to VRF data/address bus
219  // Resource control for Vector Register File->Global Memory pipe buses
221  // Resource control for Vector Global Memory execution unit
223 
225  // Resource control for local memory to VRF data/address bus
227  // Resource control for Vector Register File->Local Memory pipe buses
229  // Resource control for Vector Shared/Local Memory execution unit
231 
233  // Resource control for scalar memory to SRF data/address bus
235  // Resource control for Scalar Register File->Scalar Memory pipe buses
237  // Resource control for Scalar Memory execution unit
239 
240  // vector ALU execution resources
243 
244  // scalar ALU execution resources
247 
248  // Return total number of execution units on this CU
249  int numExeUnits() const;
250  // index into readyList of the first memory unit
251  int firstMemUnit() const;
252  // index into readyList of the last memory unit
253  int lastMemUnit() const;
254  // index into scalarALUs vector of SALU used by the wavefront
255  int mapWaveToScalarAlu(Wavefront *w) const;
256  // index into readyList of SALU used by wavefront
258  // index into readyList of Global Memory unit used by wavefront
259  int mapWaveToGlobalMem(Wavefront *w) const;
260  // index into readyList of Local Memory unit used by wavefront
261  int mapWaveToLocalMem(Wavefront *w) const;
262  // index into readyList of Scalar Memory unit used by wavefront
263  int mapWaveToScalarMem(Wavefront *w) const;
264 
265  int vrfToCoalescerBusWidth; // VRF->Coalescer data bus width in bytes
266  int coalescerToVrfBusWidth; // Coalescer->VRF data bus width in bytes
267  int numCyclesPerStoreTransfer; // number of cycles per vector store
268  int numCyclesPerLoadTransfer; // number of cycles per vector load
269 
270  // track presence of dynamic instructions in the Schedule pipeline
271  // stage. This is used to check the readiness of the oldest,
272  // non-dispatched instruction of every WF in the Scoreboard stage.
273  std::unordered_set<uint64_t> pipeMap;
274 
276 
284 
286 
287  typedef ComputeUnitParams Params;
289  int cu_id;
290 
291  // array of vector register files, one per SIMD
293  // array of scalar register files, one per SIMD
295 
296  // Width per VALU/SIMD unit: number of work items that can be executed
297  // on the vector ALU simultaneously in a SIMD unit
299  // number of pipe stages for bypassing data to next dependent single
300  // precision vector instruction inside the vector ALU pipeline
302  // number of pipe stages for bypassing data to next dependent double
303  // precision vector instruction inside the vector ALU pipeline
305  // number of pipe stages for scalar ALU
307  // number of pipe stages for operand collection & distribution network
309  // number of cycles per instruction issue period
311 
312  // VRF to GM Bus latency
314  // SRF to Scalar Mem Bus latency
316  // VRF to LM Bus latency
318 
319  // tracks the last cycle a vector instruction was executed on a SIMD
321 
322  // Track the amount of interleaving between wavefronts on each SIMD.
323  // This stat is sampled using instExecPerSimd to compute the number of
324  // instructions that have been executed on a SIMD between a WF executing
325  // two successive instructions.
327 
328  // tracks the number of dyn inst executed per SIMD
330 
331  // true if we allow a separate TLB per lane
333  // if 0, TLB prefetching is off.
335  // if fixed-stride prefetching, this is the stride.
337 
341  Enums::PrefetchType prefetchType;
343 
345  // Idle CU timeout in ticks
347  int idleWfs;
350 
351  /*
352  * for Counting page accesses
353  */
355 
357 
360 
368 
369  // number of currently reserved vector registers per SIMD unit
371  // number of currently reserved scalar registers per SIMD unit
373  // number of vector registers per SIMD unit
375  // number of available scalar registers per SIMD unit
377 
378  // this hash map will keep track of page divergence
379  // per memory instruction per wavefront. The hash map
380  // is cleared in GPUDynInst::updateStats() in gpu_dyn_inst.cc.
381  std::map<Addr, int> pagesTouched;
382 
383  void insertInPipeMap(Wavefront *w);
385 
386  ComputeUnit(const Params *p);
387  ~ComputeUnit();
388 
389  // Timing Functions
390  int oprNetPipeLength() const { return operandNetworkLength; }
391  int simdUnitWidth() const { return simdWidth; }
392  int spBypassLength() const { return spBypassPipeLength; }
393  int dpBypassLength() const { return dpBypassPipeLength; }
394  int scalarPipeLength() const { return scalarPipeStages; }
396  int loadBusLength() const { return numCyclesPerLoadTransfer; }
397  int wfSize() const { return wavefrontSize; }
398 
399  void exec();
400  void initiateFetch(Wavefront *wavefront);
401  void fetch(PacketPtr pkt, Wavefront *wavefront);
403 
404  void startWavefront(Wavefront *w, int waveId, LdsChunk *ldsChunk,
405  HSAQueueEntry *task, int bar_id,
406  bool fetchContext=false);
407 
408  void doInvalidate(RequestPtr req, int kernId);
409  void doFlush(GPUDynInstPtr gpuDynInst);
410 
411  void dispWorkgroup(HSAQueueEntry *task, int num_wfs_in_wg);
412  bool hasDispResources(HSAQueueEntry *task, int &num_wfs_in_wg);
413 
414  int cacheLineSize() const { return _cacheLineSize; }
415  int getCacheLineBits() const { return cacheLineBits; }
416 
417  private:
418  WFBarrier&
419  barrierSlot(int bar_id)
420  {
421  assert(bar_id > WFBarrier::InvalidID);
422  return wfBarrierSlots.at(bar_id);
423  }
424 
425  int
427  {
428  assert(freeBarrierIds.size());
429  auto free_bar_id = freeBarrierIds.begin();
430  int bar_id = *free_bar_id;
431  freeBarrierIds.erase(free_bar_id);
432  return bar_id;
433  }
434 
435  public:
436  int numYetToReachBarrier(int bar_id);
437  bool allAtBarrier(int bar_id);
438  void incNumAtBarrier(int bar_id);
439  int numAtBarrier(int bar_id);
440  int maxBarrierCnt(int bar_id);
441  void resetBarrier(int bar_id);
442  void decMaxBarrierCnt(int bar_id);
443  void releaseBarrier(int bar_id);
444  void releaseWFsFromBarrier(int bar_id);
445  int numBarrierSlots() const { return _numBarrierSlots; }
446 
447  template<typename c0, typename c1>
448  void doSmReturn(GPUDynInstPtr gpuDynInst);
449 
450  virtual void init() override;
451  void sendRequest(GPUDynInstPtr gpuDynInst, PortID index, PacketPtr pkt);
452  void sendScalarRequest(GPUDynInstPtr gpuDynInst, PacketPtr pkt);
453  void injectGlobalMemFence(GPUDynInstPtr gpuDynInst,
454  bool kernelMemSync,
455  RequestPtr req=nullptr);
456  void handleMemPacket(PacketPtr pkt, int memport_index);
457  bool processTimingPacket(PacketPtr pkt);
458  void processFetchReturn(PacketPtr pkt);
460 
462 
463  bool isDone() const;
464  bool isVectorAluIdle(uint32_t simdId) const;
465 
466  protected:
468 
470 
471  public:
494 
501 
502  // Cycles required to send register source (addr and data) from
503  // register files to memory pipeline, per SIMD.
507 
529 
532 
533  void updateInstStats(GPUDynInstPtr gpuDynInst);
534 
535  // the following stats compute the avg. TLB access latency per
536  // uncoalesced request (only for data)
540  // hitsPerTLBLevel[x] are the hits in Level x TLB. x = 0 is the page table.
542 
545 
546  // over all memory instructions executed over all wavefronts
547  // how many touched 0-4 pages, 4-8, ..., 60-64 pages
549  // count of non-flat global memory vector instructions executed
551  // count of flat global memory vector instructions executed
554 
557  // Number of instructions executed, i.e. if 64 (or 32 or 7) lanes are
558  // active when the instruction is committed, this number is still
559  // incremented by 1
561  // Number of cycles among successive instruction executions across all
562  // wavefronts of the same CU
564  // number of individual vector operations executed
566  // number of individual f16 vector operations executed
568  // number of individual f32 vector operations executed
570  // number of individual f64 vector operations executed
572  // number of individual FMA 16,32,64 vector operations executed
576  // number of individual MAC 16,32,64 vector operations executed
580  // number of individual MAD 16,32,64 vector operations executed
584  // total number of two op FP vector operations executed
586  // Total cycles that something is running on the GPU
588  Stats::Formula vpc; // vector ops per cycle
589  Stats::Formula vpc_f16; // vector ops per cycle
590  Stats::Formula vpc_f32; // vector ops per cycle
591  Stats::Formula vpc_f64; // vector ops per cycle
592  Stats::Formula ipc; // vector instructions per cycle
596  // number of vector ALU instructions received
598  // number of times a WG can not start due to lack of free VGPRs in SIMDs
600  // number of times a WG can not start due to lack of free SGPRs in SIMDs
606 
607  // distribution in latency difference between first and last cache block
608  // arrival ticks
610 
611  void
612  regStats() override;
613 
614  LdsState &
615  getLds() const
616  {
617  return lds;
618  }
619 
620  int32_t
621  getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const;
622 
623  bool
624  sendToLds(GPUDynInstPtr gpuDynInst) __attribute__((warn_unused_result));
625 
626  typedef std::unordered_map<Addr, std::pair<int, int>> pageDataStruct;
628 
629  void exitCallback();
630 
632  {
633  public:
634  GMTokenPort(const std::string& name, SimObject *owner,
635  PortID id = InvalidPortID)
637  { }
639 
640  protected:
641  bool recvTimingResp(PacketPtr) { return false; }
642  void recvReqRetry() { }
643  };
644 
645  // Manager for the number of tokens available to this compute unit to
646  // send global memory request packets to the coalescer. This is only used
647  // between global memory pipe and TCP coalescer.
650 
652  class DataPort : public RequestPort
653  {
654  public:
655  DataPort(const std::string &_name, ComputeUnit *_cu, PortID id)
656  : RequestPort(_name, _cu, id), computeUnit(_cu) { }
657 
659 
661  {
665 
666  SenderState(GPUDynInstPtr gpuDynInst, PortID _port_index,
667  Packet::SenderState *sender_state=nullptr)
668  : _gpuDynInst(gpuDynInst),
669  port_index(_port_index),
670  saved(sender_state) { }
671  };
672 
673  void processMemReqEvent(PacketPtr pkt);
675 
676  void processMemRespEvent(PacketPtr pkt);
678 
680 
681  protected:
683 
684  virtual bool recvTimingResp(PacketPtr pkt);
685  virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
686  virtual void recvFunctional(PacketPtr pkt) { }
687  virtual void recvRangeChange() { }
688  virtual void recvReqRetry();
689 
690  virtual void
692  {
693  resp.clear();
694  snoop = true;
695  }
696 
697  };
698 
699  // Scalar data cache access port
701  {
702  public:
703  ScalarDataPort(const std::string &_name, ComputeUnit *_cu)
704  : RequestPort(_name, _cu), computeUnit(_cu)
705  {
706  }
707 
708  bool recvTimingResp(PacketPtr pkt) override;
709  void recvReqRetry() override;
710 
712  {
714  Packet::SenderState *sender_state=nullptr)
715  : _gpuDynInst(gpuDynInst), saved(sender_state)
716  {
717  }
718 
721  };
722 
723  class MemReqEvent : public Event
724  {
725  private:
728 
729  public:
730  MemReqEvent(ScalarDataPort &_scalar_data_port, PacketPtr _pkt)
731  : Event(), scalarDataPort(_scalar_data_port), pkt(_pkt)
732  {
734  }
735 
736  void process();
737  const char *description() const;
738  };
739 
741 
742  private:
744  };
745 
746  // Instruction cache access port
747  class SQCPort : public RequestPort
748  {
749  public:
750  SQCPort(const std::string &_name, ComputeUnit *_cu)
751  : RequestPort(_name, _cu), computeUnit(_cu) { }
752 
754 
756  {
759  // kernel id to be used in handling I-Cache invalidate response
760  int kernId;
761 
763  *sender_state=nullptr, int _kernId=-1)
764  : wavefront(_wavefront), saved(sender_state),
765  kernId(_kernId){ }
766  };
767 
769 
770  protected:
772 
773  virtual bool recvTimingResp(PacketPtr pkt);
774  virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
775  virtual void recvFunctional(PacketPtr pkt) { }
776  virtual void recvRangeChange() { }
777  virtual void recvReqRetry();
778 
779  virtual void
781  {
782  resp.clear();
783  snoop = true;
784  }
785  };
786 
788  class DTLBPort : public RequestPort
789  {
790  public:
791  DTLBPort(const std::string &_name, ComputeUnit *_cu, PortID id)
792  : RequestPort(_name, _cu, id), computeUnit(_cu),
793  stalled(false)
794  { }
795 
796  bool isStalled() { return stalled; }
797  void stallPort() { stalled = true; }
798  void unstallPort() { stalled = false; }
799 
805 
810  {
811  // the memInst that this is associated with
813 
814  // the lane in the memInst this is associated with, so we send
815  // the memory request down the right port
817 
818  // constructor used for packets involved in timing accesses
819  SenderState(GPUDynInstPtr gpuDynInst, PortID port_index)
820  : _gpuDynInst(gpuDynInst), portIndex(port_index) { }
821 
822  };
823 
824  protected:
826  bool stalled;
827 
828  virtual bool recvTimingResp(PacketPtr pkt);
829  virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
830  virtual void recvFunctional(PacketPtr pkt) { }
831  virtual void recvRangeChange() { }
832  virtual void recvReqRetry();
833  };
834 
836  {
837  public:
838  ScalarDTLBPort(const std::string &_name, ComputeUnit *_cu)
839  : RequestPort(_name, _cu), computeUnit(_cu), stalled(false)
840  {
841  }
842 
844  {
845  SenderState(GPUDynInstPtr gpuDynInst) : _gpuDynInst(gpuDynInst) { }
847  };
848 
849  bool recvTimingResp(PacketPtr pkt) override;
850  void recvReqRetry() override { assert(false); }
851 
852  bool isStalled() const { return stalled; }
853  void stallPort() { stalled = true; }
854  void unstallPort() { stalled = false; }
855 
857 
858  private:
860  bool stalled;
861  };
862 
863  class ITLBPort : public RequestPort
864  {
865  public:
866  ITLBPort(const std::string &_name, ComputeUnit *_cu)
867  : RequestPort(_name, _cu), computeUnit(_cu), stalled(false) { }
868 
869 
870  bool isStalled() { return stalled; }
871  void stallPort() { stalled = true; }
872  void unstallPort() { stalled = false; }
873 
879 
884  {
885  // The wavefront associated with this request
887 
888  SenderState(Wavefront *_wavefront) : wavefront(_wavefront) { }
889  };
890 
891  protected:
893  bool stalled;
894 
895  virtual bool recvTimingResp(PacketPtr pkt);
896  virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
897  virtual void recvFunctional(PacketPtr pkt) { }
898  virtual void recvRangeChange() { }
899  virtual void recvReqRetry();
900  };
901 
905  class LDSPort : public RequestPort
906  {
907  public:
908  LDSPort(const std::string &_name, ComputeUnit *_cu)
909  : RequestPort(_name, _cu), computeUnit(_cu)
910  {
911  }
912 
913  bool isStalled() const { return stalled; }
914  void stallPort() { stalled = true; }
915  void unstallPort() { stalled = false; }
916 
921  std::queue<PacketPtr> retries;
922 
928  {
929  protected:
930  // The actual read/write/atomic request that goes with this command
932 
933  public:
935  _gpuDynInst(gpuDynInst)
936  {
937  }
938 
940  getMemInst() const
941  {
942  return _gpuDynInst;
943  }
944  };
945 
946  virtual bool
948 
949  protected:
950 
951  bool stalled = false;
952 
954 
955  virtual bool
957 
958  virtual Tick
959  recvAtomic(PacketPtr pkt) { return 0; }
960 
961  virtual void
963  {
964  }
965 
966  virtual void
968  {
969  }
970 
971  virtual void
972  recvReqRetry();
973  };
974 
979 
980  TokenManager *
982  {
983  return memPortTokens;
984  }
985 
990  // port to the TLB hierarchy (i.e., the L1 TLB)
992  // port to the scalar data cache
994  // port to the scalar data TLB
996  // port to the SQC (i.e. the I-cache)
998  // port to the SQC TLB (there's a separate TLB for each I-cache)
1000 
1001  Port &
1002  getPort(const std::string &if_name, PortID idx) override
1003  {
1004  if (if_name == "memory_port" && idx < memPort.size()) {
1005  return memPort[idx];
1006  } else if (if_name == "translation_port" && idx < tlbPort.size()) {
1007  return tlbPort[idx];
1008  } else if (if_name == "scalar_port") {
1009  return scalarDataPort;
1010  } else if (if_name == "scalar_tlb_port") {
1011  return scalarDTLBPort;
1012  } else if (if_name == "sqc_port") {
1013  return sqcPort;
1014  } else if (if_name == "sqc_tlb_port") {
1015  return sqcTLBPort;
1016  } else if (if_name == "ldsPort") {
1017  return ldsPort;
1018  } else {
1019  return ClockedObject::getPort(if_name, idx);
1020  }
1021  }
1022 
1024 
1025  private:
1026  const int _cacheLineSize;
1027  const int _numBarrierSlots;
1031 
1066 
1074  std::unordered_set<int> freeBarrierIds;
1075 
1076  // hold the time of the arrival of the first cache block related to
1077  // a particular GPUDynInst. This is used to calculate the difference
1078  // between the first and last cache block arrival times.
1079  std::unordered_map<GPUDynInstPtr, Tick> headTailMap;
1080 };
1081 
1082 #endif // __COMPUTE_UNIT_HH__
ComputeUnit::controlFlowDivergenceDist
Stats::Distribution controlFlowDivergenceDist
Definition: compute_unit.hh:593
ComputeUnit::DataPort::createMemReqEvent
EventFunctionWrapper * createMemReqEvent(PacketPtr pkt)
Definition: compute_unit.cc:1558
ComputeUnit::vectorALUs
std::vector< WaitClass > vectorALUs
Definition: compute_unit.hh:242
ComputeUnit::ITLBPort::isStalled
bool isStalled()
Definition: compute_unit.hh:870
ComputeUnit::vectorSharedMemUnit
WaitClass vectorSharedMemUnit
Definition: compute_unit.hh:230
ComputeUnit::ITLBPort
Definition: compute_unit.hh:863
ComputeUnit::SQCPort::retries
std::deque< std::pair< PacketPtr, Wavefront * > > retries
Definition: compute_unit.hh:768
ComputeUnit::LDSPort::recvRangeChange
virtual void recvRangeChange()
Called to receive an address range change from the peer response port.
Definition: compute_unit.hh:967
ComputeUnit::globalWrites
Stats::Scalar globalWrites
Definition: compute_unit.hh:509
ComputeUnit::LDSPort::computeUnit
ComputeUnit * computeUnit
Definition: compute_unit.hh:953
hsa_queue_entry.hh
ComputeUnit::DTLBPort::SenderState::SenderState
SenderState(GPUDynInstPtr gpuDynInst, PortID port_index)
Definition: compute_unit.hh:819
ComputeUnit::vectorMemWrites
Stats::Scalar vectorMemWrites
Definition: compute_unit.hh:486
ComputeUnit::ScalarDataPort::MemReqEvent::MemReqEvent
MemReqEvent(ScalarDataPort &_scalar_data_port, PacketPtr _pkt)
Definition: compute_unit.hh:730
EventBase::AutoDelete
static const FlagsType AutoDelete
Definition: eventq.hh:102
ExecStage
Definition: exec_stage.hh:71
ComputeUnit::privWrites
Stats::Scalar privWrites
Definition: compute_unit.hh:521
ComputeUnit::ITLBPort::stalled
bool stalled
Definition: compute_unit.hh:893
ComputeUnit::numScalarRegsPerSimd
int numScalarRegsPerSimd
Definition: compute_unit.hh:376
ComputeUnit::ScalarDTLBPort::recvReqRetry
void recvReqRetry() override
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Definition: compute_unit.hh:850
ComputeUnit::LDSPort::stallPort
void stallPort()
Definition: compute_unit.hh:914
ComputeUnit::GMTokenPort::~GMTokenPort
~GMTokenPort()
Definition: compute_unit.hh:638
ComputeUnit::prefetchType
Enums::PrefetchType prefetchType
Definition: compute_unit.hh:341
ComputeUnit::ScalarDataPort::MemReqEvent::description
const char * description() const
Return a C string describing the event.
Definition: compute_unit.cc:1597
ComputeUnit::sALUInsts
Stats::Scalar sALUInsts
Definition: compute_unit.hh:474
ComputeUnit::ScalarDataPort::MemReqEvent::scalarDataPort
ScalarDataPort & scalarDataPort
Definition: compute_unit.hh:726
LocalMemPipeline
Definition: local_memory_pipeline.hh:55
ComputeUnit::resetBarrier
void resetBarrier(int bar_id)
Definition: compute_unit.cc:666
ComputeUnit::deleteFromPipeMap
void deleteFromPipeMap(Wavefront *w)
Definition: compute_unit.cc:491
ComputeUnit::req_tick_latency
Tick req_tick_latency
Definition: compute_unit.hh:358
ComputeUnit::numInstrExecuted
Stats::Scalar numInstrExecuted
Definition: compute_unit.hh:560
ComputeUnit::simdUnitWidth
int simdUnitWidth() const
Definition: compute_unit.hh:391
ComputeUnit::initiateFetch
void initiateFetch(Wavefront *wavefront)
ComputeUnit::pagesTouched
std::map< Addr, int > pagesTouched
Definition: compute_unit.hh:381
ComputeUnit::init
virtual void init() override
init() is called after all C++ SimObjects have been created and all ports are connected.
Definition: compute_unit.cc:736
ComputeUnit::vrfToGlobalMemPipeBus
WaitClass vrfToGlobalMemPipeBus
Definition: compute_unit.hh:220
ComputeUnit::loadBusLength
int loadBusLength() const
Definition: compute_unit.hh:396
ComputeUnit::DTLBPort::stalled
bool stalled
Definition: compute_unit.hh:826
ComputeUnit::mapWaveToScalarAluGlobalIdx
int mapWaveToScalarAluGlobalIdx(Wavefront *w) const
Definition: compute_unit.cc:261
TLB_HIT_CACHE_MISS
@ TLB_HIT_CACHE_MISS
Definition: compute_unit.hh:79
WFBarrier::maxBarrierCnt
int maxBarrierCnt() const
Definition: compute_unit.hh:112
ComputeUnit::ldsBankConflictDist
Stats::Distribution ldsBankConflictDist
Definition: compute_unit.hh:544
WFBarrier::incNumAtBarrier
void incNumAtBarrier()
Mark that a WF has reached the barrier.
Definition: compute_unit.hh:131
ComputeUnit::locMemToVrfBus
WaitClass locMemToVrfBus
Definition: compute_unit.hh:226
ComputeUnit::LDSPort::retries
std::queue< PacketPtr > retries
here we queue all the requests that were not successfully sent.
Definition: compute_unit.hh:921
Shader
Definition: shader.hh:87
ComputeUnit::ScalarDataPort::computeUnit
ComputeUnit * computeUnit
Definition: compute_unit.hh:743
ComputeUnit::vectorMemWritesPerWF
Stats::Formula vectorMemWritesPerWF
Definition: compute_unit.hh:487
WFBarrier::numYetToReachBarrier
int numYetToReachBarrier() const
Number of WFs that have not yet reached the barrier.
Definition: compute_unit.hh:106
ComputeUnit::numVecOpsExecutedF16
Stats::Scalar numVecOpsExecutedF16
Definition: compute_unit.hh:567
ComputeUnit::dispWorkgroup
void dispWorkgroup(HSAQueueEntry *task, int num_wfs_in_wg)
Definition: compute_unit.cc:404
ComputeUnit::numScalarMemUnits
int numScalarMemUnits
Definition: compute_unit.hh:232
ComputeUnit::ITLBPort::recvAtomic
virtual Tick recvAtomic(PacketPtr pkt)
Definition: compute_unit.hh:896
ComputeUnit::ldsNoFlatInstsPerWF
Stats::Formula ldsNoFlatInstsPerWF
Definition: compute_unit.hh:481
ComputeUnit::ScalarDataPort::ScalarDataPort
ScalarDataPort(const std::string &_name, ComputeUnit *_cu)
Definition: compute_unit.hh:703
MipsISA::index
Bitfield< 30, 0 > index
Definition: pra_constants.hh:44
ComputeUnit::SQCPort::SenderState::SenderState
SenderState(Wavefront *_wavefront, Packet::SenderState *sender_state=nullptr, int _kernId=-1)
Definition: compute_unit.hh:762
fetch_stage.hh
ComputeUnit::DTLBPort::DTLBPort
DTLBPort(const std::string &_name, ComputeUnit *_cu, PortID id)
Definition: compute_unit.hh:791
ComputeUnit::globalReads
Stats::Scalar globalReads
Definition: compute_unit.hh:508
ComputeUnit::vpc_f16
Stats::Formula vpc_f16
Definition: compute_unit.hh:589
ComputeUnit::ITLBPort::recvRangeChange
virtual void recvRangeChange()
Called to receive an address range change from the peer response port.
Definition: compute_unit.hh:898
comm.hh
ComputeUnit::SQCPort
Definition: compute_unit.hh:747
InvalidPortID
const PortID InvalidPortID
Definition: types.hh:238
ComputeUnit::localMemoryPipe
LocalMemPipeline localMemoryPipe
Definition: compute_unit.hh:282
ComputeUnit::completedWGs
Stats::Scalar completedWGs
Definition: compute_unit.hh:605
ComputeUnit::mapWaveToGlobalMem
int mapWaveToGlobalMem(Wavefront *w) const
Definition: compute_unit.cc:268
ComputeUnit::LDSPort::LDSPort
LDSPort(const std::string &_name, ComputeUnit *_cu)
Definition: compute_unit.hh:908
ComputeUnit::dynamicLMemInstrCnt
Stats::Scalar dynamicLMemInstrCnt
Definition: compute_unit.hh:553
ComputeUnit::srf_scm_bus_latency
Cycles srf_scm_bus_latency
Definition: compute_unit.hh:315
ComputeUnit::perLaneTLB
bool perLaneTLB
Definition: compute_unit.hh:332
ComputeUnit::DataPort::DataPort
DataPort(const std::string &_name, ComputeUnit *_cu, PortID id)
Definition: compute_unit.hh:655
ComputeUnit::instCyclesLdsPerSimd
Stats::Vector instCyclesLdsPerSimd
Definition: compute_unit.hh:506
global_memory_pipeline.hh
ComputeUnit::incNumAtBarrier
void incNumAtBarrier(int bar_id)
Definition: compute_unit.cc:645
ComputeUnit::argWrites
Stats::Scalar argWrites
Definition: compute_unit.hh:512
ComputeUnit::vrf_gm_bus_latency
Cycles vrf_gm_bus_latency
Definition: compute_unit.hh:313
ComputeUnit::ITLBPort::SenderState::wavefront
Wavefront * wavefront
Definition: compute_unit.hh:886
ComputeUnit::scalarDTLBPort
ScalarDTLBPort scalarDTLBPort
Definition: compute_unit.hh:995
ComputeUnit::mapWaveToScalarMem
int mapWaveToScalarMem(Wavefront *w) const
Definition: compute_unit.cc:284
ComputeUnit::lastMemUnit
int lastMemUnit() const
Definition: compute_unit.cc:243
ComputeUnit::mapWaveToLocalMem
int mapWaveToLocalMem(Wavefront *w) const
Definition: compute_unit.cc:276
ComputeUnit::ldsBankAccesses
Stats::Scalar ldsBankAccesses
Definition: compute_unit.hh:543
ComputeUnit::tlbLatency
Stats::Formula tlbLatency
Definition: compute_unit.hh:539
ComputeUnit::totalCycles
Stats::Scalar totalCycles
Definition: compute_unit.hh:587
ComputeUnit::numCyclesPerLoadTransfer
int numCyclesPerLoadTransfer
Definition: compute_unit.hh:268
ComputeUnit::readonlyMemInsts
Stats::Formula readonlyMemInsts
Definition: compute_unit.hh:525
ComputeUnit::DataPort::recvFunctional
virtual void recvFunctional(PacketPtr pkt)
Definition: compute_unit.hh:686
ComputeUnit::DTLBPort::recvRangeChange
virtual void recvRangeChange()
Called to receive an address range change from the peer response port.
Definition: compute_unit.hh:831
ComputeUnit::headTailMap
std::unordered_map< GPUDynInstPtr, Tick > headTailMap
Definition: compute_unit.hh:1079
ComputeUnit::numVecOpsExecutedMAD16
Stats::Scalar numVecOpsExecutedMAD16
Definition: compute_unit.hh:581
ComputeUnit::vALUInsts
Stats::Scalar vALUInsts
Definition: compute_unit.hh:472
ComputeUnit::operandNetworkLength
int operandNetworkLength
Definition: compute_unit.hh:308
ComputeUnit::numYetToReachBarrier
int numYetToReachBarrier(int bar_id)
Definition: compute_unit.cc:631
ComputeUnit::DTLBPort
Data TLB port.
Definition: compute_unit.hh:788
ComputeUnit::ITLBPort::retries
std::deque< PacketPtr > retries
here we queue all the translation requests that were not successfully sent.
Definition: compute_unit.hh:878
ComputeUnit::gmTokenPort
GMTokenPort gmTokenPort
Definition: compute_unit.hh:649
ComputeUnit::vpc_f64
Stats::Formula vpc_f64
Definition: compute_unit.hh:591
ComputeUnit::SQCPort::SenderState::wavefront
Wavefront * wavefront
Definition: compute_unit.hh:757
ComputeUnit::doFlush
void doFlush(GPUDynInstPtr gpuDynInst)
trigger flush operation in the cu
Definition: compute_unit.cc:399
ComputeUnit::kernargReads
Stats::Scalar kernargReads
Definition: compute_unit.hh:526
ComputeUnit::memPortTokens
TokenManager * memPortTokens
Definition: compute_unit.hh:648
ComputeUnit::SQCPort::getDeviceAddressRanges
virtual void getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
Definition: compute_unit.hh:780
FetchStage
Definition: fetch_stage.hh:51
ComputeUnit::readonlyWrites
Stats::Scalar readonlyWrites
Definition: compute_unit.hh:524
ComputeUnit::dynamicFlatMemInstrCnt
Stats::Scalar dynamicFlatMemInstrCnt
Definition: compute_unit.hh:552
Tick
uint64_t Tick
Tick count type.
Definition: types.hh:63
ComputeUnit::tlbRequests
Stats::Scalar tlbRequests
Definition: compute_unit.hh:537
ComputeUnit::ScalarDataPort::SenderState
Definition: compute_unit.hh:711
PortID
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Definition: types.hh:237
ComputeUnit::kernargMemInsts
Stats::Formula kernargMemInsts
Definition: compute_unit.hh:528
ComputeUnit::ComputeUnit
ComputeUnit(const Params *p)
Definition: compute_unit.cc:62
ComputeUnit::cu_id
int cu_id
Definition: compute_unit.hh:289
ComputeUnit::ScalarDataPort::MemReqEvent::process
void process()
Definition: compute_unit.cc:1603
ComputeUnit::numExeUnits
int numExeUnits() const
Definition: compute_unit.cc:228
ComputeUnit::ScalarDTLBPort::isStalled
bool isStalled() const
Definition: compute_unit.hh:852
RequestPtr
std::shared_ptr< Request > RequestPtr
Definition: request.hh:82
ComputeUnit::numVecOpsExecutedFMA32
Stats::Scalar numVecOpsExecutedFMA32
Definition: compute_unit.hh:574
TLB_MISS_CACHE_HIT
@ TLB_MISS_CACHE_HIT
Definition: compute_unit.hh:78
scoreboard_check_stage.hh
ComputeUnit::ScalarDataPort::SenderState::SenderState
SenderState(GPUDynInstPtr gpuDynInst, Packet::SenderState *sender_state=nullptr)
Definition: compute_unit.hh:713
ComputeUnit::activeLanesPerLMemInstrDist
Stats::Distribution activeLanesPerLMemInstrDist
Definition: compute_unit.hh:595
ComputeUnit::vectorGlobalMemUnit
WaitClass vectorGlobalMemUnit
Definition: compute_unit.hh:222
ComputeUnit::scalarMemWritesPerWF
Stats::Formula scalarMemWritesPerWF
Definition: compute_unit.hh:491
ComputeUnit::ITLBPort::ITLBPort
ITLBPort(const std::string &_name, ComputeUnit *_cu)
Definition: compute_unit.hh:866
ComputeUnit::registerManager
RegisterManager * registerManager
Definition: compute_unit.hh:275
WaitClass
Definition: misc.hh:64
ComputeUnit::functionalTLB
bool functionalTLB
Definition: compute_unit.hh:348
std::vector< WaitClass >
ScalarRegisterFile
Definition: scalar_register_file.hh:51
ComputeUnit::SQCPort::recvReqRetry
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Definition: compute_unit.cc:994
ComputeUnit::debugSegFault
bool debugSegFault
Definition: compute_unit.hh:344
ComputeUnit::readonlyReads
Stats::Scalar readonlyReads
Definition: compute_unit.hh:523
TLB_CACHE
TLB_CACHE
Definition: compute_unit.hh:75
ComputeUnit::vrf_lm_bus_latency
Cycles vrf_lm_bus_latency
Definition: compute_unit.hh:317
ComputeUnit::LDSPort::sendTimingReq
virtual bool sendTimingReq(PacketPtr pkt)
Attempt to send this packet; either the port is already stalled, the request is nack'd and must stall...
Definition: compute_unit.cc:2581
ComputeUnit::spillWrites
Stats::Scalar spillWrites
Definition: compute_unit.hh:515
ComputeUnit::DTLBPort::computeUnit
ComputeUnit * computeUnit
Definition: compute_unit.hh:825
ComputeUnit::ITLBPort::unstallPort
void unstallPort()
Definition: compute_unit.hh:872
ComputeUnit::ScalarDataPort::MemReqEvent
Definition: compute_unit.hh:723
ComputeUnit::hitsPerTLBLevel
Stats::Vector hitsPerTLBLevel
Definition: compute_unit.hh:541
ComputeUnit::scalarPipeLength
int scalarPipeLength() const
Definition: compute_unit.hh:394
ComputeUnit::vpc
Stats::Formula vpc
Definition: compute_unit.hh:588
ComputeUnit::numVecOpsExecutedF64
Stats::Scalar numVecOpsExecutedF64
Definition: compute_unit.hh:571
ComputeUnit::scheduleStage
ScheduleStage scheduleStage
Definition: compute_unit.hh:279
ComputeUnit::flatLDSInstsPerWF
Stats::Formula flatLDSInstsPerWF
Definition: compute_unit.hh:485
ComputeUnit::allAtBarrier
bool allAtBarrier(int bar_id)
Definition: compute_unit.cc:638
ComputeUnit::pageDivergenceDist
Stats::Distribution pageDivergenceDist
Definition: compute_unit.hh:548
EXEC_POLICY
EXEC_POLICY
Definition: compute_unit.hh:69
Stats::Vector
A vector of scalar stats.
Definition: statistics.hh:2575
ComputeUnit::ScalarDTLBPort::SenderState
Definition: compute_unit.hh:843
ComputeUnit::getLds
LdsState & getLds() const
Definition: compute_unit.hh:615
ComputeUnit::insertInPipeMap
void insertInPipeMap(Wavefront *w)
Definition: compute_unit.cc:482
ComputeUnit::sALUInstsPerWF
Stats::Formula sALUInstsPerWF
Definition: compute_unit.hh:475
HSAQueueEntry
Definition: hsa_queue_entry.hh:60
ComputeUnit::argMemInsts
Stats::Formula argMemInsts
Definition: compute_unit.hh:513
WFBarrier::_maxBarrierCnt
int _maxBarrierCnt
The maximum number of WFs that can reach this barrier.
Definition: compute_unit.hh:195
Stats::VectorDistribution
A vector of distributions.
Definition: statistics.hh:2723
ComputeUnit::groupWrites
Stats::Scalar groupWrites
Definition: compute_unit.hh:518
ComputeUnit::exitCallback
void exitCallback()
Definition: compute_unit.cc:2466
ComputeUnit::DTLBPort::recvFunctional
virtual void recvFunctional(PacketPtr pkt)
Definition: compute_unit.hh:830
ComputeUnit::vectorMemInstsPerKiloInst
Stats::Formula vectorMemInstsPerKiloInst
Definition: compute_unit.hh:497
RR
@ RR
Definition: compute_unit.hh:72
ComputeUnit::scalarPipeStages
int scalarPipeStages
Definition: compute_unit.hh:306
ComputeUnit::ITLBPort::computeUnit
ComputeUnit * computeUnit
Definition: compute_unit.hh:892
ComputeUnit::handleMemPacket
void handleMemPacket(PacketPtr pkt, int memport_index)
ComputeUnit::numVectorGlobalMemUnits
int numVectorGlobalMemUnits
Definition: compute_unit.hh:216
ComputeUnit::scalarMemWrites
Stats::Scalar scalarMemWrites
Definition: compute_unit.hh:490
ComputeUnit::DataPort::getDeviceAddressRanges
virtual void getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
Definition: compute_unit.hh:691
ComputeUnit::numVecOpsExecutedMAD64
Stats::Scalar numVecOpsExecutedMAD64
Definition: compute_unit.hh:583
ComputeUnit::LDSPort::SenderState
SenderState is information carried along with the packet, esp.
Definition: compute_unit.hh:927
ClockedObject
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
Definition: clocked_object.hh:231
ComputeUnit::scalarMemReads
Stats::Scalar scalarMemReads
Definition: compute_unit.hh:492
ComputeUnit::scalarALUs
std::vector< WaitClass > scalarALUs
Definition: compute_unit.hh:246
ComputeUnit::DataPort::SenderState::SenderState
SenderState(GPUDynInstPtr gpuDynInst, PortID _port_index, Packet::SenderState *sender_state=nullptr)
Definition: compute_unit.hh:666
ComputeUnit::ScalarDTLBPort::recvTimingResp
bool recvTimingResp(PacketPtr pkt) override
Receive a timing response from the peer.
Definition: compute_unit.cc:1663
EventFunctionWrapper
Definition: eventq.hh:1101
ComputeUnit::glbMemToVrfBus
WaitClass glbMemToVrfBus
Definition: compute_unit.hh:218
ComputeUnit::scalarMemReadsPerKiloInst
Stats::Formula scalarMemReadsPerKiloInst
Definition: compute_unit.hh:498
ComputeUnit::SQCPort::recvFunctional
virtual void recvFunctional(PacketPtr pkt)
Definition: compute_unit.hh:775
ComputeUnit::argReads
Stats::Scalar argReads
Definition: compute_unit.hh:511
ComputeUnit::ScalarDTLBPort::computeUnit
ComputeUnit * computeUnit
Definition: compute_unit.hh:859
ComputeUnit::DTLBPort::recvReqRetry
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Definition: compute_unit.cc:1632
ComputeUnit::ITLBPort::recvReqRetry
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Definition: compute_unit.cc:1776
ComputeUnit::sendToLds
bool sendToLds(GPUDynInstPtr gpuDynInst) __attribute__((warn_unused_result))
Send a general request to the LDS. Make sure to look at the return value here, as your request might be...
Definition: compute_unit.cc:2539
ComputeUnit::numTimesWgBlockedDueSgprAlloc
Stats::Scalar numTimesWgBlockedDueSgprAlloc
Definition: compute_unit.hh:601
Stats::Scalar
This is a simple scalar statistic, like a counter.
Definition: statistics.hh:2533
ComputeUnit::tickEvent
EventFunctionWrapper tickEvent
Definition: compute_unit.hh:285
ComputeUnit
Definition: compute_unit.hh:198
ComputeUnit::instCyclesVALU
Stats::Scalar instCyclesVALU
Definition: compute_unit.hh:476
RequestorID
uint16_t RequestorID
Definition: request.hh:85
ComputeUnit::DTLBPort::recvTimingResp
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.
Definition: compute_unit.cc:1390
ComputeUnit::instCyclesScMemPerSimd
Stats::Vector instCyclesScMemPerSimd
Definition: compute_unit.hh:505
ComputeUnit::completedWfs
Stats::Scalar completedWfs
Definition: compute_unit.hh:604
ComputeUnit::LDSPort::SenderState::_gpuDynInst
GPUDynInstPtr _gpuDynInst
Definition: compute_unit.hh:931
ComputeUnit::LDSPort::SenderState::SenderState
SenderState(GPUDynInstPtr gpuDynInst)
Definition: compute_unit.hh:934
ComputeUnit::numVecOpsExecutedFMA64
Stats::Scalar numVecOpsExecutedFMA64
Definition: compute_unit.hh:575
ComputeUnit::LDSPort::stalled
bool stalled
whether or not it is stalled
Definition: compute_unit.hh:951
ComputeUnit::decMaxBarrierCnt
void decMaxBarrierCnt(int bar_id)
Definition: compute_unit.cc:673
ComputeUnit::srf
std::vector< ScalarRegisterFile * > srf
Definition: compute_unit.hh:294
ComputeUnit::cacheLineBits
int cacheLineBits
Definition: compute_unit.hh:1028
ComputeUnit::numVecOpsExecutedF32
Stats::Scalar numVecOpsExecutedF32
Definition: compute_unit.hh:569
ComputeUnit::exec
void exec()
Definition: compute_unit.cc:703
ComputeUnit::numVectorSharedMemUnits
int numVectorSharedMemUnits
Definition: compute_unit.hh:224
ScheduleToExecute
Communication interface between Schedule and Execute stages.
Definition: comm.hh:99
Packet::SenderState
A virtual base opaque structure used to hold state associated with the packet (e.g....
Definition: packet.hh:431
ComputeUnit::DataPort::recvRangeChange
virtual void recvRangeChange()
Called to receive an address range change from the peer response port.
Definition: compute_unit.hh:687
ComputeUnit::spBypassLength
int spBypassLength() const
Definition: compute_unit.hh:392
ComputeUnit::kernargWrites
Stats::Scalar kernargWrites
Definition: compute_unit.hh:527
ComputeUnit::dpBypassPipeLength
int dpBypassPipeLength
Definition: compute_unit.hh:304
Event
Definition: eventq.hh:246
ComputeUnit::memPort
std::vector< DataPort > memPort
The memory port for SIMD data accesses.
Definition: compute_unit.hh:989
ComputeUnit::getRefCounter
int32_t getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
Definition: compute_unit.cc:2513
MipsISA::w
Bitfield< 0 > w
Definition: pra_constants.hh:278
ComputeUnit::DataPort::SenderState::port_index
PortID port_index
Definition: compute_unit.hh:663
ComputeUnit::numBarrierSlots
int numBarrierSlots() const
Definition: compute_unit.hh:445
SimObject::getPort
virtual Port & getPort(const std::string &if_name, PortID idx=InvalidPortID)
Get a port with a given name and index.
Definition: sim_object.cc:123
ComputeUnit::wfBarrierSlots
std::vector< WFBarrier > wfBarrierSlots
The barrier slots for this CU.
Definition: compute_unit.hh:1070
ComputeUnit::ScalarDTLBPort::SenderState::SenderState
SenderState(GPUDynInstPtr gpuDynInst)
Definition: compute_unit.hh:845
ComputeUnit::maxBarrierCnt
int maxBarrierCnt(int bar_id)
Definition: compute_unit.cc:659
ComputeUnit::SQCPort::recvAtomic
virtual Tick recvAtomic(PacketPtr pkt)
Definition: compute_unit.hh:774
ComputeUnit::sendRequest
void sendRequest(GPUDynInstPtr gpuDynInst, PortID index, PacketPtr pkt)
Definition: compute_unit.cc:1017
ComputeUnit::idleCUTimeout
Tick idleCUTimeout
Definition: compute_unit.hh:346
ComputeUnit::injectGlobalMemFence
void injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelMemSync, RequestPtr req=nullptr)
Definition: compute_unit.cc:1229
ComputeUnit::idleWfs
int idleWfs
Definition: compute_unit.hh:347
ComputeUnit::tlbCycles
Stats::Scalar tlbCycles
Definition: compute_unit.hh:538
LdsState
Definition: lds_state.hh:119
ComputeUnit::ITLBPort::SenderState
SenderState is information carried along with the packet throughout the TLB hierarchy.
Definition: compute_unit.hh:883
ComputeUnit::_numBarrierSlots
const int _numBarrierSlots
Definition: compute_unit.hh:1027
statistics.hh
ComputeUnit::resp_tick_latency
Tick resp_tick_latency
Definition: compute_unit.hh:359
ComputeUnit::ScalarDataPort::SenderState::saved
Packet::SenderState * saved
Definition: compute_unit.hh:720
ComputeUnit::DataPort::retries
std::deque< std::pair< PacketPtr, GPUDynInstPtr > > retries
Definition: compute_unit.hh:679
ComputeUnit::wgBlockedDueLdsAllocation
Stats::Scalar wgBlockedDueLdsAllocation
Definition: compute_unit.hh:556
Port
Ports are used to interface objects to each other.
Definition: port.hh:56
ComputeUnit::vrf
std::vector< VectorRegisterFile * > vrf
Definition: compute_unit.hh:292
ComputeUnit::instExecPerSimd
std::vector< uint64_t > instExecPerSimd
Definition: compute_unit.hh:329
ComputeUnit::wavefrontSize
int wavefrontSize
Definition: compute_unit.hh:1030
WFBarrier::reset
void reset()
Reset the barrier.
Definition: compute_unit.hh:175
WFBarrier::decMaxBarrierCnt
void decMaxBarrierCnt()
Decrement the number of WFs that are participating in this barrier.
Definition: compute_unit.hh:153
Event::setFlags
void setFlags(Flags _flags)
Definition: eventq.hh:323
ComputeUnit::prefetchDepth
int prefetchDepth
Definition: compute_unit.hh:334
ComputeUnit::wfList
std::vector< std::vector< Wavefront * > > wfList
Definition: compute_unit.hh:288
ComputeUnit::storeBusLength
int storeBusLength() const
Definition: compute_unit.hh:395
ComputeUnit::prefetchStride
int prefetchStride
Definition: compute_unit.hh:336
port.hh
ComputeUnit::scalarDataPort
ScalarDataPort scalarDataPort
Definition: compute_unit.hh:993
ComputeUnit::numFailedCASOps
Stats::Scalar numFailedCASOps
Definition: compute_unit.hh:603
ComputeUnit::SQCPort::SenderState::kernId
int kernId
Definition: compute_unit.hh:760
ComputeUnit::instInterleave
Stats::VectorDistribution instInterleave
Definition: compute_unit.hh:326
ComputeUnit::numVecOpsExecutedMAC32
Stats::Scalar numVecOpsExecutedMAC32
Definition: compute_unit.hh:578
ComputeUnit::vALUUtilization
Stats::Formula vALUUtilization
Definition: compute_unit.hh:479
ComputeUnit::DTLBPort::SenderState::_gpuDynInst
GPUDynInstPtr _gpuDynInst
Definition: compute_unit.hh:812
WFBarrier
WF barrier slots.
Definition: compute_unit.hh:87
ComputeUnit::scoreboardCheckToSchedule
ScoreboardCheckToSchedule scoreboardCheckToSchedule
TODO: Update these comments once the pipe stage interface has been fully refactored.
Definition: compute_unit.hh:1064
ComputeUnit::SQCPort::recvRangeChange
virtual void recvRangeChange()
Called to receive an address range change from the peer response port.
Definition: compute_unit.hh:776
ComputeUnit::ScalarDataPort::MemReqEvent::pkt
PacketPtr pkt
Definition: compute_unit.hh:727
ComputeUnit::releaseBarrier
void releaseBarrier(int bar_id)
Definition: compute_unit.cc:680
ComputeUnit::numALUInstsExecuted
Stats::Formula numALUInstsExecuted
Definition: compute_unit.hh:597
ComputeUnit::numTimesWgBlockedDueVgprAlloc
Stats::Scalar numTimesWgBlockedDueVgprAlloc
Definition: compute_unit.hh:599
ComputeUnit::ScalarDTLBPort::ScalarDTLBPort
ScalarDTLBPort(const std::string &_name, ComputeUnit *_cu)
Definition: compute_unit.hh:838
TLB_MISS_CACHE_MISS
@ TLB_MISS_CACHE_MISS
Definition: compute_unit.hh:77
ComputeUnit::ScalarDTLBPort
Definition: compute_unit.hh:835
ComputeUnit::DataPort::snoopRangeSent
bool snoopRangeSent
Definition: compute_unit.hh:658
ComputeUnit::DataPort::computeUnit
ComputeUnit * computeUnit
Definition: compute_unit.hh:682
ComputeUnit::execRateDist
Stats::Distribution execRateDist
Definition: compute_unit.hh:563
WFBarrier::allAtBarrier
bool allAtBarrier() const
Have all WFs participating in this barrier reached the barrier? If so, then the barrier is satisfied ...
Definition: compute_unit.hh:143
ComputeUnit::globalSeqNum
InstSeqNum globalSeqNum
Definition: compute_unit.hh:1029
ComputeUnit::vrfToLocalMemPipeBus
WaitClass vrfToLocalMemPipeBus
Definition: compute_unit.hh:228
ComputeUnit::fetchStage
FetchStage fetchStage
Definition: compute_unit.hh:277
ComputeUnit::isVectorAluIdle
bool isVectorAluIdle(uint32_t simdId) const
Definition: compute_unit.cc:2520
ComputeUnit::startWavefront
void startWavefront(Wavefront *w, int waveId, LdsChunk *ldsChunk, HSAQueueEntry *task, int bar_id, bool fetchContext=false)
Definition: compute_unit.cc:306
Port::id
const PortID id
A numeric identifier to distinguish ports in a vector, and set to InvalidPortID in case this port is ...
Definition: port.hh:74
ComputeUnit::scalarMemUnit
WaitClass scalarMemUnit
Definition: compute_unit.hh:238
ComputeUnit::DataPort::SenderState::saved
Packet::SenderState * saved
Definition: compute_unit.hh:664
ComputeUnit::DTLBPort::retries
std::deque< PacketPtr > retries
Here we queue all the translation requests that were not successfully sent.
Definition: compute_unit.hh:804
ComputeUnit::issuePeriod
Cycles issuePeriod
Definition: compute_unit.hh:310
OLDEST
@ OLDEST
Definition: compute_unit.hh:71
ComputeUnit::DataPort::SenderState
Definition: compute_unit.hh:660
ComputeUnit::LDSPort::unstallPort
void unstallPort()
Definition: compute_unit.hh:915
ComputeUnit::firstMemUnit
int firstMemUnit() const
Definition: compute_unit.cc:236
ComputeUnit::GMTokenPort
Definition: compute_unit.hh:631
RequestPort
A RequestPort is a specialisation of a Port, which implements the default protocol for the three diff...
Definition: port.hh:74
ComputeUnit::countPages
bool countPages
Definition: compute_unit.hh:354
RegisterManager
Definition: register_manager.hh:58
ComputeUnit::numVecOpsExecuted
Stats::Scalar numVecOpsExecuted
Definition: compute_unit.hh:565
ComputeUnit::SQCPort::computeUnit
ComputeUnit * computeUnit
Definition: compute_unit.hh:771
local_memory_pipeline.hh
ComputeUnit::groupMemInsts
Stats::Formula groupMemInsts
Definition: compute_unit.hh:519
ComputeUnit::lastExecCycle
std::vector< uint64_t > lastExecCycle
Definition: compute_unit.hh:320
ComputeUnit::scalarMemoryPipe
ScalarMemPipeline scalarMemoryPipe
Definition: compute_unit.hh:283
ComputeUnit::ScalarDataPort::SenderState::_gpuDynInst
GPUDynInstPtr _gpuDynInst
Definition: compute_unit.hh:719
InstSeqNum
uint64_t InstSeqNum
Definition: inst_seq.hh:37
ComputeUnit::sendScalarRequest
void sendScalarRequest(GPUDynInstPtr gpuDynInst, PacketPtr pkt)
Definition: compute_unit.cc:1202
TLB_HIT_CACHE_HIT
@ TLB_HIT_CACHE_HIT
Definition: compute_unit.hh:80
ComputeUnit::lastVaddrWF
std::vector< std::vector< std::vector< Addr > > > lastVaddrWF
Definition: compute_unit.hh:340
ComputeUnit::ScalarDTLBPort::stallPort
void stallPort()
Definition: compute_unit.hh:853
ComputeUnit::coalescerToVrfBusWidth
int coalescerToVrfBusWidth
Definition: compute_unit.hh:266
ComputeUnit::srfToScalarMemPipeBus
WaitClass srfToScalarMemPipeBus
Definition: compute_unit.hh:236
ComputeUnit::getAndIncSeqNum
InstSeqNum getAndIncSeqNum()
Definition: compute_unit.hh:1023
ComputeUnit::numVecOpsExecutedMAD32
Stats::Scalar numVecOpsExecutedMAD32
Definition: compute_unit.hh:582
ComputeUnit::DTLBPort::SenderState
SenderState is information carried along with the packet throughout the TLB hierarchy.
Definition: compute_unit.hh:809
ComputeUnit::LDSPort::recvReqRetry
virtual void recvReqRetry()
The bus is telling the port that there is now space, so retrying stalled requests should work now; this...
Definition: compute_unit.cc:2623
ComputeUnit::processFetchReturn
void processFetchReturn(PacketPtr pkt)
ComputeUnit::ScalarDataPort::recvTimingResp
bool recvTimingResp(PacketPtr pkt) override
Receive a timing response from the peer.
Definition: compute_unit.cc:910
Addr
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Definition: types.hh:142
ComputeUnit::SQCPort::SenderState::saved
Packet::SenderState * saved
Definition: compute_unit.hh:758
ComputeUnit::ITLBPort::recvTimingResp
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.
Definition: compute_unit.cc:1723
Port::name
const std::string name() const
Return port name (for DPRINTF).
Definition: port.hh:106
Stats::Distribution
A simple distribution stat.
Definition: statistics.hh:2617
ComputeUnit::scalarRegsReserved
std::vector< int > scalarRegsReserved
Definition: compute_unit.hh:372
ComputeUnit::vectorMemReadsPerKiloInst
Stats::Formula vectorMemReadsPerKiloInst
Definition: compute_unit.hh:495
ComputeUnit::LDSPort::isStalled
bool isStalled() const
Definition: compute_unit.hh:913
ComputeUnit::processTimingPacket
bool processTimingPacket(PacketPtr pkt)
ComputeUnit::spillReads
Stats::Scalar spillReads
Definition: compute_unit.hh:514
scalar_memory_pipeline.hh
ComputeUnit::localMemBarrier
bool localMemBarrier
Definition: compute_unit.hh:349
WFBarrier::WFBarrier
WFBarrier()
Definition: compute_unit.hh:90
ComputeUnit::globalMemoryPipe
GlobalMemPipeline globalMemoryPipe
Definition: compute_unit.hh:281
schedule_stage.hh
ComputeUnit::isDone
bool isDone() const
Definition: compute_unit.cc:2483
ComputeUnit::vpc_f32
Stats::Formula vpc_f32
Definition: compute_unit.hh:590
ComputeUnit::numAtBarrier
int numAtBarrier(int bar_id)
Definition: compute_unit.cc:652
ComputeUnit::hasDispResources
bool hasDispResources(HSAQueueEntry *task, int &num_wfs_in_wg)
Definition: compute_unit.cc:503
ComputeUnit::cacheLineSize
int cacheLineSize() const
Definition: compute_unit.hh:414
ComputeUnit::getTokenManager
TokenManager * getTokenManager()
Definition: compute_unit.hh:981
ComputeUnit::ldsPort
LDSPort ldsPort
The port to access the Local Data Store. Can be connected to an LDS object.
Definition: compute_unit.hh:978
ComputeUnit::freeBarrierIds
std::unordered_set< int > freeBarrierIds
A set used to easily retrieve a free barrier ID.
Definition: compute_unit.hh:1074
ComputeUnit::flatVMemInsts
Stats::Scalar flatVMemInsts
Definition: compute_unit.hh:482
ComputeUnit::lastVaddrCU
std::vector< Addr > lastVaddrCU
Definition: compute_unit.hh:338
ComputeUnit::headTailLatency
Stats::Distribution headTailLatency
Definition: compute_unit.hh:609
ComputeUnit::vectorMemReadsPerWF
Stats::Formula vectorMemReadsPerWF
Definition: compute_unit.hh:489
RequestPort::owner
SimObject & owner
Definition: port.hh:83
ComputeUnit::getPort
Port & getPort(const std::string &if_name, PortID idx) override
Get a port with a given name and index.
Definition: compute_unit.hh:1002
ComputeUnit::DTLBPort::unstallPort
void unstallPort()
Definition: compute_unit.hh:798
ComputeUnit::privMemInsts
Stats::Formula privMemInsts
Definition: compute_unit.hh:522
ComputeUnit::flatVMemInstsPerWF
Stats::Formula flatVMemInstsPerWF
Definition: compute_unit.hh:483
ComputeUnit::DataPort::SenderState::_gpuDynInst
GPUDynInstPtr _gpuDynInst
Definition: compute_unit.hh:662
ComputeUnit::numVecOpsExecutedMAC64
Stats::Scalar numVecOpsExecutedMAC64
Definition: compute_unit.hh:579
ComputeUnit::DTLBPort::recvAtomic
virtual Tick recvAtomic(PacketPtr pkt)
Definition: compute_unit.hh:829
WFBarrier::release
void release()
Release this barrier resource so it can be used by other WGs.
Definition: compute_unit.hh:164
ComputeUnit::numWfsToSched
std::vector< int > numWfsToSched
Number of WFs to schedule to each SIMD.
Definition: compute_unit.hh:367
ComputeUnit::LDSPort::recvFunctional
virtual void recvFunctional(PacketPtr pkt)
Definition: compute_unit.hh:962
ComputeUnit::oprNetPipeLength
int oprNetPipeLength() const
Definition: compute_unit.hh:390
register_manager.hh
ComputeUnit::_requestorId
RequestorID _requestorId
Definition: compute_unit.hh:467
ComputeUnit::DataPort
Data access Port.
Definition: compute_unit.hh:652
ComputeUnit::DataPort::recvTimingResp
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.
Definition: compute_unit.cc:785
ComputeUnit::numVecRegsPerSimd
int numVecRegsPerSimd
Definition: compute_unit.hh:374
ComputeUnit::SQCPort::recvTimingResp
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.
Definition: compute_unit.cc:987
ComputeUnit::execStage
ExecStage execStage
Definition: compute_unit.hh:280
ComputeUnit::DataPort::recvAtomic
virtual Tick recvAtomic(PacketPtr pkt)
Definition: compute_unit.hh:685
ComputeUnit::scalarMemToSrfBus
WaitClass scalarMemToSrfBus
Definition: compute_unit.hh:234
ComputeUnit::globalMemInsts
Stats::Formula globalMemInsts
Definition: compute_unit.hh:510
ComputeUnit::DTLBPort::isStalled
bool isStalled()
Definition: compute_unit.hh:796
ComputeUnit::ScalarDTLBPort::stalled
bool stalled
Definition: compute_unit.hh:860
ComputeUnit::waveLevelParallelism
Stats::Distribution waveLevelParallelism
Definition: compute_unit.hh:531
ComputeUnit::pageAccesses
pageDataStruct pageAccesses
Definition: compute_unit.hh:627
ComputeUnit::DataPort::recvReqRetry
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Definition: compute_unit.cc:960
ScoreboardCheckToSchedule
Communication interface between ScoreboardCheck and Schedule stages.
Definition: comm.hh:63
ComputeUnit::_cacheLineSize
const int _cacheLineSize
Definition: compute_unit.hh:1026
ComputeUnit::scalarMemWritesPerKiloInst
Stats::Formula scalarMemWritesPerKiloInst
Definition: compute_unit.hh:499
ComputeUnit::wgBlockedDueBarrierAllocation
Stats::Scalar wgBlockedDueBarrierAllocation
Definition: compute_unit.hh:555
ComputeUnit::pageDataStruct
std::unordered_map< Addr, std::pair< int, int > > pageDataStruct
Definition: compute_unit.hh:626
ComputeUnit::sqcPort
SQCPort sqcPort
Definition: compute_unit.hh:997
types.hh
ComputeUnit::updateInstStats
void updateInstStats(GPUDynInstPtr gpuDynInst)
Definition: compute_unit.cc:2350
ComputeUnit::updatePageDivergenceDist
void updatePageDivergenceDist(Addr addr)
Definition: compute_unit.cc:2455
ComputeUnit::numVecOpsExecutedTwoOpFP
Stats::Scalar numVecOpsExecutedTwoOpFP
Definition: compute_unit.hh:585
ComputeUnit::vectorMemReads
Stats::Scalar vectorMemReads
Definition: compute_unit.hh:488
Wavefront
Definition: wavefront.hh:57
TokenRequestPort
Definition: token_port.hh:43
ComputeUnit::GMTokenPort::GMTokenPort
GMTokenPort(const std::string &name, SimObject *owner, PortID id=InvalidPortID)
Definition: compute_unit.hh:634
ComputeUnit::fillKernelState
void fillKernelState(Wavefront *w, HSAQueueEntry *task)
Definition: compute_unit.cc:292
ComputeUnit::groupReads
Stats::Scalar groupReads
Definition: compute_unit.hh:517
clocked_object.hh
ComputeUnit::numCASOps
Stats::Scalar numCASOps
Definition: compute_unit.hh:602
Stats::Formula
A formula for statistics that is calculated when printed.
Definition: statistics.hh:3037
ComputeUnit::numVecOpsExecutedMAC16
Stats::Scalar numVecOpsExecutedMAC16
Definition: compute_unit.hh:577
Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:257
ComputeUnit::regStats
void regStats() override
Callback to set stat parameters.
Definition: compute_unit.cc:1806
std::deque
STL deque class.
Definition: stl.hh:44
ComputeUnit::privReads
Stats::Scalar privReads
Definition: compute_unit.hh:520
WFBarrier::setMaxBarrierCnt
void setMaxBarrierCnt(int max_barrier_cnt)
Set the maximum barrier count (i.e., the number of WFs that are participating in the barrier).
Definition: compute_unit.hh:122
ComputeUnit::flatLDSInsts
Stats::Scalar flatLDSInsts
Definition: compute_unit.hh:484
ComputeUnit::LDSPort::SenderState::getMemInst
GPUDynInstPtr getMemInst() const
Definition: compute_unit.hh:940
ComputeUnit::ScalarDataPort::recvReqRetry
void recvReqRetry() override
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Definition: compute_unit.cc:948
ComputeUnit::Params
ComputeUnitParams Params
Definition: compute_unit.hh:287
ComputeUnit::activeWaves
int activeWaves
Definition: compute_unit.hh:530
token_port.hh
ComputeUnit::GMTokenPort::recvTimingResp
bool recvTimingResp(PacketPtr)
Receive a timing response from the peer.
Definition: compute_unit.hh:641
addr
ip6_addr_t addr
Definition: inet.hh:423
ComputeUnit::wfSize
int wfSize() const
Definition: compute_unit.hh:397
ComputeUnit::vrfToCoalescerBusWidth
int vrfToCoalescerBusWidth
Definition: compute_unit.hh:265
ComputeUnit::numCyclesPerStoreTransfer
int numCyclesPerStoreTransfer
Definition: compute_unit.hh:267
ComputeUnit::lastVaddrSimd
std::vector< std::vector< Addr > > lastVaddrSimd
Definition: compute_unit.hh:339
ComputeUnit::DataPort::processMemRespEvent
void processMemRespEvent(PacketPtr pkt)
Definition: compute_unit.cc:1306
ComputeUnit::threadCyclesVALU
Stats::Scalar threadCyclesVALU
Definition: compute_unit.hh:478
VectorRegisterFile
Definition: vector_register_file.hh:46
GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:48
ComputeUnit::GMTokenPort::recvReqRetry
void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Definition: compute_unit.hh:642
ComputeUnit::SQCPort::SQCPort
SQCPort(const std::string &_name, ComputeUnit *_cu)
Definition: compute_unit.hh:750
Cycles
Cycles is a wrapper class for representing cycle counts, i.e.
Definition: types.hh:83
ComputeUnit::vectorRegsReserved
std::vector< int > vectorRegsReserved
Definition: compute_unit.hh:370
LdsChunk
This represents a slice of the overall LDS, intended to be associated with an individual workgroup.
Definition: lds_state.hh:55
ComputeUnit::getFreeBarrierId
int getFreeBarrierId()
Definition: compute_unit.hh:426
ComputeUnit::exec_policy
EXEC_POLICY exec_policy
Definition: compute_unit.hh:342
ComputeUnit::ScalarDTLBPort::unstallPort
void unstallPort()
Definition: compute_unit.hh:854
ScalarMemPipeline
Definition: scalar_memory_pipeline.hh:59
ScoreboardCheckStage
Definition: scoreboard_check_stage.hh:59
ComputeUnit::scalarMemReadsPerWF
Stats::Formula scalarMemReadsPerWF
Definition: compute_unit.hh:493
ComputeUnit::scoreboardCheckStage
ScoreboardCheckStage scoreboardCheckStage
Definition: compute_unit.hh:278
ComputeUnit::dpBypassLength
int dpBypassLength() const
Definition: compute_unit.hh:393
ComputeUnit::ScalarDataPort::retries
std::deque< PacketPtr > retries
Definition: compute_unit.hh:740
ComputeUnit::getCacheLineBits
int getCacheLineBits() const
Definition: compute_unit.hh:415
ComputeUnit::ScalarDataPort
Definition: compute_unit.hh:700
ComputeUnit::releaseWFsFromBarrier
void releaseWFsFromBarrier(int bar_id)
Definition: compute_unit.cc:688
TokenManager
Definition: token_port.hh:129
ComputeUnit::DataPort::createMemRespEvent
EventFunctionWrapper * createMemRespEvent(PacketPtr pkt)
Definition: compute_unit.cc:1566
ComputeUnit::doSmReturn
void doSmReturn(GPUDynInstPtr gpuDynInst)
ComputeUnit::ldsNoFlatInsts
Stats::Scalar ldsNoFlatInsts
Definition: compute_unit.hh:480
ComputeUnit::DataPort::processMemReqEvent
void processMemReqEvent(PacketPtr pkt)
Definition: compute_unit.cc:1574
ComputeUnit::tlbPort
std::vector< DTLBPort > tlbPort
Definition: compute_unit.hh:991
MipsISA::p
Bitfield< 0 > p
Definition: pra_constants.hh:323
ComputeUnit::SQCPort::snoopRangeSent
bool snoopRangeSent
Definition: compute_unit.hh:753
std::list< AddrRange >
ScheduleStage
Definition: schedule_stage.hh:59
ComputeUnit::~ComputeUnit
~ComputeUnit()
Definition: compute_unit.cc:215
ComputeUnit::LDSPort::recvTimingResp
virtual bool recvTimingResp(PacketPtr pkt)
Get the result of packets sent to the LDS when they return.
Definition: compute_unit.cc:2559
ComputeUnit::pipeMap
std::unordered_set< uint64_t > pipeMap
Definition: compute_unit.hh:273
ComputeUnit::SQCPort::SenderState
Definition: compute_unit.hh:755
ComputeUnit::instCyclesVMemPerSimd
Stats::Vector instCyclesVMemPerSimd
Definition: compute_unit.hh:504
exec_stage.hh
ComputeUnit::LDSPort::recvAtomic
virtual Tick recvAtomic(PacketPtr pkt)
Definition: compute_unit.hh:959
ComputeUnit::ITLBPort::recvFunctional
virtual void recvFunctional(PacketPtr pkt)
Definition: compute_unit.hh:897
ComputeUnit::spillMemInsts
Stats::Formula spillMemInsts
Definition: compute_unit.hh:516
ComputeUnit::doInvalidate
void doInvalidate(RequestPtr req, int kernId)
Trigger an invalidate operation in the CU.
Definition: compute_unit.cc:380
WFBarrier::numAtBarrier
int numAtBarrier() const
Definition: compute_unit.hh:97
ComputeUnit::sqcTLBPort
ITLBPort sqcTLBPort
Definition: compute_unit.hh:999
ComputeUnit::DTLBPort::SenderState::portIndex
PortID portIndex
Definition: compute_unit.hh:816
ComputeUnit::ipc
Stats::Formula ipc
Definition: compute_unit.hh:592
__attribute__
const char * __attribute__((weak)) m5MainCommands[]
ComputeUnit::ITLBPort::SenderState::SenderState
SenderState(Wavefront *_wavefront)
Definition: compute_unit.hh:888
GlobalMemPipeline
Definition: global_memory_pipeline.hh:56
ComputeUnit::requestorId
RequestorID requestorId()
Definition: compute_unit.hh:461
ComputeUnit::ScalarDTLBPort::retries
std::deque< PacketPtr > retries
Definition: compute_unit.hh:856
ComputeUnit::vALUInstsPerWF
Stats::Formula vALUInstsPerWF
Definition: compute_unit.hh:473
ComputeUnit::mapWaveToScalarAlu
int mapWaveToScalarAlu(Wavefront *w) const
Definition: compute_unit.cc:250
ComputeUnit::DTLBPort::stallPort
void stallPort()
Definition: compute_unit.hh:797
ComputeUnit::lds
LdsState & lds
Definition: compute_unit.hh:469
ComputeUnit::shader
Shader * shader
Definition: compute_unit.hh:356
ComputeUnit::simdWidth
int simdWidth
Definition: compute_unit.hh:298
WFBarrier::InvalidID
static const int InvalidID
Definition: compute_unit.hh:94
ComputeUnit::vectorMemWritesPerKiloInst
Stats::Formula vectorMemWritesPerKiloInst
Definition: compute_unit.hh:496
callback.hh
ComputeUnit::instCyclesSALU
Stats::Scalar instCyclesSALU
Definition: compute_unit.hh:477
ComputeUnit::activeLanesPerGMemInstrDist
Stats::Distribution activeLanesPerGMemInstrDist
Definition: compute_unit.hh:594
ComputeUnit::ITLBPort::stallPort
void stallPort()
Definition: compute_unit.hh:871
ComputeUnit::spBypassPipeLength
int spBypassPipeLength
Definition: compute_unit.hh:301
ComputeUnit::dynamicGMemInstrCnt
Stats::Scalar dynamicGMemInstrCnt
Definition: compute_unit.hh:550
ComputeUnit::numVecOpsExecutedFMA16
Stats::Scalar numVecOpsExecutedFMA16
Definition: compute_unit.hh:573
ComputeUnit::ScalarDTLBPort::SenderState::_gpuDynInst
GPUDynInstPtr _gpuDynInst
Definition: compute_unit.hh:846
ComputeUnit::barrierSlot
WFBarrier & barrierSlot(int bar_id)
Definition: compute_unit.hh:419
ComputeUnit::LDSPort
The port used to communicate between the CU and its LDS.
Definition: compute_unit.hh:905
ComputeUnit::scheduleToExecute
ScheduleToExecute scheduleToExecute
Definition: compute_unit.hh:1065
ComputeUnit::numScalarALUs
int numScalarALUs
Definition: compute_unit.hh:245
WFBarrier::_numAtBarrier
int _numAtBarrier
The number of WFs in the WG that have reached the barrier.
Definition: compute_unit.hh:186
ComputeUnit::scalarMemInstsPerKiloInst
Stats::Formula scalarMemInstsPerKiloInst
Definition: compute_unit.hh:500
ComputeUnit::fetch
void fetch(PacketPtr pkt, Wavefront *wavefront)
ComputeUnit::numVectorALUs
int numVectorALUs
Definition: compute_unit.hh:241
SimObject
Abstract superclass for simulation objects.
Definition: sim_object.hh:92

Generated on Wed Sep 30 2020 14:02:12 for gem5 by doxygen 1.8.17