gem5  [DEVELOP-FOR-23.0]
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
compute_unit.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #ifndef __COMPUTE_UNIT_HH__
33 #define __COMPUTE_UNIT_HH__
34 
35 #include <deque>
36 #include <map>
37 #include <unordered_set>
38 #include <vector>
39 
40 #include "base/callback.hh"
41 #include "base/compiler.hh"
42 #include "base/statistics.hh"
43 #include "base/stats/group.hh"
44 #include "base/types.hh"
45 #include "config/the_gpu_isa.hh"
46 #include "enums/PrefetchType.hh"
47 #include "gpu-compute/comm.hh"
57 #include "mem/port.hh"
58 #include "mem/token_port.hh"
59 #include "sim/clocked_object.hh"
60 
61 namespace gem5
62 {
63 
64 class HSAQueueEntry;
65 class LdsChunk;
66 class ScalarRegisterFile;
67 class Shader;
68 class VectorRegisterFile;
69 
70 struct ComputeUnitParams;
71 
73 {
74  OLDEST = 0,
76 };
77 
79 {
84 };
85 
90 class WFBarrier
91 {
92  public:
94  {
95  }
96 
97  static const int InvalidID = -1;
98 
// Accessor for the barrier's arrival counter: returns the current value of
// _numAtBarrier without modifying any state (the method is const).
// NOTE(review): _numAtBarrier is presumably the number of WFs that have
// arrived at this barrier so far -- confirm against the member's declaration.
99  int
100  numAtBarrier() const
101  {
102  return _numAtBarrier;
103  }
104 
108  int
110  {
111  return _maxBarrierCnt - _numAtBarrier;
112  }
113 
114  int
116  {
117  return _maxBarrierCnt;
118  }
119 
// Overwrite _maxBarrierCnt with max_barrier_cnt.
// The value is stored as-is: no range check is performed here and
// _numAtBarrier is left untouched.
124  void
125  setMaxBarrierCnt(int max_barrier_cnt)
126  {
127  _maxBarrierCnt = max_barrier_cnt;
128  }
129 
133  void
135  {
136  assert(_numAtBarrier < _maxBarrierCnt);
137  ++_numAtBarrier;
138  }
139 
// Returns true when _numAtBarrier equals _maxBarrierCnt, false otherwise.
// Note that the comparison also holds when both counters are 0, so a
// barrier whose counters are both zero reports true as well.
145  bool
146  allAtBarrier() const
147  {
148  return _numAtBarrier == _maxBarrierCnt;
149  }
150 
155  void
157  {
158  assert(_maxBarrierCnt > 0);
159  --_maxBarrierCnt;
160  }
161 
166  void
168  {
169  _numAtBarrier = 0;
170  _maxBarrierCnt = 0;
171  }
172 
177  void
179  {
180  _numAtBarrier = 0;
181  }
182 
183  private:
190 
199 };
200 
202 {
203  public:
204 
205 
206  // Execution resources
207  //
208  // The ordering of units is:
209  // Vector ALUs
210  // Scalar ALUs
211  // GM Pipe
212  // LM Pipe
213  // Scalar Mem Pipe
214  //
215  // Note: the ordering of units is important and the code assumes the
216  // above ordering. However, there may be more than one resource of
217  // each type (e.g., 4 VALUs or 2 SALUs)
218 
220  // Resource control for global memory to VRF data/address bus
222  // Resource control for Vector Register File->Global Memory pipe buses
224  // Resource control for Vector Global Memory execution unit
226 
228  // Resource control for local memory to VRF data/address bus
230  // Resource control for Vector Register File->Local Memory pipe buses
232  // Resource control for Vector Shared/Local Memory execution unit
234 
236  // Resource control for scalar memory to SRF data/address bus
238  // Resource control for Scalar Register File->Scalar Memory pipe buses
240  // Resource control for Scalar Memory execution unit
242 
243  // vector ALU execution resources
246 
247  // scalar ALU execution resources
250 
251  // Return total number of execution units on this CU
252  int numExeUnits() const;
253  // index into readyList of the first memory unit
254  int firstMemUnit() const;
255  // index into readyList of the last memory unit
256  int lastMemUnit() const;
257  // index into scalarALUs vector of SALU used by the wavefront
258  int mapWaveToScalarAlu(Wavefront *w) const;
259  // index into readyList of SALU used by wavefront
261  // index into readyList of Global Memory unit used by wavefront
262  int mapWaveToGlobalMem(Wavefront *w) const;
263  // index into readyList of Local Memory unit used by wavefront
264  int mapWaveToLocalMem(Wavefront *w) const;
265  // index into readyList of Scalar Memory unit used by wavefront
266  int mapWaveToScalarMem(Wavefront *w) const;
267 
268  int vrfToCoalescerBusWidth; // VRF->Coalescer data bus width in bytes
269  int coalescerToVrfBusWidth; // Coalescer->VRF data bus width in bytes
270  int numCyclesPerStoreTransfer; // number of cycles per vector store
271  int numCyclesPerLoadTransfer; // number of cycles per vector load
272 
273  // track presence of dynamic instructions in the Schedule pipeline
274  // stage. This is used to check the readiness of the oldest,
275  // non-dispatched instruction of every WF in the Scoreboard stage.
276  std::unordered_set<uint64_t> pipeMap;
277 
279 
287 
289 
290  typedef ComputeUnitParams Params;
292  int cu_id;
293 
294  // array of vector register files, one per SIMD
296  // array of scalar register files, one per SIMD
298 
299  // Width per VALU/SIMD unit: number of work items that can be executed
300  // on the vector ALU simultaneously in a SIMD unit
302  // number of pipe stages for bypassing data to next dependent single
303  // precision vector instruction inside the vector ALU pipeline
305  // number of pipe stages for bypassing data to next dependent double
306  // precision vector instruction inside the vector ALU pipeline
308  // number of pipe stages for scalar ALU
310  // number of pipe stages for operand collection & distribution network
312  // number of cycles per instruction issue period
314 
315  // VRF to GM Bus latency
317  // SRF to Scalar Mem Bus latency
319  // VRF to LM Bus latency
321 
322  // tracks the last cycle a vector instruction was executed on a SIMD
324 
325  // tracks the number of dyn inst executed per SIMD
327 
328  // true if we allow a separate TLB per lane
330  // if 0, TLB prefetching is off.
332  // if fixed-stride prefetching, this is the stride.
334 
338  enums::PrefetchType prefetchType;
340 
342  // Idle CU timeout in ticks
344  int idleWfs;
347 
348  /*
349  * for Counting page accesses
350  */
352 
354 
359 
367 
368  // number of currently reserved vector registers per SIMD unit
370  // number of currently reserved scalar registers per SIMD unit
372  // number of vector registers per SIMD unit
374  // number of available scalar registers per SIMD unit
376 
377  // this hash map will keep track of page divergence
378  // per memory instruction per wavefront. The hash map
379  // is cleared in GPUDynInst::updateStats() in gpu_dyn_inst.cc.
380  std::map<Addr, int> pagesTouched;
381 
382  void insertInPipeMap(Wavefront *w);
384 
385  ComputeUnit(const Params &p);
386  ~ComputeUnit();
387 
388  // Timing Functions
389  int oprNetPipeLength() const { return operandNetworkLength; }
390  int simdUnitWidth() const { return simdWidth; }
391  int spBypassLength() const { return spBypassPipeLength; }
392  int dpBypassLength() const { return dpBypassPipeLength; }
393  int scalarPipeLength() const { return scalarPipeStages; }
395  int loadBusLength() const { return numCyclesPerLoadTransfer; }
396  int wfSize() const { return wavefrontSize; }
397 
398  void exec();
399  void initiateFetch(Wavefront *wavefront);
400  void fetch(PacketPtr pkt, Wavefront *wavefront);
402 
403  void startWavefront(Wavefront *w, int waveId, LdsChunk *ldsChunk,
404  HSAQueueEntry *task, int bar_id,
405  bool fetchContext=false);
406 
407  void doInvalidate(RequestPtr req, int kernId);
408  void doFlush(GPUDynInstPtr gpuDynInst);
409 
410  void dispWorkgroup(HSAQueueEntry *task, int num_wfs_in_wg);
411  bool hasDispResources(HSAQueueEntry *task, int &num_wfs_in_wg);
412 
413  int cacheLineSize() const { return _cacheLineSize; }
414  int getCacheLineBits() const { return cacheLineBits; }
415 
416  void resetRegisterPool();
417 
418  private:
// Look up the WFBarrier stored in wfBarrierSlots for the given barrier ID
// and return a mutable reference to it.
//
// bar_id must be greater than WFBarrier::InvalidID (-1); this is enforced
// with an assert. The lookup itself goes through at(), so the index is
// bounds-checked by the container rather than being accessed unchecked.
419  WFBarrier&
420  barrierSlot(int bar_id)
421  {
422  assert(bar_id > WFBarrier::InvalidID);
423  return wfBarrierSlots.at(bar_id);
424  }
425 
426  int
428  {
429  assert(freeBarrierIds.size());
430  auto free_bar_id = freeBarrierIds.begin();
431  int bar_id = *free_bar_id;
432  freeBarrierIds.erase(free_bar_id);
433  return bar_id;
434  }
435 
436  public:
437  int numYetToReachBarrier(int bar_id);
438  bool allAtBarrier(int bar_id);
439  void incNumAtBarrier(int bar_id);
440  int numAtBarrier(int bar_id);
441  int maxBarrierCnt(int bar_id);
442  void resetBarrier(int bar_id);
443  void decMaxBarrierCnt(int bar_id);
444  void releaseBarrier(int bar_id);
445  void releaseWFsFromBarrier(int bar_id);
446  int numBarrierSlots() const { return _numBarrierSlots; }
447 
448  template<typename c0, typename c1>
449  void doSmReturn(GPUDynInstPtr gpuDynInst);
450 
451  virtual void init() override;
452  void sendRequest(GPUDynInstPtr gpuDynInst, PortID index, PacketPtr pkt);
453  void sendScalarRequest(GPUDynInstPtr gpuDynInst, PacketPtr pkt);
454  void injectGlobalMemFence(GPUDynInstPtr gpuDynInst,
455  bool kernelMemSync,
456  RequestPtr req=nullptr);
457  void handleMemPacket(PacketPtr pkt, int memport_index);
458  bool processTimingPacket(PacketPtr pkt);
459  void processFetchReturn(PacketPtr pkt);
461 
464 
465  bool isDone() const;
466  bool isVectorAluIdle(uint32_t simdId) const;
467 
468  void handleSQCReturn(PacketPtr pkt);
469 
470  protected:
472 
474 
475  public:
// Returns a non-const reference to this compute unit's `lds` member.
// NOTE(review): the method is const yet hands out a mutable LdsState&;
// this presumably relies on `lds` being declared as a reference member --
// confirm against the member declaration.
476  LdsState &
477  getLds() const
478  {
479  return lds;
480  }
481 
482  int32_t
483  getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const;
484 
485  [[nodiscard]] bool sendToLds(GPUDynInstPtr gpuDynInst);
486 
487  typedef std::unordered_map<Addr, std::pair<int, int>> pageDataStruct;
489 
490  void exitCallback();
491 
493  {
494  public:
495  GMTokenPort(const std::string& name, SimObject *owner,
496  PortID id = InvalidPortID)
498  { }
500 
501  protected:
502  bool recvTimingResp(PacketPtr) { return false; }
503  void recvReqRetry() { }
504  };
505 
506  // Manager for the number of tokens available to this compute unit to
507  // send global memory request packets to the coalescer; this is only used
508  // between global memory pipe and TCP coalescer.
511 
513  class DataPort : public RequestPort
514  {
515  public:
516  DataPort(const std::string &_name, ComputeUnit *_cu, PortID id)
517  : RequestPort(_name, id), computeUnit(_cu) { }
518 
520 
522  {
526 
527  SenderState(GPUDynInstPtr gpuDynInst, PortID _port_index,
528  Packet::SenderState *sender_state=nullptr)
529  : _gpuDynInst(gpuDynInst),
530  port_index(_port_index),
531  saved(sender_state) { }
532  };
533 
534  class SystemHubEvent : public Event
535  {
538 
539  public:
541  : dataPort(_dataPort), reqPkt(pkt)
542  {
544  }
545 
546  void
548  {
549  // DMAs do not operate on packets and therefore do not
550  // convert to a response. Do that here instead.
551  reqPkt->makeResponse();
553  }
554  };
555 
556  void processMemReqEvent(PacketPtr pkt);
558 
559  void processMemRespEvent(PacketPtr pkt);
561 
563 
564  bool handleResponse(PacketPtr pkt);
565 
566  protected:
568 
569  virtual bool recvTimingResp(PacketPtr pkt);
570  virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
571  virtual void recvFunctional(PacketPtr pkt) { }
572  virtual void recvRangeChange() { }
573  virtual void recvReqRetry();
574 
575  virtual void
577  {
578  resp.clear();
579  snoop = true;
580  }
581 
582  };
583 
584  // Scalar data cache access port
586  {
587  public:
588  ScalarDataPort(const std::string &_name, ComputeUnit *_cu)
589  : RequestPort(_name), computeUnit(_cu)
590  {
591  }
592 
593  bool recvTimingResp(PacketPtr pkt) override;
594  void recvReqRetry() override;
595 
597  {
599  Packet::SenderState *sender_state=nullptr)
600  : _gpuDynInst(gpuDynInst), saved(sender_state)
601  {
602  }
603 
606  };
607 
608  class MemReqEvent : public Event
609  {
610  private:
613 
614  public:
615  MemReqEvent(ScalarDataPort &_scalar_data_port, PacketPtr _pkt)
616  : Event(), scalarDataPort(_scalar_data_port), pkt(_pkt)
617  {
619  }
620 
621  void process();
622  const char *description() const;
623  };
624 
625  class SystemHubEvent : public Event
626  {
629 
630  public:
632  : dataPort(_dataPort), reqPkt(pkt)
633  {
635  }
636 
637  void
639  {
640  // DMAs do not operate on packets and therefore do not
641  // convert to a response. Do that here instead.
642  reqPkt->makeResponse();
644  }
645  };
646 
647  bool handleResponse(PacketPtr pkt);
648 
650 
651  private:
653  };
654 
655  // Instruction cache access port
656  class SQCPort : public RequestPort
657  {
658  public:
659  SQCPort(const std::string &_name, ComputeUnit *_cu)
660  : RequestPort(_name), computeUnit(_cu) { }
661 
663 
665  {
668  // kernel id to be used in handling I-Cache invalidate response
669  int kernId;
670 
672  *sender_state=nullptr, int _kernId=-1)
673  : wavefront(_wavefront), saved(sender_state),
674  kernId(_kernId){ }
675  };
676 
678 
679  protected:
681 
682  virtual bool recvTimingResp(PacketPtr pkt);
683  virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
684  virtual void recvFunctional(PacketPtr pkt) { }
685  virtual void recvRangeChange() { }
686  virtual void recvReqRetry();
687 
688  virtual void
690  {
691  resp.clear();
692  snoop = true;
693  }
694  };
695 
697  class DTLBPort : public RequestPort
698  {
699  public:
700  DTLBPort(const std::string &_name, ComputeUnit *_cu, PortID id)
701  : RequestPort(_name, id), computeUnit(_cu),
702  stalled(false)
703  { }
704 
705  bool isStalled() { return stalled; }
706  void stallPort() { stalled = true; }
707  void unstallPort() { stalled = false; }
708 
714 
719  {
720  // the memInst that this is associated with
722 
723  // the lane in the memInst this is associated with, so we send
724  // the memory request down the right port
726 
727  // constructor used for packets involved in timing accesses
728  SenderState(GPUDynInstPtr gpuDynInst, PortID port_index)
729  : _gpuDynInst(gpuDynInst), portIndex(port_index) { }
730 
731  };
732 
733  protected:
735  bool stalled;
736 
737  virtual bool recvTimingResp(PacketPtr pkt);
738  virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
739  virtual void recvFunctional(PacketPtr pkt) { }
740  virtual void recvRangeChange() { }
741  virtual void recvReqRetry();
742  };
743 
745  {
746  public:
747  ScalarDTLBPort(const std::string &_name, ComputeUnit *_cu)
748  : RequestPort(_name), computeUnit(_cu), stalled(false)
749  {
750  }
751 
753  {
754  SenderState(GPUDynInstPtr gpuDynInst) : _gpuDynInst(gpuDynInst) { }
756  };
757 
758  bool recvTimingResp(PacketPtr pkt) override;
759  void recvReqRetry() override { assert(false); }
760 
761  bool isStalled() const { return stalled; }
762  void stallPort() { stalled = true; }
763  void unstallPort() { stalled = false; }
764 
766 
767  private:
769  bool stalled;
770  };
771 
772  class ITLBPort : public RequestPort
773  {
774  public:
775  ITLBPort(const std::string &_name, ComputeUnit *_cu)
776  : RequestPort(_name), computeUnit(_cu), stalled(false) { }
777 
778 
779  bool isStalled() { return stalled; }
780  void stallPort() { stalled = true; }
781  void unstallPort() { stalled = false; }
782 
788 
793  {
794  // The wavefront associated with this request
796 
797  SenderState(Wavefront *_wavefront) : wavefront(_wavefront) { }
798  };
799 
800  protected:
802  bool stalled;
803 
804  virtual bool recvTimingResp(PacketPtr pkt);
805  virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
806  virtual void recvFunctional(PacketPtr pkt) { }
807  virtual void recvRangeChange() { }
808  virtual void recvReqRetry();
809  };
810 
814  class LDSPort : public RequestPort
815  {
816  public:
817  LDSPort(const std::string &_name, ComputeUnit *_cu)
818  : RequestPort(_name), computeUnit(_cu)
819  {
820  }
821 
822  bool isStalled() const { return stalled; }
823  void stallPort() { stalled = true; }
824  void unstallPort() { stalled = false; }
825 
830  std::queue<PacketPtr> retries;
831 
837  {
838  protected:
839  // The actual read/write/atomic request that goes with this command
841 
842  public:
844  _gpuDynInst(gpuDynInst)
845  {
846  }
847 
849  getMemInst() const
850  {
851  return _gpuDynInst;
852  }
853  };
854 
855  virtual bool
857 
858  protected:
859 
860  bool stalled = false;
861 
863 
864  virtual bool
866 
867  virtual Tick
868  recvAtomic(PacketPtr pkt) { return 0; }
869 
870  virtual void
872  {
873  }
874 
875  virtual void
877  {
878  }
879 
880  virtual void
881  recvReqRetry();
882  };
883 
888 
889  TokenManager *
891  {
892  return memPortTokens;
893  }
894 
899  // port to the TLB hierarchy (i.e., the L1 TLB)
901  // port to the scalar data cache
903  // port to the scalar data TLB
905  // port to the SQC (i.e. the I-cache)
907  // port to the SQC TLB (there's a separate TLB for each I-cache)
909 
// Resolve a port of this compute unit by interface name (and, for the
// indexed ports, by index).
//
// if_name: name of the requested port interface.
// idx:     element index; only consulted for "memory_port" and
//          "translation_port".
//
// Returns a reference to the matching port member. Any name that is not
// handled here -- including "memory_port"/"translation_port" with an idx
// that fails the size check -- is forwarded to ClockedObject::getPort().
910  Port &
911  getPort(const std::string &if_name, PortID idx) override
912  {
      // Indexed ports: the name must match and idx must pass the size check.
913  if (if_name == "memory_port" && idx < memPort.size()) {
914  return memPort[idx];
915  } else if (if_name == "translation_port" && idx < tlbPort.size()) {
916  return tlbPort[idx];
      // Single-instance ports: idx is ignored for these names.
917  } else if (if_name == "scalar_port") {
918  return scalarDataPort;
919  } else if (if_name == "scalar_tlb_port") {
920  return scalarDTLBPort;
921  } else if (if_name == "sqc_port") {
922  return sqcPort;
923  } else if (if_name == "sqc_tlb_port") {
924  return sqcTLBPort;
925  } else if (if_name == "ldsPort") {
926  return ldsPort;
927  } else if (if_name == "gmTokenPort") {
928  return gmTokenPort;
929  } else {
      // Unknown name (or rejected index): let the base class handle it.
930  return ClockedObject::getPort(if_name, idx);
931  }
932  }
933 
935 
936  private:
937  const int _cacheLineSize;
938  const int _numBarrierSlots;
942 
977 
985  std::unordered_set<int> freeBarrierIds;
986 
987  // hold the time of the arrival of the first cache block related to
988  // a particular GPUDynInst. This is used to calculate the difference
989  // between the first and last cache block arrival times.
990  std::unordered_map<GPUDynInstPtr, Tick> headTailMap;
991 
992  public:
993  void updateInstStats(GPUDynInstPtr gpuDynInst);
995 
997  {
998  ComputeUnitStats(statistics::Group *parent, int n_wf);
999 
1022 
1029 
1030  // Cycles required to send register source (addr and data) from
1031  // register files to memory pipeline, per SIMD.
1035 
1057 
1059 
1060  // the following stats compute the avg. TLB access latency per
1061  // uncoalesced request (only for data)
1065  // hitsPerTLBLevel[x] are the hits in Level x TLB.
1066  // x = 0 is the page table.
1068 
1071 
1072  // over all memory instructions executed over all wavefronts
1073  // how many touched 0-4 pages, 4-8, ..., 60-64 pages
1075  // count of non-flat global memory vector instructions executed
1077  // count of flat global memory vector instructions executed
1080 
1083  // Number of instructions executed, i.e. if 64 (or 32 or 7) lanes are
1084  // active when the instruction is committed, this number is still
1085  // incremented by 1
1087  // Number of cycles among successive instruction executions across all
1088  // wavefronts of the same CU
1090  // number of individual vector operations executed
1092  // number of individual f16 vector operations executed
1094  // number of individual f32 vector operations executed
1096  // number of individual f64 vector operations executed
1098  // number of individual FMA 16,32,64 vector operations executed
1102  // number of individual MAC 16,32,64 vector operations executed
1106  // number of individual MAD 16,32,64 vector operations executed
1110  // total number of two op FP vector operations executed
1112  // Total cycles that something is running on the GPU
1114  statistics::Formula vpc; // vector ops per cycle
1115  statistics::Formula vpc_f16; // vector ops per cycle
1116  statistics::Formula vpc_f32; // vector ops per cycle
1117  statistics::Formula vpc_f64; // vector ops per cycle
1118  statistics::Formula ipc; // vector instructions per cycle
1122  // number of vector ALU instructions received
1124  // number of times a WG cannot start due to lack of free VGPRs in SIMDs
1126  // number of times a WG cannot start due to lack of free SGPRs in SIMDs
1132 
1133  // distribution in latency difference between first and last cache block
1134  // arrival ticks
1136 
1137  // Track the amount of interleaving between wavefronts on each SIMD.
1138  // This stat is sampled using instExecPerSimd to compute the number
1139  // of instructions that have been executed on a SIMD between a WF
1140  // executing two successive instructions.
1142  } stats;
1143 };
1144 
1145 } // namespace gem5
1146 
1147 #endif // __COMPUTE_UNIT_HH__
gem5::ComputeUnit::SQCPort::SenderState
Definition: compute_unit.hh:664
gem5::ComputeUnit::ComputeUnitStats::tlbRequests
statistics::Scalar tlbRequests
Definition: compute_unit.hh:1062
gem5::statistics::Scalar
This is a simple scalar statistic, like a counter.
Definition: statistics.hh:1929
gem5::ComputeUnit::GMTokenPort::recvTimingResp
bool recvTimingResp(PacketPtr)
Receive a timing response from the peer.
Definition: compute_unit.hh:502
gem5::ComputeUnit::GMTokenPort::GMTokenPort
GMTokenPort(const std::string &name, SimObject *owner, PortID id=InvalidPortID)
Definition: compute_unit.hh:495
gem5::ComputeUnit::ComputeUnitStats::sALUInstsPerWF
statistics::Formula sALUInstsPerWF
Definition: compute_unit.hh:1003
gem5::PortID
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Definition: types.hh:245
gem5::ComputeUnit::ComputeUnitStats::vALUUtilization
statistics::Formula vALUUtilization
Definition: compute_unit.hh:1007
gem5::ComputeUnit::getAndIncSeqNum
InstSeqNum getAndIncSeqNum()
Definition: compute_unit.hh:934
gem5::ComputeUnit::ScalarDTLBPort
Definition: compute_unit.hh:744
gem5::SimObject::getPort
virtual Port & getPort(const std::string &if_name, PortID idx=InvalidPortID)
Get a port with a given name and index.
Definition: sim_object.cc:123
hsa_queue_entry.hh
gem5::ComputeUnit::ScalarDataPort::SenderState::saved
Packet::SenderState * saved
Definition: compute_unit.hh:605
gem5::ComputeUnit::wfList
std::vector< std::vector< Wavefront * > > wfList
Definition: compute_unit.hh:291
gem5::ComputeUnit::ComputeUnit
ComputeUnit(const Params &p)
Definition: compute_unit.cc:65
gem5::ScoreboardCheckStage
Definition: scoreboard_check_stage.hh:61
gem5::ComputeUnit::ComputeUnitStats::scalarMemReadsPerWF
statistics::Formula scalarMemReadsPerWF
Definition: compute_unit.hh:1021
gem5::ComputeUnit::wfBarrierSlots
std::vector< WFBarrier > wfBarrierSlots
The barrier slots for this CU.
Definition: compute_unit.hh:981
gem5::ComputeUnit::doSmReturn
void doSmReturn(GPUDynInstPtr gpuDynInst)
gem5::ComputeUnit::ComputeUnitStats::instCyclesSALU
statistics::Scalar instCyclesSALU
Definition: compute_unit.hh:1005
gem5::ComputeUnit::scoreboardCheckToSchedule
ScoreboardCheckToSchedule scoreboardCheckToSchedule
TODO: Update these comments once the pipe stage interface has been fully refactored.
Definition: compute_unit.hh:975
gem5::ComputeUnit::fetchStage
FetchStage fetchStage
Definition: compute_unit.hh:280
gem5::ComputeUnit::ComputeUnitStats::instInterleave
statistics::VectorDistribution instInterleave
Definition: compute_unit.hh:1141
gem5::ComputeUnit::ComputeUnitStats::flatVMemInsts
statistics::Scalar flatVMemInsts
Definition: compute_unit.hh:1010
gem5::Port::name
const std::string name() const
Return port name (for DPRINTF).
Definition: port.hh:111
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedTwoOpFP
statistics::Scalar numVecOpsExecutedTwoOpFP
Definition: compute_unit.hh:1111
gem5::ComputeUnit::ScalarDTLBPort::retries
std::deque< PacketPtr > retries
Definition: compute_unit.hh:765
gem5::ComputeUnit::sendRequest
void sendRequest(GPUDynInstPtr gpuDynInst, PortID index, PacketPtr pkt)
Definition: compute_unit.cc:1039
gem5::statistics::Distribution
A simple distribution stat.
Definition: statistics.hh:2083
gem5::ComputeUnit::coalescerToVrfBusWidth
int coalescerToVrfBusWidth
Definition: compute_unit.hh:269
gem5::ComputeUnit::lastExecCycle
std::vector< uint64_t > lastExecCycle
Definition: compute_unit.hh:323
gem5::ComputeUnit::globalSeqNum
InstSeqNum globalSeqNum
Definition: compute_unit.hh:940
gem5::ComputeUnit::debugSegFault
bool debugSegFault
Definition: compute_unit.hh:341
gem5::ComputeUnit::DTLBPort::recvRangeChange
virtual void recvRangeChange()
Called to receive an address range change from the peer response port.
Definition: compute_unit.hh:740
gem5::ComputeUnit::ITLBPort::isStalled
bool isStalled()
Definition: compute_unit.hh:779
gem5::ComputeUnit::DataPort::processMemReqEvent
void processMemReqEvent(PacketPtr pkt)
Definition: compute_unit.cc:1633
gem5::ComputeUnit::localMemoryPipe
LocalMemPipeline localMemoryPipe
Definition: compute_unit.hh:285
gem5::ComputeUnit::ComputeUnitStats::privWrites
statistics::Scalar privWrites
Definition: compute_unit.hh:1049
gem5::ComputeUnit::ComputeUnitStats::kernargWrites
statistics::Scalar kernargWrites
Definition: compute_unit.hh:1055
fetch_stage.hh
gem5::ComputeUnit::numVecRegsPerSimd
int numVecRegsPerSimd
Definition: compute_unit.hh:373
gem5::ComputeUnit::DataPort::SenderState::SenderState
SenderState(GPUDynInstPtr gpuDynInst, PortID _port_index, Packet::SenderState *sender_state=nullptr)
Definition: compute_unit.hh:527
gem5::ComputeUnit::DTLBPort
Data TLB port.
Definition: compute_unit.hh:697
group.hh
comm.hh
gem5::ComputeUnit::srf
std::vector< ScalarRegisterFile * > srf
Definition: compute_unit.hh:297
gem5::WFBarrier::numYetToReachBarrier
int numYetToReachBarrier() const
Number of WFs that have not yet reached the barrier.
Definition: compute_unit.hh:109
gem5::ComputeUnit::ComputeUnitStats::scalarMemWritesPerWF
statistics::Formula scalarMemWritesPerWF
Definition: compute_unit.hh:1019
gem5::ComputeUnit::LDSPort::SenderState::SenderState
SenderState(GPUDynInstPtr gpuDynInst)
Definition: compute_unit.hh:843
gem5::ComputeUnit::ComputeUnitStats::argMemInsts
statistics::Formula argMemInsts
Definition: compute_unit.hh:1041
gem5::ComputeUnit::ITLBPort::recvTimingResp
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.
Definition: compute_unit.cc:1801
gem5::ComputeUnit::ComputeUnitStats::spillWrites
statistics::Scalar spillWrites
Definition: compute_unit.hh:1043
gem5::ComputeUnit::exec_policy
EXEC_POLICY exec_policy
Definition: compute_unit.hh:339
gem5::ComputeUnit::ComputeUnitStats::spillMemInsts
statistics::Formula spillMemInsts
Definition: compute_unit.hh:1044
gem5::ComputeUnit::ComputeUnitStats::scalarMemWritesPerKiloInst
statistics::Formula scalarMemWritesPerKiloInst
Definition: compute_unit.hh:1027
gem5::MipsISA::index
Bitfield< 30, 0 > index
Definition: pra_constants.hh:47
gem5::ComputeUnit::ScalarDTLBPort::recvReqRetry
void recvReqRetry() override
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Definition: compute_unit.hh:759
gem5::ComputeUnit::DataPort::handleResponse
bool handleResponse(PacketPtr pkt)
Definition: compute_unit.cc:818
gem5::ComputeUnit::DataPort::SystemHubEvent::dataPort
DataPort * dataPort
Definition: compute_unit.hh:536
gem5::ComputeUnit::ComputeUnitStats::readonlyReads
statistics::Scalar readonlyReads
Definition: compute_unit.hh:1051
gem5::ComputeUnit::handleSQCReturn
void handleSQCReturn(PacketPtr pkt)
Definition: compute_unit.cc:1010
gem5::ScheduleToExecute
Communication interface between Schedule and Execute stages.
Definition: comm.hh:98
gem5::ComputeUnit::LDSPort::SenderState
SenderState is information carried along with the packet, esp.
Definition: compute_unit.hh:836
gem5::ComputeUnit::ComputeUnitStats::wgBlockedDueBarrierAllocation
statistics::Scalar wgBlockedDueBarrierAllocation
Definition: compute_unit.hh:1081
gem5::TLB_MISS_CACHE_HIT
@ TLB_MISS_CACHE_HIT
Definition: compute_unit.hh:81
gem5::ComputeUnit::DataPort::snoopRangeSent
bool snoopRangeSent
Definition: compute_unit.hh:519
gem5::ComputeUnit::DTLBPort::recvTimingResp
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.
Definition: compute_unit.cc:1431
global_memory_pipeline.hh
gem5::ComputeUnit::ComputeUnitStats::completedWGs
statistics::Scalar completedWGs
Definition: compute_unit.hh:1131
gem5::ComputeUnit::DTLBPort::stallPort
void stallPort()
Definition: compute_unit.hh:706
gem5::ComputeUnit::ComputeUnitStats::vectorMemReads
statistics::Scalar vectorMemReads
Definition: compute_unit.hh:1016
gem5::ComputeUnit::DTLBPort::recvFunctional
virtual void recvFunctional(PacketPtr pkt)
Definition: compute_unit.hh:739
gem5::ComputeUnit::lastVaddrSimd
std::vector< std::vector< Addr > > lastVaddrSimd
Definition: compute_unit.hh:336
gem5::ComputeUnit::simdWidth
int simdWidth
Definition: compute_unit.hh:301
gem5::Wavefront
Definition: wavefront.hh:60
gem5::ComputeUnit::ScalarDTLBPort::isStalled
bool isStalled() const
Definition: compute_unit.hh:761
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedF64
statistics::Scalar numVecOpsExecutedF64
Definition: compute_unit.hh:1097
gem5::ComputeUnit::fetch
void fetch(PacketPtr pkt, Wavefront *wavefront)
gem5::TokenManager
Definition: token_port.hh:130
gem5::ComputeUnit::GMTokenPort::recvReqRetry
void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Definition: compute_unit.hh:503
gem5::ComputeUnit::ComputeUnitStats::dynamicGMemInstrCnt
statistics::Scalar dynamicGMemInstrCnt
Definition: compute_unit.hh:1076
gem5::HSAQueueEntry
Definition: hsa_queue_entry.hh:60
gem5::ComputeUnit::numCyclesPerStoreTransfer
int numCyclesPerStoreTransfer
Definition: compute_unit.hh:270
gem5::ComputeUnit::DTLBPort::isStalled
bool isStalled()
Definition: compute_unit.hh:705
gem5::ComputeUnit::firstMemUnit
int firstMemUnit() const
Definition: compute_unit.cc:250
gem5::ComputeUnit::numCyclesPerLoadTransfer
int numCyclesPerLoadTransfer
Definition: compute_unit.hh:271
gem5::Port::id
const PortID id
A numeric identifier to distinguish ports in a vector, and set to InvalidPortID in case this port is ...
Definition: port.hh:79
gem5::ComputeUnit::ITLBPort::retries
std::deque< PacketPtr > retries
here we queue all the translation requests that were not successfully sent.
Definition: compute_unit.hh:787
gem5::ComputeUnit::pagesTouched
std::map< Addr, int > pagesTouched
Definition: compute_unit.hh:380
gem5::ComputeUnit::DataPort::recvAtomic
virtual Tick recvAtomic(PacketPtr pkt)
Definition: compute_unit.hh:570
gem5::ComputeUnit::ScalarDataPort::MemReqEvent::pkt
PacketPtr pkt
Definition: compute_unit.hh:612
gem5::ComputeUnit::LDSPort::stalled
bool stalled
Whether or not the port is stalled.
Definition: compute_unit.hh:860
gem5::ComputeUnit::DataPort::SystemHubEvent
Definition: compute_unit.hh:534
gem5::ComputeUnit::scoreboardCheckStage
ScoreboardCheckStage scoreboardCheckStage
Definition: compute_unit.hh:281
gem5::ComputeUnit::stats
gem5::ComputeUnit::ComputeUnitStats stats
gem5::ComputeUnit::DTLBPort::computeUnit
ComputeUnit * computeUnit
Definition: compute_unit.hh:734
gem5::ComputeUnit::headTailMap
std::unordered_map< GPUDynInstPtr, Tick > headTailMap
Definition: compute_unit.hh:990
gem5::ComputeUnit::ComputeUnitStats::vpc_f16
statistics::Formula vpc_f16
Definition: compute_unit.hh:1115
gem5::ComputeUnit::ComputeUnitStats::tlbLatency
statistics::Formula tlbLatency
Definition: compute_unit.hh:1064
gem5::ComputeUnit::_requestorId
RequestorID _requestorId
Definition: compute_unit.hh:471
gem5::ComputeUnit::DataPort::processMemRespEvent
void processMemRespEvent(PacketPtr pkt)
Definition: compute_unit.cc:1349
gem5::ComputeUnit::lastVaddrCU
std::vector< Addr > lastVaddrCU
Definition: compute_unit.hh:335
gem5::WFBarrier::release
void release()
Release this barrier resource so it can be used by other WGs.
Definition: compute_unit.hh:167
gem5::ComputeUnit::ScalarDTLBPort::recvTimingResp
bool recvTimingResp(PacketPtr pkt) override
Receive a timing response from the peer.
Definition: compute_unit.cc:1730
gem5::ComputeUnit::resp_tick_latency
Tick resp_tick_latency
Definition: compute_unit.hh:356
gem5::ComputeUnit::ComputeUnitStats::sALUInsts
statistics::Scalar sALUInsts
Definition: compute_unit.hh:1002
gem5::ComputeUnit::ScalarDataPort::SystemHubEvent::reqPkt
PacketPtr reqPkt
Definition: compute_unit.hh:628
gem5::ComputeUnit::exec
void exec()
Definition: compute_unit.cc:730
gem5::ExecStage
Definition: exec_stage.hh:73
gem5::ComputeUnit::DTLBPort::SenderState::SenderState
SenderState(GPUDynInstPtr gpuDynInst, PortID port_index)
Definition: compute_unit.hh:728
gem5::ComputeUnit::SQCPort::SenderState::wavefront
Wavefront * wavefront
Definition: compute_unit.hh:666
gem5::ComputeUnit::srfToScalarMemPipeBus
WaitClass srfToScalarMemPipeBus
Definition: compute_unit.hh:239
gem5::ComputeUnit::releaseBarrier
void releaseBarrier(int bar_id)
Definition: compute_unit.cc:707
scoreboard_check_stage.hh
gem5::ComputeUnit::ComputeUnitStats::instCyclesScMemPerSimd
statistics::Vector instCyclesScMemPerSimd
Definition: compute_unit.hh:1033
gem5::WFBarrier::_maxBarrierCnt
int _maxBarrierCnt
The maximum number of WFs that can reach this barrier.
Definition: compute_unit.hh:198
gem5::ComputeUnit::spBypassLength
int spBypassLength() const
Definition: compute_unit.hh:391
gem5::ComputeUnit::numYetToReachBarrier
int numYetToReachBarrier(int bar_id)
Definition: compute_unit.cc:658
gem5::statistics::Vector
A vector of scalar stats.
Definition: statistics.hh:2005
gem5::ComputeUnit::dpBypassPipeLength
int dpBypassPipeLength
Definition: compute_unit.hh:307
gem5::VegaISA::w
Bitfield< 6 > w
Definition: pagetable.hh:59
gem5::ComputeUnit::ComputeUnitStats::ldsBankConflictDist
statistics::Distribution ldsBankConflictDist
Definition: compute_unit.hh:1070
gem5::ComputeUnit::getRefCounter
int32_t getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
Definition: compute_unit.cc:2050
gem5::statistics::Formula
A formula for statistics that is calculated when printed.
Definition: statistics.hh:2538
gem5::ComputeUnit::ITLBPort::SenderState::wavefront
Wavefront * wavefront
Definition: compute_unit.hh:795
std::vector
STL vector class.
Definition: stl.hh:37
gem5::ComputeUnit::ScalarDataPort::handleResponse
bool handleResponse(PacketPtr pkt)
Definition: compute_unit.cc:925
gem5::ComputeUnit::LDSPort::recvAtomic
virtual Tick recvAtomic(PacketPtr pkt)
Definition: compute_unit.hh:868
gem5::ComputeUnit::scalarPipeLength
int scalarPipeLength() const
Definition: compute_unit.hh:393
gem5::ComputeUnit::ComputeUnitStats::kernargReads
statistics::Scalar kernargReads
Definition: compute_unit.hh:1054
gem5::ComputeUnit::prefetchDepth
int prefetchDepth
Definition: compute_unit.hh:331
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedFMA32
statistics::Scalar numVecOpsExecutedFMA32
Definition: compute_unit.hh:1100
gem5::ComputeUnit::vrfToGlobalMemPipeBus
WaitClass vrfToGlobalMemPipeBus
Definition: compute_unit.hh:223
gem5::ComputeUnit::ComputeUnitStats::ldsNoFlatInsts
statistics::Scalar ldsNoFlatInsts
Definition: compute_unit.hh:1008
gem5::ComputeUnit::resetBarrier
void resetBarrier(int bar_id)
Definition: compute_unit.cc:693
gem5::ComputeUnit::SQCPort::computeUnit
ComputeUnit * computeUnit
Definition: compute_unit.hh:680
gem5::ComputeUnit::ComputeUnitStats::globalReads
statistics::Scalar globalReads
Definition: compute_unit.hh:1036
gem5::ComputeUnit::storeBusLength
int storeBusLength() const
Definition: compute_unit.hh:394
gem5::ComputeUnit::SQCPort::retries
std::deque< std::pair< PacketPtr, Wavefront * > > retries
Definition: compute_unit.hh:677
gem5::ComputeUnit::ComputeUnitStats::groupMemInsts
statistics::Formula groupMemInsts
Definition: compute_unit.hh:1047
gem5::ComputeUnit::ComputeUnitStats::vpc
statistics::Formula vpc
Definition: compute_unit.hh:1114
gem5::ComputeUnit::ComputeUnitStats::activeLanesPerLMemInstrDist
statistics::Distribution activeLanesPerLMemInstrDist
Definition: compute_unit.hh:1121
gem5::ComputeUnit::memPortTokens
TokenManager * memPortTokens
Definition: compute_unit.hh:509
gem5::WFBarrier::decMaxBarrierCnt
void decMaxBarrierCnt()
Decrement the number of WFs that are participating in this barrier.
Definition: compute_unit.hh:156
gem5::ComputeUnit::getLds
LdsState & getLds() const
Definition: compute_unit.hh:477
gem5::FetchStage
Definition: fetch_stage.hh:54
gem5::ScheduleStage
Definition: schedule_stage.hh:62
gem5::InvalidPortID
const PortID InvalidPortID
Definition: types.hh:246
gem5::ComputeUnit::numVectorSharedMemUnits
int numVectorSharedMemUnits
Definition: compute_unit.hh:227
gem5::ComputeUnit::shader
Shader * shader
Definition: compute_unit.hh:353
gem5::ComputeUnit::req_tick_latency
Tick req_tick_latency
Definition: compute_unit.hh:355
gem5::ComputeUnit::ComputeUnitStats::vectorMemWritesPerWF
statistics::Formula vectorMemWritesPerWF
Definition: compute_unit.hh:1015
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedMAD64
statistics::Scalar numVecOpsExecutedMAD64
Definition: compute_unit.hh:1109
gem5::ComputeUnit::issuePeriod
Cycles issuePeriod
Definition: compute_unit.hh:313
gem5::ComputeUnit::ComputeUnitStats::headTailLatency
statistics::Distribution headTailLatency
Definition: compute_unit.hh:1135
gem5::ComputeUnit::scalarDataPort
ScalarDataPort scalarDataPort
Definition: compute_unit.hh:902
gem5::ComputeUnit::ITLBPort::stallPort
void stallPort()
Definition: compute_unit.hh:780
gem5::ComputeUnit::ComputeUnitStats::threadCyclesVALU
statistics::Scalar threadCyclesVALU
Definition: compute_unit.hh:1006
gem5::ComputeUnit::cu_id
int cu_id
Definition: compute_unit.hh:292
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedMAC32
statistics::Scalar numVecOpsExecutedMAC32
Definition: compute_unit.hh:1104
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedMAD32
statistics::Scalar numVecOpsExecutedMAD32
Definition: compute_unit.hh:1108
gem5::ComputeUnit::ComputeUnitStats::vectorMemReadsPerWF
statistics::Formula vectorMemReadsPerWF
Definition: compute_unit.hh:1017
gem5::ComputeUnit::vrf
std::vector< VectorRegisterFile * > vrf
Definition: compute_unit.hh:295
gem5::ComputeUnit::ComputeUnitStats::instCyclesVMemPerSimd
statistics::Vector instCyclesVMemPerSimd
Definition: compute_unit.hh:1032
gem5::ComputeUnit::ComputeUnitStats::dynamicFlatMemInstrCnt
statistics::Scalar dynamicFlatMemInstrCnt
Definition: compute_unit.hh:1078
gem5::ComputeUnit::DataPort::SystemHubEvent::SystemHubEvent
SystemHubEvent(PacketPtr pkt, DataPort *_dataPort)
Definition: compute_unit.hh:540
gem5::Event::setFlags
void setFlags(Flags _flags)
Definition: eventq.hh:331
gem5::ComputeUnit::SQCPort::recvReqRetry
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Definition: compute_unit.cc:1016
gem5::ComputeUnit::ComputeUnitStats::groupReads
statistics::Scalar groupReads
Definition: compute_unit.hh:1045
gem5::ComputeUnit::ComputeUnitStats::vpc_f64
statistics::Formula vpc_f64
Definition: compute_unit.hh:1117
gem5::RequestPort
A RequestPort is a specialisation of a Port, which implements the default protocol for the three diff...
Definition: port.hh:118
gem5::ComputeUnit::injectGlobalMemFence
void injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelMemSync, RequestPtr req=nullptr)
Definition: compute_unit.cc:1267
gem5::ComputeUnit::ScalarDataPort::computeUnit
ComputeUnit * computeUnit
Definition: compute_unit.hh:652
gem5::Cycles
Cycles is a wrapper class for representing cycle counts.
Definition: types.hh:78
gem5::EventBase::AutoDelete
static const FlagsType AutoDelete
Definition: eventq.hh:110
gem5::ComputeUnit::LDSPort::sendTimingReq
virtual bool sendTimingReq(PacketPtr pkt)
Attempt to send this packet; either the port is already stalled, the request is nack'd and must stall...
Definition: compute_unit.cc:2127
gem5::ComputeUnit::locMemToVrfBus
WaitClass locMemToVrfBus
Definition: compute_unit.hh:229
gem5::ComputeUnit::LDSPort::unstallPort
void unstallPort()
Definition: compute_unit.hh:824
gem5::ComputeUnit::idleWfs
int idleWfs
Definition: compute_unit.hh:344
gem5::ComputeUnit::ComputeUnitStats::kernargMemInsts
statistics::Formula kernargMemInsts
Definition: compute_unit.hh:1056
gem5::TLB_CACHE
TLB_CACHE
Definition: compute_unit.hh:78
gem5::ComputeUnit::ComputeUnitStats::flatVMemInstsPerWF
statistics::Formula flatVMemInstsPerWF
Definition: compute_unit.hh:1011
gem5::ComputeUnit::SQCPort::recvRangeChange
virtual void recvRangeChange()
Called to receive an address range change from the peer response port.
Definition: compute_unit.hh:685
gem5::ComputeUnit::idleCUTimeout
Tick idleCUTimeout
Definition: compute_unit.hh:343
gem5::ComputeUnit::loadBusLength
int loadBusLength() const
Definition: compute_unit.hh:395
gem5::ComputeUnit::ComputeUnitStats
Definition: compute_unit.hh:996
gem5::ComputeUnit::numScalarMemUnits
int numScalarMemUnits
Definition: compute_unit.hh:235
gem5::ComputeUnit::DTLBPort::recvReqRetry
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Definition: compute_unit.cc:1699
gem5::ComputeUnit::processTimingPacket
bool processTimingPacket(PacketPtr pkt)
gem5::ComputeUnit::ITLBPort::SenderState
SenderState is information carried along with the packet throughout the TLB hierarchy.
Definition: compute_unit.hh:792
gem5::ComputeUnit
Definition: compute_unit.hh:201
gem5::ComputeUnit::ScalarDataPort::MemReqEvent::process
void process()
Definition: compute_unit.cc:1666
gem5::ComputeUnit::pageAccesses
pageDataStruct pageAccesses
Definition: compute_unit.hh:488
gem5::ComputeUnit::ScalarDataPort::retries
std::deque< PacketPtr > retries
Definition: compute_unit.hh:649
gem5::ComputeUnit::ScalarDataPort::SenderState::_gpuDynInst
GPUDynInstPtr _gpuDynInst
Definition: compute_unit.hh:604
gem5::ComputeUnit::ComputeUnitStats::flatLDSInsts
statistics::Scalar flatLDSInsts
Definition: compute_unit.hh:1012
gem5::ComputeUnit::numScalarALUs
int numScalarALUs
Definition: compute_unit.hh:248
gem5::ComputeUnit::numVectorALUs
int numVectorALUs
Definition: compute_unit.hh:244
gem5::ComputeUnit::DataPort::DataPort
DataPort(const std::string &_name, ComputeUnit *_cu, PortID id)
Definition: compute_unit.hh:516
gem5::ComputeUnit::wavefrontSize
int wavefrontSize
Definition: compute_unit.hh:941
gem5::ComputeUnit::startWavefront
void startWavefront(Wavefront *w, int waveId, LdsChunk *ldsChunk, HSAQueueEntry *task, int bar_id, bool fetchContext=false)
Definition: compute_unit.cc:320
gem5::ComputeUnit::sqcTLBPort
ITLBPort sqcTLBPort
Definition: compute_unit.hh:908
gem5::ComputeUnit::ComputeUnitStats::privReads
statistics::Scalar privReads
Definition: compute_unit.hh:1048
gem5::ComputeUnit::functionalTLB
bool functionalTLB
Definition: compute_unit.hh:345
gem5::ComputeUnit::numAtBarrier
int numAtBarrier(int bar_id)
Definition: compute_unit.cc:679
gem5::WFBarrier::numAtBarrier
int numAtBarrier() const
Definition: compute_unit.hh:100
gem5::ComputeUnit::incNumAtBarrier
void incNumAtBarrier(int bar_id)
Definition: compute_unit.cc:672
gem5::ComputeUnit::ScalarDataPort::MemReqEvent::scalarDataPort
ScalarDataPort & scalarDataPort
Definition: compute_unit.hh:611
gem5::ComputeUnit::ComputeUnitStats::completedWfs
statistics::Scalar completedWfs
Definition: compute_unit.hh:1130
gem5::ComputeUnit::DTLBPort::recvAtomic
virtual Tick recvAtomic(PacketPtr pkt)
Definition: compute_unit.hh:738
gem5::ScalarMemPipeline
Definition: scalar_memory_pipeline.hh:58
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecuted
statistics::Scalar numVecOpsExecuted
Definition: compute_unit.hh:1091
gem5::ComputeUnit::oprNetPipeLength
int oprNetPipeLength() const
Definition: compute_unit.hh:389
gem5::TLB_HIT_CACHE_MISS
@ TLB_HIT_CACHE_MISS
Definition: compute_unit.hh:82
gem5::ComputeUnit::cacheLineBits
int cacheLineBits
Definition: compute_unit.hh:939
gem5::WFBarrier::InvalidID
static const int InvalidID
Definition: compute_unit.hh:97
gem5::VegaISA::p
Bitfield< 54 > p
Definition: pagetable.hh:70
gem5::ComputeUnit::ITLBPort::computeUnit
ComputeUnit * computeUnit
Definition: compute_unit.hh:801
gem5::ComputeUnit::SQCPort::recvFunctional
virtual void recvFunctional(PacketPtr pkt)
Definition: compute_unit.hh:684
gem5::ComputeUnit::decMaxBarrierCnt
void decMaxBarrierCnt(int bar_id)
Definition: compute_unit.cc:700
gem5::ComputeUnit::vectorSharedMemUnit
WaitClass vectorSharedMemUnit
Definition: compute_unit.hh:233
gem5::ComputeUnit::SQCPort::SenderState::kernId
int kernId
Definition: compute_unit.hh:669
gem5::ComputeUnit::releaseWFsFromBarrier
void releaseWFsFromBarrier(int bar_id)
Definition: compute_unit.cc:715
gem5::ComputeUnit::ITLBPort
Definition: compute_unit.hh:772
gem5::ComputeUnit::ComputeUnitStats::activeLanesPerGMemInstrDist
statistics::Distribution activeLanesPerGMemInstrDist
Definition: compute_unit.hh:1120
gem5::ComputeUnit::ScalarDTLBPort::ScalarDTLBPort
ScalarDTLBPort(const std::string &_name, ComputeUnit *_cu)
Definition: compute_unit.hh:747
gem5::Event
Definition: eventq.hh:254
gem5::ComputeUnit::scalarMemUnit
WaitClass scalarMemUnit
Definition: compute_unit.hh:241
gem5::Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:294
gem5::ComputeUnit::initiateFetch
void initiateFetch(Wavefront *wavefront)
gem5::ComputeUnit::execStage
ExecStage execStage
Definition: compute_unit.hh:283
gem5::ComputeUnit::ScalarDataPort::MemReqEvent::description
const char * description() const
Return a C string describing the event.
Definition: compute_unit.cc:1660
gem5::ComputeUnit::ComputeUnitStats::vALUInsts
statistics::Scalar vALUInsts
Definition: compute_unit.hh:1000
gem5::ComputeUnit::ComputeUnitStats::instCyclesVALU
statistics::Scalar instCyclesVALU
Definition: compute_unit.hh:1004
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedMAC64
statistics::Scalar numVecOpsExecutedMAC64
Definition: compute_unit.hh:1105
statistics.hh
gem5::ComputeUnit::handleMemPacket
void handleMemPacket(PacketPtr pkt, int memport_index)
gem5::ComputeUnit::LDSPort::SenderState::_gpuDynInst
GPUDynInstPtr _gpuDynInst
Definition: compute_unit.hh:840
gem5::ComputeUnit::DTLBPort::unstallPort
void unstallPort()
Definition: compute_unit.hh:707
gem5::Tick
uint64_t Tick
Tick count type.
Definition: types.hh:58
gem5::RequestPtr
std::shared_ptr< Request > RequestPtr
Definition: request.hh:92
gem5::ComputeUnit::tickEvent
EventFunctionWrapper tickEvent
Definition: compute_unit.hh:288
gem5::WaitClass
Definition: misc.hh:67
gem5::ComputeUnit::scheduleToExecute
ScheduleToExecute scheduleToExecute
Definition: compute_unit.hh:976
gem5::RR
@ RR
Definition: compute_unit.hh:75
gem5::ComputeUnit::vramRequestorId
RequestorID vramRequestorId()
Forward the VRAM requestor ID needed for device memory from shader.
Definition: compute_unit.cc:2096
gem5::ComputeUnit::globalMemoryPipe
GlobalMemPipeline globalMemoryPipe
Definition: compute_unit.hh:284
gem5::ComputeUnit::resetRegisterPool
void resetRegisterPool()
Definition: compute_unit.cc:421
gem5::ComputeUnit::ScalarDTLBPort::computeUnit
ComputeUnit * computeUnit
Definition: compute_unit.hh:768
gem5::ComputeUnit::registerManager
RegisterManager * registerManager
Definition: compute_unit.hh:278
gem5::ComputeUnit::ComputeUnitStats::numInstrExecuted
statistics::Scalar numInstrExecuted
Definition: compute_unit.hh:1086
gem5::WFBarrier::WFBarrier
WFBarrier()
Definition: compute_unit.hh:93
gem5::TLB_MISS_CACHE_MISS
@ TLB_MISS_CACHE_MISS
Definition: compute_unit.hh:80
gem5::ComputeUnit::ScalarDataPort::recvTimingResp
bool recvTimingResp(PacketPtr pkt) override
Receive a timing response from the peer.
Definition: compute_unit.cc:919
gem5::ComputeUnit::ITLBPort::recvReqRetry
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Definition: compute_unit.cc:1854
gem5::ComputeUnit::ScalarDTLBPort::stallPort
void stallPort()
Definition: compute_unit.hh:762
gem5::ComputeUnit::ComputeUnitStats::numCASOps
statistics::Scalar numCASOps
Definition: compute_unit.hh:1128
port.hh
gem5::ComputeUnit::DataPort::createMemReqEvent
EventFunctionWrapper * createMemReqEvent(PacketPtr pkt)
Definition: compute_unit.cc:1617
gem5::ComputeUnit::ComputeUnitStats::vectorMemInstsPerKiloInst
statistics::Formula vectorMemInstsPerKiloInst
Definition: compute_unit.hh:1025
gem5::EXEC_POLICY
EXEC_POLICY
Definition: compute_unit.hh:72
gem5::ComputeUnit::~ComputeUnit
~ComputeUnit()
Definition: compute_unit.cc:229
gem5::ComputeUnit::DTLBPort::retries
std::deque< PacketPtr > retries
Here we queue all the translation requests that were not successfully sent.
Definition: compute_unit.hh:713
gem5::ComputeUnit::DataPort
Data access Port.
Definition: compute_unit.hh:513
gem5::ComputeUnit::ComputeUnitStats::ldsBankAccesses
statistics::Scalar ldsBankAccesses
Definition: compute_unit.hh:1069
gem5::ComputeUnit::SQCPort
Definition: compute_unit.hh:656
gem5::ComputeUnit::vrf_lm_bus_latency
Cycles vrf_lm_bus_latency
Definition: compute_unit.hh:320
gem5::ComputeUnit::GMTokenPort
Definition: compute_unit.hh:492
gem5::ComputeUnit::ScalarDTLBPort::stalled
bool stalled
Definition: compute_unit.hh:769
gem5::ComputeUnit::spBypassPipeLength
int spBypassPipeLength
Definition: compute_unit.hh:304
gem5::ComputeUnit::DTLBPort::SenderState::_gpuDynInst
GPUDynInstPtr _gpuDynInst
Definition: compute_unit.hh:721
gem5::ComputeUnit::activeWaves
int activeWaves
Definition: compute_unit.hh:994
gem5::ComputeUnit::ScalarDataPort::SystemHubEvent::dataPort
ScalarDataPort * dataPort
Definition: compute_unit.hh:627
gem5::ComputeUnit::ComputeUnitStats::numTimesWgBlockedDueVgprAlloc
statistics::Scalar numTimesWgBlockedDueVgprAlloc
Definition: compute_unit.hh:1125
gem5::ComputeUnit::processFetchReturn
void processFetchReturn(PacketPtr pkt)
gem5::ComputeUnit::DataPort::getDeviceAddressRanges
virtual void getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
Definition: compute_unit.hh:576
gem5::ComputeUnit::ITLBPort::recvAtomic
virtual Tick recvAtomic(PacketPtr pkt)
Definition: compute_unit.hh:805
compiler.hh
gem5::LdsChunk
Represents a slice of the overall LDS, intended to be associated with an individual workgroup.
Definition: lds_state.hh:56
gem5::ComputeUnit::cacheLineSize
int cacheLineSize() const
Definition: compute_unit.hh:413
gem5::ComputeUnit::ITLBPort::recvRangeChange
virtual void recvRangeChange()
Called to receive an address range change from the peer response port.
Definition: compute_unit.hh:807
gem5::ComputeUnit::DataPort::SystemHubEvent::reqPkt
PacketPtr reqPkt
Definition: compute_unit.hh:537
gem5::ComputeUnit::mapWaveToScalarMem
int mapWaveToScalarMem(Wavefront *w) const
Definition: compute_unit.cc:298
gem5::ComputeUnit::mapWaveToGlobalMem
int mapWaveToGlobalMem(Wavefront *w) const
Definition: compute_unit.cc:282
gem5::ComputeUnit::deleteFromPipeMap
void deleteFromPipeMap(Wavefront *w)
Definition: compute_unit.cc:518
gem5::ComputeUnit::SQCPort::recvAtomic
virtual Tick recvAtomic(PacketPtr pkt)
Definition: compute_unit.hh:683
gem5::ComputeUnit::LDSPort
The port used for communication between the CU and its LDS.
Definition: compute_unit.hh:814
gem5::ComputeUnit::doFlush
void doFlush(GPUDynInstPtr gpuDynInst)
trigger flush operation in the cu
Definition: compute_unit.cc:413
gem5::SimObject
Abstract superclass for simulation objects.
Definition: sim_object.hh:146
gem5::ComputeUnit::DataPort::SenderState::port_index
PortID port_index
Definition: compute_unit.hh:524
gem5::ComputeUnit::init
virtual void init() override
init() is called after all C++ SimObjects have been created and all ports are connected.
Definition: compute_unit.cc:763
gem5::ComputeUnit::scalarALUs
std::vector< WaitClass > scalarALUs
Definition: compute_unit.hh:249
gem5::ComputeUnit::DataPort::SenderState::_gpuDynInst
GPUDynInstPtr _gpuDynInst
Definition: compute_unit.hh:523
gem5::ComputeUnit::dpBypassLength
int dpBypassLength() const
Definition: compute_unit.hh:392
gem5::ComputeUnit::memPort
std::vector< DataPort > memPort
The memory port for SIMD data accesses.
Definition: compute_unit.hh:898
gem5::OLDEST
@ OLDEST
Definition: compute_unit.hh:74
gem5::ComputeUnit::ComputeUnitStats::scalarMemReadsPerKiloInst
statistics::Formula scalarMemReadsPerKiloInst
Definition: compute_unit.hh:1026
gem5::Packet::SenderState
A virtual base opaque structure used to hold state associated with the packet (e.g....
Definition: packet.hh:468
gem5::ComputeUnit::ComputeUnitStats::vectorMemReadsPerKiloInst
statistics::Formula vectorMemReadsPerKiloInst
Definition: compute_unit.hh:1023
gem5::ComputeUnit::DTLBPort::SenderState::portIndex
PortID portIndex
Definition: compute_unit.hh:725
gem5::ComputeUnit::perLaneTLB
bool perLaneTLB
Definition: compute_unit.hh:329
local_memory_pipeline.hh
gem5::ComputeUnit::instExecPerSimd
std::vector< uint64_t > instExecPerSimd
Definition: compute_unit.hh:326
gem5::ComputeUnit::lastMemUnit
int lastMemUnit() const
Definition: compute_unit.cc:257
gem5::ComputeUnit::ITLBPort::SenderState::SenderState
SenderState(Wavefront *_wavefront)
Definition: compute_unit.hh:797
gem5::ComputeUnit::ScalarDTLBPort::unstallPort
void unstallPort()
Definition: compute_unit.hh:763
gem5::ComputeUnit::lastVaddrWF
std::vector< std::vector< std::vector< Addr > > > lastVaddrWF
Definition: compute_unit.hh:337
gem5::ComputeUnit::ScalarDTLBPort::SenderState::_gpuDynInst
GPUDynInstPtr _gpuDynInst
Definition: compute_unit.hh:755
gem5::ComputeUnit::ComputeUnitStats::groupWrites
statistics::Scalar groupWrites
Definition: compute_unit.hh:1046
gem5::ComputeUnit::LDSPort::LDSPort
LDSPort(const std::string &_name, ComputeUnit *_cu)
Definition: compute_unit.hh:817
gem5::ComputeUnit::numBarrierSlots
int numBarrierSlots() const
Definition: compute_unit.hh:446
gem5::Addr
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Definition: types.hh:147
gem5::ComputeUnit::ComputeUnitStats::globalWrites
statistics::Scalar globalWrites
Definition: compute_unit.hh:1037
gem5::ComputeUnit::ComputeUnitStats::vALUInstsPerWF
statistics::Formula vALUInstsPerWF
Definition: compute_unit.hh:1001
gem5::ComputeUnit::getTokenManager
TokenManager * getTokenManager()
Definition: compute_unit.hh:890
gem5::ComputeUnit::ScalarDataPort::MemReqEvent::MemReqEvent
MemReqEvent(ScalarDataPort &_scalar_data_port, PacketPtr _pkt)
Definition: compute_unit.hh:615
gem5::ComputeUnit::ComputeUnitStats::numTimesWgBlockedDueSgprAlloc
statistics::Scalar numTimesWgBlockedDueSgprAlloc
Definition: compute_unit.hh:1127
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedF16
statistics::Scalar numVecOpsExecutedF16
Definition: compute_unit.hh:1093
gem5::ComputeUnit::barrierSlot
WFBarrier & barrierSlot(int bar_id)
Definition: compute_unit.hh:420
scalar_memory_pipeline.hh
gem5::ComputeUnit::exitCallback
void exitCallback()
Definition: compute_unit.cc:2003
gem5::ComputeUnit::SQCPort::recvTimingResp
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.
Definition: compute_unit.cc:1002
gem5::ComputeUnit::ComputeUnitStats::privMemInsts
statistics::Formula privMemInsts
Definition: compute_unit.hh:1050
gem5::ComputeUnit::mapWaveToScalarAlu
int mapWaveToScalarAlu(Wavefront *w) const
Definition: compute_unit.cc:264
schedule_stage.hh
gem5::GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:49
gem5::ComputeUnit::ScalarDataPort::SystemHubEvent::process
void process()
Definition: compute_unit.hh:638
gem5::ComputeUnit::hasDispResources
bool hasDispResources(HSAQueueEntry *task, int &num_wfs_in_wg)
Definition: compute_unit.cc:530
gem5::ComputeUnit::DataPort::computeUnit
ComputeUnit * computeUnit
Definition: compute_unit.hh:567
gem5::ComputeUnit::getFreeBarrierId
int getFreeBarrierId()
Definition: compute_unit.hh:427
gem5::ComputeUnit::wfSize
int wfSize() const
Definition: compute_unit.hh:396
gem5::ClockedObject
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
Definition: clocked_object.hh:234
gem5::ComputeUnit::pipeMap
std::unordered_set< uint64_t > pipeMap
Definition: compute_unit.hh:276
gem5::ComputeUnit::SQCPort::snoopRangeSent
bool snoopRangeSent
Definition: compute_unit.hh:662
gem5::TLB_HIT_CACHE_HIT
@ TLB_HIT_CACHE_HIT
Definition: compute_unit.hh:83
gem5::ComputeUnit::LDSPort::recvReqRetry
virtual void recvReqRetry()
the bus is telling the port that there is now space so retrying stalled requests should work now this...
Definition: compute_unit.cc:2169
gem5::ComputeUnit::DataPort::recvFunctional
virtual void recvFunctional(PacketPtr pkt)
Definition: compute_unit.hh:571
gem5::ComputeUnit::SQCPort::SenderState::saved
Packet::SenderState * saved
Definition: compute_unit.hh:667
gem5::ComputeUnit::ScalarDataPort::SystemHubEvent::SystemHubEvent
SystemHubEvent(PacketPtr pkt, ScalarDataPort *_dataPort)
Definition: compute_unit.hh:631
gem5::ComputeUnit::DataPort::retries
std::deque< std::pair< PacketPtr, GPUDynInstPtr > > retries
Definition: compute_unit.hh:562
gem5::ComputeUnit::SQCPort::getDeviceAddressRanges
virtual void getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
Definition: compute_unit.hh:689
gem5::ComputeUnit::SQCPort::SenderState::SenderState
SenderState(Wavefront *_wavefront, Packet::SenderState *sender_state=nullptr, int _kernId=-1)
Definition: compute_unit.hh:671
gem5::EventFunctionWrapper
Definition: eventq.hh:1136
gem5::ComputeUnit::updateInstStats
void updateInstStats(GPUDynInstPtr gpuDynInst)
Definition: compute_unit.cc:1884
gem5::ComputeUnit::ComputeUnitStats::numALUInstsExecuted
statistics::Formula numALUInstsExecuted
Definition: compute_unit.hh:1123
gem5::ComputeUnit::ComputeUnitStats::instCyclesLdsPerSimd
statistics::Vector instCyclesLdsPerSimd
Definition: compute_unit.hh:1034
register_manager.hh
gem5::ComputeUnit::ComputeUnitStats::argReads
statistics::Scalar argReads
Definition: compute_unit.hh:1039
gem5::ComputeUnit::getCacheLineBits
int getCacheLineBits() const
Definition: compute_unit.hh:414
gem5::ComputeUnit::pageDataStruct
std::unordered_map< Addr, std::pair< int, int > > pageDataStruct
Definition: compute_unit.hh:487
gem5::ComputeUnit::ComputeUnitStats::globalMemInsts
statistics::Formula globalMemInsts
Definition: compute_unit.hh:1038
gem5::ComputeUnit::ComputeUnitStats::wgBlockedDueLdsAllocation
statistics::Scalar wgBlockedDueLdsAllocation
Definition: compute_unit.hh:1082
gem5::ComputeUnit::LDSPort::recvTimingResp
virtual bool recvTimingResp(PacketPtr pkt)
Get the result of packets sent to the LDS when they return.
Definition: compute_unit.cc:2105
gem5::ComputeUnit::operandNetworkLength
int operandNetworkLength
Definition: compute_unit.hh:311
gem5::ComputeUnit::numVectorGlobalMemUnits
int numVectorGlobalMemUnits
Definition: compute_unit.hh:219
gem5::ComputeUnit::prefetchStride
int prefetchStride
Definition: compute_unit.hh:333
gem5::ComputeUnit::Params
ComputeUnitParams Params
Definition: compute_unit.hh:290
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedMAD16
statistics::Scalar numVecOpsExecutedMAD16
Definition: compute_unit.hh:1107
gem5::ComputeUnit::ComputeUnitStats::ipc
statistics::Formula ipc
Definition: compute_unit.hh:1118
gem5::ComputeUnit::localMemBarrier
bool localMemBarrier
Definition: compute_unit.hh:346
gem5::ComputeUnit::updatePageDivergenceDist
void updatePageDivergenceDist(Addr addr)
Definition: compute_unit.cc:1992
gem5::ScoreboardCheckToSchedule
Communication interface between ScoreboardCheck and Schedule stages.
Definition: comm.hh:62
gem5::ComputeUnit::vectorRegsReserved
std::vector< int > vectorRegsReserved
Definition: compute_unit.hh:369
gem5::ComputeUnit::ComputeUnitStats::readonlyWrites
statistics::Scalar readonlyWrites
Definition: compute_unit.hh:1052
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedMAC16
statistics::Scalar numVecOpsExecutedMAC16
Definition: compute_unit.hh:1103
gem5::ComputeUnit::ComputeUnitStats::waveLevelParallelism
statistics::Distribution waveLevelParallelism
Definition: compute_unit.hh:1058
gem5::ComputeUnit::ComputeUnitStats::scalarMemWrites
statistics::Scalar scalarMemWrites
Definition: compute_unit.hh:1018
gem5::ComputeUnit::ITLBPort::unstallPort
void unstallPort()
Definition: compute_unit.hh:781
gem5::ComputeUnit::vrf_gm_bus_latency
Cycles vrf_gm_bus_latency
Definition: compute_unit.hh:316
gem5::ComputeUnit::vrfToCoalescerBusWidth
int vrfToCoalescerBusWidth
Definition: compute_unit.hh:268
gem5::ComputeUnit::ComputeUnitStats::controlFlowDivergenceDist
statistics::Distribution controlFlowDivergenceDist
Definition: compute_unit.hh:1119
gem5::Port
Ports are used to interface objects to each other.
Definition: port.hh:61
gem5::ComputeUnit::ComputeUnitStats::vectorMemWrites
statistics::Scalar vectorMemWrites
Definition: compute_unit.hh:1014
gem5::ComputeUnit::insertInPipeMap
void insertInPipeMap(Wavefront *w)
Definition: compute_unit.cc:509
gem5::GlobalMemPipeline
Definition: global_memory_pipeline.hh:59
types.hh
gem5::ComputeUnit::LDSPort::stallPort
void stallPort()
Definition: compute_unit.hh:823
gem5::WFBarrier::_numAtBarrier
int _numAtBarrier
The number of WFs in the WG that have reached the barrier.
Definition: compute_unit.hh:189
gem5::ComputeUnit::ScalarDTLBPort::SenderState
Definition: compute_unit.hh:752
gem5::ComputeUnit::mapWaveToLocalMem
int mapWaveToLocalMem(Wavefront *w) const
Definition: compute_unit.cc:290
gem5::ComputeUnit::LDSPort::recvRangeChange
virtual void recvRangeChange()
Called to receive an address range change from the peer response port.
Definition: compute_unit.hh:876
gem5::ComputeUnit::ldsPort
LDSPort ldsPort
The port to access the Local Data Store. Can be connected to an LDS object.
Definition: compute_unit.hh:887
gem5::ComputeUnit::LDSPort::computeUnit
ComputeUnit * computeUnit
Definition: compute_unit.hh:862
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedFMA64
statistics::Scalar numVecOpsExecutedFMA64
Definition: compute_unit.hh:1101
clocked_object.hh
gem5::ComputeUnit::ComputeUnitStats::flatLDSInstsPerWF
statistics::Formula flatLDSInstsPerWF
Definition: compute_unit.hh:1013
std::deque
STL deque class.
Definition: stl.hh:44
gem5::WFBarrier
WF barrier slots.
Definition: compute_unit.hh:90
gem5::ComputeUnit::DataPort::SenderState::saved
Packet::SenderState * saved
Definition: compute_unit.hh:525
gem5::Packet::makeResponse
void makeResponse()
Take a request packet and modify it in place to be suitable for returning as a response to that reque...
Definition: packet.hh:1062
gem5::ComputeUnit::isDone
bool isDone() const
Definition: compute_unit.cc:2020
gem5::ComputeUnit::LDSPort::SenderState::getMemInst
GPUDynInstPtr getMemInst() const
Definition: compute_unit.hh:849
gem5::ComputeUnit::ComputeUnitStats::hitsPerTLBLevel
statistics::Vector hitsPerTLBLevel
Definition: compute_unit.hh:1067
token_port.hh
gem5::ComputeUnit::maxBarrierCnt
int maxBarrierCnt(int bar_id)
Definition: compute_unit.cc:686
gem5::ComputeUnit::scalarRegsReserved
std::vector< int > scalarRegsReserved
Definition: compute_unit.hh:371
gem5::ComputeUnit::fillKernelState
void fillKernelState(Wavefront *w, HSAQueueEntry *task)
Definition: compute_unit.cc:306
gem5::ComputeUnit::lds
LdsState & lds
Definition: compute_unit.hh:473
gem5::ComputeUnit::LDSPort::recvFunctional
virtual void recvFunctional(PacketPtr pkt)
Definition: compute_unit.hh:871
gem5::ComputeUnit::DTLBPort::SenderState
SenderState is information carried along with the packet throughout the TLB hierarchy.
Definition: compute_unit.hh:718
gem5::ComputeUnit::SQCPort::SQCPort
SQCPort(const std::string &_name, ComputeUnit *_cu)
Definition: compute_unit.hh:659
gem5::ComputeUnit::vrfToLocalMemPipeBus
WaitClass vrfToLocalMemPipeBus
Definition: compute_unit.hh:231
gem5::statistics::Group
Statistics container.
Definition: group.hh:92
gem5::ComputeUnit::ComputeUnitStats::execRateDist
statistics::Distribution execRateDist
Definition: compute_unit.hh:1089
gem5::ComputeUnit::tlbPort
std::vector< DTLBPort > tlbPort
Definition: compute_unit.hh:900
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedF32
statistics::Scalar numVecOpsExecutedF32
Definition: compute_unit.hh:1095
gem5::ComputeUnit::isVectorAluIdle
bool isVectorAluIdle(uint32_t simdId) const
Definition: compute_unit.cc:2057
gem5::ComputeUnit::numScalarRegsPerSimd
int numScalarRegsPerSimd
Definition: compute_unit.hh:375
gem5::ComputeUnit::vectorALUs
std::vector< WaitClass > vectorALUs
Definition: compute_unit.hh:245
gem5::WFBarrier::allAtBarrier
bool allAtBarrier() const
Have all WFs participating in this barrier reached the barrier? If so, then the barrier is satisfied ...
Definition: compute_unit.hh:146
gem5::InstSeqNum
uint64_t InstSeqNum
Definition: inst_seq.hh:40
gem5::ComputeUnit::sendScalarRequest
void sendScalarRequest(GPUDynInstPtr gpuDynInst, PacketPtr pkt)
Definition: compute_unit.cc:1240
gem5::ComputeUnit::LDSPort::isStalled
bool isStalled() const
Definition: compute_unit.hh:822
gem5::ComputeUnit::countPages
bool countPages
Definition: compute_unit.hh:351
gem5::ComputeUnit::_cacheLineSize
const int _cacheLineSize
Definition: compute_unit.hh:937
gem5::ComputeUnit::freeBarrierIds
std::unordered_set< int > freeBarrierIds
A set used to easily retrieve a free barrier ID.
Definition: compute_unit.hh:985
gem5::ComputeUnit::ComputeUnitStats::ComputeUnitStats
ComputeUnitStats(statistics::Group *parent, int n_wf)
Definition: compute_unit.cc:2200
gem5::RegisterManager
Definition: register_manager.hh:57
gem5::ComputeUnit::scalar_req_tick_latency
Tick scalar_req_tick_latency
Definition: compute_unit.hh:357
gem5::ComputeUnit::LDSPort::retries
std::queue< PacketPtr > retries
Here we queue all the requests that were not successfully sent.
Definition: compute_unit.hh:830
gem5::ComputeUnit::scalarMemToSrfBus
WaitClass scalarMemToSrfBus
Definition: compute_unit.hh:237
gem5::ComputeUnit::scalarDTLBPort
ScalarDTLBPort scalarDTLBPort
Definition: compute_unit.hh:904
gem5::LocalMemPipeline
Definition: local_memory_pipeline.hh:57
gem5::ComputeUnit::ComputeUnitStats::pageDivergenceDist
statistics::Distribution pageDivergenceDist
Definition: compute_unit.hh:1074
gem5::ComputeUnit::DataPort::recvRangeChange
virtual void recvRangeChange()
Called to receive an address range change from the peer response port.
Definition: compute_unit.hh:572
gem5::RequestorID
uint16_t RequestorID
Definition: request.hh:95
gem5::WFBarrier::setMaxBarrierCnt
void setMaxBarrierCnt(int max_barrier_cnt)
Set the maximum barrier count (i.e., the number of WFs that are participating in the barrier).
Definition: compute_unit.hh:125
gem5::statistics::VectorDistribution
A vector of distributions.
Definition: statistics.hh:2244
gem5::ComputeUnit::ScalarDataPort::MemReqEvent
Definition: compute_unit.hh:608
gem5::ComputeUnit::ScalarDataPort::ScalarDataPort
ScalarDataPort(const std::string &_name, ComputeUnit *_cu)
Definition: compute_unit.hh:588
gem5::ComputeUnit::ComputeUnitStats::argWrites
statistics::Scalar argWrites
Definition: compute_unit.hh:1040
gem5::ComputeUnit::GMTokenPort::~GMTokenPort
~GMTokenPort()
Definition: compute_unit.hh:499
gem5::WFBarrier::reset
void reset()
Reset the barrier.
Definition: compute_unit.hh:178
gem5::ComputeUnit::ComputeUnitStats::vpc_f32
statistics::Formula vpc_f32
Definition: compute_unit.hh:1116
gem5::WFBarrier::incNumAtBarrier
void incNumAtBarrier()
Mark that a WF has reached the barrier.
Definition: compute_unit.hh:134
std::list< AddrRange >
gem5::WFBarrier::maxBarrierCnt
int maxBarrierCnt() const
Definition: compute_unit.hh:115
gem5::ComputeUnit::sendToLds
bool sendToLds(GPUDynInstPtr gpuDynInst)
Send a general request to the LDS. Make sure to look at the return value here, as your request might be...
Definition: compute_unit.cc:2076
gem5::ComputeUnit::ComputeUnitStats::dynamicLMemInstrCnt
statistics::Scalar dynamicLMemInstrCnt
Definition: compute_unit.hh:1079
gem5::RequestPort::owner
SimObject & owner
Definition: port.hh:127
gem5::ComputeUnit::DataPort::SystemHubEvent::process
void process()
Definition: compute_unit.hh:547
gem5::ComputeUnit::ComputeUnitStats::numFailedCASOps
statistics::Scalar numFailedCASOps
Definition: compute_unit.hh:1129
gem5::ComputeUnit::ITLBPort::stalled
bool stalled
Definition: compute_unit.hh:802
gem5::ComputeUnit::scalarMemoryPipe
ScalarMemPipeline scalarMemoryPipe
Definition: compute_unit.hh:286
gem5::ComputeUnit::ITLBPort::recvFunctional
virtual void recvFunctional(PacketPtr pkt)
Definition: compute_unit.hh:806
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: gpu_translation_state.hh:37
gem5::ComputeUnit::DataPort::recvTimingResp
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.
Definition: compute_unit.cc:812
gem5::ComputeUnit::vectorGlobalMemUnit
WaitClass vectorGlobalMemUnit
Definition: compute_unit.hh:225
gem5::ComputeUnit::ScalarDataPort::SystemHubEvent
Definition: compute_unit.hh:625
gem5::ComputeUnit::simdUnitWidth
int simdUnitWidth() const
Definition: compute_unit.hh:390
gem5::ComputeUnit::ComputeUnitStats::readonlyMemInsts
statistics::Formula readonlyMemInsts
Definition: compute_unit.hh:1053
gem5::ComputeUnit::ScalarDataPort::recvReqRetry
void recvReqRetry() override
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Definition: compute_unit.cc:963
gem5::ComputeUnit::ComputeUnitStats::scalarMemReads
statistics::Scalar scalarMemReads
Definition: compute_unit.hh:1020
gem5::ComputeUnit::ComputeUnitStats::totalCycles
statistics::Scalar totalCycles
Definition: compute_unit.hh:1113
gem5::ComputeUnit::dispWorkgroup
void dispWorkgroup(HSAQueueEntry *task, int num_wfs_in_wg)
Definition: compute_unit.cc:431
gem5::ComputeUnit::prefetchType
enums::PrefetchType prefetchType
Definition: compute_unit.hh:338
exec_stage.hh
gem5::ComputeUnit::ComputeUnitStats::tlbCycles
statistics::Scalar tlbCycles
Definition: compute_unit.hh:1063
gem5::ComputeUnit::mapWaveToScalarAluGlobalIdx
int mapWaveToScalarAluGlobalIdx(Wavefront *w) const
Definition: compute_unit.cc:275
gem5::ComputeUnit::gmTokenPort
GMTokenPort gmTokenPort
Definition: compute_unit.hh:510
gem5::ComputeUnit::getPort
Port & getPort(const std::string &if_name, PortID idx) override
Get a port with a given name and index.
Definition: compute_unit.hh:911
gem5::ComputeUnit::scalarPipeStages
int scalarPipeStages
Definition: compute_unit.hh:309
gem5::ComputeUnit::DataPort::recvReqRetry
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Definition: compute_unit.cc:975
gem5::ComputeUnit::DTLBPort::stalled
bool stalled
Definition: compute_unit.hh:735
gem5::ComputeUnit::doInvalidate
void doInvalidate(RequestPtr req, int kernId)
Trigger an invalidate operation in the CU.
Definition: compute_unit.cc:394
gem5::ComputeUnit::sqcPort
SQCPort sqcPort
Definition: compute_unit.hh:906
gem5::ComputeUnit::ComputeUnitStats::spillReads
statistics::Scalar spillReads
Definition: compute_unit.hh:1042
gem5::ComputeUnit::ITLBPort::ITLBPort
ITLBPort(const std::string &_name, ComputeUnit *_cu)
Definition: compute_unit.hh:775
gem5::ComputeUnit::allAtBarrier
bool allAtBarrier(int bar_id)
Definition: compute_unit.cc:665
gem5::ComputeUnit::numWfsToSched
std::vector< int > numWfsToSched
Number of WFs to schedule to each SIMD.
Definition: compute_unit.hh:366
gem5::ComputeUnit::ComputeUnitStats::ldsNoFlatInstsPerWF
statistics::Formula ldsNoFlatInstsPerWF
Definition: compute_unit.hh:1009
gem5::LdsState
Definition: lds_state.hh:141
gem5::ComputeUnit::ScalarDTLBPort::SenderState::SenderState
SenderState(GPUDynInstPtr gpuDynInst)
Definition: compute_unit.hh:754
gem5::ComputeUnit::srf_scm_bus_latency
Cycles srf_scm_bus_latency
Definition: compute_unit.hh:318
callback.hh
gem5::ComputeUnit::ScalarDataPort
Definition: compute_unit.hh:585
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedFMA16
statistics::Scalar numVecOpsExecutedFMA16
Definition: compute_unit.hh:1099
gem5::ComputeUnit::DataPort::SenderState
Definition: compute_unit.hh:521
gem5::Named::_name
const std::string _name
Definition: named.hh:41
gem5::ComputeUnit::DTLBPort::DTLBPort
DTLBPort(const std::string &_name, ComputeUnit *_cu, PortID id)
Definition: compute_unit.hh:700
gem5::ComputeUnit::ScalarDataPort::SenderState
Definition: compute_unit.hh:596
gem5::ComputeUnit::scheduleStage
ScheduleStage scheduleStage
Definition: compute_unit.hh:282
gem5::ComputeUnit::_numBarrierSlots
const int _numBarrierSlots
Definition: compute_unit.hh:938
gem5::ComputeUnit::ScalarDataPort::SenderState::SenderState
SenderState(GPUDynInstPtr gpuDynInst, Packet::SenderState *sender_state=nullptr)
Definition: compute_unit.hh:598
gem5::ComputeUnit::DataPort::createMemRespEvent
EventFunctionWrapper * createMemRespEvent(PacketPtr pkt)
Definition: compute_unit.cc:1625
gem5::ComputeUnit::requestorId
RequestorID requestorId()
Definition: compute_unit.hh:462
gem5::X86ISA::addr
Bitfield< 3 > addr
Definition: types.hh:84
gem5::ComputeUnit::ComputeUnitStats::scalarMemInstsPerKiloInst
statistics::Formula scalarMemInstsPerKiloInst
Definition: compute_unit.hh:1028
gem5::ComputeUnit::ComputeUnitStats::vectorMemWritesPerKiloInst
statistics::Formula vectorMemWritesPerKiloInst
Definition: compute_unit.hh:1024
gem5::ComputeUnit::scalar_resp_tick_latency
Tick scalar_resp_tick_latency
Definition: compute_unit.hh:358
gem5::TokenRequestPort
Definition: token_port.hh:44
gem5::ComputeUnit::numExeUnits
int numExeUnits() const
Definition: compute_unit.cc:242
gem5::Shader
Definition: shader.hh:83
gem5::ComputeUnit::glbMemToVrfBus
WaitClass glbMemToVrfBus
Definition: compute_unit.hh:221

Generated on Sun Jul 30 2023 01:56:56 for gem5 by doxygen 1.8.17