gem5  v21.0.1.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
compute_unit.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #ifndef __COMPUTE_UNIT_HH__
35 #define __COMPUTE_UNIT_HH__
36 
37 #include <deque>
38 #include <map>
39 #include <unordered_set>
40 #include <vector>
41 
42 #include "base/callback.hh"
43 #include "base/compiler.hh"
44 #include "base/statistics.hh"
45 #include "base/stats/group.hh"
46 #include "base/types.hh"
47 #include "config/the_gpu_isa.hh"
48 #include "enums/PrefetchType.hh"
49 #include "gpu-compute/comm.hh"
59 #include "mem/port.hh"
60 #include "mem/token_port.hh"
61 #include "sim/clocked_object.hh"
62 
63 class HSAQueueEntry;
64 class LdsChunk;
65 class ScalarRegisterFile;
66 class Shader;
67 class VectorRegisterFile;
68 
69 struct ComputeUnitParams;
70 
72 {
73  OLDEST = 0,
75 };
76 
78 {
83 };
84 
89 class WFBarrier
90 {
91  public:
93  {
94  }
95 
96  static const int InvalidID = -1;
97 
98  int
99  numAtBarrier() const
100  {
101  return _numAtBarrier;
102  }
103 
107  int
109  {
110  return _maxBarrierCnt - _numAtBarrier;
111  }
112 
113  int
115  {
116  return _maxBarrierCnt;
117  }
118 
123  void
124  setMaxBarrierCnt(int max_barrier_cnt)
125  {
126  _maxBarrierCnt = max_barrier_cnt;
127  }
128 
132  void
134  {
135  assert(_numAtBarrier < _maxBarrierCnt);
136  ++_numAtBarrier;
137  }
138 
144  bool
145  allAtBarrier() const
146  {
147  return _numAtBarrier == _maxBarrierCnt;
148  }
149 
154  void
156  {
157  assert(_maxBarrierCnt > 0);
158  --_maxBarrierCnt;
159  }
160 
165  void
167  {
168  _numAtBarrier = 0;
169  _maxBarrierCnt = 0;
170  }
171 
176  void
178  {
179  _numAtBarrier = 0;
180  }
181 
182  private:
189 
198 };
199 
201 {
202  public:
203 
204 
205  // Execution resources
206  //
207  // The ordering of units is:
208  // Vector ALUs
209  // Scalar ALUs
210  // GM Pipe
211  // LM Pipe
212  // Scalar Mem Pipe
213  //
214  // Note: the ordering of units is important and the code assumes the
215  // above ordering. However, there may be more than one resource of
216  // each type (e.g., 4 VALUs or 2 SALUs)
217 
219  // Resource control for global memory to VRF data/address bus
221  // Resource control for Vector Register File->Global Memory pipe buses
223  // Resource control for Vector Global Memory execution unit
225 
227  // Resource control for local memory to VRF data/address bus
229  // Resource control for Vector Register File->Local Memory pipe buses
231  // Resource control for Vector Shared/Local Memory execution unit
233 
235  // Resource control for scalar memory to SRF data/address bus
237  // Resource control for Scalar Register File->Scalar Memory pipe buses
239  // Resource control for Scalar Memory execution unit
241 
242  // vector ALU execution resources
245 
246  // scalar ALU execution resources
249 
250  // Return total number of execution units on this CU
251  int numExeUnits() const;
252  // index into readyList of the first memory unit
253  int firstMemUnit() const;
254  // index into readyList of the last memory unit
255  int lastMemUnit() const;
256  // index into scalarALUs vector of SALU used by the wavefront
257  int mapWaveToScalarAlu(Wavefront *w) const;
258  // index into readyList of SALU used by wavefront
260  // index into readyList of Global Memory unit used by wavefront
261  int mapWaveToGlobalMem(Wavefront *w) const;
262  // index into readyList of Local Memory unit used by wavefront
263  int mapWaveToLocalMem(Wavefront *w) const;
264  // index into readyList of Scalar Memory unit used by wavefront
265  int mapWaveToScalarMem(Wavefront *w) const;
266 
267  int vrfToCoalescerBusWidth; // VRF->Coalescer data bus width in bytes
268  int coalescerToVrfBusWidth; // Coalescer->VRF data bus width in bytes
269  int numCyclesPerStoreTransfer; // number of cycles per vector store
270  int numCyclesPerLoadTransfer; // number of cycles per vector load
271 
272  // track presence of dynamic instructions in the Schedule pipeline
273  // stage. This is used to check the readiness of the oldest,
274  // non-dispatched instruction of every WF in the Scoreboard stage.
275  std::unordered_set<uint64_t> pipeMap;
276 
278 
286 
288 
289  typedef ComputeUnitParams Params;
291  int cu_id;
292 
293  // array of vector register files, one per SIMD
295  // array of scalar register files, one per SIMD
297 
298  // Width per VALU/SIMD unit: number of work items that can be executed
299  // on the vector ALU simultaneously in a SIMD unit
301  // number of pipe stages for bypassing data to next dependent single
302  // precision vector instruction inside the vector ALU pipeline
304  // number of pipe stages for bypassing data to next dependent double
305  // precision vector instruction inside the vector ALU pipeline
307  // number of pipe stages for scalar ALU
309  // number of pipe stages for operand collection & distribution network
311  // number of cycles per instruction issue period
313 
314  // VRF to GM Bus latency
316  // SRF to Scalar Mem Bus latency
318  // VRF to LM Bus latency
320 
321  // tracks the last cycle a vector instruction was executed on a SIMD
323 
324  // tracks the number of dyn inst executed per SIMD
326 
327  // true if we allow a separate TLB per lane
329  // if 0, TLB prefetching is off.
331  // if fixed-stride prefetching, this is the stride.
333 
337  Enums::PrefetchType prefetchType;
339 
341  // Idle CU timeout in ticks
343  int idleWfs;
346 
347  /*
348  * for Counting page accesses
349  */
351 
353 
356 
364 
365  // number of currently reserved vector registers per SIMD unit
367  // number of currently reserved scalar registers per SIMD unit
369  // number of vector registers per SIMD unit
371  // number of available scalar registers per SIMD unit
373 
374  // this hash map will keep track of page divergence
375  // per memory instruction per wavefront. The hash map
376  // is cleared in GPUDynInst::updateStats() in gpu_dyn_inst.cc.
377  std::map<Addr, int> pagesTouched;
378 
379  void insertInPipeMap(Wavefront *w);
381 
382  ComputeUnit(const Params &p);
383  ~ComputeUnit();
384 
385  // Timing Functions
386  int oprNetPipeLength() const { return operandNetworkLength; }
387  int simdUnitWidth() const { return simdWidth; }
388  int spBypassLength() const { return spBypassPipeLength; }
389  int dpBypassLength() const { return dpBypassPipeLength; }
390  int scalarPipeLength() const { return scalarPipeStages; }
392  int loadBusLength() const { return numCyclesPerLoadTransfer; }
393  int wfSize() const { return wavefrontSize; }
394 
395  void exec();
396  void initiateFetch(Wavefront *wavefront);
397  void fetch(PacketPtr pkt, Wavefront *wavefront);
399 
400  void startWavefront(Wavefront *w, int waveId, LdsChunk *ldsChunk,
401  HSAQueueEntry *task, int bar_id,
402  bool fetchContext=false);
403 
404  void doInvalidate(RequestPtr req, int kernId);
405  void doFlush(GPUDynInstPtr gpuDynInst);
406 
407  void dispWorkgroup(HSAQueueEntry *task, int num_wfs_in_wg);
408  bool hasDispResources(HSAQueueEntry *task, int &num_wfs_in_wg);
409 
410  int cacheLineSize() const { return _cacheLineSize; }
411  int getCacheLineBits() const { return cacheLineBits; }
412 
413  void resetRegisterPool();
414 
415  private:
416  WFBarrier&
417  barrierSlot(int bar_id)
418  {
419  assert(bar_id > WFBarrier::InvalidID);
420  return wfBarrierSlots.at(bar_id);
421  }
422 
423  int
425  {
426  assert(freeBarrierIds.size());
427  auto free_bar_id = freeBarrierIds.begin();
428  int bar_id = *free_bar_id;
429  freeBarrierIds.erase(free_bar_id);
430  return bar_id;
431  }
432 
433  public:
434  int numYetToReachBarrier(int bar_id);
435  bool allAtBarrier(int bar_id);
436  void incNumAtBarrier(int bar_id);
437  int numAtBarrier(int bar_id);
438  int maxBarrierCnt(int bar_id);
439  void resetBarrier(int bar_id);
440  void decMaxBarrierCnt(int bar_id);
441  void releaseBarrier(int bar_id);
442  void releaseWFsFromBarrier(int bar_id);
443  int numBarrierSlots() const { return _numBarrierSlots; }
444 
445  template<typename c0, typename c1>
446  void doSmReturn(GPUDynInstPtr gpuDynInst);
447 
448  virtual void init() override;
449  void sendRequest(GPUDynInstPtr gpuDynInst, PortID index, PacketPtr pkt);
450  void sendScalarRequest(GPUDynInstPtr gpuDynInst, PacketPtr pkt);
451  void injectGlobalMemFence(GPUDynInstPtr gpuDynInst,
452  bool kernelMemSync,
453  RequestPtr req=nullptr);
454  void handleMemPacket(PacketPtr pkt, int memport_index);
455  bool processTimingPacket(PacketPtr pkt);
456  void processFetchReturn(PacketPtr pkt);
458 
460 
461  bool isDone() const;
462  bool isVectorAluIdle(uint32_t simdId) const;
463 
464  protected:
466 
468 
469  public:
470  LdsState &
471  getLds() const
472  {
473  return lds;
474  }
475 
476  int32_t
477  getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const;
478 
479  M5_NODISCARD bool sendToLds(GPUDynInstPtr gpuDynInst);
480 
481  typedef std::unordered_map<Addr, std::pair<int, int>> pageDataStruct;
483 
484  void exitCallback();
485 
487  {
488  public:
489  GMTokenPort(const std::string& name, SimObject *owner,
490  PortID id = InvalidPortID)
492  { }
494 
495  protected:
496  bool recvTimingResp(PacketPtr) { return false; }
497  void recvReqRetry() { }
498  };
499 
500  // Manager for the number of tokens available to this compute unit to
501  // send global memory request packets to the coalescer. This is only used
502  // between the global memory pipe and the TCP coalescer.
505 
507  class DataPort : public RequestPort
508  {
509  public:
510  DataPort(const std::string &_name, ComputeUnit *_cu, PortID id)
511  : RequestPort(_name, _cu, id), computeUnit(_cu) { }
512 
514 
516  {
520 
521  SenderState(GPUDynInstPtr gpuDynInst, PortID _port_index,
522  Packet::SenderState *sender_state=nullptr)
523  : _gpuDynInst(gpuDynInst),
524  port_index(_port_index),
525  saved(sender_state) { }
526  };
527 
528  void processMemReqEvent(PacketPtr pkt);
530 
531  void processMemRespEvent(PacketPtr pkt);
533 
535 
536  protected:
538 
539  virtual bool recvTimingResp(PacketPtr pkt);
540  virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
541  virtual void recvFunctional(PacketPtr pkt) { }
542  virtual void recvRangeChange() { }
543  virtual void recvReqRetry();
544 
545  virtual void
547  {
548  resp.clear();
549  snoop = true;
550  }
551 
552  };
553 
554  // Scalar data cache access port
556  {
557  public:
558  ScalarDataPort(const std::string &_name, ComputeUnit *_cu)
559  : RequestPort(_name, _cu), computeUnit(_cu)
560  {
561  }
562 
563  bool recvTimingResp(PacketPtr pkt) override;
564  void recvReqRetry() override;
565 
567  {
569  Packet::SenderState *sender_state=nullptr)
570  : _gpuDynInst(gpuDynInst), saved(sender_state)
571  {
572  }
573 
576  };
577 
578  class MemReqEvent : public Event
579  {
580  private:
583 
584  public:
585  MemReqEvent(ScalarDataPort &_scalar_data_port, PacketPtr _pkt)
586  : Event(), scalarDataPort(_scalar_data_port), pkt(_pkt)
587  {
589  }
590 
591  void process();
592  const char *description() const;
593  };
594 
596 
597  private:
599  };
600 
601  // Instruction cache access port
602  class SQCPort : public RequestPort
603  {
604  public:
605  SQCPort(const std::string &_name, ComputeUnit *_cu)
606  : RequestPort(_name, _cu), computeUnit(_cu) { }
607 
609 
611  {
614  // kernel id to be used in handling I-Cache invalidate response
615  int kernId;
616 
618  *sender_state=nullptr, int _kernId=-1)
619  : wavefront(_wavefront), saved(sender_state),
620  kernId(_kernId){ }
621  };
622 
624 
625  protected:
627 
628  virtual bool recvTimingResp(PacketPtr pkt);
629  virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
630  virtual void recvFunctional(PacketPtr pkt) { }
631  virtual void recvRangeChange() { }
632  virtual void recvReqRetry();
633 
634  virtual void
636  {
637  resp.clear();
638  snoop = true;
639  }
640  };
641 
643  class DTLBPort : public RequestPort
644  {
645  public:
646  DTLBPort(const std::string &_name, ComputeUnit *_cu, PortID id)
647  : RequestPort(_name, _cu, id), computeUnit(_cu),
648  stalled(false)
649  { }
650 
651  bool isStalled() { return stalled; }
652  void stallPort() { stalled = true; }
653  void unstallPort() { stalled = false; }
654 
660 
665  {
666  // the memInst that this is associated with
668 
669  // the lane in the memInst this is associated with, so we send
670  // the memory request down the right port
672 
673  // constructor used for packets involved in timing accesses
674  SenderState(GPUDynInstPtr gpuDynInst, PortID port_index)
675  : _gpuDynInst(gpuDynInst), portIndex(port_index) { }
676 
677  };
678 
679  protected:
681  bool stalled;
682 
683  virtual bool recvTimingResp(PacketPtr pkt);
684  virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
685  virtual void recvFunctional(PacketPtr pkt) { }
686  virtual void recvRangeChange() { }
687  virtual void recvReqRetry();
688  };
689 
691  {
692  public:
693  ScalarDTLBPort(const std::string &_name, ComputeUnit *_cu)
694  : RequestPort(_name, _cu), computeUnit(_cu), stalled(false)
695  {
696  }
697 
699  {
700  SenderState(GPUDynInstPtr gpuDynInst) : _gpuDynInst(gpuDynInst) { }
702  };
703 
704  bool recvTimingResp(PacketPtr pkt) override;
705  void recvReqRetry() override { assert(false); }
706 
707  bool isStalled() const { return stalled; }
708  void stallPort() { stalled = true; }
709  void unstallPort() { stalled = false; }
710 
712 
713  private:
715  bool stalled;
716  };
717 
718  class ITLBPort : public RequestPort
719  {
720  public:
721  ITLBPort(const std::string &_name, ComputeUnit *_cu)
722  : RequestPort(_name, _cu), computeUnit(_cu), stalled(false) { }
723 
724 
725  bool isStalled() { return stalled; }
726  void stallPort() { stalled = true; }
727  void unstallPort() { stalled = false; }
728 
734 
739  {
740  // The wavefront associated with this request
742 
743  SenderState(Wavefront *_wavefront) : wavefront(_wavefront) { }
744  };
745 
746  protected:
748  bool stalled;
749 
750  virtual bool recvTimingResp(PacketPtr pkt);
751  virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
752  virtual void recvFunctional(PacketPtr pkt) { }
753  virtual void recvRangeChange() { }
754  virtual void recvReqRetry();
755  };
756 
760  class LDSPort : public RequestPort
761  {
762  public:
763  LDSPort(const std::string &_name, ComputeUnit *_cu)
764  : RequestPort(_name, _cu), computeUnit(_cu)
765  {
766  }
767 
768  bool isStalled() const { return stalled; }
769  void stallPort() { stalled = true; }
770  void unstallPort() { stalled = false; }
771 
776  std::queue<PacketPtr> retries;
777 
783  {
784  protected:
785  // The actual read/write/atomic request that goes with this command
787 
788  public:
790  _gpuDynInst(gpuDynInst)
791  {
792  }
793 
795  getMemInst() const
796  {
797  return _gpuDynInst;
798  }
799  };
800 
801  virtual bool
803 
804  protected:
805 
806  bool stalled = false;
807 
809 
810  virtual bool
812 
813  virtual Tick
814  recvAtomic(PacketPtr pkt) { return 0; }
815 
816  virtual void
818  {
819  }
820 
821  virtual void
823  {
824  }
825 
826  virtual void
827  recvReqRetry();
828  };
829 
834 
835  TokenManager *
837  {
838  return memPortTokens;
839  }
840 
845  // port to the TLB hierarchy (i.e., the L1 TLB)
847  // port to the scalar data cache
849  // port to the scalar data TLB
851  // port to the SQC (i.e. the I-cache)
853  // port to the SQC TLB (there's a separate TLB for each I-cache)
855 
856  Port &
857  getPort(const std::string &if_name, PortID idx) override
858  {
859  if (if_name == "memory_port" && idx < memPort.size()) {
860  return memPort[idx];
861  } else if (if_name == "translation_port" && idx < tlbPort.size()) {
862  return tlbPort[idx];
863  } else if (if_name == "scalar_port") {
864  return scalarDataPort;
865  } else if (if_name == "scalar_tlb_port") {
866  return scalarDTLBPort;
867  } else if (if_name == "sqc_port") {
868  return sqcPort;
869  } else if (if_name == "sqc_tlb_port") {
870  return sqcTLBPort;
871  } else if (if_name == "ldsPort") {
872  return ldsPort;
873  } else if (if_name == "gmTokenPort") {
874  return gmTokenPort;
875  } else {
876  return ClockedObject::getPort(if_name, idx);
877  }
878  }
879 
881 
882  private:
883  const int _cacheLineSize;
884  const int _numBarrierSlots;
888 
923 
931  std::unordered_set<int> freeBarrierIds;
932 
933  // hold the time of the arrival of the first cache block related to
934  // a particular GPUDynInst. This is used to calculate the difference
935  // between the first and last cache block arrival times.
936  std::unordered_map<GPUDynInstPtr, Tick> headTailMap;
937 
938  public:
939  void updateInstStats(GPUDynInstPtr gpuDynInst);
941 
943  {
944  ComputeUnitStats(Stats::Group *parent, int n_wf);
945 
968 
975 
976  // Cycles required to send register source (addr and data) from
977  // register files to memory pipeline, per SIMD.
981 
1003 
1005 
1006  // the following stats compute the avg. TLB access latency per
1007  // uncoalesced request (only for data)
1011  // hitsPerTLBLevel[x] are the hits in Level x TLB.
1012  // x = 0 is the page table.
1014 
1017 
1018  // over all memory instructions executed over all wavefronts
1019  // how many touched 0-4 pages, 4-8, ..., 60-64 pages
1021  // count of non-flat global memory vector instructions executed
1023  // count of flat global memory vector instructions executed
1026 
1029  // Number of instructions executed, i.e. if 64 (or 32 or 7) lanes are
1030  // active when the instruction is committed, this number is still
1031  // incremented by 1
1033  // Number of cycles among successive instruction executions across all
1034  // wavefronts of the same CU
1036  // number of individual vector operations executed
1038  // number of individual f16 vector operations executed
1040  // number of individual f32 vector operations executed
1042  // number of individual f64 vector operations executed
1044  // number of individual FMA 16,32,64 vector operations executed
1048  // number of individual MAC 16,32,64 vector operations executed
1052  // number of individual MAD 16,32,64 vector operations executed
1056  // total number of two op FP vector operations executed
1058  // Total cycles that something is running on the GPU
1060  Stats::Formula vpc; // vector ops per cycle
1061  Stats::Formula vpc_f16; // vector ops per cycle
1062  Stats::Formula vpc_f32; // vector ops per cycle
1063  Stats::Formula vpc_f64; // vector ops per cycle
1064  Stats::Formula ipc; // vector instructions per cycle
1068  // number of vector ALU instructions received
1070  // number of times a WG cannot start due to lack of free VGPRs in SIMDs
1072  // number of times a WG cannot start due to lack of free SGPRs in SIMDs
1078 
1079  // distribution in latency difference between first and last cache block
1080  // arrival ticks
1082 
1083  // Track the amount of interleaving between wavefronts on each SIMD.
1084  // This stat is sampled using instExecPerSimd to compute the number
1085  // of instructions that have been executed on a SIMD between a WF
1086  // executing two successive instructions.
1088  } stats;
1089 };
1090 
1091 #endif // __COMPUTE_UNIT_HH__
ComputeUnit::DataPort::createMemReqEvent
EventFunctionWrapper * createMemReqEvent(PacketPtr pkt)
Definition: compute_unit.cc:1545
ComputeUnit::vectorALUs
std::vector< WaitClass > vectorALUs
Definition: compute_unit.hh:244
ComputeUnit::ITLBPort::isStalled
bool isStalled()
Definition: compute_unit.hh:725
ComputeUnit::vectorSharedMemUnit
WaitClass vectorSharedMemUnit
Definition: compute_unit.hh:232
ComputeUnit::ITLBPort
Definition: compute_unit.hh:718
ComputeUnit::ComputeUnitStats::completedWGs
Stats::Scalar completedWGs
Definition: compute_unit.hh:1077
ComputeUnit::SQCPort::retries
std::deque< std::pair< PacketPtr, Wavefront * > > retries
Definition: compute_unit.hh:623
ComputeUnit::LDSPort::recvRangeChange
virtual void recvRangeChange()
Called to receive an address range change from the peer response port.
Definition: compute_unit.hh:822
ComputeUnit::LDSPort::computeUnit
ComputeUnit * computeUnit
Definition: compute_unit.hh:808
hsa_queue_entry.hh
ComputeUnit::DTLBPort::SenderState::SenderState
SenderState(GPUDynInstPtr gpuDynInst, PortID port_index)
Definition: compute_unit.hh:674
ComputeUnit::ComputeUnitStats::pageDivergenceDist
Stats::Distribution pageDivergenceDist
Definition: compute_unit.hh:1020
ComputeUnit::ScalarDataPort::MemReqEvent::MemReqEvent
MemReqEvent(ScalarDataPort &_scalar_data_port, PacketPtr _pkt)
Definition: compute_unit.hh:585
EventBase::AutoDelete
static const FlagsType AutoDelete
Definition: eventq.hh:104
ExecStage
Definition: exec_stage.hh:72
ComputeUnit::ComputeUnitStats::instCyclesVALU
Stats::Scalar instCyclesVALU
Definition: compute_unit.hh:950
ComputeUnit::ITLBPort::stalled
bool stalled
Definition: compute_unit.hh:748
ComputeUnit::numScalarRegsPerSimd
int numScalarRegsPerSimd
Definition: compute_unit.hh:372
ComputeUnit::ScalarDTLBPort::recvReqRetry
void recvReqRetry() override
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Definition: compute_unit.hh:705
ComputeUnit::LDSPort::stallPort
void stallPort()
Definition: compute_unit.hh:769
ComputeUnit::GMTokenPort::~GMTokenPort
~GMTokenPort()
Definition: compute_unit.hh:493
ComputeUnit::prefetchType
Enums::PrefetchType prefetchType
Definition: compute_unit.hh:337
ComputeUnit::ComputeUnitStats::vectorMemWritesPerWF
Stats::Formula vectorMemWritesPerWF
Definition: compute_unit.hh:961
ComputeUnit::ComputeUnitStats::kernargReads
Stats::Scalar kernargReads
Definition: compute_unit.hh:1000
ComputeUnit::ScalarDataPort::MemReqEvent::description
const char * description() const
Return a C string describing the event.
Definition: compute_unit.cc:1584
ComputeUnit::ComputeUnitStats::spillMemInsts
Stats::Formula spillMemInsts
Definition: compute_unit.hh:990
ComputeUnit::ScalarDataPort::MemReqEvent::scalarDataPort
ScalarDataPort & scalarDataPort
Definition: compute_unit.hh:581
LocalMemPipeline
Definition: local_memory_pipeline.hh:56
ComputeUnit::resetBarrier
void resetBarrier(int bar_id)
Definition: compute_unit.cc:680
ComputeUnit::ComputeUnitStats::hitsPerTLBLevel
Stats::Vector hitsPerTLBLevel
Definition: compute_unit.hh:1013
ComputeUnit::deleteFromPipeMap
void deleteFromPipeMap(Wavefront *w)
Definition: compute_unit.cc:505
ComputeUnit::req_tick_latency
Tick req_tick_latency
Definition: compute_unit.hh:354
ComputeUnit::simdUnitWidth
int simdUnitWidth() const
Definition: compute_unit.hh:387
ComputeUnit::initiateFetch
void initiateFetch(Wavefront *wavefront)
ComputeUnit::pagesTouched
std::map< Addr, int > pagesTouched
Definition: compute_unit.hh:377
ComputeUnit::init
virtual void init() override
init() is called after all C++ SimObjects have been created and all ports are connected.
Definition: compute_unit.cc:750
ComputeUnit::ComputeUnitStats::vpc_f32
Stats::Formula vpc_f32
Definition: compute_unit.hh:1062
ComputeUnit::vrfToGlobalMemPipeBus
WaitClass vrfToGlobalMemPipeBus
Definition: compute_unit.hh:222
ComputeUnit::loadBusLength
int loadBusLength() const
Definition: compute_unit.hh:392
ComputeUnit::DTLBPort::stalled
bool stalled
Definition: compute_unit.hh:681
ComputeUnit::mapWaveToScalarAluGlobalIdx
int mapWaveToScalarAluGlobalIdx(Wavefront *w) const
Definition: compute_unit.cc:262
TLB_HIT_CACHE_MISS
@ TLB_HIT_CACHE_MISS
Definition: compute_unit.hh:81
ComputeUnit::ComputeUnitStats::numVecOpsExecutedMAC64
Stats::Scalar numVecOpsExecutedMAC64
Definition: compute_unit.hh:1051
WFBarrier::maxBarrierCnt
int maxBarrierCnt() const
Definition: compute_unit.hh:114
WFBarrier::incNumAtBarrier
void incNumAtBarrier()
Mark that a WF has reached the barrier.
Definition: compute_unit.hh:133
ComputeUnit::locMemToVrfBus
WaitClass locMemToVrfBus
Definition: compute_unit.hh:228
ComputeUnit::LDSPort::retries
std::queue< PacketPtr > retries
here we queue all the requests that were not successfully sent.
Definition: compute_unit.hh:776
Shader
Definition: shader.hh:87
ComputeUnit::ScalarDataPort::computeUnit
ComputeUnit * computeUnit
Definition: compute_unit.hh:598
WFBarrier::numYetToReachBarrier
int numYetToReachBarrier() const
Number of WFs that have not yet reached the barrier.
Definition: compute_unit.hh:108
ComputeUnit::dispWorkgroup
void dispWorkgroup(HSAQueueEntry *task, int num_wfs_in_wg)
Definition: compute_unit.cc:418
ComputeUnit::numScalarMemUnits
int numScalarMemUnits
Definition: compute_unit.hh:234
ComputeUnit::ITLBPort::recvAtomic
virtual Tick recvAtomic(PacketPtr pkt)
Definition: compute_unit.hh:751
ComputeUnit::ScalarDataPort::ScalarDataPort
ScalarDataPort(const std::string &_name, ComputeUnit *_cu)
Definition: compute_unit.hh:558
MipsISA::index
Bitfield< 30, 0 > index
Definition: pra_constants.hh:44
ComputeUnit::SQCPort::SenderState::SenderState
SenderState(Wavefront *_wavefront, Packet::SenderState *sender_state=nullptr, int _kernId=-1)
Definition: compute_unit.hh:617
fetch_stage.hh
ComputeUnit::DTLBPort::DTLBPort
DTLBPort(const std::string &_name, ComputeUnit *_cu, PortID id)
Definition: compute_unit.hh:646
group.hh
ComputeUnit::ITLBPort::recvRangeChange
virtual void recvRangeChange()
Called to receive an address range change from the peer response port.
Definition: compute_unit.hh:753
comm.hh
ComputeUnit::SQCPort
Definition: compute_unit.hh:602
InvalidPortID
const PortID InvalidPortID
Definition: types.hh:244
ComputeUnit::localMemoryPipe
LocalMemPipeline localMemoryPipe
Definition: compute_unit.hh:284
ComputeUnit::mapWaveToGlobalMem
int mapWaveToGlobalMem(Wavefront *w) const
Definition: compute_unit.cc:269
ComputeUnit::LDSPort::LDSPort
LDSPort(const std::string &_name, ComputeUnit *_cu)
Definition: compute_unit.hh:763
ComputeUnit::srf_scm_bus_latency
Cycles srf_scm_bus_latency
Definition: compute_unit.hh:317
ComputeUnit::perLaneTLB
bool perLaneTLB
Definition: compute_unit.hh:328
ComputeUnit::DataPort::DataPort
DataPort(const std::string &_name, ComputeUnit *_cu, PortID id)
Definition: compute_unit.hh:510
global_memory_pipeline.hh
ComputeUnit::ComputeUnitStats::flatLDSInstsPerWF
Stats::Formula flatLDSInstsPerWF
Definition: compute_unit.hh:959
ComputeUnit::incNumAtBarrier
void incNumAtBarrier(int bar_id)
Definition: compute_unit.cc:659
ComputeUnit::vrf_gm_bus_latency
Cycles vrf_gm_bus_latency
Definition: compute_unit.hh:315
ComputeUnit::ComputeUnitStats::numVecOpsExecutedTwoOpFP
Stats::Scalar numVecOpsExecutedTwoOpFP
Definition: compute_unit.hh:1057
ComputeUnit::ComputeUnitStats::privMemInsts
Stats::Formula privMemInsts
Definition: compute_unit.hh:996
ComputeUnit::ITLBPort::SenderState::wavefront
Wavefront * wavefront
Definition: compute_unit.hh:741
ComputeUnit::ComputeUnitStats::readonlyMemInsts
Stats::Formula readonlyMemInsts
Definition: compute_unit.hh:999
ComputeUnit::scalarDTLBPort
ScalarDTLBPort scalarDTLBPort
Definition: compute_unit.hh:850
ComputeUnit::mapWaveToScalarMem
int mapWaveToScalarMem(Wavefront *w) const
Definition: compute_unit.cc:285
ComputeUnit::lastMemUnit
int lastMemUnit() const
Definition: compute_unit.cc:244
ComputeUnit::mapWaveToLocalMem
int mapWaveToLocalMem(Wavefront *w) const
Definition: compute_unit.cc:277
ComputeUnit::ComputeUnitStats::numVecOpsExecutedF64
Stats::Scalar numVecOpsExecutedF64
Definition: compute_unit.hh:1043
ComputeUnit::numCyclesPerLoadTransfer
int numCyclesPerLoadTransfer
Definition: compute_unit.hh:270
ComputeUnit::DataPort::recvFunctional
virtual void recvFunctional(PacketPtr pkt)
Definition: compute_unit.hh:541
ComputeUnit::DTLBPort::recvRangeChange
virtual void recvRangeChange()
Called to receive an address range change from the peer response port.
Definition: compute_unit.hh:686
ComputeUnit::headTailMap
std::unordered_map< GPUDynInstPtr, Tick > headTailMap
Definition: compute_unit.hh:936
ComputeUnit::operandNetworkLength
int operandNetworkLength
Definition: compute_unit.hh:310
ComputeUnit::numYetToReachBarrier
int numYetToReachBarrier(int bar_id)
Definition: compute_unit.cc:645
ComputeUnit::ComputeUnitStats::tlbCycles
Stats::Scalar tlbCycles
Definition: compute_unit.hh:1009
ComputeUnit::DTLBPort
Data TLB port.
Definition: compute_unit.hh:643
ComputeUnit::ITLBPort::retries
std::deque< PacketPtr > retries
here we queue all the translation requests that were not successfully sent.
Definition: compute_unit.hh:733
ComputeUnit::gmTokenPort
GMTokenPort gmTokenPort
Definition: compute_unit.hh:504
ComputeUnit::SQCPort::SenderState::wavefront
Wavefront * wavefront
Definition: compute_unit.hh:612
ComputeUnit::doFlush
void doFlush(GPUDynInstPtr gpuDynInst)
trigger flush operation in the cu
Definition: compute_unit.cc:400
ComputeUnit::ComputeUnitStats::numVecOpsExecutedFMA64
Stats::Scalar numVecOpsExecutedFMA64
Definition: compute_unit.hh:1047
ComputeUnit::memPortTokens
TokenManager * memPortTokens
Definition: compute_unit.hh:503
ComputeUnit::SQCPort::getDeviceAddressRanges
virtual void getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
Definition: compute_unit.hh:635
FetchStage
Definition: fetch_stage.hh:53
ComputeUnit::ComputeUnitStats::scalarMemInstsPerKiloInst
Stats::Formula scalarMemInstsPerKiloInst
Definition: compute_unit.hh:974
ComputeUnit::ComputeUnitStats::globalReads
Stats::Scalar globalReads
Definition: compute_unit.hh:982
Tick
uint64_t Tick
Tick count type.
Definition: types.hh:59
ComputeUnit::ComputeUnitStats::groupReads
Stats::Scalar groupReads
Definition: compute_unit.hh:991
ComputeUnit::ComputeUnitStats::numVecOpsExecutedMAD64
Stats::Scalar numVecOpsExecutedMAD64
Definition: compute_unit.hh:1055
ComputeUnit::ComputeUnitStats::sALUInstsPerWF
Stats::Formula sALUInstsPerWF
Definition: compute_unit.hh:949
ComputeUnit::ScalarDataPort::SenderState
Definition: compute_unit.hh:566
PortID
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Definition: types.hh:243
ComputeUnit::cu_id
int cu_id
Definition: compute_unit.hh:291
ComputeUnit::ScalarDataPort::MemReqEvent::process
void process()
Definition: compute_unit.cc:1590
ComputeUnit::numExeUnits
int numExeUnits() const
Definition: compute_unit.cc:229
ComputeUnit::ScalarDTLBPort::isStalled
bool isStalled() const
Definition: compute_unit.hh:707
RequestPtr
std::shared_ptr< Request > RequestPtr
Definition: request.hh:86
ComputeUnit::ComputeUnitStats::flatVMemInstsPerWF
Stats::Formula flatVMemInstsPerWF
Definition: compute_unit.hh:957
TLB_MISS_CACHE_HIT
@ TLB_MISS_CACHE_HIT
Definition: compute_unit.hh:80
scoreboard_check_stage.hh
ComputeUnit::ScalarDataPort::SenderState::SenderState
SenderState(GPUDynInstPtr gpuDynInst, Packet::SenderState *sender_state=nullptr)
Definition: compute_unit.hh:568
ComputeUnit::stats
ComputeUnit::ComputeUnitStats stats
ComputeUnit::vectorGlobalMemUnit
WaitClass vectorGlobalMemUnit
Definition: compute_unit.hh:224
ComputeUnit::ITLBPort::ITLBPort
ITLBPort(const std::string &_name, ComputeUnit *_cu)
Definition: compute_unit.hh:721
ComputeUnit::registerManager
RegisterManager * registerManager
Definition: compute_unit.hh:277
WaitClass
Definition: misc.hh:64
ComputeUnit::functionalTLB
bool functionalTLB
Definition: compute_unit.hh:344
std::vector< WaitClass >
ComputeUnit::ComputeUnitStats::dynamicGMemInstrCnt
Stats::Scalar dynamicGMemInstrCnt
Definition: compute_unit.hh:1022
ScalarRegisterFile
Definition: scalar_register_file.hh:48
ComputeUnit::SQCPort::recvReqRetry
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Definition: compute_unit.cc:984
ComputeUnit::debugSegFault
bool debugSegFault
Definition: compute_unit.hh:340
TLB_CACHE
TLB_CACHE
Definition: compute_unit.hh:77
ComputeUnit::vrf_lm_bus_latency
Cycles vrf_lm_bus_latency
Definition: compute_unit.hh:319
ComputeUnit::LDSPort::sendTimingReq
virtual bool sendTimingReq(PacketPtr pkt)
Attempt to send this packet; either the port is already stalled, the request is nack'd and must stall...
Definition: compute_unit.cc:2025
ComputeUnit::DTLBPort::computeUnit
ComputeUnit * computeUnit
Definition: compute_unit.hh:680
ComputeUnit::ITLBPort::unstallPort
void unstallPort()
Definition: compute_unit.hh:727
ComputeUnit::ScalarDataPort::MemReqEvent
Definition: compute_unit.hh:578
ComputeUnit::scalarPipeLength
int scalarPipeLength() const
Definition: compute_unit.hh:390
ComputeUnit::ComputeUnitStats::numVecOpsExecutedF32
Stats::Scalar numVecOpsExecutedF32
Definition: compute_unit.hh:1041
ComputeUnit::scheduleStage
ScheduleStage scheduleStage
Definition: compute_unit.hh:281
M5_NODISCARD
#define M5_NODISCARD
Definition: compiler.hh:68
ComputeUnit::allAtBarrier
bool allAtBarrier(int bar_id)
Definition: compute_unit.cc:652
ComputeUnit::ComputeUnitStats::scalarMemReadsPerWF
Stats::Formula scalarMemReadsPerWF
Definition: compute_unit.hh:967
ComputeUnit::ComputeUnitStats::numTimesWgBlockedDueSgprAlloc
Stats::Scalar numTimesWgBlockedDueSgprAlloc
Definition: compute_unit.hh:1073
EXEC_POLICY
EXEC_POLICY
Definition: compute_unit.hh:71
Stats::Vector
A vector of scalar stats.
Definition: statistics.hh:2007
ComputeUnit::ScalarDTLBPort::SenderState
Definition: compute_unit.hh:698
ComputeUnit::getLds
LdsState & getLds() const
Definition: compute_unit.hh:471
ComputeUnit::insertInPipeMap
void insertInPipeMap(Wavefront *w)
Definition: compute_unit.cc:496
HSAQueueEntry
Definition: hsa_queue_entry.hh:58
WFBarrier::_maxBarrierCnt
int _maxBarrierCnt
The maximum number of WFs that can reach this barrier.
Definition: compute_unit.hh:197
Stats::VectorDistribution
A vector of distributions.
Definition: statistics.hh:2244
ComputeUnit::ComputeUnit
ComputeUnit(const Params &p)
Definition: compute_unit.cc:62
ComputeUnit::exitCallback
void exitCallback()
Definition: compute_unit.cc:1910
ComputeUnit::ComputeUnitStats::numVecOpsExecutedF16
Stats::Scalar numVecOpsExecutedF16
Definition: compute_unit.hh:1039
ComputeUnit::DTLBPort::recvFunctional
virtual void recvFunctional(PacketPtr pkt)
Definition: compute_unit.hh:685
RR
@ RR
Definition: compute_unit.hh:74
ComputeUnit::scalarPipeStages
int scalarPipeStages
Definition: compute_unit.hh:308
ComputeUnit::ITLBPort::computeUnit
ComputeUnit * computeUnit
Definition: compute_unit.hh:747
ComputeUnit::handleMemPacket
void handleMemPacket(PacketPtr pkt, int memport_index)
ComputeUnit::numVectorGlobalMemUnits
int numVectorGlobalMemUnits
Definition: compute_unit.hh:218
ComputeUnit::DataPort::getDeviceAddressRanges
virtual void getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
Definition: compute_unit.hh:546
ComputeUnit::ComputeUnitStats::instCyclesLdsPerSimd
Stats::Vector instCyclesLdsPerSimd
Definition: compute_unit.hh:980
ComputeUnit::ComputeUnitStats::numCASOps
Stats::Scalar numCASOps
Definition: compute_unit.hh:1074
ComputeUnit::LDSPort::SenderState
SenderState is information carried along with the packet, esp.
Definition: compute_unit.hh:782
ClockedObject
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
Definition: clocked_object.hh:231
ComputeUnit::scalarALUs
std::vector< WaitClass > scalarALUs
Definition: compute_unit.hh:248
ComputeUnit::DataPort::SenderState::SenderState
SenderState(GPUDynInstPtr gpuDynInst, PortID _port_index, Packet::SenderState *sender_state=nullptr)
Definition: compute_unit.hh:521
ComputeUnit::ScalarDTLBPort::recvTimingResp
bool recvTimingResp(PacketPtr pkt) override
Receive a timing response from the peer.
Definition: compute_unit.cc:1650
ComputeUnit::ComputeUnitStats::vectorMemWritesPerKiloInst
Stats::Formula vectorMemWritesPerKiloInst
Definition: compute_unit.hh:970
EventFunctionWrapper
Definition: eventq.hh:1112
ComputeUnit::glbMemToVrfBus
WaitClass glbMemToVrfBus
Definition: compute_unit.hh:220
ComputeUnit::SQCPort::recvFunctional
virtual void recvFunctional(PacketPtr pkt)
Definition: compute_unit.hh:630
ComputeUnit::ScalarDTLBPort::computeUnit
ComputeUnit * computeUnit
Definition: compute_unit.hh:714
ComputeUnit::DTLBPort::recvReqRetry
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Definition: compute_unit.cc:1619
ComputeUnit::ComputeUnitStats::scalarMemWritesPerKiloInst
Stats::Formula scalarMemWritesPerKiloInst
Definition: compute_unit.hh:973
ComputeUnit::ComputeUnitStats::groupWrites
Stats::Scalar groupWrites
Definition: compute_unit.hh:992
ComputeUnit::ITLBPort::recvReqRetry
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Definition: compute_unit.cc:1763
Stats::Scalar
This is a simple scalar statistic, like a counter.
Definition: statistics.hh:1933
ComputeUnit::tickEvent
EventFunctionWrapper tickEvent
Definition: compute_unit.hh:287
ComputeUnit
Definition: compute_unit.hh:200
RequestorID
uint16_t RequestorID
Definition: request.hh:89
ComputeUnit::DTLBPort::recvTimingResp
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.
Definition: compute_unit.cc:1377
ComputeUnit::LDSPort::SenderState::_gpuDynInst
GPUDynInstPtr _gpuDynInst
Definition: compute_unit.hh:786
ComputeUnit::ComputeUnitStats::ldsBankConflictDist
Stats::Distribution ldsBankConflictDist
Definition: compute_unit.hh:1016
ComputeUnit::ComputeUnitStats::spillWrites
Stats::Scalar spillWrites
Definition: compute_unit.hh:989
ComputeUnit::LDSPort::SenderState::SenderState
SenderState(GPUDynInstPtr gpuDynInst)
Definition: compute_unit.hh:789
ComputeUnit::LDSPort::stalled
bool stalled
Whether or not the port is stalled.
Definition: compute_unit.hh:806
ComputeUnit::decMaxBarrierCnt
void decMaxBarrierCnt(int bar_id)
Definition: compute_unit.cc:687
ComputeUnit::srf
std::vector< ScalarRegisterFile * > srf
Definition: compute_unit.hh:296
ComputeUnit::cacheLineBits
int cacheLineBits
Definition: compute_unit.hh:885
ComputeUnit::ComputeUnitStats::wgBlockedDueBarrierAllocation
Stats::Scalar wgBlockedDueBarrierAllocation
Definition: compute_unit.hh:1027
ComputeUnit::exec
void exec()
Definition: compute_unit.cc:717
ComputeUnit::ComputeUnitStats::totalCycles
Stats::Scalar totalCycles
Definition: compute_unit.hh:1059
ComputeUnit::ComputeUnitStats::vpc
Stats::Formula vpc
Definition: compute_unit.hh:1060
ComputeUnit::numVectorSharedMemUnits
int numVectorSharedMemUnits
Definition: compute_unit.hh:226
ScheduleToExecute
Communication interface between Schedule and Execute stages.
Definition: comm.hh:97
Packet::SenderState
A virtual base opaque structure used to hold state associated with the packet (e.g....
Definition: packet.hh:432
ComputeUnit::DataPort::recvRangeChange
virtual void recvRangeChange()
Called to receive an address range change from the peer response port.
Definition: compute_unit.hh:542
ComputeUnit::spBypassLength
int spBypassLength() const
Definition: compute_unit.hh:388
ComputeUnit::dpBypassPipeLength
int dpBypassPipeLength
Definition: compute_unit.hh:306
Event
Definition: eventq.hh:248
ComputeUnit::memPort
std::vector< DataPort > memPort
The memory port for SIMD data accesses.
Definition: compute_unit.hh:844
ComputeUnit::getRefCounter
int32_t getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
Definition: compute_unit.cc:1957
MipsISA::w
Bitfield< 0 > w
Definition: pra_constants.hh:278
ComputeUnit::DataPort::SenderState::port_index
PortID port_index
Definition: compute_unit.hh:518
ComputeUnit::numBarrierSlots
int numBarrierSlots() const
Definition: compute_unit.hh:443
SimObject::getPort
virtual Port & getPort(const std::string &if_name, PortID idx=InvalidPortID)
Get a port with a given name and index.
Definition: sim_object.cc:120
ComputeUnit::wfBarrierSlots
std::vector< WFBarrier > wfBarrierSlots
The barrier slots for this CU.
Definition: compute_unit.hh:927
ComputeUnit::ScalarDTLBPort::SenderState::SenderState
SenderState(GPUDynInstPtr gpuDynInst)
Definition: compute_unit.hh:700
ComputeUnit::maxBarrierCnt
int maxBarrierCnt(int bar_id)
Definition: compute_unit.cc:673
ComputeUnit::ComputeUnitStats::kernargWrites
Stats::Scalar kernargWrites
Definition: compute_unit.hh:1001
ComputeUnit::ComputeUnitStats::tlbLatency
Stats::Formula tlbLatency
Definition: compute_unit.hh:1010
ComputeUnit::SQCPort::recvAtomic
virtual Tick recvAtomic(PacketPtr pkt)
Definition: compute_unit.hh:629
ComputeUnit::sendRequest
void sendRequest(GPUDynInstPtr gpuDynInst, PortID index, PacketPtr pkt)
Definition: compute_unit.cc:1007
ComputeUnit::idleCUTimeout
Tick idleCUTimeout
Definition: compute_unit.hh:342
ComputeUnit::ComputeUnitStats::flatLDSInsts
Stats::Scalar flatLDSInsts
Definition: compute_unit.hh:958
ComputeUnit::injectGlobalMemFence
void injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelMemSync, RequestPtr req=nullptr)
Definition: compute_unit.cc:1219
ComputeUnit::idleWfs
int idleWfs
Definition: compute_unit.hh:343
ComputeUnit::ComputeUnitStats::numFailedCASOps
Stats::Scalar numFailedCASOps
Definition: compute_unit.hh:1075
ComputeUnit::ComputeUnitStats::vpc_f64
Stats::Formula vpc_f64
Definition: compute_unit.hh:1063
LdsState
Definition: lds_state.hh:119
ComputeUnit::ITLBPort::SenderState
SenderState is information carried along with the packet throughout the TLB hierarchy.
Definition: compute_unit.hh:738
ComputeUnit::_numBarrierSlots
const int _numBarrierSlots
Definition: compute_unit.hh:884
statistics.hh
ComputeUnit::resp_tick_latency
Tick resp_tick_latency
Definition: compute_unit.hh:355
ComputeUnit::ScalarDataPort::SenderState::saved
Packet::SenderState * saved
Definition: compute_unit.hh:575
ComputeUnit::ComputeUnitStats::ldsNoFlatInstsPerWF
Stats::Formula ldsNoFlatInstsPerWF
Definition: compute_unit.hh:955
ComputeUnit::DataPort::retries
std::deque< std::pair< PacketPtr, GPUDynInstPtr > > retries
Definition: compute_unit.hh:534
ComputeUnit::ComputeUnitStats::vALUInsts
Stats::Scalar vALUInsts
Definition: compute_unit.hh:946
Port
Ports are used to interface objects to each other.
Definition: port.hh:56
ComputeUnit::vrf
std::vector< VectorRegisterFile * > vrf
Definition: compute_unit.hh:294
ComputeUnit::instExecPerSimd
std::vector< uint64_t > instExecPerSimd
Definition: compute_unit.hh:325
ComputeUnit::wavefrontSize
int wavefrontSize
Definition: compute_unit.hh:887
ComputeUnit::ComputeUnitStats::sALUInsts
Stats::Scalar sALUInsts
Definition: compute_unit.hh:948
WFBarrier::reset
void reset()
Reset the barrier.
Definition: compute_unit.hh:177
ComputeUnit::ComputeUnitStats::argReads
Stats::Scalar argReads
Definition: compute_unit.hh:985
ComputeUnit::ComputeUnitStats::globalWrites
Stats::Scalar globalWrites
Definition: compute_unit.hh:983
WFBarrier::decMaxBarrierCnt
void decMaxBarrierCnt()
Decrement the number of WFs that are participating in this barrier.
Definition: compute_unit.hh:155
Event::setFlags
void setFlags(Flags _flags)
Definition: eventq.hh:325
ComputeUnit::ComputeUnitStats::numTimesWgBlockedDueVgprAlloc
Stats::Scalar numTimesWgBlockedDueVgprAlloc
Definition: compute_unit.hh:1071
ComputeUnit::prefetchDepth
int prefetchDepth
Definition: compute_unit.hh:330
ComputeUnit::wfList
std::vector< std::vector< Wavefront * > > wfList
Definition: compute_unit.hh:290
ComputeUnit::storeBusLength
int storeBusLength() const
Definition: compute_unit.hh:391
ComputeUnit::prefetchStride
int prefetchStride
Definition: compute_unit.hh:332
port.hh
ComputeUnit::ComputeUnitStats::numVecOpsExecutedMAC16
Stats::Scalar numVecOpsExecutedMAC16
Definition: compute_unit.hh:1049
ComputeUnit::scalarDataPort
ScalarDataPort scalarDataPort
Definition: compute_unit.hh:848
ComputeUnit::SQCPort::SenderState::kernId
int kernId
Definition: compute_unit.hh:615
ComputeUnit::ComputeUnitStats::tlbRequests
Stats::Scalar tlbRequests
Definition: compute_unit.hh:1008
ComputeUnit::ComputeUnitStats::scalarMemReadsPerKiloInst
Stats::Formula scalarMemReadsPerKiloInst
Definition: compute_unit.hh:972
ComputeUnit::DTLBPort::SenderState::_gpuDynInst
GPUDynInstPtr _gpuDynInst
Definition: compute_unit.hh:667
WFBarrier
WF barrier slots.
Definition: compute_unit.hh:89
ComputeUnit::scoreboardCheckToSchedule
ScoreboardCheckToSchedule scoreboardCheckToSchedule
TODO: Update these comments once the pipe stage interface has been fully refactored.
Definition: compute_unit.hh:921
ComputeUnit::SQCPort::recvRangeChange
virtual void recvRangeChange()
Called to receive an address range change from the peer response port.
Definition: compute_unit.hh:631
ComputeUnit::ScalarDataPort::MemReqEvent::pkt
PacketPtr pkt
Definition: compute_unit.hh:582
ComputeUnit::releaseBarrier
void releaseBarrier(int bar_id)
Definition: compute_unit.cc:694
ComputeUnit::ScalarDTLBPort::ScalarDTLBPort
ScalarDTLBPort(const std::string &_name, ComputeUnit *_cu)
Definition: compute_unit.hh:693
TLB_MISS_CACHE_MISS
@ TLB_MISS_CACHE_MISS
Definition: compute_unit.hh:79
ComputeUnit::ComputeUnitStats::privWrites
Stats::Scalar privWrites
Definition: compute_unit.hh:995
ComputeUnit::ScalarDTLBPort
Definition: compute_unit.hh:690
ComputeUnit::DataPort::snoopRangeSent
bool snoopRangeSent
Definition: compute_unit.hh:513
ComputeUnit::DataPort::computeUnit
ComputeUnit * computeUnit
Definition: compute_unit.hh:537
ComputeUnit::ComputeUnitStats::controlFlowDivergenceDist
Stats::Distribution controlFlowDivergenceDist
Definition: compute_unit.hh:1065
ComputeUnit::ComputeUnitStats::ComputeUnitStats
ComputeUnitStats(Stats::Group *parent, int n_wf)
Definition: compute_unit.cc:2098
WFBarrier::allAtBarrier
bool allAtBarrier() const
Have all WFs participating in this barrier reached the barrier? If so, then the barrier is satisfied ...
Definition: compute_unit.hh:145
compiler.hh
ComputeUnit::globalSeqNum
InstSeqNum globalSeqNum
Definition: compute_unit.hh:886
ComputeUnit::vrfToLocalMemPipeBus
WaitClass vrfToLocalMemPipeBus
Definition: compute_unit.hh:230
ComputeUnit::fetchStage
FetchStage fetchStage
Definition: compute_unit.hh:279
ComputeUnit::isVectorAluIdle
bool isVectorAluIdle(uint32_t simdId) const
Definition: compute_unit.cc:1964
ComputeUnit::ComputeUnitStats::spillReads
Stats::Scalar spillReads
Definition: compute_unit.hh:988
ComputeUnit::startWavefront
void startWavefront(Wavefront *w, int waveId, LdsChunk *ldsChunk, HSAQueueEntry *task, int bar_id, bool fetchContext=false)
Definition: compute_unit.cc:307
Port::id
const PortID id
A numeric identifier to distinguish ports in a vector, and set to InvalidPortID in case this port is ...
Definition: port.hh:74
ComputeUnit::scalarMemUnit
WaitClass scalarMemUnit
Definition: compute_unit.hh:240
ComputeUnit::DataPort::SenderState::saved
Packet::SenderState * saved
Definition: compute_unit.hh:519
ComputeUnit::DTLBPort::retries
std::deque< PacketPtr > retries
Queue of all the translation requests that were not successfully sent.
Definition: compute_unit.hh:659
ComputeUnit::issuePeriod
Cycles issuePeriod
Definition: compute_unit.hh:312
OLDEST
@ OLDEST
Definition: compute_unit.hh:73
ComputeUnit::DataPort::SenderState
Definition: compute_unit.hh:515
ComputeUnit::LDSPort::unstallPort
void unstallPort()
Definition: compute_unit.hh:770
ComputeUnit::firstMemUnit
int firstMemUnit() const
Definition: compute_unit.cc:237
ComputeUnit::ComputeUnitStats::instCyclesVMemPerSimd
Stats::Vector instCyclesVMemPerSimd
Definition: compute_unit.hh:978
ComputeUnit::GMTokenPort
Definition: compute_unit.hh:486
RequestPort
A RequestPort is a specialisation of a Port, which implements the default protocol for the three diff...
Definition: port.hh:74
ComputeUnit::countPages
bool countPages
Definition: compute_unit.hh:350
RegisterManager
Definition: register_manager.hh:56
ComputeUnit::ComputeUnitStats::vALUInstsPerWF
Stats::Formula vALUInstsPerWF
Definition: compute_unit.hh:947
ComputeUnit::SQCPort::computeUnit
ComputeUnit * computeUnit
Definition: compute_unit.hh:626
local_memory_pipeline.hh
ComputeUnit::ComputeUnitStats::ldsBankAccesses
Stats::Scalar ldsBankAccesses
Definition: compute_unit.hh:1015
ComputeUnit::lastExecCycle
std::vector< uint64_t > lastExecCycle
Definition: compute_unit.hh:322
ComputeUnit::scalarMemoryPipe
ScalarMemPipeline scalarMemoryPipe
Definition: compute_unit.hh:285
ComputeUnit::ScalarDataPort::SenderState::_gpuDynInst
GPUDynInstPtr _gpuDynInst
Definition: compute_unit.hh:574
InstSeqNum
uint64_t InstSeqNum
Definition: inst_seq.hh:37
ComputeUnit::ComputeUnitStats::vectorMemInstsPerKiloInst
Stats::Formula vectorMemInstsPerKiloInst
Definition: compute_unit.hh:971
ComputeUnit::sendScalarRequest
void sendScalarRequest(GPUDynInstPtr gpuDynInst, PacketPtr pkt)
Definition: compute_unit.cc:1192
TLB_HIT_CACHE_HIT
@ TLB_HIT_CACHE_HIT
Definition: compute_unit.hh:82
ComputeUnit::lastVaddrWF
std::vector< std::vector< std::vector< Addr > > > lastVaddrWF
Definition: compute_unit.hh:336
ComputeUnit::ComputeUnitStats::vectorMemReads
Stats::Scalar vectorMemReads
Definition: compute_unit.hh:962
ComputeUnit::ComputeUnitStats::numVecOpsExecutedFMA16
Stats::Scalar numVecOpsExecutedFMA16
Definition: compute_unit.hh:1045
ComputeUnit::ComputeUnitStats::activeLanesPerLMemInstrDist
Stats::Distribution activeLanesPerLMemInstrDist
Definition: compute_unit.hh:1067
ComputeUnit::ScalarDTLBPort::stallPort
void stallPort()
Definition: compute_unit.hh:708
ComputeUnit::sendToLds
M5_NODISCARD bool sendToLds(GPUDynInstPtr gpuDynInst)
Send a general request to the LDS. Make sure to look at the return value here, as your request might be...
Definition: compute_unit.cc:1983
ComputeUnit::coalescerToVrfBusWidth
int coalescerToVrfBusWidth
Definition: compute_unit.hh:268
ComputeUnit::srfToScalarMemPipeBus
WaitClass srfToScalarMemPipeBus
Definition: compute_unit.hh:238
ComputeUnit::getAndIncSeqNum
InstSeqNum getAndIncSeqNum()
Definition: compute_unit.hh:880
ComputeUnit::ComputeUnitStats::numVecOpsExecutedFMA32
Stats::Scalar numVecOpsExecutedFMA32
Definition: compute_unit.hh:1046
ComputeUnit::DTLBPort::SenderState
SenderState is information carried along with the packet throughout the TLB hierarchy.
Definition: compute_unit.hh:664
ComputeUnit::LDSPort::recvReqRetry
virtual void recvReqRetry()
The bus is telling the port that there is now space, so retrying stalled requests should work now. This...
Definition: compute_unit.cc:2067
ComputeUnit::processFetchReturn
void processFetchReturn(PacketPtr pkt)
ComputeUnit::ScalarDataPort::recvTimingResp
bool recvTimingResp(PacketPtr pkt) override
Receive a timing response from the peer.
Definition: compute_unit.cc:900
Addr
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Definition: types.hh:148
ComputeUnit::SQCPort::SenderState::saved
Packet::SenderState * saved
Definition: compute_unit.hh:613
ComputeUnit::ITLBPort::recvTimingResp
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.
Definition: compute_unit.cc:1710
Port::name
const std::string name() const
Return port name (for DPRINTF).
Definition: port.hh:106
Stats::Distribution
A simple distribution stat.
Definition: statistics.hh:2084
ComputeUnit::scalarRegsReserved
std::vector< int > scalarRegsReserved
Definition: compute_unit.hh:368
ComputeUnit::LDSPort::isStalled
bool isStalled() const
Definition: compute_unit.hh:768
ComputeUnit::ComputeUnitStats::argMemInsts
Stats::Formula argMemInsts
Definition: compute_unit.hh:987
ComputeUnit::resetRegisterPool
void resetRegisterPool()
Definition: compute_unit.cc:408
ComputeUnit::processTimingPacket
bool processTimingPacket(PacketPtr pkt)
scalar_memory_pipeline.hh
ComputeUnit::localMemBarrier
bool localMemBarrier
Definition: compute_unit.hh:345
WFBarrier::WFBarrier
WFBarrier()
Definition: compute_unit.hh:92
ComputeUnit::globalMemoryPipe
GlobalMemPipeline globalMemoryPipe
Definition: compute_unit.hh:283
schedule_stage.hh
ComputeUnit::isDone
bool isDone() const
Definition: compute_unit.cc:1927
ComputeUnit::ComputeUnitStats::vectorMemReadsPerWF
Stats::Formula vectorMemReadsPerWF
Definition: compute_unit.hh:963
ComputeUnit::numAtBarrier
int numAtBarrier(int bar_id)
Definition: compute_unit.cc:666
ComputeUnit::hasDispResources
bool hasDispResources(HSAQueueEntry *task, int &num_wfs_in_wg)
Definition: compute_unit.cc:517
ComputeUnit::cacheLineSize
int cacheLineSize() const
Definition: compute_unit.hh:410
ComputeUnit::getTokenManager
TokenManager * getTokenManager()
Definition: compute_unit.hh:836
ComputeUnit::ldsPort
LDSPort ldsPort
The port to access the Local Data Store. Can be connected to an LDS object.
Definition: compute_unit.hh:833
ComputeUnit::freeBarrierIds
std::unordered_set< int > freeBarrierIds
A set used to easily retrieve a free barrier ID.
Definition: compute_unit.hh:931
ComputeUnit::lastVaddrCU
std::vector< Addr > lastVaddrCU
Definition: compute_unit.hh:334
ComputeUnit::ComputeUnitStats::scalarMemWritesPerWF
Stats::Formula scalarMemWritesPerWF
Definition: compute_unit.hh:965
RequestPort::owner
SimObject & owner
Definition: port.hh:83
ComputeUnit::getPort
Port & getPort(const std::string &if_name, PortID idx) override
Get a port with a given name and index.
Definition: compute_unit.hh:857
ComputeUnit::DTLBPort::unstallPort
void unstallPort()
Definition: compute_unit.hh:653
X86ISA::addr
Bitfield< 3 > addr
Definition: types.hh:80
ComputeUnit::DataPort::SenderState::_gpuDynInst
GPUDynInstPtr _gpuDynInst
Definition: compute_unit.hh:517
ComputeUnit::DTLBPort::recvAtomic
virtual Tick recvAtomic(PacketPtr pkt)
Definition: compute_unit.hh:684
WFBarrier::release
void release()
Release this barrier resource so it can be used by other WGs.
Definition: compute_unit.hh:166
ComputeUnit::ComputeUnitStats::globalMemInsts
Stats::Formula globalMemInsts
Definition: compute_unit.hh:984
ComputeUnit::numWfsToSched
std::vector< int > numWfsToSched
Number of WFs to schedule to each SIMD.
Definition: compute_unit.hh:363
ComputeUnit::LDSPort::recvFunctional
virtual void recvFunctional(PacketPtr pkt)
Definition: compute_unit.hh:817
ComputeUnit::oprNetPipeLength
int oprNetPipeLength() const
Definition: compute_unit.hh:386
ComputeUnit::ComputeUnitStats::dynamicFlatMemInstrCnt
Stats::Scalar dynamicFlatMemInstrCnt
Definition: compute_unit.hh:1024
register_manager.hh
ComputeUnit::_requestorId
RequestorID _requestorId
Definition: compute_unit.hh:465
ComputeUnit::DataPort
Data access Port.
Definition: compute_unit.hh:507
ComputeUnit::DataPort::recvTimingResp
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.
Definition: compute_unit.cc:799
ComputeUnit::numVecRegsPerSimd
int numVecRegsPerSimd
Definition: compute_unit.hh:370
ComputeUnit::ComputeUnitStats::vectorMemWrites
Stats::Scalar vectorMemWrites
Definition: compute_unit.hh:960
ComputeUnit::ComputeUnitStats::ipc
Stats::Formula ipc
Definition: compute_unit.hh:1064
ComputeUnit::SQCPort::recvTimingResp
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.
Definition: compute_unit.cc:977
ComputeUnit::execStage
ExecStage execStage
Definition: compute_unit.hh:282
ComputeUnit::DataPort::recvAtomic
virtual Tick recvAtomic(PacketPtr pkt)
Definition: compute_unit.hh:540
ComputeUnit::ComputeUnitStats::readonlyWrites
Stats::Scalar readonlyWrites
Definition: compute_unit.hh:998
ComputeUnit::scalarMemToSrfBus
WaitClass scalarMemToSrfBus
Definition: compute_unit.hh:236
ComputeUnit::DTLBPort::isStalled
bool isStalled()
Definition: compute_unit.hh:651
ComputeUnit::ScalarDTLBPort::stalled
bool stalled
Definition: compute_unit.hh:715
ComputeUnit::ComputeUnitStats::waveLevelParallelism
Stats::Distribution waveLevelParallelism
Definition: compute_unit.hh:1004
ComputeUnit::pageAccesses
pageDataStruct pageAccesses
Definition: compute_unit.hh:482
ComputeUnit::DataPort::recvReqRetry
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Definition: compute_unit.cc:950
ScoreboardCheckToSchedule
Communication interface between ScoreboardCheck and Schedule stages.
Definition: comm.hh:61
ComputeUnit::_cacheLineSize
const int _cacheLineSize
Definition: compute_unit.hh:883
ComputeUnit::ComputeUnitStats::kernargMemInsts
Stats::Formula kernargMemInsts
Definition: compute_unit.hh:1002
ComputeUnit::pageDataStruct
std::unordered_map< Addr, std::pair< int, int > > pageDataStruct
Definition: compute_unit.hh:481
ComputeUnit::sqcPort
SQCPort sqcPort
Definition: compute_unit.hh:852
types.hh
ComputeUnit::updateInstStats
void updateInstStats(GPUDynInstPtr gpuDynInst)
Definition: compute_unit.cc:1793
ComputeUnit::updatePageDivergenceDist
void updatePageDivergenceDist(Addr addr)
Definition: compute_unit.cc:1899
Wavefront
Definition: wavefront.hh:59
TokenRequestPort
Definition: token_port.hh:43
ComputeUnit::GMTokenPort::GMTokenPort
GMTokenPort(const std::string &name, SimObject *owner, PortID id=InvalidPortID)
Definition: compute_unit.hh:489
ComputeUnit::fillKernelState
void fillKernelState(Wavefront *w, HSAQueueEntry *task)
Definition: compute_unit.cc:293
ComputeUnit::ComputeUnitStats::activeLanesPerGMemInstrDist
Stats::Distribution activeLanesPerGMemInstrDist
Definition: compute_unit.hh:1066
clocked_object.hh
Stats::Formula
A formula for statistics that is calculated when printed.
Definition: statistics.hh:2538
Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:258
ComputeUnit::ComputeUnitStats::groupMemInsts
Stats::Formula groupMemInsts
Definition: compute_unit.hh:993
std::deque
STL deque class.
Definition: stl.hh:44
Stats::Group
Statistics container.
Definition: group.hh:87
WFBarrier::setMaxBarrierCnt
void setMaxBarrierCnt(int max_barrier_cnt)
Set the maximum barrier count (i.e., the number of WFs that are participating in the barrier).
Definition: compute_unit.hh:124
ComputeUnit::LDSPort::SenderState::getMemInst
GPUDynInstPtr getMemInst() const
Definition: compute_unit.hh:795
ComputeUnit::ScalarDataPort::recvReqRetry
void recvReqRetry() override
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Definition: compute_unit.cc:938
ComputeUnit::Params
ComputeUnitParams Params
Definition: compute_unit.hh:289
ComputeUnit::activeWaves
int activeWaves
Definition: compute_unit.hh:940
token_port.hh
ComputeUnit::GMTokenPort::recvTimingResp
bool recvTimingResp(PacketPtr)
Receive a timing response from the peer.
Definition: compute_unit.hh:496
ComputeUnit::wfSize
int wfSize() const
Definition: compute_unit.hh:393
ComputeUnit::vrfToCoalescerBusWidth
int vrfToCoalescerBusWidth
Definition: compute_unit.hh:267
ComputeUnit::numCyclesPerStoreTransfer
int numCyclesPerStoreTransfer
Definition: compute_unit.hh:269
ComputeUnit::lastVaddrSimd
std::vector< std::vector< Addr > > lastVaddrSimd
Definition: compute_unit.hh:335
ComputeUnit::ComputeUnitStats::ldsNoFlatInsts
Stats::Scalar ldsNoFlatInsts
Definition: compute_unit.hh:954
ComputeUnit::ComputeUnitStats
Definition: compute_unit.hh:942
ComputeUnit::DataPort::processMemRespEvent
void processMemRespEvent(PacketPtr pkt)
Definition: compute_unit.cc:1297
ComputeUnit::ComputeUnitStats::privReads
Stats::Scalar privReads
Definition: compute_unit.hh:994
VectorRegisterFile
Definition: vector_register_file.hh:46
GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:48
ComputeUnit::ComputeUnitStats::threadCyclesVALU
Stats::Scalar threadCyclesVALU
Definition: compute_unit.hh:952
ComputeUnit::GMTokenPort::recvReqRetry
void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Definition: compute_unit.hh:497
ComputeUnit::SQCPort::SQCPort
SQCPort(const std::string &_name, ComputeUnit *_cu)
Definition: compute_unit.hh:605
Cycles
Cycles is a wrapper class for representing cycle counts, i.e.
Definition: types.hh:79
ComputeUnit::ComputeUnitStats::completedWfs
Stats::Scalar completedWfs
Definition: compute_unit.hh:1076
ComputeUnit::vectorRegsReserved
std::vector< int > vectorRegsReserved
Definition: compute_unit.hh:366
LdsChunk
This represents a slice of the overall LDS, intended to be associated with an individual workgroup.
Definition: lds_state.hh:55
ComputeUnit::getFreeBarrierId
int getFreeBarrierId()
Definition: compute_unit.hh:424
ComputeUnit::exec_policy
EXEC_POLICY exec_policy
Definition: compute_unit.hh:338
ComputeUnit::ScalarDTLBPort::unstallPort
void unstallPort()
Definition: compute_unit.hh:709
ComputeUnit::ComputeUnitStats::numALUInstsExecuted
Stats::Formula numALUInstsExecuted
Definition: compute_unit.hh:1069
ScalarMemPipeline
Definition: scalar_memory_pipeline.hh:57
ScoreboardCheckStage
Definition: scoreboard_check_stage.hh:60
ComputeUnit::ComputeUnitStats::instCyclesSALU
Stats::Scalar instCyclesSALU
Definition: compute_unit.hh:951
ComputeUnit::scoreboardCheckStage
ScoreboardCheckStage scoreboardCheckStage
Definition: compute_unit.hh:280
ComputeUnit::dpBypassLength
int dpBypassLength() const
Definition: compute_unit.hh:389
ComputeUnit::ComputeUnitStats::execRateDist
Stats::Distribution execRateDist
Definition: compute_unit.hh:1035
ComputeUnit::ScalarDataPort::retries
std::deque< PacketPtr > retries
Definition: compute_unit.hh:595
ComputeUnit::ComputeUnitStats::instCyclesScMemPerSimd
Stats::Vector instCyclesScMemPerSimd
Definition: compute_unit.hh:979
ComputeUnit::getCacheLineBits
int getCacheLineBits() const
Definition: compute_unit.hh:411
ComputeUnit::ScalarDataPort
Definition: compute_unit.hh:555
ComputeUnit::releaseWFsFromBarrier
void releaseWFsFromBarrier(int bar_id)
Definition: compute_unit.cc:702
TokenManager
Definition: token_port.hh:129
ComputeUnit::DataPort::createMemRespEvent
EventFunctionWrapper * createMemRespEvent(PacketPtr pkt)
Definition: compute_unit.cc:1553
ComputeUnit::doSmReturn
void doSmReturn(GPUDynInstPtr gpuDynInst)
ComputeUnit::DataPort::processMemReqEvent
void processMemReqEvent(PacketPtr pkt)
Definition: compute_unit.cc:1561
ComputeUnit::tlbPort
std::vector< DTLBPort > tlbPort
Definition: compute_unit.hh:846
ComputeUnit::ComputeUnitStats::numVecOpsExecutedMAD16
Stats::Scalar numVecOpsExecutedMAD16
Definition: compute_unit.hh:1053
MipsISA::p
Bitfield< 0 > p
Definition: pra_constants.hh:323
ComputeUnit::SQCPort::snoopRangeSent
bool snoopRangeSent
Definition: compute_unit.hh:608
std::list< AddrRange >
ComputeUnit::ComputeUnitStats::wgBlockedDueLdsAllocation
Stats::Scalar wgBlockedDueLdsAllocation
Definition: compute_unit.hh:1028
ComputeUnit::ComputeUnitStats::scalarMemWrites
Stats::Scalar scalarMemWrites
Definition: compute_unit.hh:964
ComputeUnit::ComputeUnitStats::numVecOpsExecutedMAC32
Stats::Scalar numVecOpsExecutedMAC32
Definition: compute_unit.hh:1050
ComputeUnit::ComputeUnitStats::numVecOpsExecuted
Stats::Scalar numVecOpsExecuted
Definition: compute_unit.hh:1037
ScheduleStage
Definition: schedule_stage.hh:61
ComputeUnit::~ComputeUnit
~ComputeUnit()
Definition: compute_unit.cc:216
ComputeUnit::ComputeUnitStats::argWrites
Stats::Scalar argWrites
Definition: compute_unit.hh:986
ComputeUnit::LDSPort::recvTimingResp
virtual bool recvTimingResp(PacketPtr pkt)
get the result of packets sent to the LDS when they return
Definition: compute_unit.cc:2003
ComputeUnit::ComputeUnitStats::numVecOpsExecutedMAD32
Stats::Scalar numVecOpsExecutedMAD32
Definition: compute_unit.hh:1054
ComputeUnit::pipeMap
std::unordered_set< uint64_t > pipeMap
Definition: compute_unit.hh:275
ComputeUnit::SQCPort::SenderState
Definition: compute_unit.hh:610
exec_stage.hh
ComputeUnit::LDSPort::recvAtomic
virtual Tick recvAtomic(PacketPtr pkt)
Definition: compute_unit.hh:814
ComputeUnit::ComputeUnitStats::numInstrExecuted
Stats::Scalar numInstrExecuted
Definition: compute_unit.hh:1032
ComputeUnit::ITLBPort::recvFunctional
virtual void recvFunctional(PacketPtr pkt)
Definition: compute_unit.hh:752
ComputeUnit::ComputeUnitStats::readonlyReads
Stats::Scalar readonlyReads
Definition: compute_unit.hh:997
ComputeUnit::doInvalidate
void doInvalidate(RequestPtr req, int kernId)
trigger an invalidate operation in the CU
Definition: compute_unit.cc:381
WFBarrier::numAtBarrier
int numAtBarrier() const
Definition: compute_unit.hh:99
ComputeUnit::sqcTLBPort
ITLBPort sqcTLBPort
Definition: compute_unit.hh:854
ComputeUnit::DTLBPort::SenderState::portIndex
PortID portIndex
Definition: compute_unit.hh:671
ComputeUnit::ComputeUnitStats::vALUUtilization
Stats::Formula vALUUtilization
Definition: compute_unit.hh:953
ComputeUnit::ComputeUnitStats::scalarMemReads
Stats::Scalar scalarMemReads
Definition: compute_unit.hh:966
ComputeUnit::ITLBPort::SenderState::SenderState
SenderState(Wavefront *_wavefront)
Definition: compute_unit.hh:743
ComputeUnit::ComputeUnitStats::headTailLatency
Stats::Distribution headTailLatency
Definition: compute_unit.hh:1081
ComputeUnit::ComputeUnitStats::instInterleave
Stats::VectorDistribution instInterleave
Definition: compute_unit.hh:1087
GlobalMemPipeline
Definition: global_memory_pipeline.hh:58
ComputeUnit::requestorId
RequestorID requestorId()
Definition: compute_unit.hh:459
ComputeUnit::ComputeUnitStats::flatVMemInsts
Stats::Scalar flatVMemInsts
Definition: compute_unit.hh:956
ComputeUnit::ScalarDTLBPort::retries
std::deque< PacketPtr > retries
Definition: compute_unit.hh:711
ComputeUnit::mapWaveToScalarAlu
int mapWaveToScalarAlu(Wavefront *w) const
Definition: compute_unit.cc:251
ComputeUnit::DTLBPort::stallPort
void stallPort()
Definition: compute_unit.hh:652
ComputeUnit::ComputeUnitStats::vectorMemReadsPerKiloInst
Stats::Formula vectorMemReadsPerKiloInst
Definition: compute_unit.hh:969
ComputeUnit::lds
LdsState & lds
Definition: compute_unit.hh:467
ComputeUnit::shader
Shader * shader
Definition: compute_unit.hh:352
ComputeUnit::simdWidth
int simdWidth
Definition: compute_unit.hh:300
WFBarrier::InvalidID
static const int InvalidID
Definition: compute_unit.hh:96
callback.hh
ComputeUnit::ComputeUnitStats::dynamicLMemInstrCnt
Stats::Scalar dynamicLMemInstrCnt
Definition: compute_unit.hh:1025
ComputeUnit::ITLBPort::stallPort
void stallPort()
Definition: compute_unit.hh:726
ComputeUnit::spBypassPipeLength
int spBypassPipeLength
Definition: compute_unit.hh:303
ComputeUnit::ScalarDTLBPort::SenderState::_gpuDynInst
GPUDynInstPtr _gpuDynInst
Definition: compute_unit.hh:701
ComputeUnit::barrierSlot
WFBarrier & barrierSlot(int bar_id)
Definition: compute_unit.hh:417
ComputeUnit::LDSPort
the port intended to communicate between the CU and its LDS
Definition: compute_unit.hh:760
ComputeUnit::scheduleToExecute
ScheduleToExecute scheduleToExecute
Definition: compute_unit.hh:922
ComputeUnit::numScalarALUs
int numScalarALUs
Definition: compute_unit.hh:247
WFBarrier::_numAtBarrier
int _numAtBarrier
The number of WFs in the WG that have reached the barrier.
Definition: compute_unit.hh:188
ComputeUnit::fetch
void fetch(PacketPtr pkt, Wavefront *wavefront)
ComputeUnit::numVectorALUs
int numVectorALUs
Definition: compute_unit.hh:243
SimObject
Abstract superclass for simulation objects.
Definition: sim_object.hh:141
ComputeUnit::ComputeUnitStats::vpc_f16
Stats::Formula vpc_f16
Definition: compute_unit.hh:1061

Generated on Tue Jun 22 2021 15:28:28 for gem5 by doxygen 1.8.17