gem5  v20.0.0.3
lds_state.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #ifndef __LDS_STATE_HH__
35 #define __LDS_STATE_HH__
36 
37 #include <array>
38 #include <queue>
39 #include <string>
40 #include <unordered_map>
41 #include <utility>
42 #include <vector>
43 
44 #include "enums/MemType.hh"
45 #include "gpu-compute/misc.hh"
46 #include "mem/port.hh"
47 #include "params/LdsState.hh"
48 #include "sim/clocked_object.hh"
49 
50 class ComputeUnit;
51 
56 class LdsChunk
57 {
58  public:
59  LdsChunk(const uint32_t x_size):
60  chunk(x_size)
61  {
62  }
63 
64  LdsChunk() {}
65 
69  template<class T>
70  T
71  read(const uint32_t index)
72  {
73  fatal_if(!chunk.size(), "cannot read from an LDS chunk of size 0");
74  fatal_if(index >= chunk.size(), "out-of-bounds access to an LDS chunk");
75  T *p0 = (T *) (&(chunk.at(index)));
76  return *p0;
77  }
78 
82  template<class T>
83  void
84  write(const uint32_t index, const T value)
85  {
86  fatal_if(!chunk.size(), "cannot write to an LDS chunk of size 0");
87  fatal_if(index >= chunk.size(), "out-of-bounds access to an LDS chunk");
88  T *p0 = (T *) (&(chunk.at(index)));
89  *p0 = value;
90  }
91 
96  size() const
97  {
98  return chunk.size();
99  }
100 
101  protected:
102  // the actual data store for this slice of the LDS
104 };
105 
106 // Local Data Share (LDS) State per Wavefront (contents of the LDS region
107 // allocated to the WorkGroup of this Wavefront)
108 class LdsState: public ClockedObject
109 {
110  protected:
111 
115  class TickEvent: public Event
116  {
117  protected:
118 
119  LdsState *ldsState = nullptr;
120 
121  Tick nextTick = 0;
122 
123  public:
124 
125  TickEvent(LdsState *_ldsState) :
126  ldsState(_ldsState)
127  {
128  }
129 
130  virtual void
131  process();
132 
133  void
135  {
136  mainEventQueue[0]->schedule(this, when);
137  }
138 
139  void
141  {
142  mainEventQueue[0]->deschedule(this);
143  }
144  };
145 
149  class CuSidePort: public SlavePort
150  {
151  public:
152  CuSidePort(const std::string &_name, LdsState *_ownerLds) :
153  SlavePort(_name, _ownerLds), ownerLds(_ownerLds)
154  {
155  }
156 
157  protected:
159 
160  virtual bool
161  recvTimingReq(PacketPtr pkt);
162 
163  virtual Tick
165  {
166  return 0;
167  }
168 
169  virtual void
170  recvFunctional(PacketPtr pkt);
171 
172  virtual void
174  {
175  }
176 
177  virtual void
178  recvRetry();
179 
180  virtual void
181  recvRespRetry();
182 
183  virtual AddrRangeList
185  {
186  AddrRangeList ranges;
187  ranges.push_back(ownerLds->getAddrRange());
188  return ranges;
189  }
190 
191  template<typename T>
192  void
193  loadData(PacketPtr packet);
194 
195  template<typename T>
196  void
197  storeData(PacketPtr packet);
198 
199  template<typename T>
200  void
201  atomicOperation(PacketPtr packet);
202  };
203 
204  protected:
205 
206  // the lds reference counter
207  // The key is the workgroup ID and dispatch ID
208  // The value is the number of wavefronts that reference this LDS, as
209  // wavefronts are launched, the counter goes up for that workgroup and when
210  // they return it decreases, once it reaches 0 then this chunk of the LDS is
211  // returned to the available pool. However,it is deallocated on the 1->0
212  // transition, not whenever the counter is 0 as it always starts with 0 when
213  // the workgroup asks for space
214  std::unordered_map<uint32_t,
215  std::unordered_map<uint32_t, int32_t>> refCounter;
216 
217  // the map that allows workgroups to access their own chunk of the LDS
218  std::unordered_map<uint32_t,
219  std::unordered_map<uint32_t, LdsChunk>> chunkMap;
220 
221  // an event to allow the LDS to wake up at a specified time
223 
224  // the queue of packets that are going back to the CU after a
225  // read/write/atomic op
226  // TODO need to make this have a maximum size to create flow control
227  std::queue<std::pair<Tick, PacketPtr>> returnQueue;
228 
229  // whether or not there are pending responses
230  bool retryResp = false;
231 
232  bool
233  process();
234 
236  getDynInstr(PacketPtr packet);
237 
238  bool
239  processPacket(PacketPtr packet);
240 
241  unsigned
242  countBankConflicts(PacketPtr packet, unsigned *bankAccesses);
243 
244  unsigned
245  countBankConflicts(GPUDynInstPtr gpuDynInst,
246  unsigned *numBankAccesses);
247 
248  public:
249  typedef LdsStateParams Params;
250 
251  LdsState(const Params *params);
252 
253  // prevent copy construction
254  LdsState(const LdsState&) = delete;
255 
257  {
258  parent = nullptr;
259  }
260 
261  const Params *
262  params() const
263  {
264  return dynamic_cast<const Params *>(_params);
265  }
266 
267  bool
268  isRetryResp() const
269  {
270  return retryResp;
271  }
272 
273  void
274  setRetryResp(const bool value)
275  {
276  retryResp = value;
277  }
278 
279  // prevent assignment
280  LdsState &
281  operator=(const LdsState &) = delete;
282 
286  int
287  increaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
288  {
289  int refCount = getRefCounter(dispatchId, wgId);
290  fatal_if(refCount < 0,
291  "reference count should not be below zero");
292  return ++refCounter[dispatchId][wgId];
293  }
294 
299  int
300  decreaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
301  {
302  int refCount = getRefCounter(dispatchId, wgId);
303 
304  fatal_if(refCount <= 0,
305  "reference count should not be below zero or at zero to"
306  "decrement");
307 
308  refCounter[dispatchId][wgId]--;
309 
310  if (refCounter[dispatchId][wgId] == 0) {
311  releaseSpace(dispatchId, wgId);
312  return 0;
313  } else {
314  return refCounter[dispatchId][wgId];
315  }
316  }
317 
321  int
322  getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
323  {
324  auto dispatchIter = chunkMap.find(dispatchId);
325  fatal_if(dispatchIter == chunkMap.end(),
326  "could not locate this dispatch id [%d]", dispatchId);
327 
328  auto workgroup = dispatchIter->second.find(wgId);
329  fatal_if(workgroup == dispatchIter->second.end(),
330  "could not find this workgroup id within this dispatch id"
331  " did[%d] wgid[%d]", dispatchId, wgId);
332 
333  auto refCountIter = refCounter.find(dispatchId);
334  if (refCountIter == refCounter.end()) {
335  fatal("could not locate this dispatch id [%d]", dispatchId);
336  } else {
337  auto workgroup = refCountIter->second.find(wgId);
338  if (workgroup == refCountIter->second.end()) {
339  fatal("could not find this workgroup id within this dispatch id"
340  " did[%d] wgid[%d]", dispatchId, wgId);
341  } else {
342  return refCounter.at(dispatchId).at(wgId);
343  }
344  }
345 
346  fatal("should not reach this point");
347  return 0;
348  }
349 
354  LdsChunk *
355  reserveSpace(const uint32_t dispatchId, const uint32_t wgId,
356  const uint32_t size)
357  {
358  if (chunkMap.find(dispatchId) != chunkMap.end()) {
359  fatal_if(
360  chunkMap[dispatchId].find(wgId) != chunkMap[dispatchId].end(),
361  "duplicate workgroup ID asking for space in the LDS "
362  "did[%d] wgid[%d]", dispatchId, wgId);
363  }
364 
365  fatal_if(bytesAllocated + size > maximumSize,
366  "request would ask for more space than is available");
367 
368  bytesAllocated += size;
369 
370  chunkMap[dispatchId].emplace(wgId, LdsChunk(size));
371  // make an entry for this workgroup
372  refCounter[dispatchId][wgId] = 0;
373 
374  return &chunkMap[dispatchId][wgId];
375  }
376 
377  bool
378  returnQueuePush(std::pair<Tick, PacketPtr> thePair);
379 
380  Tick
382  {
383  // TODO set to max(lastCommand+1, curTick())
384  return returnQueue.empty() ? curTick() : returnQueue.back().first;
385  }
386 
387  void
388  setParent(ComputeUnit *x_parent);
389 
390  // accessors
391  ComputeUnit *
392  getParent() const
393  {
394  return parent;
395  }
396 
397  std::string
399  {
400  return _name;
401  }
402 
403  int
404  getBanks() const
405  {
406  return banks;
407  }
408 
409  ComputeUnit *
411  {
412  return parent;
413  }
414 
415  int
417  {
418  return bankConflictPenalty;
419  }
420 
424  std::size_t
425  ldsSize(const uint32_t x_wgId)
426  {
427  return chunkMap[x_wgId].size();
428  }
429 
430  AddrRange
431  getAddrRange() const
432  {
433  return range;
434  }
435 
436  Port &
437  getPort(const std::string &if_name, PortID idx)
438  {
439  if (if_name == "cuPort") {
440  // TODO need to set name dynamically at this point?
441  return cuPort;
442  } else {
443  fatal("cannot resolve the port name " + if_name);
444  }
445  }
446 
450  bool
451  canReserve(uint32_t x_size) const
452  {
453  return bytesAllocated + x_size <= maximumSize;
454  }
455 
456  private:
460  bool
461  releaseSpace(const uint32_t x_dispatchId, const uint32_t x_wgId)
462  {
463  auto dispatchIter = chunkMap.find(x_dispatchId);
464 
465  if (dispatchIter == chunkMap.end()) {
466  fatal("dispatch id not found [%d]", x_dispatchId);
467  } else {
468  auto workgroupIter = dispatchIter->second.find(x_wgId);
469  if (workgroupIter == dispatchIter->second.end()) {
470  fatal("workgroup id [%d] not found in dispatch id [%d]",
471  x_wgId, x_dispatchId);
472  }
473  }
474 
475  fatal_if(bytesAllocated < chunkMap[x_dispatchId][x_wgId].size(),
476  "releasing more space than was allocated");
477 
478  bytesAllocated -= chunkMap[x_dispatchId][x_wgId].size();
479  chunkMap[x_dispatchId].erase(chunkMap[x_dispatchId].find(x_wgId));
480  return true;
481  }
482 
483  // the port that connects this LDS to its owner CU
485 
486  ComputeUnit* parent = nullptr;
487 
488  std::string _name;
489 
490  // the number of bytes currently reserved by all workgroups
491  int bytesAllocated = 0;
492 
493  // the size of the LDS, the most bytes available
495 
496  // Address range of this memory
498 
499  // the penalty, in cycles, for each LDS bank conflict
500  int bankConflictPenalty = 0;
501 
502  // the number of banks in the LDS underlying data store
503  int banks = 0;
504 };
505 
506 #endif // __LDS_STATE_HH__
Ports are used to interface objects to each other.
Definition: port.hh:56
LdsState * ownerLds
Definition: lds_state.hh:158
Bitfield< 30, 0 > index
an event to allow event-driven execution
Definition: lds_state.hh:115
std::string _name
Definition: lds_state.hh:488
#define fatal(...)
This implements a cprintf based fatal() function.
Definition: logging.hh:171
std::vector< uint8_t >::size_type size() const
get the size of this chunk
Definition: lds_state.hh:96
std::string getName()
Definition: lds_state.hh:398
Tick earliestReturnTime() const
Definition: lds_state.hh:381
STL pair class.
Definition: stl.hh:58
std::queue< std::pair< Tick, PacketPtr > > returnQueue
Definition: lds_state.hh:227
const Params * params() const
Definition: lds_state.hh:262
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
Definition: lds_state.hh:184
std::vector< uint8_t > chunk
Definition: lds_state.hh:103
TickEvent(LdsState *_ldsState)
Definition: lds_state.hh:125
AddrRange range
Definition: lds_state.hh:497
vector< EventQueue * > mainEventQueue
Array for main event queues.
Definition: eventq.cc:55
LdsStateParams Params
Definition: lds_state.hh:249
A SlavePort is a specialisation of a port.
Definition: port.hh:254
This represents a slice of the overall LDS, intended to be associated with an individual workgroup.
Definition: lds_state.hh:56
STL vector class.
Definition: stl.hh:37
ComputeUnit * getParent() const
Definition: lds_state.hh:392
CuSidePort(const std::string &_name, LdsState *_ownerLds)
Definition: lds_state.hh:152
bool isRetryResp() const
Definition: lds_state.hh:268
void schedule(Tick when)
Definition: lds_state.hh:134
The AddrRange class encapsulates an address range, and supports a number of tests to check if two ranges intersect, if a range contains a specific address, etc.
Definition: addr_range.hh:68
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:46
virtual void recvRangeChange()
Definition: lds_state.hh:173
void setRetryResp(const bool value)
Definition: lds_state.hh:274
Tick curTick()
The current simulated tick.
Definition: core.hh:44
std::size_t ldsSize(const uint32_t x_wgId)
get the allocated size for this workgroup
Definition: lds_state.hh:425
uint64_t Tick
Tick count type.
Definition: types.hh:61
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to the cycles of the object.
bool canReserve(uint32_t x_size) const
can this much space be reserved for a workgroup?
Definition: lds_state.hh:451
ClockedObject declaration and implementation.
Port & getPort(const std::string &if_name, PortID idx)
Get a port with a given name and index.
Definition: lds_state.hh:437
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condition is true.
Definition: logging.hh:199
Port Object Declaration.
std::unordered_map< uint32_t, std::unordered_map< uint32_t, int32_t > > refCounter
Definition: lds_state.hh:215
int increaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
use the dynamic wave id to create or just increase the reference count
Definition: lds_state.hh:287
int decreaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
Decrease the reference count after making sure it is in the list; give back this chunk if the ref counter has reached 0.
Definition: lds_state.hh:300
T read(const uint32_t index)
a read operation
Definition: lds_state.hh:71
virtual Tick recvAtomic(PacketPtr pkt)
Receive an atomic request packet from the peer.
Definition: lds_state.hh:164
LdsChunk(const uint32_t x_size)
Definition: lds_state.hh:59
A Packet is used to encapsulate a transfer between two objects in the memory system (e.g., the L1 and L2 cache).
Definition: packet.hh:249
TickEvent tickEvent
Definition: lds_state.hh:222
std::unordered_map< uint32_t, std::unordered_map< uint32_t, LdsChunk > > chunkMap
Definition: lds_state.hh:219
Definition: eventq.hh:245
LdsChunk * reserveSpace(const uint32_t dispatchId, const uint32_t wgId, const uint32_t size)
assign a parent and request this amount of space be set aside for this wgid
Definition: lds_state.hh:355
int getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
return the current reference count for this workgroup id
Definition: lds_state.hh:322
int getBanks() const
Definition: lds_state.hh:404
void write(const uint32_t index, const T value)
a write operation
Definition: lds_state.hh:84
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Definition: types.hh:235
int maximumSize
Definition: lds_state.hh:494
int getBankConflictPenalty() const
Definition: lds_state.hh:416
bool releaseSpace(const uint32_t x_dispatchId, const uint32_t x_wgId)
give back the space
Definition: lds_state.hh:461
CuSidePort cuPort
Definition: lds_state.hh:484
LdsChunk()
Definition: lds_state.hh:64
CuSidePort is the LDS Port closer to the CU side.
Definition: lds_state.hh:149
AddrRange getAddrRange() const
Definition: lds_state.hh:431
ComputeUnit * getComputeUnit() const
Definition: lds_state.hh:410

Generated on Fri Jul 3 2020 15:53:03 for gem5 by doxygen 1.8.13