gem5  v19.0.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
lds_state.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Authors: John Kalamatianos,
34  * Joe Gross
35  */
36 
37 #ifndef __LDS_STATE_HH__
38 #define __LDS_STATE_HH__
39 
40 #include <array>
41 #include <queue>
42 #include <string>
43 #include <unordered_map>
44 #include <utility>
45 #include <vector>
46 
47 #include "enums/MemType.hh"
48 #include "gpu-compute/misc.hh"
49 #include "mem/port.hh"
50 #include "params/LdsState.hh"
51 #include "sim/clocked_object.hh"
52 
53 class ComputeUnit;
54 
59 class LdsChunk
60 {
61  public:
62  LdsChunk(const uint32_t x_size):
63  chunk(x_size)
64  {
65  }
66 
67  LdsChunk() {}
68 
72  template<class T>
73  T
74  read(const uint32_t index)
75  {
76  fatal_if(!chunk.size(), "cannot read from an LDS chunk of size 0");
77  fatal_if(index >= chunk.size(), "out-of-bounds access to an LDS chunk");
78  T *p0 = (T *) (&(chunk.at(index)));
79  return *p0;
80  }
81 
85  template<class T>
86  void
87  write(const uint32_t index, const T value)
88  {
89  fatal_if(!chunk.size(), "cannot write to an LDS chunk of size 0");
90  fatal_if(index >= chunk.size(), "out-of-bounds access to an LDS chunk");
91  T *p0 = (T *) (&(chunk.at(index)));
92  *p0 = value;
93  }
94 
99  size() const
100  {
101  return chunk.size();
102  }
103 
104  protected:
105  // the actual data store for this slice of the LDS
107 };
108 
109 // Local Data Share (LDS) State per Wavefront (contents of the LDS region
110 // allocated to the WorkGroup of this Wavefront)
111 class LdsState: public ClockedObject
112 {
113  protected:
114 
118  class TickEvent: public Event
119  {
120  protected:
121 
122  LdsState *ldsState = nullptr;
123 
124  Tick nextTick = 0;
125 
126  public:
127 
128  TickEvent(LdsState *_ldsState) :
129  ldsState(_ldsState)
130  {
131  }
132 
133  virtual void
134  process();
135 
136  void
138  {
139  mainEventQueue[0]->schedule(this, when);
140  }
141 
142  void
144  {
145  mainEventQueue[0]->deschedule(this);
146  }
147  };
148 
152  class CuSidePort: public SlavePort
153  {
154  public:
155  CuSidePort(const std::string &_name, LdsState *_ownerLds) :
156  SlavePort(_name, _ownerLds), ownerLds(_ownerLds)
157  {
158  }
159 
160  protected:
162 
163  virtual bool
164  recvTimingReq(PacketPtr pkt);
165 
166  virtual Tick
168  {
169  return 0;
170  }
171 
172  virtual void
173  recvFunctional(PacketPtr pkt);
174 
175  virtual void
177  {
178  }
179 
180  virtual void
181  recvRetry();
182 
183  virtual void
184  recvRespRetry();
185 
186  virtual AddrRangeList
188  {
189  AddrRangeList ranges;
190  ranges.push_back(ownerLds->getAddrRange());
191  return ranges;
192  }
193 
194  template<typename T>
195  void
196  loadData(PacketPtr packet);
197 
198  template<typename T>
199  void
200  storeData(PacketPtr packet);
201 
202  template<typename T>
203  void
204  atomicOperation(PacketPtr packet);
205  };
206 
207  protected:
208 
209  // the lds reference counter
210  // The key is the workgroup ID and dispatch ID
211  // The value is the number of wavefronts that reference this LDS, as
212  // wavefronts are launched, the counter goes up for that workgroup and when
213  // they return it decreases, once it reaches 0 then this chunk of the LDS is
214  // returned to the available pool. However,it is deallocated on the 1->0
215  // transition, not whenever the counter is 0 as it always starts with 0 when
216  // the workgroup asks for space
217  std::unordered_map<uint32_t,
218  std::unordered_map<uint32_t, int32_t>> refCounter;
219 
220  // the map that allows workgroups to access their own chunk of the LDS
221  std::unordered_map<uint32_t,
222  std::unordered_map<uint32_t, LdsChunk>> chunkMap;
223 
224  // an event to allow the LDS to wake up at a specified time
226 
227  // the queue of packets that are going back to the CU after a
228  // read/write/atomic op
229  // TODO need to make this have a maximum size to create flow control
230  std::queue<std::pair<Tick, PacketPtr>> returnQueue;
231 
232  // whether or not there are pending responses
233  bool retryResp = false;
234 
235  bool
236  process();
237 
239  getDynInstr(PacketPtr packet);
240 
241  bool
242  processPacket(PacketPtr packet);
243 
244  unsigned
245  countBankConflicts(PacketPtr packet, unsigned *bankAccesses);
246 
247  unsigned
248  countBankConflicts(GPUDynInstPtr gpuDynInst,
249  unsigned *numBankAccesses);
250 
251  public:
252  typedef LdsStateParams Params;
253 
254  LdsState(const Params *params);
255 
256  // prevent copy construction
257  LdsState(const LdsState&) = delete;
258 
260  {
261  parent = nullptr;
262  }
263 
264  const Params *
265  params() const
266  {
267  return dynamic_cast<const Params *>(_params);
268  }
269 
270  bool
271  isRetryResp() const
272  {
273  return retryResp;
274  }
275 
276  void
277  setRetryResp(const bool value)
278  {
279  retryResp = value;
280  }
281 
282  // prevent assignment
283  LdsState &
284  operator=(const LdsState &) = delete;
285 
289  int
290  increaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
291  {
292  int refCount = getRefCounter(dispatchId, wgId);
293  fatal_if(refCount < 0,
294  "reference count should not be below zero");
295  return ++refCounter[dispatchId][wgId];
296  }
297 
302  int
303  decreaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
304  {
305  int refCount = getRefCounter(dispatchId, wgId);
306 
307  fatal_if(refCount <= 0,
308  "reference count should not be below zero or at zero to"
309  "decrement");
310 
311  refCounter[dispatchId][wgId]--;
312 
313  if (refCounter[dispatchId][wgId] == 0) {
314  releaseSpace(dispatchId, wgId);
315  return 0;
316  } else {
317  return refCounter[dispatchId][wgId];
318  }
319  }
320 
324  int
325  getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
326  {
327  auto dispatchIter = chunkMap.find(dispatchId);
328  fatal_if(dispatchIter == chunkMap.end(),
329  "could not locate this dispatch id [%d]", dispatchId);
330 
331  auto workgroup = dispatchIter->second.find(wgId);
332  fatal_if(workgroup == dispatchIter->second.end(),
333  "could not find this workgroup id within this dispatch id"
334  " did[%d] wgid[%d]", dispatchId, wgId);
335 
336  auto refCountIter = refCounter.find(dispatchId);
337  if (refCountIter == refCounter.end()) {
338  fatal("could not locate this dispatch id [%d]", dispatchId);
339  } else {
340  auto workgroup = refCountIter->second.find(wgId);
341  if (workgroup == refCountIter->second.end()) {
342  fatal("could not find this workgroup id within this dispatch id"
343  " did[%d] wgid[%d]", dispatchId, wgId);
344  } else {
345  return refCounter.at(dispatchId).at(wgId);
346  }
347  }
348 
349  fatal("should not reach this point");
350  return 0;
351  }
352 
357  LdsChunk *
358  reserveSpace(const uint32_t dispatchId, const uint32_t wgId,
359  const uint32_t size)
360  {
361  if (chunkMap.find(dispatchId) != chunkMap.end()) {
362  fatal_if(
363  chunkMap[dispatchId].find(wgId) != chunkMap[dispatchId].end(),
364  "duplicate workgroup ID asking for space in the LDS "
365  "did[%d] wgid[%d]", dispatchId, wgId);
366  }
367 
368  fatal_if(bytesAllocated + size > maximumSize,
369  "request would ask for more space than is available");
370 
371  bytesAllocated += size;
372 
373  chunkMap[dispatchId].emplace(wgId, LdsChunk(size));
374  // make an entry for this workgroup
375  refCounter[dispatchId][wgId] = 0;
376 
377  return &chunkMap[dispatchId][wgId];
378  }
379 
380  bool
381  returnQueuePush(std::pair<Tick, PacketPtr> thePair);
382 
383  Tick
385  {
386  // TODO set to max(lastCommand+1, curTick())
387  return returnQueue.empty() ? curTick() : returnQueue.back().first;
388  }
389 
390  void
391  setParent(ComputeUnit *x_parent);
392 
393  // accessors
394  ComputeUnit *
395  getParent() const
396  {
397  return parent;
398  }
399 
400  std::string
402  {
403  return _name;
404  }
405 
406  int
407  getBanks() const
408  {
409  return banks;
410  }
411 
412  ComputeUnit *
414  {
415  return parent;
416  }
417 
418  int
420  {
421  return bankConflictPenalty;
422  }
423 
427  std::size_t
428  ldsSize(const uint32_t x_wgId)
429  {
430  return chunkMap[x_wgId].size();
431  }
432 
433  AddrRange
434  getAddrRange() const
435  {
436  return range;
437  }
438 
439  Port &
440  getPort(const std::string &if_name, PortID idx)
441  {
442  if (if_name == "cuPort") {
443  // TODO need to set name dynamically at this point?
444  return cuPort;
445  } else {
446  fatal("cannot resolve the port name " + if_name);
447  }
448  }
449 
453  bool
454  canReserve(uint32_t x_size) const
455  {
456  return bytesAllocated + x_size <= maximumSize;
457  }
458 
459  private:
463  bool
464  releaseSpace(const uint32_t x_dispatchId, const uint32_t x_wgId)
465  {
466  auto dispatchIter = chunkMap.find(x_dispatchId);
467 
468  if (dispatchIter == chunkMap.end()) {
469  fatal("dispatch id not found [%d]", x_dispatchId);
470  } else {
471  auto workgroupIter = dispatchIter->second.find(x_wgId);
472  if (workgroupIter == dispatchIter->second.end()) {
473  fatal("workgroup id [%d] not found in dispatch id [%d]",
474  x_wgId, x_dispatchId);
475  }
476  }
477 
478  fatal_if(bytesAllocated < chunkMap[x_dispatchId][x_wgId].size(),
479  "releasing more space than was allocated");
480 
481  bytesAllocated -= chunkMap[x_dispatchId][x_wgId].size();
482  chunkMap[x_dispatchId].erase(chunkMap[x_dispatchId].find(x_wgId));
483  return true;
484  }
485 
486  // the port that connects this LDS to its owner CU
488 
489  ComputeUnit* parent = nullptr;
490 
491  std::string _name;
492 
493  // the number of bytes currently reserved by all workgroups
494  int bytesAllocated = 0;
495 
496  // the size of the LDS, the most bytes available
498 
499  // Address range of this memory
501 
502  // the penalty, in cycles, for each LDS bank conflict
503  int bankConflictPenalty = 0;
504 
505  // the number of banks in the LDS underlying data store
506  int banks = 0;
507 };
508 
509 #endif // __LDS_STATE_HH__
Ports are used to interface objects to each other.
Definition: port.hh:60
LdsState * ownerLds
Definition: lds_state.hh:161
Bitfield< 30, 0 > index
an event to allow event-driven execution
Definition: lds_state.hh:118
std::string _name
Definition: lds_state.hh:491
#define fatal(...)
This implements a cprintf based fatal() function.
Definition: logging.hh:175
std::vector< uint8_t >::size_type size() const
get the size of this chunk
Definition: lds_state.hh:99
std::string getName()
Definition: lds_state.hh:401
Tick earliestReturnTime() const
Definition: lds_state.hh:384
STL pair class.
Definition: stl.hh:61
std::queue< std::pair< Tick, PacketPtr > > returnQueue
Definition: lds_state.hh:230
const Params * params() const
Definition: lds_state.hh:265
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
Definition: lds_state.hh:187
std::vector< uint8_t > chunk
Definition: lds_state.hh:106
TickEvent(LdsState *_ldsState)
Definition: lds_state.hh:128
AddrRange range
Definition: lds_state.hh:500
vector< EventQueue * > mainEventQueue
Array for main event queues.
Definition: eventq.cc:59
LdsStateParams Params
Definition: lds_state.hh:252
A SlavePort is a specialisation of a port.
Definition: port.hh:258
this represents a slice of the overall LDS, intended to be associated with an individual workgroup ...
Definition: lds_state.hh:59
STL vector class.
Definition: stl.hh:40
ComputeUnit * getParent() const
Definition: lds_state.hh:395
CuSidePort(const std::string &_name, LdsState *_ownerLds)
Definition: lds_state.hh:155
bool isRetryResp() const
Definition: lds_state.hh:271
void schedule(Tick when)
Definition: lds_state.hh:137
The AddrRange class encapsulates an address range, and supports a number of tests to check if two ran...
Definition: addr_range.hh:72
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:48
virtual void recvRangeChange()
Definition: lds_state.hh:176
void setRetryResp(const bool value)
Definition: lds_state.hh:277
Tick curTick()
The current simulated tick.
Definition: core.hh:47
std::size_t ldsSize(const uint32_t x_wgId)
get the allocated size for this workgroup
Definition: lds_state.hh:428
uint64_t Tick
Tick count type.
Definition: types.hh:63
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
bool canReserve(uint32_t x_size) const
can this much space be reserved for a workgroup?
Definition: lds_state.hh:454
ClockedObject declaration and implementation.
Port & getPort(const std::string &if_name, PortID idx)
Get a port with a given name and index.
Definition: lds_state.hh:440
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition: logging.hh:203
Port Object Declaration.
std::unordered_map< uint32_t, std::unordered_map< uint32_t, int32_t > > refCounter
Definition: lds_state.hh:218
int increaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
use the dynamic wave id to create or just increase the reference count
Definition: lds_state.hh:290
int decreaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
decrease the reference count after making sure it is in the list give back this chunk if the ref coun...
Definition: lds_state.hh:303
T read(const uint32_t index)
a read operation
Definition: lds_state.hh:74
virtual Tick recvAtomic(PacketPtr pkt)
Receive an atomic request packet from the peer.
Definition: lds_state.hh:167
LdsChunk(const uint32_t x_size)
Definition: lds_state.hh:62
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
Definition: packet.hh:255
TickEvent tickEvent
Definition: lds_state.hh:225
std::unordered_map< uint32_t, std::unordered_map< uint32_t, LdsChunk > > chunkMap
Definition: lds_state.hh:222
Definition: eventq.hh:189
LdsChunk * reserveSpace(const uint32_t dispatchId, const uint32_t wgId, const uint32_t size)
assign a parent and request this amount of space be set aside for this wgid
Definition: lds_state.hh:358
int getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
return the current reference count for this workgroup id
Definition: lds_state.hh:325
int getBanks() const
Definition: lds_state.hh:407
void write(const uint32_t index, const T value)
a write operation
Definition: lds_state.hh:87
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Definition: types.hh:237
int maximumSize
Definition: lds_state.hh:497
int getBankConflictPenalty() const
Definition: lds_state.hh:419
bool releaseSpace(const uint32_t x_dispatchId, const uint32_t x_wgId)
give back the space
Definition: lds_state.hh:464
CuSidePort cuPort
Definition: lds_state.hh:487
LdsChunk()
Definition: lds_state.hh:67
CuSidePort is the LDS Port closer to the CU side.
Definition: lds_state.hh:152
AddrRange getAddrRange() const
Definition: lds_state.hh:434
ComputeUnit * getComputeUnit() const
Definition: lds_state.hh:413

Generated on Fri Feb 28 2020 16:27:01 for gem5 by doxygen 1.8.13