gem5  v22.1.0.0
lds_state.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #ifndef __LDS_STATE_HH__
33 #define __LDS_STATE_HH__
34 
#include <array>
#include <cstdint>
#include <cstring>
#include <queue>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "gpu-compute/misc.hh"
#include "mem/port.hh"
#include "params/LdsState.hh"
#include "sim/clocked_object.hh"
46 
47 namespace gem5
48 {
49 
50 class ComputeUnit;
51 
56 class LdsChunk
57 {
58  public:
59  LdsChunk(const uint32_t x_size):
60  chunk(x_size)
61  {
62  }
63 
64  LdsChunk() {}
65 
69  template<class T>
70  T
71  read(const uint32_t index)
72  {
77  if (index >= chunk.size()) {
78  return (T)0;
79  }
80 
81  T *p0 = (T *) (&(chunk.at(index)));
82  return *p0;
83  }
84 
88  template<class T>
89  void
90  write(const uint32_t index, const T value)
91  {
96  if (index >= chunk.size()) {
97  return;
98  }
99 
100  T *p0 = (T *) (&(chunk.at(index)));
101  *p0 = value;
102  }
103 
107  template<class T>
108  T
109  atomic(const uint32_t index, AtomicOpFunctorPtr amoOp)
110  {
115  if (index >= chunk.size()) {
116  return (T)0;
117  }
118  T *p0 = (T *) (&(chunk.at(index)));
119  T tmp = *p0;
120 
121  (*amoOp)((uint8_t *)p0);
122  return tmp;
123  }
124 
129  size() const
130  {
131  return chunk.size();
132  }
133 
134  protected:
135  // the actual data store for this slice of the LDS
137 };
138 
139 // Local Data Share (LDS) State per Wavefront (contents of the LDS region
140 // allocated to the WorkGroup of this Wavefront)
141 class LdsState: public ClockedObject
142 {
143  protected:
144 
// An event that lets the LDS run in an event-driven fashion: it holds a
// back-pointer to the owning LdsState and places itself on
// mainEventQueue[0].
// NOTE(review): this listing is an extract with source lines missing; the
// declarator lines of the two inline helpers below are among them.
148  class TickEvent: public Event
149  {
150  protected:
151 
// the LDS this event belongs to
152  LdsState *ldsState = nullptr;
153 
155 
156  public:
157 
158  TickEvent(LdsState *_ldsState) :
159  ldsState(_ldsState)
160  {
161  }
162 
// declared here, defined out of line; the symbol index at the end of this
// listing describes it as "wake up at this time and perform specified
// actions"
163  virtual void
164  process();
165 
// NOTE(review): the declarator (source line 167) is missing from this
// extract; the symbol index lists it as `schedule(Tick when)`.
// Puts this event on mainEventQueue[0] at tick `when`.
166  void
168  {
169  mainEventQueue[0]->schedule(this, when);
170  }
171 
// NOTE(review): the declarator (source line 173) is missing from this
// extract; presumably `deschedule()` -- confirm against the real header.
// Removes this event from mainEventQueue[0].
172  void
174  {
175  mainEventQueue[0]->deschedule(this);
176  }
177  };
178 
// CuSidePort is the LDS port closer to the CU side. It is a ResponsePort
// owned by an LdsState.
// NOTE(review): this listing is an extract with source lines missing; most
// member declarators below are absent. Names quoted in the notes come from
// the symbol index at the end of this listing.
182  class CuSidePort: public ResponsePort
183  {
184  public:
185  CuSidePort(const std::string &_name, LdsState *_ownerLds) :
186  ResponsePort(_name, _ownerLds), ownerLds(_ownerLds)
187  {
188  }
189 
190  protected:
// NOTE(review): source line 191 is missing; presumably the declaration of
// the `ownerLds` member initialized by the constructor -- confirm.
192 
// NOTE(review): declarator missing (source line 194); presumably
// `recvTimingReq(PacketPtr pkt)`, indexed as "receive the packet from the
// CU" -- confirm.
193  virtual bool
195 
// NOTE(review): declarator missing (source line 197); the index lists
// `recvAtomic(PacketPtr pkt)` at that line. Always returns 0.
196  virtual Tick
198  {
199  return 0;
200  }
201 
// NOTE(review): declarator missing (source line 203); presumably
// `recvFunctional(PacketPtr pkt)` -- confirm.
202  virtual void
204 
// NOTE(review): declarator missing (source line 206); the index lists
// `recvRangeChange()` at that line. Intentionally empty body.
205  virtual void
207  {
208  }
209 
// declared here, defined out of line; indexed as "receive a retry"
210  virtual void
211  recvRetry();
212 
// declared here, defined out of line; indexed as "receive a retry for a
// response"
213  virtual void
214  recvRespRetry();
215 
// NOTE(review): declarator missing (source line 217); the index lists
// `getAddrRanges() const` at that line.
// Returns a list holding the single address range of the owning LDS.
216  virtual AddrRangeList
218  {
219  AddrRangeList ranges;
220  ranges.push_back(ownerLds->getAddrRange());
221  return ranges;
222  }
223 
// NOTE(review): the three template declarators below (source lines 226,
// 230, 234) are missing. The index lists `loadData(PacketPtr)`,
// `storeData(PacketPtr)` and `atomicOperation(PacketPtr)`; which one sits
// on which line cannot be told from this extract.
224  template<typename T>
225  void
227 
228  template<typename T>
229  void
231 
232  template<typename T>
233  void
235  };
236 
// Protected state and helpers of LdsState.
// NOTE(review): this listing is an extract with source lines missing;
// gaps are flagged below.
237  protected:
238 
// reference counts, accessed as refCounter[dispatchId][wgId] by the
// inline methods of this class
249  std::unordered_map<uint32_t,
250  std::unordered_map<uint32_t, int32_t>> refCounter;
251 
252  // the map that allows workgroups to access their own chunk of the LDS
// accessed as chunkMap[dispatchId][wgId] by the inline methods of this
// class
253  std::unordered_map<uint32_t,
254  std::unordered_map<uint32_t, LdsChunk>> chunkMap;
255 
256  // an event to allow the LDS to wake up at a specified time
// NOTE(review): the declaration itself (source line 257) is missing; the
// symbol index lists it as `TickEvent tickEvent`.
258 
259  // the queue of packets that are going back to the CU after a
260  // read/write/atomic op
261  // TODO need to make this have a maximum size to create flow control
// each entry pairs a Tick with the packet
262  std::queue<std::pair<Tick, PacketPtr>> returnQueue;
263 
264  // whether or not there are pending responses
265  bool retryResp = false;
266 
// declared here, defined out of line; indexed as "look for packets to
// return at this time"
267  bool
268  process();
269 
// NOTE(review): the return-type line (source line 270) is missing; the
// symbol index gives it as `GPUDynInstPtr`.
271  getDynInstr(PacketPtr packet);
272 
// declared here, defined out of line; indexed as "process an incoming
// packet, add it to the return queue"
273  bool
274  processPacket(PacketPtr packet);
275 
// declared here, defined out of line; indexed as "derive the gpu mem
// packet from the packet and then count the bank conflicts"
276  unsigned
277  countBankConflicts(PacketPtr packet, unsigned *bankAccesses);
278 
// NOTE(review): the first line of this declaration (source line 280) is
// missing; presumably a second countBankConflicts overload -- confirm.
279  unsigned
281  unsigned *numBankAccesses);
282 
283  public:
// Construction, destruction and the retry-response flag accessors.
// LdsState can be neither copied nor assigned.
284  using Params = LdsStateParams;
285 
// declared here, defined out of line
286  LdsState(const Params &params);
287 
288  // prevent copy construction
289  LdsState(const LdsState&) = delete;
290 
// NOTE(review): the declarator (source line 291) is missing from this
// extract; presumably the destructor -- confirm. The body only clears the
// parent pointer.
292  {
293  parent = nullptr;
294  }
295 
// returns the current value of the retryResp flag
296  bool
297  isRetryResp() const
298  {
299  return retryResp;
300  }
301 
// overwrites the retryResp flag with value
302  void
303  setRetryResp(const bool value)
304  {
305  retryResp = value;
306  }
307 
308  // prevent assignment
309  LdsState &
310  operator=(const LdsState &) = delete;
311 
315  int
316  increaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
317  {
318  int refCount = getRefCounter(dispatchId, wgId);
319  fatal_if(refCount < 0,
320  "reference count should not be below zero");
321  return ++refCounter[dispatchId][wgId];
322  }
323 
328  int
329  decreaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
330  {
331  int refCount = getRefCounter(dispatchId, wgId);
332 
333  fatal_if(refCount <= 0,
334  "reference count should not be below zero or at zero to"
335  "decrement");
336 
337  refCounter[dispatchId][wgId]--;
338 
339  if (refCounter[dispatchId][wgId] == 0) {
340  releaseSpace(dispatchId, wgId);
341  return 0;
342  } else {
343  return refCounter[dispatchId][wgId];
344  }
345  }
346 
350  int
351  getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
352  {
353  auto dispatchIter = chunkMap.find(dispatchId);
354  fatal_if(dispatchIter == chunkMap.end(),
355  "could not locate this dispatch id [%d]", dispatchId);
356 
357  auto workgroup = dispatchIter->second.find(wgId);
358  fatal_if(workgroup == dispatchIter->second.end(),
359  "could not find this workgroup id within this dispatch id"
360  " did[%d] wgid[%d]", dispatchId, wgId);
361 
362  auto refCountIter = refCounter.find(dispatchId);
363  if (refCountIter == refCounter.end()) {
364  fatal("could not locate this dispatch id [%d]", dispatchId);
365  } else {
366  auto workgroup = refCountIter->second.find(wgId);
367  if (workgroup == refCountIter->second.end()) {
368  fatal("could not find this workgroup id within this dispatch id"
369  " did[%d] wgid[%d]", dispatchId, wgId);
370  } else {
371  return refCounter.at(dispatchId).at(wgId);
372  }
373  }
374 
375  fatal("should not reach this point");
376  return 0;
377  }
378 
383  LdsChunk *
384  reserveSpace(const uint32_t dispatchId, const uint32_t wgId,
385  const uint32_t size)
386  {
387  if (chunkMap.find(dispatchId) != chunkMap.end()) {
388  panic_if(
389  chunkMap[dispatchId].find(wgId) != chunkMap[dispatchId].end(),
390  "duplicate workgroup ID asking for space in the LDS "
391  "did[%d] wgid[%d]", dispatchId, wgId);
392  }
393 
394  if (bytesAllocated + size > maximumSize) {
395  return nullptr;
396  } else {
397  bytesAllocated += size;
398 
399  auto value = chunkMap[dispatchId].emplace(wgId, LdsChunk(size));
400  panic_if(!value.second, "was unable to allocate a new chunkMap");
401 
402  // make an entry for this workgroup
403  refCounter[dispatchId][wgId] = 0;
404 
405  return &chunkMap[dispatchId][wgId];
406  }
407  }
408 
409  /*
410  * return pointer to lds chunk for wgid
411  */
412  LdsChunk *
413  getLdsChunk(const uint32_t dispatchId, const uint32_t wgId)
414  {
415  fatal_if(chunkMap.find(dispatchId) == chunkMap.end(),
416  "fetch for unknown dispatch ID did[%d]", dispatchId);
417 
418  fatal_if(chunkMap[dispatchId].find(wgId) == chunkMap[dispatchId].end(),
419  "fetch for unknown workgroup ID wgid[%d] in dispatch ID did[%d]",
420  wgId, dispatchId);
421 
422  return &chunkMap[dispatchId][wgId];
423  }
424 
// Return-queue helpers and simple accessors.
// NOTE(review): this listing is an extract with source lines missing;
// several declarators below are absent. Names quoted in the notes come
// from the symbol index at the end of this listing.

// NOTE(review): declarator missing (source line 426); presumably
// `returnQueuePush(std::pair<Tick, PacketPtr> thePair)`, indexed as "add
// this to the queue of packets to be returned" -- confirm.
425  bool
427 
// NOTE(review): declarator missing (source line 429); the index lists
// `earliestReturnTime() const` at that line.
// Yields the current tick when returnQueue is empty, otherwise the tick
// stored with the entry at the back of the queue.
428  Tick
430  {
431  // TODO set to max(lastCommand+1, curTick())
432  return returnQueue.empty() ? curTick() : returnQueue.back().first;
433  }
434 
// declared here, defined out of line; indexed as "set the parent and name
// based on the parent"
435  void
436  setParent(ComputeUnit *x_parent);
437 
438  // accessors
// returns the stored parent pointer
439  ComputeUnit *
440  getParent() const
441  {
442  return parent;
443  }
444 
// NOTE(review): declarator missing (source line 446); the index lists
// `getName()` at that line. Returns a copy of _name.
445  std::string
447  {
448  return _name;
449  }
450 
// returns the banks member
451  int
452  getBanks() const
453  {
454  return banks;
455  }
456 
// NOTE(review): declarator missing (source line 458); the index lists
// `getComputeUnit() const` at that line. Returns the same pointer as
// getParent().
457  ComputeUnit *
459  {
460  return parent;
461  }
462 
// NOTE(review): declarator missing (source line 464); the index lists
// `getBankConflictPenalty() const` at that line.
463  int
465  {
466  return bankConflictPenalty;
467  }
468 
472  std::size_t
473  ldsSize(const uint32_t x_wgId)
474  {
475  return chunkMap[x_wgId].size();
476  }
477 
// Returns, by value, the address range held in the `range` member.
478  AddrRange
479  getAddrRange() const
480  {
481  return range;
482  }
483 
484  Port &
485  getPort(const std::string &if_name, PortID idx)
486  {
487  if (if_name == "cuPort") {
488  // TODO need to set name dynamically at this point?
489  return cuPort;
490  } else {
491  fatal("cannot resolve the port name " + if_name);
492  }
493  }
494 
498  bool
499  canReserve(uint32_t x_size) const
500  {
501  return bytesAllocated + x_size <= maximumSize;
502  }
503 
504  private:
508  bool
509  releaseSpace(const uint32_t x_dispatchId, const uint32_t x_wgId)
510  {
511  auto dispatchIter = chunkMap.find(x_dispatchId);
512 
513  if (dispatchIter == chunkMap.end()) {
514  fatal("dispatch id not found [%d]", x_dispatchId);
515  } else {
516  auto workgroupIter = dispatchIter->second.find(x_wgId);
517  if (workgroupIter == dispatchIter->second.end()) {
518  fatal("workgroup id [%d] not found in dispatch id [%d]",
519  x_wgId, x_dispatchId);
520  }
521  }
522 
523  fatal_if(bytesAllocated < chunkMap[x_dispatchId][x_wgId].size(),
524  "releasing more space than was allocated");
525 
526  bytesAllocated -= chunkMap[x_dispatchId][x_wgId].size();
527  chunkMap[x_dispatchId].erase(chunkMap[x_dispatchId].find(x_wgId));
528  return true;
529  }
530 
// Private data members of LdsState.
// NOTE(review): this listing is an extract with source lines missing;
// several declarations below are absent. Types quoted in the notes come
// from the symbol index at the end of this listing.
531  // the port that connects this LDS to its owner CU
// NOTE(review): declaration missing (source line 532); the index lists it
// as `CuSidePort cuPort`.
533 
// returned by getParent() and getComputeUnit()
534  ComputeUnit* parent = nullptr;
535 
// returned by getName()
536  std::string _name;
537 
538  // the number of bytes currently reserved by all workgroups
539  int bytesAllocated = 0;
540 
541  // the size of the LDS, the most bytes available
// NOTE(review): declaration missing (source line 542); presumably the
// `maximumSize` member read by reserveSpace() and canReserve(). Its type
// is not shown anywhere in this extract.
543 
544  // Address range of this memory
// NOTE(review): declaration missing (source line 545); the index lists it
// as `AddrRange range`.
546 
547  // the penalty, in cycles, for each LDS bank conflict
// NOTE(review): declaration missing (source line 548); the index lists it
// as `int bankConflictPenalty`.
549 
550  // the number of banks in the LDS underlying data store
551  int banks = 0;
552 };
553 
554 } // namespace gem5
555 
556 #endif // __LDS_STATE_HH__
The AddrRange class encapsulates an address range, and supports a number of tests to check if two ran...
Definition: addr_range.hh:82
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
this represents a slice of the overall LDS, intended to be associated with an individual workgroup
Definition: lds_state.hh:57
void write(const uint32_t index, const T value)
a write operation
Definition: lds_state.hh:90
T atomic(const uint32_t index, AtomicOpFunctorPtr amoOp)
an atomic operation
Definition: lds_state.hh:109
std::vector< uint8_t >::size_type size() const
get the size of this chunk
Definition: lds_state.hh:129
LdsChunk(const uint32_t x_size)
Definition: lds_state.hh:59
T read(const uint32_t index)
a read operation
Definition: lds_state.hh:71
std::vector< uint8_t > chunk
Definition: lds_state.hh:136
CuSidePort is the LDS Port closer to the CU side.
Definition: lds_state.hh:183
virtual Tick recvAtomic(PacketPtr pkt)
Receive an atomic request packet from the peer.
Definition: lds_state.hh:197
virtual void recvRetry()
receive a retry
Definition: lds_state.cc:257
virtual bool recvTimingReq(PacketPtr pkt)
receive the packet from the CU
Definition: lds_state.cc:169
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
Definition: lds_state.hh:217
void storeData(PacketPtr packet)
virtual void recvRangeChange()
Definition: lds_state.hh:206
CuSidePort(const std::string &_name, LdsState *_ownerLds)
Definition: lds_state.hh:185
void atomicOperation(PacketPtr packet)
virtual void recvFunctional(PacketPtr pkt)
receive a packet in functional mode
Definition: lds_state.cc:236
virtual void recvRespRetry()
receive a retry for a response
Definition: lds_state.cc:245
void loadData(PacketPtr packet)
an event to allow event-driven execution
Definition: lds_state.hh:149
virtual void process()
wake up at this time and perform specified actions
Definition: lds_state.cc:319
TickEvent(LdsState *_ldsState)
Definition: lds_state.hh:158
void schedule(Tick when)
Definition: lds_state.hh:167
int getBanks() const
Definition: lds_state.hh:452
LdsChunk * reserveSpace(const uint32_t dispatchId, const uint32_t wgId, const uint32_t size)
assign a parent and request this amount of space be set aside for this wgid
Definition: lds_state.hh:384
int increaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
use the dynamic wave id to create or just increase the reference count
Definition: lds_state.hh:316
bool process()
look for packets to return at this time
Definition: lds_state.cc:266
AddrRange range
Definition: lds_state.hh:545
bool canReserve(uint32_t x_size) const
can this much space be reserved for a workgroup?
Definition: lds_state.hh:499
ComputeUnit * getParent() const
Definition: lds_state.hh:440
int decreaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
Decrease the reference count after making sure it is in the list; give back this chunk if the ref coun...
Definition: lds_state.hh:329
std::string _name
Definition: lds_state.hh:536
void setRetryResp(const bool value)
Definition: lds_state.hh:303
bool returnQueuePush(std::pair< Tick, PacketPtr > thePair)
add this to the queue of packets to be returned
Definition: lds_state.cc:218
std::unordered_map< uint32_t, std::unordered_map< uint32_t, int32_t > > refCounter
The LDS reference counter. The key is the workgroup ID and dispatch ID. The value is the number of wave...
Definition: lds_state.hh:250
int getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
return the current reference count for this workgroup id
Definition: lds_state.hh:351
LdsChunk * getLdsChunk(const uint32_t dispatchId, const uint32_t wgId)
Definition: lds_state.hh:413
std::unordered_map< uint32_t, std::unordered_map< uint32_t, LdsChunk > > chunkMap
Definition: lds_state.hh:254
Port & getPort(const std::string &if_name, PortID idx)
Get a port with a given name and index.
Definition: lds_state.hh:485
LdsState(const Params &params)
the default constructor that works with SWIG
Definition: lds_state.cc:48
bool processPacket(PacketPtr packet)
process an incoming packet, add it to the return queue
Definition: lds_state.cc:187
int bankConflictPenalty
Definition: lds_state.hh:548
LdsState & operator=(const LdsState &)=delete
TickEvent tickEvent
Definition: lds_state.hh:257
unsigned countBankConflicts(PacketPtr packet, unsigned *bankAccesses)
derive the gpu mem packet from the packet and then count the bank conflicts
Definition: lds_state.cc:86
LdsState(const LdsState &)=delete
std::queue< std::pair< Tick, PacketPtr > > returnQueue
Definition: lds_state.hh:262
int getBankConflictPenalty() const
Definition: lds_state.hh:464
bool isRetryResp() const
Definition: lds_state.hh:297
ComputeUnit * parent
Definition: lds_state.hh:534
LdsStateParams Params
Definition: lds_state.hh:284
void setParent(ComputeUnit *x_parent)
set the parent and name based on the parent
Definition: lds_state.cc:71
std::string getName()
Definition: lds_state.hh:446
CuSidePort cuPort
Definition: lds_state.hh:532
ComputeUnit * getComputeUnit() const
Definition: lds_state.hh:458
GPUDynInstPtr getDynInstr(PacketPtr packet)
Definition: lds_state.cc:175
bool releaseSpace(const uint32_t x_dispatchId, const uint32_t x_wgId)
give back the space
Definition: lds_state.hh:509
std::size_t ldsSize(const uint32_t x_wgId)
get the allocated size for this workgroup
Definition: lds_state.hh:473
AddrRange getAddrRange() const
Definition: lds_state.hh:479
Tick earliestReturnTime() const
Definition: lds_state.hh:429
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:294
Ports are used to interface objects to each other.
Definition: port.hh:62
A ResponsePort is a specialization of a port.
Definition: port.hh:270
STL pair class.
Definition: stl.hh:58
STL vector class.
Definition: stl.hh:37
ClockedObject declaration and implementation.
std::unique_ptr< AtomicOpFunctor > AtomicOpFunctorPtr
Definition: amo.hh:242
Tick when() const
Get the time that the event is scheduled.
Definition: eventq.hh:508
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition: logging.hh:226
#define fatal(...)
This implements a cprintf based fatal() function.
Definition: logging.hh:190
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition: logging.hh:204
const Params & params() const
Definition: sim_object.hh:176
static SimObject * find(const char *name)
Find the SimObject with the given name and return a pointer to it.
Definition: sim_object.cc:176
Port Object Declaration.
Bitfield< 30, 0 > index
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:49
Tick curTick()
The universal simulation clock.
Definition: cur_tick.hh:46
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Definition: types.hh:245
uint64_t Tick
Tick count type.
Definition: types.hh:58
std::vector< EventQueue * > mainEventQueue
Array for main event queues.
Definition: eventq.cc:57

Generated on Wed Dec 21 2022 10:22:35 for gem5 by doxygen 1.9.1