gem5  v21.0.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
lds_state.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #ifndef __LDS_STATE_HH__
35 #define __LDS_STATE_HH__
36 
37 #include <array>
38 #include <queue>
39 #include <string>
40 #include <unordered_map>
41 #include <utility>
42 #include <vector>
43 
44 #include "gpu-compute/misc.hh"
45 #include "mem/port.hh"
46 #include "params/LdsState.hh"
47 #include "sim/clocked_object.hh"
48 
49 class ComputeUnit;
50 
/**
 * A slice of the overall LDS, intended to be associated with an individual
 * workgroup. The slice is a flat, byte-addressed store; typed accesses copy
 * sizeof(T) bytes starting at the given byte index.
 */
class LdsChunk
{
  public:
    /**
     * Create a chunk holding x_size value-initialised (zero) bytes.
     */
    LdsChunk(const uint32_t x_size):
        chunk(x_size)
    {
    }

    /** Create an empty chunk (size() == 0). */
    LdsChunk() {}

    /**
     * a read operation
     *
     * @param index byte offset of the first byte to read
     * @return the value stored at [index, index + sizeof(T)), or zero when
     *         any byte of that range lies outside the chunk
     */
    template<class T>
    T
    read(const uint32_t index)
    {
        // The whole access must fit, not just its first byte; otherwise a
        // wide read near the end would run past the backing vector.
        if (!inBounds<T>(index)) {
            return static_cast<T>(0);
        }

        // Copy byte-wise so that unaligned indices are handled safely.
        T value;
        uint8_t *dst = reinterpret_cast<uint8_t *>(&value);
        for (std::size_t i = 0; i < sizeof(T); ++i) {
            dst[i] = chunk[index + i];
        }
        return value;
    }

    /**
     * a write operation
     *
     * @param index byte offset of the first byte to write
     * @param value the value to store; silently dropped when any byte of
     *              [index, index + sizeof(T)) lies outside the chunk
     */
    template<class T>
    void
    write(const uint32_t index, const T value)
    {
        if (!inBounds<T>(index)) {
            return;
        }

        const uint8_t *src = reinterpret_cast<const uint8_t *>(&value);
        for (std::size_t i = 0; i < sizeof(T); ++i) {
            chunk[index + i] = src[i];
        }
    }

    /**
     * get the size of this chunk, in bytes
     */
    std::vector<uint8_t>::size_type
    size() const
    {
        return chunk.size();
    }

  protected:
    /**
     * True when a T-sized access starting at index fits entirely inside the
     * chunk. Written as a subtraction so a large index cannot wrap around.
     */
    template<class T>
    bool
    inBounds(const uint32_t index) const
    {
        return sizeof(T) <= chunk.size() &&
            index <= chunk.size() - sizeof(T);
    }

    // the actual data store for this slice of the LDS
    std::vector<uint8_t> chunk;
};
116 
117 // Local Data Share (LDS) State per Wavefront (contents of the LDS region
118 // allocated to the WorkGroup of this Wavefront)
119 class LdsState: public ClockedObject
120 {
121  protected:
122 
126  class TickEvent: public Event
127  {
128  protected:
129 
130  LdsState *ldsState = nullptr;
131 
133 
134  public:
135 
136  TickEvent(LdsState *_ldsState) :
137  ldsState(_ldsState)
138  {
139  }
140 
141  virtual void
142  process();
143 
144  void
146  {
147  mainEventQueue[0]->schedule(this, when);
148  }
149 
150  void
152  {
153  mainEventQueue[0]->deschedule(this);
154  }
155  };
156 
160  class CuSidePort: public ResponsePort
161  {
162  public:
163  CuSidePort(const std::string &_name, LdsState *_ownerLds) :
164  ResponsePort(_name, _ownerLds), ownerLds(_ownerLds)
165  {
166  }
167 
168  protected:
170 
171  virtual bool
173 
174  virtual Tick
176  {
177  return 0;
178  }
179 
180  virtual void
182 
183  virtual void
185  {
186  }
187 
188  virtual void
189  recvRetry();
190 
191  virtual void
192  recvRespRetry();
193 
194  virtual AddrRangeList
196  {
197  AddrRangeList ranges;
198  ranges.push_back(ownerLds->getAddrRange());
199  return ranges;
200  }
201 
202  template<typename T>
203  void
204  loadData(PacketPtr packet);
205 
206  template<typename T>
207  void
208  storeData(PacketPtr packet);
209 
210  template<typename T>
211  void
212  atomicOperation(PacketPtr packet);
213  };
214 
215  protected:
216 
227  std::unordered_map<uint32_t,
228  std::unordered_map<uint32_t, int32_t>> refCounter;
229 
230  // the map that allows workgroups to access their own chunk of the LDS
231  std::unordered_map<uint32_t,
232  std::unordered_map<uint32_t, LdsChunk>> chunkMap;
233 
234  // an event to allow the LDS to wake up at a specified time
236 
237  // the queue of packets that are going back to the CU after a
238  // read/write/atomic op
239  // TODO need to make this have a maximum size to create flow control
240  std::queue<std::pair<Tick, PacketPtr>> returnQueue;
241 
242  // whether or not there are pending responses
243  bool retryResp = false;
244 
245  bool
246  process();
247 
249  getDynInstr(PacketPtr packet);
250 
251  bool
252  processPacket(PacketPtr packet);
253 
254  unsigned
255  countBankConflicts(PacketPtr packet, unsigned *bankAccesses);
256 
257  unsigned
259  unsigned *numBankAccesses);
260 
261  public:
262  using Params = LdsStateParams;
263 
264  LdsState(const Params &params);
265 
266  // prevent copy construction
267  LdsState(const LdsState&) = delete;
268 
270  {
271  parent = nullptr;
272  }
273 
274  bool
275  isRetryResp() const
276  {
277  return retryResp;
278  }
279 
280  void
281  setRetryResp(const bool value)
282  {
283  retryResp = value;
284  }
285 
286  // prevent assignment
287  LdsState &
288  operator=(const LdsState &) = delete;
289 
293  int
294  increaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
295  {
296  int refCount = getRefCounter(dispatchId, wgId);
297  fatal_if(refCount < 0,
298  "reference count should not be below zero");
299  return ++refCounter[dispatchId][wgId];
300  }
301 
306  int
307  decreaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
308  {
309  int refCount = getRefCounter(dispatchId, wgId);
310 
311  fatal_if(refCount <= 0,
312  "reference count should not be below zero or at zero to"
313  "decrement");
314 
315  refCounter[dispatchId][wgId]--;
316 
317  if (refCounter[dispatchId][wgId] == 0) {
318  releaseSpace(dispatchId, wgId);
319  return 0;
320  } else {
321  return refCounter[dispatchId][wgId];
322  }
323  }
324 
328  int
329  getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
330  {
331  auto dispatchIter = chunkMap.find(dispatchId);
332  fatal_if(dispatchIter == chunkMap.end(),
333  "could not locate this dispatch id [%d]", dispatchId);
334 
335  auto workgroup = dispatchIter->second.find(wgId);
336  fatal_if(workgroup == dispatchIter->second.end(),
337  "could not find this workgroup id within this dispatch id"
338  " did[%d] wgid[%d]", dispatchId, wgId);
339 
340  auto refCountIter = refCounter.find(dispatchId);
341  if (refCountIter == refCounter.end()) {
342  fatal("could not locate this dispatch id [%d]", dispatchId);
343  } else {
344  auto workgroup = refCountIter->second.find(wgId);
345  if (workgroup == refCountIter->second.end()) {
346  fatal("could not find this workgroup id within this dispatch id"
347  " did[%d] wgid[%d]", dispatchId, wgId);
348  } else {
349  return refCounter.at(dispatchId).at(wgId);
350  }
351  }
352 
353  fatal("should not reach this point");
354  return 0;
355  }
356 
361  LdsChunk *
362  reserveSpace(const uint32_t dispatchId, const uint32_t wgId,
363  const uint32_t size)
364  {
365  if (chunkMap.find(dispatchId) != chunkMap.end()) {
366  panic_if(
367  chunkMap[dispatchId].find(wgId) != chunkMap[dispatchId].end(),
368  "duplicate workgroup ID asking for space in the LDS "
369  "did[%d] wgid[%d]", dispatchId, wgId);
370  }
371 
372  if (bytesAllocated + size > maximumSize) {
373  return nullptr;
374  } else {
375  bytesAllocated += size;
376 
377  auto value = chunkMap[dispatchId].emplace(wgId, LdsChunk(size));
378  panic_if(!value.second, "was unable to allocate a new chunkMap");
379 
380  // make an entry for this workgroup
381  refCounter[dispatchId][wgId] = 0;
382 
383  return &chunkMap[dispatchId][wgId];
384  }
385  }
386 
387  /*
388  * return pointer to lds chunk for wgid
389  */
390  LdsChunk *
391  getLdsChunk(const uint32_t dispatchId, const uint32_t wgId)
392  {
393  fatal_if(chunkMap.find(dispatchId) == chunkMap.end(),
394  "fetch for unknown dispatch ID did[%d]", dispatchId);
395 
396  fatal_if(chunkMap[dispatchId].find(wgId) == chunkMap[dispatchId].end(),
397  "fetch for unknown workgroup ID wgid[%d] in dispatch ID did[%d]",
398  wgId, dispatchId);
399 
400  return &chunkMap[dispatchId][wgId];
401  }
402 
403  bool
405 
406  Tick
408  {
409  // TODO set to max(lastCommand+1, curTick())
410  return returnQueue.empty() ? curTick() : returnQueue.back().first;
411  }
412 
413  void
414  setParent(ComputeUnit *x_parent);
415 
416  // accessors
417  ComputeUnit *
418  getParent() const
419  {
420  return parent;
421  }
422 
423  std::string
425  {
426  return _name;
427  }
428 
429  int
430  getBanks() const
431  {
432  return banks;
433  }
434 
435  ComputeUnit *
437  {
438  return parent;
439  }
440 
441  int
443  {
444  return bankConflictPenalty;
445  }
446 
450  std::size_t
451  ldsSize(const uint32_t x_wgId)
452  {
453  return chunkMap[x_wgId].size();
454  }
455 
456  AddrRange
457  getAddrRange() const
458  {
459  return range;
460  }
461 
462  Port &
463  getPort(const std::string &if_name, PortID idx)
464  {
465  if (if_name == "cuPort") {
466  // TODO need to set name dynamically at this point?
467  return cuPort;
468  } else {
469  fatal("cannot resolve the port name " + if_name);
470  }
471  }
472 
476  bool
477  canReserve(uint32_t x_size) const
478  {
479  return bytesAllocated + x_size <= maximumSize;
480  }
481 
482  private:
486  bool
487  releaseSpace(const uint32_t x_dispatchId, const uint32_t x_wgId)
488  {
489  auto dispatchIter = chunkMap.find(x_dispatchId);
490 
491  if (dispatchIter == chunkMap.end()) {
492  fatal("dispatch id not found [%d]", x_dispatchId);
493  } else {
494  auto workgroupIter = dispatchIter->second.find(x_wgId);
495  if (workgroupIter == dispatchIter->second.end()) {
496  fatal("workgroup id [%d] not found in dispatch id [%d]",
497  x_wgId, x_dispatchId);
498  }
499  }
500 
501  fatal_if(bytesAllocated < chunkMap[x_dispatchId][x_wgId].size(),
502  "releasing more space than was allocated");
503 
504  bytesAllocated -= chunkMap[x_dispatchId][x_wgId].size();
505  chunkMap[x_dispatchId].erase(chunkMap[x_dispatchId].find(x_wgId));
506  return true;
507  }
508 
509  // the port that connects this LDS to its owner CU
511 
512  ComputeUnit* parent = nullptr;
513 
514  std::string _name;
515 
516  // the number of bytes currently reserved by all workgroups
517  int bytesAllocated = 0;
518 
519  // the size of the LDS, the most bytes available
521 
522  // Address range of this memory
524 
525  // the penalty, in cycles, for each LDS bank conflict
527 
528  // the number of banks in the LDS underlying data store
529  int banks = 0;
530 };
531 
532 #endif // __LDS_STATE_HH__
fatal
#define fatal(...)
This implements a cprintf based fatal() function.
Definition: logging.hh:183
LdsState::tickEvent
TickEvent tickEvent
Definition: lds_state.hh:235
LdsState::CuSidePort::recvRetry
virtual void recvRetry()
receive a retry
Definition: lds_state.cc:256
ResponsePort
A ResponsePort is a specialization of a port.
Definition: port.hh:265
LdsState::CuSidePort::loadData
void loadData(PacketPtr packet)
LdsChunk::write
void write(const uint32_t index, const T value)
a write operation
Definition: lds_state.hh:89
LdsState::operator=
LdsState & operator=(const LdsState &)=delete
LdsState::range
AddrRange range
Definition: lds_state.hh:523
MipsISA::index
Bitfield< 30, 0 > index
Definition: pra_constants.hh:44
LdsState::isRetryResp
bool isRetryResp() const
Definition: lds_state.hh:275
LdsState::CuSidePort
CuSidePort is the LDS Port closer to the CU side.
Definition: lds_state.hh:160
LdsState::TickEvent::process
virtual void process()
wake up at this time and perform specified actions
Definition: lds_state.cc:318
LdsState::getAddrRange
AddrRange getAddrRange() const
Definition: lds_state.hh:457
misc.hh
Tick
uint64_t Tick
Tick count type.
Definition: types.hh:59
PortID
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Definition: types.hh:243
LdsState::returnQueuePush
bool returnQueuePush(std::pair< Tick, PacketPtr > thePair)
add this to the queue of packets to be returned
Definition: lds_state.cc:217
LdsState::CuSidePort::recvTimingReq
virtual bool recvTimingReq(PacketPtr pkt)
receive the packet from the CU
Definition: lds_state.cc:168
LdsState::getComputeUnit
ComputeUnit * getComputeUnit() const
Definition: lds_state.hh:436
LdsState::getParent
ComputeUnit * getParent() const
Definition: lds_state.hh:418
std::vector
STL vector class.
Definition: stl.hh:37
LdsState::Params
LdsStateParams Params
Definition: lds_state.hh:262
LdsState::process
bool process()
look for packets to return at this time
Definition: lds_state.cc:265
LdsState::TickEvent
an event to allow event-driven execution
Definition: lds_state.hh:126
Event::when
Tick when() const
Get the time that the event is scheduled.
Definition: eventq.hh:505
ClockedObject
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
Definition: clocked_object.hh:231
LdsState::CuSidePort::recvAtomic
virtual Tick recvAtomic(PacketPtr pkt)
Receive an atomic request packet from the peer.
Definition: lds_state.hh:175
LdsState::getRefCounter
int getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
return the current reference count for this workgroup id
Definition: lds_state.hh:329
ComputeUnit
Definition: compute_unit.hh:200
LdsChunk::chunk
std::vector< uint8_t > chunk
Definition: lds_state.hh:114
LdsState::refCounter
std::unordered_map< uint32_t, std::unordered_map< uint32_t, int32_t > > refCounter
The LDS reference counter. The key is the workgroup ID and dispatch ID; the value is the number of wave...
Definition: lds_state.hh:228
LdsState::TickEvent::ldsState
LdsState * ldsState
Definition: lds_state.hh:130
LdsState::TickEvent::deschedule
void deschedule()
Definition: lds_state.hh:151
LdsState::banks
int banks
Definition: lds_state.hh:529
LdsState::getPort
Port & getPort(const std::string &if_name, PortID idx)
Get a port with a given name and index.
Definition: lds_state.hh:463
Event
Definition: eventq.hh:248
AddrRange
The AddrRange class encapsulates an address range, and supports a number of tests to check if two ran...
Definition: addr_range.hh:68
LdsState::returnQueue
std::queue< std::pair< Tick, PacketPtr > > returnQueue
Definition: lds_state.hh:240
LdsState::TickEvent::schedule
void schedule(Tick when)
Definition: lds_state.hh:145
LdsState::earliestReturnTime
Tick earliestReturnTime() const
Definition: lds_state.hh:407
LdsState::CuSidePort::recvFunctional
virtual void recvFunctional(PacketPtr pkt)
receive a packet in functional mode
Definition: lds_state.cc:235
LdsState
Definition: lds_state.hh:119
LdsState::CuSidePort::storeData
void storeData(PacketPtr packet)
Port
Ports are used to interface objects to each other.
Definition: port.hh:56
LdsState::CuSidePort::getAddrRanges
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
Definition: lds_state.hh:195
port.hh
LdsState::~LdsState
~LdsState()
Definition: lds_state.hh:269
LdsState::TickEvent::nextTick
Tick nextTick
Definition: lds_state.hh:132
LdsState::getDynInstr
GPUDynInstPtr getDynInstr(PacketPtr packet)
Definition: lds_state.cc:174
std::pair
STL pair class.
Definition: stl.hh:58
LdsState::CuSidePort::CuSidePort
CuSidePort(const std::string &_name, LdsState *_ownerLds)
Definition: lds_state.hh:163
LdsState::bankConflictPenalty
int bankConflictPenalty
Definition: lds_state.hh:526
LdsState::retryResp
bool retryResp
Definition: lds_state.hh:243
LdsState::maximumSize
int maximumSize
Definition: lds_state.hh:520
LdsState::ldsSize
std::size_t ldsSize(const uint32_t x_wgId)
get the allocated size for this workgroup
Definition: lds_state.hh:451
LdsState::_name
std::string _name
Definition: lds_state.hh:514
LdsState::chunkMap
std::unordered_map< uint32_t, std::unordered_map< uint32_t, LdsChunk > > chunkMap
Definition: lds_state.hh:232
LdsState::countBankConflicts
unsigned countBankConflicts(PacketPtr packet, unsigned *bankAccesses)
derive the gpu mem packet from the packet and then count the bank conflicts
Definition: lds_state.cc:85
LdsState::CuSidePort::ownerLds
LdsState * ownerLds
Definition: lds_state.hh:169
LdsState::CuSidePort::recvRangeChange
virtual void recvRangeChange()
Definition: lds_state.hh:184
LdsState::releaseSpace
bool releaseSpace(const uint32_t x_dispatchId, const uint32_t x_wgId)
give back the space
Definition: lds_state.hh:487
LdsState::CuSidePort::atomicOperation
void atomicOperation(PacketPtr packet)
LdsState::setParent
void setParent(ComputeUnit *x_parent)
set the parent and name based on the parent
Definition: lds_state.cc:70
LdsChunk::LdsChunk
LdsChunk()
Definition: lds_state.hh:63
LdsState::processPacket
bool processPacket(PacketPtr packet)
process an incoming packet, add it to the return queue
Definition: lds_state.cc:186
panic_if
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition: logging.hh:197
LdsState::getName
std::string getName()
Definition: lds_state.hh:424
LdsState::decreaseRefCounter
int decreaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
Decrease the reference count after making sure it is in the list; give back this chunk if the ref coun...
Definition: lds_state.hh:307
LdsState::TickEvent::TickEvent
TickEvent(LdsState *_ldsState)
Definition: lds_state.hh:136
LdsState::reserveSpace
LdsChunk * reserveSpace(const uint32_t dispatchId, const uint32_t wgId, const uint32_t size)
assign a parent and request this amount of space be set aside for this wgid
Definition: lds_state.hh:362
LdsState::cuPort
CuSidePort cuPort
Definition: lds_state.hh:510
LdsChunk::read
T read(const uint32_t index)
a read operation
Definition: lds_state.hh:70
clocked_object.hh
Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:258
LdsChunk::LdsChunk
LdsChunk(const uint32_t x_size)
Definition: lds_state.hh:58
GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:48
mainEventQueue
std::vector< EventQueue * > mainEventQueue
Array for main event queues.
Definition: eventq.cc:55
LdsChunk
this represents a slice of the overall LDS, intended to be associated with an individual workgroup
Definition: lds_state.hh:55
LdsState::setRetryResp
void setRetryResp(const bool value)
Definition: lds_state.hh:281
LdsState::getBankConflictPenalty
int getBankConflictPenalty() const
Definition: lds_state.hh:442
LdsState::CuSidePort::recvRespRetry
virtual void recvRespRetry()
receive a retry for a response
Definition: lds_state.cc:244
LdsState::bytesAllocated
int bytesAllocated
Definition: lds_state.hh:517
curTick
Tick curTick()
The universal simulation clock.
Definition: cur_tick.hh:43
LdsState::parent
ComputeUnit * parent
Definition: lds_state.hh:512
SimObject::params
const Params & params() const
Definition: sim_object.hh:168
std::list< AddrRange >
LdsState::LdsState
LdsState(const Params &params)
the default constructor that works with SWIG
Definition: lds_state.cc:47
fatal_if
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition: logging.hh:219
LdsState::increaseRefCounter
int increaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
use the dynamic wave id to create or just increase the reference count
Definition: lds_state.hh:294
LdsState::getLdsChunk
LdsChunk * getLdsChunk(const uint32_t dispatchId, const uint32_t wgId)
Definition: lds_state.hh:391
LdsState::getBanks
int getBanks() const
Definition: lds_state.hh:430
SimObject::find
static SimObject * find(const char *name)
Find the SimObject with the given name and return a pointer to it.
Definition: sim_object.cc:168
LdsState::canReserve
bool canReserve(uint32_t x_size) const
can this much space be reserved for a workgroup?
Definition: lds_state.hh:477
LdsChunk::size
std::vector< uint8_t >::size_type size() const
get the size of this chunk
Definition: lds_state.hh:107

Generated on Tue Mar 23 2021 19:41:27 for gem5 by doxygen 1.8.17