gem5  v20.1.0.0
lds_state.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #ifndef __LDS_STATE_HH__
35 #define __LDS_STATE_HH__
36 
37 #include <array>
38 #include <queue>
39 #include <string>
40 #include <unordered_map>
41 #include <utility>
42 #include <vector>
43 
44 #include "gpu-compute/misc.hh"
45 #include "mem/port.hh"
46 #include "params/LdsState.hh"
47 #include "sim/clocked_object.hh"
48 
49 class ComputeUnit;
50 
/**
 * A slice of the overall LDS, intended to be associated with an individual
 * workgroup. The slice is a flat byte array; read() and write() reinterpret
 * sizeof(T) bytes starting at a byte offset.
 */
class LdsChunk
{
  public:
    /** Create a chunk holding x_size bytes (value-initialized by vector). */
    LdsChunk(const uint32_t x_size):
        chunk(x_size)
    {
    }

    /** Create an empty chunk. */
    LdsChunk() {}

    /**
     * a read operation
     *
     * @param index byte offset of the first byte to read
     * @return the T stored at that offset, or 0 when any of the sizeof(T)
     *         bytes would fall outside the chunk
     */
    template<class T>
    T
    read(const uint32_t index)
    {
        // Reject the access when *any* byte of T lies past the end, not
        // just the first one; otherwise a multi-byte read that straddles
        // the end would run off the underlying vector.
        if (static_cast<std::size_t>(index) + sizeof(T) > chunk.size()) {
            return (T)0;
        }

        T *p0 = (T *) (&(chunk.at(index)));
        return *p0;
    }

    /**
     * a write operation
     *
     * @param index byte offset of the first byte to write
     * @param value the value to store; silently dropped when any of the
     *              sizeof(T) bytes would fall outside the chunk
     */
    template<class T>
    void
    write(const uint32_t index, const T value)
    {
        // Same whole-access bounds rule as read().
        if (static_cast<std::size_t>(index) + sizeof(T) > chunk.size()) {
            return;
        }

        T *p0 = (T *) (&(chunk.at(index)));
        *p0 = value;
    }

    /**
     * get the size of this chunk
     */
    std::vector<uint8_t>::size_type
    size() const
    {
        return chunk.size();
    }

  protected:
    // the actual data store for this slice of the LDS
    std::vector<uint8_t> chunk;
};
116 
117 // Local Data Share (LDS) State per Wavefront (contents of the LDS region
118 // allocated to the WorkGroup of this Wavefront)
119 class LdsState: public ClockedObject
120 {
121  protected:
122 
126  class TickEvent: public Event
127  {
128  protected:
129 
130  LdsState *ldsState = nullptr;
131 
133 
134  public:
135 
136  TickEvent(LdsState *_ldsState) :
137  ldsState(_ldsState)
138  {
139  }
140 
141  virtual void
142  process();
143 
144  void
146  {
147  mainEventQueue[0]->schedule(this, when);
148  }
149 
150  void
152  {
153  mainEventQueue[0]->deschedule(this);
154  }
155  };
156 
160  class CuSidePort: public ResponsePort
161  {
162  public:
163  CuSidePort(const std::string &_name, LdsState *_ownerLds) :
164  ResponsePort(_name, _ownerLds), ownerLds(_ownerLds)
165  {
166  }
167 
168  protected:
170 
171  virtual bool
173 
174  virtual Tick
176  {
177  return 0;
178  }
179 
180  virtual void
182 
183  virtual void
185  {
186  }
187 
188  virtual void
189  recvRetry();
190 
191  virtual void
192  recvRespRetry();
193 
194  virtual AddrRangeList
196  {
197  AddrRangeList ranges;
198  ranges.push_back(ownerLds->getAddrRange());
199  return ranges;
200  }
201 
202  template<typename T>
203  void
204  loadData(PacketPtr packet);
205 
206  template<typename T>
207  void
208  storeData(PacketPtr packet);
209 
210  template<typename T>
211  void
212  atomicOperation(PacketPtr packet);
213  };
214 
215  protected:
216 
227  std::unordered_map<uint32_t,
228  std::unordered_map<uint32_t, int32_t>> refCounter;
229 
230  // the map that allows workgroups to access their own chunk of the LDS
231  std::unordered_map<uint32_t,
232  std::unordered_map<uint32_t, LdsChunk>> chunkMap;
233 
234  // an event to allow the LDS to wake up at a specified time
236 
237  // the queue of packets that are going back to the CU after a
238  // read/write/atomic op
239  // TODO need to make this have a maximum size to create flow control
240  std::queue<std::pair<Tick, PacketPtr>> returnQueue;
241 
242  // whether or not there are pending responses
243  bool retryResp = false;
244 
245  bool
246  process();
247 
249  getDynInstr(PacketPtr packet);
250 
251  bool
252  processPacket(PacketPtr packet);
253 
254  unsigned
255  countBankConflicts(PacketPtr packet, unsigned *bankAccesses);
256 
257  unsigned
259  unsigned *numBankAccesses);
260 
261  public:
262  typedef LdsStateParams Params;
263 
264  LdsState(const Params *params);
265 
266  // prevent copy construction
267  LdsState(const LdsState&) = delete;
268 
270  {
271  parent = nullptr;
272  }
273 
274  const Params *
275  params() const
276  {
277  return dynamic_cast<const Params *>(_params);
278  }
279 
280  bool
281  isRetryResp() const
282  {
283  return retryResp;
284  }
285 
286  void
287  setRetryResp(const bool value)
288  {
289  retryResp = value;
290  }
291 
292  // prevent assignment
293  LdsState &
294  operator=(const LdsState &) = delete;
295 
299  int
300  increaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
301  {
302  int refCount = getRefCounter(dispatchId, wgId);
303  fatal_if(refCount < 0,
304  "reference count should not be below zero");
305  return ++refCounter[dispatchId][wgId];
306  }
307 
312  int
313  decreaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
314  {
315  int refCount = getRefCounter(dispatchId, wgId);
316 
317  fatal_if(refCount <= 0,
318  "reference count should not be below zero or at zero to"
319  "decrement");
320 
321  refCounter[dispatchId][wgId]--;
322 
323  if (refCounter[dispatchId][wgId] == 0) {
324  releaseSpace(dispatchId, wgId);
325  return 0;
326  } else {
327  return refCounter[dispatchId][wgId];
328  }
329  }
330 
334  int
335  getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
336  {
337  auto dispatchIter = chunkMap.find(dispatchId);
338  fatal_if(dispatchIter == chunkMap.end(),
339  "could not locate this dispatch id [%d]", dispatchId);
340 
341  auto workgroup = dispatchIter->second.find(wgId);
342  fatal_if(workgroup == dispatchIter->second.end(),
343  "could not find this workgroup id within this dispatch id"
344  " did[%d] wgid[%d]", dispatchId, wgId);
345 
346  auto refCountIter = refCounter.find(dispatchId);
347  if (refCountIter == refCounter.end()) {
348  fatal("could not locate this dispatch id [%d]", dispatchId);
349  } else {
350  auto workgroup = refCountIter->second.find(wgId);
351  if (workgroup == refCountIter->second.end()) {
352  fatal("could not find this workgroup id within this dispatch id"
353  " did[%d] wgid[%d]", dispatchId, wgId);
354  } else {
355  return refCounter.at(dispatchId).at(wgId);
356  }
357  }
358 
359  fatal("should not reach this point");
360  return 0;
361  }
362 
367  LdsChunk *
368  reserveSpace(const uint32_t dispatchId, const uint32_t wgId,
369  const uint32_t size)
370  {
371  if (chunkMap.find(dispatchId) != chunkMap.end()) {
372  panic_if(
373  chunkMap[dispatchId].find(wgId) != chunkMap[dispatchId].end(),
374  "duplicate workgroup ID asking for space in the LDS "
375  "did[%d] wgid[%d]", dispatchId, wgId);
376  }
377 
378  if (bytesAllocated + size > maximumSize) {
379  return nullptr;
380  } else {
381  bytesAllocated += size;
382 
383  auto value = chunkMap[dispatchId].emplace(wgId, LdsChunk(size));
384  panic_if(!value.second, "was unable to allocate a new chunkMap");
385 
386  // make an entry for this workgroup
387  refCounter[dispatchId][wgId] = 0;
388 
389  return &chunkMap[dispatchId][wgId];
390  }
391  }
392 
393  /*
394  * return pointer to lds chunk for wgid
395  */
396  LdsChunk *
397  getLdsChunk(const uint32_t dispatchId, const uint32_t wgId)
398  {
399  fatal_if(chunkMap.find(dispatchId) == chunkMap.end(),
400  "fetch for unknown dispatch ID did[%d]", dispatchId);
401 
402  fatal_if(chunkMap[dispatchId].find(wgId) == chunkMap[dispatchId].end(),
403  "fetch for unknown workgroup ID wgid[%d] in dispatch ID did[%d]",
404  wgId, dispatchId);
405 
406  return &chunkMap[dispatchId][wgId];
407  }
408 
409  bool
411 
412  Tick
414  {
415  // TODO set to max(lastCommand+1, curTick())
416  return returnQueue.empty() ? curTick() : returnQueue.back().first;
417  }
418 
419  void
420  setParent(ComputeUnit *x_parent);
421 
422  // accessors
423  ComputeUnit *
424  getParent() const
425  {
426  return parent;
427  }
428 
429  std::string
431  {
432  return _name;
433  }
434 
435  int
436  getBanks() const
437  {
438  return banks;
439  }
440 
441  ComputeUnit *
443  {
444  return parent;
445  }
446 
447  int
449  {
450  return bankConflictPenalty;
451  }
452 
456  std::size_t
457  ldsSize(const uint32_t x_wgId)
458  {
459  return chunkMap[x_wgId].size();
460  }
461 
462  AddrRange
463  getAddrRange() const
464  {
465  return range;
466  }
467 
468  Port &
469  getPort(const std::string &if_name, PortID idx)
470  {
471  if (if_name == "cuPort") {
472  // TODO need to set name dynamically at this point?
473  return cuPort;
474  } else {
475  fatal("cannot resolve the port name " + if_name);
476  }
477  }
478 
482  bool
483  canReserve(uint32_t x_size) const
484  {
485  return bytesAllocated + x_size <= maximumSize;
486  }
487 
488  private:
492  bool
493  releaseSpace(const uint32_t x_dispatchId, const uint32_t x_wgId)
494  {
495  auto dispatchIter = chunkMap.find(x_dispatchId);
496 
497  if (dispatchIter == chunkMap.end()) {
498  fatal("dispatch id not found [%d]", x_dispatchId);
499  } else {
500  auto workgroupIter = dispatchIter->second.find(x_wgId);
501  if (workgroupIter == dispatchIter->second.end()) {
502  fatal("workgroup id [%d] not found in dispatch id [%d]",
503  x_wgId, x_dispatchId);
504  }
505  }
506 
507  fatal_if(bytesAllocated < chunkMap[x_dispatchId][x_wgId].size(),
508  "releasing more space than was allocated");
509 
510  bytesAllocated -= chunkMap[x_dispatchId][x_wgId].size();
511  chunkMap[x_dispatchId].erase(chunkMap[x_dispatchId].find(x_wgId));
512  return true;
513  }
514 
515  // the port that connects this LDS to its owner CU
517 
518  ComputeUnit* parent = nullptr;
519 
520  std::string _name;
521 
522  // the number of bytes currently reserved by all workgroups
523  int bytesAllocated = 0;
524 
525  // the size of the LDS, the most bytes available
527 
528  // Address range of this memory
530 
531  // the penalty, in cycles, for each LDS bank conflict
533 
534  // the number of banks in the LDS underlying data store
535  int banks = 0;
536 };
537 
538 #endif // __LDS_STATE_HH__
fatal
#define fatal(...)
This implements a cprintf based fatal() function.
Definition: logging.hh:183
LdsState::tickEvent
TickEvent tickEvent
Definition: lds_state.hh:235
LdsState::CuSidePort::recvRetry
virtual void recvRetry()
receive a retry
Definition: lds_state.cc:265
ResponsePort
A ResponsePort is a specialization of a port.
Definition: port.hh:265
LdsState::CuSidePort::loadData
void loadData(PacketPtr packet)
LdsChunk::write
void write(const uint32_t index, const T value)
a write operation
Definition: lds_state.hh:89
LdsState::operator=
LdsState & operator=(const LdsState &)=delete
LdsState::range
AddrRange range
Definition: lds_state.hh:529
MipsISA::index
Bitfield< 30, 0 > index
Definition: pra_constants.hh:44
LdsState::isRetryResp
bool isRetryResp() const
Definition: lds_state.hh:281
LdsState::CuSidePort
CuSidePort is the LDS Port closer to the CU side.
Definition: lds_state.hh:160
LdsState::TickEvent::process
virtual void process()
wake up at this time and perform specified actions
Definition: lds_state.cc:327
LdsState::getAddrRange
AddrRange getAddrRange() const
Definition: lds_state.hh:463
misc.hh
Tick
uint64_t Tick
Tick count type.
Definition: types.hh:63
PortID
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Definition: types.hh:237
LdsState::returnQueuePush
bool returnQueuePush(std::pair< Tick, PacketPtr > thePair)
add this to the queue of packets to be returned
Definition: lds_state.cc:226
LdsState::CuSidePort::recvTimingReq
virtual bool recvTimingReq(PacketPtr pkt)
receive the packet from the CU
Definition: lds_state.cc:177
LdsState::getComputeUnit
ComputeUnit * getComputeUnit() const
Definition: lds_state.hh:442
LdsState::getParent
ComputeUnit * getParent() const
Definition: lds_state.hh:424
std::vector
STL vector class.
Definition: stl.hh:37
LdsState::params
const Params * params() const
Definition: lds_state.hh:275
LdsState::process
bool process()
look for packets to return at this time
Definition: lds_state.cc:274
LdsState::TickEvent
an event to allow event-driven execution
Definition: lds_state.hh:126
Event::when
Tick when() const
Get the time that the event is scheduled.
Definition: eventq.hh:503
ClockedObject
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
Definition: clocked_object.hh:231
LdsState::CuSidePort::recvAtomic
virtual Tick recvAtomic(PacketPtr pkt)
Receive an atomic request packet from the peer.
Definition: lds_state.hh:175
LdsState::getRefCounter
int getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
return the current reference count for this workgroup id
Definition: lds_state.hh:335
ComputeUnit
Definition: compute_unit.hh:198
LdsChunk::chunk
std::vector< uint8_t > chunk
Definition: lds_state.hh:114
mainEventQueue
vector< EventQueue * > mainEventQueue
Array for main event queues.
Definition: eventq.cc:56
LdsState::refCounter
std::unordered_map< uint32_t, std::unordered_map< uint32_t, int32_t > > refCounter
The LDS reference counter. The key is the workgroup ID and dispatch ID. The value is the number of wave...
Definition: lds_state.hh:228
LdsState::TickEvent::ldsState
LdsState * ldsState
Definition: lds_state.hh:130
LdsState::TickEvent::deschedule
void deschedule()
Definition: lds_state.hh:151
LdsState::banks
int banks
Definition: lds_state.hh:535
LdsState::getPort
Port & getPort(const std::string &if_name, PortID idx)
Get a port with a given name and index.
Definition: lds_state.hh:469
Event
Definition: eventq.hh:246
AddrRange
The AddrRange class encapsulates an address range, and supports a number of tests to check if two ran...
Definition: addr_range.hh:68
LdsState::returnQueue
std::queue< std::pair< Tick, PacketPtr > > returnQueue
Definition: lds_state.hh:240
LdsState::TickEvent::schedule
void schedule(Tick when)
Definition: lds_state.hh:145
LdsState::earliestReturnTime
Tick earliestReturnTime() const
Definition: lds_state.hh:413
LdsState::CuSidePort::recvFunctional
virtual void recvFunctional(PacketPtr pkt)
receive a packet in functional mode
Definition: lds_state.cc:244
LdsState::LdsState
LdsState(const Params *params)
the default constructor that works with SWIG
Definition: lds_state.cc:47
LdsState
Definition: lds_state.hh:119
LdsState::CuSidePort::storeData
void storeData(PacketPtr packet)
Port
Ports are used to interface objects to each other.
Definition: port.hh:56
LdsState::CuSidePort::getAddrRanges
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
Definition: lds_state.hh:195
port.hh
LdsState::~LdsState
~LdsState()
Definition: lds_state.hh:269
LdsState::TickEvent::nextTick
Tick nextTick
Definition: lds_state.hh:132
LdsState::getDynInstr
GPUDynInstPtr getDynInstr(PacketPtr packet)
Definition: lds_state.cc:183
std::pair
STL pair class.
Definition: stl.hh:58
LdsState::CuSidePort::CuSidePort
CuSidePort(const std::string &_name, LdsState *_ownerLds)
Definition: lds_state.hh:163
LdsState::bankConflictPenalty
int bankConflictPenalty
Definition: lds_state.hh:532
LdsState::retryResp
bool retryResp
Definition: lds_state.hh:243
LdsState::maximumSize
int maximumSize
Definition: lds_state.hh:526
LdsState::ldsSize
std::size_t ldsSize(const uint32_t x_wgId)
get the allocated size for this workgroup
Definition: lds_state.hh:457
LdsState::Params
LdsStateParams Params
Definition: lds_state.hh:262
LdsState::_name
std::string _name
Definition: lds_state.hh:520
LdsState::chunkMap
std::unordered_map< uint32_t, std::unordered_map< uint32_t, LdsChunk > > chunkMap
Definition: lds_state.hh:232
LdsState::countBankConflicts
unsigned countBankConflicts(PacketPtr packet, unsigned *bankAccesses)
derive the gpu mem packet from the packet and then count the bank conflicts
Definition: lds_state.cc:94
LdsState::CuSidePort::ownerLds
LdsState * ownerLds
Definition: lds_state.hh:169
LdsState::CuSidePort::recvRangeChange
virtual void recvRangeChange()
Definition: lds_state.hh:184
LdsState::releaseSpace
bool releaseSpace(const uint32_t x_dispatchId, const uint32_t x_wgId)
give back the space
Definition: lds_state.hh:493
LdsState::CuSidePort::atomicOperation
void atomicOperation(PacketPtr packet)
LdsState::setParent
void setParent(ComputeUnit *x_parent)
set the parent and name based on the parent
Definition: lds_state.cc:79
LdsChunk::LdsChunk
LdsChunk()
Definition: lds_state.hh:63
LdsState::processPacket
bool processPacket(PacketPtr packet)
process an incoming packet, add it to the return queue
Definition: lds_state.cc:195
panic_if
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition: logging.hh:197
LdsState::getName
std::string getName()
Definition: lds_state.hh:430
LdsState::decreaseRefCounter
int decreaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
Decrease the reference count after making sure it is in the list; give back this chunk if the ref coun...
Definition: lds_state.hh:313
LdsState::TickEvent::TickEvent
TickEvent(LdsState *_ldsState)
Definition: lds_state.hh:136
LdsState::reserveSpace
LdsChunk * reserveSpace(const uint32_t dispatchId, const uint32_t wgId, const uint32_t size)
assign a parent and request this amount of space be set aside for this wgid
Definition: lds_state.hh:368
SimObject::_params
const SimObjectParams * _params
Cached copy of the object parameters.
Definition: sim_object.hh:110
LdsState::cuPort
CuSidePort cuPort
Definition: lds_state.hh:516
LdsChunk::read
T read(const uint32_t index)
a read operation
Definition: lds_state.hh:70
clocked_object.hh
Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:257
LdsChunk::LdsChunk
LdsChunk(const uint32_t x_size)
Definition: lds_state.hh:58
GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:48
LdsChunk
this represents a slice of the overall LDS, intended to be associated with an individual workgroup
Definition: lds_state.hh:55
LdsState::setRetryResp
void setRetryResp(const bool value)
Definition: lds_state.hh:287
LdsState::getBankConflictPenalty
int getBankConflictPenalty() const
Definition: lds_state.hh:448
LdsState::CuSidePort::recvRespRetry
virtual void recvRespRetry()
receive a retry for a response
Definition: lds_state.cc:253
LdsState::bytesAllocated
int bytesAllocated
Definition: lds_state.hh:523
LdsState::parent
ComputeUnit * parent
Definition: lds_state.hh:518
std::list< AddrRange >
fatal_if
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition: logging.hh:219
LdsState::increaseRefCounter
int increaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
use the dynamic wave id to create or just increase the reference count
Definition: lds_state.hh:300
LdsState::getLdsChunk
LdsChunk * getLdsChunk(const uint32_t dispatchId, const uint32_t wgId)
Definition: lds_state.hh:397
LdsState::getBanks
int getBanks() const
Definition: lds_state.hh:436
SimObject::find
static SimObject * find(const char *name)
Find the SimObject with the given name and return a pointer to it.
Definition: sim_object.cc:171
curTick
Tick curTick()
The current simulated tick.
Definition: core.hh:45
LdsState::canReserve
bool canReserve(uint32_t x_size) const
can this much space be reserved for a workgroup?
Definition: lds_state.hh:483
LdsChunk::size
std::vector< uint8_t >::size_type size() const
get the size of this chunk
Definition: lds_state.hh:107

Generated on Wed Sep 30 2020 14:02:12 for gem5 by doxygen 1.8.17