gem5  v22.0.0.1
lds_state.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #ifndef __LDS_STATE_HH__
33 #define __LDS_STATE_HH__
34 
35 #include <array>
36 #include <queue>
37 #include <string>
38 #include <unordered_map>
39 #include <utility>
40 #include <vector>
41 
42 #include "gpu-compute/misc.hh"
43 #include "mem/port.hh"
44 #include "params/LdsState.hh"
45 #include "sim/clocked_object.hh"
46 
47 namespace gem5
48 {
49 
50 class ComputeUnit;
51 
/**
 * this represents a slice of the overall LDS, intended to be associated with
 * an individual workgroup
 *
 * The slice is a flat byte array; read() and write() access a value of type T
 * starting at a byte offset into that array.
 */
class LdsChunk
{
  public:
    LdsChunk(const uint32_t x_size):
        chunk(x_size)
    {
    }

    LdsChunk() {}

    /**
     * a read operation
     *
     * @param index byte offset of the first byte of the value
     * @return the value stored at index, or 0 if any byte of the access
     *         falls outside of this chunk
     */
    template<class T>
    T
    read(const uint32_t index)
    {
        // The whole [index, index + sizeof(T)) span has to be inside the
        // chunk, not only its first byte; the subtraction form cannot
        // overflow.
        if (index >= chunk.size() || sizeof(T) > chunk.size() - index) {
            return (T)0;
        }

        // Copy byte by byte so that unaligned offsets are handled and the
        // byte storage is never accessed through an unrelated pointer type.
        T value;
        uint8_t *dst = reinterpret_cast<uint8_t *>(&value);
        for (std::size_t i = 0; i < sizeof(T); ++i) {
            dst[i] = chunk[index + i];
        }
        return value;
    }

    /**
     * a write operation
     *
     * The write is dropped if any byte of the access falls outside of this
     * chunk.
     *
     * @param index byte offset of the first byte to be written
     * @param value the value to store
     */
    template<class T>
    void
    write(const uint32_t index, const T value)
    {
        // same whole-access bounds rule as read()
        if (index >= chunk.size() || sizeof(T) > chunk.size() - index) {
            return;
        }

        const uint8_t *src = reinterpret_cast<const uint8_t *>(&value);
        for (std::size_t i = 0; i < sizeof(T); ++i) {
            chunk[index + i] = src[i];
        }
    }

    /**
     * get the size of this chunk
     */
    std::vector<uint8_t>::size_type
    size() const
    {
        return chunk.size();
    }

  protected:
    // the actual data store for this slice of the LDS
    std::vector<uint8_t> chunk;
};
117 
118 // Local Data Share (LDS) State per Wavefront (contents of the LDS region
119 // allocated to the WorkGroup of this Wavefront)
120 class LdsState: public ClockedObject
121 {
122  protected:
123 
127  class TickEvent: public Event
128  {
129  protected:
130 
131  LdsState *ldsState = nullptr;
132 
134 
135  public:
136 
137  TickEvent(LdsState *_ldsState) :
138  ldsState(_ldsState)
139  {
140  }
141 
142  virtual void
143  process();
144 
145  void
147  {
148  mainEventQueue[0]->schedule(this, when);
149  }
150 
151  void
153  {
154  mainEventQueue[0]->deschedule(this);
155  }
156  };
157 
161  class CuSidePort: public ResponsePort
162  {
163  public:
164  CuSidePort(const std::string &_name, LdsState *_ownerLds) :
165  ResponsePort(_name, _ownerLds), ownerLds(_ownerLds)
166  {
167  }
168 
169  protected:
171 
172  virtual bool
174 
175  virtual Tick
177  {
178  return 0;
179  }
180 
181  virtual void
183 
184  virtual void
186  {
187  }
188 
189  virtual void
190  recvRetry();
191 
192  virtual void
193  recvRespRetry();
194 
195  virtual AddrRangeList
197  {
198  AddrRangeList ranges;
199  ranges.push_back(ownerLds->getAddrRange());
200  return ranges;
201  }
202 
203  template<typename T>
204  void
205  loadData(PacketPtr packet);
206 
207  template<typename T>
208  void
209  storeData(PacketPtr packet);
210 
211  template<typename T>
212  void
213  atomicOperation(PacketPtr packet);
214  };
215 
216  protected:
217 
228  std::unordered_map<uint32_t,
229  std::unordered_map<uint32_t, int32_t>> refCounter;
230 
231  // the map that allows workgroups to access their own chunk of the LDS
232  std::unordered_map<uint32_t,
233  std::unordered_map<uint32_t, LdsChunk>> chunkMap;
234 
235  // an event to allow the LDS to wake up at a specified time
237 
238  // the queue of packets that are going back to the CU after a
239  // read/write/atomic op
240  // TODO need to make this have a maximum size to create flow control
241  std::queue<std::pair<Tick, PacketPtr>> returnQueue;
242 
243  // whether or not there are pending responses
244  bool retryResp = false;
245 
246  bool
247  process();
248 
250  getDynInstr(PacketPtr packet);
251 
252  bool
253  processPacket(PacketPtr packet);
254 
255  unsigned
256  countBankConflicts(PacketPtr packet, unsigned *bankAccesses);
257 
258  unsigned
260  unsigned *numBankAccesses);
261 
262  public:
263  using Params = LdsStateParams;
264 
265  LdsState(const Params &params);
266 
267  // prevent copy construction
268  LdsState(const LdsState&) = delete;
269 
271  {
272  parent = nullptr;
273  }
274 
275  bool
276  isRetryResp() const
277  {
278  return retryResp;
279  }
280 
281  void
282  setRetryResp(const bool value)
283  {
284  retryResp = value;
285  }
286 
287  // prevent assignment
288  LdsState &
289  operator=(const LdsState &) = delete;
290 
294  int
295  increaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
296  {
297  int refCount = getRefCounter(dispatchId, wgId);
298  fatal_if(refCount < 0,
299  "reference count should not be below zero");
300  return ++refCounter[dispatchId][wgId];
301  }
302 
307  int
308  decreaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
309  {
310  int refCount = getRefCounter(dispatchId, wgId);
311 
312  fatal_if(refCount <= 0,
313  "reference count should not be below zero or at zero to"
314  "decrement");
315 
316  refCounter[dispatchId][wgId]--;
317 
318  if (refCounter[dispatchId][wgId] == 0) {
319  releaseSpace(dispatchId, wgId);
320  return 0;
321  } else {
322  return refCounter[dispatchId][wgId];
323  }
324  }
325 
329  int
330  getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
331  {
332  auto dispatchIter = chunkMap.find(dispatchId);
333  fatal_if(dispatchIter == chunkMap.end(),
334  "could not locate this dispatch id [%d]", dispatchId);
335 
336  auto workgroup = dispatchIter->second.find(wgId);
337  fatal_if(workgroup == dispatchIter->second.end(),
338  "could not find this workgroup id within this dispatch id"
339  " did[%d] wgid[%d]", dispatchId, wgId);
340 
341  auto refCountIter = refCounter.find(dispatchId);
342  if (refCountIter == refCounter.end()) {
343  fatal("could not locate this dispatch id [%d]", dispatchId);
344  } else {
345  auto workgroup = refCountIter->second.find(wgId);
346  if (workgroup == refCountIter->second.end()) {
347  fatal("could not find this workgroup id within this dispatch id"
348  " did[%d] wgid[%d]", dispatchId, wgId);
349  } else {
350  return refCounter.at(dispatchId).at(wgId);
351  }
352  }
353 
354  fatal("should not reach this point");
355  return 0;
356  }
357 
362  LdsChunk *
363  reserveSpace(const uint32_t dispatchId, const uint32_t wgId,
364  const uint32_t size)
365  {
366  if (chunkMap.find(dispatchId) != chunkMap.end()) {
367  panic_if(
368  chunkMap[dispatchId].find(wgId) != chunkMap[dispatchId].end(),
369  "duplicate workgroup ID asking for space in the LDS "
370  "did[%d] wgid[%d]", dispatchId, wgId);
371  }
372 
373  if (bytesAllocated + size > maximumSize) {
374  return nullptr;
375  } else {
376  bytesAllocated += size;
377 
378  auto value = chunkMap[dispatchId].emplace(wgId, LdsChunk(size));
379  panic_if(!value.second, "was unable to allocate a new chunkMap");
380 
381  // make an entry for this workgroup
382  refCounter[dispatchId][wgId] = 0;
383 
384  return &chunkMap[dispatchId][wgId];
385  }
386  }
387 
388  /*
389  * return pointer to lds chunk for wgid
390  */
391  LdsChunk *
392  getLdsChunk(const uint32_t dispatchId, const uint32_t wgId)
393  {
394  fatal_if(chunkMap.find(dispatchId) == chunkMap.end(),
395  "fetch for unknown dispatch ID did[%d]", dispatchId);
396 
397  fatal_if(chunkMap[dispatchId].find(wgId) == chunkMap[dispatchId].end(),
398  "fetch for unknown workgroup ID wgid[%d] in dispatch ID did[%d]",
399  wgId, dispatchId);
400 
401  return &chunkMap[dispatchId][wgId];
402  }
403 
404  bool
406 
407  Tick
409  {
410  // TODO set to max(lastCommand+1, curTick())
411  return returnQueue.empty() ? curTick() : returnQueue.back().first;
412  }
413 
414  void
415  setParent(ComputeUnit *x_parent);
416 
417  // accessors
418  ComputeUnit *
419  getParent() const
420  {
421  return parent;
422  }
423 
424  std::string
426  {
427  return _name;
428  }
429 
430  int
431  getBanks() const
432  {
433  return banks;
434  }
435 
436  ComputeUnit *
438  {
439  return parent;
440  }
441 
442  int
444  {
445  return bankConflictPenalty;
446  }
447 
451  std::size_t
452  ldsSize(const uint32_t x_wgId)
453  {
454  return chunkMap[x_wgId].size();
455  }
456 
457  AddrRange
458  getAddrRange() const
459  {
460  return range;
461  }
462 
463  Port &
464  getPort(const std::string &if_name, PortID idx)
465  {
466  if (if_name == "cuPort") {
467  // TODO need to set name dynamically at this point?
468  return cuPort;
469  } else {
470  fatal("cannot resolve the port name " + if_name);
471  }
472  }
473 
477  bool
478  canReserve(uint32_t x_size) const
479  {
480  return bytesAllocated + x_size <= maximumSize;
481  }
482 
483  private:
487  bool
488  releaseSpace(const uint32_t x_dispatchId, const uint32_t x_wgId)
489  {
490  auto dispatchIter = chunkMap.find(x_dispatchId);
491 
492  if (dispatchIter == chunkMap.end()) {
493  fatal("dispatch id not found [%d]", x_dispatchId);
494  } else {
495  auto workgroupIter = dispatchIter->second.find(x_wgId);
496  if (workgroupIter == dispatchIter->second.end()) {
497  fatal("workgroup id [%d] not found in dispatch id [%d]",
498  x_wgId, x_dispatchId);
499  }
500  }
501 
502  fatal_if(bytesAllocated < chunkMap[x_dispatchId][x_wgId].size(),
503  "releasing more space than was allocated");
504 
505  bytesAllocated -= chunkMap[x_dispatchId][x_wgId].size();
506  chunkMap[x_dispatchId].erase(chunkMap[x_dispatchId].find(x_wgId));
507  return true;
508  }
509 
510  // the port that connects this LDS to its owner CU
512 
513  ComputeUnit* parent = nullptr;
514 
515  std::string _name;
516 
517  // the number of bytes currently reserved by all workgroups
518  int bytesAllocated = 0;
519 
520  // the size of the LDS, the most bytes available
522 
523  // Address range of this memory
525 
526  // the penalty, in cycles, for each LDS bank conflict
528 
529  // the number of banks in the LDS underlying data store
530  int banks = 0;
531 };
532 
533 } // namespace gem5
534 
535 #endif // __LDS_STATE_HH__
gem5::LdsState::TickEvent::nextTick
Tick nextTick
Definition: lds_state.hh:133
gem5::LdsState::parent
ComputeUnit * parent
Definition: lds_state.hh:513
gem5::curTick
Tick curTick()
The universal simulation clock.
Definition: cur_tick.hh:46
fatal
#define fatal(...)
This implements a cprintf based fatal() function.
Definition: logging.hh:190
gem5::PortID
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Definition: types.hh:245
gem5::LdsState::getParent
ComputeUnit * getParent() const
Definition: lds_state.hh:419
gem5::LdsState::TickEvent::process
virtual void process()
wake up at this time and perform specified actions
Definition: lds_state.cc:319
gem5::Event::when
Tick when() const
Get the time that the event is scheduled.
Definition: eventq.hh:508
gem5::LdsState::retryResp
bool retryResp
Definition: lds_state.hh:244
gem5::LdsState::getRefCounter
int getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
return the current reference count for this workgroup id
Definition: lds_state.hh:330
gem5::LdsState::getAddrRange
AddrRange getAddrRange() const
Definition: lds_state.hh:458
gem5::LdsState::~LdsState
~LdsState()
Definition: lds_state.hh:270
gem5::LdsState::CuSidePort::recvFunctional
virtual void recvFunctional(PacketPtr pkt)
receive a packet in functional mode
Definition: lds_state.cc:236
gem5::MipsISA::index
Bitfield< 30, 0 > index
Definition: pra_constants.hh:47
gem5::mainEventQueue
std::vector< EventQueue * > mainEventQueue
Array for main event queues.
Definition: eventq.cc:57
gem5::LdsState::TickEvent::TickEvent
TickEvent(LdsState *_ldsState)
Definition: lds_state.hh:137
gem5::LdsState::Params
LdsStateParams Params
Definition: lds_state.hh:263
gem5::SimObject::find
static SimObject * find(const char *name)
Find the SimObject with the given name and return a pointer to it.
Definition: sim_object.cc:176
gem5::LdsState::chunkMap
std::unordered_map< uint32_t, std::unordered_map< uint32_t, LdsChunk > > chunkMap
Definition: lds_state.hh:233
gem5::LdsState::maximumSize
int maximumSize
Definition: lds_state.hh:521
misc.hh
gem5::LdsState::setParent
void setParent(ComputeUnit *x_parent)
set the parent and name based on the parent
Definition: lds_state.cc:71
gem5::LdsState::LdsState
LdsState(const Params &params)
the default constructor that works with SWIG
Definition: lds_state.cc:48
std::vector
STL vector class.
Definition: stl.hh:37
gem5::LdsState::CuSidePort
CuSidePort is the LDS Port closer to the CU side.
Definition: lds_state.hh:161
gem5::LdsChunk::LdsChunk
LdsChunk()
Definition: lds_state.hh:64
gem5::LdsState::TickEvent::schedule
void schedule(Tick when)
Definition: lds_state.hh:146
gem5::LdsState::getComputeUnit
ComputeUnit * getComputeUnit() const
Definition: lds_state.hh:437
gem5::LdsState::getLdsChunk
LdsChunk * getLdsChunk(const uint32_t dispatchId, const uint32_t wgId)
Definition: lds_state.hh:392
gem5::LdsState::CuSidePort::getAddrRanges
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
Definition: lds_state.hh:196
gem5::LdsState::canReserve
bool canReserve(uint32_t x_size) const
can this much space be reserved for a workgroup?
Definition: lds_state.hh:478
gem5::ComputeUnit
Definition: compute_unit.hh:201
gem5::LdsState::getDynInstr
GPUDynInstPtr getDynInstr(PacketPtr packet)
Definition: lds_state.cc:175
gem5::SimObject::params
const Params & params() const
Definition: sim_object.hh:176
gem5::LdsState::isRetryResp
bool isRetryResp() const
Definition: lds_state.hh:276
gem5::Event
Definition: eventq.hh:251
gem5::Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:291
gem5::LdsState::releaseSpace
bool releaseSpace(const uint32_t x_dispatchId, const uint32_t x_wgId)
give back the space
Definition: lds_state.hh:488
gem5::LdsState::CuSidePort::recvTimingReq
virtual bool recvTimingReq(PacketPtr pkt)
receive the packet from the CU
Definition: lds_state.cc:169
gem5::Tick
uint64_t Tick
Tick count type.
Definition: types.hh:58
gem5::LdsState::getBankConflictPenalty
int getBankConflictPenalty() const
Definition: lds_state.hh:443
gem5::LdsState::earliestReturnTime
Tick earliestReturnTime() const
Definition: lds_state.hh:408
gem5::LdsState::processPacket
bool processPacket(PacketPtr packet)
process an incoming packet, add it to the return queue
Definition: lds_state.cc:187
gem5::LdsState::CuSidePort::recvAtomic
virtual Tick recvAtomic(PacketPtr pkt)
Receive an atomic request packet from the peer.
Definition: lds_state.hh:176
gem5::LdsState::TickEvent::ldsState
LdsState * ldsState
Definition: lds_state.hh:131
port.hh
gem5::LdsState::refCounter
std::unordered_map< uint32_t, std::unordered_map< uint32_t, int32_t > > refCounter
the lds reference counter The key is the workgroup ID and dispatch ID The value is the number of wave...
Definition: lds_state.hh:229
gem5::LdsChunk
this represents a slice of the overall LDS, intended to be associated with an individual workgroup
Definition: lds_state.hh:56
gem5::LdsState::_name
std::string _name
Definition: lds_state.hh:515
gem5::LdsState::CuSidePort::recvRetry
virtual void recvRetry()
receive a retry
Definition: lds_state.cc:257
gem5::LdsChunk::write
void write(const uint32_t index, const T value)
a write operation
Definition: lds_state.hh:90
gem5::LdsChunk::LdsChunk
LdsChunk(const uint32_t x_size)
Definition: lds_state.hh:59
gem5::LdsState::range
AddrRange range
Definition: lds_state.hh:524
gem5::LdsState::CuSidePort::storeData
void storeData(PacketPtr packet)
std::pair
STL pair class.
Definition: stl.hh:58
gem5::LdsState::bankConflictPenalty
int bankConflictPenalty
Definition: lds_state.hh:527
gem5::LdsState::CuSidePort::recvRangeChange
virtual void recvRangeChange()
Definition: lds_state.hh:185
gem5::LdsState::increaseRefCounter
int increaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
use the dynamic wave id to create or just increase the reference count
Definition: lds_state.hh:295
gem5::GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:49
gem5::LdsState::CuSidePort::loadData
void loadData(PacketPtr packet)
gem5::ClockedObject
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
Definition: clocked_object.hh:234
gem5::LdsState::getName
std::string getName()
Definition: lds_state.hh:425
gem5::LdsState::getPort
Port & getPort(const std::string &if_name, PortID idx)
Get a port with a given name and index.
Definition: lds_state.hh:464
gem5::LdsState::bytesAllocated
int bytesAllocated
Definition: lds_state.hh:518
gem5::LdsChunk::size
std::vector< uint8_t >::size_type size() const
get the size of this chunk
Definition: lds_state.hh:108
gem5::LdsState::TickEvent
an event to allow event-driven execution
Definition: lds_state.hh:127
gem5::ResponsePort
A ResponsePort is a specialization of a port.
Definition: port.hh:268
gem5::LdsState::CuSidePort::atomicOperation
void atomicOperation(PacketPtr packet)
gem5::LdsState::operator=
LdsState & operator=(const LdsState &)=delete
panic_if
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition: logging.hh:204
gem5::LdsChunk::chunk
std::vector< uint8_t > chunk
Definition: lds_state.hh:115
gem5::Port
Ports are used to interface objects to each other.
Definition: port.hh:61
gem5::LdsState::countBankConflicts
unsigned countBankConflicts(PacketPtr packet, unsigned *bankAccesses)
derive the gpu mem packet from the packet and then count the bank conflicts
Definition: lds_state.cc:86
gem5::LdsState::ldsSize
std::size_t ldsSize(const uint32_t x_wgId)
get the allocated size for this workgroup
Definition: lds_state.hh:452
clocked_object.hh
gem5::LdsState::returnQueue
std::queue< std::pair< Tick, PacketPtr > > returnQueue
Definition: lds_state.hh:241
gem5::LdsState::returnQueuePush
bool returnQueuePush(std::pair< Tick, PacketPtr > thePair)
add this to the queue of packets to be returned
Definition: lds_state.cc:218
gem5::LdsState::setRetryResp
void setRetryResp(const bool value)
Definition: lds_state.hh:282
gem5::LdsState::CuSidePort::CuSidePort
CuSidePort(const std::string &_name, LdsState *_ownerLds)
Definition: lds_state.hh:164
gem5::LdsState::banks
int banks
Definition: lds_state.hh:530
gem5::AddrRange
The AddrRange class encapsulates an address range, and supports a number of tests to check if two ran...
Definition: addr_range.hh:81
gem5::LdsState::decreaseRefCounter
int decreaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
decrease the reference count after making sure it is in the list give back this chunk if the ref coun...
Definition: lds_state.hh:308
std::list< AddrRange >
gem5::LdsState::TickEvent::deschedule
void deschedule()
Definition: lds_state.hh:152
fatal_if
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition: logging.hh:226
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: gpu_translation_state.hh:37
gem5::LdsState::process
bool process()
look for packets to return at this time
Definition: lds_state.cc:266
gem5::LdsState::reserveSpace
LdsChunk * reserveSpace(const uint32_t dispatchId, const uint32_t wgId, const uint32_t size)
assign a parent and request this amount of space be set aside for this wgid
Definition: lds_state.hh:363
gem5::LdsState::CuSidePort::recvRespRetry
virtual void recvRespRetry()
receive a retry for a response
Definition: lds_state.cc:245
gem5::LdsState::cuPort
CuSidePort cuPort
Definition: lds_state.hh:511
gem5::LdsState
Definition: lds_state.hh:120
gem5::LdsState::CuSidePort::ownerLds
LdsState * ownerLds
Definition: lds_state.hh:170
gem5::LdsState::getBanks
int getBanks() const
Definition: lds_state.hh:431
gem5::LdsState::tickEvent
TickEvent tickEvent
Definition: lds_state.hh:236
gem5::LdsChunk::read
T read(const uint32_t index)
a read operation
Definition: lds_state.hh:71

Generated on Sat Jun 18 2022 08:12:26 for gem5 by doxygen 1.8.17