gem5  v21.1.0.2
lds_state.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #ifndef __LDS_STATE_HH__
35 #define __LDS_STATE_HH__
36 
#include <array>
#include <cstring>
#include <queue>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
43 
44 #include "gpu-compute/misc.hh"
45 #include "mem/port.hh"
46 #include "params/LdsState.hh"
47 #include "sim/clocked_object.hh"
48 
49 namespace gem5
50 {
51 
52 class ComputeUnit;
53 
/**
 * This represents a slice of the overall LDS, intended to be associated with
 * an individual workgroup. The slice is a flat array of bytes; typed values
 * are copied into and out of it at byte offsets.
 */
class LdsChunk
{
  public:
    /** Create a chunk holding @p x_size bytes. */
    LdsChunk(const uint32_t x_size):
        chunk(x_size)
    {
    }

    /** Create an empty chunk. */
    LdsChunk() {}

    /**
     * A read operation.
     *
     * @param index byte offset of the value inside this chunk
     * @return the value of type T stored at @p index, or 0 if any byte of
     *         the access would fall outside the chunk
     */
    template<class T>
    T
    read(const uint32_t index)
    {
        T value = static_cast<T>(0);

        if (!inBounds(index, sizeof(T))) {
            return value;
        }

        // Copy the bytes instead of dereferencing a T*: the offset is not
        // required to be aligned for T.
        std::memcpy(&value, chunk.data() + index, sizeof(T));
        return value;
    }

    /**
     * A write operation.
     *
     * The write is dropped if any byte of the access would fall outside
     * the chunk.
     *
     * @param index byte offset of the destination inside this chunk
     * @param value the value to store
     */
    template<class T>
    void
    write(const uint32_t index, const T value)
    {
        if (!inBounds(index, sizeof(T))) {
            return;
        }

        std::memcpy(chunk.data() + index, &value, sizeof(T));
    }

    /**
     * Get the size of this chunk.
     *
     * @return the number of bytes in the chunk
     */
    std::vector<uint8_t>::size_type
    size() const
    {
        return chunk.size();
    }

  protected:
    // the actual data store for this slice of the LDS
    std::vector<uint8_t> chunk;

  private:
    /**
     * Check that the byte range [index, index + bytes) lies inside the
     * chunk. Written as a subtraction so the check itself cannot overflow.
     */
    bool
    inBounds(const uint32_t index,
             const std::vector<uint8_t>::size_type bytes) const
    {
        return index < chunk.size() && bytes <= chunk.size() - index;
    }
};
119 
120 // Local Data Share (LDS) State per Wavefront (contents of the LDS region
121 // allocated to the WorkGroup of this Wavefront)
122 class LdsState: public ClockedObject
123 {
124  protected:
125 
129  class TickEvent: public Event
130  {
131  protected:
132 
133  LdsState *ldsState = nullptr;
134 
136 
137  public:
138 
139  TickEvent(LdsState *_ldsState) :
140  ldsState(_ldsState)
141  {
142  }
143 
144  virtual void
145  process();
146 
147  void
149  {
150  mainEventQueue[0]->schedule(this, when);
151  }
152 
153  void
155  {
156  mainEventQueue[0]->deschedule(this);
157  }
158  };
159 
163  class CuSidePort: public ResponsePort
164  {
165  public:
166  CuSidePort(const std::string &_name, LdsState *_ownerLds) :
167  ResponsePort(_name, _ownerLds), ownerLds(_ownerLds)
168  {
169  }
170 
171  protected:
173 
174  virtual bool
176 
177  virtual Tick
179  {
180  return 0;
181  }
182 
183  virtual void
185 
186  virtual void
188  {
189  }
190 
191  virtual void
192  recvRetry();
193 
194  virtual void
195  recvRespRetry();
196 
197  virtual AddrRangeList
199  {
200  AddrRangeList ranges;
201  ranges.push_back(ownerLds->getAddrRange());
202  return ranges;
203  }
204 
205  template<typename T>
206  void
207  loadData(PacketPtr packet);
208 
209  template<typename T>
210  void
211  storeData(PacketPtr packet);
212 
213  template<typename T>
214  void
215  atomicOperation(PacketPtr packet);
216  };
217 
218  protected:
219 
230  std::unordered_map<uint32_t,
231  std::unordered_map<uint32_t, int32_t>> refCounter;
232 
233  // the map that allows workgroups to access their own chunk of the LDS
234  std::unordered_map<uint32_t,
235  std::unordered_map<uint32_t, LdsChunk>> chunkMap;
236 
237  // an event to allow the LDS to wake up at a specified time
239 
240  // the queue of packets that are going back to the CU after a
241  // read/write/atomic op
242  // TODO need to make this have a maximum size to create flow control
243  std::queue<std::pair<Tick, PacketPtr>> returnQueue;
244 
245  // whether or not there are pending responses
246  bool retryResp = false;
247 
248  bool
249  process();
250 
252  getDynInstr(PacketPtr packet);
253 
254  bool
255  processPacket(PacketPtr packet);
256 
257  unsigned
258  countBankConflicts(PacketPtr packet, unsigned *bankAccesses);
259 
260  unsigned
262  unsigned *numBankAccesses);
263 
264  public:
265  using Params = LdsStateParams;
266 
267  LdsState(const Params &params);
268 
269  // prevent copy construction
270  LdsState(const LdsState&) = delete;
271 
273  {
274  parent = nullptr;
275  }
276 
277  bool
278  isRetryResp() const
279  {
280  return retryResp;
281  }
282 
283  void
284  setRetryResp(const bool value)
285  {
286  retryResp = value;
287  }
288 
289  // prevent assignment
290  LdsState &
291  operator=(const LdsState &) = delete;
292 
296  int
297  increaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
298  {
299  int refCount = getRefCounter(dispatchId, wgId);
300  fatal_if(refCount < 0,
301  "reference count should not be below zero");
302  return ++refCounter[dispatchId][wgId];
303  }
304 
309  int
310  decreaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
311  {
312  int refCount = getRefCounter(dispatchId, wgId);
313 
314  fatal_if(refCount <= 0,
315  "reference count should not be below zero or at zero to"
316  "decrement");
317 
318  refCounter[dispatchId][wgId]--;
319 
320  if (refCounter[dispatchId][wgId] == 0) {
321  releaseSpace(dispatchId, wgId);
322  return 0;
323  } else {
324  return refCounter[dispatchId][wgId];
325  }
326  }
327 
331  int
332  getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
333  {
334  auto dispatchIter = chunkMap.find(dispatchId);
335  fatal_if(dispatchIter == chunkMap.end(),
336  "could not locate this dispatch id [%d]", dispatchId);
337 
338  auto workgroup = dispatchIter->second.find(wgId);
339  fatal_if(workgroup == dispatchIter->second.end(),
340  "could not find this workgroup id within this dispatch id"
341  " did[%d] wgid[%d]", dispatchId, wgId);
342 
343  auto refCountIter = refCounter.find(dispatchId);
344  if (refCountIter == refCounter.end()) {
345  fatal("could not locate this dispatch id [%d]", dispatchId);
346  } else {
347  auto workgroup = refCountIter->second.find(wgId);
348  if (workgroup == refCountIter->second.end()) {
349  fatal("could not find this workgroup id within this dispatch id"
350  " did[%d] wgid[%d]", dispatchId, wgId);
351  } else {
352  return refCounter.at(dispatchId).at(wgId);
353  }
354  }
355 
356  fatal("should not reach this point");
357  return 0;
358  }
359 
364  LdsChunk *
365  reserveSpace(const uint32_t dispatchId, const uint32_t wgId,
366  const uint32_t size)
367  {
368  if (chunkMap.find(dispatchId) != chunkMap.end()) {
369  panic_if(
370  chunkMap[dispatchId].find(wgId) != chunkMap[dispatchId].end(),
371  "duplicate workgroup ID asking for space in the LDS "
372  "did[%d] wgid[%d]", dispatchId, wgId);
373  }
374 
375  if (bytesAllocated + size > maximumSize) {
376  return nullptr;
377  } else {
378  bytesAllocated += size;
379 
380  auto value = chunkMap[dispatchId].emplace(wgId, LdsChunk(size));
381  panic_if(!value.second, "was unable to allocate a new chunkMap");
382 
383  // make an entry for this workgroup
384  refCounter[dispatchId][wgId] = 0;
385 
386  return &chunkMap[dispatchId][wgId];
387  }
388  }
389 
390  /*
391  * return pointer to lds chunk for wgid
392  */
393  LdsChunk *
394  getLdsChunk(const uint32_t dispatchId, const uint32_t wgId)
395  {
396  fatal_if(chunkMap.find(dispatchId) == chunkMap.end(),
397  "fetch for unknown dispatch ID did[%d]", dispatchId);
398 
399  fatal_if(chunkMap[dispatchId].find(wgId) == chunkMap[dispatchId].end(),
400  "fetch for unknown workgroup ID wgid[%d] in dispatch ID did[%d]",
401  wgId, dispatchId);
402 
403  return &chunkMap[dispatchId][wgId];
404  }
405 
406  bool
408 
409  Tick
411  {
412  // TODO set to max(lastCommand+1, curTick())
413  return returnQueue.empty() ? curTick() : returnQueue.back().first;
414  }
415 
416  void
417  setParent(ComputeUnit *x_parent);
418 
419  // accessors
420  ComputeUnit *
421  getParent() const
422  {
423  return parent;
424  }
425 
426  std::string
428  {
429  return _name;
430  }
431 
432  int
433  getBanks() const
434  {
435  return banks;
436  }
437 
438  ComputeUnit *
440  {
441  return parent;
442  }
443 
444  int
446  {
447  return bankConflictPenalty;
448  }
449 
453  std::size_t
454  ldsSize(const uint32_t x_wgId)
455  {
456  return chunkMap[x_wgId].size();
457  }
458 
459  AddrRange
460  getAddrRange() const
461  {
462  return range;
463  }
464 
465  Port &
466  getPort(const std::string &if_name, PortID idx)
467  {
468  if (if_name == "cuPort") {
469  // TODO need to set name dynamically at this point?
470  return cuPort;
471  } else {
472  fatal("cannot resolve the port name " + if_name);
473  }
474  }
475 
479  bool
480  canReserve(uint32_t x_size) const
481  {
482  return bytesAllocated + x_size <= maximumSize;
483  }
484 
485  private:
489  bool
490  releaseSpace(const uint32_t x_dispatchId, const uint32_t x_wgId)
491  {
492  auto dispatchIter = chunkMap.find(x_dispatchId);
493 
494  if (dispatchIter == chunkMap.end()) {
495  fatal("dispatch id not found [%d]", x_dispatchId);
496  } else {
497  auto workgroupIter = dispatchIter->second.find(x_wgId);
498  if (workgroupIter == dispatchIter->second.end()) {
499  fatal("workgroup id [%d] not found in dispatch id [%d]",
500  x_wgId, x_dispatchId);
501  }
502  }
503 
504  fatal_if(bytesAllocated < chunkMap[x_dispatchId][x_wgId].size(),
505  "releasing more space than was allocated");
506 
507  bytesAllocated -= chunkMap[x_dispatchId][x_wgId].size();
508  chunkMap[x_dispatchId].erase(chunkMap[x_dispatchId].find(x_wgId));
509  return true;
510  }
511 
512  // the port that connects this LDS to its owner CU
514 
515  ComputeUnit* parent = nullptr;
516 
517  std::string _name;
518 
519  // the number of bytes currently reserved by all workgroups
520  int bytesAllocated = 0;
521 
522  // the size of the LDS, the most bytes available
524 
525  // Address range of this memory
527 
528  // the penalty, in cycles, for each LDS bank conflict
530 
531  // the number of banks in the LDS underlying data store
532  int banks = 0;
533 };
534 
535 } // namespace gem5
536 
537 #endif // __LDS_STATE_HH__
gem5::LdsState::TickEvent::nextTick
Tick nextTick
Definition: lds_state.hh:135
gem5::LdsState::parent
ComputeUnit * parent
Definition: lds_state.hh:515
gem5::curTick
Tick curTick()
The universal simulation clock.
Definition: cur_tick.hh:46
fatal
#define fatal(...)
This implements a cprintf based fatal() function.
Definition: logging.hh:189
gem5::PortID
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Definition: types.hh:252
gem5::LdsState::getParent
ComputeUnit * getParent() const
Definition: lds_state.hh:421
gem5::LdsState::TickEvent::process
virtual void process()
wake up at this time and perform specified actions
Definition: lds_state.cc:321
gem5::Event::when
Tick when() const
Get the time that the event is scheduled.
Definition: eventq.hh:508
gem5::LdsState::retryResp
bool retryResp
Definition: lds_state.hh:246
gem5::LdsState::getRefCounter
int getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
return the current reference count for this workgroup id
Definition: lds_state.hh:332
gem5::LdsState::getAddrRange
AddrRange getAddrRange() const
Definition: lds_state.hh:460
gem5::LdsState::~LdsState
~LdsState()
Definition: lds_state.hh:272
gem5::LdsState::CuSidePort::recvFunctional
virtual void recvFunctional(PacketPtr pkt)
receive a packet in functional mode
Definition: lds_state.cc:238
gem5::MipsISA::index
Bitfield< 30, 0 > index
Definition: pra_constants.hh:47
gem5::mainEventQueue
std::vector< EventQueue * > mainEventQueue
Array for main event queues.
Definition: eventq.cc:57
gem5::LdsState::TickEvent::TickEvent
TickEvent(LdsState *_ldsState)
Definition: lds_state.hh:139
gem5::LdsState::Params
LdsStateParams Params
Definition: lds_state.hh:265
gem5::SimObject::find
static SimObject * find(const char *name)
Find the SimObject with the given name and return a pointer to it.
Definition: sim_object.cc:176
gem5::LdsState::chunkMap
std::unordered_map< uint32_t, std::unordered_map< uint32_t, LdsChunk > > chunkMap
Definition: lds_state.hh:235
gem5::LdsState::maximumSize
int maximumSize
Definition: lds_state.hh:523
misc.hh
gem5::LdsState::setParent
void setParent(ComputeUnit *x_parent)
set the parent and name based on the parent
Definition: lds_state.cc:73
gem5::LdsState::LdsState
LdsState(const Params &params)
the default constructor that works with SWIG
Definition: lds_state.cc:50
std::vector
STL vector class.
Definition: stl.hh:37
gem5::LdsState::CuSidePort
CuSidePort is the LDS Port closer to the CU side.
Definition: lds_state.hh:163
gem5::LdsChunk::LdsChunk
LdsChunk()
Definition: lds_state.hh:66
gem5::LdsState::TickEvent::schedule
void schedule(Tick when)
Definition: lds_state.hh:148
gem5::LdsState::getComputeUnit
ComputeUnit * getComputeUnit() const
Definition: lds_state.hh:439
gem5::LdsState::getLdsChunk
LdsChunk * getLdsChunk(const uint32_t dispatchId, const uint32_t wgId)
Definition: lds_state.hh:394
gem5::LdsState::CuSidePort::getAddrRanges
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
Definition: lds_state.hh:198
gem5::LdsState::canReserve
bool canReserve(uint32_t x_size) const
can this much space be reserved for a workgroup?
Definition: lds_state.hh:480
gem5::ComputeUnit
Definition: compute_unit.hh:203
gem5::LdsState::getDynInstr
GPUDynInstPtr getDynInstr(PacketPtr packet)
Definition: lds_state.cc:177
gem5::SimObject::params
const Params & params() const
Definition: sim_object.hh:176
gem5::LdsState::isRetryResp
bool isRetryResp() const
Definition: lds_state.hh:278
gem5::Event
Definition: eventq.hh:251
gem5::Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:283
gem5::LdsState::releaseSpace
bool releaseSpace(const uint32_t x_dispatchId, const uint32_t x_wgId)
give back the space
Definition: lds_state.hh:490
gem5::LdsState::CuSidePort::recvTimingReq
virtual bool recvTimingReq(PacketPtr pkt)
receive the packet from the CU
Definition: lds_state.cc:171
gem5::Tick
uint64_t Tick
Tick count type.
Definition: types.hh:58
gem5::LdsState::getBankConflictPenalty
int getBankConflictPenalty() const
Definition: lds_state.hh:445
gem5::LdsState::earliestReturnTime
Tick earliestReturnTime() const
Definition: lds_state.hh:410
gem5::LdsState::processPacket
bool processPacket(PacketPtr packet)
process an incoming packet, add it to the return queue
Definition: lds_state.cc:189
gem5::LdsState::CuSidePort::recvAtomic
virtual Tick recvAtomic(PacketPtr pkt)
Receive an atomic request packet from the peer.
Definition: lds_state.hh:178
gem5::LdsState::TickEvent::ldsState
LdsState * ldsState
Definition: lds_state.hh:133
port.hh
gem5::LdsState::refCounter
std::unordered_map< uint32_t, std::unordered_map< uint32_t, int32_t > > refCounter
The LDS reference counter. The key is the workgroup ID and dispatch ID; the value is the number of wave...
Definition: lds_state.hh:231
gem5::LdsChunk
this represents a slice of the overall LDS, intended to be associated with an individual workgroup
Definition: lds_state.hh:58
gem5::LdsState::_name
std::string _name
Definition: lds_state.hh:517
gem5::LdsState::CuSidePort::recvRetry
virtual void recvRetry()
receive a retry
Definition: lds_state.cc:259
gem5::LdsChunk::write
void write(const uint32_t index, const T value)
a write operation
Definition: lds_state.hh:92
gem5::LdsChunk::LdsChunk
LdsChunk(const uint32_t x_size)
Definition: lds_state.hh:61
gem5::LdsState::range
AddrRange range
Definition: lds_state.hh:526
gem5::LdsState::CuSidePort::storeData
void storeData(PacketPtr packet)
std::pair
STL pair class.
Definition: stl.hh:58
gem5::LdsState::bankConflictPenalty
int bankConflictPenalty
Definition: lds_state.hh:529
gem5::LdsState::CuSidePort::recvRangeChange
virtual void recvRangeChange()
Definition: lds_state.hh:187
gem5::LdsState::increaseRefCounter
int increaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
use the dynamic wave id to create or just increase the reference count
Definition: lds_state.hh:297
gem5::GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:51
gem5::LdsState::CuSidePort::loadData
void loadData(PacketPtr packet)
gem5::ClockedObject
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
Definition: clocked_object.hh:234
gem5::LdsState::getName
std::string getName()
Definition: lds_state.hh:427
gem5::LdsState::getPort
Port & getPort(const std::string &if_name, PortID idx)
Get a port with a given name and index.
Definition: lds_state.hh:466
gem5::LdsState::bytesAllocated
int bytesAllocated
Definition: lds_state.hh:520
gem5::LdsChunk::size
std::vector< uint8_t >::size_type size() const
get the size of this chunk
Definition: lds_state.hh:110
gem5::LdsState::TickEvent
an event to allow event-driven execution
Definition: lds_state.hh:129
gem5::ResponsePort
A ResponsePort is a specialization of a port.
Definition: port.hh:268
gem5::LdsState::CuSidePort::atomicOperation
void atomicOperation(PacketPtr packet)
gem5::LdsState::operator=
LdsState & operator=(const LdsState &)=delete
panic_if
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition: logging.hh:203
gem5::LdsChunk::chunk
std::vector< uint8_t > chunk
Definition: lds_state.hh:117
gem5::Port
Ports are used to interface objects to each other.
Definition: port.hh:61
gem5::LdsState::countBankConflicts
unsigned countBankConflicts(PacketPtr packet, unsigned *bankAccesses)
derive the gpu mem packet from the packet and then count the bank conflicts
Definition: lds_state.cc:88
gem5::LdsState::ldsSize
std::size_t ldsSize(const uint32_t x_wgId)
get the allocated size for this workgroup
Definition: lds_state.hh:454
clocked_object.hh
gem5::LdsState::returnQueue
std::queue< std::pair< Tick, PacketPtr > > returnQueue
Definition: lds_state.hh:243
gem5::LdsState::returnQueuePush
bool returnQueuePush(std::pair< Tick, PacketPtr > thePair)
add this to the queue of packets to be returned
Definition: lds_state.cc:220
gem5::LdsState::setRetryResp
void setRetryResp(const bool value)
Definition: lds_state.hh:284
gem5::LdsState::CuSidePort::CuSidePort
CuSidePort(const std::string &_name, LdsState *_ownerLds)
Definition: lds_state.hh:166
gem5::LdsState::banks
int banks
Definition: lds_state.hh:532
gem5::AddrRange
The AddrRange class encapsulates an address range, and supports a number of tests to check if two ran...
Definition: addr_range.hh:71
gem5::LdsState::decreaseRefCounter
int decreaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
Decrease the reference count after making sure it is in the list; give back this chunk if the ref coun...
Definition: lds_state.hh:310
std::list< AddrRange >
gem5::LdsState::TickEvent::deschedule
void deschedule()
Definition: lds_state.hh:154
fatal_if
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition: logging.hh:225
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: decoder.cc:40
gem5::LdsState::process
bool process()
look for packets to return at this time
Definition: lds_state.cc:268
gem5::LdsState::reserveSpace
LdsChunk * reserveSpace(const uint32_t dispatchId, const uint32_t wgId, const uint32_t size)
assign a parent and request this amount of space be set aside for this wgid
Definition: lds_state.hh:365
gem5::LdsState::CuSidePort::recvRespRetry
virtual void recvRespRetry()
receive a retry for a response
Definition: lds_state.cc:247
gem5::LdsState::cuPort
CuSidePort cuPort
Definition: lds_state.hh:513
gem5::LdsState
Definition: lds_state.hh:122
gem5::LdsState::CuSidePort::ownerLds
LdsState * ownerLds
Definition: lds_state.hh:172
gem5::LdsState::getBanks
int getBanks() const
Definition: lds_state.hh:433
gem5::LdsState::tickEvent
TickEvent tickEvent
Definition: lds_state.hh:238
gem5::LdsChunk::read
T read(const uint32_t index)
a read operation
Definition: lds_state.hh:73

Generated on Tue Sep 21 2021 12:25:25 for gem5 by doxygen 1.8.17