gem5 v23.0.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
lds_state.hh
Go to the documentation of this file.
1/*
2 * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#ifndef __LDS_STATE_HH__
33#define __LDS_STATE_HH__
34
35#include <array>
36#include <queue>
37#include <string>
38#include <unordered_map>
39#include <utility>
40#include <vector>
41
42#include "gpu-compute/misc.hh"
43#include "mem/port.hh"
44#include "params/LdsState.hh"
45#include "sim/clocked_object.hh"
46
47namespace gem5
48{
49
50class ComputeUnit;
51
57{
58 public:
59 LdsChunk(const uint32_t x_size):
60 chunk(x_size)
61 {
62 }
63
65
69 template<class T>
70 T
71 read(const uint32_t index)
72 {
77 if (index >= chunk.size()) {
78 return (T)0;
79 }
80
81 T *p0 = (T *) (&(chunk.at(index)));
82 return *p0;
83 }
84
88 template<class T>
89 void
90 write(const uint32_t index, const T value)
91 {
96 if (index >= chunk.size()) {
97 return;
98 }
99
100 T *p0 = (T *) (&(chunk.at(index)));
101 *p0 = value;
102 }
103
107 template<class T>
108 T
109 atomic(const uint32_t index, AtomicOpFunctorPtr amoOp)
110 {
115 if (index >= chunk.size()) {
116 return (T)0;
117 }
118 T *p0 = (T *) (&(chunk.at(index)));
119 T tmp = *p0;
120
121 (*amoOp)((uint8_t *)p0);
122 return tmp;
123 }
124
129 size() const
130 {
131 return chunk.size();
132 }
133
134 protected:
135 // the actual data store for this slice of the LDS
137};
138
139// Local Data Share (LDS) State per Wavefront (contents of the LDS region
140// allocated to the WorkGroup of this Wavefront)
142{
143 protected:
144
148 class TickEvent: public Event
149 {
150 protected:
151
152 LdsState *ldsState = nullptr;
153
155
156 public:
157
158 TickEvent(LdsState *_ldsState) :
159 ldsState(_ldsState)
160 {
161 }
162
163 virtual void
164 process();
165
166 void
168 {
169 mainEventQueue[0]->schedule(this, when);
170 }
171
172 void
174 {
175 mainEventQueue[0]->deschedule(this);
176 }
177 };
178
183 {
184 public:
185 CuSidePort(const std::string &_name, LdsState *_ownerLds) :
186 ResponsePort(_name), ownerLds(_ownerLds)
187 {
188 }
189
190 protected:
192
193 virtual bool
195
196 virtual Tick
198 {
199 return 0;
200 }
201
202 virtual void
204
205 virtual void
207 {
208 }
209
210 virtual void
211 recvRetry();
212
213 virtual void
215
216 virtual AddrRangeList
218 {
219 AddrRangeList ranges;
220 ranges.push_back(ownerLds->getAddrRange());
221 return ranges;
222 }
223
224 template<typename T>
225 void
227
228 template<typename T>
229 void
231
232 template<typename T>
233 void
235 };
236
237 protected:
238
249 std::unordered_map<uint32_t,
250 std::unordered_map<uint32_t, int32_t>> refCounter;
251
252 // the map that allows workgroups to access their own chunk of the LDS
253 std::unordered_map<uint32_t,
254 std::unordered_map<uint32_t, LdsChunk>> chunkMap;
255
256 // an event to allow the LDS to wake up at a specified time
258
259 // the queue of packets that are going back to the CU after a
260 // read/write/atomic op
261 // TODO need to make this have a maximum size to create flow control
262 std::queue<std::pair<Tick, PacketPtr>> returnQueue;
263
264 // whether or not there are pending responses
265 bool retryResp = false;
266
267 bool
268 process();
269
271 getDynInstr(PacketPtr packet);
272
273 bool
274 processPacket(PacketPtr packet);
275
276 unsigned
277 countBankConflicts(PacketPtr packet, unsigned *bankAccesses);
278
279 unsigned
281 unsigned *numBankAccesses);
282
283 public:
284 using Params = LdsStateParams;
285
286 LdsState(const Params &params);
287
288 // prevent copy construction
289 LdsState(const LdsState&) = delete;
290
292 {
293 parent = nullptr;
294 }
295
296 bool
298 {
299 return retryResp;
300 }
301
302 void
303 setRetryResp(const bool value)
304 {
305 retryResp = value;
306 }
307
308 // prevent assignment
309 LdsState &
310 operator=(const LdsState &) = delete;
311
315 int
316 increaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
317 {
318 int refCount = getRefCounter(dispatchId, wgId);
319 fatal_if(refCount < 0,
320 "reference count should not be below zero");
321 return ++refCounter[dispatchId][wgId];
322 }
323
328 int
329 decreaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
330 {
331 int refCount = getRefCounter(dispatchId, wgId);
332
333 fatal_if(refCount <= 0,
334 "reference count should not be below zero or at zero to"
335 "decrement");
336
337 refCounter[dispatchId][wgId]--;
338
339 if (refCounter[dispatchId][wgId] == 0) {
340 releaseSpace(dispatchId, wgId);
341 return 0;
342 } else {
343 return refCounter[dispatchId][wgId];
344 }
345 }
346
350 int
351 getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
352 {
353 auto dispatchIter = chunkMap.find(dispatchId);
354 fatal_if(dispatchIter == chunkMap.end(),
355 "could not locate this dispatch id [%d]", dispatchId);
356
357 auto workgroup = dispatchIter->second.find(wgId);
358 fatal_if(workgroup == dispatchIter->second.end(),
359 "could not find this workgroup id within this dispatch id"
360 " did[%d] wgid[%d]", dispatchId, wgId);
361
362 auto refCountIter = refCounter.find(dispatchId);
363 if (refCountIter == refCounter.end()) {
364 fatal("could not locate this dispatch id [%d]", dispatchId);
365 } else {
366 auto workgroup = refCountIter->second.find(wgId);
367 if (workgroup == refCountIter->second.end()) {
368 fatal("could not find this workgroup id within this dispatch id"
369 " did[%d] wgid[%d]", dispatchId, wgId);
370 } else {
371 return refCounter.at(dispatchId).at(wgId);
372 }
373 }
374
375 fatal("should not reach this point");
376 return 0;
377 }
378
383 LdsChunk *
384 reserveSpace(const uint32_t dispatchId, const uint32_t wgId,
385 const uint32_t size)
386 {
387 if (chunkMap.find(dispatchId) != chunkMap.end()) {
388 panic_if(
389 chunkMap[dispatchId].find(wgId) != chunkMap[dispatchId].end(),
390 "duplicate workgroup ID asking for space in the LDS "
391 "did[%d] wgid[%d]", dispatchId, wgId);
392 }
393
394 if (bytesAllocated + size > maximumSize) {
395 return nullptr;
396 } else {
397 bytesAllocated += size;
398
399 auto value = chunkMap[dispatchId].emplace(wgId, LdsChunk(size));
400 panic_if(!value.second, "was unable to allocate a new chunkMap");
401
402 // make an entry for this workgroup
403 refCounter[dispatchId][wgId] = 0;
404
405 return &chunkMap[dispatchId][wgId];
406 }
407 }
408
409 /*
410 * return pointer to lds chunk for wgid
411 */
412 LdsChunk *
413 getLdsChunk(const uint32_t dispatchId, const uint32_t wgId)
414 {
415 fatal_if(chunkMap.find(dispatchId) == chunkMap.end(),
416 "fetch for unknown dispatch ID did[%d]", dispatchId);
417
418 fatal_if(chunkMap[dispatchId].find(wgId) == chunkMap[dispatchId].end(),
419 "fetch for unknown workgroup ID wgid[%d] in dispatch ID did[%d]",
420 wgId, dispatchId);
421
422 return &chunkMap[dispatchId][wgId];
423 }
424
425 bool
427
428 Tick
430 {
431 // TODO set to max(lastCommand+1, curTick())
432 return returnQueue.empty() ? curTick() : returnQueue.back().first;
433 }
434
435 void
436 setParent(ComputeUnit *x_parent);
437
438 // accessors
440 getParent() const
441 {
442 return parent;
443 }
444
445 std::string
447 {
448 return _name;
449 }
450
451 int
452 getBanks() const
453 {
454 return banks;
455 }
456
459 {
460 return parent;
461 }
462
463 int
465 {
466 return bankConflictPenalty;
467 }
468
472 std::size_t
473 ldsSize(const uint32_t x_wgId)
474 {
475 return chunkMap[x_wgId].size();
476 }
477
480 {
481 return range;
482 }
483
484 Port &
485 getPort(const std::string &if_name, PortID idx)
486 {
487 if (if_name == "cuPort") {
488 // TODO need to set name dynamically at this point?
489 return cuPort;
490 } else {
491 fatal("cannot resolve the port name " + if_name);
492 }
493 }
494
498 bool
499 canReserve(uint32_t x_size) const
500 {
501 return bytesAllocated + x_size <= maximumSize;
502 }
503
504 private:
508 bool
509 releaseSpace(const uint32_t x_dispatchId, const uint32_t x_wgId)
510 {
511 auto dispatchIter = chunkMap.find(x_dispatchId);
512
513 if (dispatchIter == chunkMap.end()) {
514 fatal("dispatch id not found [%d]", x_dispatchId);
515 } else {
516 auto workgroupIter = dispatchIter->second.find(x_wgId);
517 if (workgroupIter == dispatchIter->second.end()) {
518 fatal("workgroup id [%d] not found in dispatch id [%d]",
519 x_wgId, x_dispatchId);
520 }
521 }
522
523 fatal_if(bytesAllocated < chunkMap[x_dispatchId][x_wgId].size(),
524 "releasing more space than was allocated");
525
526 bytesAllocated -= chunkMap[x_dispatchId][x_wgId].size();
527 chunkMap[x_dispatchId].erase(chunkMap[x_dispatchId].find(x_wgId));
528 return true;
529 }
530
531 // the port that connects this LDS to its owner CU
533
534 ComputeUnit* parent = nullptr;
535
536 std::string _name;
537
538 // the number of bytes currently reserved by all workgroups
540
541 // the size of the LDS, the most bytes available
543
544 // Address range of this memory
546
547 // the penalty, in cycles, for each LDS bank conflict
549
550 // the number of banks in the LDS underlying data store
551 int banks = 0;
552};
553
554} // namespace gem5
555
556#endif // __LDS_STATE_HH__
The AddrRange class encapsulates an address range, and supports a number of tests to check if two ran...
Definition addr_range.hh:82
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
this represents a slice of the overall LDS, intended to be associated with an individual workgroup
Definition lds_state.hh:57
void write(const uint32_t index, const T value)
a write operation
Definition lds_state.hh:90
T atomic(const uint32_t index, AtomicOpFunctorPtr amoOp)
an atomic operation
Definition lds_state.hh:109
LdsChunk(const uint32_t x_size)
Definition lds_state.hh:59
T read(const uint32_t index)
a read operation
Definition lds_state.hh:71
std::vector< uint8_t >::size_type size() const
get the size of this chunk
Definition lds_state.hh:129
std::vector< uint8_t > chunk
Definition lds_state.hh:136
CuSidePort is the LDS Port closer to the CU side.
Definition lds_state.hh:183
virtual Tick recvAtomic(PacketPtr pkt)
Receive an atomic request packet from the peer.
Definition lds_state.hh:197
virtual void recvRetry()
receive a retry
Definition lds_state.cc:257
virtual bool recvTimingReq(PacketPtr pkt)
receive the packet from the CU
Definition lds_state.cc:169
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
Definition lds_state.hh:217
void storeData(PacketPtr packet)
virtual void recvRangeChange()
Definition lds_state.hh:206
CuSidePort(const std::string &_name, LdsState *_ownerLds)
Definition lds_state.hh:185
void atomicOperation(PacketPtr packet)
virtual void recvFunctional(PacketPtr pkt)
receive a packet in functional mode
Definition lds_state.cc:236
virtual void recvRespRetry()
receive a retry for a response
Definition lds_state.cc:245
void loadData(PacketPtr packet)
an event to allow event-driven execution
Definition lds_state.hh:149
virtual void process()
wake up at this time and perform specified actions
Definition lds_state.cc:319
TickEvent(LdsState *_ldsState)
Definition lds_state.hh:158
void schedule(Tick when)
Definition lds_state.hh:167
int getBanks() const
Definition lds_state.hh:452
int increaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
use the dynamic wave id to create or just increase the reference count
Definition lds_state.hh:316
bool process()
look for packets to return at this time
Definition lds_state.cc:266
AddrRange range
Definition lds_state.hh:545
LdsChunk * getLdsChunk(const uint32_t dispatchId, const uint32_t wgId)
Definition lds_state.hh:413
bool canReserve(uint32_t x_size) const
can this much space be reserved for a workgroup?
Definition lds_state.hh:499
int decreaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
Decrease the reference count after making sure it is in the list; give back this chunk if the ref coun...
Definition lds_state.hh:329
std::string _name
Definition lds_state.hh:536
void setRetryResp(const bool value)
Definition lds_state.hh:303
bool returnQueuePush(std::pair< Tick, PacketPtr > thePair)
add this to the queue of packets to be returned
Definition lds_state.cc:218
std::unordered_map< uint32_t, std::unordered_map< uint32_t, int32_t > > refCounter
The LDS reference counter. The key is the workgroup ID and dispatch ID. The value is the number of wave...
Definition lds_state.hh:250
LdsChunk * reserveSpace(const uint32_t dispatchId, const uint32_t wgId, const uint32_t size)
assign a parent and request this amount of space be set aside for this wgid
Definition lds_state.hh:384
int getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
return the current reference count for this workgroup id
Definition lds_state.hh:351
std::unordered_map< uint32_t, std::unordered_map< uint32_t, LdsChunk > > chunkMap
Definition lds_state.hh:254
Port & getPort(const std::string &if_name, PortID idx)
Get a port with a given name and index.
Definition lds_state.hh:485
bool processPacket(PacketPtr packet)
process an incoming packet, add it to the return queue
Definition lds_state.cc:187
LdsState & operator=(const LdsState &)=delete
ComputeUnit * getParent() const
Definition lds_state.hh:440
int bankConflictPenalty
Definition lds_state.hh:548
TickEvent tickEvent
Definition lds_state.hh:257
ComputeUnit * getComputeUnit() const
Definition lds_state.hh:458
unsigned countBankConflicts(PacketPtr packet, unsigned *bankAccesses)
derive the gpu mem packet from the packet and then count the bank conflicts
Definition lds_state.cc:86
LdsState(const LdsState &)=delete
std::queue< std::pair< Tick, PacketPtr > > returnQueue
Definition lds_state.hh:262
int getBankConflictPenalty() const
Definition lds_state.hh:464
bool isRetryResp() const
Definition lds_state.hh:297
ComputeUnit * parent
Definition lds_state.hh:534
LdsStateParams Params
Definition lds_state.hh:284
void setParent(ComputeUnit *x_parent)
set the parent and name based on the parent
Definition lds_state.cc:71
std::string getName()
Definition lds_state.hh:446
CuSidePort cuPort
Definition lds_state.hh:532
GPUDynInstPtr getDynInstr(PacketPtr packet)
Definition lds_state.cc:175
bool releaseSpace(const uint32_t x_dispatchId, const uint32_t x_wgId)
give back the space
Definition lds_state.hh:509
std::size_t ldsSize(const uint32_t x_wgId)
get the allocated size for this workgroup
Definition lds_state.hh:473
AddrRange getAddrRange() const
Definition lds_state.hh:479
Tick earliestReturnTime() const
Definition lds_state.hh:429
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition packet.hh:295
Ports are used to interface objects to each other.
Definition port.hh:62
A ResponsePort is a specialization of a port.
Definition port.hh:288
STL pair class.
Definition stl.hh:58
STL vector class.
Definition stl.hh:37
ClockedObject declaration and implementation.
std::unique_ptr< AtomicOpFunctor > AtomicOpFunctorPtr
Definition amo.hh:269
Tick when() const
Get the time that the event is scheduled.
Definition eventq.hh:501
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition logging.hh:236
#define fatal(...)
This implements a cprintf based fatal() function.
Definition logging.hh:200
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition logging.hh:214
const Params & params() const
static SimObject * find(const char *name)
Find the SimObject with the given name and return a pointer to it.
Port Object Declaration.
Bitfield< 30, 0 > index
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition misc.hh:49
Tick curTick()
The universal simulation clock.
Definition cur_tick.hh:46
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Definition types.hh:245
uint64_t Tick
Tick count type.
Definition types.hh:58
std::vector< EventQueue * > mainEventQueue
Array for main event queues.
Definition eventq.cc:57

Generated on Mon Jul 10 2023 14:24:31 for gem5 by doxygen 1.9.7