gem5 v24.0.0.0
Loading...
Searching...
No Matches
lds_state.hh
Go to the documentation of this file.
1/*
2 * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#ifndef __LDS_STATE_HH__
33#define __LDS_STATE_HH__
34
35#include <array>
36#include <queue>
37#include <string>
38#include <unordered_map>
39#include <utility>
40#include <vector>
41
42#include "debug/GPULDS.hh"
43#include "gpu-compute/misc.hh"
44#include "mem/port.hh"
45#include "params/LdsState.hh"
46#include "sim/clocked_object.hh"
47
48namespace gem5
49{
50
51class ComputeUnit;
52
58{
59 public:
60 LdsChunk(const uint32_t x_size):
61 chunk(x_size)
62 {
63 }
64
66
70 template<class T>
71 T
72 read(const uint32_t index)
73 {
78 if (index >= chunk.size()) {
79 DPRINTF(GPULDS, "LDS[%d][%d]: Read 0 beyond size (%ld)\n",
80 dispatchId, wgId, chunk.size());
81 return (T)0;
82 }
83
84 T *p0 = (T *) (&(chunk.at(index)));
85
86 if (sizeof(T) <= 4) {
87 [[maybe_unused]] uint32_t int_val =
88 *reinterpret_cast<uint32_t*>(p0);
89 DPRINTF(GPULDS, "LDS[%d][%d]: Read %08x from index %d\n",
90 dispatchId, wgId, int_val, index);
91 } else if (sizeof(T) <= 8) {
92 [[maybe_unused]] uint64_t int_val =
93 *reinterpret_cast<uint64_t*>(p0);
94 DPRINTF(GPULDS, "LDS[%d][%d]: Read %016lx from index %d\n",
95 dispatchId, wgId, int_val, index);
96 } else if (sizeof(T) <= 16) {
97 [[maybe_unused]] uint64_t *int_vals =
98 reinterpret_cast<uint64_t*>(p0);
99 DPRINTF(GPULDS, "LDS[%d][%d]: Read %016lx%016lx from index %d\n",
100 dispatchId, wgId, int_vals[1], int_vals[0], index);
101 }
102
103 return *p0;
104 }
105
109 template<class T>
110 void
111 write(const uint32_t index, const T value)
112 {
117 if (index >= chunk.size()) {
118 DPRINTF(GPULDS, "LDS[%d][%d]: Ignoring write beyond size (%ld)\n",
119 dispatchId, wgId, chunk.size());
120 return;
121 }
122
123 T *p0 = (T *) (&(chunk.at(index)));
124
125 if (sizeof(T) <= 4) {
126 [[maybe_unused]] uint32_t prev_val =
127 *reinterpret_cast<uint32_t*>(p0);
128 DPRINTF(GPULDS, "LDS[%d][%d]: Write %08lx to index %d (was "
129 "%08lx)\n", dispatchId, wgId, value, index, prev_val);
130 } else if (sizeof(T) <= 8) {
131 [[maybe_unused]] uint64_t prev_val =
132 *reinterpret_cast<uint64_t*>(p0);
133 DPRINTF(GPULDS, "LDS[%d][%d]: Write %016lx to index %d (was "
134 "%016lx)\n", dispatchId, wgId, value, index, prev_val);
135 } else if (sizeof(T) <= 16) {
136 [[maybe_unused]] uint64_t *prev_vals =
137 reinterpret_cast<uint64_t*>(p0);
138 [[maybe_unused]] const uint64_t *next_vals =
139 reinterpret_cast<const uint64_t*>(&value);
140 DPRINTF(GPULDS, "LDS[%d][%d]: Write %016lx%016lx to index %d "
141 "(was %016lx%016lx)\n", dispatchId, wgId, next_vals[1],
142 next_vals[0], index, prev_vals[1], prev_vals[0]);
143 }
144
145 *p0 = value;
146 }
147
151 template<class T>
152 T
153 atomic(const uint32_t index, AtomicOpFunctorPtr amoOp)
154 {
159 if (index >= chunk.size()) {
160 return (T)0;
161 }
162 T *p0 = (T *) (&(chunk.at(index)));
163 T tmp = *p0;
164
165 (*amoOp)((uint8_t *)p0);
166 return tmp;
167 }
168
173 size() const
174 {
175 return chunk.size();
176 }
177
178 uint32_t dispatchId;
179 uint32_t wgId;
180
181 protected:
182 // the actual data store for this slice of the LDS
184};
185
186// Local Data Share (LDS) State per Wavefront (contents of the LDS region
187// allocated to the WorkGroup of this Wavefront)
189{
190 protected:
191
195 class TickEvent: public Event
196 {
197 protected:
198
199 LdsState *ldsState = nullptr;
200
202
203 public:
204
205 TickEvent(LdsState *_ldsState) :
206 ldsState(_ldsState)
207 {
208 }
209
210 virtual void
211 process();
212
213 void
215 {
216 mainEventQueue[0]->schedule(this, when);
217 }
218
219 void
221 {
222 mainEventQueue[0]->deschedule(this);
223 }
224 };
225
230 {
231 public:
232 CuSidePort(const std::string &_name, LdsState *_ownerLds) :
233 ResponsePort(_name), ownerLds(_ownerLds)
234 {
235 }
236
237 protected:
239
240 virtual bool
242
243 virtual Tick
245 {
246 return 0;
247 }
248
249 virtual void
251
252 virtual void
254 {
255 }
256
257 virtual void
258 recvRetry();
259
260 virtual void
262
263 virtual AddrRangeList
265 {
266 AddrRangeList ranges;
267 ranges.push_back(ownerLds->getAddrRange());
268 return ranges;
269 }
270
271 template<typename T>
272 void
274
275 template<typename T>
276 void
278
279 template<typename T>
280 void
282 };
283
284 protected:
285
296 std::unordered_map<uint32_t,
297 std::unordered_map<uint32_t, int32_t>> refCounter;
298
299 // the map that allows workgroups to access their own chunk of the LDS
300 std::unordered_map<uint32_t,
301 std::unordered_map<uint32_t, LdsChunk>> chunkMap;
302
303 // an event to allow the LDS to wake up at a specified time
305
306 // the queue of packets that are going back to the CU after a
307 // read/write/atomic op
308 // TODO need to make this have a maximum size to create flow control
309 std::queue<std::pair<Tick, PacketPtr>> returnQueue;
310
311 // whether or not there are pending responses
312 bool retryResp = false;
313
314 bool
315 process();
316
318 getDynInstr(PacketPtr packet);
319
320 bool
321 processPacket(PacketPtr packet);
322
323 unsigned
324 countBankConflicts(PacketPtr packet, unsigned *bankAccesses);
325
326 unsigned
328 unsigned *numBankAccesses);
329
330 public:
331 using Params = LdsStateParams;
332
333 LdsState(const Params &params);
334
335 // prevent copy construction
336 LdsState(const LdsState&) = delete;
337
339 {
340 parent = nullptr;
341 }
342
343 bool
345 {
346 return retryResp;
347 }
348
349 void
350 setRetryResp(const bool value)
351 {
352 retryResp = value;
353 }
354
355 // prevent assignment
356 LdsState &
357 operator=(const LdsState &) = delete;
358
362 int
363 increaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
364 {
365 int refCount = getRefCounter(dispatchId, wgId);
366 fatal_if(refCount < 0,
367 "reference count should not be below zero");
368 return ++refCounter[dispatchId][wgId];
369 }
370
375 int
376 decreaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
377 {
378 int refCount = getRefCounter(dispatchId, wgId);
379
380 fatal_if(refCount <= 0,
381 "reference count should not be below zero or at zero to"
382 "decrement");
383
384 refCounter[dispatchId][wgId]--;
385
386 if (refCounter[dispatchId][wgId] == 0) {
387 releaseSpace(dispatchId, wgId);
388 return 0;
389 } else {
390 return refCounter[dispatchId][wgId];
391 }
392 }
393
397 int
398 getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
399 {
400 auto dispatchIter = chunkMap.find(dispatchId);
401 fatal_if(dispatchIter == chunkMap.end(),
402 "could not locate this dispatch id [%d]", dispatchId);
403
404 auto workgroup = dispatchIter->second.find(wgId);
405 fatal_if(workgroup == dispatchIter->second.end(),
406 "could not find this workgroup id within this dispatch id"
407 " did[%d] wgid[%d]", dispatchId, wgId);
408
409 auto refCountIter = refCounter.find(dispatchId);
410 if (refCountIter == refCounter.end()) {
411 fatal("could not locate this dispatch id [%d]", dispatchId);
412 } else {
413 auto workgroup = refCountIter->second.find(wgId);
414 if (workgroup == refCountIter->second.end()) {
415 fatal("could not find this workgroup id within this dispatch id"
416 " did[%d] wgid[%d]", dispatchId, wgId);
417 } else {
418 return refCounter.at(dispatchId).at(wgId);
419 }
420 }
421
422 fatal("should not reach this point");
423 return 0;
424 }
425
430 LdsChunk *
431 reserveSpace(const uint32_t dispatchId, const uint32_t wgId,
432 const uint32_t size)
433 {
434 if (chunkMap.find(dispatchId) != chunkMap.end()) {
435 panic_if(
436 chunkMap[dispatchId].find(wgId) != chunkMap[dispatchId].end(),
437 "duplicate workgroup ID asking for space in the LDS "
438 "did[%d] wgid[%d]", dispatchId, wgId);
439 }
440
441 if (bytesAllocated + size > maximumSize) {
442 return nullptr;
443 } else {
444 bytesAllocated += size;
445
446 auto value = chunkMap[dispatchId].emplace(wgId, LdsChunk(size));
447 panic_if(!value.second, "was unable to allocate a new chunkMap");
448
449 // make an entry for this workgroup
450 refCounter[dispatchId][wgId] = 0;
451
452 chunkMap[dispatchId][wgId].dispatchId = dispatchId;
453 chunkMap[dispatchId][wgId].wgId = wgId;
454
455 return &chunkMap[dispatchId][wgId];
456 }
457 }
458
459 /*
460 * return pointer to lds chunk for wgid
461 */
462 LdsChunk *
463 getLdsChunk(const uint32_t dispatchId, const uint32_t wgId)
464 {
465 fatal_if(chunkMap.find(dispatchId) == chunkMap.end(),
466 "fetch for unknown dispatch ID did[%d]", dispatchId);
467
468 fatal_if(chunkMap[dispatchId].find(wgId) == chunkMap[dispatchId].end(),
469 "fetch for unknown workgroup ID wgid[%d] in dispatch ID did[%d]",
470 wgId, dispatchId);
471
472 return &chunkMap[dispatchId][wgId];
473 }
474
475 bool
477
478 Tick
480 {
481 // TODO set to max(lastCommand+1, curTick())
482 return returnQueue.empty() ? curTick() : returnQueue.back().first;
483 }
484
485 void
486 setParent(ComputeUnit *x_parent);
487
488 // accessors
490 getParent() const
491 {
492 return parent;
493 }
494
495 std::string
497 {
498 return _name;
499 }
500
501 int
502 getBanks() const
503 {
504 return banks;
505 }
506
509 {
510 return parent;
511 }
512
513 int
515 {
516 return bankConflictPenalty;
517 }
518
522 std::size_t
523 ldsSize(const uint32_t x_wgId)
524 {
525 return chunkMap[x_wgId].size();
526 }
527
530 {
531 return range;
532 }
533
534 Port &
535 getPort(const std::string &if_name, PortID idx)
536 {
537 if (if_name == "cuPort") {
538 // TODO need to set name dynamically at this point?
539 return cuPort;
540 } else {
541 fatal("cannot resolve the port name " + if_name);
542 }
543 }
544
548 bool
549 canReserve(uint32_t x_size) const
550 {
551 return bytesAllocated + x_size <= maximumSize;
552 }
553
554 private:
558 bool
559 releaseSpace(const uint32_t x_dispatchId, const uint32_t x_wgId)
560 {
561 auto dispatchIter = chunkMap.find(x_dispatchId);
562
563 if (dispatchIter == chunkMap.end()) {
564 fatal("dispatch id not found [%d]", x_dispatchId);
565 } else {
566 auto workgroupIter = dispatchIter->second.find(x_wgId);
567 if (workgroupIter == dispatchIter->second.end()) {
568 fatal("workgroup id [%d] not found in dispatch id [%d]",
569 x_wgId, x_dispatchId);
570 }
571 }
572
573 fatal_if(bytesAllocated < chunkMap[x_dispatchId][x_wgId].size(),
574 "releasing more space than was allocated");
575
576 bytesAllocated -= chunkMap[x_dispatchId][x_wgId].size();
577 chunkMap[x_dispatchId].erase(chunkMap[x_dispatchId].find(x_wgId));
578 return true;
579 }
580
581 // the port that connects this LDS to its owner CU
583
584 ComputeUnit* parent = nullptr;
585
586 std::string _name;
587
588 // the number of bytes currently reserved by all workgroups
590
591 // the size of the LDS, the most bytes available
593
594 // Address range of this memory
596
597 // the penalty, in cycles, for each LDS bank conflict
599
600 // the number of banks in the LDS underlying data store
601 int banks = 0;
602};
603
604} // namespace gem5
605
606#endif // __LDS_STATE_HH__
#define DPRINTF(x,...)
Definition trace.hh:210
The AddrRange class encapsulates an address range, and supports a number of tests to check if two ran...
Definition addr_range.hh:82
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
this represents a slice of the overall LDS, intended to be associated with an individual workgroup
Definition lds_state.hh:58
void write(const uint32_t index, const T value)
a write operation
Definition lds_state.hh:111
T atomic(const uint32_t index, AtomicOpFunctorPtr amoOp)
an atomic operation
Definition lds_state.hh:153
LdsChunk(const uint32_t x_size)
Definition lds_state.hh:60
T read(const uint32_t index)
a read operation
Definition lds_state.hh:72
uint32_t dispatchId
Definition lds_state.hh:178
std::vector< uint8_t >::size_type size() const
get the size of this chunk
Definition lds_state.hh:173
std::vector< uint8_t > chunk
Definition lds_state.hh:183
uint32_t wgId
Definition lds_state.hh:179
CuSidePort is the LDS Port closer to the CU side.
Definition lds_state.hh:230
virtual Tick recvAtomic(PacketPtr pkt)
Receive an atomic request packet from the peer.
Definition lds_state.hh:244
virtual void recvRetry()
receive a retry
Definition lds_state.cc:257
virtual bool recvTimingReq(PacketPtr pkt)
receive the packet from the CU
Definition lds_state.cc:169
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
Definition lds_state.hh:264
void storeData(PacketPtr packet)
virtual void recvRangeChange()
Definition lds_state.hh:253
CuSidePort(const std::string &_name, LdsState *_ownerLds)
Definition lds_state.hh:232
void atomicOperation(PacketPtr packet)
virtual void recvFunctional(PacketPtr pkt)
receive a packet in functional mode
Definition lds_state.cc:236
virtual void recvRespRetry()
receive a retry for a response
Definition lds_state.cc:245
void loadData(PacketPtr packet)
an event to allow event-driven execution
Definition lds_state.hh:196
virtual void process()
wake up at this time and perform specified actions
Definition lds_state.cc:319
TickEvent(LdsState *_ldsState)
Definition lds_state.hh:205
void schedule(Tick when)
Definition lds_state.hh:214
int getBanks() const
Definition lds_state.hh:502
int increaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
use the dynamic wave id to create or just increase the reference count
Definition lds_state.hh:363
bool process()
look for packets to return at this time
Definition lds_state.cc:266
AddrRange range
Definition lds_state.hh:595
LdsChunk * getLdsChunk(const uint32_t dispatchId, const uint32_t wgId)
Definition lds_state.hh:463
bool canReserve(uint32_t x_size) const
can this much space be reserved for a workgroup?
Definition lds_state.hh:549
int decreaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
Decrease the reference count after making sure it is in the list; give back this chunk if the ref coun...
Definition lds_state.hh:376
std::string _name
Definition lds_state.hh:586
void setRetryResp(const bool value)
Definition lds_state.hh:350
bool returnQueuePush(std::pair< Tick, PacketPtr > thePair)
add this to the queue of packets to be returned
Definition lds_state.cc:218
std::unordered_map< uint32_t, std::unordered_map< uint32_t, int32_t > > refCounter
The LDS reference counter. The key is the workgroup ID and dispatch ID. The value is the number of wave...
Definition lds_state.hh:297
LdsChunk * reserveSpace(const uint32_t dispatchId, const uint32_t wgId, const uint32_t size)
assign a parent and request this amount of space be set aside for this wgid
Definition lds_state.hh:431
int getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
return the current reference count for this workgroup id
Definition lds_state.hh:398
std::unordered_map< uint32_t, std::unordered_map< uint32_t, LdsChunk > > chunkMap
Definition lds_state.hh:301
Port & getPort(const std::string &if_name, PortID idx)
Get a port with a given name and index.
Definition lds_state.hh:535
LdsState(const Params &params)
the default constructor that works with SWIG
Definition lds_state.cc:48
bool processPacket(PacketPtr packet)
process an incoming packet, add it to the return queue
Definition lds_state.cc:187
LdsState & operator=(const LdsState &)=delete
ComputeUnit * getParent() const
Definition lds_state.hh:490
int bankConflictPenalty
Definition lds_state.hh:598
TickEvent tickEvent
Definition lds_state.hh:304
ComputeUnit * getComputeUnit() const
Definition lds_state.hh:508
unsigned countBankConflicts(PacketPtr packet, unsigned *bankAccesses)
derive the gpu mem packet from the packet and then count the bank conflicts
Definition lds_state.cc:86
LdsState(const LdsState &)=delete
std::queue< std::pair< Tick, PacketPtr > > returnQueue
Definition lds_state.hh:309
int getBankConflictPenalty() const
Definition lds_state.hh:514
bool isRetryResp() const
Definition lds_state.hh:344
ComputeUnit * parent
Definition lds_state.hh:584
LdsStateParams Params
Definition lds_state.hh:331
void setParent(ComputeUnit *x_parent)
set the parent and name based on the parent
Definition lds_state.cc:71
std::string getName()
Definition lds_state.hh:496
CuSidePort cuPort
Definition lds_state.hh:582
GPUDynInstPtr getDynInstr(PacketPtr packet)
Definition lds_state.cc:175
bool releaseSpace(const uint32_t x_dispatchId, const uint32_t x_wgId)
give back the space
Definition lds_state.hh:559
std::size_t ldsSize(const uint32_t x_wgId)
get the allocated size for this workgroup
Definition lds_state.hh:523
AddrRange getAddrRange() const
Definition lds_state.hh:529
Tick earliestReturnTime() const
Definition lds_state.hh:479
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition packet.hh:295
Ports are used to interface objects to each other.
Definition port.hh:62
A ResponsePort is a specialization of a port.
Definition port.hh:349
STL pair class.
Definition stl.hh:58
STL vector class.
Definition stl.hh:37
ClockedObject declaration and implementation.
std::unique_ptr< AtomicOpFunctor > AtomicOpFunctorPtr
Definition amo.hh:269
Tick when() const
Get the time that the event is scheduled.
Definition eventq.hh:501
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition logging.hh:236
#define fatal(...)
This implements a cprintf based fatal() function.
Definition logging.hh:200
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition logging.hh:214
const Params & params() const
static SimObject * find(const char *name)
Find the SimObject with the given name and return a pointer to it.
Port Object Declaration.
Bitfield< 30, 0 > index
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
Definition binary32.hh:36
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition misc.hh:49
Tick curTick()
The universal simulation clock.
Definition cur_tick.hh:46
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Definition types.hh:245
uint64_t Tick
Tick count type.
Definition types.hh:58
std::vector< EventQueue * > mainEventQueue
Array for main event queues.
Definition eventq.cc:57

Generated on Tue Jun 18 2024 16:24:04 for gem5 by doxygen 1.11.0