#ifndef __LDS_STATE_HH__
#define __LDS_STATE_HH__

#include <unordered_map>

#include "debug/GPULDS.hh"
#include "params/LdsState.hh"
// a read operation: out-of-range reads return 0, in-range reads are
// traced per size class before the value is returned
template<class T>
T
read(const uint32_t index)
{
    if (index >= chunk.size()) {
        DPRINTF(GPULDS, "LDS[%d][%d]: Read 0 beyond size (%ld)\n",
                dispatchId, wgId, chunk.size());
        return (T)0;
    }

    T *p0 = (T *) (&(chunk.at(index)));

    if (sizeof(T) <= 4) {
        [[maybe_unused]] uint32_t int_val =
            *reinterpret_cast<uint32_t*>(p0);
        DPRINTF(GPULDS, "LDS[%d][%d]: Read %08x from index %d\n",
                dispatchId, wgId, int_val, index);
    } else if (sizeof(T) <= 8) {
        [[maybe_unused]] uint64_t int_val =
            *reinterpret_cast<uint64_t*>(p0);
        DPRINTF(GPULDS, "LDS[%d][%d]: Read %016lx from index %d\n",
                dispatchId, wgId, int_val, index);
    } else if (sizeof(T) <= 16) {
        [[maybe_unused]] uint64_t *int_vals =
            reinterpret_cast<uint64_t*>(p0);
        DPRINTF(GPULDS, "LDS[%d][%d]: Read %016lx%016lx from index %d\n",
                dispatchId, wgId, int_vals[1], int_vals[0], index);
    }

    return *p0;
}
// a write operation: out-of-range writes are dropped, in-range writes are
// traced per size class before the value is stored
template<class T>
void
write(const uint32_t index, const T value)
{
    if (index >= chunk.size()) {
        DPRINTF(GPULDS, "LDS[%d][%d]: Ignoring write beyond size (%ld)\n",
                dispatchId, wgId, chunk.size());
        return;
    }

    T *p0 = (T *) (&(chunk.at(index)));

    if (sizeof(T) <= 4) {
        [[maybe_unused]] uint32_t prev_val =
            *reinterpret_cast<uint32_t*>(p0);
        DPRINTF(GPULDS, "LDS[%d][%d]: Write %08lx to index %d (was "
                "%08lx)\n", dispatchId, wgId, value, index, prev_val);
    } else if (sizeof(T) <= 8) {
        [[maybe_unused]] uint64_t prev_val =
            *reinterpret_cast<uint64_t*>(p0);
        DPRINTF(GPULDS, "LDS[%d][%d]: Write %016lx to index %d (was "
                "%016lx)\n", dispatchId, wgId, value, index, prev_val);
    } else if (sizeof(T) <= 16) {
        [[maybe_unused]] uint64_t *prev_vals =
            reinterpret_cast<uint64_t*>(p0);
        [[maybe_unused]] const uint64_t *next_vals =
            reinterpret_cast<const uint64_t*>(&value);
        DPRINTF(GPULDS, "LDS[%d][%d]: Write %016lx%016lx to index %d "
                "(was %016lx%016lx)\n", dispatchId, wgId, next_vals[1],
                next_vals[0], index, prev_vals[1], prev_vals[0]);
    }

    *p0 = value;
}
// atomic(): the functor mutates the stored value in place
(*amoOp)((uint8_t *)p0);
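The line above is the heart of LdsChunk::atomic(): the functor receives a raw pointer into the chunk's backing storage and performs its read-modify-write in place. Below is a minimal standalone sketch of that pattern; AtomicAdd32 and the surrounding scaffolding are illustrative stand-ins, not the AtomicOpFunctor machinery from gem5's base/amo.hh.

    #include <cstdint>
    #include <cstring>
    #include <iostream>

    // Illustrative stand-in for an atomic-op functor: it mutates the value
    // behind a raw byte pointer in place, just like (*amoOp)((uint8_t *)p0).
    struct AtomicAdd32
    {
        uint32_t operand;
        explicit AtomicAdd32(uint32_t op) : operand(op) {}
        void operator()(uint8_t *p) const
        {
            uint32_t v;
            std::memcpy(&v, p, sizeof(v));   // memcpy sidesteps aliasing issues
            v += operand;
            std::memcpy(p, &v, sizeof(v));
        }
    };

    int main()
    {
        uint8_t storage[4];                  // stands in for one slot of 'chunk'
        uint32_t init = 40;
        std::memcpy(storage, &init, sizeof(init));

        uint32_t old;                        // capture the prior value, as an
        std::memcpy(&old, storage, sizeof(old));  // atomic op typically returns it

        AtomicAdd32 add(2);
        add(storage);                        // the in-place update

        uint32_t now;
        std::memcpy(&now, storage, sizeof(now));
        std::cout << "old=" << old << " new=" << now << std::endl;  // old=40 new=42
        return 0;
    }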
std::unordered_map<uint32_t,
                   std::unordered_map<uint32_t, int32_t>> refCounter;
std::unordered_map<uint32_t,
                   std::unordered_map<uint32_t, LdsChunk>> chunkMap;
unsigned countBankConflicts(GPUDynInstPtr gpuDynInst,
                            unsigned *numBankAccesses);
367 "reference count should not be below zero");
381 "reference count should not be below zero or at zero to"
// chunk lookup: unknown dispatch or workgroup IDs are fatal
auto dispatchIter = chunkMap.find(dispatchId);
fatal_if(dispatchIter == chunkMap.end(),
         "could not locate this dispatch id [%d]", dispatchId);

auto workgroup = dispatchIter->second.find(wgId);
fatal_if(workgroup == dispatchIter->second.end(),
         "could not find this workgroup id within this dispatch id"
         " did[%d] wgid[%d]", dispatchId, wgId);
// look up the stored reference count; unknown IDs are fatal
auto refCountIter = refCounter.find(dispatchId);
if (refCountIter == refCounter.end()) {
    fatal("could not locate this dispatch id [%d]", dispatchId);
} else {
    auto workgroup = refCountIter->second.find(wgId);
    if (workgroup == refCountIter->second.end()) {
        fatal("could not find this workgroup id within this dispatch id"
              " did[%d] wgid[%d]", dispatchId, wgId);
    } else {
        return workgroup->second;   // the current reference count
    }
}

fatal("should not reach this point");
437 "duplicate workgroup ID asking for space in the LDS "
438 "did[%d] wgid[%d]", dispatchId, wgId);
447 panic_if(!value.second,
"was unable to allocate a new chunkMap");
452 chunkMap[dispatchId][wgId].dispatchId = dispatchId;
453 chunkMap[dispatchId][wgId].wgId = wgId;
466 "fetch for unknown dispatch ID did[%d]", dispatchId);
469 "fetch for unknown workgroup ID wgid[%d] in dispatch ID did[%d]",
// getPort(): the only port exposed is the CU-side "cuPort"
if (if_name == "cuPort") {
    return cuPort;
} else {
    fatal("cannot resolve the port name " + if_name);
}
// releaseSpace(x_dispatchId, x_wgId): hand the chunk back; unknown IDs
// or releasing more than was reserved are fatal
auto dispatchIter = chunkMap.find(x_dispatchId);
if (dispatchIter == chunkMap.end()) {
    fatal("dispatch id not found [%d]", x_dispatchId);
}

auto workgroupIter = dispatchIter->second.find(x_wgId);
if (workgroupIter == dispatchIter->second.end()) {
    fatal("workgroup id [%d] not found in dispatch id [%d]",
          x_wgId, x_dispatchId);
}

fatal_if(/* ... */, "releasing more space than was allocated");
LdsChunk: this represents a slice of the overall LDS, intended to be associated with an individual workgroup
void write(const uint32_t index, const T value)
a write operation
T atomic(const uint32_t index, AtomicOpFunctorPtr amoOp)
an atomic operation
LdsChunk(const uint32_t x_size)
T read(const uint32_t index)
a read operation
std::vector< uint8_t >::size_type size() const
get the size of this chunk
std::vector< uint8_t > chunk
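Taken together, these LdsChunk members give each workgroup a byte-addressed scratchpad with fail-soft bounds handling: reads past the end return 0 and writes past the end are dropped. The sketch below illustrates the calling pattern; it assumes an LdsState named lds and valid dispatchId/wgId values are already in scope, so it is not compilable on its own.

    LdsChunk *chunk = lds.reserveSpace(dispatchId, wgId, 1024);

    chunk->write<uint32_t>(0, 42u);            // store 4 bytes at byte offset 0
    uint32_t v = chunk->read<uint32_t>(0);     // v == 42

    // accesses past the reservation are inert: reads return 0, writes are
    // ignored, and both leave a GPULDS debug message
    uint32_t beyond = chunk->read<uint32_t>(chunk->size());  // beyond == 0
    chunk->write<uint32_t>(chunk->size(), 7u);               // dropped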
CuSidePort is the LDS Port closer to the CU side.
virtual Tick recvAtomic(PacketPtr pkt)
Receive an atomic request packet from the peer.
virtual void recvRetry()
receive a retry
virtual bool recvTimingReq(PacketPtr pkt)
receive the packet from the CU
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
void storeData(PacketPtr packet)
virtual void recvRangeChange()
CuSidePort(const std::string &_name, LdsState *_ownerLds)
void atomicOperation(PacketPtr packet)
virtual void recvFunctional(PacketPtr pkt)
receive a packet in functional mode
virtual void recvRespRetry()
receive a retry for a response
void loadData(PacketPtr packet)
TickEvent: an event to allow event-driven execution
virtual void process()
wake up at this time and perform specified actions
TickEvent(LdsState *_ldsState)
int increaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
use the dynamic wave id to create or just increase the reference count
bool process()
look for packets to return at this time
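The TickEvent, process(), and return-queue entries describe the return path: each finished access is stamped with the tick at which its response may leave the LDS, and the scheduled event drains every entry whose stamp has passed. The self-contained toy model below mirrors that queue discipline; Packet and ldsLatency here are placeholders, not the gem5 types and parameters.

    #include <cstdint>
    #include <iostream>
    #include <queue>
    #include <utility>

    using Tick = uint64_t;

    struct Packet { int id; };   // placeholder, not gem5's Packet class

    int main()
    {
        std::queue<std::pair<Tick, Packet*>> returnQueue;
        Tick now = 100;
        const Tick ldsLatency = 15;                  // illustrative, in ticks

        Packet p0{0}, p1{1};
        returnQueue.push({now + ldsLatency, &p0});       // cf. returnQueuePush()
        returnQueue.push({now + ldsLatency + 4, &p1});

        // cf. process(): "look for packets to return at this time"
        for (; now <= 120; ++now) {
            while (!returnQueue.empty() && returnQueue.front().first <= now) {
                std::cout << "tick " << now << ": return packet "
                          << returnQueue.front().second->id << "\n";
                returnQueue.pop();
            }
        }
        return 0;
    }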
LdsChunk * getLdsChunk(const uint32_t dispatchId, const uint32_t wgId)
bool canReserve(uint32_t x_size) const
can this much space be reserved for a workgroup?
int decreaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
decrease the reference count after making sure it is in the list; give back this chunk if the ref count has dropped to zero
void setRetryResp(const bool value)
bool returnQueuePush(std::pair< Tick, PacketPtr > thePair)
add this to the queue of packets to be returned
std::unordered_map< uint32_t, std::unordered_map< uint32_t, int32_t > > refCounter
the lds reference counter. The key is the workgroup ID and dispatch ID. The value is the number of wavefronts that reference this LDS chunk.
LdsChunk * reserveSpace(const uint32_t dispatchId, const uint32_t wgId, const uint32_t size)
assign a parent and request this amount of space be set aside for this wgid
int getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
return the current reference count for this workgroup id
std::unordered_map< uint32_t, std::unordered_map< uint32_t, LdsChunk > > chunkMap
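refCounter and chunkMap are parallel two-level maps, keyed by dispatch ID and then workgroup ID; a chunk stays allocated while its wavefront reference count is non-zero. The self-contained sketch below walks that lifecycle, with reservedBytes standing in for the real map of LdsChunk objects.

    #include <cstdint>
    #include <iostream>
    #include <unordered_map>

    int main()
    {
        std::unordered_map<uint32_t,
            std::unordered_map<uint32_t, int32_t>> refCounter;
        std::unordered_map<uint32_t,
            std::unordered_map<uint32_t, uint32_t>> reservedBytes;

        const uint32_t did = 3, wgid = 7;

        // cf. reserveSpace(): create the entry for this did/wgid pair
        reservedBytes[did][wgid] = 4096;
        refCounter[did][wgid] = 0;

        // cf. increaseRefCounter(): one increment per wavefront in the WG
        for (int wf = 0; wf < 4; ++wf)
            ++refCounter[did][wgid];

        // cf. decreaseRefCounter()/releaseSpace(): when the last wavefront
        // finishes, the reservation is handed back
        while (refCounter[did][wgid] > 0) {
            if (--refCounter[did][wgid] == 0) {
                reservedBytes[did].erase(wgid);
                std::cout << "released LDS for did[" << did << "] wgid["
                          << wgid << "]\n";
            }
        }
        return 0;
    }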
Port & getPort(const std::string &if_name, PortID idx)
Get a port with a given name and index.
LdsState(const Params &params)
the default constructor that works with SWIG
bool processPacket(PacketPtr packet)
process an incoming packet, add it to the return queue
LdsState & operator=(const LdsState &)=delete
ComputeUnit * getParent() const
ComputeUnit * getComputeUnit() const
unsigned countBankConflicts(PacketPtr packet, unsigned *bankAccesses)
derive the gpu mem packet from the packet and then count the bank conflicts
LdsState(const LdsState &)=delete
std::queue< std::pair< Tick, PacketPtr > > returnQueue
int getBankConflictPenalty() const
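countBankConflicts() and getBankConflictPenalty() feed the LDS timing model: lanes that land in the same bank must serialize, and each extra access costs additional cycles. The sketch below shows one common way to count such conflicts; the bank count, bank width, stride, and penalty are illustrative values, and the exact accounting in gem5's lds_state.cc may differ.

    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    int main()
    {
        const unsigned banks = 32;               // illustrative bank count
        const unsigned bankWidth = 4;            // assumed bytes per bank entry
        const unsigned bankConflictPenalty = 1;  // assumed cycles per conflict

        // one LDS byte address per lane of a 64-wide wavefront, stride 8
        std::vector<uint32_t> laneAddrs;
        for (uint32_t lane = 0; lane < 64; ++lane)
            laneAddrs.push_back(lane * 8);

        // map each address to a bank and count accesses per bank
        std::vector<unsigned> perBank(banks, 0);
        for (uint32_t addr : laneAddrs)
            ++perBank[(addr / bankWidth) % banks];

        // the busiest bank serializes its accesses; everything past the
        // first access there counts as a conflict
        unsigned worst = *std::max_element(perBank.begin(), perBank.end());
        unsigned conflicts = worst > 0 ? worst - 1 : 0;

        std::cout << "worst bank sees " << worst << " accesses, "
                  << conflicts * bankConflictPenalty << " extra cycle(s)\n";
        return 0;
    }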
void setParent(ComputeUnit *x_parent)
set the parent and name based on the parent
GPUDynInstPtr getDynInstr(PacketPtr packet)
bool releaseSpace(const uint32_t x_dispatchId, const uint32_t x_wgId)
give back the space
std::size_t ldsSize(const uint32_t x_wgId)
get the allocated size for this workgroup
AddrRange getAddrRange() const
Tick earliestReturnTime() const