58 "Number of LDS banks should be positive number");
60 "Number of LDS banks should be a power of 2");
62 "cannot allocate an LDS with a size less than 1");
64 "the LDS should be an even number");
74 fatal_if(!x_parent,
"x_parent should not be nullptr");
76 "should not be setting the parent twice");
96 "did not get the right sort of sender state");
106 unsigned *numBankAccesses)
108 int bank_conflicts = 0;
115 int groups = (
parent->wfSize() > numBanks) ?
116 (
parent->wfSize() / numBanks) : 1;
117 for (
int i = 0;
i < groups;
i++) {
120 addr_array.resize(numBanks, 0);
122 bank.resize(
banks, 0);
126 for (
int j = 0; j < numBanks; j++) {
127 if (gpuDynInst->exec_mask[(
i*numBanks)+j]) {
128 addr_array[j] = gpuDynInst->addr[(
i*numBanks)+j];
130 addr_array[j] = std::numeric_limits<Addr>::max();
134 if (gpuDynInst->isLoad() || gpuDynInst->isStore()) {
136 for (
int j = 0; j < numBanks; ++j) {
137 for (
int j0 = 0; j0 < j; j0++) {
138 if (addr_array[j] != std::numeric_limits<Addr>::max()
139 && addr_array[j] == addr_array[j0]) {
140 addr_array[j] = std::numeric_limits<Addr>::max();
146 for (
int j = 0; j < numBanks; ++j) {
147 if (addr_array[j] != std::numeric_limits<Addr>::max()) {
148 int bankId = addr_array[j] %
banks;
150 max_bank = std::max(max_bank, bank[bankId]);
155 (*numBankAccesses)++;
158 bank_conflicts += max_bank;
161 "Max bank conflicts should match num of work items per instr");
162 return bank_conflicts;
171 return ownerLds->processPacket(packet);
180 return ss->getMemInst();
189 unsigned bankAccesses = 0;
193 parent->stats.ldsBankAccesses += bankAccesses;
196 parent->stats.ldsBankConflictDist.sample(bankConflicts-1);
200 int busLength = (dynInst->isLoad()) ?
parent->loadBusLength() :
201 (dynInst->isStore()) ?
parent->storeBusLength() :
204 Tick processingTime =
238 fatal(
"not implemented");
259 fatal(
"not implemented");
280 gpuDynInst->initiateAcc(gpuDynInst);
286 bool success =
cuPort.sendTimingResp(packet);
290 panic(
"have not handled timing responses being NACK'd when sent"
ClockedObject(const ClockedObjectParams &p)
Tick clockEdge(Cycles cycles=Cycles(0)) const
Determine the tick when a cycle begins, by default the current one, but the argument also enables the...
SenderState is information carried along with the packet, esp.
GPUDynInstPtr getMemInst() const
Cycles is a wrapper class for representing cycle counts, i.e.
virtual void recvRetry()
receive a retry
virtual bool recvTimingReq(PacketPtr pkt)
receive the packet from the CU
virtual void recvFunctional(PacketPtr pkt)
receive a packet in functional mode
virtual void recvRespRetry()
receive a retry for a response
virtual void process()
wake up at this time and perform specified actions
bool process()
look for packets to return at this time
bool returnQueuePush(std::pair< Tick, PacketPtr > thePair)
add this to the queue of packets to be returned
LdsState(const Params &params)
the default constructor that works with SWIG
bool processPacket(PacketPtr packet)
process an incoming packet, add it to the return queue
unsigned countBankConflicts(PacketPtr packet, unsigned *bankAccesses)
derive the gpu mem packet from the packet and then count the bank conflicts
std::queue< std::pair< Tick, PacketPtr > > returnQueue
void setParent(ComputeUnit *x_parent)
set the parent and name based on the parent
GPUDynInstPtr getDynInstr(PacketPtr packet)
Tick earliestReturnTime() const
virtual std::string name() const
void makeTimingResponse()
SenderState * senderState
This packet's sender state.
#define panic(...)
This implements a cprintf based panic() function.
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
#define fatal(...)
This implements a cprintf based fatal() function.
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
const Params & params() const
Copyright (c) 2024 Arm Limited All rights reserved.
std::shared_ptr< GPUDynInst > GPUDynInstPtr
uint64_t Tick
Tick count type.
A virtual base opaque structure used to hold state associated with the packet (e.g....
SenderState * predecessor