gem5 v24.0.0.0
Loading...
Searching...
No Matches
lds_state.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
33
34#include <array>
35#include <cstdio>
36#include <cstdlib>
37
40#include "gpu-compute/shader.hh"
41
42namespace gem5
43{
44
48LdsState::LdsState(const Params &params) :
49 ClockedObject(params),
50 tickEvent(this),
51 cuPort(name() + ".port", this),
52 maximumSize(params.size),
53 range(params.range),
54 bankConflictPenalty(params.bankConflictPenalty),
55 banks(params.banks)
56{
57 fatal_if(params.banks <= 0,
58 "Number of LDS banks should be positive number");
59 fatal_if((params.banks & (params.banks - 1)) != 0,
60 "Number of LDS banks should be a power of 2");
61 fatal_if(params.size <= 0,
62 "cannot allocate an LDS with a size less than 1");
63 fatal_if(params.size % 2,
64 "the LDS should be an even number");
65}
66
70void
72{
73 // check that this gets assigned to the same thing each time
74 fatal_if(!x_parent, "x_parent should not be nullptr");
75 fatal_if(x_parent == parent,
76 "should not be setting the parent twice");
77
78 parent = x_parent;
79 _name = x_parent->name() + ".LdsState";
80}
81
85unsigned
86LdsState::countBankConflicts(PacketPtr packet, unsigned *bankAccesses)
87{
88 Packet::SenderState *baseSenderState = packet->senderState;
89 while (baseSenderState->predecessor) {
90 baseSenderState = baseSenderState->predecessor;
91 }
92 const ComputeUnit::LDSPort::SenderState *senderState =
93 dynamic_cast<ComputeUnit::LDSPort::SenderState *>(baseSenderState);
94
95 fatal_if(!senderState,
96 "did not get the right sort of sender state");
97
98 GPUDynInstPtr gpuDynInst = senderState->getMemInst();
99
100 return countBankConflicts(gpuDynInst, bankAccesses);
101}
102
103// Count the total number of bank conflicts for the local memory packet
104unsigned
106 unsigned *numBankAccesses)
107{
108 int bank_conflicts = 0;
109 std::vector<int> bank;
110 // the number of LDS banks being touched by the memory instruction
111 int numBanks = std::min(parent->wfSize(), banks);
112 // if the wavefront size is larger than the number of LDS banks, we
113 // need to iterate over all work items to calculate the total
114 // number of bank conflicts
115 int groups = (parent->wfSize() > numBanks) ?
116 (parent->wfSize() / numBanks) : 1;
117 for (int i = 0; i < groups; i++) {
118 // Address Array holding all the work item addresses of an instruction
119 std::vector<Addr> addr_array;
120 addr_array.resize(numBanks, 0);
121 bank.clear();
122 bank.resize(banks, 0);
123 int max_bank = 0;
124
125 // populate the address array for all active work items
126 for (int j = 0; j < numBanks; j++) {
127 if (gpuDynInst->exec_mask[(i*numBanks)+j]) {
128 addr_array[j] = gpuDynInst->addr[(i*numBanks)+j];
129 } else {
130 addr_array[j] = std::numeric_limits<Addr>::max();
131 }
132 }
133
134 if (gpuDynInst->isLoad() || gpuDynInst->isStore()) {
135 // mask identical addresses
136 for (int j = 0; j < numBanks; ++j) {
137 for (int j0 = 0; j0 < j; j0++) {
138 if (addr_array[j] != std::numeric_limits<Addr>::max()
139 && addr_array[j] == addr_array[j0]) {
140 addr_array[j] = std::numeric_limits<Addr>::max();
141 }
142 }
143 }
144 }
145 // calculate bank conflicts
146 for (int j = 0; j < numBanks; ++j) {
147 if (addr_array[j] != std::numeric_limits<Addr>::max()) {
148 int bankId = addr_array[j] % banks;
149 bank[bankId]++;
150 max_bank = std::max(max_bank, bank[bankId]);
151 // Count the number of LDS banks accessed.
152 // Since we have masked identical addresses all remaining
153 // accesses will need to be serialized if they access
154 // the same bank (bank conflict).
155 (*numBankAccesses)++;
156 }
157 }
158 bank_conflicts += max_bank;
159 }
160 panic_if(bank_conflicts > parent->wfSize(),
161 "Max bank conflicts should match num of work items per instr");
162 return bank_conflicts;
163}
164
168bool
173
182
186bool
188{
189 unsigned bankAccesses = 0;
190 // the number of conflicts this packet will have when accessing the LDS
191 unsigned bankConflicts = countBankConflicts(packet, &bankAccesses);
192 // count the total number of physical LDS bank accessed
193 parent->stats.ldsBankAccesses += bankAccesses;
194 // count the LDS bank conflicts. A number set to 1 indicates one
195 // access per bank maximum so there are no bank conflicts
196 parent->stats.ldsBankConflictDist.sample(bankConflicts-1);
197
198 GPUDynInstPtr dynInst = getDynInstr(packet);
199 // account for the LDS bank conflict overhead
200 int busLength = (dynInst->isLoad()) ? parent->loadBusLength() :
201 (dynInst->isStore()) ? parent->storeBusLength() :
203 // delay for accessing the LDS
204 Tick processingTime =
206 parent->cyclesToTicks(Cycles(busLength));
207 // choose (delay + last packet in queue) or (now + delay) as the time to
208 // return this
209 Tick doneAt = earliestReturnTime() + processingTime;
210 // then store it for processing
211 return returnQueuePush(std::make_pair(doneAt, packet));
212}
213
217bool
219{
220 // TODO add time limits (e.g. one packet per cycle) and queue size limits
221 // and implement flow control
222 returnQueue.push(thePair);
223
224 // if there is no set wakeup time, look through the queue
225 if (!tickEvent.scheduled()) {
226 process();
227 }
228
229 return true;
230}
231
235void
237{
238 fatal("not implemented");
239}
240
244void
246{
247 // TODO verify that this is the right way to do this
248 assert(ownerLds->isRetryResp());
249 ownerLds->setRetryResp(false);
250 ownerLds->process();
251}
252
256void
258{
259 fatal("not implemented");
260}
261
265bool
267{
268 Tick now = clockEdge();
269
270 // send back completed packets
271 while (!returnQueue.empty() && returnQueue.front().first <= now) {
272 PacketPtr packet = returnQueue.front().second;
273
275 dynamic_cast<ComputeUnit::LDSPort::SenderState *>(
276 packet->senderState);
277
278 GPUDynInstPtr gpuDynInst = ss->getMemInst();
279
280 gpuDynInst->initiateAcc(gpuDynInst);
281
282 packet->makeTimingResponse();
283
284 returnQueue.pop();
285
286 bool success = cuPort.sendTimingResp(packet);
287
288 if (!success) {
289 retryResp = true;
290 panic("have not handled timing responses being NACK'd when sent"
291 "back");
292 }
293 }
294
295 // determine the next wakeup time
296 if (!returnQueue.empty()) {
297
298 Tick next = returnQueue.front().first;
299
300 if (tickEvent.scheduled()) {
301
302 if (next < tickEvent.when()) {
303
305 tickEvent.schedule(next);
306 }
307 } else {
308 tickEvent.schedule(next);
309 }
310 }
311
312 return true;
313}
314
318void
320{
321 ldsState->process();
322}
323
324} // namespace gem5
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
Tick clockEdge(Cycles cycles=Cycles(0)) const
Determine the tick when a cycle begins, by default the current one, but the argument also enables the...
Tick cyclesToTicks(Cycles c) const
SenderState is information carried along with the packet, esp.
int loadBusLength() const
int storeBusLength() const
gem5::ComputeUnit::ComputeUnitStats stats
Cycles is a wrapper class for representing cycle counts, i.e.
Definition types.hh:79
virtual void recvRetry()
receive a retry
Definition lds_state.cc:257
virtual bool recvTimingReq(PacketPtr pkt)
receive the packet from the CU
Definition lds_state.cc:169
virtual void recvFunctional(PacketPtr pkt)
receive a packet in functional mode
Definition lds_state.cc:236
virtual void recvRespRetry()
receive a retry for a response
Definition lds_state.cc:245
virtual void process()
wake up at this time and perform specified actions
Definition lds_state.cc:319
void schedule(Tick when)
Definition lds_state.hh:214
bool process()
look for packets to return at this time
Definition lds_state.cc:266
std::string _name
Definition lds_state.hh:586
bool returnQueuePush(std::pair< Tick, PacketPtr > thePair)
add this to the queue of packets to be returned
Definition lds_state.cc:218
LdsState(const Params &params)
the default constructor that works with SWIG
Definition lds_state.cc:48
bool processPacket(PacketPtr packet)
process an incoming packet, add it to the return queue
Definition lds_state.cc:187
int bankConflictPenalty
Definition lds_state.hh:598
TickEvent tickEvent
Definition lds_state.hh:304
unsigned countBankConflicts(PacketPtr packet, unsigned *bankAccesses)
derive the gpu mem packet from the packet and then count the bank conflicts
Definition lds_state.cc:86
std::queue< std::pair< Tick, PacketPtr > > returnQueue
Definition lds_state.hh:309
ComputeUnit * parent
Definition lds_state.hh:584
LdsStateParams Params
Definition lds_state.hh:331
void setParent(ComputeUnit *x_parent)
set the parent and name based on the parent
Definition lds_state.cc:71
CuSidePort cuPort
Definition lds_state.hh:582
GPUDynInstPtr getDynInstr(PacketPtr packet)
Definition lds_state.cc:175
Tick earliestReturnTime() const
Definition lds_state.hh:479
virtual std::string name() const
Definition named.hh:47
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition packet.hh:295
void makeTimingResponse()
Definition packet.hh:1080
SenderState * senderState
This packet's sender state.
Definition packet.hh:545
bool sendTimingResp(PacketPtr pkt)
Attempt to send a timing response to the request port by calling its corresponding receive function.
Definition port.hh:454
void sample(const U &v, int n=1)
Add a value to the distribution n times.
STL pair class.
Definition stl.hh:58
STL vector class.
Definition stl.hh:37
bool scheduled() const
Determine if the current event is scheduled.
Definition eventq.hh:458
Tick when() const
Get the time that the event is scheduled.
Definition eventq.hh:501
#define panic(...)
This implements a cprintf based panic() function.
Definition logging.hh:188
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition logging.hh:236
#define fatal(...)
This implements a cprintf based fatal() function.
Definition logging.hh:200
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition logging.hh:214
const Params & params() const
Bitfield< 7 > i
Definition misc_types.hh:67
Bitfield< 21 > ss
Definition misc_types.hh:60
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
Definition binary32.hh:36
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition misc.hh:49
uint64_t Tick
Tick count type.
Definition types.hh:58
statistics::Distribution ldsBankConflictDist
A virtual base opaque structure used to hold state associated with the packet (e.g....
Definition packet.hh:469
SenderState * predecessor
Definition packet.hh:470
const std::string & name()
Definition trace.cc:48

Generated on Tue Jun 18 2024 16:24:04 for gem5 by doxygen 1.11.0