gem5  v22.1.0.0
lds_state.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include "gpu-compute/lds_state.hh"
33 
34 #include <array>
35 #include <cstdio>
36 #include <cstdlib>
37 
40 #include "gpu-compute/shader.hh"
41 
42 namespace gem5
43 {
44 
48 LdsState::LdsState(const Params &params) :
49  ClockedObject(params),
50  tickEvent(this),
51  cuPort(name() + ".port", this),
52  maximumSize(params.size),
53  range(params.range),
54  bankConflictPenalty(params.bankConflictPenalty),
55  banks(params.banks)
56 {
57  fatal_if(params.banks <= 0,
58  "Number of LDS banks should be positive number");
59  fatal_if((params.banks & (params.banks - 1)) != 0,
60  "Number of LDS banks should be a power of 2");
61  fatal_if(params.size <= 0,
62  "cannot allocate an LDS with a size less than 1");
63  fatal_if(params.size % 2,
64  "the LDS should be an even number");
65 }
66 
70 void
72 {
73  // check that this gets assigned to the same thing each time
74  fatal_if(!x_parent, "x_parent should not be nullptr");
75  fatal_if(x_parent == parent,
76  "should not be setting the parent twice");
77 
78  parent = x_parent;
79  _name = x_parent->name() + ".LdsState";
80 }
81 
85 unsigned
86 LdsState::countBankConflicts(PacketPtr packet, unsigned *bankAccesses)
87 {
88  Packet::SenderState *baseSenderState = packet->senderState;
89  while (baseSenderState->predecessor) {
90  baseSenderState = baseSenderState->predecessor;
91  }
92  const ComputeUnit::LDSPort::SenderState *senderState =
93  dynamic_cast<ComputeUnit::LDSPort::SenderState *>(baseSenderState);
94 
95  fatal_if(!senderState,
96  "did not get the right sort of sender state");
97 
98  GPUDynInstPtr gpuDynInst = senderState->getMemInst();
99 
100  return countBankConflicts(gpuDynInst, bankAccesses);
101 }
102 
103 // Count the total number of bank conflicts for the local memory packet
104 unsigned
106  unsigned *numBankAccesses)
107 {
108  int bank_conflicts = 0;
109  std::vector<int> bank;
110  // the number of LDS banks being touched by the memory instruction
111  int numBanks = std::min(parent->wfSize(), banks);
112  // if the wavefront size is larger than the number of LDS banks, we
113  // need to iterate over all work items to calculate the total
114  // number of bank conflicts
115  int groups = (parent->wfSize() > numBanks) ?
116  (parent->wfSize() / numBanks) : 1;
117  for (int i = 0; i < groups; i++) {
118  // Address Array holding all the work item addresses of an instruction
119  std::vector<Addr> addr_array;
120  addr_array.resize(numBanks, 0);
121  bank.clear();
122  bank.resize(banks, 0);
123  int max_bank = 0;
124 
125  // populate the address array for all active work items
126  for (int j = 0; j < numBanks; j++) {
127  if (gpuDynInst->exec_mask[(i*numBanks)+j]) {
128  addr_array[j] = gpuDynInst->addr[(i*numBanks)+j];
129  } else {
130  addr_array[j] = std::numeric_limits<Addr>::max();
131  }
132  }
133 
134  if (gpuDynInst->isLoad() || gpuDynInst->isStore()) {
135  // mask identical addresses
136  for (int j = 0; j < numBanks; ++j) {
137  for (int j0 = 0; j0 < j; j0++) {
138  if (addr_array[j] != std::numeric_limits<Addr>::max()
139  && addr_array[j] == addr_array[j0]) {
140  addr_array[j] = std::numeric_limits<Addr>::max();
141  }
142  }
143  }
144  }
145  // calculate bank conflicts
146  for (int j = 0; j < numBanks; ++j) {
147  if (addr_array[j] != std::numeric_limits<Addr>::max()) {
148  int bankId = addr_array[j] % banks;
149  bank[bankId]++;
150  max_bank = std::max(max_bank, bank[bankId]);
151  // Count the number of LDS banks accessed.
152  // Since we have masked identical addresses all remaining
153  // accesses will need to be serialized if they access
154  // the same bank (bank conflict).
155  (*numBankAccesses)++;
156  }
157  }
158  bank_conflicts += max_bank;
159  }
160  panic_if(bank_conflicts > parent->wfSize(),
161  "Max bank conflicts should match num of work items per instr");
162  return bank_conflicts;
163 }
164 
168 bool
170 {
171  return ownerLds->processPacket(packet);
172 }
173 
176 {
178  dynamic_cast<ComputeUnit::LDSPort::SenderState *>(
179  packet->senderState);
180  return ss->getMemInst();
181 }
182 
186 bool
188 {
189  unsigned bankAccesses = 0;
190  // the number of conflicts this packet will have when accessing the LDS
191  unsigned bankConflicts = countBankConflicts(packet, &bankAccesses);
192  // count the total number of physical LDS bank accessed
193  parent->stats.ldsBankAccesses += bankAccesses;
194  // count the LDS bank conflicts. A number set to 1 indicates one
195  // access per bank maximum so there are no bank conflicts
196  parent->stats.ldsBankConflictDist.sample(bankConflicts-1);
197 
198  GPUDynInstPtr dynInst = getDynInstr(packet);
199  // account for the LDS bank conflict overhead
200  int busLength = (dynInst->isLoad()) ? parent->loadBusLength() :
201  (dynInst->isStore()) ? parent->storeBusLength() :
203  // delay for accessing the LDS
204  Tick processingTime =
205  parent->cyclesToTicks(Cycles(bankConflicts * bankConflictPenalty)) +
206  parent->cyclesToTicks(Cycles(busLength));
207  // choose (delay + last packet in queue) or (now + delay) as the time to
208  // return this
209  Tick doneAt = earliestReturnTime() + processingTime;
210  // then store it for processing
211  return returnQueuePush(std::make_pair(doneAt, packet));
212 }
213 
217 bool
219 {
220  // TODO add time limits (e.g. one packet per cycle) and queue size limits
221  // and implement flow control
222  returnQueue.push(thePair);
223 
224  // if there is no set wakeup time, look through the queue
225  if (!tickEvent.scheduled()) {
226  process();
227  }
228 
229  return true;
230 }
231 
235 void
237 {
238  fatal("not implemented");
239 }
240 
244 void
246 {
247  // TODO verify that this is the right way to do this
248  assert(ownerLds->isRetryResp());
249  ownerLds->setRetryResp(false);
250  ownerLds->process();
251 }
252 
256 void
258 {
259  fatal("not implemented");
260 }
261 
265 bool
267 {
268  Tick now = clockEdge();
269 
270  // send back completed packets
271  while (!returnQueue.empty() && returnQueue.front().first <= now) {
272  PacketPtr packet = returnQueue.front().second;
273 
275  dynamic_cast<ComputeUnit::LDSPort::SenderState *>(
276  packet->senderState);
277 
278  GPUDynInstPtr gpuDynInst = ss->getMemInst();
279 
280  gpuDynInst->initiateAcc(gpuDynInst);
281 
282  packet->makeTimingResponse();
283 
284  returnQueue.pop();
285 
286  bool success = cuPort.sendTimingResp(packet);
287 
288  if (!success) {
289  retryResp = true;
290  panic("have not handled timing responses being NACK'd when sent"
291  "back");
292  }
293  }
294 
295  // determine the next wakeup time
296  if (!returnQueue.empty()) {
297 
298  Tick next = returnQueue.front().first;
299 
300  if (tickEvent.scheduled()) {
301 
302  if (next < tickEvent.when()) {
303 
305  tickEvent.schedule(next);
306  }
307  } else {
308  tickEvent.schedule(next);
309  }
310  }
311 
312  return true;
313 }
314 
318 void
320 {
321  ldsState->process();
322 }
323 
324 } // namespace gem5
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
Tick clockEdge(Cycles cycles=Cycles(0)) const
Determine the tick when a cycle begins, by default the current one, but the argument also enables the...
Tick cyclesToTicks(Cycles c) const
SenderState is information carried along with the packet, esp.
int wfSize() const
int loadBusLength() const
int storeBusLength() const
gem5::ComputeUnit::ComputeUnitStats stats
Cycles is a wrapper class for representing cycle counts, i.e.
Definition: types.hh:79
virtual void recvRetry()
receive a retry
Definition: lds_state.cc:257
virtual bool recvTimingReq(PacketPtr pkt)
receive the packet from the CU
Definition: lds_state.cc:169
virtual void recvFunctional(PacketPtr pkt)
receive a packet in functional mode
Definition: lds_state.cc:236
virtual void recvRespRetry()
receive a retry for a response
Definition: lds_state.cc:245
virtual void process()
wake up at this time and perform specified actions
Definition: lds_state.cc:319
void schedule(Tick when)
Definition: lds_state.hh:167
bool process()
look for packets to return at this time
Definition: lds_state.cc:266
std::string _name
Definition: lds_state.hh:536
bool returnQueuePush(std::pair< Tick, PacketPtr > thePair)
add this to the queue of packets to be returned
Definition: lds_state.cc:218
LdsState(const Params &params)
the default constructor that works with SWIG
Definition: lds_state.cc:48
bool processPacket(PacketPtr packet)
process an incoming packet, add it to the return queue
Definition: lds_state.cc:187
int bankConflictPenalty
Definition: lds_state.hh:548
TickEvent tickEvent
Definition: lds_state.hh:257
unsigned countBankConflicts(PacketPtr packet, unsigned *bankAccesses)
derive the gpu mem packet from the packet and then count the bank conflicts
Definition: lds_state.cc:86
std::queue< std::pair< Tick, PacketPtr > > returnQueue
Definition: lds_state.hh:262
ComputeUnit * parent
Definition: lds_state.hh:534
LdsStateParams Params
Definition: lds_state.hh:284
void setParent(ComputeUnit *x_parent)
set the parent and name based on the parent
Definition: lds_state.cc:71
CuSidePort cuPort
Definition: lds_state.hh:532
GPUDynInstPtr getDynInstr(PacketPtr packet)
Definition: lds_state.cc:175
Tick earliestReturnTime() const
Definition: lds_state.hh:429
virtual std::string name() const
Definition: named.hh:47
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:294
void makeTimingResponse()
Definition: packet.hh:1077
SenderState * senderState
This packet's sender state.
Definition: packet.hh:544
bool sendTimingResp(PacketPtr pkt)
Attempt to send a timing response to the request port by calling its corresponding receive function.
Definition: port.hh:370
void sample(const U &v, int n=1)
Add a value to the distribution n times.
Definition: statistics.hh:1328
STL pair class.
Definition: stl.hh:58
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:465
Tick when() const
Get the time that the event is scheduled.
Definition: eventq.hh:508
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:178
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition: logging.hh:226
#define fatal(...)
This implements a cprintf based fatal() function.
Definition: logging.hh:190
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition: logging.hh:204
const Params & params() const
Definition: sim_object.hh:176
Bitfield< 7 > i
Definition: misc_types.hh:67
Bitfield< 24 > j
Definition: misc_types.hh:57
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:49
uint64_t Tick
Tick count type.
Definition: types.hh:58
statistics::Distribution ldsBankConflictDist
A virtual base opaque structure used to hold state associated with the packet (e.g....
Definition: packet.hh:468
SenderState * predecessor
Definition: packet.hh:469
const std::string & name()
Definition: trace.cc:49
std::stringstream ss
Definition: trace.test.cc:45

Generated on Wed Dec 21 2022 10:22:35 for gem5 by doxygen 1.9.1