gem5  v19.0.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
lds_state.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Authors: John Kalamatianos,
34  * Joe Gross
35  */
36 
37 #include "gpu-compute/lds_state.hh"
38 
39 #include <array>
40 #include <cstdio>
41 #include <cstdlib>
42 
45 #include "gpu-compute/shader.hh"
46 
50 LdsState::LdsState(const Params *params) :
51  ClockedObject(params),
52  tickEvent(this),
53  cuPort(name() + ".port", this),
54  maximumSize(params->size),
55  range(params->range),
56  bankConflictPenalty(params->bankConflictPenalty),
57  banks(params->banks)
58 {
59  fatal_if(params->banks <= 0,
60  "Number of LDS banks should be positive number");
61  fatal_if((params->banks & (params->banks - 1)) != 0,
62  "Number of LDS banks should be a power of 2");
63  fatal_if(params->size <= 0,
64  "cannot allocate an LDS with a size less than 1");
65  fatal_if(params->size % 2,
66  "the LDS should be an even number");
67 }
68 
72 LdsState *
73 LdsStateParams::create()
74 {
75  return new LdsState(this);
76 }
77 
81 void
83 {
84  // check that this gets assigned to the same thing each time
85  fatal_if(!x_parent, "x_parent should not be nullptr");
86  fatal_if(x_parent == parent,
87  "should not be setting the parent twice");
88 
89  parent = x_parent;
90  _name = x_parent->name() + ".LdsState";
91 }
92 
96 unsigned
97 LdsState::countBankConflicts(PacketPtr packet, unsigned *bankAccesses)
98 {
99  Packet::SenderState *baseSenderState = packet->senderState;
100  while (baseSenderState->predecessor) {
101  baseSenderState = baseSenderState->predecessor;
102  }
103  const ComputeUnit::LDSPort::SenderState *senderState =
104  dynamic_cast<ComputeUnit::LDSPort::SenderState *>(baseSenderState);
105 
106  fatal_if(!senderState,
107  "did not get the right sort of sender state");
108 
109  GPUDynInstPtr gpuDynInst = senderState->getMemInst();
110 
111  return countBankConflicts(gpuDynInst, bankAccesses);
112 }
113 
114 // Count the total number of bank conflicts for the local memory packet
115 unsigned
117  unsigned *numBankAccesses)
118 {
119  int bank_conflicts = 0;
120  std::vector<int> bank;
121  // the number of LDS banks being touched by the memory instruction
122  int numBanks = std::min(parent->wfSize(), banks);
123  // if the wavefront size is larger than the number of LDS banks, we
124  // need to iterate over all work items to calculate the total
125  // number of bank conflicts
126  int groups = (parent->wfSize() > numBanks) ?
127  (parent->wfSize() / numBanks) : 1;
128  for (int i = 0; i < groups; i++) {
129  // Address Array holding all the work item addresses of an instruction
130  std::vector<Addr> addr_array;
131  addr_array.resize(numBanks, 0);
132  bank.clear();
133  bank.resize(banks, 0);
134  int max_bank = 0;
135 
136  // populate the address array for all active work items
137  for (int j = 0; j < numBanks; j++) {
138  if (gpuDynInst->exec_mask[(i*numBanks)+j]) {
139  addr_array[j] = gpuDynInst->addr[(i*numBanks)+j];
140  } else {
141  addr_array[j] = std::numeric_limits<Addr>::max();
142  }
143  }
144 
145  if (gpuDynInst->isLoad() || gpuDynInst->isStore()) {
146  // mask identical addresses
147  for (int j = 0; j < numBanks; ++j) {
148  for (int j0 = 0; j0 < j; j0++) {
149  if (addr_array[j] != std::numeric_limits<Addr>::max()
150  && addr_array[j] == addr_array[j0]) {
151  addr_array[j] = std::numeric_limits<Addr>::max();
152  }
153  }
154  }
155  }
156  // calculate bank conflicts
157  for (int j = 0; j < numBanks; ++j) {
158  if (addr_array[j] != std::numeric_limits<Addr>::max()) {
159  int bankId = addr_array[j] % banks;
160  bank[bankId]++;
161  max_bank = std::max(max_bank, bank[bankId]);
162  // Count the number of LDS banks accessed.
163  // Since we have masked identical addresses all remaining
164  // accesses will need to be serialized if they access
165  // the same bank (bank conflict).
166  (*numBankAccesses)++;
167  }
168  }
169  bank_conflicts += max_bank;
170  }
171  panic_if(bank_conflicts > parent->wfSize(),
172  "Max bank conflicts should match num of work items per instr");
173  return bank_conflicts;
174 }
175 
179 bool
181 {
182  return ownerLds->processPacket(packet);
183 }
184 
187 {
189  dynamic_cast<ComputeUnit::LDSPort::SenderState *>(
190  packet->senderState);
191  return ss->getMemInst();
192 }
193 
197 bool
199 {
200  unsigned bankAccesses = 0;
201  // the number of conflicts this packet will have when accessing the LDS
202  unsigned bankConflicts = countBankConflicts(packet, &bankAccesses);
203  // count the total number of physical LDS bank accessed
204  parent->ldsBankAccesses += bankAccesses;
205  // count the LDS bank conflicts. A number set to 1 indicates one
206  // access per bank maximum so there are no bank conflicts
207  parent->ldsBankConflictDist.sample(bankConflicts-1);
208 
209  GPUDynInstPtr dynInst = getDynInstr(packet);
210  // account for the LDS bank conflict overhead
211  int busLength = (dynInst->isLoad()) ? parent->loadBusLength() :
212  (dynInst->isStore()) ? parent->storeBusLength() :
214  // delay for accessing the LDS
215  Tick processingTime =
216  parent->shader->ticks(bankConflicts * bankConflictPenalty) +
217  parent->shader->ticks(busLength);
218  // choose (delay + last packet in queue) or (now + delay) as the time to
219  // return this
220  Tick doneAt = earliestReturnTime() + processingTime;
221  // then store it for processing
222  return returnQueuePush(std::make_pair(doneAt, packet));
223 }
224 
228 bool
230 {
231  // TODO add time limits (e.g. one packet per cycle) and queue size limits
232  // and implement flow control
233  returnQueue.push(thePair);
234 
235  // if there is no set wakeup time, look through the queue
236  if (!tickEvent.scheduled()) {
237  process();
238  }
239 
240  return true;
241 }
242 
246 void
248 {
249  fatal("not implemented");
250 }
251 
255 void
257 {
258  // TODO verify that this is the right way to do this
259  assert(ownerLds->isRetryResp());
260  ownerLds->setRetryResp(false);
261  ownerLds->process();
262 }
263 
267 void
269 {
270  fatal("not implemented");
271 }
272 
276 bool
278 {
279  Tick now = clockEdge();
280 
281  // send back completed packets
282  while (!returnQueue.empty() && returnQueue.front().first <= now) {
283  PacketPtr packet = returnQueue.front().second;
284 
286  dynamic_cast<ComputeUnit::LDSPort::SenderState *>(
287  packet->senderState);
288 
289  GPUDynInstPtr gpuDynInst = ss->getMemInst();
290 
291  gpuDynInst->initiateAcc(gpuDynInst);
292 
293  packet->makeTimingResponse();
294 
295  returnQueue.pop();
296 
297  bool success = cuPort.sendTimingResp(packet);
298 
299  if (!success) {
300  retryResp = true;
301  panic("have not handled timing responses being NACK'd when sent"
302  "back");
303  }
304  }
305 
306  // determine the next wakeup time
307  if (!returnQueue.empty()) {
308 
309  Tick next = returnQueue.front().first;
310 
311  if (tickEvent.scheduled()) {
312 
313  if (next < tickEvent.when()) {
314 
316  tickEvent.schedule(next);
317  }
318  } else {
319  tickEvent.schedule(next);
320  }
321  }
322 
323  return true;
324 }
325 
329 void
331 {
332  ldsState->process();
333 }
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:167
GPUDynInstPtr getDynInstr(PacketPtr packet)
Definition: lds_state.cc:186
std::string _name
Definition: lds_state.hh:491
virtual void process()
wake up at this time and perform specified actions
Definition: lds_state.cc:330
#define fatal(...)
This implements a cprintf based fatal() function.
Definition: logging.hh:175
const std::string & name()
Definition: trace.cc:54
Bitfield< 7 > i
Tick earliestReturnTime() const
Definition: lds_state.hh:384
STL pair class.
Definition: stl.hh:61
Tick when() const
Get the time that the event is scheduled.
Definition: eventq.hh:401
std::queue< std::pair< Tick, PacketPtr > > returnQueue
Definition: lds_state.hh:230
int bankConflictPenalty
Definition: lds_state.hh:503
void makeTimingResponse()
Definition: packet.hh:955
int storeBusLength()
bool returnQueuePush(std::pair< Tick, PacketPtr > thePair)
add this to the queue of packets to be returned
Definition: lds_state.cc:229
virtual void recvFunctional(PacketPtr pkt)
receive a packet in functional mode
Definition: lds_state.cc:247
LdsState(const Params *params)
the default constructor that works with SWIG
Definition: lds_state.cc:50
LdsStateParams Params
Definition: lds_state.hh:252
bool processPacket(PacketPtr packet)
process an incoming packet, add it to the return queue
Definition: lds_state.cc:198
ComputeUnit * parent
Definition: lds_state.hh:489
Stats::Distribution ldsBankConflictDist
void schedule(Tick when)
Definition: lds_state.hh:137
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:48
bool sendTimingResp(PacketPtr pkt)
Attempt to send a timing response to the master port by calling its corresponding receive function...
Definition: port.hh:357
virtual void recvRetry()
receive a retry
Definition: lds_state.cc:268
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:385
SenderState * predecessor
Definition: packet.hh:405
uint64_t Tick
Tick count type.
Definition: types.hh:63
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
GPUDynInstPtr getMemInst() const
unsigned countBankConflicts(PacketPtr packet, unsigned *bankAccesses)
derive the gpu mem packet from the packet and then count the bank conflicts
Definition: lds_state.cc:97
int wfSize() const
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition: logging.hh:203
Bitfield< 21 > ss
virtual void recvRespRetry()
receive a retry for a response
Definition: lds_state.cc:256
virtual const std::string name() const
Definition: sim_object.hh:120
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
Definition: packet.hh:255
Tick clockEdge(Cycles cycles=Cycles(0)) const
Determine the tick when a cycle begins, by default the current one, but the argument also enables the...
A virtual base opaque structure used to hold state associated with the packet (e.g., an MSHR), specific to a SimObject that sees the packet.
Definition: packet.hh:403
Bitfield< 24 > j
Shader * shader
TickEvent tickEvent
Definition: lds_state.hh:225
SenderState is information carried along with the packet, esp.
bool retryResp
Definition: lds_state.hh:233
SenderState * senderState
This packet's sender state.
Definition: packet.hh:480
Tick ticks(int numCycles) const
Definition: shader.hh:91
int loadBusLength()
int banks
Definition: lds_state.hh:506
CuSidePort cuPort
Definition: lds_state.hh:487
Stats::Scalar ldsBankAccesses
bool process()
look for packets to return at this time
Definition: lds_state.cc:277
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition: logging.hh:185
void setParent(ComputeUnit *x_parent)
set the parent and name based on the parent
Definition: lds_state.cc:82
void sample(const U &v, int n=1)
Add a value to the distribution n times.
Definition: statistics.hh:1899
virtual bool recvTimingReq(PacketPtr pkt)
receive the packet from the CU
Definition: lds_state.cc:180

Generated on Fri Feb 28 2020 16:27:01 for gem5 by doxygen 1.8.13