gem5  v21.0.1.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
lds_state.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "gpu-compute/lds_state.hh"
35 
36 #include <array>
37 #include <cstdio>
38 #include <cstdlib>
39 
42 #include "gpu-compute/shader.hh"
43 
47 LdsState::LdsState(const Params &params) :
48  ClockedObject(params),
49  tickEvent(this),
50  cuPort(name() + ".port", this),
51  maximumSize(params.size),
52  range(params.range),
53  bankConflictPenalty(params.bankConflictPenalty),
54  banks(params.banks)
55 {
56  fatal_if(params.banks <= 0,
57  "Number of LDS banks should be positive number");
58  fatal_if((params.banks & (params.banks - 1)) != 0,
59  "Number of LDS banks should be a power of 2");
60  fatal_if(params.size <= 0,
61  "cannot allocate an LDS with a size less than 1");
62  fatal_if(params.size % 2,
63  "the LDS should be an even number");
64 }
65 
69 void
71 {
72  // check that this gets assigned to the same thing each time
73  fatal_if(!x_parent, "x_parent should not be nullptr");
74  fatal_if(x_parent == parent,
75  "should not be setting the parent twice");
76 
77  parent = x_parent;
78  _name = x_parent->name() + ".LdsState";
79 }
80 
84 unsigned
85 LdsState::countBankConflicts(PacketPtr packet, unsigned *bankAccesses)
86 {
87  Packet::SenderState *baseSenderState = packet->senderState;
88  while (baseSenderState->predecessor) {
89  baseSenderState = baseSenderState->predecessor;
90  }
91  const ComputeUnit::LDSPort::SenderState *senderState =
92  dynamic_cast<ComputeUnit::LDSPort::SenderState *>(baseSenderState);
93 
94  fatal_if(!senderState,
95  "did not get the right sort of sender state");
96 
97  GPUDynInstPtr gpuDynInst = senderState->getMemInst();
98 
99  return countBankConflicts(gpuDynInst, bankAccesses);
100 }
101 
102 // Count the total number of bank conflicts for the local memory packet
103 unsigned
105  unsigned *numBankAccesses)
106 {
107  int bank_conflicts = 0;
108  std::vector<int> bank;
109  // the number of LDS banks being touched by the memory instruction
110  int numBanks = std::min(parent->wfSize(), banks);
111  // if the wavefront size is larger than the number of LDS banks, we
112  // need to iterate over all work items to calculate the total
113  // number of bank conflicts
114  int groups = (parent->wfSize() > numBanks) ?
115  (parent->wfSize() / numBanks) : 1;
116  for (int i = 0; i < groups; i++) {
117  // Address Array holding all the work item addresses of an instruction
118  std::vector<Addr> addr_array;
119  addr_array.resize(numBanks, 0);
120  bank.clear();
121  bank.resize(banks, 0);
122  int max_bank = 0;
123 
124  // populate the address array for all active work items
125  for (int j = 0; j < numBanks; j++) {
126  if (gpuDynInst->exec_mask[(i*numBanks)+j]) {
127  addr_array[j] = gpuDynInst->addr[(i*numBanks)+j];
128  } else {
129  addr_array[j] = std::numeric_limits<Addr>::max();
130  }
131  }
132 
133  if (gpuDynInst->isLoad() || gpuDynInst->isStore()) {
134  // mask identical addresses
135  for (int j = 0; j < numBanks; ++j) {
136  for (int j0 = 0; j0 < j; j0++) {
137  if (addr_array[j] != std::numeric_limits<Addr>::max()
138  && addr_array[j] == addr_array[j0]) {
139  addr_array[j] = std::numeric_limits<Addr>::max();
140  }
141  }
142  }
143  }
144  // calculate bank conflicts
145  for (int j = 0; j < numBanks; ++j) {
146  if (addr_array[j] != std::numeric_limits<Addr>::max()) {
147  int bankId = addr_array[j] % banks;
148  bank[bankId]++;
149  max_bank = std::max(max_bank, bank[bankId]);
150  // Count the number of LDS banks accessed.
151  // Since we have masked identical addresses all remaining
152  // accesses will need to be serialized if they access
153  // the same bank (bank conflict).
154  (*numBankAccesses)++;
155  }
156  }
157  bank_conflicts += max_bank;
158  }
159  panic_if(bank_conflicts > parent->wfSize(),
160  "Max bank conflicts should match num of work items per instr");
161  return bank_conflicts;
162 }
163 
167 bool
169 {
170  return ownerLds->processPacket(packet);
171 }
172 
175 {
177  dynamic_cast<ComputeUnit::LDSPort::SenderState *>(
178  packet->senderState);
179  return ss->getMemInst();
180 }
181 
185 bool
187 {
188  unsigned bankAccesses = 0;
189  // the number of conflicts this packet will have when accessing the LDS
190  unsigned bankConflicts = countBankConflicts(packet, &bankAccesses);
191  // count the total number of physical LDS bank accessed
192  parent->stats.ldsBankAccesses += bankAccesses;
193  // count the LDS bank conflicts. A number set to 1 indicates one
194  // access per bank maximum so there are no bank conflicts
195  parent->stats.ldsBankConflictDist.sample(bankConflicts-1);
196 
197  GPUDynInstPtr dynInst = getDynInstr(packet);
198  // account for the LDS bank conflict overhead
199  int busLength = (dynInst->isLoad()) ? parent->loadBusLength() :
200  (dynInst->isStore()) ? parent->storeBusLength() :
202  // delay for accessing the LDS
203  Tick processingTime =
204  parent->cyclesToTicks(Cycles(bankConflicts * bankConflictPenalty)) +
205  parent->cyclesToTicks(Cycles(busLength));
206  // choose (delay + last packet in queue) or (now + delay) as the time to
207  // return this
208  Tick doneAt = earliestReturnTime() + processingTime;
209  // then store it for processing
210  return returnQueuePush(std::make_pair(doneAt, packet));
211 }
212 
216 bool
218 {
219  // TODO add time limits (e.g. one packet per cycle) and queue size limits
220  // and implement flow control
221  returnQueue.push(thePair);
222 
223  // if there is no set wakeup time, look through the queue
224  if (!tickEvent.scheduled()) {
225  process();
226  }
227 
228  return true;
229 }
230 
234 void
236 {
237  fatal("not implemented");
238 }
239 
243 void
245 {
246  // TODO verify that this is the right way to do this
247  assert(ownerLds->isRetryResp());
248  ownerLds->setRetryResp(false);
249  ownerLds->process();
250 }
251 
255 void
257 {
258  fatal("not implemented");
259 }
260 
264 bool
266 {
267  Tick now = clockEdge();
268 
269  // send back completed packets
270  while (!returnQueue.empty() && returnQueue.front().first <= now) {
271  PacketPtr packet = returnQueue.front().second;
272 
274  dynamic_cast<ComputeUnit::LDSPort::SenderState *>(
275  packet->senderState);
276 
277  GPUDynInstPtr gpuDynInst = ss->getMemInst();
278 
279  gpuDynInst->initiateAcc(gpuDynInst);
280 
281  packet->makeTimingResponse();
282 
283  returnQueue.pop();
284 
285  bool success = cuPort.sendTimingResp(packet);
286 
287  if (!success) {
288  retryResp = true;
289  panic("have not handled timing responses being NACK'd when sent"
290  "back");
291  }
292  }
293 
294  // determine the next wakeup time
295  if (!returnQueue.empty()) {
296 
297  Tick next = returnQueue.front().first;
298 
299  if (tickEvent.scheduled()) {
300 
301  if (next < tickEvent.when()) {
302 
304  tickEvent.schedule(next);
305  }
306  } else {
307  tickEvent.schedule(next);
308  }
309  }
310 
311  return true;
312 }
313 
317 void
319 {
320  ldsState->process();
321 }
fatal
#define fatal(...)
This implements a cprintf based fatal() function.
Definition: logging.hh:183
Event::scheduled
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:462
LdsState::tickEvent
TickEvent tickEvent
Definition: lds_state.hh:235
LdsState::CuSidePort::recvRetry
virtual void recvRetry()
receive a retry
Definition: lds_state.cc:256
Packet::makeTimingResponse
void makeTimingResponse()
Definition: packet.hh:1023
ComputeUnit::loadBusLength
int loadBusLength() const
Definition: compute_unit.hh:392
shader.hh
ResponsePort::sendTimingResp
bool sendTimingResp(PacketPtr pkt)
Attempt to send a timing response to the request port by calling its corresponding receive function.
Definition: port.hh:367
ArmISA::i
Bitfield< 7 > i
Definition: miscregs_types.hh:63
compute_unit.hh
LdsState::TickEvent::process
virtual void process()
wake up at this time and perform specified actions
Definition: lds_state.cc:318
Tick
uint64_t Tick
Tick count type.
Definition: types.hh:59
LdsState::returnQueuePush
bool returnQueuePush(std::pair< Tick, PacketPtr > thePair)
add this to the queue of packets to be returned
Definition: lds_state.cc:217
ComputeUnit::stats
ComputeUnit::ComputeUnitStats stats
LdsState::CuSidePort::recvTimingReq
virtual bool recvTimingReq(PacketPtr pkt)
receive the packet from the CU
Definition: lds_state.cc:168
std::vector< int >
LdsState::Params
LdsStateParams Params
Definition: lds_state.hh:262
LdsState::process
bool process()
look for packets to return at this time
Definition: lds_state.cc:265
Event::when
Tick when() const
Get the time that the event is scheduled.
Definition: eventq.hh:505
ComputeUnit::LDSPort::SenderState
SenderState is information carried along with the packet, esp.
Definition: compute_unit.hh:782
ClockedObject
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
Definition: clocked_object.hh:231
ComputeUnit
Definition: compute_unit.hh:200
ComputeUnit::ComputeUnitStats::ldsBankConflictDist
Stats::Distribution ldsBankConflictDist
Definition: compute_unit.hh:1016
ArmISA::j
Bitfield< 24 > j
Definition: miscregs_types.hh:54
LdsState::TickEvent::deschedule
void deschedule()
Definition: lds_state.hh:151
Clocked::cyclesToTicks
Tick cyclesToTicks(Cycles c) const
Definition: clocked_object.hh:224
ArmISA::ss
Bitfield< 21 > ss
Definition: miscregs_types.hh:56
LdsState::banks
int banks
Definition: lds_state.hh:529
Packet::SenderState
A virtual base opaque structure used to hold state associated with the packet (e.g....
Definition: packet.hh:432
LdsState::returnQueue
std::queue< std::pair< Tick, PacketPtr > > returnQueue
Definition: lds_state.hh:240
LdsState::TickEvent::schedule
void schedule(Tick when)
Definition: lds_state.hh:145
LdsState::earliestReturnTime
Tick earliestReturnTime() const
Definition: lds_state.hh:407
LdsState::CuSidePort::recvFunctional
virtual void recvFunctional(PacketPtr pkt)
receive a packet in functional mode
Definition: lds_state.cc:235
Clocked::clockEdge
Tick clockEdge(Cycles cycles=Cycles(0)) const
Determine the tick when a cycle begins, by default the current one, but the argument also enables the...
Definition: clocked_object.hh:174
ComputeUnit::storeBusLength
int storeBusLength() const
Definition: compute_unit.hh:391
gpu_dyn_inst.hh
LdsState::getDynInstr
GPUDynInstPtr getDynInstr(PacketPtr packet)
Definition: lds_state.cc:174
std::pair
STL pair class.
Definition: stl.hh:58
ComputeUnit::ComputeUnitStats::ldsBankAccesses
Stats::Scalar ldsBankAccesses
Definition: compute_unit.hh:1015
LdsState::bankConflictPenalty
int bankConflictPenalty
Definition: lds_state.hh:526
LdsState::retryResp
bool retryResp
Definition: lds_state.hh:243
name
const std::string & name()
Definition: trace.cc:48
LdsState::_name
std::string _name
Definition: lds_state.hh:514
LdsState::countBankConflicts
unsigned countBankConflicts(PacketPtr packet, unsigned *bankAccesses)
derive the gpu mem packet from the packet and then count the bank conflicts
Definition: lds_state.cc:85
LdsState::CuSidePort::ownerLds
LdsState * ownerLds
Definition: lds_state.hh:169
LdsState::setParent
void setParent(ComputeUnit *x_parent)
set the parent and name based on the parent
Definition: lds_state.cc:70
SimObject::name
virtual const std::string name() const
Definition: sim_object.hh:182
LdsState::processPacket
bool processPacket(PacketPtr packet)
process an incoming packet, add it to the return queue
Definition: lds_state.cc:186
panic_if
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition: logging.hh:197
LdsState::cuPort
CuSidePort cuPort
Definition: lds_state.hh:510
Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:258
ComputeUnit::LDSPort::SenderState::getMemInst
GPUDynInstPtr getMemInst() const
Definition: compute_unit.hh:795
ComputeUnit::wfSize
int wfSize() const
Definition: compute_unit.hh:393
GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:48
Stats::DistBase::sample
void sample(const U &v, int n=1)
Add a value to the distribution n times.
Definition: statistics.hh:1323
Cycles
Cycles is a wrapper class for representing cycle counts, i.e.
Definition: types.hh:79
LdsState::CuSidePort::recvRespRetry
virtual void recvRespRetry()
receive a retry for a response
Definition: lds_state.cc:244
LdsState::parent
ComputeUnit * parent
Definition: lds_state.hh:512
SimObject::params
const Params & params() const
Definition: sim_object.hh:168
Packet::senderState
SenderState * senderState
This packet's sender state.
Definition: packet.hh:509
LdsState::LdsState
LdsState(const Params &params)
the default constructor that works with SWIG
Definition: lds_state.cc:47
fatal_if
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition: logging.hh:219
Packet::SenderState::predecessor
SenderState * predecessor
Definition: packet.hh:434
panic
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:171
lds_state.hh

Generated on Tue Jun 22 2021 15:28:28 for gem5 by doxygen 1.8.17