gem5  v22.0.0.1
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
lds_state.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include "gpu-compute/lds_state.hh"
33 
34 #include <array>
35 #include <cstdio>
36 #include <cstdlib>
37 
40 #include "gpu-compute/shader.hh"
41 
42 namespace gem5
43 {
44 
48 LdsState::LdsState(const Params &params) :
49  ClockedObject(params),
50  tickEvent(this),
51  cuPort(name() + ".port", this),
52  maximumSize(params.size),
53  range(params.range),
54  bankConflictPenalty(params.bankConflictPenalty),
55  banks(params.banks)
56 {
57  fatal_if(params.banks <= 0,
58  "Number of LDS banks should be positive number");
59  fatal_if((params.banks & (params.banks - 1)) != 0,
60  "Number of LDS banks should be a power of 2");
61  fatal_if(params.size <= 0,
62  "cannot allocate an LDS with a size less than 1");
63  fatal_if(params.size % 2,
64  "the LDS should be an even number");
65 }
66 
70 void
72 {
73  // check that this gets assigned to the same thing each time
74  fatal_if(!x_parent, "x_parent should not be nullptr");
75  fatal_if(x_parent == parent,
76  "should not be setting the parent twice");
77 
78  parent = x_parent;
79  _name = x_parent->name() + ".LdsState";
80 }
81 
85 unsigned
86 LdsState::countBankConflicts(PacketPtr packet, unsigned *bankAccesses)
87 {
88  Packet::SenderState *baseSenderState = packet->senderState;
89  while (baseSenderState->predecessor) {
90  baseSenderState = baseSenderState->predecessor;
91  }
92  const ComputeUnit::LDSPort::SenderState *senderState =
93  dynamic_cast<ComputeUnit::LDSPort::SenderState *>(baseSenderState);
94 
95  fatal_if(!senderState,
96  "did not get the right sort of sender state");
97 
98  GPUDynInstPtr gpuDynInst = senderState->getMemInst();
99 
100  return countBankConflicts(gpuDynInst, bankAccesses);
101 }
102 
103 // Count the total number of bank conflicts for the local memory packet
104 unsigned
106  unsigned *numBankAccesses)
107 {
108  int bank_conflicts = 0;
109  std::vector<int> bank;
110  // the number of LDS banks being touched by the memory instruction
111  int numBanks = std::min(parent->wfSize(), banks);
112  // if the wavefront size is larger than the number of LDS banks, we
113  // need to iterate over all work items to calculate the total
114  // number of bank conflicts
115  int groups = (parent->wfSize() > numBanks) ?
116  (parent->wfSize() / numBanks) : 1;
117  for (int i = 0; i < groups; i++) {
118  // Address Array holding all the work item addresses of an instruction
119  std::vector<Addr> addr_array;
120  addr_array.resize(numBanks, 0);
121  bank.clear();
122  bank.resize(banks, 0);
123  int max_bank = 0;
124 
125  // populate the address array for all active work items
126  for (int j = 0; j < numBanks; j++) {
127  if (gpuDynInst->exec_mask[(i*numBanks)+j]) {
128  addr_array[j] = gpuDynInst->addr[(i*numBanks)+j];
129  } else {
130  addr_array[j] = std::numeric_limits<Addr>::max();
131  }
132  }
133 
134  if (gpuDynInst->isLoad() || gpuDynInst->isStore()) {
135  // mask identical addresses
136  for (int j = 0; j < numBanks; ++j) {
137  for (int j0 = 0; j0 < j; j0++) {
138  if (addr_array[j] != std::numeric_limits<Addr>::max()
139  && addr_array[j] == addr_array[j0]) {
140  addr_array[j] = std::numeric_limits<Addr>::max();
141  }
142  }
143  }
144  }
145  // calculate bank conflicts
146  for (int j = 0; j < numBanks; ++j) {
147  if (addr_array[j] != std::numeric_limits<Addr>::max()) {
148  int bankId = addr_array[j] % banks;
149  bank[bankId]++;
150  max_bank = std::max(max_bank, bank[bankId]);
151  // Count the number of LDS banks accessed.
152  // Since we have masked identical addresses all remaining
153  // accesses will need to be serialized if they access
154  // the same bank (bank conflict).
155  (*numBankAccesses)++;
156  }
157  }
158  bank_conflicts += max_bank;
159  }
160  panic_if(bank_conflicts > parent->wfSize(),
161  "Max bank conflicts should match num of work items per instr");
162  return bank_conflicts;
163 }
164 
168 bool
170 {
171  return ownerLds->processPacket(packet);
172 }
173 
176 {
178  dynamic_cast<ComputeUnit::LDSPort::SenderState *>(
179  packet->senderState);
180  return ss->getMemInst();
181 }
182 
186 bool
188 {
189  unsigned bankAccesses = 0;
190  // the number of conflicts this packet will have when accessing the LDS
191  unsigned bankConflicts = countBankConflicts(packet, &bankAccesses);
192  // count the total number of physical LDS bank accessed
193  parent->stats.ldsBankAccesses += bankAccesses;
194  // count the LDS bank conflicts. A number set to 1 indicates one
195  // access per bank maximum so there are no bank conflicts
196  parent->stats.ldsBankConflictDist.sample(bankConflicts-1);
197 
198  GPUDynInstPtr dynInst = getDynInstr(packet);
199  // account for the LDS bank conflict overhead
200  int busLength = (dynInst->isLoad()) ? parent->loadBusLength() :
201  (dynInst->isStore()) ? parent->storeBusLength() :
203  // delay for accessing the LDS
204  Tick processingTime =
205  parent->cyclesToTicks(Cycles(bankConflicts * bankConflictPenalty)) +
206  parent->cyclesToTicks(Cycles(busLength));
207  // choose (delay + last packet in queue) or (now + delay) as the time to
208  // return this
209  Tick doneAt = earliestReturnTime() + processingTime;
210  // then store it for processing
211  return returnQueuePush(std::make_pair(doneAt, packet));
212 }
213 
217 bool
219 {
220  // TODO add time limits (e.g. one packet per cycle) and queue size limits
221  // and implement flow control
222  returnQueue.push(thePair);
223 
224  // if there is no set wakeup time, look through the queue
225  if (!tickEvent.scheduled()) {
226  process();
227  }
228 
229  return true;
230 }
231 
235 void
237 {
238  fatal("not implemented");
239 }
240 
244 void
246 {
247  // TODO verify that this is the right way to do this
248  assert(ownerLds->isRetryResp());
249  ownerLds->setRetryResp(false);
250  ownerLds->process();
251 }
252 
256 void
258 {
259  fatal("not implemented");
260 }
261 
265 bool
267 {
268  Tick now = clockEdge();
269 
270  // send back completed packets
271  while (!returnQueue.empty() && returnQueue.front().first <= now) {
272  PacketPtr packet = returnQueue.front().second;
273 
275  dynamic_cast<ComputeUnit::LDSPort::SenderState *>(
276  packet->senderState);
277 
278  GPUDynInstPtr gpuDynInst = ss->getMemInst();
279 
280  gpuDynInst->initiateAcc(gpuDynInst);
281 
282  packet->makeTimingResponse();
283 
284  returnQueue.pop();
285 
286  bool success = cuPort.sendTimingResp(packet);
287 
288  if (!success) {
289  retryResp = true;
290  panic("have not handled timing responses being NACK'd when sent"
291  "back");
292  }
293  }
294 
295  // determine the next wakeup time
296  if (!returnQueue.empty()) {
297 
298  Tick next = returnQueue.front().first;
299 
300  if (tickEvent.scheduled()) {
301 
302  if (next < tickEvent.when()) {
303 
305  tickEvent.schedule(next);
306  }
307  } else {
308  tickEvent.schedule(next);
309  }
310  }
311 
312  return true;
313 }
314 
318 void
320 {
321  ldsState->process();
322 }
323 
324 } // namespace gem5
gem5::LdsState::parent
ComputeUnit * parent
Definition: lds_state.hh:513
fatal
#define fatal(...)
This implements a cprintf based fatal() function.
Definition: logging.hh:190
gem5::LdsState::TickEvent::process
virtual void process()
wake up at this time and perform specified actions
Definition: lds_state.cc:319
gem5::Event::when
Tick when() const
Get the time that the event is scheduled.
Definition: eventq.hh:508
gem5::LdsState::retryResp
bool retryResp
Definition: lds_state.hh:244
shader.hh
gem5::LdsState::CuSidePort::recvFunctional
virtual void recvFunctional(PacketPtr pkt)
receive a packet in functional mode
Definition: lds_state.cc:236
gem5::ComputeUnit::LDSPort::SenderState
SenderState is information carried along with the packet, esp.
Definition: compute_unit.hh:834
gem5::LdsState::Params
LdsStateParams Params
Definition: lds_state.hh:263
compute_unit.hh
gem5::ComputeUnit::stats
gem5::ComputeUnit::ComputeUnitStats stats
gem5::LdsState::setParent
void setParent(ComputeUnit *x_parent)
set the parent and name based on the parent
Definition: lds_state.cc:71
gem5::LdsState::LdsState
LdsState(const Params &params)
the default constructor that works with SWIG
Definition: lds_state.cc:48
gem5::ComputeUnit::ComputeUnitStats::ldsBankConflictDist
statistics::Distribution ldsBankConflictDist
Definition: compute_unit.hh:1068
std::vector< int >
gem5::ComputeUnit::storeBusLength
int storeBusLength() const
Definition: compute_unit.hh:392
gem5::ArmISA::i
Bitfield< 7 > i
Definition: misc_types.hh:67
gem5::LdsState::TickEvent::schedule
void schedule(Tick when)
Definition: lds_state.hh:146
gem5::statistics::DistBase::sample
void sample(const U &v, int n=1)
Add a value to the distribution n times.
Definition: statistics.hh:1328
gem5::Packet::SenderState::predecessor
SenderState * predecessor
Definition: packet.hh:467
gem5::Cycles
Cycles is a wrapper class for representing cycle counts, i.e.
Definition: types.hh:78
gem5::ComputeUnit::loadBusLength
int loadBusLength() const
Definition: compute_unit.hh:393
gem5::ArmISA::j
Bitfield< 24 > j
Definition: misc_types.hh:57
gem5::ComputeUnit
Definition: compute_unit.hh:201
gem5::LdsState::getDynInstr
GPUDynInstPtr getDynInstr(PacketPtr packet)
Definition: lds_state.cc:175
gem5::Named::name
virtual std::string name() const
Definition: named.hh:47
gem5::Clocked::cyclesToTicks
Tick cyclesToTicks(Cycles c) const
Definition: clocked_object.hh:227
gem5::SimObject::params
const Params & params() const
Definition: sim_object.hh:176
gem5::Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:291
gem5::LdsState::CuSidePort::recvTimingReq
virtual bool recvTimingReq(PacketPtr pkt)
receive the packet from the CU
Definition: lds_state.cc:169
gem5::Tick
uint64_t Tick
Tick count type.
Definition: types.hh:58
gem5::LdsState::earliestReturnTime
Tick earliestReturnTime() const
Definition: lds_state.hh:408
gem5::LdsState::processPacket
bool processPacket(PacketPtr packet)
process an incoming packet, add it to the return queue
Definition: lds_state.cc:187
gem5::ComputeUnit::ComputeUnitStats::ldsBankAccesses
statistics::Scalar ldsBankAccesses
Definition: compute_unit.hh:1067
gpu_dyn_inst.hh
gem5::ResponsePort::sendTimingResp
bool sendTimingResp(PacketPtr pkt)
Attempt to send a timing response to the request port by calling its corresponding receive function.
Definition: port.hh:370
ss
std::stringstream ss
Definition: trace.test.cc:45
gem5::LdsState::_name
std::string _name
Definition: lds_state.hh:515
gem5::LdsState::CuSidePort::recvRetry
virtual void recvRetry()
receive a retry
Definition: lds_state.cc:257
gem5::Packet::SenderState
A virtual base opaque structure used to hold state associated with the packet (e.g....
Definition: packet.hh:465
std::pair
STL pair class.
Definition: stl.hh:58
gem5::LdsState::bankConflictPenalty
int bankConflictPenalty
Definition: lds_state.hh:527
gem5::Packet::senderState
SenderState * senderState
This packet's sender state.
Definition: packet.hh:542
name
const std::string & name()
Definition: trace.cc:49
gem5::GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:49
gem5::ComputeUnit::wfSize
int wfSize() const
Definition: compute_unit.hh:394
gem5::ClockedObject
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
Definition: clocked_object.hh:234
gem5::Clocked::clockEdge
Tick clockEdge(Cycles cycles=Cycles(0)) const
Determine the tick when a cycle begins, by default the current one, but the argument also enables the...
Definition: clocked_object.hh:177
panic_if
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition: logging.hh:204
gem5::Packet::makeTimingResponse
void makeTimingResponse()
Definition: packet.hh:1062
gem5::LdsState::countBankConflicts
unsigned countBankConflicts(PacketPtr packet, unsigned *bankAccesses)
derive the gpu mem packet from the packet and then count the bank conflicts
Definition: lds_state.cc:86
gem5::ComputeUnit::LDSPort::SenderState::getMemInst
GPUDynInstPtr getMemInst() const
Definition: compute_unit.hh:847
gem5::LdsState::returnQueue
std::queue< std::pair< Tick, PacketPtr > > returnQueue
Definition: lds_state.hh:241
gem5::LdsState::returnQueuePush
bool returnQueuePush(std::pair< Tick, PacketPtr > thePair)
add this to the queue of packets to be returned
Definition: lds_state.cc:218
gem5::LdsState::banks
int banks
Definition: lds_state.hh:530
gem5::LdsState::TickEvent::deschedule
void deschedule()
Definition: lds_state.hh:152
fatal_if
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition: logging.hh:226
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: gpu_translation_state.hh:37
gem5::LdsState::process
bool process()
look for packets to return at this time
Definition: lds_state.cc:266
gem5::LdsState::CuSidePort::recvRespRetry
virtual void recvRespRetry()
receive a retry for a response
Definition: lds_state.cc:245
gem5::LdsState::cuPort
CuSidePort cuPort
Definition: lds_state.hh:511
gem5::LdsState::CuSidePort::ownerLds
LdsState * ownerLds
Definition: lds_state.hh:170
gem5::LdsState::tickEvent
TickEvent tickEvent
Definition: lds_state.hh:236
gem5::Event::scheduled
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:465
panic
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:178
lds_state.hh

Generated on Wed Jul 13 2022 10:39:22 for gem5 by doxygen 1.8.17