gem5  v22.1.0.0
gpu_wavefront.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2021 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
33 
34 #include "debug/ProtocolTest.hh"
35 
36 namespace gem5
37 {
38 
// Constructor: initializes TesterThread with p and copies p.cu_id into cuId.
// NOTE(review): the signature line (embedded line 39) is missing from this
// listing; the symbol index at the end of the page lists
// "GpuWavefront(const Params &p)" - confirm against the real source file.
40  : TesterThread(p), cuId(p.cu_id)
41 {
// Build a printable thread name from the tester thread ID and the CU ID.
42  threadName = "GpuWavefront(TesterThread ID = " + std::to_string(threadId) +
43  ", CU ID = " + std::to_string(cuId) + ")";
// Give this thread's event a description string.
44  threadEvent.setDesc("GpuWavefront tick");
45 }
46 
// Empty body: no statements are executed here.
// NOTE(review): the signature line (embedded line 47) is missing from this
// listing; presumably this is ~GpuWavefront(), which appears in the page's
// symbol index - confirm against the real source file.
48 {
49 
50 }
51 
// Walks every lane of the current action and, for each lane with a valid
// location, builds a ReadReq packet for that location's address, sends it
// through the port and records it in outstandingLoads.
// NOTE(review): the function-name line (embedded line 53) is missing from
// this listing; presumably issueLoadOps, judging by the "Issuing Load" trace
// message - confirm. Embedded lines 56, 83, 88 and 91 are missing as well, so
// some statements of the real function are not shown here.
52 void
54 {
55  assert(curAction);
57  // we should not have any outstanding fence or atomic op at this point
58  assert(pendingFenceCount == 0);
59  assert(pendingAtomicCount == 0);
60 
61  for (int lane = 0; lane < numLanes; ++lane) {
62  Location location = curAction->getLocation(lane);
63  assert(location >= AddressManager::INVALID_LOCATION);
64 
65  // Make a request if we do not get an INVALID_LOCATION for this lane.
66  if (location >= 0) {
67  Addr address = addrManager->getAddress(location);
68  DPRINTF(ProtocolTest, "%s Episode %d: Issuing Load - Addr %s\n",
69  this->getName(), curEpisode->getEpisodeId(),
70  ruby::printAddress(address));
71 
// each load reads exactly one Value
72  int load_size = sizeof(Value);
73 
74  // for now, assert address is aligned to the load size (sizeof(Value))
75  assert(address % load_size == 0);
76 
77  auto req = std::make_shared<Request>(address, load_size,
78  0, tester->requestorId(),
79  0, threadId, nullptr);
// the tester address is used directly as the physical address
80  req->setPaddr(address);
81  req->setReqInstSeqNum(tester->getActionSeqNum());
82  // set protocol-specific flags
// NOTE(review): the statement described above (embedded line 83) is not shown
84 
85  PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
// the buffer is handed to the packet via dataDynamic, which the page's symbol
// index documents as taking a pointer that should have delete [] called on it
86  uint8_t* data = new uint8_t[load_size];
87  pkt->dataDynamic(data);
89 
90  // increment the number of outstanding ld_st requests
// NOTE(review): the statement described above (embedded line 91) is not shown
92 
// a rejected request is treated as fatal; there is no retry path here
93  if (!port->sendTimingReq(pkt)) {
94  panic("Not expected failed sendTimingReq\n");
95  }
96 
97  // insert an outstanding load
98  addOutstandingReqs(outstandingLoads, address, lane, location);
99  }
100  }
101 }
102 
// Walks every lane of the current action and, for each lane with a valid
// location, builds a WriteReq packet carrying (logged value + 1), sends it
// through the port and records it in outstandingStores together with the
// value being written.
// NOTE(review): the function-name line (embedded line 104) is missing from
// this listing; presumably issueStoreOps, judging by the "Issuing Store"
// trace message - confirm. Embedded lines 107, 127, 136 and 147 are missing
// as well, so some statements (and one DPRINTF argument line) are not shown.
103 void
105 {
106  assert(curAction);
108  // we should not have any outstanding fence or atomic op at this point
109  assert(pendingFenceCount == 0);
110  assert(pendingAtomicCount == 0);
111 
112  for (int lane = 0; lane < numLanes; ++lane) {
113  Location location = curAction->getLocation(lane);
114  assert(location >= AddressManager::INVALID_LOCATION);
115 
116  // Make a request if we do not get an INVALID_LOCATION for this lane.
117  if (location >= 0) {
118  // prepare the next value to store: one more than the logged value
119  Value new_value = addrManager->getLoggedValue(location) + 1;
120 
121  Addr address = addrManager->getAddress(location);
122  // must be aligned with store size (sizeof(Value))
123  assert(address % sizeof(Value) == 0);
124 
125  DPRINTF(ProtocolTest, "%s Episode %d: Issuing Store - Addr %s - "
126  "Value %d\n", this->getName(),
128  new_value);
129 
130  auto req = std::make_shared<Request>(address, sizeof(Value),
131  0, tester->requestorId(), 0,
132  threadId, nullptr);
// the tester address is used directly as the physical address
133  req->setPaddr(address);
134  req->setReqInstSeqNum(tester->getActionSeqNum());
135  // set protocol-specific flags
// NOTE(review): the statement described above (embedded line 136) is not shown
137 
138  PacketPtr pkt = new Packet(req, MemCmd::WriteReq);
// copy new_value byte by byte into a heap buffer that is then handed to the
// packet via dataDynamic
139  uint8_t *writeData = new uint8_t[sizeof(Value)];
140  for (int j = 0; j < sizeof(Value); ++j) {
141  writeData[j] = ((uint8_t*)&new_value)[j];
142  }
143  pkt->dataDynamic(writeData);
// attach a sender state that points back at this thread
144  pkt->senderState = new ProtocolTester::SenderState(this);
145 
146  // increment the number of outstanding ld_st requests
// NOTE(review): the statement described above (embedded line 147) is not shown
148 
// a rejected request is treated as fatal; there is no retry path here
149  if (!port->sendTimingReq(pkt)) {
150  panic("Not expecting a failed sendTimingReq\n");
151  }
152 
153  // add an outstanding store, remembering the value that was written
154  addOutstandingReqs(outstandingStores, address, lane, location,
155  new_value);
156  }
157  }
158 }
159 
// For every lane of the current action, builds a SwapReq packet whose request
// carries an AtomicOpInc<Value> functor, sends it through the port and records
// it in outstandingAtomics. Unlike the load/store routines in this file, every
// lane is asserted to have a non-negative location (no lane is skipped).
// NOTE(review): the function-name line (embedded line 161) is missing from
// this listing; presumably issueAtomicOps, judging by the "Issuing Atomic_Inc"
// trace message - confirm. Embedded lines 164, 171, 187, 193 and 205 are
// missing as well, so some statements and Request arguments are not shown.
160 void
162 {
163  assert(curAction);
165  // we should not have any outstanding ops at this point
166  assert(pendingFenceCount == 0);
167  assert(pendingLdStCount == 0);
168  assert(pendingAtomicCount == 0);
169 
170  // we use atomic_inc in the tester
// NOTE(review): the statement described above (embedded line 171) is not shown
172 
173  for (int lane = 0; lane < numLanes; ++lane) {
174  Location location = curAction->getLocation(lane);
175  assert(location >= 0);
176 
177  Addr address = addrManager->getAddress(location);
178 
179  DPRINTF(ProtocolTest, "%s Episode %d: Issuing Atomic_Inc - Addr %s\n",
180  this->getName(), curEpisode->getEpisodeId(),
181  ruby::printAddress(address));
182 
183  // must be aligned with the size of the atomic operand (sizeof(Value))
184  assert(address % sizeof(Value) == 0);
// the raw functor pointer is wrapped in AtomicOpFunctorPtr (a std::unique_ptr
// per the page's symbol index) when it is passed to the Request below
185  AtomicOpFunctor *amo_op = new AtomicOpInc<Value>();
186  auto req = std::make_shared<Request>(address, sizeof(Value),
188  0, threadId,
189  AtomicOpFunctorPtr(amo_op));
// the tester address is used directly as the physical address
190  req->setPaddr(address);
191  req->setReqInstSeqNum(tester->getActionSeqNum());
192  // set protocol-specific flags
// NOTE(review): the statement described above (embedded line 193) is not shown
194 
195  PacketPtr pkt = new Packet(req, MemCmd::SwapReq);
// buffer for the value returned by the atomic; handed to the packet
196  uint8_t* data = new uint8_t[sizeof(Value)];
197  pkt->dataDynamic(data);
// attach a sender state that points back at this thread
198  pkt->senderState = new ProtocolTester::SenderState(this);
199 
// a rejected request is treated as fatal; there is no retry path here
200  if (!port->sendTimingReq(pkt)) {
201  panic("Not expecting failed sendTimingReq\n");
202  }
203 
204  // increment the number of outstanding atomic ops
// NOTE(review): the statement described above (embedded line 205) is not shown
206 
207  // add an outstanding atomic
208  addOutstandingReqs(outstandingAtomics, address, lane, location);
209  }
210 }
211 
// Builds a zero-address, zero-size request flagged with Request::INV_L1,
// wraps it in a MemSyncReq packet and sends it through the port.
// NOTE(review): the function-name line (embedded line 213) is missing from
// this listing; presumably issueAcquireOp (listed in the page's symbol index),
// judging by the "Issuing Acquire" trace message - confirm. Embedded lines
// 216, 219 and 238 are missing as well: the DPRINTF call below is cut off
// after its first line and the fence-counter update is not shown.
212 void
214 {
215  DPRINTF(ProtocolTest, "%s Episode %d: Issuing Acquire\n", this->getName(),
217 
218  assert(curAction);
220  // we should not have any outstanding ops at this point
221  assert(pendingFenceCount == 0);
222  assert(pendingLdStCount == 0);
223  assert(pendingAtomicCount == 0);
224 
// the acquire carries no data: address 0 and size 0
225  auto acq_req = std::make_shared<Request>(0, 0, 0,
226  tester->requestorId(), 0,
227  threadId, nullptr);
228  acq_req->setPaddr(0);
229  acq_req->setReqInstSeqNum(tester->getActionSeqNum());
// mark the request with the INV_L1 cache-coherence flag
230  acq_req->setCacheCoherenceFlags(Request::INV_L1);
231  // set protocol-specific flags
232  setExtraRequestFlags(acq_req);
233 
234  PacketPtr pkt = new Packet(acq_req, MemCmd::MemSyncReq);
// attach a sender state that points back at this thread
235  pkt->senderState = new ProtocolTester::SenderState(this);
236 
237  // increment the number of outstanding fence requests
// NOTE(review): the statement described above (embedded line 238) is not shown
239 
// a rejected request is treated as fatal; there is no retry path here
240  if (!port->sendTimingReq(pkt)) {
241  panic("Not expecting failed sendTimingReq\n");
242  }
243 }
244 
// Release operation: sends no request. It only emits a trace message and, if
// the thread event is not already scheduled, schedules a wakeup.
// NOTE(review): the function-name line (embedded line 246) is missing from
// this listing; the comment in the body names it issueReleaseOp. Embedded
// line 249 is missing as well, so the DPRINTF call below is cut off after its
// first line.
245 void
247 {
248  DPRINTF(ProtocolTest, "%s Episode %d: Issuing Release\n", this->getName(),
250 
251  // A release fence simply waits for all previous stores to complete. All
252  // previous loads and stores were done before this release operation is
253  // issued, so issueReleaseOp is just a no-op in this tester.
254 
255  // we may be able to issue an action. Let's check
256  if (!threadEvent.scheduled()) {
257  scheduleWakeup();
258  }
259 }
260 
// Handles a response packet. Dispatches on the packet's command (MemSyncResp,
// ReadResp, WriteResp, SwapResp, WriteCompleteResp; anything else panics),
// checks the pending counters and outstanding-request tables with asserts,
// validates returned data for reads and atomics, frees the packet and, if the
// thread event is not already scheduled, schedules a wakeup.
// NOTE(review): the function-name line (embedded line 262) is missing from
// this listing; presumably hitCallback(PacketPtr pkt), which appears in the
// page's symbol index and in the trace message below - confirm. Embedded
// lines 271, 282, 291, 295, 308, 309, 312, 313, 330, 334, 340, 347 and 360 are
// missing as well. In particular, the declarations of `req`, the counter
// updates and the starts of the log-table update calls are not shown.
261 void
263 {
264  assert(pkt);
265  MemCmd resp_cmd = pkt->cmd;
// write-completion responses use address 0 instead of the packet's address
266  Addr addr = (resp_cmd == MemCmd::WriteCompleteResp) ? 0 : pkt->getAddr();
267 
268  DPRINTF(ProtocolTest, "%s Episode %d: hitCallback - Command %s - "
269  "Addr %s\n", this->getName(),
270  curEpisode->getEpisodeId(), resp_cmd.toString(),
272 
273  // whether the transaction is done after this hitCallback
274  bool isTransactionDone = true;
275 
276  if (resp_cmd == MemCmd::MemSyncResp) {
277  // response to a pending fence
278  // no validation needed for fence responses
279  assert(pendingFenceCount > 0);
280  assert(pendingLdStCount == 0);
281  assert(pendingAtomicCount == 0);
283  } else if (resp_cmd == MemCmd::ReadResp) {
284  // response to a pending read
285  assert(pendingLdStCount > 0);
286  assert(pendingAtomicCount == 0);
287  assert(outstandingLoads.count(addr) > 0);
288 
289  // get return data
290  Value value = *(pkt->getPtr<Value>());
// check the loaded value against the location and lane recorded in `req`
// (the declaration of `req` is on a line missing from this listing)
292  validateLoadResp(req.origLoc, req.lane, value);
293 
294  // this Read is done
296  } else if (resp_cmd == MemCmd::WriteResp) {
297  // response to a pending write
298  assert(pendingLdStCount > 0);
299  assert(pendingAtomicCount == 0);
300 
301  // no need to validate Write response
302  // just pop it from the outstanding req table so that subsequent
303  // requests dependent on this write can proceed
304  // note that we don't decrement pendingLdStCount here yet since
305  // the write is not yet completed in downstream memory. Instead, we
306  // decrement the counter when we receive the write completion ack
307  assert(outstandingStores.count(addr) > 0);
310 
311  // update log table
314  req.storedValue,
315  curTick(),
316  cuId);
317 
318  // the transaction is not done yet. Waiting for write completion ack
319  isTransactionDone = false;
320  } else if (resp_cmd == MemCmd::SwapResp) {
321  // response to a pending atomic
322  assert(pendingAtomicCount > 0);
323  assert(pendingLdStCount == 0);
324  assert(outstandingAtomics.count(addr) > 0);
325 
326  // get return data
327  Value value = *(pkt->getPtr<Value>());
328 
329  // validate atomic op return
331  validateAtomicResp(req.origLoc, req.lane, value);
332 
333  // update log table
335  curEpisode->getEpisodeId(), value,
336  curTick(),
337  cuId);
338 
339  // this Atomic is done
341  } else if (resp_cmd == MemCmd::WriteCompleteResp) {
342  // write completion ACK
343  assert(pendingLdStCount > 0);
344  assert(pendingAtomicCount == 0);
345 
346  // the Write is now done
348  } else {
349  panic("Unsupported MemCmd response type");
350  }
351 
// the sender state is kept only when the transaction is not finished, which
// in the visible code is the WriteResp case - presumably so the later
// write-completion response can still use it (TODO confirm)
352  if (isTransactionDone) {
353  // no need to keep senderState around any longer
354  delete pkt->senderState;
355  }
356 
// the packet itself is always freed, whether or not the transaction is done
357  delete pkt;
358 
359  // record the last active cycle to check for deadlock
// NOTE(review): the statement described above (embedded line 360) is not shown
361 
362  // we may be able to issue an action. Let's check
363  if (!threadEvent.scheduled()) {
364  scheduleWakeup();
365  }
366 }
367 
// Intentionally empty: this implementation adds no flags to a request.
// NOTE(review): the function-name line (embedded line 369) is missing from
// this listing; presumably setExtraRequestFlags(RequestPtr req), which is
// listed as virtual in the page's symbol index and is called when building
// the acquire request - confirm against the real source file.
368 void
370 {
371  // No extra request flag is set
372 }
373 
374 } // namespace gem5
#define DPRINTF(x,...)
Definition: trace.hh:186
const char data[]
Addr getAddress(Location loc)
Value getLoggedValue(Location loc) const
void updateLogTable(Location loc, int threadId, int episodeId, Value new_value, Tick curTick, int cuId=-1)
static const int INVALID_LOCATION
static const int INVALID_VALUE
Cycles curCycle() const
Determine the current cycle, corresponding to a tick aligned to a clock edge.
Location getLocation(int lane) const
Definition: episode.cc:289
Type getType() const
Definition: episode.hh:66
int getEpisodeId() const
Definition: episode.hh:85
virtual void issueReleaseOp()
GpuWavefrontParams Params
virtual ~GpuWavefront()
AddressManager::Value Value
virtual void issueAcquireOp()
virtual void hitCallback(PacketPtr pkt)
AddressManager::Location Location
virtual void setExtraRequestFlags(RequestPtr req)
GpuWavefront(const Params &p)
const std::string & toString() const
Return the string to a cmd given by idx.
Definition: packet.hh:275
@ WriteCompleteResp
Definition: packet.hh:91
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:294
T * getPtr()
get a pointer to the data ptr.
Definition: packet.hh:1212
Addr getAddr() const
Definition: packet.hh:805
SenderState * senderState
This packet's sender state.
Definition: packet.hh:544
void dataDynamic(T *p)
Set the data pointer to a value that should have delete [] called on it.
Definition: packet.hh:1200
MemCmd cmd
The command field of the packet.
Definition: packet.hh:371
RequestorID requestorId()
bool sendTimingReq(PacketPtr pkt)
Attempt to send a timing request to the responder port by calling its corresponding receive function.
Definition: port.hh:495
@ ATOMIC_RETURN_OP
The request is an atomic that returns data.
Definition: request.hh:175
void setDesc(std::string _description)
AddressManager * addrManager
OutstandingReqTable outstandingAtomics
void validateAtomicResp(Location loc, int lane, Value ret_val)
ProtocolTester * tester
ProtocolTester::SeqPort * port
OutstandingReqTable outstandingStores
std::string threadName
void addOutstandingReqs(OutstandingReqTable &req_table, Addr addr, int lane, Location loc, Value stored_val=AddressManager::INVALID_VALUE)
const std::string & getName() const
OutstandingReqTable outstandingLoads
OutstandingReq popOutstandingReq(OutstandingReqTable &req_table, Addr address)
const Episode::Action * curAction
TesterThreadEvent threadEvent
void validateLoadResp(Location loc, int lane, Value ret_val)
std::unique_ptr< AtomicOpFunctor > AtomicOpFunctorPtr
Definition: amo.hh:242
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:465
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:178
uint8_t flags
Definition: helpers.cc:66
Bitfield< 24 > j
Definition: misc_types.hh:57
Bitfield< 54 > p
Definition: pagetable.hh:70
Bitfield< 3 > addr
Definition: types.hh:84
ProbePointArg< PacketInfo > Packet
Packet probe point.
Definition: mem.hh:109
std::string printAddress(Addr addr)
Definition: Address.cc:80
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
std::shared_ptr< Request > RequestPtr
Definition: request.hh:92
Tick curTick()
The universal simulation clock.
Definition: cur_tick.hh:46
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:147
RubyTester::SenderState SenderState
Definition: Check.cc:40
const std::string to_string(sc_enc enc)
Definition: sc_fxdefs.cc:60

Generated on Wed Dec 21 2022 10:22:31 for gem5 by doxygen 1.9.1