gem5 v19.0.0.0
gpu_dyn_inst.cc
/*
 * Copyright (c) 2015-2017 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Anthony Gutierrez
 */

#include "gpu-compute/gpu_dyn_inst.hh"

#include "debug/GPUMem.hh"
#include "gpu-compute/gpu_static_inst.hh"
#include "gpu-compute/shader.hh"
#include "gpu-compute/wavefront.hh"

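// A GPUDynInst holds the dynamic (per-issue) state of one wavefront-wide
// instruction. The constructor below allocates per-lane staging buffers:
// d_data for destination/result data (16 bytes per lane) and a_data/x_data
// for atomic operand data (8 bytes per lane each, e.g. the two operands of
// a compare-and-swap). Note: these roles are inferred from the allocation
// sizes and the buffers' use elsewhere in the gpu-compute model.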
GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf,
                       GPUStaticInst *static_inst, uint64_t instSeqNum)
    : GPUExecContext(_cu, _wf), addr(computeUnit()->wfSize(), (Addr)0),
      n_reg(0), useContinuation(false),
      statusBitVector(0), _staticInst(static_inst), _seqNum(instSeqNum)
{
    tlbHitLevel.assign(computeUnit()->wfSize(), -1);
    d_data = new uint8_t[computeUnit()->wfSize() * 16];
    a_data = new uint8_t[computeUnit()->wfSize() * 8];
    x_data = new uint8_t[computeUnit()->wfSize() * 8];
    for (int i = 0; i < (computeUnit()->wfSize() * 8); ++i) {
        a_data[i] = 0;
        x_data[i] = 0;
    }
    for (int i = 0; i < (computeUnit()->wfSize() * 16); ++i) {
        d_data[i] = 0;
    }
}

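// The per-lane buffers above are raw heap arrays, so they must be freed
// explicitly when the dynamic instruction dies.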
GPUDynInst::~GPUDynInst()
{
    delete[] d_data;
    delete[] a_data;
    delete[] x_data;
}

void
GPUDynInst::execute(GPUDynInstPtr gpuDynInst)
{
    _staticInst->execute(gpuDynInst);
}
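// The operand-introspection methods below (operand counts, register types,
// sizes, and source/destination roles) all delegate to the underlying
// GPUStaticInst, which declares them as pure virtual.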

int
GPUDynInst::numSrcRegOperands()
{
    return _staticInst->numSrcRegOperands();
}

int
GPUDynInst::numDstRegOperands()
{
    return _staticInst->numDstRegOperands();
}

int
GPUDynInst::getNumOperands()
{
    return _staticInst->getNumOperands();
}

bool
GPUDynInst::isVectorRegister(int operandIdx)
{
    return _staticInst->isVectorRegister(operandIdx);
}

bool
GPUDynInst::isScalarRegister(int operandIdx)
{
    return _staticInst->isScalarRegister(operandIdx);
}

bool
GPUDynInst::isCondRegister(int operandIdx)
{
    return _staticInst->isCondRegister(operandIdx);
}

int
GPUDynInst::getRegisterIndex(int operandIdx, GPUDynInstPtr gpuDynInst)
{
    return _staticInst->getRegisterIndex(operandIdx, gpuDynInst);
}

int
GPUDynInst::getOperandSize(int operandIdx)
{
    return _staticInst->getOperandSize(operandIdx);
}

bool
GPUDynInst::isDstOperand(int operandIdx)
{
    return _staticInst->isDstOperand(operandIdx);
}

bool
GPUDynInst::isSrcOperand(int operandIdx)
{
    return _staticInst->isSrcOperand(operandIdx);
}

const std::string&
GPUDynInst::disassemble() const
{
    return _staticInst->disassemble();
}

uint64_t
GPUDynInst::seqNum() const
{
    return _seqNum;
}

Enums::StorageClassType
GPUDynInst::executedAs()
{
    return _staticInst->executed_as;
}

// Process a memory instruction and (if necessary) submit timing request
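// (The trace below is printed only when gem5 runs with the GPUMem debug
// flag enabled, e.g. --debug-flags=GPUMem on the command line.)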
void
GPUDynInst::initiateAcc(GPUDynInstPtr gpuDynInst)
{
    DPRINTF(GPUMem, "CU%d: WF[%d][%d]: mempacket status bitvector=%#x\n",
            cu->cu_id, simdId, wfSlotId, exec_mask.to_ullong());

    _staticInst->initiateAcc(gpuDynInst);
    time = 0;
}

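// completeAcc is the counterpart of initiateAcc: it runs once the memory
// response for this instruction has returned.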
void
GPUDynInst::completeAcc(GPUDynInstPtr gpuDynInst)
{
    _staticInst->completeAcc(gpuDynInst);
}

/**
 * accessor methods for the attributes of
 * the underlying GPU static instruction
 */
bool
GPUDynInst::isALU() const
{
    return _staticInst->isALU();
}

bool
GPUDynInst::isBranch() const
{
    return _staticInst->isBranch();
}

bool
GPUDynInst::isNop() const
{
    return _staticInst->isNop();
}

bool
GPUDynInst::isReturn() const
{
    return _staticInst->isReturn();
}

bool
GPUDynInst::isUnconditionalJump() const
{
    return _staticInst->isUnconditionalJump();
}

bool
GPUDynInst::isSpecialOp() const
{
    return _staticInst->isSpecialOp();
}

bool
GPUDynInst::isWaitcnt() const
{
    return _staticInst->isWaitcnt();
}

bool
GPUDynInst::isBarrier() const
{
    return _staticInst->isBarrier();
}

bool
GPUDynInst::isMemFence() const
{
    return _staticInst->isMemFence();
}

bool
GPUDynInst::isMemRef() const
{
    return _staticInst->isMemRef();
}

bool
GPUDynInst::isFlat() const
{
    return _staticInst->isFlat();
}

bool
GPUDynInst::isLoad() const
{
    return _staticInst->isLoad();
}

bool
GPUDynInst::isStore() const
{
    return _staticInst->isStore();
}

bool
GPUDynInst::isAtomic() const
{
    return _staticInst->isAtomic();
}

bool
GPUDynInst::isAtomicNoRet() const
{
    return _staticInst->isAtomicNoRet();
}

bool
GPUDynInst::isAtomicRet() const
{
    return _staticInst->isAtomicRet();
}

bool
GPUDynInst::isScalar() const
{
    return _staticInst->isScalar();
}

bool
GPUDynInst::readsSCC() const
{
    return _staticInst->readsSCC();
}

bool
GPUDynInst::writesSCC() const
{
    return _staticInst->writesSCC();
}

bool
GPUDynInst::readsVCC() const
{
    return _staticInst->readsVCC();
}

bool
GPUDynInst::writesVCC() const
{
    return _staticInst->writesVCC();
}

bool
GPUDynInst::isAtomicAnd() const
{
    return _staticInst->isAtomicAnd();
}

bool
GPUDynInst::isAtomicOr() const
{
    return _staticInst->isAtomicOr();
}

bool
GPUDynInst::isAtomicXor() const
{
    return _staticInst->isAtomicXor();
}

bool
GPUDynInst::isAtomicCAS() const
{
    return _staticInst->isAtomicCAS();
}

bool
GPUDynInst::isAtomicExch() const
{
    return _staticInst->isAtomicExch();
}

bool
GPUDynInst::isAtomicAdd() const
{
    return _staticInst->isAtomicAdd();
}

bool
GPUDynInst::isAtomicSub() const
{
    return _staticInst->isAtomicSub();
}

bool
GPUDynInst::isAtomicInc() const
{
    return _staticInst->isAtomicInc();
}

bool
GPUDynInst::isAtomicDec() const
{
    return _staticInst->isAtomicDec();
}

bool
GPUDynInst::isAtomicMax() const
{
    return _staticInst->isAtomicMax();
}

bool
GPUDynInst::isAtomicMin() const
{
    return _staticInst->isAtomicMin();
}

bool
GPUDynInst::isArgLoad() const
{
    return _staticInst->isArgLoad();
}

bool
GPUDynInst::isGlobalMem() const
{
    return _staticInst->isGlobalMem();
}

bool
GPUDynInst::isLocalMem() const
{
    return _staticInst->isLocalMem();
}

bool
GPUDynInst::isArgSeg() const
{
    return _staticInst->isArgSeg();
}

bool
GPUDynInst::isGlobalSeg() const
{
    return _staticInst->isGlobalSeg();
}

bool
GPUDynInst::isGroupSeg() const
{
    return _staticInst->isGroupSeg();
}

bool
GPUDynInst::isKernArgSeg() const
{
    return _staticInst->isKernArgSeg();
}

bool
GPUDynInst::isPrivateSeg() const
{
    return _staticInst->isPrivateSeg();
}

bool
GPUDynInst::isReadOnlySeg() const
{
    return _staticInst->isReadOnlySeg();
}

bool
GPUDynInst::isSpillSeg() const
{
    return _staticInst->isSpillSeg();
}

bool
GPUDynInst::isWorkitemScope() const
{
    return _staticInst->isWorkitemScope();
}

bool
GPUDynInst::isWavefrontScope() const
{
    return _staticInst->isWavefrontScope();
}

bool
GPUDynInst::isWorkgroupScope() const
{
    return _staticInst->isWorkgroupScope();
}

bool
GPUDynInst::isDeviceScope() const
{
    return _staticInst->isDeviceScope();
}

bool
GPUDynInst::isSystemScope() const
{
    return _staticInst->isSystemScope();
}

bool
GPUDynInst::isNoScope() const
{
    return _staticInst->isNoScope();
}

bool
GPUDynInst::isRelaxedOrder() const
{
    return _staticInst->isRelaxedOrder();
}

bool
GPUDynInst::isAcquire() const
{
    return _staticInst->isAcquire();
}

bool
GPUDynInst::isRelease() const
{
    return _staticInst->isRelease();
}

bool
GPUDynInst::isAcquireRelease() const
{
    return _staticInst->isAcquireRelease();
}

bool
GPUDynInst::isNoOrder() const
{
    return _staticInst->isNoOrder();
}

bool
GPUDynInst::isGloballyCoherent() const
{
    return _staticInst->isGloballyCoherent();
}

bool
GPUDynInst::isSystemCoherent() const
{
    return _staticInst->isSystemCoherent();
}

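// updateStats does per-CU memory bookkeeping: an LDS access bumps the
// dynamic local-memory instruction count, while a global access also
// records page divergence and per-page access counts before bumping the
// dynamic global-memory instruction count.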
void
GPUDynInst::updateStats()
{
    if (_staticInst->isLocalMem()) {
        // access to LDS (shared) memory
        cu->dynamicLMemInstrCnt++;
    } else {
        // access to global memory

        // update PageDivergence histogram
        int number_pages_touched = cu->pagesTouched.size();
        assert(number_pages_touched);
        cu->pageDivergenceDist.sample(number_pages_touched);
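        // (Page divergence is the number of distinct pages this one
        // wavefront instruction touches; e.g. if the active lanes hit
        // three distinct pages, sample(3) is recorded above.)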

        std::pair<ComputeUnit::pageDataStruct::iterator, bool> ret;

        for (auto it : cu->pagesTouched) {
            // see if this page has been touched before. if not, this also
            // inserts the page into the table.
            ret = cu->pageAccesses
                .insert(ComputeUnit::pageDataStruct::value_type(it.first,
                        std::make_pair(1, it.second)));

            // if yes, then update the stats
            if (!ret.second) {
                ret.first->second.first++;
                ret.first->second.second += it.second;
            }
        }

        cu->pagesTouched.clear();

        // total number of memory instructions (dynamic)
        // Atomics are counted as a single memory instruction.
        // this is # memory instructions per wavefront, not per work-item
        cu->dynamicGMemInstrCnt++;
    }
}