gpu_dyn_inst.cc
/*
 * Copyright (c) 2015-2017 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "gpu-compute/gpu_dyn_inst.hh"

#include "debug/GPUMem.hh"
#include "gpu-compute/gpu_static_inst.hh"
#include "gpu-compute/shader.hh"
#include "gpu-compute/wavefront.hh"

GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf,
                       GPUStaticInst *static_inst, uint64_t instSeqNum)
    : GPUExecContext(_cu, _wf), addr(computeUnit()->wfSize(), (Addr)0),
      n_reg(0), useContinuation(false),
      statusBitVector(0), _staticInst(static_inst), _seqNum(instSeqNum)
{
    tlbHitLevel.assign(computeUnit()->wfSize(), -1);
    d_data = new uint8_t[computeUnit()->wfSize() * 16];
    a_data = new uint8_t[computeUnit()->wfSize() * 8];
    x_data = new uint8_t[computeUnit()->wfSize() * 8];
    for (int i = 0; i < (computeUnit()->wfSize() * 8); ++i) {
        a_data[i] = 0;
        x_data[i] = 0;
    }
    for (int i = 0; i < (computeUnit()->wfSize() * 16); ++i) {
        d_data[i] = 0;
    }
}

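// Note: d_data is sized at 16 bytes per work-item (room for one 128-bit
// value per lane), while a_data and x_data get 8 bytes (one 64-bit value)
// per lane. a_data and x_data presumably carry the extra source operands
// needed by atomics (e.g., the compare and swap values of a CAS), and
// d_data the data moved by loads and stores; see gpu_dyn_inst.hh for the
// authoritative definitions.
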
GPUDynInst::~GPUDynInst()
{
    delete[] d_data;
    delete[] a_data;
    delete[] x_data;
}

void
GPUDynInst::execute(GPUDynInstPtr gpuDynInst)
{
    _staticInst->execute(gpuDynInst);
}

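// GPUDynInst is a thin per-issue wrapper: the per-wavefront state (lane
// addresses, data buffers, sequence number) lives here, while the opcode's
// behavior lives in the shared GPUStaticInst. execute() above and the
// accessors that follow simply forward to that static instruction.
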
int
GPUDynInst::numSrcRegOperands()
{
    return _staticInst->numSrcRegOperands();
}

int
GPUDynInst::numDstRegOperands()
{
    return _staticInst->numDstRegOperands();
}

int
GPUDynInst::getNumOperands()
{
    return _staticInst->getNumOperands();
}

bool
GPUDynInst::isVectorRegister(int operandIdx)
{
    return _staticInst->isVectorRegister(operandIdx);
}

bool
GPUDynInst::isScalarRegister(int operandIdx)
{
    return _staticInst->isScalarRegister(operandIdx);
}

bool
GPUDynInst::isCondRegister(int operandIdx)
{
    return _staticInst->isCondRegister(operandIdx);
}

int
GPUDynInst::getRegisterIndex(int operandIdx, GPUDynInstPtr gpuDynInst)
{
    return _staticInst->getRegisterIndex(operandIdx, gpuDynInst);
}

int
GPUDynInst::getOperandSize(int operandIdx)
{
    return _staticInst->getOperandSize(operandIdx);
}

bool
GPUDynInst::isDstOperand(int operandIdx)
{
    return _staticInst->isDstOperand(operandIdx);
}

bool
GPUDynInst::isSrcOperand(int operandIdx)
{
    return _staticInst->isSrcOperand(operandIdx);
}

const std::string&
GPUDynInst::disassemble() const
{
    return _staticInst->disassemble();
}

uint64_t
GPUDynInst::seqNum() const
{
    return _seqNum;
}

Enums::StorageClassType
GPUDynInst::executedAs()
{
    return _staticInst->executed_as;
}

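// Note: executed_as records the storage class the instruction actually
// targeted. This appears to matter chiefly for Flat instructions, whose
// segment (global vs. group/LDS) is presumably only resolved once the
// effective addresses are known at execution time.
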
// Process a memory instruction and (if necessary) submit timing request
void
GPUDynInst::initiateAcc(GPUDynInstPtr gpuDynInst)
{
    DPRINTF(GPUMem, "CU%d: WF[%d][%d]: mempacket status bitvector=%#x\n",
            cu->cu_id, simdId, wfSlotId, exec_mask.to_ullong());

    _staticInst->initiateAcc(gpuDynInst);
    time = 0;
}

void
GPUDynInst::completeAcc(GPUDynInstPtr gpuDynInst)
{
    _staticInst->completeAcc(gpuDynInst);
}

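// initiateAcc()/completeAcc() split a timing-mode memory access into an
// issue phase and a completion phase. A minimal sketch of a hypothetical
// driver (not the actual gem5 call sites, which live in the compute-unit
// pipeline stages):
//
//     GPUDynInstPtr mi = ...;   // a dynamic memory instruction
//     mi->initiateAcc(mi);      // build and send the memory request(s)
//     // ... later, when the response returns from the memory system ...
//     mi->completeAcc(mi);      // write back data / finish the access
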
/**
 * accessor methods for the attributes of
 * the underlying GPU static instruction
 */
bool
GPUDynInst::isALU() const
{
    return _staticInst->isALU();
}

bool
GPUDynInst::isBranch() const
{
    return _staticInst->isBranch();
}

bool
GPUDynInst::isNop() const
{
    return _staticInst->isNop();
}

bool
GPUDynInst::isReturn() const
{
    return _staticInst->isReturn();
}

bool
GPUDynInst::isUnconditionalJump() const
{
    return _staticInst->isUnconditionalJump();
}

bool
GPUDynInst::isSpecialOp() const
{
    return _staticInst->isSpecialOp();
}

bool
GPUDynInst::isWaitcnt() const
{
    return _staticInst->isWaitcnt();
}

bool
GPUDynInst::isBarrier() const
{
    return _staticInst->isBarrier();
}

bool
GPUDynInst::isMemFence() const
{
    return _staticInst->isMemFence();
}

bool
GPUDynInst::isMemRef() const
{
    return _staticInst->isMemRef();
}

bool
GPUDynInst::isFlat() const
{
    return _staticInst->isFlat();
}

bool
GPUDynInst::isLoad() const
{
    return _staticInst->isLoad();
}

bool
GPUDynInst::isStore() const
{
    return _staticInst->isStore();
}

bool
GPUDynInst::isAtomic() const
{
    return _staticInst->isAtomic();
}

bool
GPUDynInst::isAtomicNoRet() const
{
    return _staticInst->isAtomicNoRet();
}

bool
GPUDynInst::isAtomicRet() const
{
    return _staticInst->isAtomicRet();
}

bool
GPUDynInst::isScalar() const
{
    return _staticInst->isScalar();
}

bool
GPUDynInst::readsSCC() const
{
    return _staticInst->readsSCC();
}

bool
GPUDynInst::writesSCC() const
{
    return _staticInst->writesSCC();
}

bool
GPUDynInst::readsVCC() const
{
    return _staticInst->readsVCC();
}

bool
GPUDynInst::writesVCC() const
{
    return _staticInst->writesVCC();
}

bool
GPUDynInst::isAtomicAnd() const
{
    return _staticInst->isAtomicAnd();
}

bool
GPUDynInst::isAtomicOr() const
{
    return _staticInst->isAtomicOr();
}

bool
GPUDynInst::isAtomicXor() const
{
    return _staticInst->isAtomicXor();
}

bool
GPUDynInst::isAtomicCAS() const
{
    return _staticInst->isAtomicCAS();
}

bool
GPUDynInst::isAtomicExch() const
{
    return _staticInst->isAtomicExch();
}

bool
GPUDynInst::isAtomicAdd() const
{
    return _staticInst->isAtomicAdd();
}

bool
GPUDynInst::isAtomicSub() const
{
    return _staticInst->isAtomicSub();
}

bool
GPUDynInst::isAtomicInc() const
{
    return _staticInst->isAtomicInc();
}

bool
GPUDynInst::isAtomicDec() const
{
    return _staticInst->isAtomicDec();
}

bool
GPUDynInst::isAtomicMax() const
{
    return _staticInst->isAtomicMax();
}

bool
GPUDynInst::isAtomicMin() const
{
    return _staticInst->isAtomicMin();
}

bool
GPUDynInst::isArgLoad() const
{
    return _staticInst->isArgLoad();
}

bool
GPUDynInst::isGlobalMem() const
{
    return _staticInst->isGlobalMem();
}

bool
GPUDynInst::isLocalMem() const
{
    return _staticInst->isLocalMem();
}

bool
GPUDynInst::isArgSeg() const
{
    return _staticInst->isArgSeg();
}

bool
GPUDynInst::isGlobalSeg() const
{
    return _staticInst->isGlobalSeg();
}

bool
GPUDynInst::isGroupSeg() const
{
    return _staticInst->isGroupSeg();
}

bool
GPUDynInst::isKernArgSeg() const
{
    return _staticInst->isKernArgSeg();
}

bool
GPUDynInst::isPrivateSeg() const
{
    return _staticInst->isPrivateSeg();
}

bool
GPUDynInst::isReadOnlySeg() const
{
    return _staticInst->isReadOnlySeg();
}

bool
GPUDynInst::isSpillSeg() const
{
    return _staticInst->isSpillSeg();
}

bool
GPUDynInst::isWorkitemScope() const
{
    return _staticInst->isWorkitemScope();
}

bool
GPUDynInst::isWavefrontScope() const
{
    return _staticInst->isWavefrontScope();
}

bool
GPUDynInst::isWorkgroupScope() const
{
    return _staticInst->isWorkgroupScope();
}

bool
GPUDynInst::isDeviceScope() const
{
    return _staticInst->isDeviceScope();
}

bool
GPUDynInst::isSystemScope() const
{
    return _staticInst->isSystemScope();
}

bool
GPUDynInst::isNoScope() const
{
    return _staticInst->isNoScope();
}

bool
GPUDynInst::isRelaxedOrder() const
{
    return _staticInst->isRelaxedOrder();
}

bool
GPUDynInst::isAcquire() const
{
    return _staticInst->isAcquire();
}

bool
GPUDynInst::isRelease() const
{
    return _staticInst->isRelease();
}

bool
GPUDynInst::isAcquireRelease() const
{
    return _staticInst->isAcquireRelease();
}

bool
GPUDynInst::isNoOrder() const
{
    return _staticInst->isNoOrder();
}

bool
GPUDynInst::isGloballyCoherent() const
{
    return _staticInst->isGloballyCoherent();
}

bool
GPUDynInst::isSystemCoherent() const
{
    return _staticInst->isSystemCoherent();
}

void
GPUDynInst::updateStats()
{
    if (_staticInst->isLocalMem()) {
        // access to LDS (shared) memory
        cu->dynamicLMemInstrCnt++;
    } else {
        // access to global memory

        // update PageDivergence histogram
        int number_pages_touched = cu->pagesTouched.size();
        assert(number_pages_touched);
        cu->pageDivergenceDist.sample(number_pages_touched);

        std::pair<ComputeUnit::pageDataStruct::iterator, bool> ret;

        for (auto it : cu->pagesTouched) {
            // see if this page has been touched before. if not, this also
            // inserts the page into the table.
            ret = cu->pageAccesses
                .insert(ComputeUnit::pageDataStruct::value_type(it.first,
                        std::make_pair(1, it.second)));

            // if yes, then update the stats
            if (!ret.second) {
                ret.first->second.first++;
                ret.first->second.second += it.second;
            }
        }

        cu->pagesTouched.clear();

        // total number of memory instructions (dynamic)
        // Atomics are counted as a single memory instruction.
        // this is # memory instructions per wavefronts, not per workitem
        cu->dynamicGMemInstrCnt++;
    }
}
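
// The loop above uses the insert-or-update idiom: std::map::insert()
// returns a std::pair<iterator, bool> whose bool is false when the key is
// already present, in which case the existing entry is updated through the
// returned iterator. A self-contained sketch (hypothetical names and
// types, not gem5 code):
//
//     #include <cassert>
//     #include <map>
//     #include <utility>
//
//     int main()
//     {
//         // page address -> (access count, accumulated per-page payload)
//         std::map<unsigned long, std::pair<int, int>> accesses;
//
//         auto ret = accesses.insert({0x1000, std::make_pair(1, 4)});
//         assert(ret.second);                // first touch: inserted
//
//         ret = accesses.insert({0x1000, std::make_pair(1, 2)});
//         if (!ret.second) {                 // already present: update
//             ret.first->second.first++;     // bump the access count
//             ret.first->second.second += 2; // accumulate the payload
//         }
//         assert(accesses.at(0x1000).first == 2);
//         return 0;
//     }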