gem5  v19.0.0.0
mem.hh
1 /*
2  * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its contributors
18  * may be used to endorse or promote products derived from this software
19  * without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Author: Steve Reinhardt
34  */
35 
36 #ifndef __ARCH_HSAIL_INSTS_MEM_HH__
37 #define __ARCH_HSAIL_INSTS_MEM_HH__
38 
39 #include <type_traits>
40 
41 #include "arch/hsail/insts/decl.hh"
43 #include "arch/hsail/operand.hh"
45 
46 namespace HsailISA
47 {
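// MemInst is a small mix-in used by the load/store/atomic instruction classes
// below: it records the access size (1/2/4/8 bytes) implied by the BRIG memory
// type and keeps a pointer to the instruction's address operand.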
48  class MemInst
49  {
50  public:
51  MemInst() : size(0), addr_operand(nullptr) { }
52 
53  MemInst(Enums::MemType m_type)
54  {
55  if (m_type == Enums::M_U64 ||
56  m_type == Enums::M_S64 ||
57  m_type == Enums::M_F64) {
58  size = 8;
59  } else if (m_type == Enums::M_U32 ||
60  m_type == Enums::M_S32 ||
61  m_type == Enums::M_F32) {
62  size = 4;
63  } else if (m_type == Enums::M_U16 ||
64  m_type == Enums::M_S16 ||
65  m_type == Enums::M_F16) {
66  size = 2;
67  } else {
68  size = 1;
69  }
70 
71  addr_operand = nullptr;
72  }
73 
74  void
75  init_addr(AddrOperandBase *_addr_operand)
76  {
77  addr_operand = _addr_operand;
78  }
79 
80  private:
81  int size;
82  AddrOperandBase *addr_operand;
83 
84  public:
85  int getMemOperandSize() { return size; }
86  AddrOperandBase *getAddressOperand() { return addr_operand; }
87  };
88 
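// LdaInstBase/LdaInst implement the HSAIL lda ("load address") operation: the
// result is computed from the address operand alone, so the instruction is
// tagged as an ALU op rather than a memory reference.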
89  template<typename DestOperandType, typename AddrOperandType>
90  class LdaInstBase : public HsailGPUStaticInst
91  {
92  public:
93  typename DestOperandType::DestOperand dest;
94  AddrOperandType addr;
95 
96  LdaInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
97  const char *_opcode)
98  : HsailGPUStaticInst(obj, _opcode)
99  {
100  using namespace Brig;
101 
102  setFlag(ALU);
103 
104  unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
105  dest.init(op_offs, obj);
106  op_offs = obj->getOperandPtr(ib->operands, 1);
107  addr.init(op_offs, obj);
108  }
109 
110  int numSrcRegOperands() override
111  { return(this->addr.isVectorRegister()); }
112  int numDstRegOperands() override
113  { return dest.isVectorRegister(); }
114  bool isVectorRegister(int operandIndex) override
115  {
116  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
117  return((operandIndex == 0) ? dest.isVectorRegister() :
118  this->addr.isVectorRegister());
119  }
120  bool isCondRegister(int operandIndex) override
121  {
122  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
123  return((operandIndex == 0) ? dest.isCondRegister() :
124  this->addr.isCondRegister());
125  }
126  bool isScalarRegister(int operandIndex) override
127  {
128  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
129  return((operandIndex == 0) ? dest.isScalarRegister() :
130  this->addr.isScalarRegister());
131  }
132  bool isSrcOperand(int operandIndex) override
133  {
134  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
135  if (operandIndex > 0)
136  return(this->addr.isVectorRegister());
137  return false;
138  }
139  bool isDstOperand(int operandIndex) override {
140  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
141  return(operandIndex == 0);
142  }
143  int getOperandSize(int operandIndex) override
144  {
145  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
146  return((operandIndex == 0) ? dest.opSize() :
147  this->addr.opSize());
148  }
149  int
150  getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
151  {
152  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
153  return((operandIndex == 0) ? dest.regIndex() :
154  this->addr.regIndex());
155  }
156  int getNumOperands() override
157  {
158  if (this->addr.isVectorRegister())
159  return 2;
160  return 1;
161  }
162  };
163 
164  template<typename DestDataType, typename AddrOperandType>
165  class LdaInst :
166  public LdaInstBase<typename DestDataType::OperandType, AddrOperandType>,
167  public MemInst
168  {
169  public:
170  void generateDisassembly();
171 
172  LdaInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
173  const char *_opcode)
174  : LdaInstBase<typename DestDataType::OperandType,
175  AddrOperandType>(ib, obj, _opcode)
176  {
177  init_addr(&this->addr);
178  }
179 
180  void execute(GPUDynInstPtr gpuDynInst);
181  };
182 
183  template<typename DataType>
184  GPUStaticInst*
185  decodeLda(const Brig::BrigInstBase *ib, const BrigObject *obj)
186  {
187  unsigned op_offs = obj->getOperandPtr(ib->operands, 1);
188  BrigRegOperandInfo regDataType = findRegDataType(op_offs, obj);
189 
190  if (regDataType.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
191  return new LdaInst<DataType, NoRegAddrOperand>(ib, obj, "ldas");
192  } else if (regDataType.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
193  // V2/V4 not allowed
194  switch (regDataType.regKind) {
195  case Brig::BRIG_REGISTER_KIND_SINGLE:
196  return new LdaInst<DataType, SRegAddrOperand>(ib, obj, "ldas");
197  case Brig::BRIG_REGISTER_KIND_DOUBLE:
198  return new LdaInst<DataType, DRegAddrOperand>(ib, obj, "ldas");
199  default:
200  fatal("Bad ldas register operand type %d\n", regDataType.type);
201  }
202  } else {
203  fatal("Bad ldas register operand kind %d\n", regDataType.kind);
204  }
205  }
206 
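// LdInstBase decodes both plain ld instructions (BrigInstMem) and atomic_ld
// (BrigInstAtomic), then translates the BRIG memory order, scope, and segment
// fields into the corresponding instruction flags.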
207  template<typename MemOperandType, typename DestOperandType,
208  typename AddrOperandType>
209  class LdInstBase : public HsailGPUStaticInst
210  {
211  public:
212  Brig::BrigWidth8_t width;
213  typename DestOperandType::DestOperand dest;
214  AddrOperandType addr;
215 
216  Brig::BrigSegment segment;
217  Brig::BrigMemoryOrder memoryOrder;
218  Brig::BrigMemoryScope memoryScope;
219  unsigned int equivClass;
220 
221  LdInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
222  const char *_opcode)
223  : HsailGPUStaticInst(obj, _opcode)
224  {
225  using namespace Brig;
226 
227  setFlag(MemoryRef);
228  setFlag(Load);
229 
230  if (ib->opcode == BRIG_OPCODE_LD) {
231  const BrigInstMem *ldst = (const BrigInstMem*)ib;
232 
233  segment = (BrigSegment)ldst->segment;
234  memoryOrder = BRIG_MEMORY_ORDER_NONE;
235  memoryScope = BRIG_MEMORY_SCOPE_NONE;
236  equivClass = ldst->equivClass;
237 
238  width = ldst->width;
239  unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
240  const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
241  if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
242  dest.init(op_offs, obj);
243 
244  op_offs = obj->getOperandPtr(ib->operands, 1);
245  addr.init(op_offs, obj);
246  } else {
247  const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
248 
249  segment = (BrigSegment)at->segment;
250  memoryOrder = (BrigMemoryOrder)at->memoryOrder;
251  memoryScope = (BrigMemoryScope)at->memoryScope;
252  equivClass = 0;
253 
254  width = BRIG_WIDTH_1;
255  unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
256  const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
257 
258  if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
259  dest.init(op_offs, obj);
260 
261  op_offs = obj->getOperandPtr(ib->operands,1);
262  addr.init(op_offs, obj);
263  }
264 
265  switch (memoryOrder) {
266  case BRIG_MEMORY_ORDER_NONE:
267  setFlag(NoOrder);
268  break;
269  case BRIG_MEMORY_ORDER_RELAXED:
270  setFlag(RelaxedOrder);
271  break;
272  case BRIG_MEMORY_ORDER_SC_ACQUIRE:
273  setFlag(Acquire);
274  break;
275  case BRIG_MEMORY_ORDER_SC_RELEASE:
276  setFlag(Release);
277  break;
278  case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
279  setFlag(AcquireRelease);
280  break;
281  default:
282  fatal("LdInst has bad memory order type\n");
283  }
284 
285  switch (memoryScope) {
286  case BRIG_MEMORY_SCOPE_NONE:
287  setFlag(NoScope);
288  break;
289  case BRIG_MEMORY_SCOPE_WORKITEM:
290  setFlag(WorkitemScope);
291  break;
292  case BRIG_MEMORY_SCOPE_WORKGROUP:
293  setFlag(WorkgroupScope);
294  break;
295  case BRIG_MEMORY_SCOPE_AGENT:
296  setFlag(DeviceScope);
297  break;
298  case BRIG_MEMORY_SCOPE_SYSTEM:
299  setFlag(SystemScope);
300  break;
301  default:
302  fatal("LdInst has bad memory scope type\n");
303  }
304 
305  switch (segment) {
306  case BRIG_SEGMENT_GLOBAL:
307  setFlag(GlobalSegment);
308  break;
309  case BRIG_SEGMENT_GROUP:
310  setFlag(GroupSegment);
311  break;
312  case BRIG_SEGMENT_PRIVATE:
313  setFlag(PrivateSegment);
314  break;
315  case BRIG_SEGMENT_READONLY:
316  setFlag(ReadOnlySegment);
317  break;
318  case BRIG_SEGMENT_SPILL:
319  setFlag(SpillSegment);
320  break;
321  case BRIG_SEGMENT_FLAT:
322  setFlag(Flat);
323  break;
324  case BRIG_SEGMENT_KERNARG:
325  setFlag(KernArgSegment);
326  break;
327  case BRIG_SEGMENT_ARG:
328  setFlag(ArgSegment);
329  break;
330  default:
331  panic("Ld: segment %d not supported\n", segment);
332  }
333  }
334 
335  int numSrcRegOperands() override
336  { return(this->addr.isVectorRegister()); }
337  int numDstRegOperands() override { return dest.isVectorRegister(); }
338  int getNumOperands() override
339  {
340  if (this->addr.isVectorRegister())
341  return 2;
342  else
343  return 1;
344  }
345  bool isVectorRegister(int operandIndex) override
346  {
347  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
348  return((operandIndex == 0) ? dest.isVectorRegister() :
349  this->addr.isVectorRegister());
350  }
351  bool isCondRegister(int operandIndex) override
352  {
353  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
354  return((operandIndex == 0) ? dest.isCondRegister() :
355  this->addr.isCondRegister());
356  }
357  bool isScalarRegister(int operandIndex) override
358  {
359  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
360  return((operandIndex == 0) ? dest.isScalarRegister() :
361  this->addr.isScalarRegister());
362  }
363  bool isSrcOperand(int operandIndex) override
364  {
365  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
366  if (operandIndex > 0)
367  return(this->addr.isVectorRegister());
368  return false;
369  }
370  bool isDstOperand(int operandIndex) override
371  {
372  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
373  return(operandIndex == 0);
374  }
375  int getOperandSize(int operandIndex) override
376  {
377  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
378  return((operandIndex == 0) ? dest.opSize() :
379  this->addr.opSize());
380  }
381  int
382  getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
383  {
384  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
385  return((operandIndex == 0) ? dest.regIndex() :
386  this->addr.regIndex());
387  }
388  };
389 
390  template<typename MemDataType, typename DestDataType,
391  typename AddrOperandType>
392  class LdInst :
393  public LdInstBase<typename MemDataType::CType,
394  typename DestDataType::OperandType, AddrOperandType>,
395  public MemInst
396  {
397  typename DestDataType::OperandType::DestOperand dest_vect[4];
398  uint16_t num_dest_operands;
399  void generateDisassembly() override;
400 
401  public:
402  LdInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
403  const char *_opcode)
404  : LdInstBase<typename MemDataType::CType,
405  typename DestDataType::OperandType,
406  AddrOperandType>(ib, obj, _opcode),
407  MemInst(MemDataType::memType)
408  {
409  init_addr(&this->addr);
410 
411  unsigned op_offs = obj->getOperandPtr(ib->operands,0);
412  const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
413 
414  if (brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
415  const Brig::BrigOperandOperandList *brigRegVecOp =
416  (const Brig::BrigOperandOperandList*)brigOp;
417 
418  num_dest_operands =
419  *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4;
420 
421  assert(num_dest_operands <= 4);
422  } else {
423  num_dest_operands = 1;
424  }
425 
426  if (num_dest_operands > 1) {
427  assert(brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
428 
429  for (int i = 0; i < num_dest_operands; ++i) {
430  dest_vect[i].init_from_vect(op_offs, obj, i);
431  }
432  }
433  }
434 
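// initiateAcc issues one read per active lane: group-segment accesses read the
// LDS chunk directly, while global accesses build a Request/Packet pair and
// hand it to the compute unit via sendRequest().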
435  void
436  initiateAcc(GPUDynInstPtr gpuDynInst) override
437  {
438  typedef typename MemDataType::CType c0;
439 
440  gpuDynInst->statusBitVector = gpuDynInst->exec_mask;
441 
442  if (num_dest_operands > 1) {
443  for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i)
444  if (gpuDynInst->exec_mask[i])
445  gpuDynInst->statusVector.push_back(num_dest_operands);
446  else
447  gpuDynInst->statusVector.push_back(0);
448  }
449 
450  for (int k = 0; k < num_dest_operands; ++k) {
451 
452  c0 *d = &((c0*)gpuDynInst->d_data)
453  [k * gpuDynInst->computeUnit()->wfSize()];
454 
455  for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
456  if (gpuDynInst->exec_mask[i]) {
457  Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);
458 
459  if (this->isLocalMem()) {
460  // load from shared memory
461  *d = gpuDynInst->wavefront()->ldsChunk->
462  read<c0>(vaddr);
463  } else {
464  RequestPtr req = std::make_shared<Request>(0,
465  vaddr, sizeof(c0), 0,
466  gpuDynInst->computeUnit()->masterId(),
467  0, gpuDynInst->wfDynId);
468 
469  gpuDynInst->setRequestFlags(req);
470  PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
471  pkt->dataStatic(d);
472 
473  if (gpuDynInst->computeUnit()->shader->
474  separate_acquire_release &&
475  gpuDynInst->isAcquire()) {
476  // if this load has acquire semantics,
477  // set the response continuation function
478  // to perform an Acquire request
479  gpuDynInst->execContinuation =
480  &GPUStaticInst::execLdAcq;
481 
482  gpuDynInst->useContinuation = true;
483  } else {
484  // the request will be finished when
485  // the load completes
486  gpuDynInst->useContinuation = false;
487  }
488  // translation is performed in sendRequest()
489  gpuDynInst->computeUnit()->sendRequest(gpuDynInst,
490  i, pkt);
491  }
492  }
493  ++d;
494  }
495  }
496 
497  gpuDynInst->updateStats();
498  }
499 
500  void
501  completeAcc(GPUDynInstPtr gpuDynInst) override
502  {
503  typedef typename MemDataType::CType c1;
504 
505  constexpr bool is_vt_32 = DestDataType::vgprType == VT_32;
506 
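// Select the register type (c0) used to write the loaded value back: 32-bit
// destinations map to float/int32_t/uint32_t and 64-bit destinations to
// double/int64_t/uint64_t, preserving the signedness and floating-point-ness
// of the memory type c1.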
518  typedef typename std::conditional<is_vt_32,
519  typename std::conditional<std::is_floating_point<c1>::value,
520  float, typename std::conditional<std::is_signed<c1>::value,
521  int32_t, uint32_t>::type>::type,
522  typename std::conditional<std::is_floating_point<c1>::value,
523  double, typename std::conditional<std::is_signed<c1>::value,
524  int64_t, uint64_t>::type>::type>::type c0;
525 
526 
527  Wavefront *w = gpuDynInst->wavefront();
528 
529  std::vector<uint32_t> regVec;
530  // iterate over number of destination register operands since
531  // this is a load
532  for (int k = 0; k < num_dest_operands; ++k) {
533  assert((sizeof(c1) * num_dest_operands)
534  <= MAX_WIDTH_FOR_MEM_INST);
535 
536  int dst = this->dest.regIndex() + k;
537  if (num_dest_operands > MAX_REGS_FOR_NON_VEC_MEM_INST)
538  dst = dest_vect[k].regIndex();
539  // virtual->physical VGPR mapping
540  int physVgpr = w->remap(dst, sizeof(c0), 1);
541  // save the physical VGPR index
542  regVec.push_back(physVgpr);
543 
544  c1 *p1 =
545  &((c1*)gpuDynInst->d_data)[k * w->computeUnit->wfSize()];
546 
547  for (int i = 0; i < w->computeUnit->wfSize(); ++i) {
548  if (gpuDynInst->exec_mask[i]) {
549  DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: "
550  "$%s%d <- %d global ld done (src = wavefront "
551  "ld inst)\n", w->computeUnit->cu_id, w->simdId,
552  w->wfSlotId, i, sizeof(c0) == 4 ? "s" : "d",
553  dst, *p1);
554  // write the value into the physical VGPR. This is a
555  // purely functional operation. No timing is modeled.
556  w->computeUnit->vrf[w->simdId]->write<c0>(physVgpr,
557  *p1, i);
558  }
559  ++p1;
560  }
561  }
562 
563  // Schedule the write operation of the load data on the VRF.
564  // This simply models the timing aspect of the VRF write operation.
565  // It does not modify the physical VGPR.
566  int loadVrfBankConflictCycles = gpuDynInst->computeUnit()->
567  vrf[w->simdId]->exec(gpuDynInst->seqNum(), w, regVec,
568  sizeof(c0), gpuDynInst->time);
569 
570  if (this->isGlobalMem()) {
571  gpuDynInst->computeUnit()->globalMemoryPipe
572  .incLoadVRFBankConflictCycles(loadVrfBankConflictCycles);
573  } else {
574  assert(this->isLocalMem());
575  gpuDynInst->computeUnit()->localMemoryPipe
576  .incLoadVRFBankConflictCycles(loadVrfBankConflictCycles);
577  }
578  }
579 
580  private:
581  void
582  execLdAcq(GPUDynInstPtr gpuDynInst) override
583  {
584  // after the load has completed and if the load has acquire
585  // semantics, issue an acquire request.
586  if (!this->isLocalMem()) {
587  if (gpuDynInst->computeUnit()->shader->separate_acquire_release
588  && gpuDynInst->isAcquire()) {
589  gpuDynInst->statusBitVector = VectorMask(1);
590  gpuDynInst->useContinuation = false;
591  // create request
592  RequestPtr req = std::make_shared<Request>(0, 0, 0, 0,
593  gpuDynInst->computeUnit()->masterId(),
594  0, gpuDynInst->wfDynId);
595  req->setFlags(Request::ACQUIRE);
596  gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
597  }
598  }
599  }
600 
601  public:
602  bool isVectorRegister(int operandIndex) override
603  {
604  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
605  if ((num_dest_operands != getNumOperands()) &&
606  (operandIndex == (getNumOperands()-1)))
607  return(this->addr.isVectorRegister());
608  if (num_dest_operands > 1) {
609  return dest_vect[operandIndex].isVectorRegister();
610  }
611  else if (num_dest_operands == 1) {
612  return LdInstBase<typename MemDataType::CType,
613  typename DestDataType::OperandType,
614  AddrOperandType>::dest.isVectorRegister();
615  }
616  return false;
617  }
618  bool isCondRegister(int operandIndex) override
619  {
620  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
621  if ((num_dest_operands != getNumOperands()) &&
622  (operandIndex == (getNumOperands()-1)))
623  return(this->addr.isCondRegister());
624  if (num_dest_operands > 1)
625  return dest_vect[operandIndex].isCondRegister();
626  else if (num_dest_operands == 1)
627  return LdInstBase<typename MemDataType::CType,
628  typename DestDataType::OperandType,
629  AddrOperandType>::dest.isCondRegister();
630  return false;
631  }
632  bool isScalarRegister(int operandIndex) override
633  {
634  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
635  if ((num_dest_operands != getNumOperands()) &&
636  (operandIndex == (getNumOperands()-1)))
637  return(this->addr.isScalarRegister());
638  if (num_dest_operands > 1)
639  return dest_vect[operandIndex].isScalarRegister();
640  else if (num_dest_operands == 1)
641  return LdInstBase<typename MemDataType::CType,
642  typename DestDataType::OperandType,
643  AddrOperandType>::dest.isScalarRegister();
644  return false;
645  }
646  bool isSrcOperand(int operandIndex) override
647  {
648  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
649  if ((num_dest_operands != getNumOperands()) &&
650  (operandIndex == (getNumOperands()-1)))
651  return(this->addr.isVectorRegister());
652  return false;
653  }
654  bool isDstOperand(int operandIndex) override
655  {
656  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
657  if ((num_dest_operands != getNumOperands()) &&
658  (operandIndex == (getNumOperands()-1)))
659  return false;
660  return true;
661  }
662  int getOperandSize(int operandIndex) override
663  {
664  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
665  if ((num_dest_operands != getNumOperands()) &&
666  (operandIndex == (getNumOperands()-1)))
667  return(this->addr.opSize());
668  if (num_dest_operands > 1)
669  return(dest_vect[operandIndex].opSize());
670  else if (num_dest_operands == 1)
671  return(LdInstBase<typename MemDataType::CType,
672  typename DestDataType::OperandType,
673  AddrOperandType>::dest.opSize());
674  return 0;
675  }
676  int
677  getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
678  {
679  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
680  if ((num_dest_operands != getNumOperands()) &&
681  (operandIndex == (getNumOperands()-1)))
682  return(this->addr.regIndex());
683  if (num_dest_operands > 1)
684  return(dest_vect[operandIndex].regIndex());
685  else if (num_dest_operands == 1)
686  return(LdInstBase<typename MemDataType::CType,
687  typename DestDataType::OperandType,
688  AddrOperandType>::dest.regIndex());
689  return -1;
690  }
691  int getNumOperands() override
692  {
693  if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
694  return(num_dest_operands+1);
695  else
696  return(num_dest_operands);
697  }
698  void execute(GPUDynInstPtr gpuDynInst) override;
699  };
700 
701  template<typename MemDT, typename DestDT>
702  GPUStaticInst*
703  decodeLd2(const Brig::BrigInstBase *ib, const BrigObject *obj)
704  {
705  unsigned op_offs = obj->getOperandPtr(ib->operands,1);
706  BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);
707 
708  if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
709  return new LdInst<MemDT, DestDT, NoRegAddrOperand>(ib, obj, "ld");
710  } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER ||
711  tmp.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
712  switch (tmp.regKind) {
713  case Brig::BRIG_REGISTER_KIND_SINGLE:
714  return new LdInst<MemDT, DestDT,
715  SRegAddrOperand>(ib, obj, "ld");
716  case Brig::BRIG_REGISTER_KIND_DOUBLE:
717  return new LdInst<MemDT, DestDT,
718  DRegAddrOperand>(ib, obj, "ld");
719  default:
720  fatal("Bad ld register operand type %d\n", tmp.regKind);
721  }
722  } else {
723  fatal("Bad ld register operand kind %d\n", tmp.kind);
724  }
725  }
726 
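// decodeLd selects the destination register width (32- vs 64-bit B/U/S/F
// types) from ib->type; decodeLd2 then picks the address operand flavor
// (no register, single register, or double register).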
727  template<typename MemDT>
728  GPUStaticInst*
729  decodeLd(const Brig::BrigInstBase *ib, const BrigObject *obj)
730  {
731  unsigned op_offs = obj->getOperandPtr(ib->operands,0);
732  BrigRegOperandInfo dest = findRegDataType(op_offs, obj);
733 
734  assert(dest.kind == Brig::BRIG_KIND_OPERAND_REGISTER ||
735  dest.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
736  switch(dest.regKind) {
737  case Brig::BRIG_REGISTER_KIND_SINGLE:
738  switch (ib->type) {
739  case Brig::BRIG_TYPE_B8:
740  case Brig::BRIG_TYPE_B16:
741  case Brig::BRIG_TYPE_B32:
742  return decodeLd2<MemDT, B32>(ib, obj);
743  case Brig::BRIG_TYPE_U8:
744  case Brig::BRIG_TYPE_U16:
745  case Brig::BRIG_TYPE_U32:
746  return decodeLd2<MemDT, U32>(ib, obj);
747  case Brig::BRIG_TYPE_S8:
748  case Brig::BRIG_TYPE_S16:
749  case Brig::BRIG_TYPE_S32:
750  return decodeLd2<MemDT, S32>(ib, obj);
751  case Brig::BRIG_TYPE_F16:
752  case Brig::BRIG_TYPE_F32:
753  return decodeLd2<MemDT, U32>(ib, obj);
754  default:
755  fatal("Bad ld register operand type %d, %d\n",
756  dest.regKind, ib->type);
757  };
758  case Brig::BRIG_REGISTER_KIND_DOUBLE:
759  switch (ib->type) {
760  case Brig::BRIG_TYPE_B64:
761  return decodeLd2<MemDT, B64>(ib, obj);
762  case Brig::BRIG_TYPE_U64:
763  return decodeLd2<MemDT, U64>(ib, obj);
764  case Brig::BRIG_TYPE_S64:
765  return decodeLd2<MemDT, S64>(ib, obj);
766  case Brig::BRIG_TYPE_F64:
767  return decodeLd2<MemDT, U64>(ib, obj);
768  default:
769  fatal("Bad ld register operand type %d, %d\n",
770  dest.regKind, ib->type);
771  };
772  default:
773  fatal("Bad ld register operand type %d, %d\n", dest.regKind,
774  ib->type);
775  }
776  }
777 
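// StInstBase mirrors LdInstBase for stores: it handles both st (BrigInstMem)
// and atomic_st (BrigInstAtomic) encodings and sets the same order, scope,
// and segment flags.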
778  template<typename MemDataType, typename SrcOperandType,
779  typename AddrOperandType>
780  class StInstBase : public HsailGPUStaticInst
781  {
782  public:
783  typename SrcOperandType::SrcOperand src;
784  AddrOperandType addr;
785 
786  Brig::BrigSegment segment;
787  Brig::BrigMemoryScope memoryScope;
788  Brig::BrigMemoryOrder memoryOrder;
789  unsigned int equivClass;
790 
791  StInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
792  const char *_opcode)
793  : HsailGPUStaticInst(obj, _opcode)
794  {
795  using namespace Brig;
796 
797  setFlag(MemoryRef);
798  setFlag(Store);
799 
800  if (ib->opcode == BRIG_OPCODE_ST) {
801  const BrigInstMem *ldst = (const BrigInstMem*)ib;
802 
803  segment = (BrigSegment)ldst->segment;
804  memoryOrder = BRIG_MEMORY_ORDER_NONE;
805  memoryScope = BRIG_MEMORY_SCOPE_NONE;
806  equivClass = ldst->equivClass;
807 
808  unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
809  const BrigOperand *baseOp = obj->getOperand(op_offs);
810 
811  if ((baseOp->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) ||
812  (baseOp->kind == BRIG_KIND_OPERAND_REGISTER)) {
813  src.init(op_offs, obj);
814  }
815 
816  op_offs = obj->getOperandPtr(ib->operands, 1);
817  addr.init(op_offs, obj);
818  } else {
819  const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
820 
821  segment = (BrigSegment)at->segment;
822  memoryScope = (BrigMemoryScope)at->memoryScope;
823  memoryOrder = (BrigMemoryOrder)at->memoryOrder;
824  equivClass = 0;
825 
826  unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
827  addr.init(op_offs, obj);
828 
829  op_offs = obj->getOperandPtr(ib->operands, 1);
830  src.init(op_offs, obj);
831  }
832 
833  switch (memoryOrder) {
834  case BRIG_MEMORY_ORDER_NONE:
835  setFlag(NoOrder);
836  break;
837  case BRIG_MEMORY_ORDER_RELAXED:
838  setFlag(RelaxedOrder);
839  break;
840  case BRIG_MEMORY_ORDER_SC_ACQUIRE:
841  setFlag(Acquire);
842  break;
843  case BRIG_MEMORY_ORDER_SC_RELEASE:
844  setFlag(Release);
845  break;
846  case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
847  setFlag(AcquireRelease);
848  break;
849  default:
850  fatal("StInst has bad memory order type\n");
851  }
852 
853  switch (memoryScope) {
854  case BRIG_MEMORY_SCOPE_NONE:
855  setFlag(NoScope);
856  break;
857  case BRIG_MEMORY_SCOPE_WORKITEM:
858  setFlag(WorkitemScope);
859  break;
860  case BRIG_MEMORY_SCOPE_WORKGROUP:
861  setFlag(WorkgroupScope);
862  break;
863  case BRIG_MEMORY_SCOPE_AGENT:
864  setFlag(DeviceScope);
865  break;
866  case BRIG_MEMORY_SCOPE_SYSTEM:
867  setFlag(SystemScope);
868  break;
869  default:
870  fatal("StInst has bad memory scope type\n");
871  }
872 
873  switch (segment) {
874  case BRIG_SEGMENT_GLOBAL:
875  setFlag(GlobalSegment);
876  break;
877  case BRIG_SEGMENT_GROUP:
878  setFlag(GroupSegment);
879  break;
880  case BRIG_SEGMENT_PRIVATE:
881  setFlag(PrivateSegment);
882  break;
883  case BRIG_SEGMENT_READONLY:
884  setFlag(ReadOnlySegment);
885  break;
886  case BRIG_SEGMENT_SPILL:
887  setFlag(SpillSegment);
888  break;
889  case BRIG_SEGMENT_FLAT:
890  setFlag(Flat);
891  break;
892  case BRIG_SEGMENT_ARG:
893  setFlag(ArgSegment);
894  break;
895  default:
896  panic("St: segment %d not supported\n", segment);
897  }
898  }
899 
900  int numDstRegOperands() override { return 0; }
901  int numSrcRegOperands() override
902  {
903  return src.isVectorRegister() + this->addr.isVectorRegister();
904  }
905  int getNumOperands() override
906  {
907  if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
908  return 2;
909  else
910  return 1;
911  }
912  bool isVectorRegister(int operandIndex) override
913  {
914  assert(operandIndex >= 0 && operandIndex < getNumOperands());
915  return !operandIndex ? src.isVectorRegister() :
916  this->addr.isVectorRegister();
917  }
918  bool isCondRegister(int operandIndex) override
919  {
920  assert(operandIndex >= 0 && operandIndex < getNumOperands());
921  return !operandIndex ? src.isCondRegister() :
922  this->addr.isCondRegister();
923  }
924  bool isScalarRegister(int operandIndex) override
925  {
926  assert(operandIndex >= 0 && operandIndex < getNumOperands());
927  return !operandIndex ? src.isScalarRegister() :
928  this->addr.isScalarRegister();
929  }
930  bool isSrcOperand(int operandIndex) override
931  {
932  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
933  return true;
934  }
935  bool isDstOperand(int operandIndex) override { return false; }
936  int getOperandSize(int operandIndex) override
937  {
938  assert(operandIndex >= 0 && operandIndex < getNumOperands());
939  return !operandIndex ? src.opSize() : this->addr.opSize();
940  }
941  int
942  getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
943  {
944  assert(operandIndex >= 0 && operandIndex < getNumOperands());
945  return !operandIndex ? src.regIndex() : this->addr.regIndex();
946  }
947  };
948 
949 
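// StInst adds support for vector stores (up to four source registers via
// src_vect) and, when separate acquire/release is enabled, issues a release
// fence before the store data is sent out.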
950  template<typename MemDataType, typename SrcDataType,
951  typename AddrOperandType>
952  class StInst :
953  public StInstBase<MemDataType, typename SrcDataType::OperandType,
954  AddrOperandType>,
955  public MemInst
956  {
957  public:
958  typename SrcDataType::OperandType::SrcOperand src_vect[4];
959  uint16_t num_src_operands;
960  void generateDisassembly() override;
961 
962  StInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
963  const char *_opcode, int srcIdx)
964  : StInstBase<MemDataType, typename SrcDataType::OperandType,
965  AddrOperandType>(ib, obj, _opcode),
966  MemInst(SrcDataType::memType)
967  {
968  init_addr(&this->addr);
969 
970  BrigRegOperandInfo rinfo;
971  unsigned op_offs = obj->getOperandPtr(ib->operands,srcIdx);
972  const Brig::BrigOperand *baseOp = obj->getOperand(op_offs);
973 
974  if (baseOp->kind == Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES) {
975  const Brig::BrigOperandConstantBytes *op =
976  (Brig::BrigOperandConstantBytes*)baseOp;
977 
978  rinfo = BrigRegOperandInfo((Brig::BrigKind16_t)op->base.kind,
979  Brig::BRIG_TYPE_NONE);
980  } else {
981  rinfo = findRegDataType(op_offs, obj);
982  }
983 
984  if (baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
985  const Brig::BrigOperandOperandList *brigRegVecOp =
986  (const Brig::BrigOperandOperandList*)baseOp;
987 
988  num_src_operands =
989  *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4;
990 
991  assert(num_src_operands <= 4);
992  } else {
993  num_src_operands = 1;
994  }
995 
996  if (num_src_operands > 1) {
997  assert(baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
998 
999  for (int i = 0; i < num_src_operands; ++i) {
1000  src_vect[i].init_from_vect(op_offs, obj, i);
1001  }
1002  }
1003  }
1004 
1005  void
1006  initiateAcc(GPUDynInstPtr gpuDynInst) override
1007  {
1008  // before performing a store, check if this store has
1009  // release semantics, and if so issue a release first
1010  if (!this->isLocalMem()) {
1011  if (gpuDynInst->computeUnit()->shader->separate_acquire_release
1012  && gpuDynInst->isRelease()) {
1013 
1014  gpuDynInst->statusBitVector = VectorMask(1);
1015  gpuDynInst->execContinuation = &GPUStaticInst::execSt;
1016  gpuDynInst->useContinuation = true;
1017  // create request
1018  RequestPtr req = std::make_shared<Request>(0, 0, 0, 0,
1019  gpuDynInst->computeUnit()->masterId(),
1020  0, gpuDynInst->wfDynId);
1021  req->setFlags(Request::RELEASE);
1022  gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
1023 
1024  return;
1025  }
1026  }
1027 
1028  // if there is no release semantic, perform stores immediately
1029  execSt(gpuDynInst);
1030  }
1031 
1032  // stores don't write anything back, so there is nothing
1033  // to do here. we only override this method to avoid the
1034  // fatal in the base class implementation
1035  void completeAcc(GPUDynInstPtr gpuDynInst) override { }
1036 
1037  private:
1038  // execSt may be called through a continuation
1039  // if the store had release semantics. see comment for
1040  // execSt in gpu_static_inst.hh
1041  void
1042  execSt(GPUDynInstPtr gpuDynInst) override
1043  {
1044  typedef typename MemDataType::CType c0;
1045 
1046  gpuDynInst->statusBitVector = gpuDynInst->exec_mask;
1047 
1048  if (num_src_operands > 1) {
1049  for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i)
1050  if (gpuDynInst->exec_mask[i])
1051  gpuDynInst->statusVector.push_back(num_src_operands);
1052  else
1053  gpuDynInst->statusVector.push_back(0);
1054  }
1055 
1056  for (int k = 0; k < num_src_operands; ++k) {
1057  c0 *d = &((c0*)gpuDynInst->d_data)
1058  [k * gpuDynInst->computeUnit()->wfSize()];
1059 
1060  for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
1061  if (gpuDynInst->exec_mask[i]) {
1062  Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);
1063 
1064  if (this->isLocalMem()) {
1065  //store to shared memory
1066  gpuDynInst->wavefront()->ldsChunk->write<c0>(vaddr,
1067  *d);
1068  } else {
1069  RequestPtr req = std::make_shared<Request>(
1070  0, vaddr, sizeof(c0), 0,
1071  gpuDynInst->computeUnit()->masterId(),
1072  0, gpuDynInst->wfDynId);
1073 
1074  gpuDynInst->setRequestFlags(req);
1075  PacketPtr pkt = new Packet(req, MemCmd::WriteReq);
1076  pkt->dataStatic<c0>(d);
1077 
1078  // translation is performed in sendRequest()
1079  // the request will be finished when the store completes
1080  gpuDynInst->useContinuation = false;
1081  gpuDynInst->computeUnit()->sendRequest(gpuDynInst,
1082  i, pkt);
1083 
1084  }
1085  }
1086  ++d;
1087  }
1088  }
1089 
1090  gpuDynInst->updateStats();
1091  }
1092 
1093  public:
1094  bool isVectorRegister(int operandIndex) override
1095  {
1096  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1097  if (operandIndex == num_src_operands)
1098  return this->addr.isVectorRegister();
1099  if (num_src_operands > 1)
1100  return src_vect[operandIndex].isVectorRegister();
1101  else if (num_src_operands == 1)
1102  return StInstBase<MemDataType,
1103  typename SrcDataType::OperandType,
1104  AddrOperandType>::src.isVectorRegister();
1105  return false;
1106  }
1107  bool isCondRegister(int operandIndex) override
1108  {
1109  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1110  if (operandIndex == num_src_operands)
1111  return this->addr.isCondRegister();
1112  if (num_src_operands > 1)
1113  return src_vect[operandIndex].isCondRegister();
1114  else if (num_src_operands == 1)
1115  return StInstBase<MemDataType,
1116  typename SrcDataType::OperandType,
1117  AddrOperandType>::src.isCondRegister();
1118  return false;
1119  }
1120  bool isScalarRegister(int operandIndex) override
1121  {
1122  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1123  if (operandIndex == num_src_operands)
1124  return this->addr.isScalarRegister();
1125  if (num_src_operands > 1)
1126  return src_vect[operandIndex].isScalarRegister();
1127  else if (num_src_operands == 1)
1128  return StInstBase<MemDataType,
1129  typename SrcDataType::OperandType,
1130  AddrOperandType>::src.isScalarRegister();
1131  return false;
1132  }
1133  bool isSrcOperand(int operandIndex) override
1134  {
1135  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1136  return true;
1137  }
1138  bool isDstOperand(int operandIndex) override { return false; }
1139  int getOperandSize(int operandIndex) override
1140  {
1141  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1142  if (operandIndex == num_src_operands)
1143  return this->addr.opSize();
1144  if (num_src_operands > 1)
1145  return src_vect[operandIndex].opSize();
1146  else if (num_src_operands == 1)
1147  return StInstBase<MemDataType,
1148  typename SrcDataType::OperandType,
1149  AddrOperandType>::src.opSize();
1150  return 0;
1151  }
1152  int
1153  getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
1154  {
1155  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1156  if (operandIndex == num_src_operands)
1157  return this->addr.regIndex();
1158  if (num_src_operands > 1)
1159  return src_vect[operandIndex].regIndex();
1160  else if (num_src_operands == 1)
1161  return StInstBase<MemDataType,
1162  typename SrcDataType::OperandType,
1163  AddrOperandType>::src.regIndex();
1164  return -1;
1165  }
1166  int getNumOperands() override
1167  {
1168  if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
1169  return num_src_operands + 1;
1170  else
1171  return num_src_operands;
1172  }
1173  void execute(GPUDynInstPtr gpuDynInst) override;
1174  };
1175 
1176  template<typename DataType, typename SrcDataType>
1177  GPUStaticInst*
1178  decodeSt(const Brig::BrigInstBase *ib, const BrigObject *obj)
1179  {
1180  int srcIdx = 0;
1181  int destIdx = 1;
1182  if (ib->opcode == Brig::BRIG_OPCODE_ATOMIC ||
1183  ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET) {
1184  srcIdx = 1;
1185  destIdx = 0;
1186  }
1187  unsigned op_offs = obj->getOperandPtr(ib->operands,destIdx);
1188 
1189  BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);
1190 
1191  if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
1192  return new StInst<DataType, SrcDataType,
1193  NoRegAddrOperand>(ib, obj, "st", srcIdx);
1194  } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
1195  // V2/V4 not allowed
1196  switch (tmp.regKind) {
1197  case Brig::BRIG_REGISTER_KIND_SINGLE:
1198  return new StInst<DataType, SrcDataType,
1199  SRegAddrOperand>(ib, obj, "st", srcIdx);
1200  case Brig::BRIG_REGISTER_KIND_DOUBLE:
1201  return new StInst<DataType, SrcDataType,
1202  DRegAddrOperand>(ib, obj, "st", srcIdx);
1203  default:
1204  fatal("Bad st register operand type %d\n", tmp.type);
1205  }
1206  } else {
1207  fatal("Bad st register operand kind %d\n", tmp.kind);
1208  }
1209  }
1210 
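// AtomicInstBase covers both the atomic (with destination) and atomicnoret
// encodings; the HasDst template parameter controls whether operand 0 is the
// destination register or the address.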
1211  template<typename OperandType, typename AddrOperandType, int NumSrcOperands,
1212  bool HasDst>
1213  class AtomicInstBase : public HsailGPUStaticInst
1214  {
1215  public:
1216  typename OperandType::DestOperand dest;
1217  typename OperandType::SrcOperand src[NumSrcOperands];
1218  AddrOperandType addr;
1219 
1220  Brig::BrigSegment segment;
1221  Brig::BrigMemoryOrder memoryOrder;
1222  Brig::BrigAtomicOperation atomicOperation;
1223  Brig::BrigMemoryScope memoryScope;
1224  Brig::BrigOpcode opcode;
1225 
1226  AtomicInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
1227  const char *_opcode)
1228  : HsailGPUStaticInst(obj, _opcode)
1229  {
1230  using namespace Brig;
1231 
1232  const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
1233 
1234  segment = (BrigSegment)at->segment;
1235  memoryScope = (BrigMemoryScope)at->memoryScope;
1236  memoryOrder = (BrigMemoryOrder)at->memoryOrder;
1237  atomicOperation = (BrigAtomicOperation)at->atomicOperation;
1238  opcode = (BrigOpcode)ib->opcode;
1239 
1240  assert(opcode == Brig::BRIG_OPCODE_ATOMICNORET ||
1241  opcode == Brig::BRIG_OPCODE_ATOMIC);
1242 
1243  setFlag(MemoryRef);
1244 
1245  if (opcode == Brig::BRIG_OPCODE_ATOMIC) {
1246  setFlag(AtomicReturn);
1247  } else {
1248  setFlag(AtomicNoReturn);
1249  }
1250 
1251  switch (memoryOrder) {
1252  case BRIG_MEMORY_ORDER_NONE:
1253  setFlag(NoOrder);
1254  break;
1255  case BRIG_MEMORY_ORDER_RELAXED:
1256  setFlag(RelaxedOrder);
1257  break;
1258  case BRIG_MEMORY_ORDER_SC_ACQUIRE:
1259  setFlag(Acquire);
1260  break;
1261  case BRIG_MEMORY_ORDER_SC_RELEASE:
1262  setFlag(Release);
1263  break;
1264  case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
1265  setFlag(AcquireRelease);
1266  break;
1267  default:
1268  fatal("AtomicInst has bad memory order type\n");
1269  }
1270 
1271  switch (memoryScope) {
1272  case BRIG_MEMORY_SCOPE_NONE:
1273  setFlag(NoScope);
1274  break;
1275  case BRIG_MEMORY_SCOPE_WORKITEM:
1276  setFlag(WorkitemScope);
1277  break;
1278  case BRIG_MEMORY_SCOPE_WORKGROUP:
1279  setFlag(WorkgroupScope);
1280  break;
1281  case BRIG_MEMORY_SCOPE_AGENT:
1282  setFlag(DeviceScope);
1283  break;
1284  case BRIG_MEMORY_SCOPE_SYSTEM:
1285  setFlag(SystemScope);
1286  break;
1287  default:
1288  fatal("AtomicInst has bad memory scope type\n");
1289  }
1290 
1291  switch (atomicOperation) {
1292  case Brig::BRIG_ATOMIC_AND:
1293  setFlag(AtomicAnd);
1294  break;
1295  case Brig::BRIG_ATOMIC_OR:
1296  setFlag(AtomicOr);
1297  break;
1298  case Brig::BRIG_ATOMIC_XOR:
1299  setFlag(AtomicXor);
1300  break;
1301  case Brig::BRIG_ATOMIC_CAS:
1302  setFlag(AtomicCAS);
1303  break;
1304  case Brig::BRIG_ATOMIC_EXCH:
1305  setFlag(AtomicExch);
1306  break;
1307  case Brig::BRIG_ATOMIC_ADD:
1308  setFlag(AtomicAdd);
1309  break;
1310  case Brig::BRIG_ATOMIC_WRAPINC:
1311  setFlag(AtomicInc);
1312  break;
1313  case Brig::BRIG_ATOMIC_WRAPDEC:
1314  setFlag(AtomicDec);
1315  break;
1316  case Brig::BRIG_ATOMIC_MIN:
1317  setFlag(AtomicMin);
1318  break;
1319  case Brig::BRIG_ATOMIC_MAX:
1320  setFlag(AtomicMax);
1321  break;
1322  case Brig::BRIG_ATOMIC_SUB:
1323  setFlag(AtomicSub);
1324  break;
1325  default:
1326  fatal("Bad BrigAtomicOperation code %d\n", atomicOperation);
1327  }
1328 
1329  switch (segment) {
1330  case BRIG_SEGMENT_GLOBAL:
1331  setFlag(GlobalSegment);
1332  break;
1333  case BRIG_SEGMENT_GROUP:
1334  setFlag(GroupSegment);
1335  break;
1336  case BRIG_SEGMENT_FLAT:
1337  setFlag(Flat);
1338  break;
1339  default:
1340  panic("Atomic: segment %d not supported\n", segment);
1341  }
1342 
1343  if (HasDst) {
1344  unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
1345  dest.init(op_offs, obj);
1346 
1347  op_offs = obj->getOperandPtr(ib->operands, 1);
1348  addr.init(op_offs, obj);
1349 
1350  for (int i = 0; i < NumSrcOperands; ++i) {
1351  op_offs = obj->getOperandPtr(ib->operands, i + 2);
1352  src[i].init(op_offs, obj);
1353  }
1354  } else {
1355 
1356  unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
1357  addr.init(op_offs, obj);
1358 
1359  for (int i = 0; i < NumSrcOperands; ++i) {
1360  op_offs = obj->getOperandPtr(ib->operands, i + 1);
1361  src[i].init(op_offs, obj);
1362  }
1363  }
1364  }
1365 
1366  int numSrcRegOperands()
1367  {
1368  int operands = 0;
1369  for (int i = 0; i < NumSrcOperands; i++) {
1370  if (src[i].isVectorRegister()) {
1371  operands++;
1372  }
1373  }
1374  if (addr.isVectorRegister())
1375  operands++;
1376  return operands;
1377  }
1378  int numDstRegOperands() { return dest.isVectorRegister(); }
1379  int getNumOperands()
1380  {
1381  if (addr.isVectorRegister())
1382  return(NumSrcOperands + 2);
1383  return(NumSrcOperands + 1);
1384  }
1385  bool isVectorRegister(int operandIndex)
1386  {
1387  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1388  if (operandIndex < NumSrcOperands)
1389  return src[operandIndex].isVectorRegister();
1390  else if (operandIndex == NumSrcOperands)
1391  return(addr.isVectorRegister());
1392  else
1393  return dest.isVectorRegister();
1394  }
1395  bool isCondRegister(int operandIndex)
1396  {
1397  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1398  if (operandIndex < NumSrcOperands)
1399  return src[operandIndex].isCondRegister();
1400  else if (operandIndex == NumSrcOperands)
1401  return(addr.isCondRegister());
1402  else
1403  return dest.isCondRegister();
1404  }
1405  bool isScalarRegister(int operandIndex)
1406  {
1407  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1408  if (operandIndex < NumSrcOperands)
1409  return src[operandIndex].isScalarRegister();
1410  else if (operandIndex == NumSrcOperands)
1411  return(addr.isScalarRegister());
1412  else
1413  return dest.isScalarRegister();
1414  }
1415  bool isSrcOperand(int operandIndex)
1416  {
1417  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1418  if (operandIndex < NumSrcOperands)
1419  return true;
1420  else if (operandIndex == NumSrcOperands)
1421  return(addr.isVectorRegister());
1422  else
1423  return false;
1424  }
1425  bool isDstOperand(int operandIndex)
1426  {
1427  if (operandIndex <= NumSrcOperands)
1428  return false;
1429  else
1430  return true;
1431  }
1432  int getOperandSize(int operandIndex)
1433  {
1434  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1435  if (operandIndex < NumSrcOperands)
1436  return(src[operandIndex].opSize());
1437  else if (operandIndex == NumSrcOperands)
1438  return(addr.opSize());
1439  else
1440  return(dest.opSize());
1441  }
1442  int
1443  getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
1444  {
1445  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1446  if (operandIndex < NumSrcOperands)
1447  return(src[operandIndex].regIndex());
1448  else if (operandIndex == NumSrcOperands)
1449  return(addr.regIndex());
1450  else
1451  return(dest.regIndex());
1452  return -1;
1453  }
1454  };
1455 
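// AtomicInst performs the read-modify-write: LDS atomics are emulated in
// place on the wavefront's ldsChunk, while global atomics are sent out as
// SwapReq packets carrying an AtomicOpFunctor built from the source operands.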
1456  template<typename MemDataType, typename AddrOperandType, int NumSrcOperands,
1457  bool HasDst>
1458  class AtomicInst :
1459  public AtomicInstBase<typename MemDataType::OperandType,
1460  AddrOperandType, NumSrcOperands, HasDst>,
1461  public MemInst
1462  {
1463  public:
1464  void generateDisassembly() override;
1465 
1466  AtomicInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
1467  const char *_opcode)
1468  : AtomicInstBase<typename MemDataType::OperandType, AddrOperandType,
1469  NumSrcOperands, HasDst>
1470  (ib, obj, _opcode),
1471  MemInst(MemDataType::memType)
1472  {
1473  init_addr(&this->addr);
1474  }
1475 
1476  void
1477  initiateAcc(GPUDynInstPtr gpuDynInst) override
1478  {
1479  // before doing the RMW, check if this atomic has
1480  // release semantics, and if so issue a release first
1481  if (!this->isLocalMem()) {
1482  if (gpuDynInst->computeUnit()->shader->separate_acquire_release
1483  && (gpuDynInst->isRelease()
1484  || gpuDynInst->isAcquireRelease())) {
1485 
1486  gpuDynInst->statusBitVector = VectorMask(1);
1487 
1488  gpuDynInst->execContinuation = &GPUStaticInst::execAtomic;
1489  gpuDynInst->useContinuation = true;
1490 
1491  // create request
1492  RequestPtr req = std::make_shared<Request>(0, 0, 0, 0,
1493  gpuDynInst->computeUnit()->masterId(),
1494  0, gpuDynInst->wfDynId);
1495  req->setFlags(Request::RELEASE);
1496  gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
1497 
1498  return;
1499  }
1500  }
1501 
1502  // if there is no release semantic, execute the RMW immediately
1503  execAtomic(gpuDynInst);
1504 
1505  }
1506 
1507  void
1508  completeAcc(GPUDynInstPtr gpuDynInst) override
1509  {
1510  // if this is not an atomic return op, then we
1511  // have nothing more to do.
1512  if (this->isAtomicRet()) {
1513  // the size of the src operands and the
1514  // memory being operated on must match
1515  // for HSAIL atomics - this assumption may
1516  // not apply to all ISAs
1517  typedef typename MemDataType::CType CType;
1518 
1519  Wavefront *w = gpuDynInst->wavefront();
1520  int dst = this->dest.regIndex();
1521  std::vector<uint32_t> regVec;
1522  // virtual->physical VGPR mapping
1523  int physVgpr = w->remap(dst, sizeof(CType), 1);
1524  regVec.push_back(physVgpr);
1525  CType *p1 = &((CType*)gpuDynInst->d_data)[0];
1526 
1527  for (int i = 0; i < w->computeUnit->wfSize(); ++i) {
1528  if (gpuDynInst->exec_mask[i]) {
1529  DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: "
1530  "$%s%d <- %d global ld done (src = wavefront "
1531  "ld inst)\n", w->computeUnit->cu_id, w->simdId,
1532  w->wfSlotId, i, sizeof(CType) == 4 ? "s" : "d",
1533  dst, *p1);
1534  // write the value into the physical VGPR. This is a
1535  // purely functional operation. No timing is modeled.
1536  w->computeUnit->vrf[w->simdId]->write<CType>(physVgpr, *p1, i);
1537  }
1538  ++p1;
1539  }
1540 
1541  // Schedule the write operation of the load data on the VRF.
1542  // This simply models the timing aspect of the VRF write operation.
1543  // It does not modify the physical VGPR.
1544  int loadVrfBankConflictCycles = gpuDynInst->computeUnit()->
1545  vrf[w->simdId]->exec(gpuDynInst->seqNum(), w, regVec,
1546  sizeof(CType), gpuDynInst->time);
1547 
1548  if (this->isGlobalMem()) {
1549  gpuDynInst->computeUnit()->globalMemoryPipe
1550  .incLoadVRFBankConflictCycles(loadVrfBankConflictCycles);
1551  } else {
1552  assert(this->isLocalMem());
1553  gpuDynInst->computeUnit()->localMemoryPipe
1554  .incLoadVRFBankConflictCycles(loadVrfBankConflictCycles);
1555  }
1556  }
1557  }
1558 
1559  void execute(GPUDynInstPtr gpuDynInst) override;
1560 
1561  private:
1562  // execAtomic may be called through a continuation
1563  // if the RMW had release semantics. see comment for
1564  // execContinuation in gpu_dyn_inst.hh
1565  void
1566  execAtomic(GPUDynInstPtr gpuDynInst) override
1567  {
1568  gpuDynInst->statusBitVector = gpuDynInst->exec_mask;
1569 
1570  typedef typename MemDataType::CType c0;
1571 
1572  c0 *d = &((c0*) gpuDynInst->d_data)[0];
1573  c0 *e = &((c0*) gpuDynInst->a_data)[0];
1574  c0 *f = &((c0*) gpuDynInst->x_data)[0];
1575 
1576  for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
1577  if (gpuDynInst->exec_mask[i]) {
1578  Addr vaddr = gpuDynInst->addr[i];
1579 
1580  if (this->isLocalMem()) {
1581  Wavefront *wavefront = gpuDynInst->wavefront();
1582  *d = wavefront->ldsChunk->read<c0>(vaddr);
1583 
1584  if (this->isAtomicAdd()) {
1585  wavefront->ldsChunk->write<c0>(vaddr,
1586  wavefront->ldsChunk->read<c0>(vaddr) + (*e));
1587  } else if (this->isAtomicSub()) {
1588  wavefront->ldsChunk->write<c0>(vaddr,
1589  wavefront->ldsChunk->read<c0>(vaddr) - (*e));
1590  } else if (this->isAtomicMax()) {
1591  wavefront->ldsChunk->write<c0>(vaddr,
1592  std::max(wavefront->ldsChunk->read<c0>(vaddr),
1593  (*e)));
1594  } else if (this->isAtomicMin()) {
1595  wavefront->ldsChunk->write<c0>(vaddr,
1596  std::min(wavefront->ldsChunk->read<c0>(vaddr),
1597  (*e)));
1598  } else if (this->isAtomicAnd()) {
1599  wavefront->ldsChunk->write<c0>(vaddr,
1600  wavefront->ldsChunk->read<c0>(vaddr) & (*e));
1601  } else if (this->isAtomicOr()) {
1602  wavefront->ldsChunk->write<c0>(vaddr,
1603  wavefront->ldsChunk->read<c0>(vaddr) | (*e));
1604  } else if (this->isAtomicXor()) {
1605  wavefront->ldsChunk->write<c0>(vaddr,
1606  wavefront->ldsChunk->read<c0>(vaddr) ^ (*e));
1607  } else if (this->isAtomicInc()) {
1608  wavefront->ldsChunk->write<c0>(vaddr,
1609  wavefront->ldsChunk->read<c0>(vaddr) + 1);
1610  } else if (this->isAtomicDec()) {
1611  wavefront->ldsChunk->write<c0>(vaddr,
1612  wavefront->ldsChunk->read<c0>(vaddr) - 1);
1613  } else if (this->isAtomicExch()) {
1614  wavefront->ldsChunk->write<c0>(vaddr, (*e));
1615  } else if (this->isAtomicCAS()) {
1616  wavefront->ldsChunk->write<c0>(vaddr,
1617  (wavefront->ldsChunk->read<c0>(vaddr) == (*e)) ?
1618  (*f) : wavefront->ldsChunk->read<c0>(vaddr));
1619  } else {
1620  fatal("Unrecognized or invalid HSAIL atomic op "
1621  "type.\n");
1622  }
1623  } else {
1624  RequestPtr req =
1625  std::make_shared<Request>(0, vaddr, sizeof(c0), 0,
1626  gpuDynInst->computeUnit()->masterId(),
1627  0, gpuDynInst->wfDynId,
1628  gpuDynInst->makeAtomicOpFunctor<c0>(e,
1629  f));
1630 
1631  gpuDynInst->setRequestFlags(req);
1632  PacketPtr pkt = new Packet(req, MemCmd::SwapReq);
1633  pkt->dataStatic(d);
1634 
1635  if (gpuDynInst->computeUnit()->shader->
1636  separate_acquire_release &&
1637  (gpuDynInst->isAcquire())) {
1638  // if this atomic has acquire semantics,
1639  // schedule the continuation to perform an
1640  // acquire after the RMW completes
1641  gpuDynInst->execContinuation =
1642  &GPUStaticInst::execAtomicAcq;
1643 
1644  gpuDynInst->useContinuation = true;
1645  } else {
1646  // the request will be finished when the RMW completes
1647  gpuDynInst->useContinuation = false;
1648  }
1649  // translation is performed in sendRequest()
1650  gpuDynInst->computeUnit()->sendRequest(gpuDynInst, i,
1651  pkt);
1652  }
1653  }
1654 
1655  ++d;
1656  ++e;
1657  ++f;
1658  }
1659 
1660  gpuDynInst->updateStats();
1661  }
1662 
1663  // execAtomicAcq will always be called through a continuation.
1664  // see comment for execContinuation in gpu_dyn_inst.hh
1665  void
1666  execAtomicAcq(GPUDynInstPtr gpuDynInst) override
1667  {
1668  // after performing the RMW, check to see if this instruction
1669  // has acquire semantics, and if so, issue an acquire
1670  if (!this->isLocalMem()) {
1671  if (gpuDynInst->computeUnit()->shader->separate_acquire_release
1672  && gpuDynInst->isAcquire()) {
1673  gpuDynInst->statusBitVector = VectorMask(1);
1674 
1675  // the request will be finished when
1676  // the acquire completes
1677  gpuDynInst->useContinuation = false;
1678  // create request
1679  RequestPtr req = std::make_shared<Request>(0, 0, 0, 0,
1680  gpuDynInst->computeUnit()->masterId(),
1681  0, gpuDynInst->wfDynId);
1682  req->setFlags(Request::ACQUIRE);
1683  gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
1684  }
1685  }
1686  }
1687  };
1688 
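// Atomic decode: atomic_ld/atomic_st are routed to the regular ld/st decoders,
// CAS gets two source operands while every other operation gets one, and the
// noret opcode selects AtomicInst<..., HasDst = false>.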
1689  template<typename DataType, typename AddrOperandType, int NumSrcOperands>
1690  GPUStaticInst*
1691  constructAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
1692  {
1693  const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;
1694 
1695  if (at->atomicOperation == Brig::BRIG_ATOMIC_LD) {
1696  return decodeLd<DataType>(ib, obj);
1697  } else if (at->atomicOperation == Brig::BRIG_ATOMIC_ST) {
1698  switch (ib->type) {
1699  case Brig::BRIG_TYPE_B8:
1700  return decodeSt<S8,S8>(ib, obj);
1701  case Brig::BRIG_TYPE_B16:
1702  return decodeSt<S16,S16>(ib, obj);
1703  case Brig::BRIG_TYPE_B32:
1704  return decodeSt<S32,S32>(ib, obj);
1705  case Brig::BRIG_TYPE_B64:
1706  return decodeSt<S64,S64>(ib, obj);
1707  default: fatal("AtomicSt: Operand type mismatch %d\n", ib->type);
1708  }
1709  } else {
1710  if (ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET)
1711  return new AtomicInst<DataType, AddrOperandType,
1712  NumSrcOperands, false>(ib, obj, "atomicnoret");
1713  else
1714  return new AtomicInst<DataType, AddrOperandType,
1715  NumSrcOperands, true>(ib, obj, "atomic");
1716  }
1717  }
1718 
1719  template<typename DataType, int NumSrcOperands>
1720  GPUStaticInst*
1721  decodeAtomicHelper(const Brig::BrigInstBase *ib, const BrigObject *obj)
1722  {
1723  unsigned addrIndex = (Brig::BrigOpcode)ib->opcode ==
1724  Brig::BRIG_OPCODE_ATOMICNORET ? 0 : 1;
1725 
1726  unsigned op_offs = obj->getOperandPtr(ib->operands,addrIndex);
1727 
1728  BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);
1729 
1730  if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
1731  return constructAtomic<DataType, NoRegAddrOperand,
1732  NumSrcOperands>(ib, obj);
1733  } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
1734  // V2/V4 not allowed
1735  switch (tmp.regKind) {
1736  case Brig::BRIG_REGISTER_KIND_SINGLE:
1737  return constructAtomic<DataType, SRegAddrOperand,
1738  NumSrcOperands>(ib, obj);
1739  case Brig::BRIG_REGISTER_KIND_DOUBLE:
1740  return constructAtomic<DataType, DRegAddrOperand,
1741  NumSrcOperands>(ib, obj);
1742  default:
1743  fatal("Bad atomic register operand type %d\n", tmp.type);
1744  }
1745  } else {
1746  fatal("Bad atomic register operand kind %d\n", tmp.kind);
1747  }
1748  }
1749 
1750 
1751  template<typename DataType>
1752  GPUStaticInst*
1753  decodeAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
1754  {
1755  const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;
1756 
1757  if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) {
1758  return decodeAtomicHelper<DataType, 2>(ib, obj);
1759  } else {
1760  return decodeAtomicHelper<DataType, 1>(ib, obj);
1761  }
1762  }
1763 
1764  template<typename DataType>
1765  GPUStaticInst*
1766  decodeAtomicNoRet(const Brig::BrigInstBase *ib, const BrigObject *obj)
1767  {
1768  const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;
1769  if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) {
1770  return decodeAtomicHelper<DataType, 2>(ib, obj);
1771  } else {
1772  return decodeAtomicHelper<DataType, 1>(ib, obj);
1773  }
1774  }
1775 } // namespace HsailISA
1776 
1777 #endif // __ARCH_HSAIL_INSTS_MEM_HH__
Definition: mem.hh:1405
uint16_t num_dest_operands
Definition: mem.hh:398
Bitfield< 9 > e
AddrOperandType addr
Definition: mem.hh:1218
const Brig::BrigOperand * getOperand(int offs) const
Definition: brig_object.cc:116
void completeAcc(GPUDynInstPtr gpuDynInst) override
Definition: mem.hh:1035
int getNumOperands() override
Definition: mem.hh:905
int numSrcRegOperands() override
Definition: mem.hh:110
int numDstRegOperands() override
Definition: mem.hh:900
The request should be marked with RELEASE.
Definition: request.hh:162
void init_addr(AddrOperandBase *_addr_operand)
Definition: mem.hh:75
bool isSrcOperand(int operandIndex) override
Definition: mem.hh:1133
SrcOperandType::SrcOperand src
Definition: mem.hh:783
std::vector< VectorRegisterFile * > vrf
BrigDataOffsetOperandList32_t elements
Definition: Brig.h:1523
BrigAtomicOperation
Definition: Brig.h:270
GPUStaticInst * constructAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
Definition: mem.hh:1691
GPUStaticInst * decodeAtomicHelper(const Brig::BrigInstBase *ib, const BrigObject *obj)
Definition: mem.hh:1721
Brig::BrigRegisterKind regKind
Definition: operand.hh:82
int getOperandSize(int operandIndex) override
Definition: mem.hh:936
GPUStaticInst * decodeAtomicNoRet(const Brig::BrigInstBase *ib, const BrigObject *obj)
Definition: mem.hh:1766
LdInst(const Brig::BrigInstBase *ib, const BrigObject *obj, const char *_opcode)
Definition: mem.hh:402
void write(const uint32_t index, const T value)
a write operation
Definition: lds_state.hh:87
int getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
Definition: mem.hh:942
Definition: Brig.h:46
BrigRegOperandInfo findRegDataType(unsigned opOffset, const BrigObject *obj)
Definition: operand.cc:213
Bitfield< 4 > op
Definition: types.hh:80
int getNumOperands() override
Definition: mem.hh:691
GPUStaticInst * decodeLd2(const Brig::BrigInstBase *ib, const BrigObject *obj)
Definition: mem.hh:703
bool isSrcOperand(int operandIndex) override
Definition: mem.hh:363
uint32_t remap(uint32_t vgprIndex, uint32_t size, uint8_t mode=0)
Definition: wavefront.cc:282
Brig::BrigMemoryOrder memoryOrder
Definition: mem.hh:1221
int getOperandSize(int operandIndex) override
Definition: mem.hh:662
LdaInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, const char *_opcode)
Definition: mem.hh:96
GPUStaticInst * decodeLda(const Brig::BrigInstBase *ib, const BrigObject *obj)
Definition: mem.hh:185
int getNumOperands() override
Definition: mem.hh:338
GPUStaticInst * decodeSt(const Brig::BrigInstBase *ib, const BrigObject *obj)
Definition: mem.hh:1178
bool isDstOperand(int operandIndex) override
Definition: mem.hh:370
Brig::BrigMemoryScope memoryScope
Definition: mem.hh:218
bool isSrcOperand(int operandIndex) override
Definition: mem.hh:646
BrigWidth8_t width
Definition: Brig.h:1392
int getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
Definition: mem.hh:677
The request should be marked with ACQUIRE.
Definition: request.hh:160
bool isVectorRegister(int operandIndex)
Definition: mem.hh:1385
bool isScalarRegister(int operandIndex) override
Definition: mem.hh:126
uint8_t BrigWidth8_t
Definition: Brig.h:146
bool isScalarRegister(int operandIndex) override
Definition: mem.hh:632
BrigOpcode
Definition: Brig.h:538
int getOperandSize(int operandIndex) override
Definition: mem.hh:1139
StInst(const Brig::BrigInstBase *ib, const BrigObject *obj, const char *_opcode, int srcIdx)
Definition: mem.hh:962
ProbePointArg< PacketInfo > Packet
Packet probe point.
Definition: mem.hh:104
BrigOpcode16_t opcode
Definition: Brig.h:1321
