/*
 * Copyright (c) 2016-2017 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Anthony Gutierrez
 */

#ifndef __ARCH_GCN3_INSTS_OP_ENCODINGS_HH__
#define __ARCH_GCN3_INSTS_OP_ENCODINGS_HH__

#include "arch/gcn3/gpu_decoder.hh"
#include "arch/gcn3/insts/gpu_static_inst.hh"
#include "arch/gcn3/operand.hh"
#include "debug/GPUExec.hh"
#include "mem/ruby/system/RubySystem.hh"

namespace Gcn3ISA
{
    // --- purely virtual instruction classes ---

    class Inst_SOP2 : public GCN3GPUStaticInst
    {
      public:
        Inst_SOP2(InFmt_SOP2*, const std::string &opcode);

        int instSize() const override;
        void generateDisassembly() override;

        bool isScalarRegister(int opIdx) override;
        bool isVectorRegister(int opIdx) override;
        int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;

      protected:
        // first instruction DWORD
        InFmt_SOP2 instData;
        // possible second DWORD
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_SOP2 *);
    }; // Inst_SOP2

    class Inst_SOPK : public GCN3GPUStaticInst
    {
      public:
        Inst_SOPK(InFmt_SOPK*, const std::string &opcode);
        ~Inst_SOPK();

        int instSize() const override;
        void generateDisassembly() override;

        bool isScalarRegister(int opIdx) override;
        bool isVectorRegister(int opIdx) override;
        int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;

      protected:
        // first instruction DWORD
        InFmt_SOPK instData;
    }; // Inst_SOPK

    class Inst_SOP1 : public GCN3GPUStaticInst
    {
      public:
        Inst_SOP1(InFmt_SOP1*, const std::string &opcode);
        ~Inst_SOP1();

        int instSize() const override;
        void generateDisassembly() override;

        bool isScalarRegister(int opIdx) override;
        bool isVectorRegister(int opIdx) override;
        int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;

      protected:
        // first instruction DWORD
        InFmt_SOP1 instData;
        // possible second DWORD
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_SOP1 *);
    }; // Inst_SOP1

    class Inst_SOPC : public GCN3GPUStaticInst
    {
      public:
        Inst_SOPC(InFmt_SOPC*, const std::string &opcode);
        ~Inst_SOPC();

        int instSize() const override;
        void generateDisassembly() override;

        bool isScalarRegister(int opIdx) override;
        bool isVectorRegister(int opIdx) override;
        int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;

      protected:
        // first instruction DWORD
        InFmt_SOPC instData;
        // possible second DWORD
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_SOPC *);
    }; // Inst_SOPC

    class Inst_SOPP : public GCN3GPUStaticInst
    {
      public:
        Inst_SOPP(InFmt_SOPP*, const std::string &opcode);
        ~Inst_SOPP();

        int instSize() const override;
        void generateDisassembly() override;

        bool isScalarRegister(int opIdx) override;
        bool isVectorRegister(int opIdx) override;
        int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;

      protected:
        // first instruction DWORD
        InFmt_SOPP instData;
    }; // Inst_SOPP

    class Inst_SMEM : public GCN3GPUStaticInst
    {
      public:
        Inst_SMEM(InFmt_SMEM*, const std::string &opcode);
        ~Inst_SMEM();

        int instSize() const override;
        void generateDisassembly() override;

        bool isScalarRegister(int opIdx) override;
        bool isVectorRegister(int opIdx) override;
        int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;

      protected:
        /**
         * initiate a memory read access for N dwords
         */
        template<int N>
        void
        initMemRead(GPUDynInstPtr gpuDynInst)
        {
            int block_size = gpuDynInst->computeUnit()->cacheLineSize();
            int req_size = N * sizeof(ScalarRegU32);
            Addr vaddr = gpuDynInst->scalarAddr;

            /**
             * the base address of the cache line that holds the last
             * byte of the request
             */
            Addr split_addr = roundDown(vaddr + req_size - 1, block_size);

            assert(split_addr <= vaddr || split_addr - vaddr < block_size);
            /**
             * if the last byte's cache line base is past the request's
             * base address, the access straddles a cache line boundary
             * and must be split into two requests
             */
            bool misaligned_acc = split_addr > vaddr;

            RequestPtr req = new Request(0, vaddr, req_size, 0,
                gpuDynInst->computeUnit()->masterId(), 0,
                gpuDynInst->wfDynId);

            if (misaligned_acc) {
                RequestPtr req1, req2;
                req->splitOnVaddr(split_addr, req1, req2);
                gpuDynInst->numScalarReqs = 2;
                gpuDynInst->setRequestFlags(req1);
                gpuDynInst->setRequestFlags(req2);
                PacketPtr pkt1 = new Packet(req1, MemCmd::ReadReq);
                PacketPtr pkt2 = new Packet(req2, MemCmd::ReadReq);
                pkt1->dataStatic(gpuDynInst->scalar_data);
                pkt2->dataStatic(gpuDynInst->scalar_data + req1->getSize());
                gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt1);
                gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt2);
                delete req;
            } else {
                gpuDynInst->numScalarReqs = 1;
                gpuDynInst->setRequestFlags(req);
                PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
                pkt->dataStatic(gpuDynInst->scalar_data);
                gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt);
            }
        }

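        /*
         * Worked example of the split logic above (illustrative values):
         * with a 64 B cache line, N = 4 and vaddr = 0x7c give
         * req_size = 16 and split_addr = roundDown(0x7c + 15, 64) = 0x80.
         * Since 0x80 > 0x7c the access straddles a line boundary and is
         * split into [0x7c, 0x80) and [0x80, 0x8c), one packet per line.
         */
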
        /**
         * initiate a memory write access for N dwords
         */
        template<int N>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst)
        {
            int block_size = gpuDynInst->computeUnit()->cacheLineSize();
            int req_size = N * sizeof(ScalarRegU32);
            Addr vaddr = gpuDynInst->scalarAddr;

            /**
             * the base address of the cache line that holds the last
             * byte of the request
             */
            Addr split_addr = roundDown(vaddr + req_size - 1, block_size);

            assert(split_addr <= vaddr || split_addr - vaddr < block_size);
            /**
             * if the last byte's cache line base is past the request's
             * base address, the access straddles a cache line boundary
             * and must be split into two requests
             */
            bool misaligned_acc = split_addr > vaddr;

            RequestPtr req = new Request(0, vaddr, req_size, 0,
                gpuDynInst->computeUnit()->masterId(), 0,
                gpuDynInst->wfDynId);

            if (misaligned_acc) {
                RequestPtr req1, req2;
                req->splitOnVaddr(split_addr, req1, req2);
                gpuDynInst->numScalarReqs = 2;
                gpuDynInst->setRequestFlags(req1);
                gpuDynInst->setRequestFlags(req2);
                PacketPtr pkt1 = new Packet(req1, MemCmd::WriteReq);
                PacketPtr pkt2 = new Packet(req2, MemCmd::WriteReq);
                pkt1->dataStatic(gpuDynInst->scalar_data);
                pkt2->dataStatic(gpuDynInst->scalar_data + req1->getSize());
                gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt1);
                gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt2);
                delete req;
            } else {
                gpuDynInst->numScalarReqs = 1;
                gpuDynInst->setRequestFlags(req);
                PacketPtr pkt = new Packet(req, MemCmd::WriteReq);
                pkt->dataStatic(gpuDynInst->scalar_data);
                gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt);
            }
        }

        void
        calcAddr(GPUDynInstPtr gpuDynInst, ConstScalarOperandU64 &addr,
                 ScalarRegU32 offset)
        {
            // SMEM addresses are in bytes but must be DWORD aligned, so
            // drop the two low-order bits of the final address
            Addr vaddr = addr.rawData();
            vaddr += offset;
            vaddr &= ~0x3;
            gpuDynInst->scalarAddr = vaddr;
        }

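        /*
         * For example (illustrative values): addr.rawData() = 0x1003
         * with offset = 2 gives 0x1005, and masking with ~0x3 yields
         * the DWORD-aligned address 0x1004.
         */
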
        // first instruction DWORD
        InFmt_SMEM instData;
        // second instruction DWORD
        InFmt_SMEM_1 extData;
    }; // Inst_SMEM

    class Inst_VOP2 : public GCN3GPUStaticInst
    {
      public:
        Inst_VOP2(InFmt_VOP2*, const std::string &opcode);
        ~Inst_VOP2();

        int instSize() const override;
        void generateDisassembly() override;

        bool isScalarRegister(int opIdx) override;
        bool isVectorRegister(int opIdx) override;
        int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;

      protected:
        // first instruction DWORD
        InFmt_VOP2 instData;
        // possible second DWORD
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_VOP2 *);
    }; // Inst_VOP2

    class Inst_VOP1 : public GCN3GPUStaticInst
    {
      public:
        Inst_VOP1(InFmt_VOP1*, const std::string &opcode);
        ~Inst_VOP1();

        int instSize() const override;
        void generateDisassembly() override;

        bool isScalarRegister(int opIdx) override;
        bool isVectorRegister(int opIdx) override;
        int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;

      protected:
        // first instruction DWORD
        InFmt_VOP1 instData;
        // possible second DWORD
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_VOP1 *);
    }; // Inst_VOP1

    class Inst_VOPC : public GCN3GPUStaticInst
    {
      public:
        Inst_VOPC(InFmt_VOPC*, const std::string &opcode);
        ~Inst_VOPC();

        int instSize() const override;
        void generateDisassembly() override;

        bool isScalarRegister(int opIdx) override;
        bool isVectorRegister(int opIdx) override;
        int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;

      protected:
        // first instruction DWORD
        InFmt_VOPC instData;
        // possible second DWORD
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_VOPC *);
    }; // Inst_VOPC

    class Inst_VINTRP : public GCN3GPUStaticInst
    {
      public:
        Inst_VINTRP(InFmt_VINTRP*, const std::string &opcode);
        ~Inst_VINTRP();

        int instSize() const override;

      protected:
        // first instruction DWORD
        InFmt_VINTRP instData;
    }; // Inst_VINTRP

    class Inst_VOP3 : public GCN3GPUStaticInst
    {
      public:
        Inst_VOP3(InFmt_VOP3*, const std::string &opcode, bool sgpr_dst);
        ~Inst_VOP3();

        int instSize() const override;
        void generateDisassembly() override;

        bool isScalarRegister(int opIdx) override;
        bool isVectorRegister(int opIdx) override;
        int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;

      protected:
        // first instruction DWORD
        InFmt_VOP3 instData;
        // second instruction DWORD
        InFmt_VOP3_1 extData;

      private:
        bool hasSecondDword(InFmt_VOP3 *);
        /**
         * the v_cmp and readlane instructions in the VOP3 encoding are
         * unique because they are the only instructions that use the
         * VDST field to specify a scalar register destination; sgprDst
         * records whether this instruction's destination is an SGPR.
         */
        const bool sgprDst;
    }; // Inst_VOP3

    class Inst_VOP3_SDST_ENC : public GCN3GPUStaticInst
    {
      public:
        Inst_VOP3_SDST_ENC(InFmt_VOP3_SDST_ENC*, const std::string &opcode);
        ~Inst_VOP3_SDST_ENC();

        int instSize() const override;
        void generateDisassembly() override;

        bool isScalarRegister(int opIdx) override;
        bool isVectorRegister(int opIdx) override;
        int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;

      protected:
        // first instruction DWORD
        InFmt_VOP3_SDST_ENC instData;
        // second instruction DWORD
        InFmt_VOP3_1 extData;

      private:
        bool hasSecondDword(InFmt_VOP3_SDST_ENC *);
    }; // Inst_VOP3_SDST_ENC

    class Inst_DS : public GCN3GPUStaticInst
    {
      public:
        Inst_DS(InFmt_DS*, const std::string &opcode);
        ~Inst_DS();

        int instSize() const override;
        void generateDisassembly() override;

        bool isScalarRegister(int opIdx) override;
        bool isVectorRegister(int opIdx) override;
        int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;

      protected:
        template<typename T>
        void
        initMemRead(GPUDynInstPtr gpuDynInst, Addr offset)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr = gpuDynInst->addr[lane] + offset;

                    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]
                        = wf->ldsChunk->read<T>(vaddr);
                }
            }
        }

        template<typename T>
        void
        initDualMemRead(GPUDynInstPtr gpuDynInst, Addr offset0, Addr offset1)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
                    Addr vaddr1 = gpuDynInst->addr[lane] + offset1;

                    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2]
                        = wf->ldsChunk->read<T>(vaddr0);
                    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2 + 1]
                        = wf->ldsChunk->read<T>(vaddr1);
                }
            }
        }

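        /*
         * Note on the layout used above: the two values read for each
         * lane are stored interleaved in d_data, so lane 0 occupies
         * elements 0 and 1, lane 1 occupies elements 2 and 3, and so on.
         * initDualMemWrite() below expects the same layout.
         */
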
        template<typename T>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst, Addr offset)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr = gpuDynInst->addr[lane] + offset;
                    wf->ldsChunk->write<T>(vaddr,
                        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]);
                }
            }
        }

        template<typename T>
        void
        initDualMemWrite(GPUDynInstPtr gpuDynInst, Addr offset0, Addr offset1)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
                    Addr vaddr1 = gpuDynInst->addr[lane] + offset1;
                    wf->ldsChunk->write<T>(vaddr0,
                        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2]);
                    wf->ldsChunk->write<T>(vaddr1,
                        (reinterpret_cast<T*>(
                            gpuDynInst->d_data))[lane * 2 + 1]);
                }
            }
        }

        void
        calcAddr(GPUDynInstPtr gpuDynInst, ConstVecOperandU32 &addr)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    gpuDynInst->addr.at(lane) = (Addr)addr[lane];
                }
            }
        }

        // first instruction DWORD
        InFmt_DS instData;
        // second instruction DWORD
        InFmt_DS_1 extData;
    }; // Inst_DS

    class Inst_MUBUF : public GCN3GPUStaticInst
    {
      public:
        Inst_MUBUF(InFmt_MUBUF*, const std::string &opcode);
        ~Inst_MUBUF();

        int instSize() const override;
        void generateDisassembly() override;

        bool isScalarRegister(int opIdx) override;
        bool isVectorRegister(int opIdx) override;
        int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;

      protected:
        struct BufferRsrcDescriptor
        {
            uint64_t baseAddr : 48;
            uint32_t stride : 14;
            uint32_t cacheSwizzle : 1;
            uint32_t swizzleEn : 1;
            uint32_t numRecords : 32;
            uint32_t dstSelX : 3;
            uint32_t dstSelY : 3;
            uint32_t dstSelZ : 3;
            uint32_t dstSelW : 3;
            uint32_t numFmt : 3;
            uint32_t dataFmt : 4;
            uint32_t elemSize : 2;
            uint32_t idxStride : 2;
            uint32_t addTidEn : 1;
            uint32_t atc : 1;
            uint32_t hashEn : 1;
            uint32_t heap : 1;
            uint32_t mType : 3;
            uint32_t type : 2;
        };

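        /*
         * The bit fields above total 128 bits, the size of the GCN3
         * buffer resource descriptor held in four consecutive SGPRs;
         * calcAddr() below copies those raw SGPR values directly into
         * this struct with std::memcpy.
         */
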
        template<typename T>
        void
        initMemRead(GPUDynInstPtr gpuDynInst)
        {
            gpuDynInst->statusBitVector = gpuDynInst->exec_mask;

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr = gpuDynInst->addr[lane];

                    RequestPtr req = new Request(0, vaddr, sizeof(T), 0,
                        gpuDynInst->computeUnit()->masterId(), 0,
                        gpuDynInst->wfDynId);

                    gpuDynInst->setRequestFlags(req);

                    PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
                    pkt->dataStatic(&(reinterpret_cast<T*>(
                        gpuDynInst->d_data))[lane]);

                    gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane,
                                                           pkt);
                }
            }
        }

        template<typename T>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst)
        {
            gpuDynInst->statusBitVector = gpuDynInst->exec_mask;

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr = gpuDynInst->addr[lane];

                    RequestPtr req = new Request(0, vaddr, sizeof(T), 0,
                        gpuDynInst->computeUnit()->masterId(), 0,
                        gpuDynInst->wfDynId);

                    gpuDynInst->setRequestFlags(req);
                    PacketPtr pkt = new Packet(req, MemCmd::WriteReq);
                    pkt->dataStatic(&(reinterpret_cast<T*>(
                        gpuDynInst->d_data))[lane]);
                    gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane,
                                                           pkt);
                }
            }
        }

        void
        injectGlobalMemFence(GPUDynInstPtr gpuDynInst)
        {
            // create request and set flags
            gpuDynInst->statusBitVector = VectorMask(1);
            Request *req = new Request(0, 0, 0, 0,
                                       gpuDynInst->computeUnit()->masterId(),
                                       0, gpuDynInst->wfDynId);
            gpuDynInst->setRequestFlags(req);
            gpuDynInst->computeUnit()->
                injectGlobalMemFence(gpuDynInst, false, req);
        }

        /**
         * MUBUF instructions calculate their addresses as follows:
         *
         *   index  = vgpr_idx + (add_tid_en ? TID : 0)
         *   offset = vgpr_off + inst_offset
         *
         * Linear addressing:
         *   vaddr = base + sgpr_off + offset + stride * index
         *
         * Swizzled addressing:
         *   index_msb  = index / const_index_stride
         *   index_lsb  = index % const_index_stride
         *   offset_msb = offset / const_element_size
         *   offset_lsb = offset % const_element_size
         *
         *   vaddr = base + sgpr_off
         *         + (index_msb * stride + offset_msb * elem_size)
         *           * const_index_stride
         *         + index_lsb * elem_size + offset_lsb
         */
        template<typename VOFF, typename VIDX, typename SRSRC, typename SOFF>
        void
        calcAddr(GPUDynInstPtr gpuDynInst, VOFF v_off, VIDX v_idx,
                 SRSRC s_rsrc_desc, SOFF s_offset, int inst_offset)
        {
            Addr vaddr = 0;
            Addr base_addr = 0;
            Addr stride = 0;
            Addr buf_idx = 0;
            Addr buf_off = 0;
            BufferRsrcDescriptor rsrc_desc;

            std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),
                        sizeof(BufferRsrcDescriptor));

            base_addr = rsrc_desc.baseAddr;

            stride = rsrc_desc.addTidEn ? ((rsrc_desc.dataFmt << 14)
                + rsrc_desc.stride) : rsrc_desc.stride;

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vaddr = base_addr + s_offset.rawData();
                    /**
                     * when addTidEn is set, the thread ID within the
                     * wavefront (the lane) is added to the index taken
                     * from the vector index operand
                     */
                    buf_idx = v_idx[lane] + (rsrc_desc.addTidEn ? lane : 0);

                    buf_off = v_off[lane] + inst_offset;

                    if (rsrc_desc.swizzleEn) {
                        Addr idx_stride = 8 << rsrc_desc.idxStride;
                        Addr elem_size = 2 << rsrc_desc.elemSize;
                        Addr idx_msb = buf_idx / idx_stride;
                        Addr idx_lsb = buf_idx % idx_stride;
                        Addr off_msb = buf_off / elem_size;
                        Addr off_lsb = buf_off % elem_size;

                        vaddr += ((idx_msb * stride + off_msb * elem_size)
                            * idx_stride + idx_lsb * elem_size + off_lsb);
                    } else {
                        vaddr += buf_off + stride * buf_idx;
                    }

                    gpuDynInst->addr.at(lane) = vaddr;
                }
            }
        }

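        /*
         * Worked example of the swizzled path above (illustrative
         * values): with idxStride = 0 and elemSize = 1 the derived
         * constants are idx_stride = 8 and elem_size = 4. For
         * buf_idx = 10 and buf_off = 6: idx_msb = 1, idx_lsb = 2,
         * off_msb = 1, off_lsb = 2, so the amount added to vaddr is
         * ((1 * stride + 1 * 4) * 8) + (2 * 4) + 2 = 8 * stride + 42.
         */
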
        // first instruction DWORD
        InFmt_MUBUF instData;
        // second instruction DWORD
        InFmt_MUBUF_1 extData;
    }; // Inst_MUBUF

    class Inst_MTBUF : public GCN3GPUStaticInst
    {
      public:
        Inst_MTBUF(InFmt_MTBUF*, const std::string &opcode);
        ~Inst_MTBUF();

        int instSize() const override;

      protected:
        // first instruction DWORD
        InFmt_MTBUF instData;
        // second instruction DWORD
        InFmt_MTBUF_1 extData;

      private:
        bool hasSecondDword(InFmt_MTBUF *);
    }; // Inst_MTBUF

    class Inst_MIMG : public GCN3GPUStaticInst
    {
      public:
        Inst_MIMG(InFmt_MIMG*, const std::string &opcode);
        ~Inst_MIMG();

        int instSize() const override;

      protected:
        // first instruction DWORD
        InFmt_MIMG instData;
        // second instruction DWORD
        InFmt_MIMG_1 extData;
    }; // Inst_MIMG

    class Inst_EXP : public GCN3GPUStaticInst
    {
      public:
        Inst_EXP(InFmt_EXP*, const std::string &opcode);
        ~Inst_EXP();

        int instSize() const override;

      protected:
        // first instruction DWORD
        InFmt_EXP instData;
        // second instruction DWORD
        InFmt_EXP_1 extData;
    }; // Inst_EXP

    class Inst_FLAT : public GCN3GPUStaticInst
    {
      public:
        Inst_FLAT(InFmt_FLAT*, const std::string &opcode);
        ~Inst_FLAT();

        int instSize() const override;
        void generateDisassembly() override;

        bool isScalarRegister(int opIdx) override;
        bool isVectorRegister(int opIdx) override;
        int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;

      protected:
        template<typename T>
        void
        initMemRead(GPUDynInstPtr gpuDynInst)
        {
            gpuDynInst->statusBitVector = gpuDynInst->exec_mask;

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr = gpuDynInst->addr[lane];

                    RequestPtr req = new Request(0, vaddr, sizeof(T), 0,
                        gpuDynInst->computeUnit()->masterId(), 0,
                        gpuDynInst->wfDynId);

                    gpuDynInst->setRequestFlags(req);
                    PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
                    pkt->dataStatic(&(reinterpret_cast<T*>(
                        gpuDynInst->d_data))[lane]);
                    gpuDynInst->computeUnit()
                        ->sendRequest(gpuDynInst, lane, pkt);
                }
            }
        }

        template<int N>
        void
        initMemRead(GPUDynInstPtr gpuDynInst)
        {
            int req_size = N * sizeof(VecElemU32);
            gpuDynInst->statusBitVector = gpuDynInst->exec_mask;

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr = gpuDynInst->addr[lane];

                    RequestPtr req = new Request(0, vaddr, req_size, 0,
                        gpuDynInst->computeUnit()->masterId(), 0,
                        gpuDynInst->wfDynId);

                    gpuDynInst->setRequestFlags(req);
                    PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
                    pkt->dataStatic(&(reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * N]);
                    gpuDynInst->computeUnit()
                        ->sendRequest(gpuDynInst, lane, pkt);
                }
            }
        }

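        /*
         * Note on the layout used above: each lane's N dwords land
         * contiguously in d_data starting at element lane * N, so the
         * result buffer is strided per lane rather than interleaved
         * dword-by-dword across lanes.
         */
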
        template<typename T>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst)
        {
            gpuDynInst->statusBitVector = gpuDynInst->exec_mask;

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr = gpuDynInst->addr[lane];

                    RequestPtr req = new Request(0, vaddr, sizeof(T), 0,
                        gpuDynInst->computeUnit()->masterId(), 0,
                        gpuDynInst->wfDynId);

                    gpuDynInst->setRequestFlags(req);
                    PacketPtr pkt = new Packet(req, MemCmd::WriteReq);
                    pkt->dataStatic(&(reinterpret_cast<T*>(
                        gpuDynInst->d_data))[lane]);
                    gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane,
                                                           pkt);
                }
            }
        }

        template<int N>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst)
        {
            int req_size = N * sizeof(VecElemU32);
            gpuDynInst->statusBitVector = gpuDynInst->exec_mask;

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr = gpuDynInst->addr[lane];

                    RequestPtr req = new Request(0, vaddr, req_size, 0,
                        gpuDynInst->computeUnit()->masterId(), 0,
                        gpuDynInst->wfDynId);

                    gpuDynInst->setRequestFlags(req);
                    PacketPtr pkt = new Packet(req, MemCmd::WriteReq);
                    pkt->dataStatic(&(reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * N]);
                    gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane,
                                                           pkt);
                }
            }
        }

        template<typename T>
        void
        initAtomicAccess(GPUDynInstPtr gpuDynInst)
        {
            gpuDynInst->statusBitVector = gpuDynInst->exec_mask;

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr = gpuDynInst->addr[lane];

                    RequestPtr req = new Request(0, vaddr, sizeof(T), 0,
                        gpuDynInst->computeUnit()->masterId(), 0,
                        gpuDynInst->wfDynId,
                        gpuDynInst->makeAtomicOpFunctor<T>(
                            &(reinterpret_cast<T*>(gpuDynInst->a_data))[lane],
                            &(reinterpret_cast<T*>(
                                gpuDynInst->x_data))[lane]));

                    gpuDynInst->setRequestFlags(req);

                    PacketPtr pkt = new Packet(req, MemCmd::SwapReq);
                    pkt->dataStatic(&(reinterpret_cast<T*>(
                        gpuDynInst->d_data))[lane]);

                    gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane,
                                                           pkt);
                }
            }
        }

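        /*
         * For the atomic above, makeAtomicOpFunctor builds the operation
         * from the per-lane operands staged in a_data and x_data (the
         * second operand is only used by two-operand atomics such as
         * compare-and-swap), while the packet's data pointer into d_data
         * is where the value returned by the atomic is written.
         */
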
        void
        calcAddr(GPUDynInstPtr gpuDynInst, ConstVecOperandU64 &addr)
        {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    gpuDynInst->addr.at(lane) = addr[lane];
                }
            }
            gpuDynInst->resolveFlatSegment(gpuDynInst->exec_mask);
        }

        // first instruction DWORD
        InFmt_FLAT instData;
        // second instruction DWORD
        InFmt_FLAT_1 extData;
    }; // Inst_FLAT
} // namespace Gcn3ISA

#endif // __ARCH_GCN3_INSTS_OP_ENCODINGS_HH__