gem5  v22.1.0.0
op_encodings.hh
/*
 * Copyright (c) 2016-2021 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __ARCH_VEGA_INSTS_OP_ENCODINGS_HH__
#define __ARCH_VEGA_INSTS_OP_ENCODINGS_HH__

#include "debug/GPUExec.hh"
#include "debug/VEGA.hh"

namespace gem5
{

namespace VegaISA
{
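    /**
     * Layout of a buffer resource descriptor (128 bits in total). A copy
     * of this struct is filled in from a four-dword scalar operand and is
     * consumed by the SMEM s_buffer_* and MUBUF address calculations
     * further down in this file.
     */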
    struct BufferRsrcDescriptor
    {
        uint64_t baseAddr : 48;
        uint32_t stride : 14;
        uint32_t cacheSwizzle : 1;
        uint32_t swizzleEn : 1;
        uint32_t numRecords : 32;
        uint32_t dstSelX : 3;
        uint32_t dstSelY : 3;
        uint32_t dstSelZ : 3;
        uint32_t dstSelW : 3;
        uint32_t numFmt : 3;
        uint32_t dataFmt : 4;
        uint32_t elemSize : 2;
        uint32_t idxStride : 2;
        uint32_t addTidEn : 1;
        uint32_t atc : 1;
        uint32_t hashEn : 1;
        uint32_t heap : 1;
        uint32_t mType : 3;
        uint32_t type : 2;
    };

    // --- purely virtual instruction classes ---

    class Inst_SOP2 : public VEGAGPUStaticInst
    {
      public:
        Inst_SOP2(InFmt_SOP2*, const std::string &opcode);

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_SOP2 instData;
        // possible second DWORD
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_SOP2 *);
    }; // Inst_SOP2

    class Inst_SOPK : public VEGAGPUStaticInst
    {
      public:
        Inst_SOPK(InFmt_SOPK*, const std::string &opcode);
        ~Inst_SOPK();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_SOPK instData;
        // possible second DWORD
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_SOPK *);
    }; // Inst_SOPK

    class Inst_SOP1 : public VEGAGPUStaticInst
    {
      public:
        Inst_SOP1(InFmt_SOP1*, const std::string &opcode);
        ~Inst_SOP1();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_SOP1 instData;
        // possible second DWORD
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_SOP1 *);
    }; // Inst_SOP1

    class Inst_SOPC : public VEGAGPUStaticInst
    {
      public:
        Inst_SOPC(InFmt_SOPC*, const std::string &opcode);
        ~Inst_SOPC();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_SOPC instData;
        // possible second DWORD
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_SOPC *);
    }; // Inst_SOPC

    class Inst_SOPP : public VEGAGPUStaticInst
    {
      public:
        Inst_SOPP(InFmt_SOPP*, const std::string &opcode);
        ~Inst_SOPP();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_SOPP instData;
    }; // Inst_SOPP

    class Inst_SMEM : public VEGAGPUStaticInst
    {
      public:
        Inst_SMEM(InFmt_SMEM*, const std::string &opcode);
        ~Inst_SMEM();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        /**
         * initiate a memory read access for N dwords
         */
        template<int N>
        void
        initMemRead(GPUDynInstPtr gpuDynInst)
        {
            initMemReqScalarHelper<ScalarRegU32, N>(gpuDynInst,
                                                    MemCmd::ReadReq);
        }

        /**
         * initiate a memory write access for N dwords
         */
        template<int N>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst)
        {
            initMemReqScalarHelper<ScalarRegU32, N>(gpuDynInst,
                                                    MemCmd::WriteReq);
        }

        /**
         * For normal s_load_dword/s_store_dword instruction addresses.
         */
        void
        calcAddr(GPUDynInstPtr gpu_dyn_inst, ConstScalarOperandU64 &addr,
                 ScalarRegU32 offset)
        {
            Addr vaddr = ((addr.rawData() + offset) & ~0x3);
            gpu_dyn_inst->scalarAddr = vaddr;
        }

        /**
         * For s_buffer_load_dword/s_buffer_store_dword instruction
         * addresses.
         */
        void
        calcAddr(GPUDynInstPtr gpu_dyn_inst,
                 ConstScalarOperandU128 &s_rsrc_desc, ScalarRegU32 offset)
        {
            BufferRsrcDescriptor rsrc_desc;
            ScalarRegU32 clamped_offset(offset);
            std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),
                        sizeof(BufferRsrcDescriptor));

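            /**
             * Clamp the offset to the extent of the buffer: with a zero
             * stride the extent is numRecords bytes, otherwise it is
             * stride * numRecords. Offsets past the end are pinned to that
             * limit before the address is formed below.
             */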
            if (!rsrc_desc.stride && offset >= rsrc_desc.numRecords) {
                clamped_offset = rsrc_desc.numRecords;
            } else if (rsrc_desc.stride && offset
                       > (rsrc_desc.stride * rsrc_desc.numRecords)) {
                clamped_offset = (rsrc_desc.stride * rsrc_desc.numRecords);
            }

            Addr vaddr = ((rsrc_desc.baseAddr + clamped_offset) & ~0x3);
            gpu_dyn_inst->scalarAddr = vaddr;
        }

        // first instruction DWORD
        InFmt_SMEM instData;
        // second instruction DWORD
        InFmt_SMEM_1 extData;
    }; // Inst_SMEM

    class Inst_VOP2 : public VEGAGPUStaticInst
    {
      public:
        Inst_VOP2(InFmt_VOP2*, const std::string &opcode);
        ~Inst_VOP2();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_VOP2 instData;
        // possible second DWORD
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_VOP2 *);
    }; // Inst_VOP2

    class Inst_VOP1 : public VEGAGPUStaticInst
    {
      public:
        Inst_VOP1(InFmt_VOP1*, const std::string &opcode);
        ~Inst_VOP1();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_VOP1 instData;
        // possible second DWORD
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_VOP1 *);
    }; // Inst_VOP1

    class Inst_VOPC : public VEGAGPUStaticInst
    {
      public:
        Inst_VOPC(InFmt_VOPC*, const std::string &opcode);
        ~Inst_VOPC();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_VOPC instData;
        // possible second DWORD
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_VOPC *);
    }; // Inst_VOPC

    class Inst_VINTRP : public VEGAGPUStaticInst
    {
      public:
        Inst_VINTRP(InFmt_VINTRP*, const std::string &opcode);
        ~Inst_VINTRP();

        int instSize() const override;

      protected:
        // first instruction DWORD
        InFmt_VINTRP instData;
    }; // Inst_VINTRP

    class Inst_VOP3A : public VEGAGPUStaticInst
    {
      public:
        Inst_VOP3A(InFmt_VOP3A*, const std::string &opcode, bool sgpr_dst);
        ~Inst_VOP3A();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_VOP3A instData;
        // second instruction DWORD
        InFmt_VOP3_1 extData;

      private:
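        /**
         * The v_cmp and readlane instructions in the VOP3 encoding are
         * unique because they are the only VOP3 instructions that write
         * their result to a scalar register rather than a VGPR; sgprDst
         * records whether this instruction's destination is an SGPR.
         */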
        const bool sgprDst;
    }; // Inst_VOP3A

    class Inst_VOP3B : public VEGAGPUStaticInst
    {
      public:
        Inst_VOP3B(InFmt_VOP3B*, const std::string &opcode);
        ~Inst_VOP3B();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_VOP3B instData;
        // second instruction DWORD
        InFmt_VOP3_1 extData;

      private:
        bool hasSecondDword(InFmt_VOP3B *);
    }; // Inst_VOP3B

    class Inst_DS : public VEGAGPUStaticInst
    {
      public:
        Inst_DS(InFmt_DS*, const std::string &opcode);
        ~Inst_DS();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
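        /**
         * The DS helpers below do not issue memory requests; they read and
         * write the wavefront's ldsChunk directly, using the per-lane
         * addresses in gpuDynInst->addr plus an instruction offset as byte
         * offsets into the LDS allocation.
         */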
        template<typename T>
        void
        initMemRead(GPUDynInstPtr gpuDynInst, Addr offset)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr = gpuDynInst->addr[lane] + offset;

                    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]
                        = wf->ldsChunk->read<T>(vaddr);
                }
            }
        }

        template<int N>
        void
        initMemRead(GPUDynInstPtr gpuDynInst, Addr offset)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr = gpuDynInst->addr[lane] + offset;
                    for (int i = 0; i < N; ++i) {
                        (reinterpret_cast<VecElemU32*>(
                            gpuDynInst->d_data))[lane * N + i]
                            = wf->ldsChunk->read<VecElemU32>(
                                vaddr + i*sizeof(VecElemU32));
                    }
                }
            }
        }

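        /**
         * Dual variant: each active lane accesses two elements, one at
         * offset0 and one at offset1, packed into adjacent d_data slots
         * (lane * 2 and lane * 2 + 1).
         */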
        template<typename T>
        void
        initDualMemRead(GPUDynInstPtr gpuDynInst, Addr offset0, Addr offset1)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
                    Addr vaddr1 = gpuDynInst->addr[lane] + offset1;

                    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2]
                        = wf->ldsChunk->read<T>(vaddr0);
                    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2 + 1]
                        = wf->ldsChunk->read<T>(vaddr1);
                }
            }
        }

        template<typename T>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst, Addr offset)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr = gpuDynInst->addr[lane] + offset;
                    wf->ldsChunk->write<T>(vaddr,
                        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]);
                }
            }
        }

        template<int N>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst, Addr offset)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr = gpuDynInst->addr[lane] + offset;
                    for (int i = 0; i < N; ++i) {
                        wf->ldsChunk->write<VecElemU32>(
                            vaddr + i*sizeof(VecElemU32),
                            (reinterpret_cast<VecElemU32*>(
                                gpuDynInst->d_data))[lane * N + i]);
                    }
                }
            }
        }

        template<typename T>
        void
        initDualMemWrite(GPUDynInstPtr gpuDynInst, Addr offset0, Addr offset1)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
                    Addr vaddr1 = gpuDynInst->addr[lane] + offset1;
                    wf->ldsChunk->write<T>(vaddr0, (reinterpret_cast<T*>(
                        gpuDynInst->d_data))[lane * 2]);
                    wf->ldsChunk->write<T>(vaddr1, (reinterpret_cast<T*>(
                        gpuDynInst->d_data))[lane * 2 + 1]);
                }
            }
        }

        template<typename T>
        void
        initAtomicAccess(GPUDynInstPtr gpuDynInst, Addr offset)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr = gpuDynInst->addr[lane] + offset;

                    AtomicOpFunctorPtr amo_op =
                        gpuDynInst->makeAtomicOpFunctor<T>(
                            &(reinterpret_cast<T*>(gpuDynInst->a_data))[lane],
                            &(reinterpret_cast<T*>(gpuDynInst->x_data))[lane]);

                    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]
                        = wf->ldsChunk->atomic<T>(vaddr, std::move(amo_op));
                }
            }
        }

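        /**
         * DS addresses are simply the per-lane 32-bit register values;
         * copy them into gpuDynInst->addr for every active lane.
         */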
        void
        calcAddr(GPUDynInstPtr gpuDynInst, ConstVecOperandU32 &addr)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    gpuDynInst->addr.at(lane) = (Addr)addr[lane];
                }
            }
        }

        // first instruction DWORD
        InFmt_DS instData;
        // second instruction DWORD
        InFmt_DS_1 extData;
    }; // Inst_DS

    class Inst_MUBUF : public VEGAGPUStaticInst
    {
      public:
        Inst_MUBUF(InFmt_MUBUF*, const std::string &opcode);
        ~Inst_MUBUF();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        template<typename T>
        void
        initMemRead(GPUDynInstPtr gpuDynInst)
        {
            // temporarily modify exec_mask to suppress memory accesses to oob
            // regions. Only issue memory requests for lanes that have their
            // exec_mask set and are not out of bounds.
            VectorMask old_exec_mask = gpuDynInst->exec_mask;
            gpuDynInst->exec_mask &= ~oobMask;
            initMemReqHelper<T, 1>(gpuDynInst, MemCmd::ReadReq);
            gpuDynInst->exec_mask = old_exec_mask;
        }

        template<int N>
        void
        initMemRead(GPUDynInstPtr gpuDynInst)
        {
            // temporarily modify exec_mask to suppress memory accesses to oob
            // regions. Only issue memory requests for lanes that have their
            // exec_mask set and are not out of bounds.
            VectorMask old_exec_mask = gpuDynInst->exec_mask;
            gpuDynInst->exec_mask &= ~oobMask;
            initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::ReadReq);
            gpuDynInst->exec_mask = old_exec_mask;
        }

        template<typename T>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst)
        {
            // temporarily modify exec_mask to suppress memory accesses to oob
            // regions. Only issue memory requests for lanes that have their
            // exec_mask set and are not out of bounds.
            VectorMask old_exec_mask = gpuDynInst->exec_mask;
            gpuDynInst->exec_mask &= ~oobMask;
            initMemReqHelper<T, 1>(gpuDynInst, MemCmd::WriteReq);
            gpuDynInst->exec_mask = old_exec_mask;
        }

        template<int N>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst)
        {
            // temporarily modify exec_mask to suppress memory accesses to oob
            // regions. Only issue memory requests for lanes that have their
            // exec_mask set and are not out of bounds.
            VectorMask old_exec_mask = gpuDynInst->exec_mask;
            gpuDynInst->exec_mask &= ~oobMask;
            initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::WriteReq);
            gpuDynInst->exec_mask = old_exec_mask;
        }

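        /**
         * Build a single zero-size request carrying this instruction's
         * flags and hand it to the compute unit's global memory fence
         * injection path, rather than issuing per-lane accesses.
         */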
        void
        injectGlobalMemFence(GPUDynInstPtr gpuDynInst)
        {
            // create request and set flags
            gpuDynInst->resetEntireStatusVector();
            gpuDynInst->setStatusVector(0, 1);
            RequestPtr req = std::make_shared<Request>(0, 0, 0,
                gpuDynInst->computeUnit()->requestorId(), 0,
                gpuDynInst->wfDynId);
            gpuDynInst->setRequestFlags(req);
            gpuDynInst->computeUnit()->
                injectGlobalMemFence(gpuDynInst, false, req);
        }

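        /**
         * MUBUF instructions calculate their addresses as follows:
         *
         *     address = SGPR base address + SOFFSET
         *     index   = VGPR index (+ lane id if ADD_TID_EN)
         *     offset  = VGPR offset + instruction immediate offset
         *
         * For unswizzled buffers the byte offset added to the base is
         * offset + stride * index; swizzled buffers interleave the index
         * and offset bits as computed in the loop below. Lanes whose
         * access falls outside numRecords are recorded in oobMask instead
         * of being given an address.
         */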
        template<typename VOFF, typename VIDX, typename SRSRC, typename SOFF>
        void
        calcAddr(GPUDynInstPtr gpuDynInst, VOFF v_off, VIDX v_idx,
                 SRSRC s_rsrc_desc, SOFF s_offset, int inst_offset)
        {
            Addr vaddr = 0;
            Addr base_addr = 0;
            Addr stride = 0;
            Addr buf_idx = 0;
            Addr buf_off = 0;
            Addr buffer_offset = 0;
            BufferRsrcDescriptor rsrc_desc;

            std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),
                        sizeof(BufferRsrcDescriptor));

            base_addr = rsrc_desc.baseAddr;

            stride = rsrc_desc.addTidEn ? ((rsrc_desc.dataFmt << 14)
                + rsrc_desc.stride) : rsrc_desc.stride;

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vaddr = base_addr + s_offset.rawData();

                    buf_idx = v_idx[lane] + (rsrc_desc.addTidEn ? lane : 0);

                    buf_off = v_off[lane] + inst_offset;

                    if (rsrc_desc.swizzleEn) {
                        Addr idx_stride = 8 << rsrc_desc.idxStride;
                        Addr elem_size = 2 << rsrc_desc.elemSize;
                        Addr idx_msb = buf_idx / idx_stride;
                        Addr idx_lsb = buf_idx % idx_stride;
                        Addr off_msb = buf_off / elem_size;
                        Addr off_lsb = buf_off % elem_size;
                        DPRINTF(VEGA, "mubuf swizzled lane %d: "
                                "idx_stride = %llx, elem_size = %llx, "
                                "idx_msb = %llx, idx_lsb = %llx, "
                                "off_msb = %llx, off_lsb = %llx\n",
                                lane, idx_stride, elem_size, idx_msb, idx_lsb,
                                off_msb, off_lsb);

                        buffer_offset = (idx_msb * stride + off_msb * elem_size)
                            * idx_stride + idx_lsb * elem_size + off_lsb;
                    } else {
                        buffer_offset = buf_off + stride * buf_idx;
                    }

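                    /**
                     * Out-of-bounds check: for unswizzled (or zero-stride)
                     * buffers the linear offset is compared against
                     * numRecords (minus the scalar offset); for swizzled,
                     * strided buffers the index and offset are range-checked
                     * separately. OOB lanes are added to oobMask and make no
                     * memory request, so their destination registers can
                     * later be zeroed.
                     */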
                    if (rsrc_desc.stride == 0 || !rsrc_desc.swizzleEn) {
                        if (buffer_offset >=
                            rsrc_desc.numRecords - s_offset.rawData()) {
                            DPRINTF(VEGA, "mubuf out-of-bounds condition 1: "
                                    "lane = %d, buffer_offset = %llx, "
                                    "const_stride = %llx, "
                                    "const_num_records = %llx\n",
                                    lane, buf_off + stride * buf_idx,
                                    stride, rsrc_desc.numRecords);
                            oobMask.set(lane);
                            continue;
                        }
                    }

                    if (rsrc_desc.stride != 0 && rsrc_desc.swizzleEn) {
                        if (buf_idx >= rsrc_desc.numRecords ||
                            buf_off >= stride) {
                            DPRINTF(VEGA, "mubuf out-of-bounds condition 2: "
                                    "lane = %d, offset = %llx, "
                                    "index = %llx, "
                                    "const_num_records = %llx\n",
                                    lane, buf_off, buf_idx,
                                    rsrc_desc.numRecords);
                            oobMask.set(lane);
                            continue;
                        }
                    }

                    vaddr += buffer_offset;

                    DPRINTF(VEGA, "Calculating mubuf address for lane %d: "
                            "vaddr = %llx, base_addr = %llx, "
                            "stride = %llx, buf_idx = %llx, buf_off = %llx\n",
                            lane, vaddr, base_addr, stride,
                            buf_idx, buf_off);
                    gpuDynInst->addr.at(lane) = vaddr;
                }
            }
        }

        // first instruction DWORD
        InFmt_MUBUF instData;
        // second instruction DWORD
        InFmt_MUBUF_1 extData;
        // Mask of lanes with out-of-bounds accesses. Needs to be tracked
        // separately from the exec_mask so that we remember to write zero
        // to the registers associated with out of bounds lanes.
        VectorMask oobMask;
    }; // Inst_MUBUF

    class Inst_MTBUF : public VEGAGPUStaticInst
    {
      public:
        Inst_MTBUF(InFmt_MTBUF*, const std::string &opcode);
        ~Inst_MTBUF();

        int instSize() const override;
        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_MTBUF instData;
        // second instruction DWORD
        InFmt_MTBUF_1 extData;

      private:
        bool hasSecondDword(InFmt_MTBUF *);
    }; // Inst_MTBUF

    class Inst_MIMG : public VEGAGPUStaticInst
    {
      public:
        Inst_MIMG(InFmt_MIMG*, const std::string &opcode);
        ~Inst_MIMG();

        int instSize() const override;
        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_MIMG instData;
        // second instruction DWORD
        InFmt_MIMG_1 extData;
    }; // Inst_MIMG

    class Inst_EXP : public VEGAGPUStaticInst
    {
      public:
        Inst_EXP(InFmt_EXP*, const std::string &opcode);
        ~Inst_EXP();

        int instSize() const override;
        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_EXP instData;
        // second instruction DWORD
        InFmt_EXP_1 extData;
    }; // Inst_EXP

    class Inst_FLAT : public VEGAGPUStaticInst
    {
      public:
        Inst_FLAT(InFmt_FLAT*, const std::string &opcode);
        ~Inst_FLAT();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
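        /**
         * FLAT accesses are resolved by segment before any data is moved:
         * when the instruction executes as SC_GLOBAL the generic
         * initMemReqHelper path issues memory requests, and when it
         * executes as SC_GROUP the data is read or written directly in
         * the wavefront's LDS chunk.
         */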
        template<typename T>
        void
        initMemRead(GPUDynInstPtr gpuDynInst)
        {
            if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
                initMemReqHelper<T, 1>(gpuDynInst, MemCmd::ReadReq);
            } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
                Wavefront *wf = gpuDynInst->wavefront();
                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                    if (gpuDynInst->exec_mask[lane]) {
                        Addr vaddr = gpuDynInst->addr[lane];
                        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]
                            = wf->ldsChunk->read<T>(vaddr);
                    }
                }
            }
        }

        template<int N>
        void
        initMemRead(GPUDynInstPtr gpuDynInst)
        {
            if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
                initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::ReadReq);
            } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
                Wavefront *wf = gpuDynInst->wavefront();
                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                    if (gpuDynInst->exec_mask[lane]) {
                        Addr vaddr = gpuDynInst->addr[lane];
                        for (int i = 0; i < N; ++i) {
                            (reinterpret_cast<VecElemU32*>(
                                gpuDynInst->d_data))[lane * N + i]
                                = wf->ldsChunk->read<VecElemU32>(
                                    vaddr + i*sizeof(VecElemU32));
                        }
                    }
                }
            }
        }

        template<typename T>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst)
        {
            if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
                initMemReqHelper<T, 1>(gpuDynInst, MemCmd::WriteReq);
            } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
                Wavefront *wf = gpuDynInst->wavefront();
                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                    if (gpuDynInst->exec_mask[lane]) {
                        Addr vaddr = gpuDynInst->addr[lane];
                        wf->ldsChunk->write<T>(vaddr,
                            (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]);
                    }
                }
            }
        }

        template<int N>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst)
        {
            if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
                initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::WriteReq);
            } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
                Wavefront *wf = gpuDynInst->wavefront();
                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                    if (gpuDynInst->exec_mask[lane]) {
                        Addr vaddr = gpuDynInst->addr[lane];
                        for (int i = 0; i < N; ++i) {
                            wf->ldsChunk->write<VecElemU32>(
                                vaddr + i*sizeof(VecElemU32),
                                (reinterpret_cast<VecElemU32*>(
                                    gpuDynInst->d_data))[lane * N + i]);
                        }
                    }
                }
            }
        }

        template<typename T>
        void
        initAtomicAccess(GPUDynInstPtr gpuDynInst)
        {
            if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
                initMemReqHelper<T, 1>(gpuDynInst, MemCmd::SwapReq, true);
            } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
                Wavefront *wf = gpuDynInst->wavefront();
                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                    if (gpuDynInst->exec_mask[lane]) {
                        Addr vaddr = gpuDynInst->addr[lane];
                        auto amo_op =
                            gpuDynInst->makeAtomicOpFunctor<T>(
                                &(reinterpret_cast<T*>(
                                    gpuDynInst->a_data))[lane],
                                &(reinterpret_cast<T*>(
                                    gpuDynInst->x_data))[lane]);

                        T tmp = wf->ldsChunk->read<T>(vaddr);
                        (*amo_op)(reinterpret_cast<uint8_t *>(&tmp));
                        wf->ldsChunk->write<T>(vaddr, tmp);
                        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane] = tmp;
                    }
                }
            }
        }

        void
        calcAddr(GPUDynInstPtr gpuDynInst, ConstVecOperandU64 &vaddr,
                 ScalarRegU32 saddr, ScalarRegI32 offset)
        {
            // Offset is a 13-bit field with the following meanings:
            // In Flat instructions, offset is a 12-bit unsigned number
            // In Global/Scratch instructions, offset is a 13-bit signed number
            if (isFlat()) {
                offset = offset & 0xfff;
            } else {
                offset = (ScalarRegI32)sext<13>(offset);
            }
            // If saddr = 0x7f there is no scalar reg to read and address will
            // be a 64-bit address. Otherwise, saddr is the reg index for a
            // scalar reg used as the base address for a 32-bit address.
            if ((saddr == 0x7f && isFlatGlobal()) || isFlat()) {
                calcAddrVgpr(gpuDynInst, vaddr, offset);
            } else {
                // Assume we are operating in 64-bit mode and read a pair of
                // SGPRs for the address base.
                ConstScalarOperandU64 sbase(gpuDynInst, saddr);
                sbase.read();

                calcAddrSgpr(gpuDynInst, vaddr, sbase, offset);
            }

            if (isFlat()) {
                gpuDynInst->resolveFlatSegment(gpuDynInst->exec_mask);
            } else {
                gpuDynInst->staticInstruction()->executed_as =
                    enums::SC_GLOBAL;
            }
        }

        void
        issueRequestHelper(GPUDynInstPtr gpuDynInst)
        {
            if ((gpuDynInst->executedAs() == enums::SC_GLOBAL && isFlat())
                || isFlatGlobal()) {
                gpuDynInst->computeUnit()->globalMemoryPipe
                    .issueRequest(gpuDynInst);
            } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
                assert(isFlat());
                gpuDynInst->computeUnit()->localMemoryPipe
                    .issueRequest(gpuDynInst);
            } else {
                fatal("Unsupported scope for flat instruction.\n");
            }
        }

        // first instruction DWORD
        InFmt_FLAT instData;
        // second instruction DWORD
        InFmt_FLAT_1 extData;

      private:
        void initFlatOperandInfo();
        void initGlobalOperandInfo();

        void
        calcAddrSgpr(GPUDynInstPtr gpuDynInst, ConstVecOperandU64 &vaddr,
                     ConstScalarOperandU64 &saddr, ScalarRegI32 offset)
        {
            // Use SGPR pair as a base address and add VGPR-offset and
            // instruction offset. The VGPR-offset is always 32-bits so we
            // mask any upper bits from the vaddr.
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    gpuDynInst->addr.at(lane) =
                        saddr.rawData() + (vaddr[lane] & 0xffffffff) + offset;
                }
            }
        }

        void
        calcAddrVgpr(GPUDynInstPtr gpuDynInst, ConstVecOperandU64 &addr,
                     ScalarRegI32 offset)
        {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    gpuDynInst->addr.at(lane) = addr[lane] + offset;
                }
            }
        }
    }; // Inst_FLAT
} // namespace VegaISA
} // namespace gem5

#endif // __ARCH_VEGA_INSTS_OP_ENCODINGS_HH__