gem5 v22.1.0.0
op_encodings.hh
/*
 * Copyright (c) 2016-2021 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __ARCH_GCN3_INSTS_OP_ENCODINGS_HH__
#define __ARCH_GCN3_INSTS_OP_ENCODINGS_HH__

#include "arch/amdgpu/gcn3/gpu_decoder.hh"
#include "arch/amdgpu/gcn3/gpu_mem_helpers.hh"
#include "arch/amdgpu/gcn3/insts/gpu_static_inst.hh"
#include "arch/amdgpu/gcn3/operand.hh"
#include "debug/GCN3.hh"
#include "debug/GPUExec.hh"
#include "mem/ruby/system/RubySystem.hh"

namespace gem5
{

namespace Gcn3ISA
{
    struct BufferRsrcDescriptor
    {
        uint64_t baseAddr : 48;
        uint32_t stride : 14;
        uint32_t cacheSwizzle : 1;
        uint32_t swizzleEn : 1;
        uint32_t numRecords : 32;
        uint32_t dstSelX : 3;
        uint32_t dstSelY : 3;
        uint32_t dstSelZ : 3;
        uint32_t dstSelW : 3;
        uint32_t numFmt : 3;
        uint32_t dataFmt : 4;
        uint32_t elemSize : 2;
        uint32_t idxStride : 2;
        uint32_t addTidEn : 1;
        uint32_t atc : 1;
        uint32_t hashEn : 1;
        uint32_t heap : 1;
        uint32_t mType : 3;
        uint32_t type : 2;
    };
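
    // Illustrative sketch (not part of gem5): unpacking a 128-bit scalar
    // resource descriptor (SRD) into this struct, mirroring the memcpy
    // pattern used by Inst_SMEM::calcAddr and Inst_MUBUF::calcAddr below.
    // The raw words are made-up placeholder values, and the sketch assumes
    // the bitfields pack to exactly 16 bytes, as they do on the LP64
    // targets gem5 builds on.
    inline void
    exampleUnpackRsrcDescriptor()
    {
        uint32_t raw_srd[4] = {0u, 0u, 256u, 0u}; // hypothetical SRD words
        BufferRsrcDescriptor rsrc_desc;
        static_assert(sizeof(BufferRsrcDescriptor) == sizeof(raw_srd),
                      "SRD is 128 bits");
        std::memcpy(&rsrc_desc, raw_srd, sizeof(BufferRsrcDescriptor));
        // rsrc_desc.baseAddr, rsrc_desc.stride, rsrc_desc.numRecords, etc.
        // can now be read directly through the bitfields.
    }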

    // --- purely virtual instruction classes ---

    class Inst_SOP2 : public GCN3GPUStaticInst
    {
      public:
        Inst_SOP2(InFmt_SOP2*, const std::string &opcode);

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_SOP2 instData;
        // possible second DWORD
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_SOP2 *);
    }; // Inst_SOP2

    class Inst_SOPK : public GCN3GPUStaticInst
    {
      public:
        Inst_SOPK(InFmt_SOPK*, const std::string &opcode);
        ~Inst_SOPK();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_SOPK instData;
        // possible second DWORD
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_SOPK *);
    }; // Inst_SOPK

    class Inst_SOP1 : public GCN3GPUStaticInst
    {
      public:
        Inst_SOP1(InFmt_SOP1*, const std::string &opcode);
        ~Inst_SOP1();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_SOP1 instData;
        // possible second DWORD
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_SOP1 *);
    }; // Inst_SOP1

    class Inst_SOPC : public GCN3GPUStaticInst
    {
      public:
        Inst_SOPC(InFmt_SOPC*, const std::string &opcode);
        ~Inst_SOPC();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_SOPC instData;
        // possible second DWORD
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_SOPC *);
    }; // Inst_SOPC

    class Inst_SOPP : public GCN3GPUStaticInst
    {
      public:
        Inst_SOPP(InFmt_SOPP*, const std::string &opcode);
        ~Inst_SOPP();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_SOPP instData;
    }; // Inst_SOPP

    class Inst_SMEM : public GCN3GPUStaticInst
    {
      public:
        Inst_SMEM(InFmt_SMEM*, const std::string &opcode);
        ~Inst_SMEM();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        /**
         * initiate a memory read access for N dwords
         */
        template<int N>
        void
        initMemRead(GPUDynInstPtr gpuDynInst)
        {
            initMemReqScalarHelper<ScalarRegU32, N>(gpuDynInst,
                                                    MemCmd::ReadReq);
        }

        /**
         * initiate a memory write access for N dwords
         */
        template<int N>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst)
        {
            initMemReqScalarHelper<ScalarRegU32, N>(gpuDynInst,
                                                    MemCmd::WriteReq);
        }

        /**
         * For normal s_load_dword/s_store_dword instruction addresses.
         */
        void
        calcAddr(GPUDynInstPtr gpu_dyn_inst, ConstScalarOperandU64 &addr,
                 ScalarRegU32 offset)
        {
            Addr vaddr = ((addr.rawData() + offset) & ~0x3);
            gpu_dyn_inst->scalarAddr = vaddr;
        }

        /**
         * For s_buffer_load_dword/s_buffer_store_dword instruction
         * addresses.
         */
        void
        calcAddr(GPUDynInstPtr gpu_dyn_inst,
                 ConstScalarOperandU128 &s_rsrc_desc, ScalarRegU32 offset)
        {
            BufferRsrcDescriptor rsrc_desc;
            ScalarRegU32 clamped_offset(offset);
            std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),
                        sizeof(BufferRsrcDescriptor));

            /**
             * The offset is clamped if it is out of bounds: for a
             * zero-stride buffer, offsets at or beyond numRecords clamp
             * to numRecords; for a strided buffer, offsets beyond
             * stride * numRecords clamp to that product.
             */
            if (!rsrc_desc.stride && offset >= rsrc_desc.numRecords) {
                clamped_offset = rsrc_desc.numRecords;
            } else if (rsrc_desc.stride && offset
                > (rsrc_desc.stride * rsrc_desc.numRecords)) {
                clamped_offset = (rsrc_desc.stride * rsrc_desc.numRecords);
            }

            Addr vaddr = ((rsrc_desc.baseAddr + clamped_offset) & ~0x3);
            gpu_dyn_inst->scalarAddr = vaddr;
        }

        // first instruction DWORD
        InFmt_SMEM instData;
        // second instruction DWORD
        InFmt_SMEM_1 extData;
    }; // Inst_SMEM
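
    // Illustrative sketch (not part of gem5): the s_buffer_* clamp rule
    // from Inst_SMEM::calcAddr above, restated as a standalone function
    // with hypothetical names so the two cases can be checked with
    // concrete numbers.
    inline ScalarRegU32
    exampleClampBufferOffset(ScalarRegU32 offset, ScalarRegU32 stride,
                             ScalarRegU32 num_records)
    {
        if (!stride && offset >= num_records) {
            return num_records;             // zero-stride: clamp to numRecords
        } else if (stride && offset > stride * num_records) {
            return stride * num_records;    // strided: clamp to the product
        }
        return offset;                      // in bounds: unchanged
    }
    // e.g. exampleClampBufferOffset(300, 0, 256) == 256 and
    //      exampleClampBufferOffset(3000, 16, 128) == 2048 (16 * 128)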

    class Inst_VOP2 : public GCN3GPUStaticInst
    {
      public:
        Inst_VOP2(InFmt_VOP2*, const std::string &opcode);
        ~Inst_VOP2();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_VOP2 instData;
        // possible second DWORD
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_VOP2 *);
    }; // Inst_VOP2

    class Inst_VOP1 : public GCN3GPUStaticInst
    {
      public:
        Inst_VOP1(InFmt_VOP1*, const std::string &opcode);
        ~Inst_VOP1();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_VOP1 instData;
        // possible second DWORD
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_VOP1 *);
    }; // Inst_VOP1

    class Inst_VOPC : public GCN3GPUStaticInst
    {
      public:
        Inst_VOPC(InFmt_VOPC*, const std::string &opcode);
        ~Inst_VOPC();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_VOPC instData;
        // possible second DWORD
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_VOPC *);
    }; // Inst_VOPC

    class Inst_VINTRP : public GCN3GPUStaticInst
    {
      public:
        Inst_VINTRP(InFmt_VINTRP*, const std::string &opcode);
        ~Inst_VINTRP();

        int instSize() const override;

      protected:
        // first instruction DWORD
        InFmt_VINTRP instData;
    }; // Inst_VINTRP

    class Inst_VOP3 : public GCN3GPUStaticInst
    {
      public:
        Inst_VOP3(InFmt_VOP3*, const std::string &opcode, bool sgpr_dst);
        ~Inst_VOP3();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_VOP3 instData;
        // second instruction DWORD
        InFmt_VOP3_1 extData;

      private:
        bool hasSecondDword(InFmt_VOP3 *);
        /**
         * the v_cmp and readlane instructions in the VOP3 encoding are
         * unique because they are the only instructions that use the
         * VDST field to specify a scalar register destination.
         */
        const bool sgprDst;
    }; // Inst_VOP3

    class Inst_VOP3_SDST_ENC : public GCN3GPUStaticInst
    {
      public:
        Inst_VOP3_SDST_ENC(InFmt_VOP3_SDST_ENC*, const std::string &opcode);
        ~Inst_VOP3_SDST_ENC();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_VOP3_SDST_ENC instData;
        // second instruction DWORD
        InFmt_VOP3_1 extData;

      private:
        bool hasSecondDword(InFmt_VOP3_SDST_ENC *);
    }; // Inst_VOP3_SDST_ENC

    class Inst_DS : public GCN3GPUStaticInst
    {
      public:
        Inst_DS(InFmt_DS*, const std::string &opcode);
        ~Inst_DS();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        template<typename T>
        void
        initMemRead(GPUDynInstPtr gpuDynInst, Addr offset)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr = gpuDynInst->addr[lane] + offset;

                    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]
                        = wf->ldsChunk->read<T>(vaddr);
                }
            }
        }

        template<int N>
        void
        initMemRead(GPUDynInstPtr gpuDynInst, Addr offset)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr = gpuDynInst->addr[lane] + offset;
                    for (int i = 0; i < N; ++i) {
                        (reinterpret_cast<VecElemU32*>(
                            gpuDynInst->d_data))[lane * N + i]
                            = wf->ldsChunk->read<VecElemU32>(
                                vaddr + i*sizeof(VecElemU32));
                    }
                }
            }
        }

        template<typename T>
        void
        initDualMemRead(GPUDynInstPtr gpuDynInst, Addr offset0, Addr offset1)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
                    Addr vaddr1 = gpuDynInst->addr[lane] + offset1;

                    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2]
                        = wf->ldsChunk->read<T>(vaddr0);
                    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2 + 1]
                        = wf->ldsChunk->read<T>(vaddr1);
                }
            }
        }

        template<typename T>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst, Addr offset)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr = gpuDynInst->addr[lane] + offset;
                    wf->ldsChunk->write<T>(vaddr,
                        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]);
                }
            }
        }

        template<int N>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst, Addr offset)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr = gpuDynInst->addr[lane] + offset;
                    for (int i = 0; i < N; ++i) {
                        wf->ldsChunk->write<VecElemU32>(
                            vaddr + i*sizeof(VecElemU32),
                            (reinterpret_cast<VecElemU32*>(
                                gpuDynInst->d_data))[lane * N + i]);
                    }
                }
            }
        }

        template<typename T>
        void
        initDualMemWrite(GPUDynInstPtr gpuDynInst, Addr offset0, Addr offset1)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
                    Addr vaddr1 = gpuDynInst->addr[lane] + offset1;
                    wf->ldsChunk->write<T>(vaddr0, (reinterpret_cast<T*>(
                        gpuDynInst->d_data))[lane * 2]);
                    wf->ldsChunk->write<T>(vaddr1, (reinterpret_cast<T*>(
                        gpuDynInst->d_data))[lane * 2 + 1]);
                }
            }
        }

        void
        calcAddr(GPUDynInstPtr gpuDynInst, ConstVecOperandU32 &addr)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    gpuDynInst->addr.at(lane) = (Addr)addr[lane];
                }
            }
        }

        // first instruction DWORD
        InFmt_DS instData;
        // second instruction DWORD
        InFmt_DS_1 extData;
    }; // Inst_DS
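
    // Illustrative sketch (not part of gem5): the lane-major layout the
    // Inst_DS helpers above use for d_data. For an N-dword access, dword
    // i of lane l lives at index l * N + i, so each lane's data is
    // contiguous; a dual access likewise places lane l's two values at
    // indices 2 * l and 2 * l + 1.
    inline VecElemU32
    exampleDDataElement(const VecElemU32 *d_data, int lane, int i, int n)
    {
        return d_data[lane * n + i]; // dword i of lane 'lane'
    }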

    class Inst_MUBUF : public GCN3GPUStaticInst
    {
      public:
        Inst_MUBUF(InFmt_MUBUF*, const std::string &opcode);
        ~Inst_MUBUF();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        template<typename T>
        void
        initMemRead(GPUDynInstPtr gpuDynInst)
        {
            // temporarily modify exec_mask to suppress memory accesses
            // to oob regions. Only issue memory requests for lanes that
            // have their exec_mask set and are not out of bounds.
            VectorMask old_exec_mask = gpuDynInst->exec_mask;
            gpuDynInst->exec_mask &= ~oobMask;
            initMemReqHelper<T, 1>(gpuDynInst, MemCmd::ReadReq);
            gpuDynInst->exec_mask = old_exec_mask;
        }

        template<int N>
        void
        initMemRead(GPUDynInstPtr gpuDynInst)
        {
            // temporarily modify exec_mask to suppress memory accesses
            // to oob regions. Only issue memory requests for lanes that
            // have their exec_mask set and are not out of bounds.
            VectorMask old_exec_mask = gpuDynInst->exec_mask;
            gpuDynInst->exec_mask &= ~oobMask;
            initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::ReadReq);
            gpuDynInst->exec_mask = old_exec_mask;
        }

        template<typename T>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst)
        {
            // temporarily modify exec_mask to suppress memory accesses
            // to oob regions. Only issue memory requests for lanes that
            // have their exec_mask set and are not out of bounds.
            VectorMask old_exec_mask = gpuDynInst->exec_mask;
            gpuDynInst->exec_mask &= ~oobMask;
            initMemReqHelper<T, 1>(gpuDynInst, MemCmd::WriteReq);
            gpuDynInst->exec_mask = old_exec_mask;
        }

        template<int N>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst)
        {
            // temporarily modify exec_mask to suppress memory accesses
            // to oob regions. Only issue memory requests for lanes that
            // have their exec_mask set and are not out of bounds.
            VectorMask old_exec_mask = gpuDynInst->exec_mask;
            gpuDynInst->exec_mask &= ~oobMask;
            initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::WriteReq);
            gpuDynInst->exec_mask = old_exec_mask;
        }

        void
        injectGlobalMemFence(GPUDynInstPtr gpuDynInst)
        {
            // create request and set flags
            gpuDynInst->resetEntireStatusVector();
            gpuDynInst->setStatusVector(0, 1);
            RequestPtr req = std::make_shared<Request>(0, 0, 0,
                gpuDynInst->computeUnit()->requestorId(), 0,
                gpuDynInst->wfDynId);
            gpuDynInst->setRequestFlags(req);
            gpuDynInst->computeUnit()->
                injectGlobalMemFence(gpuDynInst, false, req);
        }

        /**
         * MUBUF instructions calculate their addresses as follows: the
         * base address comes from the buffer resource descriptor, and a
         * per-lane buffer offset is added to it. The buffer offset
         * combines the vector offset and index operands, the scalar
         * offset, the instruction's immediate offset, and the
         * descriptor's stride; when swizzling is enabled, the index and
         * offset are additionally interleaved element-wise.
         */
        template<typename VOFF, typename VIDX, typename SRSRC, typename SOFF>
        void
        calcAddr(GPUDynInstPtr gpuDynInst, VOFF v_off, VIDX v_idx,
            SRSRC s_rsrc_desc, SOFF s_offset, int inst_offset)
        {
            Addr vaddr = 0;
            Addr base_addr = 0;
            Addr stride = 0;
            Addr buf_idx = 0;
            Addr buf_off = 0;
            Addr buffer_offset = 0;
            BufferRsrcDescriptor rsrc_desc;

            std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),
                        sizeof(BufferRsrcDescriptor));

            base_addr = rsrc_desc.baseAddr;

            stride = rsrc_desc.addTidEn ? ((rsrc_desc.dataFmt << 14)
                + rsrc_desc.stride) : rsrc_desc.stride;

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vaddr = base_addr + s_offset.rawData();

                    /**
                     * The per-lane index folds in the lane ID when
                     * addTidEn is set; the per-lane offset folds in the
                     * instruction's immediate offset.
                     */
                    buf_idx = v_idx[lane] + (rsrc_desc.addTidEn ? lane : 0);

                    buf_off = v_off[lane] + inst_offset;

                    if (rsrc_desc.swizzleEn) {
                        Addr idx_stride = 8 << rsrc_desc.idxStride;
                        Addr elem_size = 2 << rsrc_desc.elemSize;
                        Addr idx_msb = buf_idx / idx_stride;
                        Addr idx_lsb = buf_idx % idx_stride;
                        Addr off_msb = buf_off / elem_size;
                        Addr off_lsb = buf_off % elem_size;
                        DPRINTF(GCN3, "mubuf swizzled lane %d: "
                                "idx_stride = %llx, elem_size = %llx, "
                                "idx_msb = %llx, idx_lsb = %llx, "
                                "off_msb = %llx, off_lsb = %llx\n",
                                lane, idx_stride, elem_size, idx_msb,
                                idx_lsb, off_msb, off_lsb);

                        buffer_offset = (idx_msb * stride
                            + off_msb * elem_size) * idx_stride
                            + idx_lsb * elem_size + off_lsb;
                    } else {
                        buffer_offset = buf_off + stride * buf_idx;
                    }

                    /**
                     * Out-of-bounds cases: for zero-stride or unswizzled
                     * buffers, the linear buffer offset is checked
                     * against the descriptor's record count; for
                     * swizzled, strided buffers, the index and offset
                     * are checked against the record count and stride,
                     * respectively.
                     */
                    if (rsrc_desc.stride == 0 || !rsrc_desc.swizzleEn) {
                        if (buffer_offset >=
                            rsrc_desc.numRecords - s_offset.rawData()) {
                            DPRINTF(GCN3, "mubuf out-of-bounds condition 1: "
                                    "lane = %d, buffer_offset = %llx, "
                                    "const_stride = %llx, "
                                    "const_num_records = %llx\n",
                                    lane, buf_off + stride * buf_idx,
                                    rsrc_desc.stride, rsrc_desc.numRecords);
                            oobMask.set(lane);
                            continue;
                        }
                    }

                    if (rsrc_desc.stride != 0 && rsrc_desc.swizzleEn) {
                        if (buf_idx >= rsrc_desc.numRecords ||
                            buf_off >= stride) {
                            DPRINTF(GCN3, "mubuf out-of-bounds condition 2: "
                                    "lane = %d, offset = %llx, "
                                    "index = %llx, "
                                    "const_num_records = %llx\n",
                                    lane, buf_off, buf_idx,
                                    rsrc_desc.numRecords);
                            oobMask.set(lane);
                            continue;
                        }
                    }

                    vaddr += buffer_offset;

                    DPRINTF(GCN3, "Calculating mubuf address for lane %d: "
                            "vaddr = %llx, base_addr = %llx, "
                            "stride = %llx, buf_idx = %llx, buf_off = %llx\n",
                            lane, vaddr, base_addr, stride,
                            buf_idx, buf_off);
                    gpuDynInst->addr.at(lane) = vaddr;
                }
            }
        }

        // first instruction DWORD
        InFmt_MUBUF instData;
        // second instruction DWORD
        InFmt_MUBUF_1 extData;
        // Mask of lanes with out-of-bounds accesses. Needs to be tracked
        // separately from the exec_mask so that we remember to write zero
        // to the registers associated with out of bounds lanes.
        VectorMask oobMask;
    }; // Inst_MUBUF
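
    // Illustrative sketch (not part of gem5): the swizzled buffer_offset
    // computation from Inst_MUBUF::calcAddr above, worked with concrete
    // numbers. With idxStride = 1 (8 << 1 = 16-thread tiles) and
    // elemSize = 1 (2 << 1 = 4-byte elements), buf_idx = 21 and
    // buf_off = 6 decompose into idx_msb = 1, idx_lsb = 5, off_msb = 1,
    // off_lsb = 2.
    inline Addr
    exampleSwizzledBufferOffset(Addr stride)
    {
        const Addr idx_stride = 8 << 1;  // 16
        const Addr elem_size = 2 << 1;   // 4
        const Addr buf_idx = 21, buf_off = 6;
        Addr idx_msb = buf_idx / idx_stride, idx_lsb = buf_idx % idx_stride;
        Addr off_msb = buf_off / elem_size, off_lsb = buf_off % elem_size;
        return (idx_msb * stride + off_msb * elem_size) * idx_stride
            + idx_lsb * elem_size + off_lsb;
        // stride = 64 yields (64 + 4) * 16 + 20 + 2 = 1110
    }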

    class Inst_MTBUF : public GCN3GPUStaticInst
    {
      public:
        Inst_MTBUF(InFmt_MTBUF*, const std::string &opcode);
        ~Inst_MTBUF();

        int instSize() const override;
        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_MTBUF instData;
        // second instruction DWORD
        InFmt_MTBUF_1 extData;

      private:
        bool hasSecondDword(InFmt_MTBUF *);
    }; // Inst_MTBUF

    class Inst_MIMG : public GCN3GPUStaticInst
    {
      public:
        Inst_MIMG(InFmt_MIMG*, const std::string &opcode);
        ~Inst_MIMG();

        int instSize() const override;
        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_MIMG instData;
        // second instruction DWORD
        InFmt_MIMG_1 extData;
    }; // Inst_MIMG

    class Inst_EXP : public GCN3GPUStaticInst
    {
      public:
        Inst_EXP(InFmt_EXP*, const std::string &opcode);
        ~Inst_EXP();

        int instSize() const override;
        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_EXP instData;
        // second instruction DWORD
        InFmt_EXP_1 extData;
    }; // Inst_EXP

    class Inst_FLAT : public GCN3GPUStaticInst
    {
      public:
        Inst_FLAT(InFmt_FLAT*, const std::string &opcode);
        ~Inst_FLAT();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        template<typename T>
        void
        initMemRead(GPUDynInstPtr gpuDynInst)
        {
            if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
                initMemReqHelper<T, 1>(gpuDynInst, MemCmd::ReadReq);
            } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
                Wavefront *wf = gpuDynInst->wavefront();
                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                    if (gpuDynInst->exec_mask[lane]) {
                        Addr vaddr = gpuDynInst->addr[lane];
                        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]
                            = wf->ldsChunk->read<T>(vaddr);
                    }
                }
            }
        }

        template<int N>
        void
        initMemRead(GPUDynInstPtr gpuDynInst)
        {
            if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
                initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::ReadReq);
            } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
                Wavefront *wf = gpuDynInst->wavefront();
                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                    if (gpuDynInst->exec_mask[lane]) {
                        Addr vaddr = gpuDynInst->addr[lane];
                        for (int i = 0; i < N; ++i) {
                            (reinterpret_cast<VecElemU32*>(
                                gpuDynInst->d_data))[lane * N + i]
                                = wf->ldsChunk->read<VecElemU32>(
                                    vaddr + i*sizeof(VecElemU32));
                        }
                    }
                }
            }
        }

        template<typename T>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst)
        {
            if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
                initMemReqHelper<T, 1>(gpuDynInst, MemCmd::WriteReq);
            } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
                Wavefront *wf = gpuDynInst->wavefront();
                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                    if (gpuDynInst->exec_mask[lane]) {
                        Addr vaddr = gpuDynInst->addr[lane];
                        wf->ldsChunk->write<T>(vaddr,
                            (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]);
                    }
                }
            }
        }

        template<int N>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst)
        {
            if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
                initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::WriteReq);
            } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
                Wavefront *wf = gpuDynInst->wavefront();
                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                    if (gpuDynInst->exec_mask[lane]) {
                        Addr vaddr = gpuDynInst->addr[lane];
                        for (int i = 0; i < N; ++i) {
                            wf->ldsChunk->write<VecElemU32>(
                                vaddr + i*sizeof(VecElemU32),
                                (reinterpret_cast<VecElemU32*>(
                                    gpuDynInst->d_data))[lane * N + i]);
                        }
                    }
                }
            }
        }

        template<typename T>
        void
        initAtomicAccess(GPUDynInstPtr gpuDynInst)
        {
            if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
                initMemReqHelper<T, 1>(gpuDynInst, MemCmd::SwapReq, true);
            } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
                Wavefront *wf = gpuDynInst->wavefront();
                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                    if (gpuDynInst->exec_mask[lane]) {
                        Addr vaddr = gpuDynInst->addr[lane];
                        auto amo_op =
                            gpuDynInst->makeAtomicOpFunctor<T>(
                                &(reinterpret_cast<T*>(
                                    gpuDynInst->a_data))[lane],
                                &(reinterpret_cast<T*>(
                                    gpuDynInst->x_data))[lane]);

                        T tmp = wf->ldsChunk->read<T>(vaddr);
                        (*amo_op)(reinterpret_cast<uint8_t *>(&tmp));
                        wf->ldsChunk->write<T>(vaddr, tmp);
                        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]
                            = tmp;
                    }
                }
            }
        }

        void
        calcAddr(GPUDynInstPtr gpuDynInst, ConstVecOperandU64 &addr)
        {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    gpuDynInst->addr.at(lane) = addr[lane];
                }
            }
            gpuDynInst->resolveFlatSegment(gpuDynInst->exec_mask);
        }

        // first instruction DWORD
        InFmt_FLAT instData;
        // second instruction DWORD
        InFmt_FLAT_1 extData;
    }; // Inst_FLAT
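
    // Illustrative sketch (not part of gem5): the read/modify/write
    // sequence Inst_FLAT::initAtomicAccess above uses to emulate a flat
    // atomic that resolves to LDS. 'op' is a stand-in for the functor
    // built by makeAtomicOpFunctor; as in the code above, the destination
    // buffer receives the value left behind after the op is applied.
    template<typename T, typename Op>
    inline T
    exampleLdsAtomicRMW(T &lds_location, Op op)
    {
        T tmp = lds_location;  // read the current LDS value
        op(tmp);               // apply the atomic operation in place
        lds_location = tmp;    // write the result back
        return tmp;            // value recorded in d_data
    }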
} // namespace Gcn3ISA
} // namespace gem5

#endif // __ARCH_GCN3_INSTS_OP_ENCODINGS_HH__