/*
 * Copyright (c) 2016-2021 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __ARCH_GCN3_INSTS_OP_ENCODINGS_HH__
#define __ARCH_GCN3_INSTS_OP_ENCODINGS_HH__

#include "arch/amdgpu/gcn3/gpu_decoder.hh"
#include "arch/amdgpu/gcn3/gpu_mem_helpers.hh"
#include "arch/amdgpu/gcn3/insts/gpu_static_inst.hh"
#include "arch/amdgpu/gcn3/operand.hh"
#include "debug/GCN3.hh"
#include "debug/GPUExec.hh"
#include "mem/ruby/system/RubySystem.hh"

namespace gem5
{

namespace Gcn3ISA
{
    struct BufferRsrcDescriptor
    {
        uint64_t baseAddr : 48;
        uint32_t stride : 14;
        uint32_t cacheSwizzle : 1;
        uint32_t swizzleEn : 1;
        uint32_t numRecords : 32;
        uint32_t dstSelX : 3;
        uint32_t dstSelY : 3;
        uint32_t dstSelZ : 3;
        uint32_t dstSelW : 3;
        uint32_t numFmt : 3;
        uint32_t dataFmt : 4;
        uint32_t elemSize : 2;
        uint32_t idxStride : 2;
        uint32_t addTidEn : 1;
        uint32_t atc : 1;
        uint32_t hashEn : 1;
        uint32_t heap : 1;
        uint32_t mType : 3;
        uint32_t type : 2;
    };
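
    // Note: the bit fields above total 128 bits, i.e., the four
    // consecutive SGPRs that hold a buffer resource descriptor. The
    // address helpers below fill this struct with a raw std::memcpy of
    // a ConstScalarOperandU128's data, so the layout must stay in sync.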

    // --- purely virtual instruction classes ---

    class Inst_SOP2 : public GCN3GPUStaticInst
    {
      public:
        Inst_SOP2(InFmt_SOP2*, const std::string &opcode);

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_SOP2 instData;
        // possible second DWORD
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_SOP2 *);
    }; // Inst_SOP2

    class Inst_SOPK : public GCN3GPUStaticInst
    {
      public:
        Inst_SOPK(InFmt_SOPK*, const std::string &opcode);
        ~Inst_SOPK();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_SOPK instData;
        // possible second DWORD
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_SOPK *);
    }; // Inst_SOPK

    class Inst_SOP1 : public GCN3GPUStaticInst
    {
      public:
        Inst_SOP1(InFmt_SOP1*, const std::string &opcode);
        ~Inst_SOP1();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_SOP1 instData;
        // possible second DWORD
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_SOP1 *);
    }; // Inst_SOP1

    class Inst_SOPC : public GCN3GPUStaticInst
    {
      public:
        Inst_SOPC(InFmt_SOPC*, const std::string &opcode);
        ~Inst_SOPC();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_SOPC instData;
        // possible second DWORD
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_SOPC *);
    }; // Inst_SOPC

    class Inst_SOPP : public GCN3GPUStaticInst
    {
      public:
        Inst_SOPP(InFmt_SOPP*, const std::string &opcode);
        ~Inst_SOPP();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_SOPP instData;
    }; // Inst_SOPP

    class Inst_SMEM : public GCN3GPUStaticInst
    {
      public:
        Inst_SMEM(InFmt_SMEM*, const std::string &opcode);
        ~Inst_SMEM();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        /**
         * initiate a memory read access for N dwords
         */
        template<int N>
        void
        initMemRead(GPUDynInstPtr gpuDynInst)
        {
            initMemReqScalarHelper<ScalarRegU32, N>(gpuDynInst,
                                                    MemCmd::ReadReq);
        }

        /**
         * initiate a memory write access for N dwords
         */
        template<int N>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst)
        {
            initMemReqScalarHelper<ScalarRegU32, N>(gpuDynInst,
                                                    MemCmd::WriteReq);
        }

        /**
         * For normal s_load_dword/s_store_dword instruction addresses.
         */
        void
        calcAddr(GPUDynInstPtr gpu_dyn_inst, ConstScalarOperandU64 &addr,
                 ScalarRegU32 offset)
        {
            // scalar accesses are dword-aligned: mask off the low two bits
            Addr vaddr = ((addr.rawData() + offset) & ~0x3);
            gpu_dyn_inst->scalarAddr = vaddr;
        }

        /**
         * For s_buffer_load_dword/s_buffer_store_dword instruction
         * addresses.
         */
        void
        calcAddr(GPUDynInstPtr gpu_dyn_inst,
                 ConstScalarOperandU128 &s_rsrc_desc, ScalarRegU32 offset)
        {
            BufferRsrcDescriptor rsrc_desc;
            ScalarRegU32 clamped_offset(offset);
            std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),
                        sizeof(BufferRsrcDescriptor));

            // clamp the offset to the size of the buffer: at numRecords
            // for unstrided buffers, at stride * numRecords otherwise
            if (!rsrc_desc.stride && offset >= rsrc_desc.numRecords) {
                clamped_offset = rsrc_desc.numRecords;
            } else if (rsrc_desc.stride && offset
                       > (rsrc_desc.stride * rsrc_desc.numRecords)) {
                clamped_offset = (rsrc_desc.stride * rsrc_desc.numRecords);
            }

            Addr vaddr = ((rsrc_desc.baseAddr + clamped_offset) & ~0x3);
            gpu_dyn_inst->scalarAddr = vaddr;
        }
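
        // A minimal worked example of the clamping above: with stride == 0,
        // numRecords == 64, and offset == 100, clamped_offset becomes 64,
        // so the final address is (baseAddr + 64) & ~0x3, i.e., the
        // dword-aligned end of the buffer.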

        // first instruction DWORD
        InFmt_SMEM instData;
        // second instruction DWORD
        InFmt_SMEM_1 extData;
    }; // Inst_SMEM

    class Inst_VOP2 : public GCN3GPUStaticInst
    {
      public:
        Inst_VOP2(InFmt_VOP2*, const std::string &opcode);
        ~Inst_VOP2();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_VOP2 instData;
        // possible second DWORD
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_VOP2 *);
    }; // Inst_VOP2

    class Inst_VOP1 : public GCN3GPUStaticInst
    {
      public:
        Inst_VOP1(InFmt_VOP1*, const std::string &opcode);
        ~Inst_VOP1();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_VOP1 instData;
        // possible second DWORD
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_VOP1 *);
    }; // Inst_VOP1

    class Inst_VOPC : public GCN3GPUStaticInst
    {
      public:
        Inst_VOPC(InFmt_VOPC*, const std::string &opcode);
        ~Inst_VOPC();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_VOPC instData;
        // possible second DWORD
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_VOPC *);
    }; // Inst_VOPC

    class Inst_VINTRP : public GCN3GPUStaticInst
    {
      public:
        Inst_VINTRP(InFmt_VINTRP*, const std::string &opcode);
        ~Inst_VINTRP();

        int instSize() const override;

      protected:
        // first instruction DWORD
        InFmt_VINTRP instData;
    }; // Inst_VINTRP

    class Inst_VOP3 : public GCN3GPUStaticInst
    {
      public:
        Inst_VOP3(InFmt_VOP3*, const std::string &opcode, bool sgpr_dst);
        ~Inst_VOP3();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_VOP3 instData;
        // second instruction DWORD
        InFmt_VOP3_1 extData;

      private:
        bool hasSecondDword(InFmt_VOP3 *);
        /**
         * the v_cmp and readlane instructions in the VOP3 encoding are
         * unique because they are the only instructions that use the
         * VDST field to specify a scalar register destination. if VDST
         * is used in this way we change the dest type to scalar.
         */
        const bool sgprDst;
    }; // Inst_VOP3

    class Inst_VOP3_SDST_ENC : public GCN3GPUStaticInst
    {
      public:
        Inst_VOP3_SDST_ENC(InFmt_VOP3_SDST_ENC*, const std::string &opcode);
        ~Inst_VOP3_SDST_ENC();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_VOP3_SDST_ENC instData;
        // second instruction DWORD
        InFmt_VOP3_1 extData;

      private:
        bool hasSecondDword(InFmt_VOP3_SDST_ENC *);
    }; // Inst_VOP3_SDST_ENC

    class Inst_DS : public GCN3GPUStaticInst
    {
      public:
        Inst_DS(InFmt_DS*, const std::string &opcode);
        ~Inst_DS();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        template<typename T>
        void
        initMemRead(GPUDynInstPtr gpuDynInst, Addr offset)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr = gpuDynInst->addr[lane] + offset;

                    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]
                        = wf->ldsChunk->read<T>(vaddr);
                }
            }
        }

        template<int N>
        void
        initMemRead(GPUDynInstPtr gpuDynInst, Addr offset)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr = gpuDynInst->addr[lane] + offset;
                    for (int i = 0; i < N; ++i) {
                        (reinterpret_cast<VecElemU32*>(
                            gpuDynInst->d_data))[lane * N + i]
                            = wf->ldsChunk->read<VecElemU32>(
                                vaddr + i*sizeof(VecElemU32));
                    }
                }
            }
        }

        template<typename T>
        void
        initDualMemRead(GPUDynInstPtr gpuDynInst, Addr offset0, Addr offset1)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
                    Addr vaddr1 = gpuDynInst->addr[lane] + offset1;

                    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2]
                        = wf->ldsChunk->read<T>(vaddr0);
                    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2 + 1]
                        = wf->ldsChunk->read<T>(vaddr1);
                }
            }
        }
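
        // Dual accesses pack two elements per lane: slot [lane * 2] holds
        // the value at offset0 and [lane * 2 + 1] the value at offset1
        // (e.g., for the ds_read2_*/ds_write2_* style operations).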

        template<typename T>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst, Addr offset)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr = gpuDynInst->addr[lane] + offset;
                    wf->ldsChunk->write<T>(vaddr,
                        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]);
                }
            }
        }

        template<int N>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst, Addr offset)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr = gpuDynInst->addr[lane] + offset;
                    for (int i = 0; i < N; ++i) {
                        wf->ldsChunk->write<VecElemU32>(
                            vaddr + i*sizeof(VecElemU32),
                            (reinterpret_cast<VecElemU32*>(
                                gpuDynInst->d_data))[lane * N + i]);
                    }
                }
            }
        }

        template<typename T>
        void
        initDualMemWrite(GPUDynInstPtr gpuDynInst, Addr offset0, Addr offset1)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
                    Addr vaddr1 = gpuDynInst->addr[lane] + offset1;
                    wf->ldsChunk->write<T>(vaddr0, (reinterpret_cast<T*>(
                        gpuDynInst->d_data))[lane * 2]);
                    wf->ldsChunk->write<T>(vaddr1, (reinterpret_cast<T*>(
                        gpuDynInst->d_data))[lane * 2 + 1]);
                }
            }
        }

        void
        calcAddr(GPUDynInstPtr gpuDynInst, ConstVecOperandU32 &addr)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    gpuDynInst->addr.at(lane) = (Addr)addr[lane];
                }
            }
        }
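
        // DS addresses are byte offsets into the workgroup's LDS
        // allocation (wf->ldsChunk), not virtual addresses, so no address
        // translation is involved in the helpers above.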

        // first instruction DWORD
        InFmt_DS instData;
        // second instruction DWORD
        InFmt_DS_1 extData;
    }; // Inst_DS

    class Inst_MUBUF : public GCN3GPUStaticInst
    {
      public:
        Inst_MUBUF(InFmt_MUBUF*, const std::string &opcode);
        ~Inst_MUBUF();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        template<typename T>
        void
        initMemRead(GPUDynInstPtr gpuDynInst)
        {
            // temporarily modify exec_mask to suppress memory accesses to
            // oob regions. Only issue memory requests for lanes that have
            // their exec_mask set and are not out of bounds.
            VectorMask old_exec_mask = gpuDynInst->exec_mask;
            gpuDynInst->exec_mask &= ~oobMask;
            initMemReqHelper<T, 1>(gpuDynInst, MemCmd::ReadReq);
            gpuDynInst->exec_mask = old_exec_mask;
        }

        template<int N>
        void
        initMemRead(GPUDynInstPtr gpuDynInst)
        {
            // temporarily modify exec_mask to suppress memory accesses to
            // oob regions. Only issue memory requests for lanes that have
            // their exec_mask set and are not out of bounds.
            VectorMask old_exec_mask = gpuDynInst->exec_mask;
            gpuDynInst->exec_mask &= ~oobMask;
            initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::ReadReq);
            gpuDynInst->exec_mask = old_exec_mask;
        }

        template<typename T>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst)
        {
            // temporarily modify exec_mask to suppress memory accesses to
            // oob regions. Only issue memory requests for lanes that have
            // their exec_mask set and are not out of bounds.
            VectorMask old_exec_mask = gpuDynInst->exec_mask;
            gpuDynInst->exec_mask &= ~oobMask;
            initMemReqHelper<T, 1>(gpuDynInst, MemCmd::WriteReq);
            gpuDynInst->exec_mask = old_exec_mask;
        }

        template<int N>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst)
        {
            // temporarily modify exec_mask to suppress memory accesses to
            // oob regions. Only issue memory requests for lanes that have
            // their exec_mask set and are not out of bounds.
            VectorMask old_exec_mask = gpuDynInst->exec_mask;
            gpuDynInst->exec_mask &= ~oobMask;
            initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::WriteReq);
            gpuDynInst->exec_mask = old_exec_mask;
        }

        void
        injectGlobalMemFence(GPUDynInstPtr gpuDynInst)
        {
            // create request and set flags
            gpuDynInst->resetEntireStatusVector();
            gpuDynInst->setStatusVector(0, 1);
            RequestPtr req = std::make_shared<Request>(0, 0, 0,
                                                       gpuDynInst->computeUnit()->
                                                       requestorId(), 0,
                                                       gpuDynInst->wfDynId);
            gpuDynInst->setRequestFlags(req);
            gpuDynInst->computeUnit()->
                injectGlobalMemFence(gpuDynInst, false, req);
        }
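
        // The fence is modeled as a zero-size request whose flags are
        // derived from the instruction via setRequestFlags(); the compute
        // unit then injects it into the global memory pipeline (used,
        // e.g., by the buffer_wbinvl1* instructions).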

        /**
         * MUBUF instructions calculate their addresses as follows:
         *
         *   index  = (IDXEN ? vgpr_idx : 0) + (addTidEn ? TID : 0)
         *   offset = (OFFEN ? vgpr_off : 0) + inst_offset
         *
         * Under linear addressing:
         *   addr = base + sgpr_off + offset + stride * index
         *
         * Under swizzled addressing, index and offset are each split into
         * MSB and LSB components using the descriptor's index stride and
         * element size, and the pieces are recombined as computed below.
         */
        template<typename VOFF, typename VIDX, typename SRSRC, typename SOFF>
        void
        calcAddr(GPUDynInstPtr gpuDynInst, VOFF v_off, VIDX v_idx,
            SRSRC s_rsrc_desc, SOFF s_offset, int inst_offset)
        {
            Addr vaddr = 0;
            Addr base_addr = 0;
            Addr stride = 0;
            Addr buf_idx = 0;
            Addr buf_off = 0;
            Addr buffer_offset = 0;
            BufferRsrcDescriptor rsrc_desc;

            std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),
                sizeof(BufferRsrcDescriptor));

            base_addr = rsrc_desc.baseAddr;

            stride = rsrc_desc.addTidEn ? ((rsrc_desc.dataFmt << 14)
                + rsrc_desc.stride) : rsrc_desc.stride;

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vaddr = base_addr + s_offset.rawData();

                    buf_idx = v_idx[lane] + (rsrc_desc.addTidEn ? lane : 0);

                    buf_off = v_off[lane] + inst_offset;

                    if (rsrc_desc.swizzleEn) {
                        Addr idx_stride = 8 << rsrc_desc.idxStride;
                        Addr elem_size = 2 << rsrc_desc.elemSize;
                        Addr idx_msb = buf_idx / idx_stride;
                        Addr idx_lsb = buf_idx % idx_stride;
                        Addr off_msb = buf_off / elem_size;
                        Addr off_lsb = buf_off % elem_size;
                        DPRINTF(GCN3, "mubuf swizzled lane %d: "
                                "idx_stride = %llx, elem_size = %llx, "
                                "idx_msb = %llx, idx_lsb = %llx, "
                                "off_msb = %llx, off_lsb = %llx\n",
                                lane, idx_stride, elem_size, idx_msb, idx_lsb,
                                off_msb, off_lsb);

                        buffer_offset = (idx_msb * stride + off_msb * elem_size)
                            * idx_stride + idx_lsb * elem_size + off_lsb;
                    } else {
                        buffer_offset = buf_off + stride * buf_idx;
                    }

                    // Out-of-bounds lanes are recorded in oobMask; loads
                    // later return zero for those lanes and stores to them
                    // are suppressed.
                    if (rsrc_desc.stride == 0 || !rsrc_desc.swizzleEn) {
                        if (buffer_offset >=
                            rsrc_desc.numRecords - s_offset.rawData()) {
                            DPRINTF(GCN3, "mubuf out-of-bounds condition 1: "
                                    "lane = %d, buffer_offset = %llx, "
                                    "const_stride = %llx, "
                                    "const_num_records = %llx\n",
                                    lane, buf_off + stride * buf_idx,
                                    rsrc_desc.stride, rsrc_desc.numRecords);
                            oobMask.set(lane);
                            continue;
                        }
                    }

                    if (rsrc_desc.stride != 0 && rsrc_desc.swizzleEn) {
                        if (buf_idx >= rsrc_desc.numRecords ||
                            buf_off >= stride) {
                            DPRINTF(GCN3, "mubuf out-of-bounds condition 2: "
                                    "lane = %d, offset = %llx, "
                                    "index = %llx, "
                                    "const_num_records = %llx\n",
                                    lane, buf_off, buf_idx,
                                    rsrc_desc.numRecords);
                            oobMask.set(lane);
                            continue;
                        }
                    }

                    vaddr += buffer_offset;

                    DPRINTF(GCN3, "Calculating mubuf address for lane %d: "
                            "vaddr = %llx, base_addr = %llx, "
                            "stride = %llx, buf_idx = %llx, buf_off = %llx\n",
                            lane, vaddr, base_addr, stride,
                            buf_idx, buf_off);
                    gpuDynInst->addr.at(lane) = vaddr;
                }
            }
        }
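
        // Worked example of the swizzled case: with idxStride encoding 1
        // (idx_stride = 8 << 1 = 16), elemSize encoding 2 (elem_size =
        // 2 << 2 = 8), stride = 32, buf_idx = 18, and buf_off = 20:
        //   idx_msb = 1, idx_lsb = 2, off_msb = 2, off_lsb = 4
        //   buffer_offset = (1*32 + 2*8)*16 + 2*8 + 4 = 788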

        // first instruction DWORD
        InFmt_MUBUF instData;
        // second instruction DWORD
        InFmt_MUBUF_1 extData;
        // Mask of lanes with out-of-bounds accesses. Needs to be tracked
        // separately from the exec_mask so that we remember to write zero
        // to the registers associated with out of bounds lanes.
        VectorMask oobMask;
    }; // Inst_MUBUF

    class Inst_MTBUF : public GCN3GPUStaticInst
    {
      public:
        Inst_MTBUF(InFmt_MTBUF*, const std::string &opcode);
        ~Inst_MTBUF();

        int instSize() const override;
        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_MTBUF instData;
        // second instruction DWORD
        InFmt_MTBUF_1 extData;

      private:
        bool hasSecondDword(InFmt_MTBUF *);
    }; // Inst_MTBUF

    class Inst_MIMG : public GCN3GPUStaticInst
    {
      public:
        Inst_MIMG(InFmt_MIMG*, const std::string &opcode);
        ~Inst_MIMG();

        int instSize() const override;
        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_MIMG instData;
        // second instruction DWORD
        InFmt_MIMG_1 extData;
    }; // Inst_MIMG

    class Inst_EXP : public GCN3GPUStaticInst
    {
      public:
        Inst_EXP(InFmt_EXP*, const std::string &opcode);
        ~Inst_EXP();

        int instSize() const override;
        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_EXP instData;
        // second instruction DWORD
        InFmt_EXP_1 extData;
    }; // Inst_EXP

    class Inst_FLAT : public GCN3GPUStaticInst
    {
      public:
        Inst_FLAT(InFmt_FLAT*, const std::string &opcode);
        ~Inst_FLAT();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        template<typename T>
        void
        initMemRead(GPUDynInstPtr gpuDynInst)
        {
            if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
                initMemReqHelper<T, 1>(gpuDynInst, MemCmd::ReadReq);
            } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
                Wavefront *wf = gpuDynInst->wavefront();
                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                    if (gpuDynInst->exec_mask[lane]) {
                        Addr vaddr = gpuDynInst->addr[lane];
                        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]
                            = wf->ldsChunk->read<T>(vaddr);
                    }
                }
            }
        }

        template<int N>
        void
        initMemRead(GPUDynInstPtr gpuDynInst)
        {
            if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
                initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::ReadReq);
            } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
                Wavefront *wf = gpuDynInst->wavefront();
                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                    if (gpuDynInst->exec_mask[lane]) {
                        Addr vaddr = gpuDynInst->addr[lane];
                        for (int i = 0; i < N; ++i) {
                            (reinterpret_cast<VecElemU32*>(
                                gpuDynInst->d_data))[lane * N + i]
                                = wf->ldsChunk->read<VecElemU32>(
                                    vaddr + i*sizeof(VecElemU32));
                        }
                    }
                }
            }
        }

        template<typename T>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst)
        {
            if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
                initMemReqHelper<T, 1>(gpuDynInst, MemCmd::WriteReq);
            } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
                Wavefront *wf = gpuDynInst->wavefront();
                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                    if (gpuDynInst->exec_mask[lane]) {
                        Addr vaddr = gpuDynInst->addr[lane];
                        wf->ldsChunk->write<T>(vaddr,
                            (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]);
                    }
                }
            }
        }

        template<int N>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst)
        {
            if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
                initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::WriteReq);
            } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
                Wavefront *wf = gpuDynInst->wavefront();
                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                    if (gpuDynInst->exec_mask[lane]) {
                        Addr vaddr = gpuDynInst->addr[lane];
                        for (int i = 0; i < N; ++i) {
                            wf->ldsChunk->write<VecElemU32>(
                                vaddr + i*sizeof(VecElemU32),
                                (reinterpret_cast<VecElemU32*>(
                                    gpuDynInst->d_data))[lane * N + i]);
                        }
                    }
                }
            }
        }

        template<typename T>
        void
        initAtomicAccess(GPUDynInstPtr gpuDynInst)
        {
            if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
                initMemReqHelper<T, 1>(gpuDynInst, MemCmd::SwapReq, true);
            } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
                Wavefront *wf = gpuDynInst->wavefront();
                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                    if (gpuDynInst->exec_mask[lane]) {
                        Addr vaddr = gpuDynInst->addr[lane];
                        auto amo_op =
                            gpuDynInst->makeAtomicOpFunctor<T>(
                                &(reinterpret_cast<T*>(
                                    gpuDynInst->a_data))[lane],
                                &(reinterpret_cast<T*>(
                                    gpuDynInst->x_data))[lane]);

                        T tmp = wf->ldsChunk->read<T>(vaddr);
                        (*amo_op)(reinterpret_cast<uint8_t *>(&tmp));
                        wf->ldsChunk->write<T>(vaddr, tmp);
                        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane] = tmp;
                    }
                }
            }
        }
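
        // For LDS (SC_GROUP) atomics the read-modify-write happens inline:
        // the functor built from a_data/x_data is applied to the current
        // LDS value, the result is written back, and the original value is
        // returned in d_data.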

        void
        calcAddr(GPUDynInstPtr gpuDynInst, ConstVecOperandU64 &addr)
        {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    gpuDynInst->addr.at(lane) = addr[lane];
                }
            }
            gpuDynInst->resolveFlatSegment(gpuDynInst->exec_mask);
        }
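
        // resolveFlatSegment() decides which aperture the flat addresses
        // fall in (global or group/LDS); the init helpers above then
        // branch on executedAs() accordingly.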

        // first instruction DWORD
        InFmt_FLAT instData;
        // second instruction DWORD
        InFmt_FLAT_1 extData;
    }; // Inst_FLAT
} // namespace Gcn3ISA
} // namespace gem5

#endif // __ARCH_GCN3_INSTS_OP_ENCODINGS_HH__