gem5 v23.0.0.1 — op_encodings.hh
/*
 * Copyright (c) 2016-2021 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __ARCH_VEGA_INSTS_OP_ENCODINGS_HH__
#define __ARCH_VEGA_INSTS_OP_ENCODINGS_HH__

#include "debug/GPUExec.hh"
#include "debug/VEGA.hh"

namespace gem5
{

namespace VegaISA
{
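    // Software view of the 128-bit buffer resource descriptor (V#) that
    // buffer and scalar-buffer instructions read from a group of SGPRs.
    // The SMEM and MUBUF calcAddr() helpers below memcpy a 128-bit scalar
    // operand into this struct and use baseAddr, stride, numRecords,
    // swizzleEn, elemSize, idxStride and addTidEn to form and bounds-check
    // the final address.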
    struct BufferRsrcDescriptor
    {
        uint64_t baseAddr : 48;
        uint32_t stride : 14;
        uint32_t cacheSwizzle : 1;
        uint32_t swizzleEn : 1;
        uint32_t numRecords : 32;
        uint32_t dstSelX : 3;
        uint32_t dstSelY : 3;
        uint32_t dstSelZ : 3;
        uint32_t dstSelW : 3;
        uint32_t numFmt : 3;
        uint32_t dataFmt : 4;
        uint32_t elemSize : 2;
        uint32_t idxStride : 2;
        uint32_t addTidEn : 1;
        uint32_t atc : 1;
        uint32_t hashEn : 1;
        uint32_t heap : 1;
        uint32_t mType : 3;
        uint32_t type : 2;
    };

    // --- purely virtual instruction classes ---

    class Inst_SOP2 : public VEGAGPUStaticInst
    {
      public:
        Inst_SOP2(InFmt_SOP2*, const std::string &opcode);

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_SOP2 instData;
        // possible second DWORD
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_SOP2 *);
    }; // Inst_SOP2

    class Inst_SOPK : public VEGAGPUStaticInst
    {
      public:
        Inst_SOPK(InFmt_SOPK*, const std::string &opcode);
        ~Inst_SOPK();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_SOPK instData;
        // possible second DWORD
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_SOPK *);
    }; // Inst_SOPK

    class Inst_SOP1 : public VEGAGPUStaticInst
    {
      public:
        Inst_SOP1(InFmt_SOP1*, const std::string &opcode);
        ~Inst_SOP1();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_SOP1 instData;
        // possible second DWORD
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_SOP1 *);
    }; // Inst_SOP1

    class Inst_SOPC : public VEGAGPUStaticInst
    {
      public:
        Inst_SOPC(InFmt_SOPC*, const std::string &opcode);
        ~Inst_SOPC();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_SOPC instData;
        // possible second DWORD
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_SOPC *);
    }; // Inst_SOPC

    class Inst_SOPP : public VEGAGPUStaticInst
    {
      public:
        Inst_SOPP(InFmt_SOPP*, const std::string &opcode);
        ~Inst_SOPP();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_SOPP instData;
    }; // Inst_SOPP

    class Inst_SMEM : public VEGAGPUStaticInst
    {
      public:
        Inst_SMEM(InFmt_SMEM*, const std::string &opcode);
        ~Inst_SMEM();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        /**
         * initiate a memory read access for N dwords
         */
        template<int N>
        void
        initMemRead(GPUDynInstPtr gpuDynInst)
        {
            initMemReqScalarHelper<ScalarRegU32, N>(gpuDynInst,
                                                    MemCmd::ReadReq);
        }

        /**
         * initiate a memory write access for N dwords
         */
        template<int N>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst)
        {
            initMemReqScalarHelper<ScalarRegU32, N>(gpuDynInst,
                                                    MemCmd::WriteReq);
        }

        /**
         * For normal s_load_dword/s_store_dword instruction addresses.
         */
        void
        calcAddr(GPUDynInstPtr gpu_dyn_inst, ConstScalarOperandU64 &addr,
                 ScalarRegU32 offset)
        {
            Addr vaddr = ((addr.rawData() + offset) & ~0x3);
            gpu_dyn_inst->scalarAddr = vaddr;
        }
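
        // Example: with a base of 0x1000 in the SGPR pair and an immediate
        // offset of 0x7, the access goes to (0x1000 + 0x7) & ~0x3 = 0x1004;
        // scalar memory addresses are always rounded down to a dword
        // boundary here.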

        /**
         * For s_buffer_load_dword/s_buffer_store_dword instruction addresses.
         */
        void
        calcAddr(GPUDynInstPtr gpu_dyn_inst, ConstScalarOperandU128 &s_rsrc_desc,
                 ScalarRegU32 offset)
        {
            BufferRsrcDescriptor rsrc_desc;
            ScalarRegU32 clamped_offset(offset);
            std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),
                        sizeof(BufferRsrcDescriptor));

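            // Clamp the offset to the end of the buffer described by the
            // resource descriptor: for an unstrided buffer the limit is
            // numRecords bytes, for a strided buffer it is
            // stride * numRecords bytes.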
            if (!rsrc_desc.stride && offset >= rsrc_desc.numRecords) {
                clamped_offset = rsrc_desc.numRecords;
            } else if (rsrc_desc.stride && offset
                > (rsrc_desc.stride * rsrc_desc.numRecords)) {
                clamped_offset = (rsrc_desc.stride * rsrc_desc.numRecords);
            }

            Addr vaddr = ((rsrc_desc.baseAddr + clamped_offset) & ~0x3);
            gpu_dyn_inst->scalarAddr = vaddr;
        }

        // first instruction DWORD
        InFmt_SMEM instData;
        // second instruction DWORD
        InFmt_SMEM_1 extData;
    }; // Inst_SMEM

    class Inst_VOP2 : public VEGAGPUStaticInst
    {
      public:
        Inst_VOP2(InFmt_VOP2*, const std::string &opcode);
        ~Inst_VOP2();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_VOP2 instData;
        // possible second DWORD
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_VOP2 *);
    }; // Inst_VOP2

    class Inst_VOP1 : public VEGAGPUStaticInst
    {
      public:
        Inst_VOP1(InFmt_VOP1*, const std::string &opcode);
        ~Inst_VOP1();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_VOP1 instData;
        // possible second DWORD
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_VOP1 *);
    }; // Inst_VOP1

    class Inst_VOPC : public VEGAGPUStaticInst
    {
      public:
        Inst_VOPC(InFmt_VOPC*, const std::string &opcode);
        ~Inst_VOPC();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_VOPC instData;
        // possible second DWORD
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_VOPC *);
    }; // Inst_VOPC

    class Inst_VINTRP : public VEGAGPUStaticInst
    {
      public:
        Inst_VINTRP(InFmt_VINTRP*, const std::string &opcode);
        ~Inst_VINTRP();

        int instSize() const override;

      protected:
        // first instruction DWORD
        InFmt_VINTRP instData;
    }; // Inst_VINTRP

    class Inst_VOP3A : public VEGAGPUStaticInst
    {
      public:
        Inst_VOP3A(InFmt_VOP3A*, const std::string &opcode, bool sgpr_dst);
        ~Inst_VOP3A();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_VOP3A instData;
        // second instruction DWORD
        InFmt_VOP3_1 extData;

      private:
        bool hasSecondDword(InFmt_VOP3A *);

        const bool sgprDst;
    }; // Inst_VOP3A

    class Inst_VOP3B : public VEGAGPUStaticInst
    {
      public:
        Inst_VOP3B(InFmt_VOP3B*, const std::string &opcode);
        ~Inst_VOP3B();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_VOP3B instData;
        // second instruction DWORD
        InFmt_VOP3_1 extData;

      private:
        bool hasSecondDword(InFmt_VOP3B *);
    }; // Inst_VOP3B

    class Inst_DS : public VEGAGPUStaticInst
    {
      public:
        Inst_DS(InFmt_DS*, const std::string &opcode);
        ~Inst_DS();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        template<typename T>
        void
        initMemRead(GPUDynInstPtr gpuDynInst, Addr offset)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr = gpuDynInst->addr[lane] + offset;

                    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]
                        = wf->ldsChunk->read<T>(vaddr);
                }
            }
        }

        template<int N>
        void
        initMemRead(GPUDynInstPtr gpuDynInst, Addr offset)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr = gpuDynInst->addr[lane] + offset;
                    for (int i = 0; i < N; ++i) {
                        (reinterpret_cast<VecElemU32*>(
                            gpuDynInst->d_data))[lane * N + i]
                            = wf->ldsChunk->read<VecElemU32>(
                                vaddr + i*sizeof(VecElemU32));
                    }
                }
            }
        }

        template<typename T>
        void
        initDualMemRead(GPUDynInstPtr gpuDynInst, Addr offset0, Addr offset1)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
                    Addr vaddr1 = gpuDynInst->addr[lane] + offset1;

                    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2]
                        = wf->ldsChunk->read<T>(vaddr0);
                    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2 + 1]
                        = wf->ldsChunk->read<T>(vaddr1);
                }
            }
        }
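
        // Note the packing: in the dual variants each lane owns two
        // consecutive T-sized slots in d_data ([lane * 2] and
        // [lane * 2 + 1]), one per address, mirroring the two data
        // operands of DS read2/write2-style operations.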

        template<typename T>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst, Addr offset)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr = gpuDynInst->addr[lane] + offset;
                    wf->ldsChunk->write<T>(vaddr,
                        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]);
                }
            }
        }

        template<int N>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst, Addr offset)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr = gpuDynInst->addr[lane] + offset;
                    for (int i = 0; i < N; ++i) {
                        wf->ldsChunk->write<VecElemU32>(
                            vaddr + i*sizeof(VecElemU32),
                            (reinterpret_cast<VecElemU32*>(
                                gpuDynInst->d_data))[lane * N + i]);
                    }
                }
            }
        }

        template<typename T>
        void
        initDualMemWrite(GPUDynInstPtr gpuDynInst, Addr offset0, Addr offset1)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
                    Addr vaddr1 = gpuDynInst->addr[lane] + offset1;
                    wf->ldsChunk->write<T>(vaddr0, (reinterpret_cast<T*>(
                        gpuDynInst->d_data))[lane * 2]);
                    wf->ldsChunk->write<T>(vaddr1, (reinterpret_cast<T*>(
                        gpuDynInst->d_data))[lane * 2 + 1]);
                }
            }
        }

        template<typename T>
        void
        initAtomicAccess(GPUDynInstPtr gpuDynInst, Addr offset)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr = gpuDynInst->addr[lane] + offset;

                    AtomicOpFunctorPtr amo_op =
                        gpuDynInst->makeAtomicOpFunctor<T>(
                            &(reinterpret_cast<T*>(gpuDynInst->a_data))[lane],
                            &(reinterpret_cast<T*>(gpuDynInst->x_data))[lane]);

                    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]
                        = wf->ldsChunk->atomic<T>(vaddr, std::move(amo_op));
                }
            }
        }
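
        // The functor is built from a_data (the instruction's data operand)
        // and x_data (the second operand used by compare-and-swap style
        // atomics); ldsChunk->atomic() applies it to the LDS location and
        // its return value is copied into d_data per lane.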

        void
        calcAddr(GPUDynInstPtr gpuDynInst, ConstVecOperandU32 &addr)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    gpuDynInst->addr.at(lane) = (Addr)addr[lane];
                }
            }
        }
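
        // DS addresses are plain byte offsets into the workgroup's LDS
        // allocation (wf->ldsChunk); no virtual address translation is
        // involved, so the per-lane VGPR value is used directly.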

        // first instruction DWORD
        InFmt_DS instData;
        // second instruction DWORD
        InFmt_DS_1 extData;
    }; // Inst_DS

    class Inst_MUBUF : public VEGAGPUStaticInst
    {
      public:
        Inst_MUBUF(InFmt_MUBUF*, const std::string &opcode);
        ~Inst_MUBUF();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        template<typename T>
        void
        initMemRead(GPUDynInstPtr gpuDynInst)
        {
            // temporarily modify exec_mask to suppress memory accesses to oob
            // regions. Only issue memory requests for lanes that have their
            // exec_mask set and are not out of bounds.
            VectorMask old_exec_mask = gpuDynInst->exec_mask;
            gpuDynInst->exec_mask &= ~oobMask;
            initMemReqHelper<T, 1>(gpuDynInst, MemCmd::ReadReq);
            gpuDynInst->exec_mask = old_exec_mask;
        }


        template<int N>
        void
        initMemRead(GPUDynInstPtr gpuDynInst)
        {
            // temporarily modify exec_mask to suppress memory accesses to oob
            // regions. Only issue memory requests for lanes that have their
            // exec_mask set and are not out of bounds.
            VectorMask old_exec_mask = gpuDynInst->exec_mask;
            gpuDynInst->exec_mask &= ~oobMask;
            initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::ReadReq);
            gpuDynInst->exec_mask = old_exec_mask;
        }

        template<typename T>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst)
        {
            // temporarily modify exec_mask to suppress memory accesses to oob
            // regions. Only issue memory requests for lanes that have their
            // exec_mask set and are not out of bounds.
            VectorMask old_exec_mask = gpuDynInst->exec_mask;
            gpuDynInst->exec_mask &= ~oobMask;
            initMemReqHelper<T, 1>(gpuDynInst, MemCmd::WriteReq);
            gpuDynInst->exec_mask = old_exec_mask;
        }

        template<int N>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst)
        {
            // temporarily modify exec_mask to suppress memory accesses to oob
            // regions. Only issue memory requests for lanes that have their
            // exec_mask set and are not out of bounds.
            VectorMask old_exec_mask = gpuDynInst->exec_mask;
            gpuDynInst->exec_mask &= ~oobMask;
            initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::WriteReq);
            gpuDynInst->exec_mask = old_exec_mask;
        }

        void
        injectGlobalMemFence(GPUDynInstPtr gpuDynInst)
        {
            // create request and set flags
            gpuDynInst->resetEntireStatusVector();
            gpuDynInst->setStatusVector(0, 1);
            RequestPtr req = std::make_shared<Request>(0, 0, 0,
                                                       gpuDynInst->computeUnit()->
                                                       requestorId(), 0,
                                                       gpuDynInst->wfDynId);
            gpuDynInst->setRequestFlags(req);
            gpuDynInst->computeUnit()->
                injectGlobalMemFence(gpuDynInst, false, req);
        }
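
        // The fence request does not access memory itself: it carries only
        // the requestor id and wavefront id, and the compute unit's
        // injectGlobalMemFence() uses it to order the wavefront's
        // outstanding global memory operations.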

        template<typename VOFF, typename VIDX, typename SRSRC, typename SOFF>
        void
        calcAddr(GPUDynInstPtr gpuDynInst, VOFF v_off, VIDX v_idx,
            SRSRC s_rsrc_desc, SOFF s_offset, int inst_offset)
        {
            Addr vaddr = 0;
            Addr base_addr = 0;
            Addr stride = 0;
            Addr buf_idx = 0;
            Addr buf_off = 0;
            Addr buffer_offset = 0;
            BufferRsrcDescriptor rsrc_desc;

            std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),
                        sizeof(BufferRsrcDescriptor));

            base_addr = rsrc_desc.baseAddr;

            stride = rsrc_desc.addTidEn ? ((rsrc_desc.dataFmt << 14)
                + rsrc_desc.stride) : rsrc_desc.stride;

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vaddr = base_addr + s_offset.rawData();

                    buf_idx = v_idx[lane] + (rsrc_desc.addTidEn ? lane : 0);

                    buf_off = v_off[lane] + inst_offset;

                    if (rsrc_desc.swizzleEn) {
                        Addr idx_stride = 8 << rsrc_desc.idxStride;
                        Addr elem_size = 2 << rsrc_desc.elemSize;
                        Addr idx_msb = buf_idx / idx_stride;
                        Addr idx_lsb = buf_idx % idx_stride;
                        Addr off_msb = buf_off / elem_size;
                        Addr off_lsb = buf_off % elem_size;
                        DPRINTF(VEGA, "mubuf swizzled lane %d: "
                                "idx_stride = %llx, elem_size = %llx, "
                                "idx_msb = %llx, idx_lsb = %llx, "
                                "off_msb = %llx, off_lsb = %llx\n",
                                lane, idx_stride, elem_size, idx_msb, idx_lsb,
                                off_msb, off_lsb);

                        buffer_offset = (idx_msb * stride + off_msb * elem_size)
                            * idx_stride + idx_lsb * elem_size + off_lsb;
                    } else {
                        buffer_offset = buf_off + stride * buf_idx;
                    }


                    if (rsrc_desc.stride == 0 || !rsrc_desc.swizzleEn) {
                        if (buffer_offset >=
                            rsrc_desc.numRecords - s_offset.rawData()) {
                            DPRINTF(VEGA, "mubuf out-of-bounds condition 1: "
                                    "lane = %d, buffer_offset = %llx, "
                                    "const_stride = %llx, "
                                    "const_num_records = %llx\n",
                                    lane, buf_off + stride * buf_idx,
                                    stride, rsrc_desc.numRecords);
                            oobMask.set(lane);
                            continue;
                        }
                    }

                    if (rsrc_desc.stride != 0 && rsrc_desc.swizzleEn) {
                        if (buf_idx >= rsrc_desc.numRecords ||
                            buf_off >= stride) {
                            DPRINTF(VEGA, "mubuf out-of-bounds condition 2: "
                                    "lane = %d, offset = %llx, "
                                    "index = %llx, "
                                    "const_num_records = %llx\n",
                                    lane, buf_off, buf_idx,
                                    rsrc_desc.numRecords);
                            oobMask.set(lane);
                            continue;
                        }
                    }

                    vaddr += buffer_offset;

                    DPRINTF(VEGA, "Calculating mubuf address for lane %d: "
                            "vaddr = %llx, base_addr = %llx, "
                            "stride = %llx, buf_idx = %llx, buf_off = %llx\n",
                            lane, vaddr, base_addr, stride,
                            buf_idx, buf_off);
                    gpuDynInst->addr.at(lane) = vaddr;
                }
            }
        }
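
        // Worked example of the swizzled path (numbers chosen for
        // illustration): with idxStride = 1 and elemSize = 2 in the
        // descriptor, idx_stride = 8 << 1 = 16 and elem_size = 2 << 2 = 8.
        // For stride = 64, buf_idx = 35 and buf_off = 21: idx_msb = 2,
        // idx_lsb = 3, off_msb = 2, off_lsb = 5, so
        // buffer_offset = (2 * 64 + 2 * 8) * 16 + 3 * 8 + 5 = 2333.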

        // first instruction DWORD
        InFmt_MUBUF instData;
        // second instruction DWORD
        InFmt_MUBUF_1 extData;
        // Mask of lanes with out-of-bounds accesses. Needs to be tracked
        // separately from the exec_mask so that we remember to write zero
        // to the registers associated with out of bounds lanes.
        VectorMask oobMask;
    }; // Inst_MUBUF

    class Inst_MTBUF : public VEGAGPUStaticInst
    {
      public:
        Inst_MTBUF(InFmt_MTBUF*, const std::string &opcode);
        ~Inst_MTBUF();

        int instSize() const override;
        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_MTBUF instData;
        // second instruction DWORD
        InFmt_MTBUF_1 extData;

      private:
        bool hasSecondDword(InFmt_MTBUF *);
    }; // Inst_MTBUF

    class Inst_MIMG : public VEGAGPUStaticInst
    {
      public:
        Inst_MIMG(InFmt_MIMG*, const std::string &opcode);
        ~Inst_MIMG();

        int instSize() const override;
        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_MIMG instData;
        // second instruction DWORD
        InFmt_MIMG_1 extData;
    }; // Inst_MIMG

    class Inst_EXP : public VEGAGPUStaticInst
    {
      public:
        Inst_EXP(InFmt_EXP*, const std::string &opcode);
        ~Inst_EXP();

        int instSize() const override;
        void initOperandInfo() override;

      protected:
        // first instruction DWORD
        InFmt_EXP instData;
        // second instruction DWORD
        InFmt_EXP_1 extData;
    }; // Inst_EXP

    class Inst_FLAT : public VEGAGPUStaticInst
    {
      public:
        Inst_FLAT(InFmt_FLAT*, const std::string &opcode);
        ~Inst_FLAT();

        int instSize() const override;
        void generateDisassembly() override;

        void initOperandInfo() override;

      protected:
        template<typename T>
        void
        initMemRead(GPUDynInstPtr gpuDynInst)
        {
            if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
                initMemReqHelper<T, 1>(gpuDynInst, MemCmd::ReadReq);
            } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
                Wavefront *wf = gpuDynInst->wavefront();
                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                    if (gpuDynInst->exec_mask[lane]) {
                        Addr vaddr = gpuDynInst->addr[lane];
                        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]
                            = wf->ldsChunk->read<T>(vaddr);
                    }
                }
            }
        }
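
        // The same split applies to the FLAT helpers below: once the flat
        // address has been resolved, SC_GLOBAL accesses are handed to the
        // generic initMemReqHelper() path, while SC_GROUP accesses bypass
        // the memory system and read/write the wavefront's LDS chunk
        // directly.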

        template<int N>
        void
        initMemRead(GPUDynInstPtr gpuDynInst)
        {
            if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
                initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::ReadReq);
            } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
                Wavefront *wf = gpuDynInst->wavefront();
                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                    if (gpuDynInst->exec_mask[lane]) {
                        Addr vaddr = gpuDynInst->addr[lane];
                        for (int i = 0; i < N; ++i) {
                            (reinterpret_cast<VecElemU32*>(
                                gpuDynInst->d_data))[lane * N + i]
                                = wf->ldsChunk->read<VecElemU32>(
                                    vaddr + i*sizeof(VecElemU32));
                        }
                    }
                }
            }
        }

        template<typename T>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst)
        {
            if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
                initMemReqHelper<T, 1>(gpuDynInst, MemCmd::WriteReq);
            } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
                Wavefront *wf = gpuDynInst->wavefront();
                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                    if (gpuDynInst->exec_mask[lane]) {
                        Addr vaddr = gpuDynInst->addr[lane];
                        wf->ldsChunk->write<T>(vaddr,
                            (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]);
                    }
                }
            }
        }

        template<int N>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst)
        {
            if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
                initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::WriteReq);
            } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
                Wavefront *wf = gpuDynInst->wavefront();
                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                    if (gpuDynInst->exec_mask[lane]) {
                        Addr vaddr = gpuDynInst->addr[lane];
                        for (int i = 0; i < N; ++i) {
                            wf->ldsChunk->write<VecElemU32>(
                                vaddr + i*sizeof(VecElemU32),
                                (reinterpret_cast<VecElemU32*>(
                                    gpuDynInst->d_data))[lane * N + i]);
                        }
                    }
                }
            }
        }

        template<typename T>
        void
        initAtomicAccess(GPUDynInstPtr gpuDynInst)
        {
            if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
                initMemReqHelper<T, 1>(gpuDynInst, MemCmd::SwapReq, true);
            } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
                Wavefront *wf = gpuDynInst->wavefront();
                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                    if (gpuDynInst->exec_mask[lane]) {
                        Addr vaddr = gpuDynInst->addr[lane];
                        auto amo_op =
                            gpuDynInst->makeAtomicOpFunctor<T>(
                                &(reinterpret_cast<T*>(
                                    gpuDynInst->a_data))[lane],
                                &(reinterpret_cast<T*>(
                                    gpuDynInst->x_data))[lane]);

                        T tmp = wf->ldsChunk->read<T>(vaddr);
                        (*amo_op)(reinterpret_cast<uint8_t *>(&tmp));
                        wf->ldsChunk->write<T>(vaddr, tmp);
                        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane] = tmp;
                    }
                }
            }
        }

        void
        calcAddr(GPUDynInstPtr gpuDynInst, ScalarRegU32 vaddr,
                 ScalarRegU32 saddr, ScalarRegI32 offset)
        {
            // Offset is a 13-bit field w/the following meanings:
            // In Flat instructions, offset is a 12-bit unsigned number
            // In Global/Scratch instructions, offset is a 13-bit signed number
            if (isFlat()) {
                offset = offset & 0xfff;
            } else {
                offset = (ScalarRegI32)sext<13>(offset);
            }
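            // e.g. a raw 13-bit field of 0x1fff sign-extends to -1 for
            // global/scratch, while flat keeps only the low 12 bits, so the
            // same encoding would give an offset of 0xfff.
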
            // If saddr = 0x7f there is no scalar reg to read and address will
            // be a 64-bit address. Otherwise, saddr is the reg index for a
            // scalar reg used as the base address for a 32-bit address.
            if ((saddr == 0x7f && isFlatGlobal()) || isFlat()) {
                ConstVecOperandU64 vbase(gpuDynInst, vaddr);
                vbase.read();

                calcAddrVgpr(gpuDynInst, vbase, offset);
            } else {
                // Assume we are operating in 64-bit mode and read a pair of
                // SGPRs for the address base.
                ConstScalarOperandU64 sbase(gpuDynInst, saddr);
                sbase.read();

                ConstVecOperandU32 voffset(gpuDynInst, vaddr);
                voffset.read();

                calcAddrSgpr(gpuDynInst, voffset, sbase, offset);
            }

            if (isFlat()) {
                gpuDynInst->resolveFlatSegment(gpuDynInst->exec_mask);
            } else {
                gpuDynInst->staticInstruction()->executed_as =
                    enums::SC_GLOBAL;
            }
        }

        void
        issueRequestHelper(GPUDynInstPtr gpuDynInst)
        {
            if ((gpuDynInst->executedAs() == enums::SC_GLOBAL && isFlat())
                || isFlatGlobal()) {
                gpuDynInst->computeUnit()->globalMemoryPipe
                    .issueRequest(gpuDynInst);
            } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
                assert(isFlat());
                gpuDynInst->computeUnit()->localMemoryPipe
                    .issueRequest(gpuDynInst);
            } else {
                fatal("Unsupported scope for flat instruction.\n");
            }
        }

        bool

        {
            return (extData.SADDR != 0x7f);
        }

        // first instruction DWORD
        InFmt_FLAT instData;
        // second instruction DWORD
        InFmt_FLAT_1 extData;

      private:
        void initFlatOperandInfo();


        void
        calcAddrSgpr(GPUDynInstPtr gpuDynInst, ConstVecOperandU32 &vaddr,
                     ConstScalarOperandU64 &saddr, ScalarRegI32 offset)
        {
            // Use SGPR pair as a base address and add VGPR-offset and
            // instruction offset. The VGPR-offset is always 32-bits so we
            // mask any upper bits from the vaddr.
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    ScalarRegI32 voffset = vaddr[lane];
                    gpuDynInst->addr.at(lane) =
                        saddr.rawData() + voffset + offset;
                }
            }
        }

        void
        calcAddrVgpr(GPUDynInstPtr gpuDynInst, ConstVecOperandU64 &addr,
                     ScalarRegI32 offset)
        {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    gpuDynInst->addr.at(lane) = addr[lane] + offset;
                }
            }
        }
    }; // Inst_FLAT
} // namespace VegaISA
} // namespace gem5

#endif // __ARCH_VEGA_INSTS_OP_ENCODINGS_HH__