// flat.cc - gem5 VegaISA FLAT memory instruction implementations.
/*
2 * Copyright (c) 2024 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "arch/amdgpu/vega/insts/instructions.hh"

namespace gem5
{

namespace VegaISA
{
39 // --- Inst_FLAT__FLAT_LOAD_UBYTE class methods ---
40
42 : Inst_FLAT(iFmt, "flat_load_ubyte")
43 {
44 setFlag(MemoryRef);
46 } // Inst_FLAT__FLAT_LOAD_UBYTE
47
49 {
50 } // ~Inst_FLAT__FLAT_LOAD_UBYTE
51
52 // --- description from .arch file ---
53 // Untyped buffer load unsigned byte (zero extend to VGPR destination).
54 void
56 {
57 Wavefront *wf = gpuDynInst->wavefront();
58
59 if (gpuDynInst->exec_mask.none()) {
61 wf->untrackVMemInst(gpuDynInst);
62 if (isFlat()) {
64 wf->untrackLGKMInst(gpuDynInst);
65 }
66 return;
67 }
68
69 gpuDynInst->execUnitId = wf->execUnitId;
70 gpuDynInst->latency.init(gpuDynInst->computeUnit());
71 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
72
73 calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
74
75 issueRequestHelper(gpuDynInst);
76 } // execute
77
78 void
80 {
81 initMemRead<VecElemU8>(gpuDynInst);
82 } // initiateAcc
83
84 void
86 {
87 VecOperandU32 vdst(gpuDynInst, extData.VDST);
88
89 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
90 if (gpuDynInst->exec_mask[lane]) {
91 vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU8*>(
92 gpuDynInst->d_data))[lane]);
93 }
94 }
95 vdst.write();
96 } // execute
97 // --- Inst_FLAT__FLAT_LOAD_SBYTE class methods ---
98
100 : Inst_FLAT(iFmt, "flat_load_sbyte")
101 {
102 setFlag(MemoryRef);
103 setFlag(Load);
104 } // Inst_FLAT__FLAT_LOAD_SBYTE
105
107 {
108 } // ~Inst_FLAT__FLAT_LOAD_SBYTE
109
110 // --- description from .arch file ---
111 // Untyped buffer load signed byte (sign extend to VGPR destination).
112 void
114 {
115 Wavefront *wf = gpuDynInst->wavefront();
116
117 if (gpuDynInst->exec_mask.none()) {
118 wf->decVMemInstsIssued();
119 wf->untrackVMemInst(gpuDynInst);
120 if (isFlat()) {
121 wf->decLGKMInstsIssued();
122 wf->untrackLGKMInst(gpuDynInst);
123 }
124 return;
125 }
126
127 gpuDynInst->execUnitId = wf->execUnitId;
128 gpuDynInst->latency.init(gpuDynInst->computeUnit());
129 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
130
131 calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
132
133 issueRequestHelper(gpuDynInst);
134 } // execute
135
136 void
138 {
139 initMemRead<VecElemI8>(gpuDynInst);
140 } // initiateAcc
141
142 void
144 {
145 VecOperandU32 vdst(gpuDynInst, extData.VDST);
146
147 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
148 if (gpuDynInst->exec_mask[lane]) {
149 vdst[lane] = (VecElemI32)((reinterpret_cast<VecElemI8*>(
150 gpuDynInst->d_data))[lane]);
151 }
152 }
153 vdst.write();
154 } // execute
155 // --- Inst_FLAT__FLAT_LOAD_USHORT class methods ---
156
158 : Inst_FLAT(iFmt, "flat_load_ushort")
159 {
160 setFlag(MemoryRef);
161 setFlag(Load);
162 } // Inst_FLAT__FLAT_LOAD_USHORT
163
165 {
166 } // ~Inst_FLAT__FLAT_LOAD_USHORT
167
168 // --- description from .arch file ---
169 // Untyped buffer load unsigned short (zero extend to VGPR destination).
170 void
172 {
173 Wavefront *wf = gpuDynInst->wavefront();
174
175 if (gpuDynInst->exec_mask.none()) {
176 wf->decVMemInstsIssued();
177 wf->untrackVMemInst(gpuDynInst);
178 if (isFlat()) {
179 wf->decLGKMInstsIssued();
180 wf->untrackLGKMInst(gpuDynInst);
181 }
182 return;
183 }
184
185 gpuDynInst->execUnitId = wf->execUnitId;
186 gpuDynInst->latency.init(gpuDynInst->computeUnit());
187 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
188
189 calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
190
191 issueRequestHelper(gpuDynInst);
192 } // execute
193
194 void
196 {
197 initMemRead<VecElemU16>(gpuDynInst);
198 } // initiateAcc
199
200 void
202 {
203 VecOperandU32 vdst(gpuDynInst, extData.VDST);
204
205 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
206 if (gpuDynInst->exec_mask[lane]) {
207 vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU16*>(
208 gpuDynInst->d_data))[lane]);
209 }
210 }
211 vdst.write();
212 } // execute
213
214 // --- Inst_FLAT__FLAT_LOAD_SSHORT class methods ---
215
217 : Inst_FLAT(iFmt, "flat_load_sshort")
218 {
219 setFlag(MemoryRef);
220 setFlag(Load);
221 } // Inst_FLAT__FLAT_LOAD_SSHORT
222
224 {
225 } // ~Inst_FLAT__FLAT_LOAD_SSHORT
226
227 // --- description from .arch file ---
228 // Untyped buffer load signed short (sign extend to VGPR destination).
229 void
234
235 void
237 {
238 } // initiateAcc
239
240 void
242 {
243 } // execute
244 // --- Inst_FLAT__FLAT_LOAD_DWORD class methods ---
245
247 : Inst_FLAT(iFmt, "flat_load_dword")
248 {
249 setFlag(MemoryRef);
250 setFlag(Load);
251 } // Inst_FLAT__FLAT_LOAD_DWORD
252
254 {
255 } // ~Inst_FLAT__FLAT_LOAD_DWORD
256
257 // --- description from .arch file ---
258 // Untyped buffer load dword.
259 void
261 {
262 Wavefront *wf = gpuDynInst->wavefront();
263
264 if (gpuDynInst->exec_mask.none()) {
265 wf->decVMemInstsIssued();
266 wf->untrackVMemInst(gpuDynInst);
267 if (isFlat()) {
268 wf->decLGKMInstsIssued();
269 wf->untrackLGKMInst(gpuDynInst);
270 }
271 return;
272 }
273
274 gpuDynInst->execUnitId = wf->execUnitId;
275 gpuDynInst->latency.init(gpuDynInst->computeUnit());
276 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
277
278 calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
279
280 issueRequestHelper(gpuDynInst);
281 } // execute
282
283 void
285 {
286 initMemRead<VecElemU32>(gpuDynInst);
287 } // initiateAcc
288
289 void
291 {
292 VecOperandU32 vdst(gpuDynInst, extData.VDST);
293
294 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
295 if (gpuDynInst->exec_mask[lane]) {
296 vdst[lane] = (reinterpret_cast<VecElemU32*>(
297 gpuDynInst->d_data))[lane];
298 }
299 }
300 vdst.write();
301 } // completeAcc
302 // --- Inst_FLAT__FLAT_LOAD_DWORDX2 class methods ---
303
305 InFmt_FLAT *iFmt)
306 : Inst_FLAT(iFmt, "flat_load_dwordx2")
307 {
308 setFlag(MemoryRef);
309 setFlag(Load);
310 } // Inst_FLAT__FLAT_LOAD_DWORDX2
311
313 {
314 } // ~Inst_FLAT__FLAT_LOAD_DWORDX2
315
316 // --- description from .arch file ---
317 // Untyped buffer load 2 dwords.
318 void
320 {
321 Wavefront *wf = gpuDynInst->wavefront();
322
323 if (gpuDynInst->exec_mask.none()) {
324 wf->decVMemInstsIssued();
325 wf->untrackVMemInst(gpuDynInst);
326 if (isFlat()) {
327 wf->decLGKMInstsIssued();
328 wf->untrackLGKMInst(gpuDynInst);
329 }
330 return;
331 }
332
333 gpuDynInst->execUnitId = wf->execUnitId;
334 gpuDynInst->latency.init(gpuDynInst->computeUnit());
335 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
336
337 calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
338
339 issueRequestHelper(gpuDynInst);
340 } // execute
341
342 void
344 {
345 initMemRead<2>(gpuDynInst);
346 } // initiateAcc
347
348 void
350 {
351 VecOperandU32 vdst0(gpuDynInst, extData.VDST);
352 VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
353
354 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
355 if (gpuDynInst->exec_mask[lane] && !isFlatScratch()) {
356 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
357 gpuDynInst->d_data))[lane * 2];
358 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
359 gpuDynInst->d_data))[lane * 2 + 1];
360 } else if (gpuDynInst->exec_mask[lane] && isFlatScratch()) {
361 // Unswizzle the data opposite of swizzleData. See swizzleData
362 // in src/arch/amdgpu/vega/insts/op_encodings.hh for details.
363 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
364 gpuDynInst->d_data))[lane];
365 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
366 gpuDynInst->d_data))[lane + NumVecElemPerVecReg];
367 }
368 }
369
370 vdst0.write();
371 vdst1.write();
372 } // completeAcc
373 // --- Inst_FLAT__FLAT_LOAD_DWORDX3 class methods ---
374
376 InFmt_FLAT *iFmt)
377 : Inst_FLAT(iFmt, "flat_load_dwordx3")
378 {
379 setFlag(MemoryRef);
380 setFlag(Load);
381 } // Inst_FLAT__FLAT_LOAD_DWORDX3
382
384 {
385 } // ~Inst_FLAT__FLAT_LOAD_DWORDX3
386
387 // --- description from .arch file ---
388 // Untyped buffer load 3 dwords.
389 void
391 {
392 Wavefront *wf = gpuDynInst->wavefront();
393
394 if (gpuDynInst->exec_mask.none()) {
395 wf->decVMemInstsIssued();
396 wf->untrackVMemInst(gpuDynInst);
397 if (isFlat()) {
398 wf->decLGKMInstsIssued();
399 wf->untrackLGKMInst(gpuDynInst);
400 }
401 return;
402 }
403
404 gpuDynInst->execUnitId = wf->execUnitId;
405 gpuDynInst->latency.init(gpuDynInst->computeUnit());
406 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
407
408 calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
409
410 issueRequestHelper(gpuDynInst);
411 } // execute
412
413 void
415 {
416 initMemRead<3>(gpuDynInst);
417 } // initiateAcc
418
419 void
421 {
422 VecOperandU32 vdst0(gpuDynInst, extData.VDST);
423 VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
424 VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2);
425
426 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
427 if (gpuDynInst->exec_mask[lane] && !isFlatScratch()) {
428 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
429 gpuDynInst->d_data))[lane * 3];
430 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
431 gpuDynInst->d_data))[lane * 3 + 1];
432 vdst2[lane] = (reinterpret_cast<VecElemU32*>(
433 gpuDynInst->d_data))[lane * 3 + 2];
434 } else if (gpuDynInst->exec_mask[lane] && isFlatScratch()) {
435 // Unswizzle the data opposite of swizzleData. See swizzleData
436 // in src/arch/amdgpu/vega/insts/op_encodings.hh for details.
437 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
438 gpuDynInst->d_data))[lane];
439 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
440 gpuDynInst->d_data))[lane + NumVecElemPerVecReg];
441 vdst2[lane] = (reinterpret_cast<VecElemU32*>(
442 gpuDynInst->d_data))[lane + 2*NumVecElemPerVecReg];
443 }
444 }
445
446 vdst0.write();
447 vdst1.write();
448 vdst2.write();
449 } // completeAcc
450 // --- Inst_FLAT__FLAT_LOAD_DWORDX4 class methods ---
451
453 InFmt_FLAT *iFmt)
454 : Inst_FLAT(iFmt, "flat_load_dwordx4")
455 {
456 setFlag(MemoryRef);
457 setFlag(Load);
458 } // Inst_FLAT__FLAT_LOAD_DWORDX4
459
461 {
462 } // ~Inst_FLAT__FLAT_LOAD_DWORDX4
463
464 // --- description from .arch file ---
465 // Untyped buffer load 4 dwords.
466 void
468 {
469 Wavefront *wf = gpuDynInst->wavefront();
470
471 if (gpuDynInst->exec_mask.none()) {
472 wf->decVMemInstsIssued();
473 wf->untrackVMemInst(gpuDynInst);
474 if (isFlat()) {
475 wf->decLGKMInstsIssued();
476 wf->untrackLGKMInst(gpuDynInst);
477 }
478 return;
479 }
480
481 gpuDynInst->execUnitId = wf->execUnitId;
482 gpuDynInst->latency.init(gpuDynInst->computeUnit());
483 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
484
485 calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
486
487 issueRequestHelper(gpuDynInst);
488 } // execute
489
490 void
492 {
493 initMemRead<4>(gpuDynInst);
494 } // initiateAcc
495
496 void
498 {
499 VecOperandU32 vdst0(gpuDynInst, extData.VDST);
500 VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
501 VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2);
502 VecOperandU32 vdst3(gpuDynInst, extData.VDST + 3);
503
504 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
505 if (gpuDynInst->exec_mask[lane] && !isFlatScratch()) {
506 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
507 gpuDynInst->d_data))[lane * 4];
508 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
509 gpuDynInst->d_data))[lane * 4 + 1];
510 vdst2[lane] = (reinterpret_cast<VecElemU32*>(
511 gpuDynInst->d_data))[lane * 4 + 2];
512 vdst3[lane] = (reinterpret_cast<VecElemU32*>(
513 gpuDynInst->d_data))[lane * 4 + 3];
514 } else if (gpuDynInst->exec_mask[lane] && isFlatScratch()) {
515 // Unswizzle the data opposite of swizzleData. See swizzleData
516 // in src/arch/amdgpu/vega/insts/op_encodings.hh for details.
517 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
518 gpuDynInst->d_data))[lane];
519 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
520 gpuDynInst->d_data))[lane + NumVecElemPerVecReg];
521 vdst2[lane] = (reinterpret_cast<VecElemU32*>(
522 gpuDynInst->d_data))[lane + 2*NumVecElemPerVecReg];
523 vdst3[lane] = (reinterpret_cast<VecElemU32*>(
524 gpuDynInst->d_data))[lane + 3*NumVecElemPerVecReg];
525 }
526 }
527
528 vdst0.write();
529 vdst1.write();
530 vdst2.write();
531 vdst3.write();
532 } // completeAcc
533 // --- Inst_FLAT__FLAT_STORE_BYTE class methods ---
534
536 : Inst_FLAT(iFmt, "flat_store_byte")
537 {
538 setFlag(MemoryRef);
539 setFlag(Store);
540 } // Inst_FLAT__FLAT_STORE_BYTE
541
543 {
544 } // ~Inst_FLAT__FLAT_STORE_BYTE
545
546 // --- description from .arch file ---
547 // Untyped buffer store byte.
548 void
550 {
551 Wavefront *wf = gpuDynInst->wavefront();
552
553 if (gpuDynInst->exec_mask.none()) {
554 wf->decVMemInstsIssued();
555 wf->untrackVMemInst(gpuDynInst);
556 if (isFlat()) {
557 wf->decLGKMInstsIssued();
558 wf->untrackLGKMInst(gpuDynInst);
559 }
560 wf->decExpInstsIssued();
561 wf->untrackExpInst(gpuDynInst);
562 return;
563 }
564
565 gpuDynInst->execUnitId = wf->execUnitId;
566 gpuDynInst->latency.init(gpuDynInst->computeUnit());
567 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
568
569 ConstVecOperandU8 data(gpuDynInst, extData.DATA);
570
571 data.read();
572
573 calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
574
575 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
576 if (gpuDynInst->exec_mask[lane]) {
577 (reinterpret_cast<VecElemU8*>(gpuDynInst->d_data))[lane]
578 = data[lane];
579 }
580 }
581
582 issueRequestHelper(gpuDynInst);
583 } // execute
584
585 void
587 {
588 initMemWrite<VecElemU8>(gpuDynInst);
589 } // initiateAcc
590
591 void
593 {
594 } // execute
595 // --- Inst_FLAT__FLAT_STORE_SHORT class methods ---
596
598 : Inst_FLAT(iFmt, "flat_store_short")
599 {
600 setFlag(MemoryRef);
601 setFlag(Store);
602 } // Inst_FLAT__FLAT_STORE_SHORT
603
605 {
606 } // ~Inst_FLAT__FLAT_STORE_SHORT
607
608 // --- description from .arch file ---
609 // Untyped buffer store short.
610 void
612 {
613 Wavefront *wf = gpuDynInst->wavefront();
614
615 if (gpuDynInst->exec_mask.none()) {
616 wf->decVMemInstsIssued();
617 wf->untrackVMemInst(gpuDynInst);
618 if (isFlat()) {
619 wf->decLGKMInstsIssued();
620 wf->untrackLGKMInst(gpuDynInst);
621 }
622 wf->decExpInstsIssued();
623 wf->untrackExpInst(gpuDynInst);
624 return;
625 }
626
627 gpuDynInst->execUnitId = wf->execUnitId;
628 gpuDynInst->latency.init(gpuDynInst->computeUnit());
629 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
630
631 ConstVecOperandU16 data(gpuDynInst, extData.DATA);
632
633 data.read();
634
635 calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
636
637 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
638 if (gpuDynInst->exec_mask[lane]) {
639 (reinterpret_cast<VecElemU16*>(gpuDynInst->d_data))[lane]
640 = data[lane];
641 }
642 }
643
644 issueRequestHelper(gpuDynInst);
645 } // execute
646
647 void
649 {
650 initMemWrite<VecElemU16>(gpuDynInst);
651 } // initiateAcc
652
653 void
655 {
656 } // completeAcc
657 // --- Inst_FLAT__FLAT_STORE_SHORT_D16_HI class methods ---
658
661 : Inst_FLAT(iFmt, "flat_store_short_d16_hi")
662 {
663 setFlag(MemoryRef);
664 setFlag(Store);
665 } // Inst_FLAT__FLAT_STORE_SHORT_D16_HI
666
668 {
669 } // ~Inst_FLAT__FLAT_STORE_SHORT_D16_HI
670
671 // --- description from .arch file ---
672 // Untyped buffer store short.
673 void
675 {
676 Wavefront *wf = gpuDynInst->wavefront();
677
678 if (gpuDynInst->exec_mask.none()) {
679 wf->decVMemInstsIssued();
680 wf->untrackVMemInst(gpuDynInst);
681 if (isFlat()) {
682 wf->decLGKMInstsIssued();
683 wf->untrackLGKMInst(gpuDynInst);
684 }
685 wf->decExpInstsIssued();
686 wf->untrackExpInst(gpuDynInst);
687 return;
688 }
689
690 gpuDynInst->execUnitId = wf->execUnitId;
691 gpuDynInst->latency.init(gpuDynInst->computeUnit());
692 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
693
694 ConstVecOperandU32 data(gpuDynInst, extData.DATA);
695
696 data.read();
697
698 calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
699
700 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
701 if (gpuDynInst->exec_mask[lane]) {
702 (reinterpret_cast<VecElemU16*>(gpuDynInst->d_data))[lane]
703 = (data[lane] >> 16);
704 }
705 }
706
707 issueRequestHelper(gpuDynInst);
708 } // execute
709
710 void
712 {
713 initMemWrite<VecElemU16>(gpuDynInst);
714 } // initiateAcc
715
716 void
720 // --- Inst_FLAT__FLAT_STORE_DWORD class methods ---
721
723 : Inst_FLAT(iFmt, "flat_store_dword")
724 {
725 setFlag(MemoryRef);
726 setFlag(Store);
727 } // Inst_FLAT__FLAT_STORE_DWORD
728
730 {
731 } // ~Inst_FLAT__FLAT_STORE_DWORD
732
733 // --- description from .arch file ---
734 // Untyped buffer store dword.
735 void
737 {
738 Wavefront *wf = gpuDynInst->wavefront();
739
740 if (gpuDynInst->exec_mask.none()) {
741 wf->decVMemInstsIssued();
742 wf->untrackVMemInst(gpuDynInst);
743 if (isFlat()) {
744 wf->decLGKMInstsIssued();
745 wf->untrackLGKMInst(gpuDynInst);
746 }
747 wf->decExpInstsIssued();
748 wf->untrackExpInst(gpuDynInst);
749 return;
750 }
751
752 gpuDynInst->execUnitId = wf->execUnitId;
753 gpuDynInst->latency.init(gpuDynInst->computeUnit());
754 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
755
756 ConstVecOperandU32 data(gpuDynInst, extData.DATA);
757
758 data.read();
759
760 calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
761
762 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
763 if (gpuDynInst->exec_mask[lane]) {
764 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane]
765 = data[lane];
766 }
767 }
768
769 issueRequestHelper(gpuDynInst);
770 } // execute
771
772 void
774 {
775 initMemWrite<VecElemU32>(gpuDynInst);
776 } // initiateAcc
777
778 void
780 {
781 } // completeAcc
782 // --- Inst_FLAT__FLAT_STORE_DWORDX2 class methods ---
783
785 InFmt_FLAT *iFmt)
786 : Inst_FLAT(iFmt, "flat_store_dwordx2")
787 {
788 setFlag(MemoryRef);
789 setFlag(Store);
790 } // Inst_FLAT__FLAT_STORE_DWORDX2
791
793 {
794 } // ~Inst_FLAT__FLAT_STORE_DWORDX2
795
796 // --- description from .arch file ---
797 // Untyped buffer store 2 dwords.
798 void
800 {
801 Wavefront *wf = gpuDynInst->wavefront();
802
803 if (gpuDynInst->exec_mask.none()) {
804 wf->decVMemInstsIssued();
805 wf->untrackVMemInst(gpuDynInst);
806 if (isFlat()) {
807 wf->decLGKMInstsIssued();
808 wf->untrackLGKMInst(gpuDynInst);
809 }
810 wf->decExpInstsIssued();
811 wf->untrackExpInst(gpuDynInst);
812 return;
813 }
814
815 gpuDynInst->execUnitId = wf->execUnitId;
816 gpuDynInst->latency.init(gpuDynInst->computeUnit());
817 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
818
819 ConstVecOperandU64 data(gpuDynInst, extData.DATA);
820
821 data.read();
822
823 calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
824
825 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
826 if (gpuDynInst->exec_mask[lane]) {
827 (reinterpret_cast<VecElemU64*>(gpuDynInst->d_data))[lane]
828 = data[lane];
829 }
830 }
831
832 issueRequestHelper(gpuDynInst);
833 } // execute
834
835 void
837 {
838 initMemWrite<2>(gpuDynInst);
839 } // initiateAcc
840
841 void
843 {
844 } // completeAcc
845 // --- Inst_FLAT__FLAT_STORE_DWORDX3 class methods ---
846
848 InFmt_FLAT *iFmt)
849 : Inst_FLAT(iFmt, "flat_store_dwordx3")
850 {
851 setFlag(MemoryRef);
852 setFlag(Store);
853 } // Inst_FLAT__FLAT_STORE_DWORDX3
854
856 {
857 } // ~Inst_FLAT__FLAT_STORE_DWORDX3
858
859 // --- description from .arch file ---
860 // Untyped buffer store 3 dwords.
861 void
863 {
864 Wavefront *wf = gpuDynInst->wavefront();
865
866 if (gpuDynInst->exec_mask.none()) {
867 wf->decVMemInstsIssued();
868 wf->untrackVMemInst(gpuDynInst);
869 if (isFlat()) {
870 wf->decLGKMInstsIssued();
871 wf->untrackLGKMInst(gpuDynInst);
872 }
873 wf->decExpInstsIssued();
874 wf->untrackExpInst(gpuDynInst);
875 return;
876 }
877
878 gpuDynInst->execUnitId = wf->execUnitId;
879 gpuDynInst->latency.init(gpuDynInst->computeUnit());
880 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
881
882 ConstVecOperandU32 data0(gpuDynInst, extData.DATA);
883 ConstVecOperandU32 data1(gpuDynInst, extData.DATA + 1);
884 ConstVecOperandU32 data2(gpuDynInst, extData.DATA + 2);
885
886 data0.read();
887 data1.read();
888 data2.read();
889
890 calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
891
892 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
893 if (gpuDynInst->exec_mask[lane]) {
894 (reinterpret_cast<VecElemU32*>(
895 gpuDynInst->d_data))[lane * 3] = data0[lane];
896 (reinterpret_cast<VecElemU32*>(
897 gpuDynInst->d_data))[lane * 3 + 1] = data1[lane];
898 (reinterpret_cast<VecElemU32*>(
899 gpuDynInst->d_data))[lane * 3 + 2] = data2[lane];
900 }
901 }
902
903 issueRequestHelper(gpuDynInst);
904 } // execute
905
906 void
908 {
909 initMemWrite<3>(gpuDynInst);
910 } // initiateAcc
911
912 void
914 {
915 } // completeAcc
916 // --- Inst_FLAT__FLAT_STORE_DWORDX4 class methods ---
917
919 InFmt_FLAT *iFmt)
920 : Inst_FLAT(iFmt, "flat_store_dwordx4")
921 {
922 setFlag(MemoryRef);
923 setFlag(Store);
924 } // Inst_FLAT__FLAT_STORE_DWORDX4
925
927 {
928 } // ~Inst_FLAT__FLAT_STORE_DWORDX4
929
930 // --- description from .arch file ---
931 // Untyped buffer store 4 dwords.
932 void
934 {
935 Wavefront *wf = gpuDynInst->wavefront();
936
937 if (gpuDynInst->exec_mask.none()) {
938 wf->decVMemInstsIssued();
939 wf->untrackVMemInst(gpuDynInst);
940 if (isFlat()) {
941 wf->decLGKMInstsIssued();
942 wf->untrackLGKMInst(gpuDynInst);
943 }
944 wf->decExpInstsIssued();
945 wf->untrackExpInst(gpuDynInst);
946 return;
947 }
948
949 gpuDynInst->execUnitId = wf->execUnitId;
950 gpuDynInst->latency.init(gpuDynInst->computeUnit());
951 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
952
953 ConstVecOperandU32 data0(gpuDynInst, extData.DATA);
954 ConstVecOperandU32 data1(gpuDynInst, extData.DATA + 1);
955 ConstVecOperandU32 data2(gpuDynInst, extData.DATA + 2);
956 ConstVecOperandU32 data3(gpuDynInst, extData.DATA + 3);
957
958 data0.read();
959 data1.read();
960 data2.read();
961 data3.read();
962
963 calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
964
965 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
966 if (gpuDynInst->exec_mask[lane]) {
967 (reinterpret_cast<VecElemU32*>(
968 gpuDynInst->d_data))[lane * 4] = data0[lane];
969 (reinterpret_cast<VecElemU32*>(
970 gpuDynInst->d_data))[lane * 4 + 1] = data1[lane];
971 (reinterpret_cast<VecElemU32*>(
972 gpuDynInst->d_data))[lane * 4 + 2] = data2[lane];
973 (reinterpret_cast<VecElemU32*>(
974 gpuDynInst->d_data))[lane * 4 + 3] = data3[lane];
975 }
976 }
977
978 issueRequestHelper(gpuDynInst);
979 } // execute
980
981 void
983 {
984 initMemWrite<4>(gpuDynInst);
985 } // initiateAcc
986
987 void
989 {
990 } // completeAcc
991 // --- Inst_FLAT__FLAT_LOAD_LDS_UBYTE class methods ---
992
995 : Inst_FLAT(iFmt, "flat_load_lds_ubyte")
996 {
997 setFlag(Load);
998
999 assert(isFlatGlobal() || isFlatScratch());
1000 } // Inst_FLAT__FLAT_LOAD_LDS_UBYTE
1001
1003 {
1004 } // ~Inst_FLAT__FLAT_LOAD_LDS_UBYTE
1005
1006 // --- description from .arch file ---
1007 // Untyped buffer load unsigned byte (zero extend to VGPR destination).
1008 void
1010 {
1011 Wavefront *wf = gpuDynInst->wavefront();
1012
1013 if (gpuDynInst->exec_mask.none()) {
1014 wf->decVMemInstsIssued();
1015 return;
1016 }
1017
1018 gpuDynInst->execUnitId = wf->execUnitId;
1019 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1020 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1021
1022 calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
1023
1024 issueRequestHelper(gpuDynInst);
1025 } // execute
1026
1027 void
1029 {
1030 initMemRead<VecElemU8>(gpuDynInst);
1031 } // initiateAcc
1032
1033 void
1035 {
1036 // Align to dword.
1037 ldsComplete<1>(gpuDynInst);
1038 } // execute
1039 // --- Inst_FLAT__FLAT_LOAD_LDS_SBYTE class methods ---
1040
1043 : Inst_FLAT(iFmt, "flat_load_lds_sbyte")
1044 {
1045 setFlag(Load);
1046
1047 assert(isFlatGlobal() || isFlatScratch());
1048 } // Inst_FLAT__FLAT_LOAD_LDS_SBYTE
1049
1051 {
1052 } // ~Inst_FLAT__FLAT_LOAD_LDS_SBYTE
1053
1054 // --- description from .arch file ---
1055 // Untyped buffer load signed byte (sign extend to VGPR destination).
1056 void
1058 {
1059 Wavefront *wf = gpuDynInst->wavefront();
1060
1061 if (gpuDynInst->exec_mask.none()) {
1062 wf->decVMemInstsIssued();
1063 return;
1064 }
1065
1066 gpuDynInst->execUnitId = wf->execUnitId;
1067 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1068 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1069
1070 calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
1071
1072 issueRequestHelper(gpuDynInst);
1073 } // execute
1074
1075 void
1077 {
1078 initMemRead<VecElemI8>(gpuDynInst);
1079 } // initiateAcc
1080
1081 void
1083 {
1084 // Align to dword.
1085 ldsComplete<1, 8>(gpuDynInst);
1086 } // execute
1087 // --- Inst_FLAT__FLAT_LOAD_LDS_USHORT class methods ---
1088
1091 : Inst_FLAT(iFmt, "flat_load_lds_ushort")
1092 {
1093 setFlag(Load);
1094
1095 assert(isFlatGlobal() || isFlatScratch());
1096 } // Inst_FLAT__FLAT_LOAD_LDS_USHORT
1097
1099 {
1100 } // ~Inst_FLAT__FLAT_LOAD_LDS_USHORT
1101
1102 // --- description from .arch file ---
1103 // Untyped buffer load unsigned short (zero extend to VGPR destination).
1104 void
1106 {
1107 Wavefront *wf = gpuDynInst->wavefront();
1108
1109 if (gpuDynInst->exec_mask.none()) {
1110 wf->decVMemInstsIssued();
1111 return;
1112 }
1113
1114 gpuDynInst->execUnitId = wf->execUnitId;
1115 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1116 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1117
1118 calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
1119
1120 issueRequestHelper(gpuDynInst);
1121 } // execute
1122
1123 void
1125 {
1126 initMemRead<VecElemU16>(gpuDynInst);
1127 } // initiateAcc
1128
1129 void
1131 {
1132 // Align to dword.
1133 ldsComplete<1>(gpuDynInst);
1134 } // execute
1135
1136 // --- Inst_FLAT__FLAT_LOAD_LDS_SSHORT class methods ---
1137
1140 : Inst_FLAT(iFmt, "flat_load_lds_sshort")
1141 {
1142 setFlag(Load);
1143
1144 assert(isFlatGlobal() || isFlatScratch());
1145 } // Inst_FLAT__FLAT_LOAD_LDS_SSHORT
1146
1148 {
1149 } // ~Inst_FLAT__FLAT_LOAD_LDS_SSHORT
1150
1151 // --- description from .arch file ---
1152 // Untyped buffer load signed short (sign extend to VGPR destination).
1153 void
1155 {
1156 Wavefront *wf = gpuDynInst->wavefront();
1157
1158 if (gpuDynInst->exec_mask.none()) {
1159 wf->decVMemInstsIssued();
1160 return;
1161 }
1162
1163 gpuDynInst->execUnitId = wf->execUnitId;
1164 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1165 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1166
1167 calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
1168
1169 issueRequestHelper(gpuDynInst);
1170 } // execute
1171
1172 void
1174 {
1175 initMemRead<VecElemI16>(gpuDynInst);
1176 } // initiateAcc
1177
1178 void
1180 {
1181 // Align to dword.
1182 ldsComplete<1, 16>(gpuDynInst);
1183 } // execute
1184 // --- Inst_FLAT__FLAT_LOAD_LDS_DWORD class methods ---
1185
1188 : Inst_FLAT(iFmt, "flat_load_lds_dword")
1189 {
1190 setFlag(Load);
1191
1192 assert(isFlatGlobal() || isFlatScratch());
1193 } // Inst_FLAT__FLAT_LOAD_LDS_DWORD
1194
1196 {
1197 } // ~Inst_FLAT__FLAT_LOAD_LDS_DWORD
1198
1199 // --- description from .arch file ---
1200 // Untyped buffer load dword.
1201 void
1203 {
1204 Wavefront *wf = gpuDynInst->wavefront();
1205
1206 if (gpuDynInst->exec_mask.none()) {
1207 wf->decVMemInstsIssued();
1208 return;
1209 }
1210
1211 gpuDynInst->execUnitId = wf->execUnitId;
1212 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1213 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1214
1215 calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
1216
1217 issueRequestHelper(gpuDynInst);
1218 } // execute
1219
1220 void
1222 {
1223 initMemRead<VecElemU32>(gpuDynInst);
1224 } // initiateAcc
1225
1226 void
1228 {
1229 ldsComplete<1>(gpuDynInst);
1230 } // completeAcc
1231 // --- Inst_FLAT__FLAT_ATOMIC_SWAP class methods ---
1232
1234 : Inst_FLAT(iFmt, "flat_atomic_swap")
1235 {
1236 setFlag(AtomicExch);
1237 if (instData.GLC) {
1238 setFlag(AtomicReturn);
1239 } else {
1240 setFlag(AtomicNoReturn);
1241 }
1242 setFlag(MemoryRef);
1243 } // Inst_FLAT__FLAT_ATOMIC_SWAP
1244
1246 {
1247 } // ~Inst_FLAT__FLAT_ATOMIC_SWAP
1248
1249 // --- description from .arch file ---
1250 // 32b:
1251 // tmp = MEM[ADDR];
1252 // MEM[ADDR] = DATA;
1253 // RETURN_DATA = tmp.
1254 void
1259
1260 void
1262 {
1263 initAtomicAccess<VecElemU32>(gpuDynInst);
1264 } // initiateAcc
1265
1266 void
1271
1272 // --- Inst_FLAT__FLAT_ATOMIC_CMPSWAP class methods ---
1273
1274 Inst_FLAT__FLAT_ATOMIC_CMPSWAP
1275 ::Inst_FLAT__FLAT_ATOMIC_CMPSWAP(InFmt_FLAT *iFmt)
1276 : Inst_FLAT(iFmt, "flat_atomic_cmpswap")
1277 {
1278 setFlag(AtomicCAS);
1279 if (instData.GLC) {
1280 setFlag(AtomicReturn);
1281 } else {
1282 setFlag(AtomicNoReturn);
1283 }
1284 setFlag(MemoryRef);
1285 } // Inst_FLAT__FLAT_ATOMIC_CMPSWAP
1286
1288 {
1289 } // ~Inst_FLAT__FLAT_ATOMIC_CMPSWAP
1290
1291 // --- description from .arch file ---
1292 // 32b:
1293 // tmp = MEM[ADDR];
1294 // src = DATA[0];
1295 // cmp = DATA[1];
1296 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
1297 // RETURN_DATA[0] = tmp.
1298 void
1303
1304 void
1306 {
1307 initAtomicAccess<VecElemU32>(gpuDynInst);
1308 } // initiateAcc
1309
1310 void
1315 // --- Inst_FLAT__FLAT_ATOMIC_ADD class methods ---
1316
1318 : Inst_FLAT(iFmt, "flat_atomic_add")
1319 {
1320 setFlag(AtomicAdd);
1321 if (instData.GLC) {
1322 setFlag(AtomicReturn);
1323 } else {
1324 setFlag(AtomicNoReturn);
1325 }
1326 setFlag(MemoryRef);
1327 } // Inst_FLAT__FLAT_ATOMIC_ADD
1328
1330 {
1331 } // ~Inst_FLAT__FLAT_ATOMIC_ADD
1332
1333 // --- description from .arch file ---
1334 // 32b:
1335 // tmp = MEM[ADDR];
1336 // MEM[ADDR] += DATA;
1337 // RETURN_DATA = tmp.
1338 void
1343
1344 void
1346 {
1347 initAtomicAccess<VecElemU32>(gpuDynInst);
1348 } // initiateAcc
1349
1350 void
1355 // --- Inst_FLAT__FLAT_ATOMIC_SUB class methods ---
1356
1358 : Inst_FLAT(iFmt, "flat_atomic_sub")
1359 {
1360 setFlag(AtomicSub);
1361 if (instData.GLC) {
1362 setFlag(AtomicReturn);
1363 } else {
1364 setFlag(AtomicNoReturn);
1365 }
1366 setFlag(MemoryRef);
1367 } // Inst_FLAT__FLAT_ATOMIC_SUB
1368
1370 {
1371 } // ~Inst_FLAT__FLAT_ATOMIC_SUB
1372
1373 // --- description from .arch file ---
1374 // 32b:
1375 // tmp = MEM[ADDR];
1376 // MEM[ADDR] -= DATA;
1377 // RETURN_DATA = tmp.
1378 void
1383
1384 void
1386 {
1387 initAtomicAccess<VecElemU32>(gpuDynInst);
1388 } // initiateAcc
1389
1390 void
1395 // --- Inst_FLAT__FLAT_ATOMIC_SMIN class methods ---
1396
1398 : Inst_FLAT(iFmt, "flat_atomic_smin")
1399 {
1400 setFlag(AtomicMin);
1401 if (instData.GLC) {
1402 setFlag(AtomicReturn);
1403 } else {
1404 setFlag(AtomicNoReturn);
1405 }
1406 setFlag(MemoryRef);
1407 } // Inst_FLAT__FLAT_ATOMIC_SMIN
1408
1410 {
1411 } // ~Inst_FLAT__FLAT_ATOMIC_SMIN
1412
1413 // --- description from .arch file ---
1414 // 32b:
1415 // tmp = MEM[ADDR];
1416 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
1417 // RETURN_DATA = tmp.
1418 void
1423
1424 void
1426 {
1427 initAtomicAccess<VecElemI32>(gpuDynInst);
1428 } // initiateAcc
1429
1430 void
1435 // --- Inst_FLAT__FLAT_ATOMIC_UMIN class methods ---
1436
1438 : Inst_FLAT(iFmt, "flat_atomic_umin")
1439 {
1440 setFlag(AtomicMin);
1441 if (instData.GLC) {
1442 setFlag(AtomicReturn);
1443 } else {
1444 setFlag(AtomicNoReturn);
1445 }
1446 setFlag(MemoryRef);
1447 } // Inst_FLAT__FLAT_ATOMIC_UMIN
1448
1450 {
1451 } // ~Inst_FLAT__FLAT_ATOMIC_UMIN
1452
1453 // --- description from .arch file ---
1454 // 32b:
1455 // tmp = MEM[ADDR];
1456 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
1457 // RETURN_DATA = tmp.
1458 void
1463
1464 void
1466 {
1467 initAtomicAccess<VecElemU32>(gpuDynInst);
1468 } // initiateAcc
1469
1470 void
1475 // --- Inst_FLAT__FLAT_ATOMIC_SMAX class methods ---
1476
1478 : Inst_FLAT(iFmt, "flat_atomic_smax")
1479 {
1480 setFlag(AtomicMax);
1481 if (instData.GLC) {
1482 setFlag(AtomicReturn);
1483 } else {
1484 setFlag(AtomicNoReturn);
1485 }
1486 setFlag(MemoryRef);
1487 } // Inst_FLAT__FLAT_ATOMIC_SMAX
1488
1490 {
1491 } // ~Inst_FLAT__FLAT_ATOMIC_SMAX
1492
1493 // --- description from .arch file ---
1494 // 32b:
1495 // tmp = MEM[ADDR];
1496 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
1497 // RETURN_DATA = tmp.
1498 void
1503
1504 void
1506 {
1507 initAtomicAccess<VecElemI32>(gpuDynInst);
1508 } // initiateAcc
1509
1510 void
1515 // --- Inst_FLAT__FLAT_ATOMIC_UMAX class methods ---
1516
1518 : Inst_FLAT(iFmt, "flat_atomic_umax")
1519 {
1520 setFlag(AtomicMax);
1521 if (instData.GLC) {
1522 setFlag(AtomicReturn);
1523 } else {
1524 setFlag(AtomicNoReturn);
1525 }
1526 setFlag(MemoryRef);
1527 } // Inst_FLAT__FLAT_ATOMIC_UMAX
1528
1530 {
1531 } // ~Inst_FLAT__FLAT_ATOMIC_UMAX
1532
1533 // --- description from .arch file ---
1534 // 32b:
1535 // tmp = MEM[ADDR];
1536 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
1537 // RETURN_DATA = tmp.
1538 void
1543
1544 void
1546 {
1547 initAtomicAccess<VecElemU32>(gpuDynInst);
1548 } // initiateAcc
1549
1550 void
1555 // --- Inst_FLAT__FLAT_ATOMIC_AND class methods ---
1556
1558 : Inst_FLAT(iFmt, "flat_atomic_and")
1559 {
1560 setFlag(AtomicAnd);
1561 if (instData.GLC) {
1562 setFlag(AtomicReturn);
1563 } else {
1564 setFlag(AtomicNoReturn);
1565 }
1566 setFlag(MemoryRef);
1567 } // Inst_FLAT__FLAT_ATOMIC_AND
1568
1570 {
1571 } // ~Inst_FLAT__FLAT_ATOMIC_AND
1572
1573 // --- description from .arch file ---
1574 // 32b:
1575 // tmp = MEM[ADDR];
1576 // MEM[ADDR] &= DATA;
1577 // RETURN_DATA = tmp.
1578 void
1583
1584 void
1586 {
1587 initAtomicAccess<VecElemU32>(gpuDynInst);
1588 } // initiateAcc
1589
1590 void
1595 // --- Inst_FLAT__FLAT_ATOMIC_OR class methods ---
1596
1598 : Inst_FLAT(iFmt, "flat_atomic_or")
1599 {
1600 setFlag(AtomicOr);
1601 if (instData.GLC) {
1602 setFlag(AtomicReturn);
1603 } else {
1604 setFlag(AtomicNoReturn);
1605 }
1606 setFlag(MemoryRef);
1607 } // Inst_FLAT__FLAT_ATOMIC_OR
1608
1610 {
1611 } // ~Inst_FLAT__FLAT_ATOMIC_OR
1612
1613 // --- description from .arch file ---
1614 // 32b:
1615 // tmp = MEM[ADDR];
1616 // MEM[ADDR] |= DATA;
1617 // RETURN_DATA = tmp.
1618 void
1623
1624 void
1626 {
1627 initAtomicAccess<VecElemU32>(gpuDynInst);
1628 } // initiateAcc
1629
1630 void
1635
1636 // --- Inst_FLAT__FLAT_ATOMIC_XOR class methods ---
1637
1639 : Inst_FLAT(iFmt, "flat_atomic_xor")
1640 {
1641 setFlag(AtomicXor);
1642 if (instData.GLC) {
1643 setFlag(AtomicReturn);
1644 } else {
1645 setFlag(AtomicNoReturn);
1646 }
1647 setFlag(MemoryRef);
1648 } // Inst_FLAT__FLAT_ATOMIC_XOR
1649
1651 {
1652 } // ~Inst_FLAT__FLAT_ATOMIC_XOR
1653
1654 // --- description from .arch file ---
1655 // 32b:
1656 // tmp = MEM[ADDR];
1657 // MEM[ADDR] ^= DATA;
1658 // RETURN_DATA = tmp.
1659 void
1664
1665 void
1667 {
1668 initAtomicAccess<VecElemU32>(gpuDynInst);
1669 } // initiateAcc
1670
1671 void
1676 // --- Inst_FLAT__FLAT_ATOMIC_INC class methods ---
1677
1679 : Inst_FLAT(iFmt, "flat_atomic_inc")
1680 {
1681 setFlag(AtomicInc);
1682 if (instData.GLC) {
1683 setFlag(AtomicReturn);
1684 } else {
1685 setFlag(AtomicNoReturn);
1686 }
1687 setFlag(MemoryRef);
1688 } // Inst_FLAT__FLAT_ATOMIC_INC
1689
1691 {
1692 } // ~Inst_FLAT__FLAT_ATOMIC_INC
1693
1694 // --- description from .arch file ---
1695 // 32b:
1696 // tmp = MEM[ADDR];
1697 // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
1698 // RETURN_DATA = tmp.
1699 void
1704
1705 void
1707 {
1708 initAtomicAccess<VecElemU32>(gpuDynInst);
1709 } // initiateAcc
1710
1711 void
1716 // --- Inst_FLAT__FLAT_ATOMIC_DEC class methods ---
1717
1719 : Inst_FLAT(iFmt, "flat_atomic_dec")
1720 {
1721 setFlag(AtomicDec);
1722 if (instData.GLC) {
1723 setFlag(AtomicReturn);
1724 } else {
1725 setFlag(AtomicNoReturn);
1726 }
1727 setFlag(MemoryRef);
1728 } // Inst_FLAT__FLAT_ATOMIC_DEC
1729
1731 {
1732 } // ~Inst_FLAT__FLAT_ATOMIC_DEC
1733
1734 // --- description from .arch file ---
1735 // 32b:
1736 // tmp = MEM[ADDR];
1737 // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
1738 // (unsigned compare); RETURN_DATA = tmp.
1739 void
1744
1745 void
1747 {
1748 initAtomicAccess<VecElemU32>(gpuDynInst);
1749 } // initiateAcc
1750
1751 void
1756 // --- Inst_FLAT__FLAT_ATOMIC_SWAP_X2 class methods ---
1757
1759 InFmt_FLAT *iFmt)
1760 : Inst_FLAT(iFmt, "flat_atomic_swap_x2")
1761 {
1762 setFlag(AtomicExch);
1763 if (instData.GLC) {
1764 setFlag(AtomicReturn);
1765 } else {
1766 setFlag(AtomicNoReturn);
1767 }
1768 setFlag(MemoryRef);
1769 } // Inst_FLAT__FLAT_ATOMIC_SWAP_X2
1770
1772 {
1773 } // ~Inst_FLAT__FLAT_ATOMIC_SWAP_X2
1774
1775 // --- description from .arch file ---
1776 // 64b:
1777 // tmp = MEM[ADDR];
1778 // MEM[ADDR] = DATA[0:1];
1779 // RETURN_DATA[0:1] = tmp.
1780 void
1785
1786 void
1788 {
1789 initAtomicAccess<VecElemU64>(gpuDynInst);
1790 } // initiateAcc
1791
1792 void
1797 // --- Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2 class methods ---
1798
1800 InFmt_FLAT *iFmt)
1801 : Inst_FLAT(iFmt, "flat_atomic_cmpswap_x2")
1802 {
1803 setFlag(AtomicCAS);
1804 if (instData.GLC) {
1805 setFlag(AtomicReturn);
1806 } else {
1807 setFlag(AtomicNoReturn);
1808 }
1809 setFlag(MemoryRef);
1810 } // Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2
1811
1813 {
1814 } // ~Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2
1815
1816 // --- description from .arch file ---
1817 // 64b:
1818 // tmp = MEM[ADDR];
1819 // src = DATA[0:1];
1820 // cmp = DATA[2:3];
1821 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
1822 // RETURN_DATA[0:1] = tmp.
1823 void
1828
1829 void
1831 {
1832 initAtomicAccess<VecElemU64>(gpuDynInst);
1833 } // initiateAcc
1834
1835 void
1840 // --- Inst_FLAT__FLAT_ATOMIC_ADD_X2 class methods ---
1841
1843 InFmt_FLAT *iFmt)
1844 : Inst_FLAT(iFmt, "flat_atomic_add_x2")
1845 {
1846 setFlag(AtomicAdd);
1847 if (instData.GLC) {
1848 setFlag(AtomicReturn);
1849 } else {
1850 setFlag(AtomicNoReturn);
1851 }
1852 setFlag(MemoryRef);
1853 } // Inst_FLAT__FLAT_ATOMIC_ADD_X2
1854
1856 {
1857 } // ~Inst_FLAT__FLAT_ATOMIC_ADD_X2
1858
1859 // --- description from .arch file ---
1860 // 64b:
1861 // tmp = MEM[ADDR];
1862 // MEM[ADDR] += DATA[0:1];
1863 // RETURN_DATA[0:1] = tmp.
1864 void
1869
1870 void
1872 {
1873 initAtomicAccess<VecElemU64>(gpuDynInst);
1874 } // initiateAcc
1875
1876 void
1881 // --- Inst_FLAT__FLAT_ATOMIC_SUB_X2 class methods ---
1882
1884 InFmt_FLAT *iFmt)
1885 : Inst_FLAT(iFmt, "flat_atomic_sub_x2")
1886 {
1887 setFlag(AtomicSub);
1888 if (instData.GLC) {
1889 setFlag(AtomicReturn);
1890 } else {
1891 setFlag(AtomicNoReturn);
1892 }
1893 setFlag(MemoryRef);
1894 } // Inst_FLAT__FLAT_ATOMIC_SUB_X2
1895
1897 {
1898 } // ~Inst_FLAT__FLAT_ATOMIC_SUB_X2
1899
1900 // --- description from .arch file ---
1901 // 64b:
1902 // tmp = MEM[ADDR];
1903 // MEM[ADDR] -= DATA[0:1];
1904 // RETURN_DATA[0:1] = tmp.
1905 void
1910
1911 void
1913 {
1914 initAtomicAccess<VecElemU64>(gpuDynInst);
1915 } // initiateAcc
1916
1917 void
1922 // --- Inst_FLAT__FLAT_ATOMIC_SMIN_X2 class methods ---
1923
1925 InFmt_FLAT *iFmt)
1926 : Inst_FLAT(iFmt, "flat_atomic_smin_x2")
1927 {
1928 setFlag(AtomicMin);
1929 if (instData.GLC) {
1930 setFlag(AtomicReturn);
1931 } else {
1932 setFlag(AtomicNoReturn);
1933 }
1934 setFlag(MemoryRef);
1935 } // Inst_FLAT__FLAT_ATOMIC_SMIN_X2
1936
1938 {
1939 } // ~Inst_FLAT__FLAT_ATOMIC_SMIN_X2
1940
1941 // --- description from .arch file ---
1942 // 64b:
1943 // tmp = MEM[ADDR];
1944 // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare);
1945 // RETURN_DATA[0:1] = tmp.
1946 void
1951
1952 void
1954 {
1955 initAtomicAccess<VecElemI64>(gpuDynInst);
1956 } // initiateAcc
1957
1958 void
1963 // --- Inst_FLAT__FLAT_ATOMIC_UMIN_X2 class methods ---
1964
1966 InFmt_FLAT *iFmt)
1967 : Inst_FLAT(iFmt, "flat_atomic_umin_x2")
1968 {
1969 setFlag(AtomicMin);
1970 if (instData.GLC) {
1971 setFlag(AtomicReturn);
1972 } else {
1973 setFlag(AtomicNoReturn);
1974 }
1975 setFlag(MemoryRef);
1976 } // Inst_FLAT__FLAT_ATOMIC_UMIN_X2
1977
1979 {
1980 } // ~Inst_FLAT__FLAT_ATOMIC_UMIN_X2
1981
1982 // --- description from .arch file ---
1983 // 64b:
1984 // tmp = MEM[ADDR];
1985 // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare);
1986 // RETURN_DATA[0:1] = tmp.
1987 void
1992
1993 void
1995 {
1996 initAtomicAccess<VecElemU64>(gpuDynInst);
1997 } // initiateAcc
1998
1999 void
2004 // --- Inst_FLAT__FLAT_ATOMIC_SMAX_X2 class methods ---
2005
2007 InFmt_FLAT *iFmt)
2008 : Inst_FLAT(iFmt, "flat_atomic_smax_x2")
2009 {
2010 setFlag(AtomicMax);
2011 if (instData.GLC) {
2012 setFlag(AtomicReturn);
2013 } else {
2014 setFlag(AtomicNoReturn);
2015 }
2016 setFlag(MemoryRef);
2017 } // Inst_FLAT__FLAT_ATOMIC_SMAX_X2
2018
2020 {
2021 } // ~Inst_FLAT__FLAT_ATOMIC_SMAX_X2
2022
2023 // --- description from .arch file ---
2024 // 64b:
2025 // tmp = MEM[ADDR];
2026 // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare);
2027 // RETURN_DATA[0:1] = tmp.
2028 void
2033
2034 void
2036 {
2037 initAtomicAccess<VecElemI64>(gpuDynInst);
2038 } // initiateAcc
2039
2040 void
2045 // --- Inst_FLAT__FLAT_ATOMIC_UMAX_X2 class methods ---
2046
2048 InFmt_FLAT *iFmt)
2049 : Inst_FLAT(iFmt, "flat_atomic_umax_x2")
2050 {
2051 setFlag(AtomicMax);
2052 if (instData.GLC) {
2053 setFlag(AtomicReturn);
2054 } else {
2055 setFlag(AtomicNoReturn);
2056 }
2057 setFlag(MemoryRef);
2058 } // Inst_FLAT__FLAT_ATOMIC_UMAX_X2
2059
2061 {
2062 } // ~Inst_FLAT__FLAT_ATOMIC_UMAX_X2
2063
2064 // --- description from .arch file ---
2065 // 64b:
2066 // tmp = MEM[ADDR];
2067 // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare);
2068 // RETURN_DATA[0:1] = tmp.
2069 void
2074
2075 void
2077 {
2078 initAtomicAccess<VecElemU64>(gpuDynInst);
2079 } // initiateAcc
2080
2081 void
2086 // --- Inst_FLAT__FLAT_ATOMIC_AND_X2 class methods ---
2087
2089 InFmt_FLAT *iFmt)
2090 : Inst_FLAT(iFmt, "flat_atomic_and_x2")
2091 {
2092 setFlag(AtomicAnd);
2093 if (instData.GLC) {
2094 setFlag(AtomicReturn);
2095 } else {
2096 setFlag(AtomicNoReturn);
2097 }
2098 setFlag(MemoryRef);
2099 } // Inst_FLAT__FLAT_ATOMIC_AND_X2
2100
2102 {
2103 } // ~Inst_FLAT__FLAT_ATOMIC_AND_X2
2104
2105 // --- description from .arch file ---
2106 // 64b:
2107 // tmp = MEM[ADDR];
2108 // MEM[ADDR] &= DATA[0:1];
2109 // RETURN_DATA[0:1] = tmp.
2110 void
2115
2116 void
2118 {
2119 initAtomicAccess<VecElemU64>(gpuDynInst);
2120 } // initiateAcc
2121
2122 void
2127 // --- Inst_FLAT__FLAT_ATOMIC_OR_X2 class methods ---
2128
2130 InFmt_FLAT *iFmt)
2131 : Inst_FLAT(iFmt, "flat_atomic_or_x2")
2132 {
2133 setFlag(AtomicOr);
2134 if (instData.GLC) {
2135 setFlag(AtomicReturn);
2136 } else {
2137 setFlag(AtomicNoReturn);
2138 }
2139 setFlag(MemoryRef);
2140 } // Inst_FLAT__FLAT_ATOMIC_OR_X2
2141
2143 {
2144 } // ~Inst_FLAT__FLAT_ATOMIC_OR_X2
2145
2146 // --- description from .arch file ---
2147 // 64b:
2148 // tmp = MEM[ADDR];
2149 // MEM[ADDR] |= DATA[0:1];
2150 // RETURN_DATA[0:1] = tmp.
2151 void
2156
2157 void
2159 {
2160 initAtomicAccess<VecElemU64>(gpuDynInst);
2161 } // initiateAcc
2162
2163 void
2168 // --- Inst_FLAT__FLAT_ATOMIC_XOR_X2 class methods ---
2169
2171 InFmt_FLAT *iFmt)
2172 : Inst_FLAT(iFmt, "flat_atomic_xor_x2")
2173 {
2174 setFlag(AtomicXor);
2175 if (instData.GLC) {
2176 setFlag(AtomicReturn);
2177 } else {
2178 setFlag(AtomicNoReturn);
2179 }
2180 setFlag(MemoryRef);
2181 } // Inst_FLAT__FLAT_ATOMIC_XOR_X2
2182
2184 {
2185 } // ~Inst_FLAT__FLAT_ATOMIC_XOR_X2
2186
2187 // --- description from .arch file ---
2188 // 64b:
2189 // tmp = MEM[ADDR];
2190 // MEM[ADDR] ^= DATA[0:1];
2191 // RETURN_DATA[0:1] = tmp.
2192 void
2197
2198 void
2200 {
2201 initAtomicAccess<VecElemU64>(gpuDynInst);
2202 } // initiateAcc
2203
2204 void
2209 // --- Inst_FLAT__FLAT_ATOMIC_INC_X2 class methods ---
2210
2212 InFmt_FLAT *iFmt)
2213 : Inst_FLAT(iFmt, "flat_atomic_inc_x2")
2214 {
2215 setFlag(AtomicInc);
2216 if (instData.GLC) {
2217 setFlag(AtomicReturn);
2218 } else {
2219 setFlag(AtomicNoReturn);
2220 }
2221 setFlag(MemoryRef);
2222 } // Inst_FLAT__FLAT_ATOMIC_INC_X2
2223
2225 {
2226 } // ~Inst_FLAT__FLAT_ATOMIC_INC_X2
2227
2228 // --- description from .arch file ---
2229 // 64b:
2230 // tmp = MEM[ADDR];
2231 // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare);
2232 // RETURN_DATA[0:1] = tmp.
2233 void
2238
2239 void
2241 {
2242 initAtomicAccess<VecElemU64>(gpuDynInst);
2243 } // initiateAcc
2244
2245 void
2250 // --- Inst_FLAT__FLAT_ATOMIC_DEC_X2 class methods ---
2251
2253 InFmt_FLAT *iFmt)
2254 : Inst_FLAT(iFmt, "flat_atomic_dec_x2")
2255 {
2256 setFlag(AtomicDec);
2257 if (instData.GLC) {
2258 setFlag(AtomicReturn);
2259 } else {
2260 setFlag(AtomicNoReturn);
2261 }
2262 setFlag(MemoryRef);
2263 } // Inst_FLAT__FLAT_ATOMIC_DEC_X2
2264
2266 {
2267 } // ~Inst_FLAT__FLAT_ATOMIC_DEC_X2
2268
2269 // --- description from .arch file ---
2270 // 64b:
2271 // tmp = MEM[ADDR];
2272 // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1
2273 // (unsigned compare);
2274 // RETURN_DATA[0:1] = tmp.
2275 void
2280
2281 void
2283 {
2284 initAtomicAccess<VecElemU64>(gpuDynInst);
2285 } // initiateAcc
2286
2287 void
2292 // --- Inst_FLAT__FLAT_ATOMIC_ADD_F32 class methods ---
2293
2295 InFmt_FLAT *iFmt)
2296 : Inst_FLAT(iFmt, "flat_atomic_add_f32")
2297 {
2298 setFlag(AtomicAdd);
2299 if (instData.GLC) {
2300 setFlag(AtomicReturn);
2301 } else {
2302 setFlag(AtomicNoReturn);
2303 }
2304 setFlag(MemoryRef);
2305 } // Inst_FLAT__FLAT_ATOMIC_ADD_F32
2306
2308 {
2309 } // ~Inst_FLAT__FLAT_ATOMIC_ADD_F32
2310
2311 void
2316
2317 void
2319 {
2320 initAtomicAccess<VecElemF32>(gpuDynInst);
2321 } // initiateAcc
2322
2323 void
2328 // --- Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16 class methods ---
2329
2331 InFmt_FLAT *iFmt)
2332 : Inst_FLAT(iFmt, "flat_atomic_pk_add_f16")
2333 {
2334 setFlag(AtomicAdd);
2335 if (instData.GLC) {
2336 setFlag(AtomicReturn);
2337 } else {
2338 setFlag(AtomicNoReturn);
2339 }
2340 setFlag(MemoryRef);
2341 } // Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16
2342
2344 {
2345 } // ~Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16
2346
2347 void
2352
2353 void
2355 {
2356 } // initiateAcc
2357
2358 void
2360 {
2361 } // completeAcc
2362 // --- Inst_FLAT__FLAT_ATOMIC_ADD_F64 class methods ---
2363
2365 InFmt_FLAT *iFmt)
2366 : Inst_FLAT(iFmt, "flat_atomic_add_f64")
2367 {
2368 setFlag(AtomicAdd);
2369 if (instData.GLC) {
2370 setFlag(AtomicReturn);
2371 } else {
2372 setFlag(AtomicNoReturn);
2373 }
2374 setFlag(MemoryRef);
2375 } // Inst_FLAT__FLAT_ATOMIC_ADD_F64
2376
2378 {
2379 } // ~Inst_FLAT__FLAT_ATOMIC_ADD_F64
2380
2381 void
2386
2387 void
2389 {
2390 initAtomicAccess<VecElemF64>(gpuDynInst);
2391 } // initiateAcc
2392
2393 void
2398 // --- Inst_FLAT__FLAT_ATOMIC_MIN_F64 class methods ---
2399
2401 InFmt_FLAT *iFmt)
2402 : Inst_FLAT(iFmt, "flat_atomic_min_f64")
2403 {
2404 setFlag(AtomicMin);
2405 if (instData.GLC) {
2406 setFlag(AtomicReturn);
2407 } else {
2408 setFlag(AtomicNoReturn);
2409 }
2410 setFlag(MemoryRef);
2411 } // Inst_FLAT__FLAT_ATOMIC_MIN_F64
2412
2414 {
2415 } // ~Inst_FLAT__FLAT_ATOMIC_MIN_F64
2416
2417 void
2422
2423 void
2425 {
2426 initAtomicAccess<VecElemF64>(gpuDynInst);
2427 } // initiateAcc
2428
2429 void
2434 // --- Inst_FLAT__FLAT_ATOMIC_MAX_F64 class methods ---
2435
2437 InFmt_FLAT *iFmt)
2438 : Inst_FLAT(iFmt, "flat_atomic_max_f64")
2439 {
2440 setFlag(AtomicMax);
2441 if (instData.GLC) {
2442 setFlag(AtomicReturn);
2443 } else {
2444 setFlag(AtomicNoReturn);
2445 }
2446 setFlag(MemoryRef);
2447 } // Inst_FLAT__FLAT_ATOMIC_MAX_F64
2448
2450 {
2451 } // ~Inst_FLAT__FLAT_ATOMIC_MAX_F64
2452
2453 void
2458
2459 void
2461 {
2462 initAtomicAccess<VecElemF64>(gpuDynInst);
2463 } // initiateAcc
2464
2465 void
2470 // --- Inst_FLAT__FLAT_ATOMIC_PK_ADD_BF16 class methods ---
2471
2473 InFmt_FLAT *iFmt)
2474 : Inst_FLAT(iFmt, "flat_atomic_pk_add_bf16")
2475 {
2476 setFlag(AtomicPkAddBF16);
2477
2478 // MI300 spec: "Float atomics must set SC[0]=0 (no return value)."
2479 panic_if(instData.GLC, "Saw float atomic with return set!");
2480
2481 setFlag(AtomicNoReturn);
2482 } // Inst_FLAT__FLAT_ATOMIC_PK_ADD_BF16
2483
2485 {
2486 } // ~Inst_FLAT__FLAT_ATOMIC_PK_ADD_BF16
2487
2488 void
2493
2494 void
2496 {
2497 initAtomicAccess<VecElemU32>(gpuDynInst);
2498 } // initiateAcc
2499
2500 void
2502 {
2503 } // completeAcc
2504 // --- Inst_FLAT__FLAT_LOAD_LDS_DWORDX3 class methods ---
2505
2507 InFmt_FLAT *iFmt)
2508 : Inst_FLAT(iFmt, "flat_load_lds_dwordx3")
2509 {
2510 setFlag(Load);
2511
2512 assert(isFlatGlobal());
2513 } // Inst_FLAT__FLAT_LOAD_LDS_DWORDX3
2514
2516 {
2517 } // ~Inst_FLAT__FLAT_LOAD_LDS_DWORDX3
2518
2519 // --- description from .arch file ---
2520 // Untyped buffer load 3 dwords.
2521 void
2523 {
2524 Wavefront *wf = gpuDynInst->wavefront();
2525
2526 if (gpuDynInst->exec_mask.none()) {
2527 wf->decVMemInstsIssued();
2528 return;
2529 }
2530
2531 gpuDynInst->execUnitId = wf->execUnitId;
2532 gpuDynInst->latency.init(gpuDynInst->computeUnit());
2533 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
2534
2535 calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
2536
2537 issueRequestHelper(gpuDynInst);
2538 } // execute
2539
2540 void
2542 {
2543 initMemRead<3>(gpuDynInst);
2544 } // initiateAcc
2545
2546 void
2548 {
2549 ldsComplete<3>(gpuDynInst);
2550 } // completeAcc
2551 // --- Inst_FLAT__FLAT_LOAD_LDS_DWORDX4 class methods ---
2552
2554 InFmt_FLAT *iFmt)
2555 : Inst_FLAT(iFmt, "flat_load_lds_dwordx4")
2556 {
2557 setFlag(Load);
2558
2559 assert(isFlatGlobal());
2560 } // Inst_FLAT__FLAT_LOAD_LDS_DWORDX4
2561
2563 {
2564 } // ~Inst_FLAT__FLAT_LOAD_LDS_DWORDX4
2565
2566 // --- description from .arch file ---
2567 // Untyped buffer load 4 dwords.
2568 void
2570 {
2571 Wavefront *wf = gpuDynInst->wavefront();
2572
2573 if (gpuDynInst->exec_mask.none()) {
2574 wf->decVMemInstsIssued();
2575 return;
2576 }
2577
2578 gpuDynInst->execUnitId = wf->execUnitId;
2579 gpuDynInst->latency.init(gpuDynInst->computeUnit());
2580 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
2581
2582 calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
2583
2584 issueRequestHelper(gpuDynInst);
2585 } // execute
2586
2587 void
2589 {
2590 initMemRead<4>(gpuDynInst);
2591 } // initiateAcc
2592
2593 void
2595 {
2596 ldsComplete<4>(gpuDynInst);
2597 } // completeAcc
2598} // namespace VegaISA
2599} // namespace gem5
const char data[]
void setFlag(Flags flag)
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:2324
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:2318
void execute(GPUDynInstPtr) override
Definition flat.cc:2312
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:2394
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:2388
void execute(GPUDynInstPtr) override
Definition flat.cc:2382
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1877
void execute(GPUDynInstPtr) override
Definition flat.cc:1865
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1871
void execute(GPUDynInstPtr) override
Definition flat.cc:1339
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1351
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1345
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:2117
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:2123
void execute(GPUDynInstPtr) override
Definition flat.cc:2111
void execute(GPUDynInstPtr) override
Definition flat.cc:1579
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1585
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1591
void execute(GPUDynInstPtr) override
Definition flat.cc:1824
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1830
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1836
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1311
void execute(GPUDynInstPtr) override
Definition flat.cc:1299
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1305
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:2288
void execute(GPUDynInstPtr) override
Definition flat.cc:2276
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:2282
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1746
void execute(GPUDynInstPtr) override
Definition flat.cc:1740
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1752
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:2246
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:2240
void execute(GPUDynInstPtr) override
Definition flat.cc:2234
void execute(GPUDynInstPtr) override
Definition flat.cc:1700
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1706
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1712
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:2460
void execute(GPUDynInstPtr) override
Definition flat.cc:2454
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:2466
void execute(GPUDynInstPtr) override
Definition flat.cc:2418
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:2424
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:2430
void execute(GPUDynInstPtr) override
Definition flat.cc:2152
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:2164
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:2158
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1631
void execute(GPUDynInstPtr) override
Definition flat.cc:1619
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1625
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:2501
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:2495
void execute(GPUDynInstPtr) override
Definition flat.cc:2489
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:2354
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:2359
void execute(GPUDynInstPtr) override
Definition flat.cc:2348
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:2035
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:2041
void execute(GPUDynInstPtr) override
Definition flat.cc:2029
void execute(GPUDynInstPtr) override
Definition flat.cc:1499
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1511
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1505
void execute(GPUDynInstPtr) override
Definition flat.cc:1947
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1959
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1953
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1431
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1425
void execute(GPUDynInstPtr) override
Definition flat.cc:1419
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1912
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1918
void execute(GPUDynInstPtr) override
Definition flat.cc:1906
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1391
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1385
void execute(GPUDynInstPtr) override
Definition flat.cc:1379
void execute(GPUDynInstPtr) override
Definition flat.cc:1781
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1793
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1787
void execute(GPUDynInstPtr) override
Definition flat.cc:1255
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1261
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1267
void execute(GPUDynInstPtr) override
Definition flat.cc:2070
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:2076
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:2082
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1545
void execute(GPUDynInstPtr) override
Definition flat.cc:1539
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1551
void execute(GPUDynInstPtr) override
Definition flat.cc:1988
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1994
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:2000
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1465
void execute(GPUDynInstPtr) override
Definition flat.cc:1459
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1471
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:2205
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:2199
void execute(GPUDynInstPtr) override
Definition flat.cc:2193
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1672
void execute(GPUDynInstPtr) override
Definition flat.cc:1660
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1666
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:349
void execute(GPUDynInstPtr) override
Definition flat.cc:319
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:343
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:414
void execute(GPUDynInstPtr) override
Definition flat.cc:390
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:420
void execute(GPUDynInstPtr) override
Definition flat.cc:467
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:491
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:497
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:284
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:290
void execute(GPUDynInstPtr) override
Definition flat.cc:260
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:2541
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:2547
void execute(GPUDynInstPtr) override
Definition flat.cc:2522
void execute(GPUDynInstPtr) override
Definition flat.cc:2569
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:2588
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:2594
void execute(GPUDynInstPtr) override
Definition flat.cc:1202
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1221
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1227
void execute(GPUDynInstPtr) override
Definition flat.cc:1057
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1082
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1076
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1179
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1173
void execute(GPUDynInstPtr) override
Definition flat.cc:1154
void execute(GPUDynInstPtr) override
Definition flat.cc:1009
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1034
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1028
void execute(GPUDynInstPtr) override
Definition flat.cc:1105
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1124
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1130
void execute(GPUDynInstPtr) override
Definition flat.cc:113
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:137
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:143
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:241
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:236
void execute(GPUDynInstPtr) override
Definition flat.cc:230
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:79
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:85
void execute(GPUDynInstPtr) override
Definition flat.cc:55
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:195
void execute(GPUDynInstPtr) override
Definition flat.cc:171
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:201
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:592
void execute(GPUDynInstPtr) override
Definition flat.cc:549
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:586
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:842
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:836
void execute(GPUDynInstPtr) override
Definition flat.cc:799
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:913
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:907
void execute(GPUDynInstPtr) override
Definition flat.cc:862
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:988
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:982
void execute(GPUDynInstPtr) override
Definition flat.cc:933
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:773
void execute(GPUDynInstPtr) override
Definition flat.cc:736
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:779
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:717
void execute(GPUDynInstPtr) override
Definition flat.cc:674
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:711
void execute(GPUDynInstPtr) override
Definition flat.cc:611
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:654
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:648
void atomicComplete(GPUDynInstPtr gpuDynInst)
void calcAddr(GPUDynInstPtr gpuDynInst, ScalarRegU32 vaddr, ScalarRegU32 saddr, ScalarRegI32 offset)
Inst_FLAT(InFmt_FLAT *, const std::string &opcode)
void initMemRead(GPUDynInstPtr gpuDynInst)
void atomicExecute(GPUDynInstPtr gpuDynInst)
void ldsComplete(GPUDynInstPtr gpuDynInst)
void issueRequestHelper(GPUDynInstPtr gpuDynInst)
void initAtomicAccess(GPUDynInstPtr gpuDynInst)
void initMemWrite(GPUDynInstPtr gpuDynInst)
void read() override
read from the vrf.
Definition operand.hh:148
void write() override
write to the vrf.
Definition operand.hh:203
void decExpInstsIssued()
void untrackExpInst(GPUDynInstPtr gpu_dyn_inst)
void decVMemInstsIssued()
void untrackLGKMInst(GPUDynInstPtr gpu_dyn_inst)
void decLGKMInstsIssued()
void untrackVMemInst(GPUDynInstPtr gpu_dyn_inst)
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition logging.hh:246
classes that represnt vector/scalar operands in VEGA ISA.
Definition faults.cc:39
VecOperand< VecElemU32, false > VecOperandU32
Definition operand.hh:829
VecOperand< VecElemU8, true, 1 > ConstVecOperandU8
Definition operand.hh:840
VecOperand< VecElemU32, true > ConstVecOperandU32
Definition operand.hh:844
uint16_t VecElemU16
uint32_t VecElemU32
VecOperand< VecElemU16, true, 1 > ConstVecOperandU16
Definition operand.hh:842
const int NumVecElemPerVecReg(64)
uint64_t VecElemU64
VecOperand< VecElemU64, true > ConstVecOperandU64
Definition operand.hh:847
Copyright (c) 2024 Arm Limited All rights reserved.
Definition binary32.hh:36
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition misc.hh:49

Generated on Mon Oct 27 2025 04:12:33 for gem5 by doxygen 1.14.0