vop2.cc
1/*
2 * Copyright (c) 2024 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#include "arch/amdgpu/vega/insts/instructions.hh"
33#include "arch/amdgpu/vega/insts/inst_util.hh"
34#include "debug/VEGA.hh"
35
36namespace gem5
37{
38
39namespace VegaISA
40{
41 // --- Inst_VOP2__V_CNDMASK_B32 class methods ---
42
43 Inst_VOP2__V_CNDMASK_B32::Inst_VOP2__V_CNDMASK_B32(InFmt_VOP2 *iFmt)
44 : Inst_VOP2(iFmt, "v_cndmask_b32")
45 {
46 setFlag(ALU);
47 setFlag(ReadsVCC);
48 } // Inst_VOP2__V_CNDMASK_B32
49
50 Inst_VOP2__V_CNDMASK_B32::~Inst_VOP2__V_CNDMASK_B32()
51 {
52 } // ~Inst_VOP2__V_CNDMASK_B32
53
54 // --- description from .arch file ---
55 // D.u = (VCC[i] ? S1.u : S0.u) (i = threadID in wave); VOP3: specify VCC
56 // as a scalar GPR in S2.
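// Per-lane select: VCC here is a 64-bit scalar mask holding one bit per
// lane, so e.g. if bit 5 of VCC is set, lane 5 writes src1, otherwise
// src0. The loop below extracts that bit with bits(vcc.rawData(), lane).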
57 void
58 Inst_VOP2__V_CNDMASK_B32::execute(GPUDynInstPtr gpuDynInst)
59 {
60 Wavefront *wf = gpuDynInst->wavefront();
61 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
62 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
63 VecOperandU32 vdst(gpuDynInst, instData.VDST);
64 ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
65
66 src0.readSrc();
67 src1.read();
68 vcc.read();
69
70 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
71 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
72
73 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
74 if (wf->execMask(lane)) {
75 vdst[lane]
76 = bits(vcc.rawData(), lane) ? src1[lane] : src0[lane];
77 }
78 }
79
80 vdst.write();
81 } // execute
82 // --- Inst_VOP2__V_ADD_F32 class methods ---
83
84 Inst_VOP2__V_ADD_F32::Inst_VOP2__V_ADD_F32(InFmt_VOP2 *iFmt)
85 : Inst_VOP2(iFmt, "v_add_f32")
86 {
87 setFlag(ALU);
88 setFlag(F32);
89 } // Inst_VOP2__V_ADD_F32
90
91 Inst_VOP2__V_ADD_F32::~Inst_VOP2__V_ADD_F32()
92 {
93 } // ~Inst_VOP2__V_ADD_F32
94
95 // --- description from .arch file ---
96 // D.f = S0.f + S1.f.
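// When the DPP encoding is used, src0 is first permuted across lanes
// (row shifts, broadcasts, etc., selected by DPP_CTRL) by processDPP()
// below; the add then operates on the permuted per-lane values.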
97 void
98 Inst_VOP2__V_ADD_F32::execute(GPUDynInstPtr gpuDynInst)
99 {
100 Wavefront *wf = gpuDynInst->wavefront();
101 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
102 VecOperandF32 src1(gpuDynInst, instData.VSRC1);
103 VecOperandF32 vdst(gpuDynInst, instData.VDST);
104
105 src0.readSrc();
106 src1.read();
107
108 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
109
110 if (isDPPInst()) {
111 VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
112 src0_dpp.read();
113
114 DPRINTF(VEGA, "Handling V_ADD_F32 SRC DPP. SRC0: register v[%d], "
115 "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
116 "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, "
117 "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0,
118 extData.iFmt_VOP_DPP.DPP_CTRL,
119 extData.iFmt_VOP_DPP.SRC0_ABS,
120 extData.iFmt_VOP_DPP.SRC0_NEG,
121 extData.iFmt_VOP_DPP.SRC1_ABS,
122 extData.iFmt_VOP_DPP.SRC1_NEG,
123 extData.iFmt_VOP_DPP.BC,
124 extData.iFmt_VOP_DPP.BANK_MASK,
125 extData.iFmt_VOP_DPP.ROW_MASK);
126
127 processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1);
128
129 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
130 if (wf->execMask(lane)) {
131 vdst[lane] = src0_dpp[lane] + src1[lane];
132 }
133 }
134 } else {
135 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
136 if (wf->execMask(lane)) {
137 vdst[lane] = src0[lane] + src1[lane];
138 }
139 }
140 }
141
142 vdst.write();
143 } // execute
144 // --- Inst_VOP2__V_SUB_F32 class methods ---
145
146 Inst_VOP2__V_SUB_F32::Inst_VOP2__V_SUB_F32(InFmt_VOP2 *iFmt)
147 : Inst_VOP2(iFmt, "v_sub_f32")
148 {
149 setFlag(ALU);
150 setFlag(F32);
151 } // Inst_VOP2__V_SUB_F32
152
153 Inst_VOP2__V_SUB_F32::~Inst_VOP2__V_SUB_F32()
154 {
155 } // ~Inst_VOP2__V_SUB_F32
156
157 // --- description from .arch file ---
158 // D.f = S0.f - S1.f.
159 // SQ translates to V_ADD_F32.
160 void
161 Inst_VOP2__V_SUB_F32::execute(GPUDynInstPtr gpuDynInst)
162 {
163 Wavefront *wf = gpuDynInst->wavefront();
164 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
165 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
166 VecOperandF32 vdst(gpuDynInst, instData.VDST);
167
168 src0.readSrc();
169 src1.read();
170
171 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
172 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
173
174 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
175 if (wf->execMask(lane)) {
176 vdst[lane] = src0[lane] - src1[lane];
177 }
178 }
179
180 vdst.write();
181 } // execute
182 // --- Inst_VOP2__V_SUBREV_F32 class methods ---
183
184 Inst_VOP2__V_SUBREV_F32::Inst_VOP2__V_SUBREV_F32(InFmt_VOP2 *iFmt)
185 : Inst_VOP2(iFmt, "v_subrev_f32")
186 {
187 setFlag(ALU);
188 setFlag(F32);
189 } // Inst_VOP2__V_SUBREV_F32
190
191 Inst_VOP2__V_SUBREV_F32::~Inst_VOP2__V_SUBREV_F32()
192 {
193 } // ~Inst_VOP2__V_SUBREV_F32
194
195 // --- description from .arch file ---
196 // D.f = S1.f - S0.f.
197 // SQ translates to V_ADD_F32.
198 void
199 Inst_VOP2__V_SUBREV_F32::execute(GPUDynInstPtr gpuDynInst)
200 {
201 Wavefront *wf = gpuDynInst->wavefront();
202 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
203 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
204 VecOperandF32 vdst(gpuDynInst, instData.VDST);
205
206 src0.readSrc();
207 src1.read();
208
209 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
210 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
211
212 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
213 if (wf->execMask(lane)) {
214 vdst[lane] = src1[lane] - src0[lane];
215 }
216 }
217
218 vdst.write();
219 } // execute
220 // --- Inst_VOP2__V_MUL_LEGACY_F32 class methods ---
221
222 Inst_VOP2__V_MUL_LEGACY_F32::Inst_VOP2__V_MUL_LEGACY_F32(InFmt_VOP2 *iFmt)
223 : Inst_VOP2(iFmt, "v_mul_legacy_f32")
224 {
225 setFlag(ALU);
226 setFlag(F32);
227 } // Inst_VOP2__V_MUL_LEGACY_F32
228
229 Inst_VOP2__V_MUL_LEGACY_F32::~Inst_VOP2__V_MUL_LEGACY_F32()
230 {
231 } // ~Inst_VOP2__V_MUL_LEGACY_F32
232
233 // --- description from .arch file ---
234 // D.f = S0.f * S1.f (DX9 rules, 0.0*x = 0.0).
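// Note: the loop below performs a plain IEEE multiply; the DX9 special
// case (0.0 * x == 0.0 even for x == Inf or NaN) is not modeled here.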
235 void
236 Inst_VOP2__V_MUL_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
237 {
238 Wavefront *wf = gpuDynInst->wavefront();
239 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
240 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
241 VecOperandF32 vdst(gpuDynInst, instData.VDST);
242
243 src0.readSrc();
244 src1.read();
245
246 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
247 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
248
249 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
250 if (wf->execMask(lane)) {
251 vdst[lane] = src0[lane] * src1[lane];
252 }
253 }
254
255 vdst.write();
256 } // execute
257 // --- Inst_VOP2__V_MUL_F32 class methods ---
258
259 Inst_VOP2__V_MUL_F32::Inst_VOP2__V_MUL_F32(InFmt_VOP2 *iFmt)
260 : Inst_VOP2(iFmt, "v_mul_f32")
261 {
262 setFlag(ALU);
263 setFlag(F32);
264 } // Inst_VOP2__V_MUL_F32
265
266 Inst_VOP2__V_MUL_F32::~Inst_VOP2__V_MUL_F32()
267 {
268 } // ~Inst_VOP2__V_MUL_F32
269
270 // --- description from .arch file ---
271 // D.f = S0.f * S1.f.
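// The lane loop below spells out the IEEE-754 edge cases explicitly:
// any NaN input produces NaN, (+/-0 or denormal) * Inf produces NaN,
// and a (+/-0 or denormal) or Inf operand against a finite value
// resolves to a correctly signed zero or infinity; only the final
// else branch is an ordinary multiply.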
272 void
273 Inst_VOP2__V_MUL_F32::execute(GPUDynInstPtr gpuDynInst)
274 {
275 Wavefront *wf = gpuDynInst->wavefront();
276 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
277 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
278 VecOperandF32 vdst(gpuDynInst, instData.VDST);
279
280 src0.readSrc();
281 src1.read();
282
283 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
284 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
285
286 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
287 if (wf->execMask(lane)) {
288 if (std::isnan(src0[lane]) ||
289 std::isnan(src1[lane])) {
290 vdst[lane] = NAN;
291 } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
292 std::fpclassify(src0[lane]) == FP_ZERO) &&
293 !std::signbit(src0[lane])) {
294 if (std::isinf(src1[lane])) {
295 vdst[lane] = NAN;
296 } else if (!std::signbit(src1[lane])) {
297 vdst[lane] = +0.0;
298 } else {
299 vdst[lane] = -0.0;
300 }
301 } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
302 std::fpclassify(src0[lane]) == FP_ZERO) &&
303 std::signbit(src0[lane])) {
304 if (std::isinf(src1[lane])) {
305 vdst[lane] = NAN;
306 } else if (std::signbit(src1[lane])) {
307 vdst[lane] = +0.0;
308 } else {
309 vdst[lane] = -0.0;
310 }
311 } else if (std::isinf(src0[lane]) &&
312 !std::signbit(src0[lane])) {
313 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
314 std::fpclassify(src1[lane]) == FP_ZERO) {
315 vdst[lane] = NAN;
316 } else if (!std::signbit(src1[lane])) {
317 vdst[lane] = +INFINITY;
318 } else {
319 vdst[lane] = -INFINITY;
320 }
321 } else if (std::isinf(src0[lane]) &&
322 std::signbit(src0[lane])) {
323 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
324 std::fpclassify(src1[lane]) == FP_ZERO) {
325 vdst[lane] = NAN;
326 } else if (std::signbit(src1[lane])) {
327 vdst[lane] = +INFINITY;
328 } else {
329 vdst[lane] = -INFINITY;
330 }
331 } else {
332 vdst[lane] = src0[lane] * src1[lane];
333 }
334 }
335 }
336
337 vdst.write();
338 } // execute
339 // --- Inst_VOP2__V_MUL_I32_I24 class methods ---
340
341 Inst_VOP2__V_MUL_I32_I24::Inst_VOP2__V_MUL_I32_I24(InFmt_VOP2 *iFmt)
342 : Inst_VOP2(iFmt, "v_mul_i32_i24")
343 {
344 setFlag(ALU);
345 } // Inst_VOP2__V_MUL_I32_I24
346
347 Inst_VOP2__V_MUL_I32_I24::~Inst_VOP2__V_MUL_I32_I24()
348 {
349 } // ~Inst_VOP2__V_MUL_I32_I24
350
351 // --- description from .arch file ---
352 // D.i = S0.i[23:0] * S1.i[23:0].
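// Worked example: src = 0x00ffffff gives bits(src, 23, 0) = 0xffffff,
// which sext<24> interprets as -1, so 0x00ffffff * 0x00000001 = -1.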
353 void
354 Inst_VOP2__V_MUL_I32_I24::execute(GPUDynInstPtr gpuDynInst)
355 {
356 Wavefront *wf = gpuDynInst->wavefront();
357 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
358 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
359 VecOperandI32 vdst(gpuDynInst, instData.VDST);
360
361 src0.readSrc();
362 src1.read();
363
364 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
365 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
366
367 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
368 if (wf->execMask(lane)) {
369 vdst[lane] = sext<24>(bits(src0[lane], 23, 0))
370 * sext<24>(bits(src1[lane], 23, 0));
371 }
372 }
373
374 vdst.write();
375 } // execute
376 // --- Inst_VOP2__V_MUL_HI_I32_I24 class methods ---
377
378 Inst_VOP2__V_MUL_HI_I32_I24::Inst_VOP2__V_MUL_HI_I32_I24(InFmt_VOP2 *iFmt)
379 : Inst_VOP2(iFmt, "v_mul_hi_i32_i24")
380 {
381 setFlag(ALU);
382 } // Inst_VOP2__V_MUL_HI_I32_I24
383
384 Inst_VOP2__V_MUL_HI_I32_I24::~Inst_VOP2__V_MUL_HI_I32_I24()
385 {
386 } // ~Inst_VOP2__V_MUL_HI_I32_I24
387
388 // --- description from .arch file ---
389 // D.i = (S0.i[23:0] * S1.i[23:0])>>32.
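// The operands are widened to 64 bits below so the full 48-bit signed
// product exists before the >> 32; a 32-bit multiply would discard the
// high half that this instruction returns.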
390 void
391 Inst_VOP2__V_MUL_HI_I32_I24::execute(GPUDynInstPtr gpuDynInst)
392 {
393 Wavefront *wf = gpuDynInst->wavefront();
394 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
395 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
396 VecOperandI32 vdst(gpuDynInst, instData.VDST);
397
398 src0.readSrc();
399 src1.read();
400
401 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
402 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
403
404 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
405 if (wf->execMask(lane)) {
406 VecElemI64 tmp_src0
407 = (VecElemI64)sext<24>(bits(src0[lane], 23, 0));
408 VecElemI64 tmp_src1
409 = (VecElemI64)sext<24>(bits(src1[lane], 23, 0));
410
411 vdst[lane] = (VecElemI32)((tmp_src0 * tmp_src1) >> 32);
412 }
413 }
414
415 vdst.write();
416 } // execute
417 // --- Inst_VOP2__V_MUL_U32_U24 class methods ---
418
419 Inst_VOP2__V_MUL_U32_U24::Inst_VOP2__V_MUL_U32_U24(InFmt_VOP2 *iFmt)
420 : Inst_VOP2(iFmt, "v_mul_u32_u24")
421 {
422 setFlag(ALU);
423 } // Inst_VOP2__V_MUL_U32_U24
424
425 Inst_VOP2__V_MUL_U32_U24::~Inst_VOP2__V_MUL_U32_U24()
426 {
427 } // ~Inst_VOP2__V_MUL_U32_U24
428
429 // --- description from .arch file ---
430 // D.u = S0.u[23:0] * S1.u[23:0].
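// This opcode uses the lambda-plus-helper pattern: opImpl holds only
// the per-lane math, and the vop2Helper call (assumed here to be the
// Inst_VOP2 helper template) reads the operands, applies any SDWA/DPP
// handling, runs opImpl, and writes vdst back.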
431 void
432 Inst_VOP2__V_MUL_U32_U24::execute(GPUDynInstPtr gpuDynInst)
433 {
434 auto opImpl = [](VecOperandU32& src0, VecOperandU32& src1,
435 VecOperandU32& vdst, Wavefront* wf) {
436 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
437 if (wf->execMask(lane)) {
438 vdst[lane] = bits(src0[lane], 23, 0) *
439 bits(src1[lane], 23, 0);
440 }
441 }
442 };
443
444 vop2Helper<VecOperandU32>(gpuDynInst, opImpl);
445 } // execute
446 // --- Inst_VOP2__V_MUL_HI_U32_U24 class methods ---
447
448 Inst_VOP2__V_MUL_HI_U32_U24::Inst_VOP2__V_MUL_HI_U32_U24(InFmt_VOP2 *iFmt)
449 : Inst_VOP2(iFmt, "v_mul_hi_u32_u24")
450 {
451 setFlag(ALU);
452 } // Inst_VOP2__V_MUL_HI_U32_U24
453
454 Inst_VOP2__V_MUL_HI_U32_U24::~Inst_VOP2__V_MUL_HI_U32_U24()
455 {
456 } // ~Inst_VOP2__V_MUL_HI_U32_U24
457
458 // --- description from .arch file ---
459 // D.i = (S0.u[23:0] * S1.u[23:0])>>32.
460 void
461 Inst_VOP2__V_MUL_HI_U32_U24::execute(GPUDynInstPtr gpuDynInst)
462 {
463 Wavefront *wf = gpuDynInst->wavefront();
464 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
465 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
466 VecOperandU32 vdst(gpuDynInst, instData.VDST);
467
468 src0.readSrc();
469 src1.read();
470
471 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
472 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
473
474 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
475 if (wf->execMask(lane)) {
476 VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0);
477 VecElemU64 tmp_src1 = (VecElemU64)bits(src1[lane], 23, 0);
478 vdst[lane] = (VecElemU32)((tmp_src0 * tmp_src1) >> 32);
479 }
480 }
481
482 vdst.write();
483 } // execute
484 // --- Inst_VOP2__V_MIN_F32 class methods ---
485
486 Inst_VOP2__V_MIN_F32::Inst_VOP2__V_MIN_F32(InFmt_VOP2 *iFmt)
487 : Inst_VOP2(iFmt, "v_min_f32")
488 {
489 setFlag(ALU);
490 setFlag(F32);
491 } // Inst_VOP2__V_MIN_F32
492
493 Inst_VOP2__V_MIN_F32::~Inst_VOP2__V_MIN_F32()
494 {
495 } // ~Inst_VOP2__V_MIN_F32
496
497 // --- description from .arch file ---
498 // D.f = (S0.f < S1.f ? S0.f : S1.f).
499 void
500 Inst_VOP2__V_MIN_F32::execute(GPUDynInstPtr gpuDynInst)
501 {
502 Wavefront *wf = gpuDynInst->wavefront();
503 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
504 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
505 VecOperandF32 vdst(gpuDynInst, instData.VDST);
506
507 src0.readSrc();
508 src1.read();
509
510 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
511 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
512
513 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
514 if (wf->execMask(lane)) {
515 vdst[lane] = std::fmin(src0[lane], src1[lane]);
516 }
517 }
518
519 vdst.write();
520 } // execute
521 // --- Inst_VOP2__V_MAX_F32 class methods ---
522
523 Inst_VOP2__V_MAX_F32::Inst_VOP2__V_MAX_F32(InFmt_VOP2 *iFmt)
524 : Inst_VOP2(iFmt, "v_max_f32")
525 {
526 setFlag(ALU);
527 setFlag(F32);
528 } // Inst_VOP2__V_MAX_F32
529
530 Inst_VOP2__V_MAX_F32::~Inst_VOP2__V_MAX_F32()
531 {
532 } // ~Inst_VOP2__V_MAX_F32
533
534 // --- description from .arch file ---
535 // D.f = (S0.f >= S1.f ? S0.f : S1.f).
536 void
537 Inst_VOP2__V_MAX_F32::execute(GPUDynInstPtr gpuDynInst)
538 {
539 Wavefront *wf = gpuDynInst->wavefront();
540 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
541 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
542 VecOperandF32 vdst(gpuDynInst, instData.VDST);
543
544 src0.readSrc();
545 src1.read();
546
547 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
548 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
549
550 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
551 if (wf->execMask(lane)) {
552 vdst[lane] = std::fmax(src0[lane], src1[lane]);
553 }
554 }
555
556 vdst.write();
557 } // execute
558 // --- Inst_VOP2__V_MIN_I32 class methods ---
559
560 Inst_VOP2__V_MIN_I32::Inst_VOP2__V_MIN_I32(InFmt_VOP2 *iFmt)
561 : Inst_VOP2(iFmt, "v_min_i32")
562 {
563 setFlag(ALU);
564 } // Inst_VOP2__V_MIN_I32
565
566 Inst_VOP2__V_MIN_I32::~Inst_VOP2__V_MIN_I32()
567 {
568 } // ~Inst_VOP2__V_MIN_I32
569
570 // --- description from .arch file ---
571 // D.i = min(S0.i, S1.i).
572 void
573 Inst_VOP2__V_MIN_I32::execute(GPUDynInstPtr gpuDynInst)
574 {
575 Wavefront *wf = gpuDynInst->wavefront();
576 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
577 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
578 VecOperandI32 vdst(gpuDynInst, instData.VDST);
579
580 src0.readSrc();
581 src1.read();
582
583 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
584 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
585
586 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
587 if (wf->execMask(lane)) {
588 vdst[lane] = std::min(src0[lane], src1[lane]);
589 }
590 }
591
592 vdst.write();
593 } // execute
594 // --- Inst_VOP2__V_MAX_I32 class methods ---
595
596 Inst_VOP2__V_MAX_I32::Inst_VOP2__V_MAX_I32(InFmt_VOP2 *iFmt)
597 : Inst_VOP2(iFmt, "v_max_i32")
598 {
599 setFlag(ALU);
600 } // Inst_VOP2__V_MAX_I32
601
602 Inst_VOP2__V_MAX_I32::~Inst_VOP2__V_MAX_I32()
603 {
604 } // ~Inst_VOP2__V_MAX_I32
605
606 // --- description from .arch file ---
607 // D.i = max(S0.i, S1.i).
608 void
609 Inst_VOP2__V_MAX_I32::execute(GPUDynInstPtr gpuDynInst)
610 {
611 Wavefront *wf = gpuDynInst->wavefront();
612 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
613 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
614 VecOperandI32 vdst(gpuDynInst, instData.VDST);
615
616 src0.readSrc();
617 src1.read();
618
619 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
620 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
621
622 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
623 if (wf->execMask(lane)) {
624 vdst[lane] = std::max(src0[lane], src1[lane]);
625 }
626 }
627
628 vdst.write();
629 } // execute
630 // --- Inst_VOP2__V_MIN_U32 class methods ---
631
632 Inst_VOP2__V_MIN_U32::Inst_VOP2__V_MIN_U32(InFmt_VOP2 *iFmt)
633 : Inst_VOP2(iFmt, "v_min_u32")
634 {
635 setFlag(ALU);
636 } // Inst_VOP2__V_MIN_U32
637
638 Inst_VOP2__V_MIN_U32::~Inst_VOP2__V_MIN_U32()
639 {
640 } // ~Inst_VOP2__V_MIN_U32
641
642 // --- description from .arch file ---
643 // D.u = min(S0.u, S1.u).
644 void
645 Inst_VOP2__V_MIN_U32::execute(GPUDynInstPtr gpuDynInst)
646 {
647 Wavefront *wf = gpuDynInst->wavefront();
648 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
649 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
650 VecOperandU32 vdst(gpuDynInst, instData.VDST);
651
652 src0.readSrc();
653 src1.read();
654
655 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
656 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
657
658 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
659 if (wf->execMask(lane)) {
660 vdst[lane] = std::min(src0[lane], src1[lane]);
661 }
662 }
663
664 vdst.write();
665 } // execute
666 // --- Inst_VOP2__V_MAX_U32 class methods ---
667
668 Inst_VOP2__V_MAX_U32::Inst_VOP2__V_MAX_U32(InFmt_VOP2 *iFmt)
669 : Inst_VOP2(iFmt, "v_max_u32")
670 {
671 setFlag(ALU);
672 } // Inst_VOP2__V_MAX_U32
673
674 Inst_VOP2__V_MAX_U32::~Inst_VOP2__V_MAX_U32()
675 {
676 } // ~Inst_VOP2__V_MAX_U32
677
678 // --- description from .arch file ---
679 // D.u = max(S0.u, S1.u).
680 void
681 Inst_VOP2__V_MAX_U32::execute(GPUDynInstPtr gpuDynInst)
682 {
683 Wavefront *wf = gpuDynInst->wavefront();
684 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
685 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
686 VecOperandU32 vdst(gpuDynInst, instData.VDST);
687
688 src0.readSrc();
689 src1.read();
690
691 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
692 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
693
694 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
695 if (wf->execMask(lane)) {
696 vdst[lane] = std::max(src0[lane], src1[lane]);
697 }
698 }
699
700 vdst.write();
701 } // execute
702 // --- Inst_VOP2__V_LSHRREV_B32 class methods ---
703
704 Inst_VOP2__V_LSHRREV_B32::Inst_VOP2__V_LSHRREV_B32(InFmt_VOP2 *iFmt)
705 : Inst_VOP2(iFmt, "v_lshrrev_b32")
706 {
707 setFlag(ALU);
708 } // Inst_VOP2__V_LSHRREV_B32
709
710 Inst_VOP2__V_LSHRREV_B32::~Inst_VOP2__V_LSHRREV_B32()
711 {
712 } // ~Inst_VOP2__V_LSHRREV_B32
713
714 // --- description from .arch file ---
715 // D.u = S1.u >> S0.u[4:0].
716 // The vacated bits are set to zero.
717 // SQ translates this to an internal SP opcode.
718 void
719 Inst_VOP2__V_LSHRREV_B32::execute(GPUDynInstPtr gpuDynInst)
720 {
721 auto opImpl = [](VecOperandU32& src0, VecOperandU32& src1,
722 VecOperandU32& vdst, Wavefront* wf) {
723 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
724 if (wf->execMask(lane)) {
725 vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
726 }
727 }
728 };
729
730 vop2Helper<VecOperandU32>(gpuDynInst, opImpl);
731 } // execute
732 // --- Inst_VOP2__V_ASHRREV_I32 class methods ---
733
734 Inst_VOP2__V_ASHRREV_I32::Inst_VOP2__V_ASHRREV_I32(InFmt_VOP2 *iFmt)
735 : Inst_VOP2(iFmt, "v_ashrrev_i32")
736 {
737 setFlag(ALU);
738 } // Inst_VOP2__V_ASHRREV_I32
739
740 Inst_VOP2__V_ASHRREV_I32::~Inst_VOP2__V_ASHRREV_I32()
741 {
742 } // ~Inst_VOP2__V_ASHRREV_I32
743
744 // --- description from .arch file ---
745 // D.i = signext(S1.i) >> S0.i[4:0].
746 // The vacated bits are set to the sign bit of the input value.
747 // SQ translates this to an internal SP opcode.
748 void
749 Inst_VOP2__V_ASHRREV_I32::execute(GPUDynInstPtr gpuDynInst)
750 {
751 Wavefront *wf = gpuDynInst->wavefront();
752 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
753 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
754 VecOperandI32 vdst(gpuDynInst, instData.VDST);
755
756 src0.readSrc();
757 src1.read();
758
759 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
760 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
761
762 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
763 if (wf->execMask(lane)) {
764 vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
765 }
766 }
767
768 vdst.write();
769 } // execute
770 // --- Inst_VOP2__V_LSHLREV_B32 class methods ---
771
772 Inst_VOP2__V_LSHLREV_B32::Inst_VOP2__V_LSHLREV_B32(InFmt_VOP2 *iFmt)
773 : Inst_VOP2(iFmt, "v_lshlrev_b32")
774 {
775 setFlag(ALU);
776 } // Inst_VOP2__V_LSHLREV_B32
777
778 Inst_VOP2__V_LSHLREV_B32::~Inst_VOP2__V_LSHLREV_B32()
779 {
780 } // ~Inst_VOP2__V_LSHLREV_B32
781
782 // --- description from .arch file ---
783 // D.u = S1.u << S0.u[4:0].
784 // SQ translates this to an internal SP opcode.
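// SDWA path below: src0_sdwa/origSrc0_sdwa hold the selected and
// original sub-dword views of src0; processSDWA_src() rewrites the
// source lanes per SRC0_SEL/SRC1_SEL, the shift is computed, and
// processSDWA_dst() merges the result into vdst per DST_SEL/DST_U.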
785 void
786 Inst_VOP2__V_LSHLREV_B32::execute(GPUDynInstPtr gpuDynInst)
787 {
788 Wavefront *wf = gpuDynInst->wavefront();
789 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
790 VecOperandU32 src1(gpuDynInst, instData.VSRC1);
791 VecOperandU32 vdst(gpuDynInst, instData.VDST);
792
793 src0.readSrc();
794 src1.read();
795
796 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
797
798 if (isSDWAInst()) {
799 VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
800 // use copies of original src0, src1, and vdst during selecting
801 VecOperandU32 origSrc0_sdwa(gpuDynInst,
802 extData.iFmt_VOP_SDWA.SRC0);
803 VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
804 VecOperandU32 origVdst(gpuDynInst, instData.VDST);
805
806 src0_sdwa.read();
807 origSrc0_sdwa.read();
808 origSrc1.read();
809
810 DPRINTF(VEGA, "Handling V_LSHLREV_B32 SRC SDWA. SRC0: register "
811 "v[%d], DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: "
812 "%d, SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: "
813 "%d, SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
814 extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
815 extData.iFmt_VOP_SDWA.DST_U,
816 extData.iFmt_VOP_SDWA.CLMP,
817 extData.iFmt_VOP_SDWA.SRC0_SEL,
818 extData.iFmt_VOP_SDWA.SRC0_SEXT,
819 extData.iFmt_VOP_SDWA.SRC0_NEG,
820 extData.iFmt_VOP_SDWA.SRC0_ABS,
821 extData.iFmt_VOP_SDWA.SRC1_SEL,
822 extData.iFmt_VOP_SDWA.SRC1_SEXT,
823 extData.iFmt_VOP_SDWA.SRC1_NEG,
824 extData.iFmt_VOP_SDWA.SRC1_ABS);
825
826 processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
827 src1, origSrc1);
828
829 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
830 if (wf->execMask(lane)) {
831 vdst[lane] = src1[lane] << bits(src0_sdwa[lane], 4, 0);
832 origVdst[lane] = vdst[lane]; // keep copy consistent
833 }
834 }
835
836 processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
837 } else {
838 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
839 if (wf->execMask(lane)) {
840 vdst[lane] = src1[lane] << bits(src0[lane], 4, 0);
841 }
842 }
843 }
844
845 vdst.write();
846 } // execute
847 // --- Inst_VOP2__V_AND_B32 class methods ---
848
849 Inst_VOP2__V_AND_B32::Inst_VOP2__V_AND_B32(InFmt_VOP2 *iFmt)
850 : Inst_VOP2(iFmt, "v_and_b32")
851 {
852 setFlag(ALU);
853 } // Inst_VOP2__V_AND_B32
854
855 Inst_VOP2__V_AND_B32::~Inst_VOP2__V_AND_B32()
856 {
857 } // ~Inst_VOP2__V_AND_B32
858
859 // --- description from .arch file ---
860 // D.u = S0.u & S1.u.
861 // Input and output modifiers not supported.
862 void
863 Inst_VOP2__V_AND_B32::execute(GPUDynInstPtr gpuDynInst)
864 {
865 auto opImpl = [](VecOperandU32& src0, VecOperandU32& src1,
866 VecOperandU32& vdst, Wavefront* wf) {
867 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
868 if (wf->execMask(lane)) {
869 vdst[lane] = src0[lane] & src1[lane];
870 }
871 }
872 };
873
874 vop2Helper<VecOperandU32>(gpuDynInst, opImpl);
875 } // execute
876 // --- Inst_VOP2__V_OR_B32 class methods ---
877
878 Inst_VOP2__V_OR_B32::Inst_VOP2__V_OR_B32(InFmt_VOP2 *iFmt)
879 : Inst_VOP2(iFmt, "v_or_b32")
880 {
881 setFlag(ALU);
882 } // Inst_VOP2__V_OR_B32
883
884 Inst_VOP2__V_OR_B32::~Inst_VOP2__V_OR_B32()
885 {
886 } // ~Inst_VOP2__V_OR_B32
887
888 // --- description from .arch file ---
889 // D.u = S0.u | S1.u.
890 // Input and output modifiers not supported.
891 void
892 Inst_VOP2__V_OR_B32::execute(GPUDynInstPtr gpuDynInst)
893 {
894 Wavefront *wf = gpuDynInst->wavefront();
895 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
896 VecOperandU32 src1(gpuDynInst, instData.VSRC1);
897 VecOperandU32 vdst(gpuDynInst, instData.VDST);
898
899 src0.readSrc();
900 src1.read();
901
902 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
903
904 if (isSDWAInst()) {
905 VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
906 // use copies of original src0, src1, and dest during selecting
907 VecOperandU32 origSrc0_sdwa(gpuDynInst,
908 extData.iFmt_VOP_SDWA.SRC0);
909 VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
910 VecOperandU32 origVdst(gpuDynInst, instData.VDST);
911
912 src0_sdwa.read();
913 origSrc0_sdwa.read();
914 origSrc1.read();
915
916 DPRINTF(VEGA, "Handling V_OR_B32 SRC SDWA. SRC0: register v[%d], "
917 "DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, "
918 "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
919 "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
920 extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
921 extData.iFmt_VOP_SDWA.DST_U,
922 extData.iFmt_VOP_SDWA.CLMP,
923 extData.iFmt_VOP_SDWA.SRC0_SEL,
924 extData.iFmt_VOP_SDWA.SRC0_SEXT,
925 extData.iFmt_VOP_SDWA.SRC0_NEG,
926 extData.iFmt_VOP_SDWA.SRC0_ABS,
927 extData.iFmt_VOP_SDWA.SRC1_SEL,
928 extData.iFmt_VOP_SDWA.SRC1_SEXT,
929 extData.iFmt_VOP_SDWA.SRC1_NEG,
930 extData.iFmt_VOP_SDWA.SRC1_ABS);
931
932 processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
933 src1, origSrc1);
934
935 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
936 if (wf->execMask(lane)) {
937 vdst[lane] = src0_sdwa[lane] | src1[lane];
938 origVdst[lane] = vdst[lane]; // keep copy consistent
939 }
940 }
941
942 processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
943 } else {
944 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
945 if (wf->execMask(lane)) {
946 vdst[lane] = src0[lane] | src1[lane];
947 }
948 }
949 }
950
951 vdst.write();
952 } // execute
953 // --- Inst_VOP2__V_XOR_B32 class methods ---
954
955 Inst_VOP2__V_XOR_B32::Inst_VOP2__V_XOR_B32(InFmt_VOP2 *iFmt)
956 : Inst_VOP2(iFmt, "v_xor_b32")
957 {
958 setFlag(ALU);
959 } // Inst_VOP2__V_XOR_B32
960
961 Inst_VOP2__V_XOR_B32::~Inst_VOP2__V_XOR_B32()
962 {
963 } // ~Inst_VOP2__V_XOR_B32
964
965 // --- description from .arch file ---
966 // D.u = S0.u ^ S1.u.
967 // Input and output modifiers not supported.
968 void
969 Inst_VOP2__V_XOR_B32::execute(GPUDynInstPtr gpuDynInst)
970 {
971 Wavefront *wf = gpuDynInst->wavefront();
972 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
973 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
974 VecOperandU32 vdst(gpuDynInst, instData.VDST);
975
976 src0.readSrc();
977 src1.read();
978
979 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
980 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
981
982 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
983 if (wf->execMask(lane)) {
984 vdst[lane] = src0[lane] ^ src1[lane];
985 }
986 }
987
988 vdst.write();
989 } // execute
990 // --- Inst_VOP2__V_DOT2C_F32_BF16 class methods ---
991
992 Inst_VOP2__V_DOT2C_F32_BF16::Inst_VOP2__V_DOT2C_F32_BF16(InFmt_VOP2 *iFmt)
993 : Inst_VOP2(iFmt, "v_dot2c_f32_bf16")
994 {
995 setFlag(ALU);
996 } // Inst_VOP2__V_DOT2C_F32_BF16
997
998 Inst_VOP2__V_DOT2C_F32_BF16::~Inst_VOP2__V_DOT2C_F32_BF16()
999 {
1000 } // ~Inst_VOP2__V_DOT2C_F32_BF16
1001
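// No .arch description is given for this opcode; from the code below it
// computes D.f32 += bf16(S0[15:0]) * bf16(S1[15:0])
//                 + bf16(S0[31:16]) * bf16(S1[31:16]),
// i.e. a two-element bfloat16 dot product accumulated into the f32 dst.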
1002 void
1003 Inst_VOP2__V_DOT2C_F32_BF16::execute(GPUDynInstPtr gpuDynInst)
1004 {
1005 Wavefront *wf = gpuDynInst->wavefront();
1006 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
1007 VecOperandU32 src1(gpuDynInst, instData.VSRC1);
1008 VecOperandF32 vdst(gpuDynInst, instData.VDST);
1009
1010 src0.readSrc();
1011 src1.read();
1012 vdst.read();
1013
1014 fatal_if(isSDWAInst(), "SDWA not supported for V_DOT2C_F32_BF16");
1015
1016 VecElemU32 src0d[NumVecElemPerVecReg]; // per-lane copy of src0 (plain or DPP-permuted)
1017 if (isDPPInst()) {
1018 VecOperandU32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
1019 src0_dpp.read();
1020
1021 processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1);
1022 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1023 src0d[lane] = src0_dpp[lane];
1024 }
1025 } else {
1026 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1027 src0d[lane] = src0[lane];
1028 }
1029 }
1030
1031 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1032 if (wf->execMask(lane)) {
1033 AMDGPU::mxbfloat16 a1, a2, b1, b2;
1034 a1.data = uint16_t(bits(src0d[lane], 15, 0));
1035 a2.data = uint16_t(bits(src0d[lane], 31, 16));
1036 b1.data = uint16_t(bits(src1[lane], 15, 0));
1037 b2.data = uint16_t(bits(src1[lane], 31, 16));
1038
1039 vdst[lane] += float(a1) * float(b1);
1040 vdst[lane] += float(a2) * float(b2);
1041 }
1042 }
1043
1044 vdst.write();
1045 } // execute
1046 // --- Inst_VOP2__V_MAC_F32 class methods ---
1047
1048 Inst_VOP2__V_MAC_F32::Inst_VOP2__V_MAC_F32(InFmt_VOP2 *iFmt)
1049 : Inst_VOP2(iFmt, "v_mac_f32")
1050 {
1051 setFlag(ALU);
1052 setFlag(F32);
1053 setFlag(MAC);
1054 } // Inst_VOP2__V_MAC_F32
1055
1056 Inst_VOP2__V_MAC_F32::~Inst_VOP2__V_MAC_F32()
1057 {
1058 } // ~Inst_VOP2__V_MAC_F32
1059
1060 // --- description from .arch file ---
1061 // D.f = S0.f * S1.f + D.f.
1062 // SQ translates to V_MAD_F32.
1063 void
1064 Inst_VOP2__V_MAC_F32::execute(GPUDynInstPtr gpuDynInst)
1065 {
1066 Wavefront *wf = gpuDynInst->wavefront();
1067 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
1068 VecOperandF32 src1(gpuDynInst, instData.VSRC1);
1069 VecOperandF32 vdst(gpuDynInst, instData.VDST);
1070
1071 src0.readSrc();
1072 src1.read();
1073 vdst.read();
1074
1075 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
1076
1077 if (isDPPInst()) {
1078 VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
1079 src0_dpp.read();
1080
1081 DPRINTF(VEGA, "Handling V_MAC_F32 SRC DPP. SRC0: register v[%d], "
1082 "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
1083 "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, "
1084 "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0,
1085 extData.iFmt_VOP_DPP.DPP_CTRL,
1086 extData.iFmt_VOP_DPP.SRC0_ABS,
1087 extData.iFmt_VOP_DPP.SRC0_NEG,
1088 extData.iFmt_VOP_DPP.SRC1_ABS,
1089 extData.iFmt_VOP_DPP.SRC1_NEG,
1090 extData.iFmt_VOP_DPP.BC,
1091 extData.iFmt_VOP_DPP.BANK_MASK,
1092 extData.iFmt_VOP_DPP.ROW_MASK);
1093
1094 processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1);
1095
1096 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1097 if (wf->execMask(lane)) {
1098 vdst[lane] = std::fma(src0_dpp[lane], src1[lane],
1099 vdst[lane]);
1100 }
1101 }
1102 } else {
1103 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1104 if (wf->execMask(lane)) {
1105 vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
1106 }
1107 }
1108 }
1109
1110 vdst.write();
1111 } // execute
1112 // --- Inst_VOP2__V_MADMK_F32 class methods ---
1113
1114 Inst_VOP2__V_MADMK_F32::Inst_VOP2__V_MADMK_F32(InFmt_VOP2 *iFmt)
1115 : Inst_VOP2(iFmt, "v_madmk_f32")
1116 {
1117 setFlag(ALU);
1118 setFlag(F32);
1119 setFlag(MAD);
1120 } // Inst_VOP2__V_MADMK_F32
1121
1122 Inst_VOP2__V_MADMK_F32::~Inst_VOP2__V_MADMK_F32()
1123 {
1124 } // ~Inst_VOP2__V_MADMK_F32
1125
1126 // --- description from .arch file ---
1127 // D.f = S0.f * K + S1.f; K is a 32-bit inline constant.
1128 // This opcode cannot use the VOP3 encoding and cannot use input/output
1129 // --- modifiers.
1130 // SQ translates to V_MAD_F32.
1131 void
1132 Inst_VOP2__V_MADMK_F32::execute(GPUDynInstPtr gpuDynInst)
1133 {
1134 Wavefront *wf = gpuDynInst->wavefront();
1135 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
1136 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
1137 VecOperandF32 vdst(gpuDynInst, instData.VDST);
1138 VecElemF32 k = extData.imm_f32;
1139
1140 src0.readSrc();
1141 src1.read();
1142
1143 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
1144 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
1145
1146 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1147 if (wf->execMask(lane)) {
1148 vdst[lane] = std::fma(src0[lane], k, src1[lane]);
1149 }
1150 }
1151
1152 vdst.write();
1153 } // execute
1154 // --- Inst_VOP2__V_MADAK_F32 class methods ---
1155
1156 Inst_VOP2__V_MADAK_F32::Inst_VOP2__V_MADAK_F32(InFmt_VOP2 *iFmt)
1157 : Inst_VOP2(iFmt, "v_madak_f32")
1158 {
1159 setFlag(ALU);
1160 setFlag(F32);
1161 setFlag(MAD);
1162 } // Inst_VOP2__V_MADAK_F32
1163
1164 Inst_VOP2__V_MADAK_F32::~Inst_VOP2__V_MADAK_F32()
1165 {
1166 } // ~Inst_VOP2__V_MADAK_F32
1167
1168 // --- description from .arch file ---
1169 // D.f = S0.f * S1.f + K; K is a 32-bit inline constant.
1170 // This opcode cannot use the VOP3 encoding and cannot use input/output
1171 // --- modifiers.
1172 // SQ translates to V_MAD_F32.
1173 void
1174 Inst_VOP2__V_MADAK_F32::execute(GPUDynInstPtr gpuDynInst)
1175 {
1176 Wavefront *wf = gpuDynInst->wavefront();
1177 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
1178 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
1179 VecOperandF32 vdst(gpuDynInst, instData.VDST);
1180 VecElemF32 k = extData.imm_f32;
1181
1182 src0.readSrc();
1183 src1.read();
1184
1185 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
1186 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
1187
1188 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1189 if (wf->execMask(lane)) {
1190 vdst[lane] = std::fma(src0[lane], src1[lane], k);
1191 }
1192 }
1193
1194 vdst.write();
1195 } // execute
1196 // --- Inst_VOP2__V_ADD_CO_U32 class methods ---
1197
1198 Inst_VOP2__V_ADD_CO_U32::Inst_VOP2__V_ADD_CO_U32(InFmt_VOP2 *iFmt)
1199 : Inst_VOP2(iFmt, "v_add_co_u32")
1200 {
1201 setFlag(ALU);
1202 setFlag(WritesVCC);
1203 } // Inst_VOP2__V_ADD_CO_U32
1204
1205 Inst_VOP2__V_ADD_CO_U32::~Inst_VOP2__V_ADD_CO_U32()
1206 {
1207 } // ~Inst_VOP2__V_ADD_CO_U32
1208
1209 // --- description from .arch file ---
1210 // D.u = S0.u + S1.u;
1211 // VCC[threadId] = (S0.u + S1.u >= 0x100000000ULL ? 1 : 0) is an UNSIGNED
1212 // --- overflow or carry-out for V_ADDC_U32.
1213 // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
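// Worked example: 0xffffffff + 0x00000001 wraps to 0x00000000 in vdst,
// and the widened check 0xffffffff + 1 = 0x100000000 >= 2^32 sets that
// lane's carry bit in VCC.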
1214 void
1215 Inst_VOP2__V_ADD_CO_U32::execute(GPUDynInstPtr gpuDynInst)
1216 {
1217 Wavefront *wf = gpuDynInst->wavefront();
1218 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
1219 VecOperandU32 src1(gpuDynInst, instData.VSRC1);
1220 VecOperandU32 vdst(gpuDynInst, instData.VDST);
1221 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
1222
1223 src0.readSrc();
1224 src1.read();
1225
1226 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
1227
1228 if (isSDWAInst()) {
1229 VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
1230 // use copies of original src0, src1, and dest during selecting
1231 VecOperandU32 origSrc0_sdwa(gpuDynInst,
1232 extData.iFmt_VOP_SDWA.SRC0);
1233 VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
1234 VecOperandU32 origVdst(gpuDynInst, instData.VDST);
1235
1236 src0_sdwa.read();
1237 origSrc0_sdwa.read();
1238 origSrc1.read();
1239
1240 DPRINTF(VEGA, "Handling V_ADD_CO_U32 SRC SDWA. SRC0: register "
1241 "v[%d], DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, "
1242 "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
1243 "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
1244 extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
1245 extData.iFmt_VOP_SDWA.DST_U,
1246 extData.iFmt_VOP_SDWA.CLMP,
1247 extData.iFmt_VOP_SDWA.SRC0_SEL,
1248 extData.iFmt_VOP_SDWA.SRC0_SEXT,
1249 extData.iFmt_VOP_SDWA.SRC0_NEG,
1250 extData.iFmt_VOP_SDWA.SRC0_ABS,
1251 extData.iFmt_VOP_SDWA.SRC1_SEL,
1252 extData.iFmt_VOP_SDWA.SRC1_SEXT,
1253 extData.iFmt_VOP_SDWA.SRC1_NEG,
1254 extData.iFmt_VOP_SDWA.SRC1_ABS);
1255
1256 processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
1257 src1, origSrc1);
1258
1259 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1260 if (wf->execMask(lane)) {
1261 vdst[lane] = src0_sdwa[lane] + src1[lane];
1262 origVdst[lane] = vdst[lane]; // keep copy consistent
1263 vcc.setBit(lane, ((VecElemU64)src0_sdwa[lane]
1264 + (VecElemU64)src1[lane] >= 0x100000000ULL) ? 1 : 0);
1265 }
1266 }
1267
1268 processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
1269 } else {
1270 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1271 if (wf->execMask(lane)) {
1272 vdst[lane] = src0[lane] + src1[lane];
1273 vcc.setBit(lane, ((VecElemU64)src0[lane]
1274 + (VecElemU64)src1[lane] >= 0x100000000ULL) ? 1 : 0);
1275 }
1276 }
1277 }
1278
1279 vcc.write();
1280 vdst.write();
1281 } // execute
1282 // --- Inst_VOP2__V_SUB_CO_U32 class methods ---
1283
1284 Inst_VOP2__V_SUB_CO_U32::Inst_VOP2__V_SUB_CO_U32(InFmt_VOP2 *iFmt)
1285 : Inst_VOP2(iFmt, "v_sub_co_u32")
1286 {
1287 setFlag(ALU);
1288 setFlag(WritesVCC);
1289 } // Inst_VOP2__V_SUB_CO_U32
1290
1291 Inst_VOP2__V_SUB_CO_U32::~Inst_VOP2__V_SUB_CO_U32()
1292 {
1293 } // ~Inst_VOP2__V_SUB_CO_U32
1294
1295 // --- description from .arch file ---
1296 // D.u = S0.u - S1.u;
1297 // VCC[threadId] = (S1.u > S0.u ? 1 : 0) is an UNSIGNED overflow or
1298 // carry-out for V_SUBB_U32.
1299 // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
1300 void
1301 Inst_VOP2__V_SUB_CO_U32::execute(GPUDynInstPtr gpuDynInst)
1302 {
1303 Wavefront *wf = gpuDynInst->wavefront();
1304 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
1305 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
1306 VecOperandU32 vdst(gpuDynInst, instData.VDST);
1307 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
1308
1309 src0.readSrc();
1310 src1.read();
1311
1312 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
1313 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
1314
1315 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1316 if (wf->execMask(lane)) {
1317 vdst[lane] = src0[lane] - src1[lane];
1318 vcc.setBit(lane, src1[lane] > src0[lane] ? 1 : 0);
1319 }
1320 }
1321
1322 vdst.write();
1323 vcc.write();
1324 } // execute
1325 // --- Inst_VOP2__V_SUBREV_CO_U32 class methods ---
1326
1327 Inst_VOP2__V_SUBREV_CO_U32::Inst_VOP2__V_SUBREV_CO_U32(InFmt_VOP2 *iFmt)
1328 : Inst_VOP2(iFmt, "v_subrev_co_u32")
1329 {
1330 setFlag(ALU);
1331 setFlag(WritesVCC);
1332 } // Inst_VOP2__V_SUBREV_CO_U32
1333
1334 Inst_VOP2__V_SUBREV_CO_U32::~Inst_VOP2__V_SUBREV_CO_U32()
1335 {
1336 } // ~Inst_VOP2__V_SUBREV_CO_U32
1337
1338 // --- description from .arch file ---
1339 // D.u = S1.u - S0.u;
1340 // VCC[threadId] = (S0.u > S1.u ? 1 : 0) is an UNSIGNED overflow or
1341 // carry-out for V_SUBB_U32.
1342 // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
1343 void
1344 Inst_VOP2__V_SUBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst)
1345 {
1346 Wavefront *wf = gpuDynInst->wavefront();
1347 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
1348 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
1349 VecOperandU32 vdst(gpuDynInst, instData.VDST);
1350 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
1351
1352 src0.readSrc();
1353 src1.read();
1354
1355 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
1356 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
1357
1358 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1359 if (wf->execMask(lane)) {
1360 vdst[lane] = src1[lane] - src0[lane];
1361 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
1362 }
1363 }
1364
1365 vdst.write();
1366 vcc.write();
1367 } // execute
1368 // --- Inst_VOP2__V_ADDC_CO_U32 class methods ---
1369
1370 Inst_VOP2__V_ADDC_CO_U32::Inst_VOP2__V_ADDC_CO_U32(InFmt_VOP2 *iFmt)
1371 : Inst_VOP2(iFmt, "v_addc_co_u32")
1372 {
1373 setFlag(ALU);
1374 setFlag(WritesVCC);
1375 setFlag(ReadsVCC);
1376 } // Inst_VOP2__V_ADDC_CO_U32
1377
1378 Inst_VOP2__V_ADDC_CO_U32::~Inst_VOP2__V_ADDC_CO_U32()
1379 {
1380 } // ~Inst_VOP2__V_ADDC_CO_U32
1381
1382 // --- description from .arch file ---
1383 // D.u = S0.u + S1.u + VCC[threadId];
1384 // VCC[threadId] = (S0.u + S1.u + VCC[threadId] >= 0x100000000ULL ? 1 : 0)
1385 // is an UNSIGNED overflow.
1386 // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
1387 // source comes from the SGPR-pair at S2.u.
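// Together with V_ADD_CO_U32 this forms a 64-bit add: the low dwords
// go through V_ADD_CO_U32, which leaves a per-lane carry in VCC, and
// this opcode then adds the high dwords plus that carry, writing the
// carry-out back to VCC.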
1388 void
1389 Inst_VOP2__V_ADDC_CO_U32::execute(GPUDynInstPtr gpuDynInst)
1390 {
1391 Wavefront *wf = gpuDynInst->wavefront();
1392 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
1393 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
1394 VecOperandU32 vdst(gpuDynInst, instData.VDST);
1395 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
1396
1397 src0.readSrc();
1398 src1.read();
1399 vcc.read();
1400
1401 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
1402 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
1403
1404 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1405 if (wf->execMask(lane)) {
1406 vdst[lane] = src0[lane] + src1[lane]
1407 + bits(vcc.rawData(), lane);
1408 vcc.setBit(lane, ((VecElemU64)src0[lane]
1409 + (VecElemU64)src1[lane]
1410 + (VecElemU64)bits(vcc.rawData(), lane, lane))
1411 >= 0x100000000 ? 1 : 0);
1412 }
1413 }
1414
1415 vdst.write();
1416 vcc.write();
1417 } // execute
1418 // --- Inst_VOP2__V_SUBB_CO_U32 class methods ---
1419
1420 Inst_VOP2__V_SUBB_CO_U32::Inst_VOP2__V_SUBB_CO_U32(InFmt_VOP2 *iFmt)
1421 : Inst_VOP2(iFmt, "v_subb_co_u32")
1422 {
1423 setFlag(ALU);
1424 setFlag(WritesVCC);
1425 setFlag(ReadsVCC);
1426 } // Inst_VOP2__V_SUBB_CO_U32
1427
1428 Inst_VOP2__V_SUBB_CO_U32::~Inst_VOP2__V_SUBB_CO_U32()
1429 {
1430 } // ~Inst_VOP2__V_SUBB_CO_U32
1431
1432 // --- description from .arch file ---
1433 // D.u = S0.u - S1.u - VCC[threadId];
1434 // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED
1435 // --- overflow.
1436 // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
1437 // --- source comes from the SGPR-pair at S2.u.
1438 void
1439 Inst_VOP2__V_SUBB_CO_U32::execute(GPUDynInstPtr gpuDynInst)
1440 {
1441 Wavefront *wf = gpuDynInst->wavefront();
1442 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
1443 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
1444 VecOperandU32 vdst(gpuDynInst, instData.VDST);
1445 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
1446
1447 src0.readSrc();
1448 src1.read();
1449 vcc.read();
1450
1451 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
1452 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
1453
1454 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1455 if (wf->execMask(lane)) {
1456 vdst[lane]
1457 = src0[lane] - src1[lane] - bits(vcc.rawData(), lane);
1458 vcc.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane))
1459 > src0[lane] ? 1 : 0);
1460 }
1461 }
1462
1463 vdst.write();
1464 vcc.write();
1465 } // execute
1466 // --- Inst_VOP2__V_SUBBREV_CO_U32 class methods ---
1467
1468 Inst_VOP2__V_SUBBREV_CO_U32::Inst_VOP2__V_SUBBREV_CO_U32(InFmt_VOP2 *iFmt)
1469 : Inst_VOP2(iFmt, "v_subbrev_co_u32")
1470 {
1471 setFlag(ALU);
1472 setFlag(WritesVCC);
1473 setFlag(ReadsVCC);
1474 } // Inst_VOP2__V_SUBBREV_CO_U32
1475
1476 Inst_VOP2__V_SUBBREV_CO_U32::~Inst_VOP2__V_SUBBREV_CO_U32()
1477 {
1478 } // ~Inst_VOP2__V_SUBBREV_CO_U32
1479
1480 // --- description from .arch file ---
1481 // D.u = S1.u - S0.u - VCC[threadId];
1482 // VCC[threadId] = (S0.u + VCC[threadId] > S1.u ? 1 : 0) is an UNSIGNED
1483 // overflow.
1484 // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
1485 // source comes from the SGPR-pair at S2.u.
1486 // SQ translates this to V_SUBB_U32 with reversed operands.
1487 void
1488 Inst_VOP2__V_SUBBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst)
1489 {
1490 Wavefront *wf = gpuDynInst->wavefront();
1491 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
1492 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
1493 VecOperandU32 vdst(gpuDynInst, instData.VDST);
1494 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
1495
1496 src0.readSrc();
1497 src1.read();
1498 vcc.read();
1499
1500 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
1501 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
1502
1503 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1504 if (wf->execMask(lane)) {
1505 vdst[lane]
1506 = src1[lane] - src0[lane] - bits(vcc.rawData(), lane);
1507 vcc.setBit(lane, (src0[lane] + bits(vcc.rawData(), lane))
1508 > src1[lane] ? 1 : 0);
1509 }
1510 }
1511
1512 vdst.write();
1513 vcc.write();
1514 } // execute
1515 // --- Inst_VOP2__V_ADD_F16 class methods ---
1516
1517 Inst_VOP2__V_ADD_F16::Inst_VOP2__V_ADD_F16(InFmt_VOP2 *iFmt)
1518 : Inst_VOP2(iFmt, "v_add_f16")
1519 {
1520 setFlag(ALU);
1521 setFlag(F16);
1522 } // Inst_VOP2__V_ADD_F16
1523
1524 Inst_VOP2__V_ADD_F16::~Inst_VOP2__V_ADD_F16()
1525 {
1526 } // ~Inst_VOP2__V_ADD_F16
1527
1528 // --- description from .arch file ---
1529 // D.f16 = S0.f16 + S1.f16.
1530 // Supports denormals, round mode, exception flags, saturation.
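// Note: this and the other f16 VOP2 arithmetic ops are not modeled in
// this implementation; their execute() bodies simply panic (see
// panicUnimplemented() below).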
1531 void
1532 Inst_VOP2__V_ADD_F16::execute(GPUDynInstPtr gpuDynInst)
1533 {
1534 panicUnimplemented();
1535 } // execute
1536 // --- Inst_VOP2__V_SUB_F16 class methods ---
1537
1538 Inst_VOP2__V_SUB_F16::Inst_VOP2__V_SUB_F16(InFmt_VOP2 *iFmt)
1539 : Inst_VOP2(iFmt, "v_sub_f16")
1540 {
1541 setFlag(ALU);
1542 setFlag(F16);
1543 } // Inst_VOP2__V_SUB_F16
1544
1545 Inst_VOP2__V_SUB_F16::~Inst_VOP2__V_SUB_F16()
1546 {
1547 } // ~Inst_VOP2__V_SUB_F16
1548
1549 // --- description from .arch file ---
1550 // D.f16 = S0.f16 - S1.f16.
1551 // Supports denormals, round mode, exception flags, saturation.
1552 // SQ translates to V_ADD_F16.
1553 void
1554 Inst_VOP2__V_SUB_F16::execute(GPUDynInstPtr gpuDynInst)
1555 {
1556 panicUnimplemented();
1557 } // execute
1558 // --- Inst_VOP2__V_SUBREV_F16 class methods ---
1559
1560 Inst_VOP2__V_SUBREV_F16::Inst_VOP2__V_SUBREV_F16(InFmt_VOP2 *iFmt)
1561 : Inst_VOP2(iFmt, "v_subrev_f16")
1562 {
1563 setFlag(ALU);
1564 setFlag(F16);
1565 } // Inst_VOP2__V_SUBREV_F16
1566
1567 Inst_VOP2__V_SUBREV_F16::~Inst_VOP2__V_SUBREV_F16()
1568 {
1569 } // ~Inst_VOP2__V_SUBREV_F16
1570
1571 // --- description from .arch file ---
1572 // D.f16 = S1.f16 - S0.f16.
1573 // Supports denormals, round mode, exception flags, saturation.
1574 // SQ translates to V_ADD_F16.
1575 void
1576 Inst_VOP2__V_SUBREV_F16::execute(GPUDynInstPtr gpuDynInst)
1577 {
1578 panicUnimplemented();
1579 } // execute
1580 // --- Inst_VOP2__V_MUL_F16 class methods ---
1581
1582 Inst_VOP2__V_MUL_F16::Inst_VOP2__V_MUL_F16(InFmt_VOP2 *iFmt)
1583 : Inst_VOP2(iFmt, "v_mul_f16")
1584 {
1585 setFlag(ALU);
1586 setFlag(F16);
1587 } // Inst_VOP2__V_MUL_F16
1588
1589 Inst_VOP2__V_MUL_F16::~Inst_VOP2__V_MUL_F16()
1590 {
1591 } // ~Inst_VOP2__V_MUL_F16
1592
1593 // --- description from .arch file ---
1594 // D.f16 = S0.f16 * S1.f16.
1595 // Supports denormals, round mode, exception flags, saturation.
1596 void
1597 Inst_VOP2__V_MUL_F16::execute(GPUDynInstPtr gpuDynInst)
1598 {
1599 panicUnimplemented();
1600 } // execute
1601 // --- Inst_VOP2__V_MAC_F16 class methods ---
1602
1603 Inst_VOP2__V_MAC_F16::Inst_VOP2__V_MAC_F16(InFmt_VOP2 *iFmt)
1604 : Inst_VOP2(iFmt, "v_mac_f16")
1605 {
1606 setFlag(ALU);
1607 setFlag(F16);
1608 setFlag(MAC);
1609 } // Inst_VOP2__V_MAC_F16
1610
1611 Inst_VOP2__V_MAC_F16::~Inst_VOP2__V_MAC_F16()
1612 {
1613 } // ~Inst_VOP2__V_MAC_F16
1614
1615 // --- description from .arch file ---
1616 // D.f16 = S0.f16 * S1.f16 + D.f16.
1617 // Supports round mode, exception flags, saturation.
1618 // SQ translates this to V_MAD_F16.
1619 void
1620 Inst_VOP2__V_MAC_F16::execute(GPUDynInstPtr gpuDynInst)
1621 {
1622 panicUnimplemented();
1623 } // execute
1624 // --- Inst_VOP2__V_MADMK_F16 class methods ---
1625
1626 Inst_VOP2__V_MADMK_F16::Inst_VOP2__V_MADMK_F16(InFmt_VOP2 *iFmt)
1627 : Inst_VOP2(iFmt, "v_madmk_f16")
1628 {
1629 setFlag(ALU);
1630 setFlag(F16);
1631 setFlag(MAD);
1632 } // Inst_VOP2__V_MADMK_F16
1633
1634 Inst_VOP2__V_MADMK_F16::~Inst_VOP2__V_MADMK_F16()
1635 {
1636 } // ~Inst_VOP2__V_MADMK_F16
1637
1638 // --- description from .arch file ---
1639 // D.f16 = S0.f16 * K.f16 + S1.f16; K is a 16-bit inline constant stored
1640 // in the following literal DWORD.
1641 // This opcode cannot use the VOP3 encoding and cannot use input/output
1642 // modifiers. Supports round mode, exception flags, saturation.
1643 // SQ translates this to V_MAD_F16.
1644 void
1645 Inst_VOP2__V_MADMK_F16::execute(GPUDynInstPtr gpuDynInst)
1646 {
1647 panicUnimplemented();
1648 } // execute
1649 // --- Inst_VOP2__V_MADAK_F16 class methods ---
1650
1651 Inst_VOP2__V_MADAK_F16::Inst_VOP2__V_MADAK_F16(InFmt_VOP2 *iFmt)
1652 : Inst_VOP2(iFmt, "v_madak_f16")
1653 {
1654 setFlag(ALU);
1655 setFlag(F16);
1656 setFlag(MAD);
1657 } // Inst_VOP2__V_MADAK_F16
1658
1659 Inst_VOP2__V_MADAK_F16::~Inst_VOP2__V_MADAK_F16()
1660 {
1661 } // ~Inst_VOP2__V_MADAK_F16
1662
1663 // --- description from .arch file ---
1664 // D.f16 = S0.f16 * S1.f16 + K.f16; K is a 16-bit inline constant stored
1665 // in the following literal DWORD.
1666 // This opcode cannot use the VOP3 encoding and cannot use input/output
1667 // modifiers. Supports round mode, exception flags, saturation.
1668 // SQ translates this to V_MAD_F16.
1669 void
1670 Inst_VOP2__V_MADAK_F16::execute(GPUDynInstPtr gpuDynInst)
1671 {
1672 panicUnimplemented();
1673 } // execute
1674 // --- Inst_VOP2__V_ADD_U16 class methods ---
1675
1676 Inst_VOP2__V_ADD_U16::Inst_VOP2__V_ADD_U16(InFmt_VOP2 *iFmt)
1677 : Inst_VOP2(iFmt, "v_add_u16")
1678 {
1679 setFlag(ALU);
1680 } // Inst_VOP2__V_ADD_U16
1681
1682 Inst_VOP2__V_ADD_U16::~Inst_VOP2__V_ADD_U16()
1683 {
1684 } // ~Inst_VOP2__V_ADD_U16
1685
1686 // --- description from .arch file ---
1687 // D.u16 = S0.u16 + S1.u16.
1688 // Supports saturation (unsigned 16-bit integer domain).
1689 void
1690 Inst_VOP2__V_ADD_U16::execute(GPUDynInstPtr gpuDynInst)
1691 {
1692 Wavefront *wf = gpuDynInst->wavefront();
1693 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
1694 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
1695 VecOperandU16 vdst(gpuDynInst, instData.VDST);
1696
1697 src0.readSrc();
1698 src1.read();
1699
1700 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
1701 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
1702
1703 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1704 if (wf->execMask(lane)) {
1705 vdst[lane] = src0[lane] + src1[lane];
1706 }
1707 }
1708
1709 vdst.write();
1710 } // execute
1711 // --- Inst_VOP2__V_SUB_U16 class methods ---
1712
1713 Inst_VOP2__V_SUB_U16::Inst_VOP2__V_SUB_U16(InFmt_VOP2 *iFmt)
1714 : Inst_VOP2(iFmt, "v_sub_u16")
1715 {
1716 setFlag(ALU);
1717 } // Inst_VOP2__V_SUB_U16
1718
1719 Inst_VOP2__V_SUB_U16::~Inst_VOP2__V_SUB_U16()
1720 {
1721 } // ~Inst_VOP2__V_SUB_U16
1722
1723 // --- description from .arch file ---
1724 // D.u16 = S0.u16 - S1.u16.
1725 // Supports saturation (unsigned 16-bit integer domain).
1726 void
1727 Inst_VOP2__V_SUB_U16::execute(GPUDynInstPtr gpuDynInst)
1728 {
1729 Wavefront *wf = gpuDynInst->wavefront();
1730 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
1731 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
1732 VecOperandU16 vdst(gpuDynInst, instData.VDST);
1733
1734 src0.readSrc();
1735 src1.read();
1736
1737 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
1738 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
1739
1740 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1741 if (wf->execMask(lane)) {
1742 vdst[lane] = src0[lane] - src1[lane];
1743 }
1744 }
1745
1746 vdst.write();
1747 } // execute
1748 // --- Inst_VOP2__V_SUBREV_U16 class methods ---
1749
1750 Inst_VOP2__V_SUBREV_U16::Inst_VOP2__V_SUBREV_U16(InFmt_VOP2 *iFmt)
1751 : Inst_VOP2(iFmt, "v_subrev_u16")
1752 {
1753 setFlag(ALU);
1754 } // Inst_VOP2__V_SUBREV_U16
1755
1756 Inst_VOP2__V_SUBREV_U16::~Inst_VOP2__V_SUBREV_U16()
1757 {
1758 } // ~Inst_VOP2__V_SUBREV_U16
1759
1760 // --- description from .arch file ---
1761 // D.u16 = S1.u16 - S0.u16.
1762 // Supports saturation (unsigned 16-bit integer domain).
1763 // SQ translates this to V_SUB_U16 with reversed operands.
1764 void
1765 Inst_VOP2__V_SUBREV_U16::execute(GPUDynInstPtr gpuDynInst)
1766 {
1767 Wavefront *wf = gpuDynInst->wavefront();
1768 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
1769 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
1770 VecOperandU16 vdst(gpuDynInst, instData.VDST);
1771
1772 src0.readSrc();
1773 src1.read();
1774
1775 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
1776 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
1777
1778 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1779 if (wf->execMask(lane)) {
1780 vdst[lane] = src1[lane] - src0[lane];
1781 }
1782 }
1783
1784 vdst.write();
1785 } // execute
1786 // --- Inst_VOP2__V_MUL_LO_U16 class methods ---
1787
1788 Inst_VOP2__V_MUL_LO_U16::Inst_VOP2__V_MUL_LO_U16(InFmt_VOP2 *iFmt)
1789 : Inst_VOP2(iFmt, "v_mul_lo_u16")
1790 {
1791 setFlag(ALU);
1792 } // Inst_VOP2__V_MUL_LO_U16
1793
1794 Inst_VOP2__V_MUL_LO_U16::~Inst_VOP2__V_MUL_LO_U16()
1795 {
1796 } // ~Inst_VOP2__V_MUL_LO_U16
1797
1798 // --- description from .arch file ---
1799 // D.u16 = S0.u16 * S1.u16.
1800 // Supports saturation (unsigned 16-bit integer domain).
1801 void
1802 Inst_VOP2__V_MUL_LO_U16::execute(GPUDynInstPtr gpuDynInst)
1803 {
1804 Wavefront *wf = gpuDynInst->wavefront();
1805 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
1806 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
1807 VecOperandU16 vdst(gpuDynInst, instData.VDST);
1808
1809 src0.readSrc();
1810 src1.read();
1811
1812 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
1813 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
1814
1815 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1816 if (wf->execMask(lane)) {
1817 vdst[lane] = src0[lane] * src1[lane];
1818 }
1819 }
1820
1821 vdst.write();
1822 } // execute
1823 // --- Inst_VOP2__V_LSHLREV_B16 class methods ---
1824
1825 Inst_VOP2__V_LSHLREV_B16::Inst_VOP2__V_LSHLREV_B16(InFmt_VOP2 *iFmt)
1826 : Inst_VOP2(iFmt, "v_lshlrev_b16")
1827 {
1828 setFlag(ALU);
1829 } // Inst_VOP2__V_LSHLREV_B16
1830
1831 Inst_VOP2__V_LSHLREV_B16::~Inst_VOP2__V_LSHLREV_B16()
1832 {
1833 } // ~Inst_VOP2__V_LSHLREV_B16
1834
1835 // --- description from .arch file ---
1836 // D.u[15:0] = S1.u[15:0] << S0.u[3:0].
1837 // SQ translates this to an internal SP opcode.
1838 void
1839 Inst_VOP2__V_LSHLREV_B16::execute(GPUDynInstPtr gpuDynInst)
1840 {
1841 auto opImpl = [](VecOperandU32& src0, VecOperandU32& src1,
1842 VecOperandU32& vdst, Wavefront* wf) {
1843 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1844 if (wf->execMask(lane)) {
1845 vdst[lane] = src1[lane] << bits(src0[lane], 3, 0);
1846 }
1847 }
1848 };
1849
1850 vop2Helper<VecOperandU32>(gpuDynInst, opImpl);
1851 } // execute
1852 // --- Inst_VOP2__V_LSHRREV_B16 class methods ---
1853
1854 Inst_VOP2__V_LSHRREV_B16::Inst_VOP2__V_LSHRREV_B16(InFmt_VOP2 *iFmt)
1855 : Inst_VOP2(iFmt, "v_lshrrev_b16")
1856 {
1857 setFlag(ALU);
1858 } // Inst_VOP2__V_LSHRREV_B16
1859
1860 Inst_VOP2__V_LSHRREV_B16::~Inst_VOP2__V_LSHRREV_B16()
1861 {
1862 } // ~Inst_VOP2__V_LSHRREV_B16
1863
1864 // --- description from .arch file ---
1865 // D.u[15:0] = S1.u[15:0] >> S0.u[3:0].
1866 // The vacated bits are set to zero.
1867 // SQ translates this to an internal SP opcode.
1868 void
1869 Inst_VOP2__V_LSHRREV_B16::execute(GPUDynInstPtr gpuDynInst)
1870 {
1871 Wavefront *wf = gpuDynInst->wavefront();
1872 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
1873 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
1874 VecOperandU16 vdst(gpuDynInst, instData.VDST);
1875
1876 src0.readSrc();
1877 src1.read();
1878
1879 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
1880 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
1881
1882 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1883 if (wf->execMask(lane)) {
1884 vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0);
1885 }
1886 }
1887
1888 vdst.write();
1889 } // execute
1890 // --- Inst_VOP2__V_ASHRREV_I16 class methods ---
1891
1892 Inst_VOP2__V_ASHRREV_I16::Inst_VOP2__V_ASHRREV_I16(InFmt_VOP2 *iFmt)
1893 : Inst_VOP2(iFmt, "v_ashrrev_i16")
1894 {
1895 setFlag(ALU);
1896 } // Inst_VOP2__V_ASHRREV_I16
1897
1898 Inst_VOP2__V_ASHRREV_I16::~Inst_VOP2__V_ASHRREV_I16()
1899 {
1900 } // ~Inst_VOP2__V_ASHRREV_I16
1901
1902 // --- description from .arch file ---
1903 // D.i[15:0] = signext(S1.i[15:0]) >> S0.i[3:0].
1904 // The vacated bits are set to the sign bit of the input value.
1905 // SQ translates this to an internal SP opcode.
1906 void
1907 Inst_VOP2__V_ASHRREV_I16::execute(GPUDynInstPtr gpuDynInst)
1908 {
1909 Wavefront *wf = gpuDynInst->wavefront();
1910 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
1911 ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
1912 VecOperandI16 vdst(gpuDynInst, instData.VDST);
1913
1914 src0.readSrc();
1915 src1.read();
1916
1917 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
1918 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
1919
1920 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1921 if (wf->execMask(lane)) {
1922 vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0);
1923 }
1924 }
1925
1926 vdst.write();
1927 } // execute
1928 // --- Inst_VOP2__V_MAX_F16 class methods ---
1929
1930 Inst_VOP2__V_MAX_F16::Inst_VOP2__V_MAX_F16(InFmt_VOP2 *iFmt)
1931 : Inst_VOP2(iFmt, "v_max_f16")
1932 {
1933 setFlag(ALU);
1934 setFlag(F16);
1935 } // Inst_VOP2__V_MAX_F16
1936
1937 Inst_VOP2__V_MAX_F16::~Inst_VOP2__V_MAX_F16()
1938 {
1939 } // ~Inst_VOP2__V_MAX_F16
1940
1941 // --- description from .arch file ---
1942 // D.f16 = max(S0.f16, S1.f16).
1943 // IEEE compliant. Supports denormals, round mode, exception flags,
1944 // saturation.
1945 void
1946 Inst_VOP2__V_MAX_F16::execute(GPUDynInstPtr gpuDynInst)
1947 {
1948 panicUnimplemented();
1949 } // execute
1950 // --- Inst_VOP2__V_MIN_F16 class methods ---
1951
1952 Inst_VOP2__V_MIN_F16::Inst_VOP2__V_MIN_F16(InFmt_VOP2 *iFmt)
1953 : Inst_VOP2(iFmt, "v_min_f16")
1954 {
1955 setFlag(ALU);
1956 setFlag(F16);
1957 } // Inst_VOP2__V_MIN_F16
1958
1959 Inst_VOP2__V_MIN_F16::~Inst_VOP2__V_MIN_F16()
1960 {
1961 } // ~Inst_VOP2__V_MIN_F16
1962
1963 // --- description from .arch file ---
1964 // D.f16 = min(S0.f16, S1.f16).
1965 // IEEE compliant. Supports denormals, round mode, exception flags,
1966 // saturation.
1967 void
1968 Inst_VOP2__V_MIN_F16::execute(GPUDynInstPtr gpuDynInst)
1969 {
1970 panicUnimplemented();
1971 } // execute
1972 // --- Inst_VOP2__V_MAX_U16 class methods ---
1973
1974 Inst_VOP2__V_MAX_U16::Inst_VOP2__V_MAX_U16(InFmt_VOP2 *iFmt)
1975 : Inst_VOP2(iFmt, "v_max_u16")
1976 {
1977 setFlag(ALU);
1978 } // Inst_VOP2__V_MAX_U16
1979
1980 Inst_VOP2__V_MAX_U16::~Inst_VOP2__V_MAX_U16()
1981 {
1982 } // ~Inst_VOP2__V_MAX_U16
1983
1984 // --- description from .arch file ---
1985 // D.u[15:0] = max(S0.u[15:0], S1.u[15:0]).
1986 void
1987 Inst_VOP2__V_MAX_U16::execute(GPUDynInstPtr gpuDynInst)
1988 {
1989 Wavefront *wf = gpuDynInst->wavefront();
1990 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
1991 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
1992 VecOperandU16 vdst(gpuDynInst, instData.VDST);
1993
1994 src0.readSrc();
1995 src1.read();
1996
1997 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
1998 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
1999
2000 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2001 if (wf->execMask(lane)) {
2002 vdst[lane] = std::max(src0[lane], src1[lane]);
2003 }
2004 }
2005
2006 vdst.write();
2007 } // execute
2008 // --- Inst_VOP2__V_MAX_I16 class methods ---
2009
2010 Inst_VOP2__V_MAX_I16::Inst_VOP2__V_MAX_I16(InFmt_VOP2 *iFmt)
2011 : Inst_VOP2(iFmt, "v_max_i16")
2012 {
2013 setFlag(ALU);
2014 } // Inst_VOP2__V_MAX_I16
2015
2016 Inst_VOP2__V_MAX_I16::~Inst_VOP2__V_MAX_I16()
2017 {
2018 } // ~Inst_VOP2__V_MAX_I16
2019
2020 // --- description from .arch file ---
2021 // D.i[15:0] = max(S0.i[15:0], S1.i[15:0]).
2022 void
2023 Inst_VOP2__V_MAX_I16::execute(GPUDynInstPtr gpuDynInst)
2024 {
2025 Wavefront *wf = gpuDynInst->wavefront();
2026 ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
2027 ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
2028 VecOperandI16 vdst(gpuDynInst, instData.VDST);
2029
2030 src0.readSrc();
2031 src1.read();
2032
2033 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
2034 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
2035
2036 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2037 if (wf->execMask(lane)) {
2038 vdst[lane] = std::max(src0[lane], src1[lane]);
2039 }
2040 }
2041
2042 vdst.write();
2043 } // execute
2044 // --- Inst_VOP2__V_MIN_U16 class methods ---
2045
2046 Inst_VOP2__V_MIN_U16::Inst_VOP2__V_MIN_U16(InFmt_VOP2 *iFmt)
2047 : Inst_VOP2(iFmt, "v_min_u16")
2048 {
2049 setFlag(ALU);
2050 } // Inst_VOP2__V_MIN_U16
2051
2052 Inst_VOP2__V_MIN_U16::~Inst_VOP2__V_MIN_U16()
2053 {
2054 } // ~Inst_VOP2__V_MIN_U16
2055
2056 // --- description from .arch file ---
2057 // D.u[15:0] = min(S0.u[15:0], S1.u[15:0]).
2058 void
2059 Inst_VOP2__V_MIN_U16::execute(GPUDynInstPtr gpuDynInst)
2060 {
2061 Wavefront *wf = gpuDynInst->wavefront();
2062 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
2063 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
2064 VecOperandU16 vdst(gpuDynInst, instData.VDST);
2065
2066 src0.readSrc();
2067 src1.read();
2068
2069 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
2070 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
2071
2072 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2073 if (wf->execMask(lane)) {
2074 vdst[lane] = std::min(src0[lane], src1[lane]);
2075 }
2076 }
2077
2078 vdst.write();
2079 } // execute
2080 // --- Inst_VOP2__V_MIN_I16 class methods ---
2081
2082 Inst_VOP2__V_MIN_I16::Inst_VOP2__V_MIN_I16(InFmt_VOP2 *iFmt)
2083 : Inst_VOP2(iFmt, "v_min_i16")
2084 {
2085 setFlag(ALU);
2086 } // Inst_VOP2__V_MIN_I16
2087
2088 Inst_VOP2__V_MIN_I16::~Inst_VOP2__V_MIN_I16()
2089 {
2090 } // ~Inst_VOP2__V_MIN_I16
2091
2092 // --- description from .arch file ---
2093 // D.i[15:0] = min(S0.i[15:0], S1.i[15:0]).
2094 void
2095 Inst_VOP2__V_MIN_I16::execute(GPUDynInstPtr gpuDynInst)
2096 {
2097 Wavefront *wf = gpuDynInst->wavefront();
2098 ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
2099 ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
2100 VecOperandI16 vdst(gpuDynInst, instData.VDST);
2101
2102 src0.readSrc();
2103 src1.read();
2104
2105 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
2106 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
2107
2108 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2109 if (wf->execMask(lane)) {
2110 vdst[lane] = std::min(src0[lane], src1[lane]);
2111 }
2112 }
2113
2114 vdst.write();
2115 } // execute
2116 // --- Inst_VOP2__V_LDEXP_F16 class methods ---
2117
2118 Inst_VOP2__V_LDEXP_F16::Inst_VOP2__V_LDEXP_F16(InFmt_VOP2 *iFmt)
2119 : Inst_VOP2(iFmt, "v_ldexp_f16")
2120 {
2121 setFlag(ALU);
2122 setFlag(F16);
2123 } // Inst_VOP2__V_LDEXP_F16
2124
2125 Inst_VOP2__V_LDEXP_F16::~Inst_VOP2__V_LDEXP_F16()
2126 {
2127 } // ~Inst_VOP2__V_LDEXP_F16
2128
2129 // --- description from .arch file ---
2130 // D.f16 = S0.f16 * (2 ** S1.i16).
2131 void
2132 Inst_VOP2__V_LDEXP_F16::execute(GPUDynInstPtr gpuDynInst)
2133 {
2134 panicUnimplemented();
2135 } // execute
2136 // --- Inst_VOP2__V_ADD_U32 class methods ---
2137
2138 Inst_VOP2__V_ADD_U32::Inst_VOP2__V_ADD_U32(InFmt_VOP2 *iFmt)
2139 : Inst_VOP2(iFmt, "v_add_u32")
2140 {
2141 setFlag(ALU);
2142 } // Inst_VOP2__V_ADD_U32
2143
2144 Inst_VOP2__V_ADD_U32::~Inst_VOP2__V_ADD_U32()
2145 {
2146 } // ~Inst_VOP2__V_ADD_U32
2147
2148 // --- description from .arch file ---
2149 // D.u = S0.u + S1.u;
2150 void
2151 Inst_VOP2__V_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
2152 {
2153 Wavefront *wf = gpuDynInst->wavefront();
2154 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
2155 VecOperandU32 src1(gpuDynInst, instData.VSRC1);
2156 VecOperandU32 vdst(gpuDynInst, instData.VDST);
2157
2158 src0.readSrc();
2159 src1.read();
2160
2161 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
2162
2163 if (isSDWAInst()) {
2164 VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
2165 // use copies of original src0, src1, and dest during selecting
2166 VecOperandU32 origSrc0_sdwa(gpuDynInst,
2167 extData.iFmt_VOP_SDWA.SRC0);
2168 VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
2169 VecOperandU32 origVdst(gpuDynInst, instData.VDST);
2170
2171 src0_sdwa.read();
2172 origSrc0_sdwa.read();
2173 origSrc1.read();
2174
2175 DPRINTF(VEGA, "Handling V_ADD_U32 SRC SDWA. SRC0: register v[%d], "
2176 "DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, "
2177 "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
2178 "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
2179 extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
2180 extData.iFmt_VOP_SDWA.DST_U,
2181 extData.iFmt_VOP_SDWA.CLMP,
2182 extData.iFmt_VOP_SDWA.SRC0_SEL,
2183 extData.iFmt_VOP_SDWA.SRC0_SEXT,
2184 extData.iFmt_VOP_SDWA.SRC0_NEG,
2185 extData.iFmt_VOP_SDWA.SRC0_ABS,
2186 extData.iFmt_VOP_SDWA.SRC1_SEL,
2187 extData.iFmt_VOP_SDWA.SRC1_SEXT,
2188 extData.iFmt_VOP_SDWA.SRC1_NEG,
2189 extData.iFmt_VOP_SDWA.SRC1_ABS);
2190
2191 processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
2192 src1, origSrc1);
2193
2194 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2195 if (wf->execMask(lane)) {
2196 vdst[lane] = src0_sdwa[lane] + src1[lane];
2197 origVdst[lane] = vdst[lane]; // keep copy consistent
2198 }
2199 }
2200
2201 processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
2202 } else {
2203 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2204 if (wf->execMask(lane)) {
2205 vdst[lane] = src0[lane] + src1[lane];
2206 }
2207 }
2208 }
2209
2210 vdst.write();
2211 } // execute
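 // The SDWA path above keeps pristine copies (origSrc0_sdwa, origSrc1,
 // origVdst) because processSDWA_src rewrites the working sources
 // lane-by-lane according to the SRCx_SEL/SEXT/NEG/ABS fields before the
 // add, and processSDWA_dst then merges the sum back into the destination
 // under DST_SEL/DST_U control. A minimal standalone sketch of the core
 // sub-dword idea (sdwaSelectByteSext is a hypothetical helper for
 // illustration, not gem5's processSDWA_src):
 //
 //   #include <cstdint>
 //   #include <cstdio>
 //
 //   // Select byte 'sel' of a 32-bit source and sign-extend it --
 //   // roughly what SRCx_SEL = BYTE_n with SRCx_SEXT = 1 requests.
 //   uint32_t sdwaSelectByteSext(uint32_t src, int sel)
 //   {
 //       int8_t b = int8_t((src >> (8 * sel)) & 0xff);
 //       return uint32_t(int32_t(b)); // sign-extend to 32 bits
 //   }
 //
 //   int main()
 //   {
 //       // byte 1 of 0x1234ff00 is 0xff -> sign-extends to 0xffffffff
 //       std::printf("0x%08x\n", unsigned(sdwaSelectByteSext(0x1234ff00u, 1)));
 //       return 0;
 //   }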
2212 // --- Inst_VOP2__V_SUB_U32 class methods ---
2213
2214 Inst_VOP2__V_SUB_U32::Inst_VOP2__V_SUB_U32(InFmt_VOP2 *iFmt)
2215 : Inst_VOP2(iFmt, "v_sub_u32")
2216 {
2217 setFlag(ALU);
2218 } // Inst_VOP2__V_SUB_U32
2219
2220 Inst_VOP2__V_SUB_U32::~Inst_VOP2__V_SUB_U32()
2221 {
2222 } // ~Inst_VOP2__V_SUB_U32
2223
2224 // --- description from .arch file ---
2225 // D.u = S0.u - S1.u;
2226 void
2227 Inst_VOP2__V_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
2228 {
2229 Wavefront *wf = gpuDynInst->wavefront();
2230 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
2231 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
2232 VecOperandU32 vdst(gpuDynInst, instData.VDST);
2233
2234 src0.readSrc();
2235 src1.read();
2236
2237 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
2238 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
2239
2240 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2241 if (wf->execMask(lane)) {
2242 vdst[lane] = src0[lane] - src1[lane];
2243 }
2244 }
2245
2246 vdst.write();
2247 } // execute
2248 // --- Inst_VOP2__V_SUBREV_U32 class methods ---
2249
2250 Inst_VOP2__V_SUBREV_U32::Inst_VOP2__V_SUBREV_U32(InFmt_VOP2 *iFmt)
2251 : Inst_VOP2(iFmt, "v_subrev_u32")
2252 {
2253 setFlag(ALU);
2254 } // Inst_VOP2__V_SUBREV_U32
2255
2256 Inst_VOP2__V_SUBREV_U32::~Inst_VOP2__V_SUBREV_U32()
2257 {
2258 } // ~Inst_VOP2__V_SUBREV_U32
2259
2260 // --- description from .arch file ---
2261 // D.u = S1.u - S0.u;
2262 void
2263 Inst_VOP2__V_SUBREV_U32::execute(GPUDynInstPtr gpuDynInst)
2264 {
2265 Wavefront *wf = gpuDynInst->wavefront();
2266 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
2267 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
2268 VecOperandU32 vdst(gpuDynInst, instData.VDST);
2269
2270 src0.readSrc();
2271 src1.read();
2272
2273 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
2274 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
2275
2276 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2277 if (wf->execMask(lane)) {
2278 vdst[lane] = src1[lane] - src0[lane];
2279 }
2280 }
2281
2282 vdst.write();
2283 } // execute
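 // v_subrev_u32 is v_sub_u32 with the operands swapped (D = S1 - S0).
 // In the VOP2 encoding only SRC0 can name a scalar register or inline
 // constant, so the *rev form is what makes "vector minus constant"
 // expressible in a single VOP2 instruction. A standalone sketch of one
 // lane, including the unsigned wraparound both ops share:
 //
 //   #include <cstdint>
 //   #include <cstdio>
 //
 //   int main()
 //   {
 //       uint32_t s0 = 10, s1 = 3; // s0 could be an inline constant
 //       std::printf("v_sub_u32:    %u\n", s0 - s1); // 7
 //       std::printf("v_subrev_u32: %u\n", s1 - s0); // 4294967289 (wraps)
 //       return 0;
 //   }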
2284 // --- Inst_VOP2__V_FMAC_F32 class methods ---
2285
2286 Inst_VOP2__V_FMAC_F32::Inst_VOP2__V_FMAC_F32(InFmt_VOP2 *iFmt)
2287 : Inst_VOP2(iFmt, "v_fmac_f32")
2288 {
2289 setFlag(ALU);
2290 } // Inst_VOP2__V_FMAC_F32
2291
2292 Inst_VOP2__V_FMAC_F32::~Inst_VOP2__V_FMAC_F32()
2293 {
2294 } // ~Inst_VOP2__V_FMAC_F32
2295
2296 // --- description from .arch file ---
2297 // D.f = S0.f * S1.f + D.f.
2298 void
2299 Inst_VOP2__V_FMAC_F32::execute(GPUDynInstPtr gpuDynInst)
2300 {
2301 Wavefront *wf = gpuDynInst->wavefront();
2302 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
2303 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
2304 VecOperandF32 vdst(gpuDynInst, instData.VDST);
2305
2306 src0.readSrc();
2307 src1.read();
2308 vdst.read();
2309
2310 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
2311 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
2312
2313 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2314 if (wf->execMask(lane)) {
2315 vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
2316 }
2317 }
2318
2319 vdst.write();
2320 } // execute
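 // Note the vdst.read() above: FMAC treats the destination as a third
 // source (an accumulator), and std::fma applies a single rounding to
 // a*b + c. A standalone sketch of why fused and unfused accumulation
 // differ (assumes the compiler is not already contracting a*b + c into
 // an fma itself, e.g. built with -ffp-contract=off):
 //
 //   #include <cmath>
 //   #include <cstdio>
 //
 //   int main()
 //   {
 //       float a = 1e8f, b = 1.0f + 1e-7f, c = -1e8f;
 //       float unfused = a * b + c;         // rounds the product first
 //       float fused   = std::fma(a, b, c); // rounds once at the end
 //       std::printf("%g vs %g\n", unfused, fused); // the values differ
 //       return 0;
 //   }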
2321 // --- Inst_VOP2__V_FMAC_F64 class methods ---
2322
2323 Inst_VOP2__V_FMAC_F64::Inst_VOP2__V_FMAC_F64(InFmt_VOP2 *iFmt)
2324 : Inst_VOP2(iFmt, "v_fmac_f64")
2325 {
2326 setFlag(ALU);
2327 } // Inst_VOP2__V_FMAC_F64
2328
2329 Inst_VOP2__V_FMAC_F64::~Inst_VOP2__V_FMAC_F64()
2330 {
2331 } // ~Inst_VOP2__V_FMAC_F64
2332
2333 // --- description from .arch file ---
2334 // D0.f64 = fma(S0.f64, S1.f64, D0.f64)
2335 void
2336 Inst_VOP2__V_FMAC_F64::execute(GPUDynInstPtr gpuDynInst)
2337 {
2338 Wavefront *wf = gpuDynInst->wavefront();
2339 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
2340 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
2341 VecOperandF64 vdst(gpuDynInst, instData.VDST);
2342
2343 src0.readSrc();
2344 src1.read();
2345 vdst.read();
2346
2347 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
2348 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
2349
2350 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2351 if (wf->execMask(lane)) {
2352 vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
2353 }
2354 }
2355
2356 vdst.write();
2357 } // execute
2358 // --- Inst_VOP2__V_XNOR_B32 class methods ---
2359
2360 Inst_VOP2__V_XNOR_B32::Inst_VOP2__V_XNOR_B32(InFmt_VOP2 *iFmt)
2361 : Inst_VOP2(iFmt, "v_xnor_b32")
2362 {
2363 setFlag(ALU);
2364 } // Inst_VOP2__V_XNOR_B32
2365
2366 Inst_VOP2__V_XNOR_B32::~Inst_VOP2__V_XNOR_B32()
2367 {
2368 } // ~Inst_VOP2__V_XNOR_B32
2369
2370 // --- description from .arch file ---
2371 // D.u = ~(S0.u ^ S1.u).
2372 void
2374 {
2375 Wavefront *wf = gpuDynInst->wavefront();
2376 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
2377 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
2378 VecOperandU32 vdst(gpuDynInst, instData.VDST);
2379
2380 src0.readSrc();
2381 src1.read();
2382 vdst.read();
2383
2384 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
2385 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
2386
2387 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2388 if (wf->execMask(lane)) {
2389 vdst[lane] = ~(src0[lane] ^ src1[lane]);
2390 }
2391 }
2392
2393 vdst.write();
2394 } // execute
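 // XNOR produces a per-bit equality mask: each result bit is set exactly
 // where S0 and S1 agree. A one-lane standalone sketch:
 //
 //   #include <cstdint>
 //   #include <cstdio>
 //
 //   int main()
 //   {
 //       uint32_t s0 = 0b1010, s1 = 0b0110;
 //       std::printf("0x%08x\n", ~(s0 ^ s1)); // 0xfffffff3
 //       return 0;
 //   }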
2395} // namespace VegaISA
2396} // namespace gem5