gem5 v24.0.0.0
Loading...
Searching...
No Matches
vop1.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2024 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
34
35namespace gem5
36{
37
38namespace VegaISA
39{
40 // --- Inst_VOP1__V_NOP class methods ---
41
43 : Inst_VOP1(iFmt, "v_nop")
44 {
45 setFlag(Nop);
46 setFlag(ALU);
47 } // Inst_VOP1__V_NOP
48
50 {
51 } // ~Inst_VOP1__V_NOP
52
53 // --- description from .arch file ---
54 // Do nothing.
55 void
57 {
58 } // execute
59 // --- Inst_VOP1__V_MOV_B32 class methods ---
60
62 : Inst_VOP1(iFmt, "v_mov_b32")
63 {
64 setFlag(ALU);
65 } // Inst_VOP1__V_MOV_B32
66
68 {
69 } // ~Inst_VOP1__V_MOV_B32
70
71 // --- description from .arch file ---
72 // D.u = S0.u.
73 // Input and output modifiers not supported; this is an untyped operation.
74 void
76 {
77 Wavefront *wf = gpuDynInst->wavefront();
78 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
79 VecOperandU32 vdst(gpuDynInst, instData.VDST);
80
81 src.readSrc();
82
83 if (isDPPInst()) {
84 VecOperandU32 src_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
85 src_dpp.read();
86
87 DPRINTF(VEGA, "Handling V_MOV_B32 SRC DPP. SRC0: register v[%d], "
88 "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
89 "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, "
90 "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0,
99
100 // NOTE: For VOP1, there is no SRC1, so make sure we're not trying
101 // to negate it or take the absolute value of it
104 processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src_dpp);
105
106 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
107 if (wf->execMask(lane)) {
108 vdst[lane] = src_dpp[lane];
109 }
110 }
111 } else {
112 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
113 if (wf->execMask(lane)) {
114 vdst[lane] = src[lane];
115 }
116 }
117 }
118
119 vdst.write();
120 } // execute
121 // --- Inst_VOP1__V_READFIRSTLANE_B32 class methods ---
122
124 InFmt_VOP1 *iFmt)
125 : Inst_VOP1(iFmt, "v_readfirstlane_b32")
126 {
127 setFlag(ALU);
128 } // Inst_VOP1__V_READFIRSTLANE_B32
129
131 {
132 } // ~Inst_VOP1__V_READFIRSTLANE_B32
133
134 // --- description from .arch file ---
135 // Copy one VGPR value to one SGPR. D = SGPR destination, S0 = source data
136 // (VGPR# or M0 for lds direct access), Lane# = FindFirst1fromLSB(exec)
137 // (Lane# = 0 if exec is zero). Ignores exec mask for the access. SQ
138 // translates to V_READLANE_B32.
139 // Input and output modifiers not supported; this is an untyped operation.
140 void
142 {
143 Wavefront *wf = gpuDynInst->wavefront();
144 ScalarRegI32 src_lane(0);
145 ScalarRegU64 exec_mask = wf->execMask().to_ullong();
146 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
147 ScalarOperandU32 sdst(gpuDynInst, instData.VDST);
148
149 src.readSrc();
150
151 if (exec_mask) {
152 src_lane = findLsbSet(exec_mask);
153 }
154
155 sdst = src[src_lane];
156
157 sdst.write();
158 } // execute
159 // --- Inst_VOP1__V_CVT_I32_F64 class methods ---
160
162 : Inst_VOP1(iFmt, "v_cvt_i32_f64")
163 {
164 setFlag(ALU);
165 setFlag(F64);
166 } // Inst_VOP1__V_CVT_I32_F64
167
169 {
170 } // ~Inst_VOP1__V_CVT_I32_F64
171
172 // --- description from .arch file ---
173 // D.i = (int)S0.d.
174 // Out-of-range floating point values (including infinity) saturate. NaN is
175 // --- converted to 0.
176 void
178 {
179 Wavefront *wf = gpuDynInst->wavefront();
180 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
181 VecOperandI32 vdst(gpuDynInst, instData.VDST);
182
183 src.readSrc();
184
185 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
186 if (wf->execMask(lane)) {
187 int exp;
188 std::frexp(src[lane],&exp);
189 if (std::isnan(src[lane])) {
190 vdst[lane] = 0;
191 } else if (std::isinf(src[lane]) || exp > 30) {
192 if (std::signbit(src[lane])) {
193 vdst[lane] = INT_MIN;
194 } else {
195 vdst[lane] = INT_MAX;
196 }
197 } else {
198 vdst[lane] = (VecElemI32)src[lane];
199 }
200 }
201 }
202
203 vdst.write();
204 } // execute
205 // --- Inst_VOP1__V_CVT_F64_I32 class methods ---
206
208 : Inst_VOP1(iFmt, "v_cvt_f64_i32")
209 {
210 setFlag(ALU);
211 setFlag(F64);
212 } // Inst_VOP1__V_CVT_F64_I32
213
215 {
216 } // ~Inst_VOP1__V_CVT_F64_I32
217
218 // --- description from .arch file ---
219 // D.d = (double)S0.i.
220 void
222 {
223 Wavefront *wf = gpuDynInst->wavefront();
224 ConstVecOperandI32 src(gpuDynInst, instData.SRC0);
225 VecOperandF64 vdst(gpuDynInst, instData.VDST);
226
227 src.readSrc();
228
229 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
230 if (wf->execMask(lane)) {
231 vdst[lane] = (VecElemF64)src[lane];
232 }
233 }
234
235 vdst.write();
236 } // execute
237 // --- Inst_VOP1__V_CVT_F32_I32 class methods ---
238
240 : Inst_VOP1(iFmt, "v_cvt_f32_i32")
241 {
242 setFlag(ALU);
243 setFlag(F32);
244 } // Inst_VOP1__V_CVT_F32_I32
245
247 {
248 } // ~Inst_VOP1__V_CVT_F32_I32
249
250 // --- description from .arch file ---
251 // D.f = (float)S0.i.
252 void
254 {
255 Wavefront *wf = gpuDynInst->wavefront();
256 ConstVecOperandI32 src(gpuDynInst, instData.SRC0);
257 VecOperandF32 vdst(gpuDynInst, instData.VDST);
258
259 src.readSrc();
260
261 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
262 if (wf->execMask(lane)) {
263 vdst[lane] = (VecElemF32)src[lane];
264 }
265 }
266
267 vdst.write();
268 } // execute
269 // --- Inst_VOP1__V_CVT_F32_U32 class methods ---
270
272 : Inst_VOP1(iFmt, "v_cvt_f32_u32")
273 {
274 setFlag(ALU);
275 setFlag(F32);
276 } // Inst_VOP1__V_CVT_F32_U32
277
279 {
280 } // ~Inst_VOP1__V_CVT_F32_U32
281
282 // --- description from .arch file ---
283 // D.f = (float)S0.u.
284 void
286 {
287 Wavefront *wf = gpuDynInst->wavefront();
288 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
289 VecOperandF32 vdst(gpuDynInst, instData.VDST);
290
291 src.readSrc();
292
293 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
294 if (wf->execMask(lane)) {
295 vdst[lane] = (VecElemF32)src[lane];
296 }
297 }
298
299 vdst.write();
300 } // execute
301 // --- Inst_VOP1__V_CVT_U32_F32 class methods ---
302
304 : Inst_VOP1(iFmt, "v_cvt_u32_f32")
305 {
306 setFlag(ALU);
307 setFlag(F32);
308 } // Inst_VOP1__V_CVT_U32_F32
309
311 {
312 } // ~Inst_VOP1__V_CVT_U32_F32
313
314 // --- description from .arch file ---
315 // D.u = (unsigned)S0.f.
316 // Out-of-range floating point values (including infinity) saturate. NaN is
317 // --- converted to 0.
318 void
320 {
321 Wavefront *wf = gpuDynInst->wavefront();
322 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
323 VecOperandU32 vdst(gpuDynInst, instData.VDST);
324
325 src.readSrc();
326
327 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
328 if (wf->execMask(lane)) {
329 int exp;
330 std::frexp(src[lane],&exp);
331 if (std::isnan(src[lane])) {
332 vdst[lane] = 0;
333 } else if (std::isinf(src[lane])) {
334 if (std::signbit(src[lane])) {
335 vdst[lane] = 0;
336 } else {
337 vdst[lane] = UINT_MAX;
338 }
339 } else if (exp > 31) {
340 vdst[lane] = UINT_MAX;
341 } else {
342 vdst[lane] = (VecElemU32)src[lane];
343 }
344 }
345 }
346
347 vdst.write();
348 } // execute
349 // --- Inst_VOP1__V_CVT_I32_F32 class methods ---
350
352 : Inst_VOP1(iFmt, "v_cvt_i32_f32")
353 {
354 setFlag(ALU);
355 setFlag(F32);
356 } // Inst_VOP1__V_CVT_I32_F32
357
359 {
360 } // ~Inst_VOP1__V_CVT_I32_F32
361
362 // --- description from .arch file ---
363 // D.i = (int)S0.f.
364 // Out-of-range floating point values (including infinity) saturate. NaN is
365 // --- converted to 0.
366 void
368 {
369 Wavefront *wf = gpuDynInst->wavefront();
370 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
371 VecOperandI32 vdst(gpuDynInst, instData.VDST);
372
373 src.readSrc();
374
375 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
376 if (wf->execMask(lane)) {
377 int exp;
378 std::frexp(src[lane],&exp);
379 if (std::isnan(src[lane])) {
380 vdst[lane] = 0;
381 } else if (std::isinf(src[lane]) || exp > 30) {
382 if (std::signbit(src[lane])) {
383 vdst[lane] = INT_MIN;
384 } else {
385 vdst[lane] = INT_MAX;
386 }
387 } else {
388 vdst[lane] = (VecElemI32)src[lane];
389 }
390 }
391 }
392
393 vdst.write();
394 } // execute
395 // --- Inst_VOP1__V_MOV_FED_B32 class methods ---
396
398 : Inst_VOP1(iFmt, "v_mov_fed_b32")
399 {
400 setFlag(ALU);
401 } // Inst_VOP1__V_MOV_FED_B32
402
404 {
405 } // ~Inst_VOP1__V_MOV_FED_B32
406
407 // --- description from .arch file ---
408 // D.u = S0.u;
409 // Introduce EDC double error upon write to dest vgpr without causing an
410 // --- exception.
411 // Input and output modifiers not supported; this is an untyped operation.
412 void
414 {
416 } // execute
417 // --- Inst_VOP1__V_CVT_F16_F32 class methods ---
418
420 : Inst_VOP1(iFmt, "v_cvt_f16_f32")
421 {
422 setFlag(ALU);
423 setFlag(F32);
424 } // Inst_VOP1__V_CVT_F16_F32
425
427 {
428 } // ~Inst_VOP1__V_CVT_F16_F32
429
430 // --- description from .arch file ---
431 // D.f16 = flt32_to_flt16(S0.f).
432 // Supports input modifiers and creates FP16 denormals when appropriate.
433 void
435 {
436 Wavefront *wf = gpuDynInst->wavefront();
437 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
438 VecOperandU32 vdst(gpuDynInst, instData.VDST);
439
440 src.readSrc();
441
442 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
443 if (wf->execMask(lane)) {
444 float tmp = src[lane];
445 AMDGPU::mxfloat16 out(tmp);
446
447 vdst[lane] = (out.data >> 16);
448 }
449 }
450
451 vdst.write();
452 } // execute
453 // --- Inst_VOP1__V_CVT_F32_F16 class methods ---
454
456 : Inst_VOP1(iFmt, "v_cvt_f32_f16")
457 {
458 setFlag(ALU);
459 setFlag(F32);
460 } // Inst_VOP1__V_CVT_F32_F16
461
463 {
464 } // ~Inst_VOP1__V_CVT_F32_F16
465
466 // --- description from .arch file ---
467 // D.f = flt16_to_flt32(S0.f16).
468 // FP16 denormal inputs are always accepted.
469 void
471 {
472 Wavefront *wf = gpuDynInst->wavefront();
473 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
474 VecOperandF32 vdst(gpuDynInst, instData.VDST);
475
476 src.readSrc();
477
478 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
479 if (wf->execMask(lane)) {
480 AMDGPU::mxfloat16 tmp(src[lane]);
481 vdst[lane] = float(tmp);
482 }
483 }
484
485 vdst.write();
486 } // execute
487 // --- Inst_VOP1__V_CVT_RPI_I32_F32 class methods ---
488
490 InFmt_VOP1 *iFmt)
491 : Inst_VOP1(iFmt, "v_cvt_rpi_i32_f32")
492 {
493 setFlag(ALU);
494 setFlag(F32);
495 } // Inst_VOP1__V_CVT_RPI_I32_F32
496
498 {
499 } // ~Inst_VOP1__V_CVT_RPI_I32_F32
500
501 // --- description from .arch file ---
502 // D.i = (int)floor(S0.f + 0.5).
503 void
505 {
506 Wavefront *wf = gpuDynInst->wavefront();
507 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
508 VecOperandI32 vdst(gpuDynInst, instData.VDST);
509
510 src.readSrc();
511
512 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
513 if (wf->execMask(lane)) {
514 vdst[lane] = (VecElemI32)std::floor(src[lane] + 0.5);
515 }
516 }
517
518 vdst.write();
519 } // execute
520 // --- Inst_VOP1__V_CVT_FLR_I32_F32 class methods ---
521
523 InFmt_VOP1 *iFmt)
524 : Inst_VOP1(iFmt, "v_cvt_flr_i32_f32")
525 {
526 setFlag(ALU);
527 setFlag(F32);
528 } // Inst_VOP1__V_CVT_FLR_I32_F32
529
531 {
532 } // ~Inst_VOP1__V_CVT_FLR_I32_F32
533
534 // --- description from .arch file ---
535 // D.i = (int)floor(S0.f).
536 void
538 {
539 Wavefront *wf = gpuDynInst->wavefront();
540 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
541 VecOperandI32 vdst(gpuDynInst, instData.VDST);
542
543 src.readSrc();
544
545 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
546 if (wf->execMask(lane)) {
547 vdst[lane] = (VecElemI32)std::floor(src[lane]);
548 }
549 }
550
551 vdst.write();
552 } // execute
553 // --- Inst_VOP1__V_CVT_OFF_F32_I4 class methods ---
554
556 : Inst_VOP1(iFmt, "v_cvt_off_f32_i4")
557 {
558 setFlag(ALU);
559 setFlag(F32);
560 } // Inst_VOP1__V_CVT_OFF_F32_I4
561
563 {
564 } // ~Inst_VOP1__V_CVT_OFF_F32_I4
565
566 // --- description from .arch file ---
567 // 4-bit signed int to 32-bit float. Used for interpolation in shader.
568 void
570 {
571 // Could not parse sq_uc.arch desc field
573 } // execute
574 // --- Inst_VOP1__V_CVT_F32_F64 class methods ---
575
577 : Inst_VOP1(iFmt, "v_cvt_f32_f64")
578 {
579 setFlag(ALU);
580 setFlag(F64);
581 } // Inst_VOP1__V_CVT_F32_F64
582
584 {
585 } // ~Inst_VOP1__V_CVT_F32_F64
586
587 // --- description from .arch file ---
588 // D.f = (float)S0.d.
589 void
591 {
592 Wavefront *wf = gpuDynInst->wavefront();
593 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
594 VecOperandF32 vdst(gpuDynInst, instData.VDST);
595
596 src.readSrc();
597
598 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
599 if (wf->execMask(lane)) {
600 vdst[lane] = (VecElemF32)src[lane];
601 }
602 }
603
604 vdst.write();
605 } // execute
606 // --- Inst_VOP1__V_CVT_F64_F32 class methods ---
607
609 : Inst_VOP1(iFmt, "v_cvt_f64_f32")
610 {
611 setFlag(ALU);
612 setFlag(F64);
613 } // Inst_VOP1__V_CVT_F64_F32
614
616 {
617 } // ~Inst_VOP1__V_CVT_F64_F32
618
619 // --- description from .arch file ---
620 // D.d = (double)S0.f.
621 void
623 {
624 Wavefront *wf = gpuDynInst->wavefront();
625 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
626 VecOperandF64 vdst(gpuDynInst, instData.VDST);
627
628 src.readSrc();
629
630 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
631 if (wf->execMask(lane)) {
632 vdst[lane] = (VecElemF64)src[lane];
633 }
634 }
635
636 vdst.write();
637 } // execute
638 // --- Inst_VOP1__V_CVT_F32_UBYTE0 class methods ---
639
641 : Inst_VOP1(iFmt, "v_cvt_f32_ubyte0")
642 {
643 setFlag(ALU);
644 setFlag(F32);
645 } // Inst_VOP1__V_CVT_F32_UBYTE0
646
648 {
649 } // ~Inst_VOP1__V_CVT_F32_UBYTE0
650
651 // --- description from .arch file ---
652 // D.f = (float)(S0.u[7:0]).
653 void
655 {
656 Wavefront *wf = gpuDynInst->wavefront();
657 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
658 VecOperandF32 vdst(gpuDynInst, instData.VDST);
659
660 src.readSrc();
661
662 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
663 if (wf->execMask(lane)) {
664 vdst[lane] = (VecElemF32)(bits(src[lane], 7, 0));
665 }
666 }
667
668 vdst.write();
669 } // execute
670 // --- Inst_VOP1__V_CVT_F32_UBYTE1 class methods ---
671
673 : Inst_VOP1(iFmt, "v_cvt_f32_ubyte1")
674 {
675 setFlag(ALU);
676 setFlag(F32);
677 } // Inst_VOP1__V_CVT_F32_UBYTE1
678
680 {
681 } // ~Inst_VOP1__V_CVT_F32_UBYTE1
682
683 // --- description from .arch file ---
684 // D.f = (float)(S0.u[15:8]).
685 void
687 {
688 Wavefront *wf = gpuDynInst->wavefront();
689 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
690 VecOperandF32 vdst(gpuDynInst, instData.VDST);
691
692 src.readSrc();
693
694 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
695 if (wf->execMask(lane)) {
696 vdst[lane] = (VecElemF32)(bits(src[lane], 15, 8));
697 }
698 }
699
700 vdst.write();
701 } // execute
702 // --- Inst_VOP1__V_CVT_F32_UBYTE2 class methods ---
703
705 : Inst_VOP1(iFmt, "v_cvt_f32_ubyte2")
706 {
707 setFlag(ALU);
708 setFlag(F32);
709 } // Inst_VOP1__V_CVT_F32_UBYTE2
710
712 {
713 } // ~Inst_VOP1__V_CVT_F32_UBYTE2
714
715 // --- description from .arch file ---
716 // D.f = (float)(S0.u[23:16]).
717 void
719 {
720 Wavefront *wf = gpuDynInst->wavefront();
721 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
722 VecOperandF32 vdst(gpuDynInst, instData.VDST);
723
724 src.readSrc();
725
726 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
727 if (wf->execMask(lane)) {
728 vdst[lane] = (VecElemF32)(bits(src[lane], 23, 16));
729 }
730 }
731
732 vdst.write();
733 } // execute
734 // --- Inst_VOP1__V_CVT_F32_UBYTE3 class methods ---
735
737 : Inst_VOP1(iFmt, "v_cvt_f32_ubyte3")
738 {
739 setFlag(ALU);
740 setFlag(F32);
741 } // Inst_VOP1__V_CVT_F32_UBYTE3
742
744 {
745 } // ~Inst_VOP1__V_CVT_F32_UBYTE3
746
747 // --- description from .arch file ---
748 // D.f = (float)(S0.u[31:24]).
749 void
751 {
752 Wavefront *wf = gpuDynInst->wavefront();
753 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
754 VecOperandF32 vdst(gpuDynInst, instData.VDST);
755
756 src.readSrc();
757
758 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
759 if (wf->execMask(lane)) {
760 vdst[lane] = (VecElemF32)(bits(src[lane], 31, 24));
761 }
762 }
763
764 vdst.write();
765 } // execute
766 // --- Inst_VOP1__V_CVT_U32_F64 class methods ---
767
769 : Inst_VOP1(iFmt, "v_cvt_u32_f64")
770 {
771 setFlag(ALU);
772 setFlag(F64);
773 } // Inst_VOP1__V_CVT_U32_F64
774
776 {
777 } // ~Inst_VOP1__V_CVT_U32_F64
778
779 // --- description from .arch file ---
780 // D.u = (unsigned)S0.d.
781 // Out-of-range floating point values (including infinity) saturate. NaN is
782 // --- converted to 0.
783 void
785 {
786 Wavefront *wf = gpuDynInst->wavefront();
787 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
788 VecOperandU32 vdst(gpuDynInst, instData.VDST);
789
790 src.readSrc();
791
792 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
793 if (wf->execMask(lane)) {
794 int exp;
795 std::frexp(src[lane],&exp);
796 if (std::isnan(src[lane])) {
797 vdst[lane] = 0;
798 } else if (std::isinf(src[lane])) {
799 if (std::signbit(src[lane])) {
800 vdst[lane] = 0;
801 } else {
802 vdst[lane] = UINT_MAX;
803 }
804 } else if (exp > 31) {
805 vdst[lane] = UINT_MAX;
806 } else {
807 vdst[lane] = (VecElemU32)src[lane];
808 }
809 }
810 }
811
812 vdst.write();
813 } // execute
814 // --- Inst_VOP1__V_CVT_F64_U32 class methods ---
815
817 : Inst_VOP1(iFmt, "v_cvt_f64_u32")
818 {
819 setFlag(ALU);
820 setFlag(F64);
821 } // Inst_VOP1__V_CVT_F64_U32
822
824 {
825 } // ~Inst_VOP1__V_CVT_F64_U32
826
827 // --- description from .arch file ---
828 // D.d = (double)S0.u.
829 void
831 {
832 Wavefront *wf = gpuDynInst->wavefront();
833 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
834 VecOperandF64 vdst(gpuDynInst, instData.VDST);
835
836 src.readSrc();
837
838 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
839 if (wf->execMask(lane)) {
840 vdst[lane] = (VecElemF64)src[lane];
841 }
842 }
843
844 vdst.write();
845 } // execute
846 // --- Inst_VOP1__V_TRUNC_F64 class methods ---
847
849 : Inst_VOP1(iFmt, "v_trunc_f64")
850 {
851 setFlag(ALU);
852 setFlag(F64);
853 } // Inst_VOP1__V_TRUNC_F64
854
856 {
857 } // ~Inst_VOP1__V_TRUNC_F64
858
859 // --- description from .arch file ---
860 // D.d = trunc(S0.d), return integer part of S0.d.
861 void
863 {
864 Wavefront *wf = gpuDynInst->wavefront();
865 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
866 VecOperandF64 vdst(gpuDynInst, instData.VDST);
867
868 src.readSrc();
869
870 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
871 if (wf->execMask(lane)) {
872 vdst[lane] = std::trunc(src[lane]);
873 }
874 }
875
876 vdst.write();
877 } // execute
878 // --- Inst_VOP1__V_CEIL_F64 class methods ---
879
881 : Inst_VOP1(iFmt, "v_ceil_f64")
882 {
883 setFlag(ALU);
884 setFlag(F64);
885 } // Inst_VOP1__V_CEIL_F64
886
888 {
889 } // ~Inst_VOP1__V_CEIL_F64
890
891 // --- description from .arch file ---
892 // D.d = trunc(S0.d);
893 // if (S0.d > 0.0 && S0.d != D.d) then D.d += 1.0.
894 void
896 {
897 Wavefront *wf = gpuDynInst->wavefront();
898 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
899 VecOperandF64 vdst(gpuDynInst, instData.VDST);
900
901 src.readSrc();
902
903 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
904 if (wf->execMask(lane)) {
905 vdst[lane] = std::ceil(src[lane]);
906 }
907 }
908
909 vdst.write();
910 } // execute
911 // --- Inst_VOP1__V_RNDNE_F64 class methods ---
912
914 : Inst_VOP1(iFmt, "v_rndne_f64")
915 {
916 setFlag(ALU);
917 setFlag(F64);
918 } // Inst_VOP1__V_RNDNE_F64
919
921 {
922 } // ~Inst_VOP1__V_RNDNE_F64
923
924 // --- description from .arch file ---
925 // D.d = round_nearest_even(S0.d).
926 void
928 {
929 Wavefront *wf = gpuDynInst->wavefront();
930 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
931 VecOperandF64 vdst(gpuDynInst, instData.VDST);
932
933 src.readSrc();
934
935 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
936 if (wf->execMask(lane)) {
937 vdst[lane] = roundNearestEven(src[lane]);
938 }
939 }
940
941 vdst.write();
942 } // execute
943 // --- Inst_VOP1__V_FLOOR_F64 class methods ---
944
946 : Inst_VOP1(iFmt, "v_floor_f64")
947 {
948 setFlag(ALU);
949 setFlag(F64);
950 } // Inst_VOP1__V_FLOOR_F64
951
953 {
954 } // ~Inst_VOP1__V_FLOOR_F64
955
956 // --- description from .arch file ---
957 // D.d = trunc(S0.d);
958 // if (S0.d < 0.0 && S0.d != D.d) then D.d += -1.0.
959 void
961 {
962 Wavefront *wf = gpuDynInst->wavefront();
963 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
964 VecOperandF64 vdst(gpuDynInst, instData.VDST);
965
966 src.readSrc();
967
968 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
969 if (wf->execMask(lane)) {
970 vdst[lane] = std::floor(src[lane]);
971 }
972 }
973
974 vdst.write();
975 } // execute
976 // --- Inst_VOP1__V_FRACT_F32 class methods ---
977
979 : Inst_VOP1(iFmt, "v_fract_f32")
980 {
981 setFlag(ALU);
982 setFlag(F32);
983 } // Inst_VOP1__V_FRACT_F32
984
986 {
987 } // ~Inst_VOP1__V_FRACT_F32
988
989 // --- description from .arch file ---
990 // D.f = S0.f - floor(S0.f).
991 void
993 {
994 Wavefront *wf = gpuDynInst->wavefront();
995 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
996 VecOperandF32 vdst(gpuDynInst, instData.VDST);
997
998 src.readSrc();
999
1000 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1001 if (wf->execMask(lane)) {
1002 VecElemF32 int_part(0.0);
1003 vdst[lane] = std::modf(src[lane], &int_part);
1004 }
1005 }
1006
1007 vdst.write();
1008 } // execute
1009 // --- Inst_VOP1__V_TRUNC_F32 class methods ---
1010
1012 : Inst_VOP1(iFmt, "v_trunc_f32")
1013 {
1014 setFlag(ALU);
1015 setFlag(F32);
1016 } // Inst_VOP1__V_TRUNC_F32
1017
1019 {
1020 } // ~Inst_VOP1__V_TRUNC_F32
1021
1022 // --- description from .arch file ---
1023 // D.f = trunc(S0.f), return integer part of S0.f.
1024 void
1026 {
1027 Wavefront *wf = gpuDynInst->wavefront();
1028 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
1029 VecOperandF32 vdst (gpuDynInst, instData.VDST);
1030
1031 src.readSrc();
1032
1033 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1034 if (wf->execMask(lane)) {
1035 vdst[lane] = std::trunc(src[lane]);
1036 }
1037 }
1038
1039 vdst.write();
1040 } // execute
1041 // --- Inst_VOP1__V_CEIL_F32 class methods ---
1042
1044 : Inst_VOP1(iFmt, "v_ceil_f32")
1045 {
1046 setFlag(ALU);
1047 setFlag(F32);
1048 } // Inst_VOP1__V_CEIL_F32
1049
1051 {
1052 } // ~Inst_VOP1__V_CEIL_F32
1053
1054 // --- description from .arch file ---
1055 // D.f = trunc(S0.f);
1056 // if (S0.f > 0.0 && S0.f != D.f) then D.f += 1.0.
1057 void
1059 {
1060 Wavefront *wf = gpuDynInst->wavefront();
1061 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
1062 VecOperandF32 vdst(gpuDynInst, instData.VDST);
1063
1064 src.readSrc();
1065
1066 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1067 if (wf->execMask(lane)) {
1068 vdst[lane] = std::ceil(src[lane]);
1069 }
1070 }
1071
1072 vdst.write();
1073 } // execute
1074 // --- Inst_VOP1__V_RNDNE_F32 class methods ---
1075
1077 : Inst_VOP1(iFmt, "v_rndne_f32")
1078 {
1079 setFlag(ALU);
1080 setFlag(F32);
1081 } // Inst_VOP1__V_RNDNE_F32
1082
1084 {
1085 } // ~Inst_VOP1__V_RNDNE_F32
1086
1087 // --- description from .arch file ---
1088 // D.f = round_nearest_even(S0.f).
1089 void
1091 {
1092 Wavefront *wf = gpuDynInst->wavefront();
1093 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
1094 VecOperandF32 vdst(gpuDynInst, instData.VDST);
1095
1096 src.readSrc();
1097
1098 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1099 if (wf->execMask(lane)) {
1100 vdst[lane] = roundNearestEven(src[lane]);
1101 }
1102 }
1103
1104 vdst.write();
1105 } // execute
1106 // --- Inst_VOP1__V_FLOOR_F32 class methods ---
1107
1109 : Inst_VOP1(iFmt, "v_floor_f32")
1110 {
1111 setFlag(ALU);
1112 setFlag(F32);
1113 } // Inst_VOP1__V_FLOOR_F32
1114
1116 {
1117 } // ~Inst_VOP1__V_FLOOR_F32
1118
1119 // --- description from .arch file ---
1120 // D.f = trunc(S0.f);
1121 // if (S0.f < 0.0 && S0.f != D.f) then D.f += -1.0.
1122 void
1124 {
1125 Wavefront *wf = gpuDynInst->wavefront();
1126 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
1127 VecOperandF32 vdst(gpuDynInst, instData.VDST);
1128
1129 src.readSrc();
1130
1131 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1132 if (wf->execMask(lane)) {
1133 vdst[lane] = std::floor(src[lane]);
1134 }
1135 }
1136
1137 vdst.write();
1138 } // execute
1139 // --- Inst_VOP1__V_EXP_F32 class methods ---
1140
1142 : Inst_VOP1(iFmt, "v_exp_f32")
1143 {
1144 setFlag(ALU);
1145 setFlag(F32);
1146 } // Inst_VOP1__V_EXP_F32
1147
1149 {
1150 } // ~Inst_VOP1__V_EXP_F32
1151
1152 // --- description from .arch file ---
1153 // D.f = pow(2.0, S0.f).
1154 void
1156 {
1157 Wavefront *wf = gpuDynInst->wavefront();
1158 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
1159 VecOperandF32 vdst(gpuDynInst, instData.VDST);
1160
1161 src.readSrc();
1162
1163 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1164 if (wf->execMask(lane)) {
1165 vdst[lane] = std::pow(2.0, src[lane]);
1166 }
1167 }
1168
1169 vdst.write();
1170 } // execute
1171 // --- Inst_VOP1__V_LOG_F32 class methods ---
1172
1174 : Inst_VOP1(iFmt, "v_log_f32")
1175 {
1176 setFlag(ALU);
1177 setFlag(F32);
1178 } // Inst_VOP1__V_LOG_F32
1179
1181 {
1182 } // ~Inst_VOP1__V_LOG_F32
1183
1184 // --- description from .arch file ---
1185 // D.f = log2(S0.f). Base 2 logarithm.
1186 void
1188 {
1189 Wavefront *wf = gpuDynInst->wavefront();
1190 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
1191 VecOperandF32 vdst(gpuDynInst, instData.VDST);
1192
1193 src.readSrc();
1194
1195 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1196 if (wf->execMask(lane)) {
1197 vdst[lane] = std::log2(src[lane]);
1198 }
1199 }
1200
1201 vdst.write();
1202 } // execute
1203 // --- Inst_VOP1__V_RCP_F32 class methods ---
1204
1206 : Inst_VOP1(iFmt, "v_rcp_f32")
1207 {
1208 setFlag(ALU);
1209 setFlag(F32);
1210 } // Inst_VOP1__V_RCP_F32
1211
1213 {
1214 } // ~Inst_VOP1__V_RCP_F32
1215
1216 // --- description from .arch file ---
1217 // D.f = 1.0 / S0.f. Reciprocal with IEEE rules and < 1ulp error.
1218 void
1220 {
1221 Wavefront *wf = gpuDynInst->wavefront();
1222 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
1223 VecOperandF32 vdst(gpuDynInst, instData.VDST);
1224
1225 src.readSrc();
1226
1227 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1228 if (wf->execMask(lane)) {
1229 vdst[lane] = 1.0 / src[lane];
1230 }
1231 }
1232
1233 vdst.write();
1234 } // execute
1235 // --- Inst_VOP1__V_RCP_IFLAG_F32 class methods ---
1236
1238 : Inst_VOP1(iFmt, "v_rcp_iflag_f32")
1239 {
1240 setFlag(ALU);
1241 setFlag(F32);
1242 } // Inst_VOP1__V_RCP_IFLAG_F32
1243
1245 {
1246 } // ~Inst_VOP1__V_RCP_IFLAG_F32
1247
1248 // --- description from .arch file ---
1249 // D.f = 1.0 / S0.f. Reciprocal intended for integer division, can raise
1250 // --- integer DIV_BY_ZERO exception but cannot raise floating-point
1251 // --- exceptions.
1252 void
1254 {
1255 Wavefront *wf = gpuDynInst->wavefront();
1256 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
1257 VecOperandF32 vdst(gpuDynInst, instData.VDST);
1258
1259 src.readSrc();
1260
1261 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1262 if (wf->execMask(lane)) {
1263 vdst[lane] = 1.0 / src[lane];
1264 }
1265 }
1266
1267 vdst.write();
1268 } // execute
1269 // --- Inst_VOP1__V_RSQ_F32 class methods ---
1270
1272 : Inst_VOP1(iFmt, "v_rsq_f32")
1273 {
1274 setFlag(ALU);
1275 setFlag(F32);
1276 } // Inst_VOP1__V_RSQ_F32
1277
1279 {
1280 } // ~Inst_VOP1__V_RSQ_F32
1281
1282 // --- description from .arch file ---
1283 // D.f = 1.0 / sqrt(S0.f). Reciprocal square root with IEEE rules.
1284 void
1286 {
1287 Wavefront *wf = gpuDynInst->wavefront();
1288 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
1289 VecOperandF32 vdst(gpuDynInst, instData.VDST);
1290
1291 src.readSrc();
1292
1293 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1294 if (wf->execMask(lane)) {
1295 vdst[lane] = 1.0 / std::sqrt(src[lane]);
1296 }
1297 }
1298
1299 vdst.write();
1300 } // execute
1301 // --- Inst_VOP1__V_RCP_F64 class methods ---
1302
1304 : Inst_VOP1(iFmt, "v_rcp_f64")
1305 {
1306 setFlag(ALU);
1307 setFlag(F64);
1308 } // Inst_VOP1__V_RCP_F64
1309
1311 {
1312 } // ~Inst_VOP1__V_RCP_F64
1313
1314 // --- description from .arch file ---
1315 // D.d = 1.0 / S0.d.
1316 void
1318 {
1319 Wavefront *wf = gpuDynInst->wavefront();
1320 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
1321 VecOperandF64 vdst(gpuDynInst, instData.VDST);
1322
1323 src.readSrc();
1324
1325 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1326 if (wf->execMask(lane)) {
1327 if (std::fpclassify(src[lane]) == FP_ZERO) {
1328 vdst[lane] = +INFINITY;
1329 } else if (std::isnan(src[lane])) {
1330 vdst[lane] = NAN;
1331 } else if (std::isinf(src[lane])) {
1332 if (std::signbit(src[lane])) {
1333 vdst[lane] = -0.0;
1334 } else {
1335 vdst[lane] = 0.0;
1336 }
1337 } else {
1338 vdst[lane] = 1.0 / src[lane];
1339 }
1340 }
1341 }
1342
1343 vdst.write();
1344 } // execute
1345 // --- Inst_VOP1__V_RSQ_F64 class methods ---
1346
1348 : Inst_VOP1(iFmt, "v_rsq_f64")
1349 {
1350 setFlag(ALU);
1351 setFlag(F64);
1352 } // Inst_VOP1__V_RSQ_F64
1353
1355 {
1356 } // ~Inst_VOP1__V_RSQ_F64
1357
1358 // --- description from .arch file ---
1359 // D.d = 1.0 / sqrt(S0.d). See V_RSQ_F32.
1360 void
1362 {
1363 Wavefront *wf = gpuDynInst->wavefront();
1364 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
1365 VecOperandF64 vdst(gpuDynInst, instData.VDST);
1366
1367 src.readSrc();
1368
1369 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1370 if (wf->execMask(lane)) {
1371 if (std::fpclassify(src[lane]) == FP_ZERO) {
1372 vdst[lane] = +INFINITY;
1373 } else if (std::isnan(src[lane])) {
1374 vdst[lane] = NAN;
1375 } else if (std::isinf(src[lane])
1376 && !std::signbit(src[lane])) {
1377 vdst[lane] = 0.0;
1378 } else if (std::signbit(src[lane])) {
1379 vdst[lane] = NAN;
1380 } else {
1381 vdst[lane] = 1.0 / std::sqrt(src[lane]);
1382 }
1383 }
1384 }
1385
1386 vdst.write();
1387 } // execute
1388 // --- Inst_VOP1__V_SQRT_F32 class methods ---
1389
1391 : Inst_VOP1(iFmt, "v_sqrt_f32")
1392 {
1393 setFlag(ALU);
1394 setFlag(F32);
1395 } // Inst_VOP1__V_SQRT_F32
1396
1398 {
1399 } // ~Inst_VOP1__V_SQRT_F32
1400
1401 // --- description from .arch file ---
1402 // D.f = sqrt(S0.f).
1403 void
1405 {
1406 Wavefront *wf = gpuDynInst->wavefront();
1407 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
1408 VecOperandF32 vdst(gpuDynInst, instData.VDST);
1409
1410 src.readSrc();
1411
1412 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1413 if (wf->execMask(lane)) {
1414 vdst[lane] = std::sqrt(src[lane]);
1415 }
1416 }
1417
1418 vdst.write();
1419 } // execute
1420 // --- Inst_VOP1__V_SQRT_F64 class methods ---
1421
1423 : Inst_VOP1(iFmt, "v_sqrt_f64")
1424 {
1425 setFlag(ALU);
1426 setFlag(F64);
1427 } // Inst_VOP1__V_SQRT_F64
1428
1430 {
1431 } // ~Inst_VOP1__V_SQRT_F64
1432
1433 // --- description from .arch file ---
1434 // D.d = sqrt(S0.d).
1435 void
1437 {
1438 Wavefront *wf = gpuDynInst->wavefront();
1439 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
1440 VecOperandF64 vdst(gpuDynInst, instData.VDST);
1441
1442 src.readSrc();
1443
1444 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1445 if (wf->execMask(lane)) {
1446 vdst[lane] = std::sqrt(src[lane]);
1447 }
1448 }
1449
1450 vdst.write();
1451 } // execute
1452 // --- Inst_VOP1__V_SIN_F32 class methods ---
1453
1455 : Inst_VOP1(iFmt, "v_sin_f32")
1456 {
1457 setFlag(ALU);
1458 setFlag(F32);
1459 } // Inst_VOP1__V_SIN_F32
1460
1462 {
1463 } // ~Inst_VOP1__V_SIN_F32
1464
1465 // --- description from .arch file ---
1466 // D.f = sin(S0.f * 2 * PI).
1467 // Valid range of S0.f is [-256.0, +256.0]. Out of range input results in
1468 // float 0.0.
1469 void
1471 {
1472 Wavefront *wf = gpuDynInst->wavefront();
1473 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
1474 ConstScalarOperandF32 pi(gpuDynInst, REG_PI);
1475 VecOperandF32 vdst(gpuDynInst, instData.VDST);
1476
1477 src.readSrc();
1478 pi.read();
1479
1480 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1481 if (wf->execMask(lane)) {
1482 if (src[lane] < -256.0 || src[lane] > 256.0) {
1483 vdst[lane] = 0.0;
1484 } else {
1485 vdst[lane] = std::sin(src[lane] * 2.0 * pi.rawData());
1486 }
1487 }
1488 }
1489
1490 vdst.write();
1491 } // execute
1492 // --- Inst_VOP1__V_COS_F32 class methods ---
1493
1495 : Inst_VOP1(iFmt, "v_cos_f32")
1496 {
1497 setFlag(ALU);
1498 setFlag(F32);
1499 } // Inst_VOP1__V_COS_F32
1500
1502 {
1503 } // ~Inst_VOP1__V_COS_F32
1504
1505 // --- description from .arch file ---
1506 // D.f = cos(S0.f * 2 * PI).
1507 // Valid range of S0.f is [-256.0, +256.0]. Out of range input results in
1508 // float 1.0.
1509 void
1511 {
1512 Wavefront *wf = gpuDynInst->wavefront();
1513 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
1514 ConstScalarOperandF32 pi(gpuDynInst, REG_PI);
1515 VecOperandF32 vdst(gpuDynInst, instData.VDST);
1516
1517 src.readSrc();
1518 pi.read();
1519
1520 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1521 if (wf->execMask(lane)) {
1522 if (src[lane] < -256.0 || src[lane] > 256.0) {
1523 vdst[lane] = 0.0;
1524 } else {
1525 vdst[lane] = std::cos(src[lane] * 2.0 * pi.rawData());
1526 }
1527 }
1528 }
1529
1530 vdst.write();
1531 } // execute
1532 // --- Inst_VOP1__V_NOT_B32 class methods ---
1533
1535 : Inst_VOP1(iFmt, "v_not_b32")
1536 {
1537 setFlag(ALU);
1538 } // Inst_VOP1__V_NOT_B32
1539
1541 {
1542 } // ~Inst_VOP1__V_NOT_B32
1543
1544 // --- description from .arch file ---
1545 // D.u = ~S0.u.
1546 // Input and output modifiers not supported.
1547 void
1549 {
1550 Wavefront *wf = gpuDynInst->wavefront();
1551 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
1552 VecOperandU32 vdst(gpuDynInst, instData.VDST);
1553
1554 src.readSrc();
1555
1556 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1557 if (wf->execMask(lane)) {
1558 vdst[lane] = ~src[lane];
1559 }
1560 }
1561
1562 vdst.write();
1563 } // execute
1564 // --- Inst_VOP1__V_BFREV_B32 class methods ---
1565
1567 : Inst_VOP1(iFmt, "v_bfrev_b32")
1568 {
1569 setFlag(ALU);
1570 } // Inst_VOP1__V_BFREV_B32
1571
1573 {
1574 } // ~Inst_VOP1__V_BFREV_B32
1575
1576 // --- description from .arch file ---
1577 // D.u[31:0] = S0.u[0:31], bitfield reverse.
1578 // Input and output modifiers not supported.
1579 void
1581 {
1582 Wavefront *wf = gpuDynInst->wavefront();
1583 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
1584 VecOperandU32 vdst(gpuDynInst, instData.VDST);
1585
1586 src.readSrc();
1587
1588 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1589 if (wf->execMask(lane)) {
1590 vdst[lane] = reverseBits(src[lane]);
1591 }
1592 }
1593
1594 vdst.write();
1595 } // execute
1596 // --- Inst_VOP1__V_FFBH_U32 class methods ---
1597
1599 : Inst_VOP1(iFmt, "v_ffbh_u32")
1600 {
1601 setFlag(ALU);
1602 } // Inst_VOP1__V_FFBH_U32
1603
1605 {
1606 } // ~Inst_VOP1__V_FFBH_U32
1607
1608 // --- description from .arch file ---
1609 // D.u = position of first 1 in S0.u from MSB;
1610 // D.u = 0xffffffff if S0.u == 0.
1611 void
1613 {
1614 Wavefront *wf = gpuDynInst->wavefront();
1615 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
1616 VecOperandU32 vdst(gpuDynInst, instData.VDST);
1617
1618 src.readSrc();
1619
1620 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1621 if (wf->execMask(lane)) {
1622 vdst[lane] = findFirstOneMsb(src[lane]);
1623 }
1624 }
1625
1626 vdst.write();
1627 } // execute
1628 // --- Inst_VOP1__V_FFBL_B32 class methods ---
1629
1631 : Inst_VOP1(iFmt, "v_ffbl_b32")
1632 {
1633 setFlag(ALU);
1634 } // Inst_VOP1__V_FFBL_B32
1635
1637 {
1638 } // ~Inst_VOP1__V_FFBL_B32
1639
1640 // --- description from .arch file ---
1641 // D.u = position of first 1 in S0.u from LSB;
1642 // D.u = 0xffffffff if S0.u == 0.
1643 void
1645 {
1646 Wavefront *wf = gpuDynInst->wavefront();
1647 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
1648 VecOperandU32 vdst(gpuDynInst, instData.VDST);
1649
1650 src.readSrc();
1651
1652 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1653 if (wf->execMask(lane)) {
1654 vdst[lane] = findFirstOne(src[lane]);
1655 }
1656 }
1657
1658 vdst.write();
1659 } // execute
1660 // --- Inst_VOP1__V_FFBH_I32 class methods ---
1661
1663 : Inst_VOP1(iFmt, "v_ffbh_i32")
1664 {
1665 setFlag(ALU);
1666 } // Inst_VOP1__V_FFBH_I32
1667
1669 {
1670 } // ~Inst_VOP1__V_FFBH_I32
1671
1672 // --- description from .arch file ---
1673 // D.u = position of first bit different from sign bit in S0.i from MSB;
1674 // D.u = 0xffffffff if S0.i == 0 or S0.i == 0xffffffff.
1675 void
1677 {
1678 Wavefront *wf = gpuDynInst->wavefront();
1679 ConstVecOperandI32 src(gpuDynInst, instData.SRC0);
1680 VecOperandU32 vdst(gpuDynInst, instData.VDST);
1681
1682 src.readSrc();
1683
1684 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1685 if (wf->execMask(lane)) {
1686 vdst[lane] = firstOppositeSignBit(src[lane]);
1687 }
1688 }
1689
1690 vdst.write();
1691 } // execute
1692 // --- Inst_VOP1__V_FREXP_EXP_I32_F64 class methods ---
1693
1695 InFmt_VOP1 *iFmt)
1696 : Inst_VOP1(iFmt, "v_frexp_exp_i32_f64")
1697 {
1698 setFlag(ALU);
1699 setFlag(F64);
1700 } // Inst_VOP1__V_FREXP_EXP_I32_F64
1701
1703 {
1704 } // ~Inst_VOP1__V_FREXP_EXP_I32_F64
1705
1706 // --- description from .arch file ---
1707 // See V_FREXP_EXP_I32_F32.
1708 void
1710 {
1711 Wavefront *wf = gpuDynInst->wavefront();
1712 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
1713 VecOperandI32 vdst(gpuDynInst, instData.VDST);
1714
1715 src.readSrc();
1716
1717 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1718 if (wf->execMask(lane)) {
1719 if (std::isinf(src[lane]) || std::isnan(src[lane])) {
1720 vdst[lane] = 0;
1721 } else {
1722 VecElemI32 exp = 0;
1723 std::frexp(src[lane], &exp);
1724 vdst[lane] = exp;
1725 }
1726 }
1727 }
1728
1729 vdst.write();
1730 } // execute
1731 // --- Inst_VOP1__V_FREXP_MANT_F64 class methods ---
1732
1734 : Inst_VOP1(iFmt, "v_frexp_mant_f64")
1735 {
1736 setFlag(ALU);
1737 setFlag(F64);
1738 } // Inst_VOP1__V_FREXP_MANT_F64
1739
1741 {
1742 } // ~Inst_VOP1__V_FREXP_MANT_F64
1743
1744 // --- description from .arch file ---
1745 // See V_FREXP_MANT_F32.
1746 void
1748 {
1749 Wavefront *wf = gpuDynInst->wavefront();
1750 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
1751 VecOperandF64 vdst(gpuDynInst, instData.VDST);
1752
1753 src.readSrc();
1754
1755 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1756 if (wf->execMask(lane)) {
1757 if (std::isinf(src[lane]) || std::isnan(src[lane])) {
1758 vdst[lane] = src[lane];
1759 } else {
1760 VecElemI32 exp(0);
1761 vdst[lane] = std::frexp(src[lane], &exp);
1762 }
1763 }
1764 }
1765
1766 vdst.write();
1767 } // execute
1768 // --- Inst_VOP1__V_FRACT_F64 class methods ---
1769
1771 : Inst_VOP1(iFmt, "v_fract_f64")
1772 {
1773 setFlag(ALU);
1774 setFlag(F64);
1775 } // Inst_VOP1__V_FRACT_F64
1776
1778 {
1779 } // ~Inst_VOP1__V_FRACT_F64
1780
1781 // --- description from .arch file ---
1782 // See V_FRACT_F32.
1783 void
1785 {
1786 Wavefront *wf = gpuDynInst->wavefront();
1787 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
1788 VecOperandF64 vdst(gpuDynInst, instData.VDST);
1789
1790 src.readSrc();
1791
1792 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1793 if (wf->execMask(lane)) {
1794 VecElemF64 int_part(0.0);
1795 vdst[lane] = std::modf(src[lane], &int_part);
1796 }
1797 }
1798
1799 vdst.write();
1800 } // execute
1801 // --- Inst_VOP1__V_FREXP_EXP_I32_F32 class methods ---
1802
1804 InFmt_VOP1 *iFmt)
1805 : Inst_VOP1(iFmt, "v_frexp_exp_i32_f32")
1806 {
1807 setFlag(ALU);
1808 setFlag(F32);
1809 } // Inst_VOP1__V_FREXP_EXP_I32_F32
1810
1812 {
1813 } // ~Inst_VOP1__V_FREXP_EXP_I32_F32
1814
1815 // --- description from .arch file ---
1816 // if (S0.f == INF || S0.f == NAN) then D.i = 0;
1817 // else D.i = TwosComplement(Exponent(S0.f) - 127 + 1).
1818 // Returns exponent of single precision float input, such that S0.f =
1819 // significand * (2 ** exponent). See also FREXP_MANT_F32, which returns
1820 // the significand.
1821 void
1823 {
1824 Wavefront *wf = gpuDynInst->wavefront();
1825 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
1826 VecOperandI32 vdst(gpuDynInst, instData.VDST);
1827
1828 src.readSrc();
1829
1830 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1831 if (wf->execMask(lane)) {
1832 if (std::isinf(src[lane]) || std::isnan(src[lane])) {
1833 vdst[lane] = 0;
1834 } else {
1835 VecElemI32 exp(0);
1836 std::frexp(src[lane], &exp);
1837 vdst[lane] = exp;
1838 }
1839 }
1840 }
1841
1842 vdst.write();
1843 } // execute
1844 // --- Inst_VOP1__V_FREXP_MANT_F32 class methods ---
1845
1847 : Inst_VOP1(iFmt, "v_frexp_mant_f32")
1848 {
1849 setFlag(ALU);
1850 setFlag(F32);
1851 } // Inst_VOP1__V_FREXP_MANT_F32
1852
1854 {
1855 } // ~Inst_VOP1__V_FREXP_MANT_F32
1856
1857 // --- description from .arch file ---
1858 // if (S0.f == INF || S0.f == NAN) then D.f = S0.f;
1859 // else D.f = Mantissa(S0.f).
1860 // Result range is in (-1.0,-0.5][0.5,1.0) in normal cases. Returns binary
1861 // --- significand of single precision float input, such that S0.f =
1862 // --- significand * (2 ** exponent). See also FREXP_EXP_I32_F32, which
1863 // --- returns integer exponent.
1864 void
1866 {
1867 Wavefront *wf = gpuDynInst->wavefront();
1868 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
1869 VecOperandF32 vdst(gpuDynInst, instData.VDST);
1870
1871 src.readSrc();
1872
1873 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1874 if (wf->execMask(lane)) {
1875 if (std::isinf(src[lane]) || std::isnan(src[lane])) {
1876 vdst[lane] = src[lane];
1877 } else {
1878 VecElemI32 exp(0);
1879 vdst[lane] = std::frexp(src[lane], &exp);
1880 }
1881 }
1882 }
1883
1884 vdst.write();
1885 } // execute
1886 // --- Inst_VOP1__V_CLREXCP class methods ---
1887
1889 : Inst_VOP1(iFmt, "v_clrexcp")
1890 {
1891 setFlag(ALU);
1892 } // Inst_VOP1__V_CLREXCP
1893
1895 {
1896 } // ~Inst_VOP1__V_CLREXCP
1897
1898 // --- description from .arch file ---
1899 // Clear wave's exception state in SIMD (SP).
1900 void
1902 {
1904 } // execute
1905 // --- Inst_VOP1__V_MOV_B64 class methods ---
1906
1908 : Inst_VOP1(iFmt, "v_mov_b64")
1909 {
1910 setFlag(ALU);
1911 } // Inst_VOP1__V_MOV_B64
1912
1914 {
1915 } // ~Inst_VOP1__V_MOV_B64
1916
1917 // --- description from .arch file ---
1918 // D.u = S0.u.
1919 // Input and output modifiers not supported; this is an untyped operation.
1920 void
1922 {
1923 Wavefront *wf = gpuDynInst->wavefront();
1924 ConstVecOperandU64 src(gpuDynInst, instData.SRC0);
1925 VecOperandU64 vdst(gpuDynInst, instData.VDST);
1926
1927 src.readSrc();
1928
1929 panic_if(isDPPInst(), "DPP unimplemented for v_mov_b64");
1930 panic_if(isSDWAInst(), "SDWA unimplemented for v_mov_b64");
1931
1932 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1933 if (wf->execMask(lane)) {
1934 vdst[lane] = src[lane];
1935 }
1936 }
1937
1938 vdst.write();
1939 } // execute
1940 // --- Inst_VOP1__V_CVT_F16_U16 class methods ---
1941
1943 : Inst_VOP1(iFmt, "v_cvt_f16_u16")
1944 {
1945 setFlag(ALU);
1946 setFlag(F16);
1947 } // Inst_VOP1__V_CVT_F16_U16
1948
1950 {
1951 } // ~Inst_VOP1__V_CVT_F16_U16
1952
1953 // --- description from .arch file ---
1954 // D.f16 = uint16_to_flt16(S.u16).
1955 // Supports denormals, rounding, exception flags and saturation.
1956 void
1958 {
1960 } // execute
1961 // --- Inst_VOP1__V_CVT_F16_I16 class methods ---
1962
1964 : Inst_VOP1(iFmt, "v_cvt_f16_i16")
1965 {
1966 setFlag(ALU);
1967 setFlag(F16);
1968 } // Inst_VOP1__V_CVT_F16_I16
1969
1971 {
1972 } // ~Inst_VOP1__V_CVT_F16_I16
1973
1974 // --- description from .arch file ---
1975 // D.f16 = int16_to_flt16(S.i16).
1976 // Supports denormals, rounding, exception flags and saturation.
1977 void
1979 {
1981 } // execute
1982 // --- Inst_VOP1__V_CVT_U16_F16 class methods ---
1983
1985 : Inst_VOP1(iFmt, "v_cvt_u16_f16")
1986 {
1987 setFlag(ALU);
1988 setFlag(F16);
1989 } // Inst_VOP1__V_CVT_U16_F16
1990
1992 {
1993 } // ~Inst_VOP1__V_CVT_U16_F16
1994
1995 // --- description from .arch file ---
1996 // D.u16 = flt16_to_uint16(S.f16).
1997 // Supports rounding, exception flags and saturation.
1998 void
2000 {
2002 } // execute
2003 // --- Inst_VOP1__V_CVT_I16_F16 class methods ---
2004
2006 : Inst_VOP1(iFmt, "v_cvt_i16_f16")
2007 {
2008 setFlag(ALU);
2009 setFlag(F16);
2010 } // Inst_VOP1__V_CVT_I16_F16
2011
2013 {
2014 } // ~Inst_VOP1__V_CVT_I16_F16
2015
2016 // --- description from .arch file ---
2017 // D.i16 = flt16_to_int16(S.f16).
2018 // Supports rounding, exception flags and saturation.
2019 void
2021 {
2023 } // execute
2024 // --- Inst_VOP1__V_RCP_F16 class methods ---
2025
2027 : Inst_VOP1(iFmt, "v_rcp_f16")
2028 {
2029 setFlag(ALU);
2030 setFlag(F16);
2031 } // Inst_VOP1__V_RCP_F16
2032
2034 {
2035 } // ~Inst_VOP1__V_RCP_F16
2036
2037 // --- description from .arch file ---
2038 // if (S0.f16 == 1.0f)
2039 // D.f16 = 1.0f;
2040 // else
2041 // D.f16 = ApproximateRecip(S0.f16).
2042 void
2044 {
2046 } // execute
2047 // --- Inst_VOP1__V_SQRT_F16 class methods ---
2048
2050 : Inst_VOP1(iFmt, "v_sqrt_f16")
2051 {
2052 setFlag(ALU);
2053 setFlag(F16);
2054 } // Inst_VOP1__V_SQRT_F16
2055
2057 {
2058 } // ~Inst_VOP1__V_SQRT_F16
2059
2060 // --- description from .arch file ---
2061 // if (S0.f16 == 1.0f)
2062 // D.f16 = 1.0f;
2063 // else
2064 // D.f16 = ApproximateSqrt(S0.f16).
2065 void
2067 {
2069 } // execute
2070 // --- Inst_VOP1__V_RSQ_F16 class methods ---
2071
2073 : Inst_VOP1(iFmt, "v_rsq_f16")
2074 {
2075 setFlag(ALU);
2076 setFlag(F16);
2077 } // Inst_VOP1__V_RSQ_F16
2078
2080 {
2081 } // ~Inst_VOP1__V_RSQ_F16
2082
2083 // --- description from .arch file ---
2084 // if (S0.f16 == 1.0f)
2085 // D.f16 = 1.0f;
2086 // else
2087 // D.f16 = ApproximateRecipSqrt(S0.f16).
2088 void
2090 {
2092 } // execute
2093 // --- Inst_VOP1__V_LOG_F16 class methods ---
2094
2096 : Inst_VOP1(iFmt, "v_log_f16")
2097 {
2098 setFlag(ALU);
2099 setFlag(F16);
2100 } // Inst_VOP1__V_LOG_F16
2101
2103 {
2104 } // ~Inst_VOP1__V_LOG_F16
2105
2106 // --- description from .arch file ---
2107 // if (S0.f16 == 1.0f)
2108 // D.f16 = 0.0f;
2109 // else
2110 // D.f16 = ApproximateLog2(S0.f16).
2111 void
2113 {
2115 } // execute
2116 // --- Inst_VOP1__V_EXP_F16 class methods ---
2117
2119 : Inst_VOP1(iFmt, "v_exp_f16")
2120 {
2121 setFlag(ALU);
2122 setFlag(F16);
2123 } // Inst_VOP1__V_EXP_F16
2124
2126 {
2127 } // ~Inst_VOP1__V_EXP_F16
2128
2129 // --- description from .arch file ---
2130 // if (S0.f16 == 0.0f)
2131 // D.f16 = 1.0f;
2132 // else
2133 // D.f16 = Approximate2ToX(S0.f16).
2134 void
2136 {
2138 } // execute
2139 // --- Inst_VOP1__V_FREXP_MANT_F16 class methods ---
2140
2142 : Inst_VOP1(iFmt, "v_frexp_mant_f16")
2143 {
2144 setFlag(ALU);
2145 setFlag(F16);
2146 } // Inst_VOP1__V_FREXP_MANT_F16
2147
2149 {
2150 } // ~Inst_VOP1__V_FREXP_MANT_F16
2151
2152 // --- description from .arch file ---
2153 // if (S0.f16 == +-INF || S0.f16 == NAN)
2154 // D.f16 = S0.f16;
2155 // else
2156 // D.f16 = mantissa(S0.f16).
2157 // Result range is (-1.0,-0.5][0.5,1.0).
2158 // C math library frexp function.
2159 // Returns binary significand of half precision float input, such that the
2160 // original single float = significand * (2 ** exponent).
2161 void
2163 {
2165 } // execute
2166 // --- Inst_VOP1__V_FREXP_EXP_I16_F16 class methods ---
2167
2169 InFmt_VOP1 *iFmt)
2170 : Inst_VOP1(iFmt, "v_frexp_exp_i16_f16")
2171 {
2172 setFlag(ALU);
2173 setFlag(F16);
2174 } // Inst_VOP1__V_FREXP_EXP_I16_F16
2175
2177 {
2178 } // ~Inst_VOP1__V_FREXP_EXP_I16_F16
2179
2180 // --- description from .arch file ---
2181 // if (S0.f16 == +-INF || S0.f16 == NAN)
2182 // D.i16 = 0;
2183 // else
2184 // D.i16 = 2s_complement(exponent(S0.f16) - 15 + 1).
2185 // C math library frexp function.
2186 // Returns exponent of half precision float input, such that the
2187 // original single float = significand * (2 ** exponent).
2188 void
2193 // --- Inst_VOP1__V_FLOOR_F16 class methods ---
2194
2196 : Inst_VOP1(iFmt, "v_floor_f16")
2197 {
2198 setFlag(ALU);
2199 setFlag(F16);
2200 } // Inst_VOP1__V_FLOOR_F16
2201
2203 {
2204 } // ~Inst_VOP1__V_FLOOR_F16
2205
2206 // --- description from .arch file ---
2207 // D.f16 = trunc(S0.f16);
2208 // if (S0.f16 < 0.0f && S0.f16 != D.f16) then D.f16 -= 1.0f.
2209 void
2211 {
2213 } // execute
2214 // --- Inst_VOP1__V_CEIL_F16 class methods ---
2215
2217 : Inst_VOP1(iFmt, "v_ceil_f16")
2218 {
2219 setFlag(ALU);
2220 setFlag(F16);
2221 } // Inst_VOP1__V_CEIL_F16
2222
2224 {
2225 } // ~Inst_VOP1__V_CEIL_F16
2226
2227 // --- description from .arch file ---
2228 // D.f16 = trunc(S0.f16);
2229 // if (S0.f16 > 0.0f && S0.f16 != D.f16) then D.f16 += 1.0f.
2230 void
2232 {
2234 } // execute
2235 // --- Inst_VOP1__V_TRUNC_F16 class methods ---
2236
2238 : Inst_VOP1(iFmt, "v_trunc_f16")
2239 {
2240 setFlag(ALU);
2241 setFlag(F16);
2242 } // Inst_VOP1__V_TRUNC_F16
2243
2245 {
2246 } // ~Inst_VOP1__V_TRUNC_F16
2247
2248 // --- description from .arch file ---
2249 // D.f16 = trunc(S0.f16).
2250 // Round-to-zero semantics.
2251 void
2253 {
2255 } // execute
2256 // --- Inst_VOP1__V_RNDNE_F16 class methods ---
2257
2259 : Inst_VOP1(iFmt, "v_rndne_f16")
2260 {
2261 setFlag(ALU);
2262 setFlag(F16);
2263 } // Inst_VOP1__V_RNDNE_F16
2264
2266 {
2267 } // ~Inst_VOP1__V_RNDNE_F16
2268
2269 // --- description from .arch file ---
2270 // D.f16 = FLOOR(S0.f16 + 0.5f);
2271 // if (floor(S0.f16) is even && fract(S0.f16) == 0.5f) then D.f16 -= 1.0f.
2272 // Round-to-nearest-even semantics.
2273 void
2275 {
2277 } // execute
2278 // --- Inst_VOP1__V_FRACT_F16 class methods ---
2279
2281 : Inst_VOP1(iFmt, "v_fract_f16")
2282 {
2283 setFlag(ALU);
2284 setFlag(F16);
2285 } // Inst_VOP1__V_FRACT_F16
2286
2288 {
2289 } // ~Inst_VOP1__V_FRACT_F16
2290
2291 // --- description from .arch file ---
2292 // D.f16 = S0.f16 + -floor(S0.f16).
2293 void
2295 {
2297 } // execute
2298 // --- Inst_VOP1__V_SIN_F16 class methods ---
2299
2301 : Inst_VOP1(iFmt, "v_sin_f16")
2302 {
2303 setFlag(ALU);
2304 setFlag(F16);
2305 } // Inst_VOP1__V_SIN_F16
2306
2308 {
2309 } // ~Inst_VOP1__V_SIN_F16
2310
2311 // --- description from .arch file ---
2312 // D.f16 = sin(S0.f16 * 2 * PI).
2313 void
2315 {
2317 } // execute
2318 // --- Inst_VOP1__V_COS_F16 class methods ---
2319
2321 : Inst_VOP1(iFmt, "v_cos_f16")
2322 {
2323 setFlag(ALU);
2324 setFlag(F16);
2325 } // Inst_VOP1__V_COS_F16
2326
2328 {
2329 } // ~Inst_VOP1__V_COS_F16
2330
2331 // --- description from .arch file ---
2332 // D.f16 = cos(S0.f16 * 2 * PI).
2333 void
2335 {
2337 } // execute
2338 // --- Inst_VOP1__V_EXP_LEGACY_F32 class methods ---
2339
2341 : Inst_VOP1(iFmt, "v_exp_legacy_f32")
2342 {
2343 setFlag(ALU);
2344 setFlag(F32);
2345 } // Inst_VOP1__V_EXP_LEGACY_F32
2346
2348 {
2349 } // ~Inst_VOP1__V_EXP_LEGACY_F32
2350
2351 // --- description from .arch file ---
2352 // D.f = pow(2.0, S0.f) with legacy semantics.
2353 void
2355 {
2356 Wavefront *wf = gpuDynInst->wavefront();
2357 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
2358 VecOperandF32 vdst(gpuDynInst, instData.VDST);
2359
2360 src.readSrc();
2361
2362 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2363 if (wf->execMask(lane)) {
2364 vdst[lane] = std::pow(2.0, src[lane]);
2365 }
2366 }
2367
2368 vdst.write();
2369 } // execute
2370 // --- Inst_VOP1__V_LOG_LEGACY_F32 class methods ---
2371
2373 : Inst_VOP1(iFmt, "v_log_legacy_f32")
2374 {
2375 setFlag(ALU);
2376 setFlag(F32);
2377 } // Inst_VOP1__V_LOG_LEGACY_F32
2378
2380 {
2381 } // ~Inst_VOP1__V_LOG_LEGACY_F32
2382
2383 // --- description from .arch file ---
2384 // D.f = log2(S0.f). Base 2 logarithm with legacy semantics.
2385 void
2387 {
2388 Wavefront *wf = gpuDynInst->wavefront();
2389 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
2390 VecOperandF32 vdst(gpuDynInst, instData.VDST);
2391
2392 src.readSrc();
2393
2394 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2395 if (wf->execMask(lane)) {
2396 vdst[lane] = std::log2(src[lane]);
2397 }
2398 }
2399
2400 vdst.write();
2401 } // execute
2402 // --- Inst_VOP1__V_ACCVGPR_MOV_B32 class methods ---
2403
2406 : Inst_VOP1(iFmt, "v_accvgpr_mov_b32")
2407 {
2408 setFlag(ALU);
2409 } // Inst_VOP1__V_ACCVGPR_MOV_B32
2410
2412 {
2413 } // ~Inst_VOP1__V_ACCVGPR_MOV_B32
2414
2415 void
2417 {
2418 Wavefront *wf = gpuDynInst->wavefront();
2419 unsigned accum_offset = wf->accumOffset;
2420
2421 ConstVecOperandU32 src(gpuDynInst, instData.SRC0+accum_offset);
2422 VecOperandU32 vdst(gpuDynInst, instData.VDST+accum_offset);
2423
2424 src.readSrc();
2425
2426 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2427 if (wf->execMask(lane)) {
2428 vdst[lane] = src[lane];
2429 }
2430 }
2431
2432 vdst.write();
2433 } // execute
2434} // namespace VegaISA
2435} // namespace gem5
#define DPRINTF(x,...)
Definition trace.hh:210
uint32_t data
Definition mxfp.hh:112
void setFlag(Flags flag)
Nop class.
Definition nop.hh:49
void execute(GPUDynInstPtr) override
Definition vop1.cc:2416
void execute(GPUDynInstPtr) override
Definition vop1.cc:1580
void execute(GPUDynInstPtr) override
Definition vop1.cc:2231
void execute(GPUDynInstPtr) override
Definition vop1.cc:1058
void execute(GPUDynInstPtr) override
Definition vop1.cc:895
void execute(GPUDynInstPtr) override
Definition vop1.cc:1901
void execute(GPUDynInstPtr) override
Definition vop1.cc:2334
void execute(GPUDynInstPtr) override
Definition vop1.cc:1510
void execute(GPUDynInstPtr) override
Definition vop1.cc:434
void execute(GPUDynInstPtr) override
Definition vop1.cc:1978
void execute(GPUDynInstPtr) override
Definition vop1.cc:1957
void execute(GPUDynInstPtr) override
Definition vop1.cc:470
void execute(GPUDynInstPtr) override
Definition vop1.cc:590
void execute(GPUDynInstPtr) override
Definition vop1.cc:253
void execute(GPUDynInstPtr) override
Definition vop1.cc:285
void execute(GPUDynInstPtr) override
Definition vop1.cc:654
void execute(GPUDynInstPtr) override
Definition vop1.cc:686
void execute(GPUDynInstPtr) override
Definition vop1.cc:718
void execute(GPUDynInstPtr) override
Definition vop1.cc:750
void execute(GPUDynInstPtr) override
Definition vop1.cc:622
void execute(GPUDynInstPtr) override
Definition vop1.cc:221
void execute(GPUDynInstPtr) override
Definition vop1.cc:830
void execute(GPUDynInstPtr) override
Definition vop1.cc:537
void execute(GPUDynInstPtr) override
Definition vop1.cc:2020
void execute(GPUDynInstPtr) override
Definition vop1.cc:367
void execute(GPUDynInstPtr) override
Definition vop1.cc:177
void execute(GPUDynInstPtr) override
Definition vop1.cc:569
void execute(GPUDynInstPtr) override
Definition vop1.cc:504
void execute(GPUDynInstPtr) override
Definition vop1.cc:1999
void execute(GPUDynInstPtr) override
Definition vop1.cc:319
void execute(GPUDynInstPtr) override
Definition vop1.cc:784
void execute(GPUDynInstPtr) override
Definition vop1.cc:2135
void execute(GPUDynInstPtr) override
Definition vop1.cc:1155
void execute(GPUDynInstPtr) override
Definition vop1.cc:2354
void execute(GPUDynInstPtr) override
Definition vop1.cc:1676
void execute(GPUDynInstPtr) override
Definition vop1.cc:1612
void execute(GPUDynInstPtr) override
Definition vop1.cc:1644
void execute(GPUDynInstPtr) override
Definition vop1.cc:2210
void execute(GPUDynInstPtr) override
Definition vop1.cc:1123
void execute(GPUDynInstPtr) override
Definition vop1.cc:960
void execute(GPUDynInstPtr) override
Definition vop1.cc:2294
void execute(GPUDynInstPtr) override
Definition vop1.cc:992
void execute(GPUDynInstPtr) override
Definition vop1.cc:1784
void execute(GPUDynInstPtr) override
Definition vop1.cc:2189
void execute(GPUDynInstPtr) override
Definition vop1.cc:1822
void execute(GPUDynInstPtr) override
Definition vop1.cc:1709
void execute(GPUDynInstPtr) override
Definition vop1.cc:2162
void execute(GPUDynInstPtr) override
Definition vop1.cc:1865
void execute(GPUDynInstPtr) override
Definition vop1.cc:1747
void execute(GPUDynInstPtr) override
Definition vop1.cc:2112
void execute(GPUDynInstPtr) override
Definition vop1.cc:1187
void execute(GPUDynInstPtr) override
Definition vop1.cc:2386
Inst_VOP1__V_MOV_B32(InFmt_VOP1 *)
Definition vop1.cc:61
void execute(GPUDynInstPtr) override
Definition vop1.cc:75
void execute(GPUDynInstPtr) override
Definition vop1.cc:1921
void execute(GPUDynInstPtr) override
Definition vop1.cc:413
Inst_VOP1__V_NOP(InFmt_VOP1 *)
Definition vop1.cc:42
void execute(GPUDynInstPtr) override
Definition vop1.cc:56
void execute(GPUDynInstPtr) override
Definition vop1.cc:1548
void execute(GPUDynInstPtr) override
Definition vop1.cc:2043
void execute(GPUDynInstPtr) override
Definition vop1.cc:1219
void execute(GPUDynInstPtr) override
Definition vop1.cc:1317
void execute(GPUDynInstPtr) override
Definition vop1.cc:1253
void execute(GPUDynInstPtr) override
Definition vop1.cc:141
void execute(GPUDynInstPtr) override
Definition vop1.cc:2274
void execute(GPUDynInstPtr) override
Definition vop1.cc:1090
void execute(GPUDynInstPtr) override
Definition vop1.cc:927
void execute(GPUDynInstPtr) override
Definition vop1.cc:2089
void execute(GPUDynInstPtr) override
Definition vop1.cc:1285
void execute(GPUDynInstPtr) override
Definition vop1.cc:1361
void execute(GPUDynInstPtr) override
Definition vop1.cc:2314
void execute(GPUDynInstPtr) override
Definition vop1.cc:1470
void execute(GPUDynInstPtr) override
Definition vop1.cc:2066
void execute(GPUDynInstPtr) override
Definition vop1.cc:1404
void execute(GPUDynInstPtr) override
Definition vop1.cc:1436
void execute(GPUDynInstPtr) override
Definition vop1.cc:2252
void execute(GPUDynInstPtr) override
Definition vop1.cc:1025
void execute(GPUDynInstPtr) override
Definition vop1.cc:862
void read() override
read from and write to the underlying register(s) that this operand is referring to.
Definition operand.hh:409
std::enable_if< Condition, DataType >::type rawData() const
we store scalar data in a std::array, however if we need the full operand data we use this method to ...
Definition operand.hh:392
void read() override
read from the vrf.
Definition operand.hh:147
void readSrc()
certain vector operands can read from the vrf/srf or constants.
Definition operand.hh:131
void write() override
write to the vrf.
Definition operand.hh:199
uint32_t accumOffset
Definition wavefront.hh:137
VectorMask & execMask()
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Definition bitfield.hh:79
constexpr int findLsbSet(uint64_t val)
Returns the bit position of the LSB that is set in the input That function will either use a builtin ...
Definition bitfield.hh:369
std::enable_if_t< std::is_integral_v< T >, T > reverseBits(T val, size_t size=sizeof(T))
Takes a value and returns the bit reversed version.
Definition bitfield.hh:255
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition logging.hh:214
constexpr unsigned NumVecElemPerVecReg
Definition vec.hh:61
ScalarRegI32 firstOppositeSignBit(ScalarRegI32 val)
Definition inst_util.hh:174
uint64_t ScalarRegU64
ScalarRegI32 findFirstOne(T val)
Definition inst_util.hh:142
ScalarRegI32 findFirstOneMsb(T val)
Definition inst_util.hh:153
T roundNearestEven(T val)
Definition inst_util.hh:259
uint32_t VecElemU32
void processDPP(GPUDynInstPtr gpuDynInst, InFmt_VOP_DPP dppInst, T &src0)
processDPP is a helper function for implementing Data Parallel Primitive instructions.
Definition inst_util.hh:424
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
Definition binary32.hh:36
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition misc.hh:49
constexpr bool isinf(gem5::AMDGPU::fp16_e5m10_info a)
Definition fp16_e5m10.hh:78
constexpr bool isnan(gem5::AMDGPU::fp16_e5m10_info a)
Definition fp16_e5m10.hh:83

Generated on Tue Jun 18 2024 16:23:49 for gem5 by doxygen 1.11.0