// fplib.cc — gem5 Arm floating-point emulation library (scraped from the
// gem5 v25.0.0.1 Doxygen documentation page for this file).
1/*
2* Copyright (c) 2012-2013, 2017-2018, 2020, 2025 Arm Limited
3* Copyright (c) 2020 Metempsy Technology Consulting
4* All rights reserved
5*
6* The license below extends only to copyright in the software and shall
7* not be construed as granting a license to any other intellectual
8* property including but not limited to intellectual property relating
9* to a hardware implementation of the functionality of the software
10* licensed hereunder. You may use the software subject to the license
11* terms below provided that you ensure that this notice is replicated
12* unmodified and in its entirety in all distributions of the software,
13* modified or unmodified, in source code or in binary form.
14*
15* Redistribution and use in source and binary forms, with or without
16* modification, are permitted provided that the following conditions are
17* met: redistributions of source code must retain the above copyright
18* notice, this list of conditions and the following disclaimer;
19* redistributions in binary form must reproduce the above copyright
20* notice, this list of conditions and the following disclaimer in the
21* documentation and/or other materials provided with the distribution;
22* neither the name of the copyright holders nor the names of its
23* contributors may be used to endorse or promote products derived from
24* this software without specific prior written permission.
25*
26* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37*/
38
39#include <stdint.h>
40
41#include <cassert>
42#include <cmath>
43
44#include "base/logging.hh"
45#include "fplib.hh"
46
47namespace gem5
48{
49
50namespace ArmISA
51{
52
// Rounding-mode and control bits for the fplib 'mode' argument
// (a subset of the AArch64 FPCR):
#define FPLIB_RN 0          // round to nearest, ties to even
#define FPLIB_RP 1          // round towards plus infinity
#define FPLIB_RM 2          // round towards minus infinity
#define FPLIB_RZ 3          // round towards zero
#define FPLIB_FZ 4          // flush denormals to zero (fp32/fp64)
#define FPLIB_DN 8          // propagate the default NaN
#define FPLIB_AHP 16        // alternative half-precision format
#define FPLIB_FZ16 32       // flush denormals to zero (fp16)

// Cumulative exception flags (a subset of the AArch64 FPSR):
#define FPLIB_IDC 128 // Input Denormal
#define FPLIB_IXC 16 // Inexact
#define FPLIB_UFC 8 // Underflow
#define FPLIB_OFC 4 // Overflow
#define FPLIB_DZC 2 // Division by Zero
#define FPLIB_IOC 1 // Invalid Operation

// Total width of each format, in bits:
#define FP16_BITS 16
#define FP32_BITS 32
#define FP64_BITS 64

// Exponent field width of each format:
#define FP16_EXP_BITS 5
#define FP32_EXP_BITS 8
#define FP64_EXP_BITS 11

// Exponent bias of each format:
#define FP16_EXP_BIAS 15
#define FP32_EXP_BIAS 127
#define FP64_EXP_BIAS 1023

// All-ones exponent value (encodes infinities and NaNs):
#define FP16_EXP_INF ((1ULL << FP16_EXP_BITS) - 1)
#define FP32_EXP_INF ((1ULL << FP32_EXP_BITS) - 1)
#define FP64_EXP_INF ((1ULL << FP64_EXP_BITS) - 1)

// Mantissa (fraction) field width of each format:
#define FP16_MANT_BITS (FP16_BITS - FP16_EXP_BITS - 1)
#define FP32_MANT_BITS (FP32_BITS - FP32_EXP_BITS - 1)
#define FP64_MANT_BITS (FP64_BITS - FP64_EXP_BITS - 1)

// Extract the biased exponent field of a packed value:
#define FP16_EXP(x) ((x) >> FP16_MANT_BITS & ((1ULL << FP16_EXP_BITS) - 1))
#define FP32_EXP(x) ((x) >> FP32_MANT_BITS & ((1ULL << FP32_EXP_BITS) - 1))
#define FP64_EXP(x) ((x) >> FP64_MANT_BITS & ((1ULL << FP64_EXP_BITS) - 1))

// Extract the mantissa (fraction) field of a packed value:
#define FP16_MANT(x) ((x) & ((1ULL << FP16_MANT_BITS) - 1))
#define FP32_MANT(x) ((x) & ((1ULL << FP32_MANT_BITS) - 1))
#define FP64_MANT(x) ((x) & ((1ULL << FP64_MANT_BITS) - 1))
96
static inline uint16_t
lsl16(uint16_t x, uint32_t shift)
{
    // Logical shift left; a shift of 16 or more yields zero instead of
    // invoking undefined behaviour.
    if (shift >= 16) {
        return 0;
    }
    return x << shift;
}
102
static inline uint16_t
lsr16(uint16_t x, uint32_t shift)
{
    // Logical shift right; a shift of 16 or more yields zero instead of
    // invoking undefined behaviour.
    if (shift >= 16) {
        return 0;
    }
    return x >> shift;
}
108
static inline uint32_t
lsl32(uint32_t x, uint32_t shift)
{
    // Logical shift left; a shift of 32 or more yields zero instead of
    // invoking undefined behaviour.
    if (shift >= 32) {
        return 0;
    }
    return x << shift;
}
114
static inline uint32_t
lsr32(uint32_t x, uint32_t shift)
{
    // Logical shift right; a shift of 32 or more yields zero instead of
    // invoking undefined behaviour.
    if (shift >= 32) {
        return 0;
    }
    return x >> shift;
}
120
static inline uint64_t
lsl64(uint64_t x, uint32_t shift)
{
    // Logical shift left; a shift of 64 or more yields zero instead of
    // invoking undefined behaviour.
    if (shift >= 64) {
        return 0;
    }
    return x << shift;
}
126
static inline uint64_t
lsr64(uint64_t x, uint32_t shift)
{
    // Logical shift right; a shift of 64 or more yields zero instead of
    // invoking undefined behaviour.
    if (shift >= 64) {
        return 0;
    }
    return x >> shift;
}
132
// 128-bit logical shift left of x1:x0 into r1:r0 (the '1' halves hold the
// high 64 bits). Shifts of 128 or more produce zero.
static inline void
lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift >= 128) {
        *r1 = 0;
        *r0 = 0;
    } else if (shift >= 64) {
        *r1 = x0 << (shift - 64);
        *r0 = 0;
    } else if (shift > 0) {
        // 1 <= shift <= 63, so both intra-word shifts are well-defined:
        *r1 = (x1 << shift) | (x0 >> (64 - shift));
        *r0 = x0 << shift;
    } else {
        *r1 = x1;
        *r0 = x0;
    }
}
150
// 128-bit logical shift right of x1:x0 into r1:r0 (the '1' halves hold the
// high 64 bits). Shifts of 128 or more produce zero.
static inline void
lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift >= 128) {
        *r0 = 0;
        *r1 = 0;
    } else if (shift >= 64) {
        *r0 = x1 >> (shift - 64);
        *r1 = 0;
    } else if (shift > 0) {
        // 1 <= shift <= 63, so both intra-word shifts are well-defined:
        *r0 = (x0 >> shift) | (x1 << (64 - shift));
        *r1 = x1 >> shift;
    } else {
        *r0 = x0;
        *r1 = x1;
    }
}
168
// Full multiply of two values of at most 62 bits, writing the 128-bit
// product to x1:x0 (x1 = high 64 bits). Uses Karatsuba on 31-bit limbs so
// no partial product or carry chain can overflow 64 bits.
static inline void
mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
{
    const uint32_t limb_mask = ((uint32_t)1 << 31) - 1;
    uint64_t a_lo = a & limb_mask;
    uint64_t a_hi = a >> 31 & limb_mask;
    uint64_t b_lo = b & limb_mask;
    uint64_t b_hi = b >> 31 & limb_mask;

    // Three multiplications instead of four (Karatsuba):
    uint64_t prod_lo  = a_lo * b_lo;
    uint64_t prod_hi  = a_hi * b_hi;
    uint64_t prod_mid = (a_lo + a_hi) * (b_lo + b_hi) - prod_lo - prod_hi;

    // Accumulate limbs at weights 2^0, 2^31 and 2^62:
    uint64_t acc0 = prod_lo;
    uint64_t acc1 = (acc0 >> 31) + prod_mid;
    uint64_t acc2 = (acc1 >> 31) + prod_hi;

    *x0 = (acc0 & limb_mask) | (acc1 & limb_mask) << 31 | acc2 << 62;
    *x1 = acc2 >> 2;
}
186
// Full multiply of a 64-bit value by a 32-bit value, writing the 96-bit
// product to x1:x0 (x1 = high 64 bits, of which at most 32 are non-zero).
static inline
void mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b)
{
    uint64_t low_product  = (uint64_t)(uint32_t)a * b;
    uint64_t high_product = (low_product >> 32) + (a >> 32) * b;

    *x0 = high_product << 32 | (uint32_t)low_product;
    *x1 = high_product >> 32;
}
195
// 128-bit addition: x1:x0 = a1:a0 + b1:b0 (modulo 2^128).
static inline void
add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    uint64_t low = a0 + b0;
    // The low half wrapped iff it ended up smaller than an addend:
    uint64_t carry = low < a0;
    *x0 = low;
    *x1 = a1 + b1 + carry;
}
203
// 128-bit subtraction: x1:x0 = a1:a0 - b1:b0 (modulo 2^128).
static inline void
sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    uint64_t low = a0 - b0;
    // The low half borrowed iff the difference wrapped above a0:
    uint64_t borrow = low > a0;
    *x0 = low;
    *x1 = a1 - b1 - borrow;
}
211
// Three-way unsigned comparison of a1:a0 against b1:b0:
// returns -1, 0 or 1 for less-than, equal and greater-than respectively.
static inline int
cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
{
    // High halves decide first; low halves break ties.
    if (a1 != b1) {
        return a1 < b1 ? -1 : 1;
    }
    if (a0 != b0) {
        return a0 < b0 ? -1 : 1;
    }
    return 0;
}
217
// Shift mnt left until its top bit (bit 15) is set, decrementing *exp by
// the shift distance. A zero mantissa returns 0 and leaves *exp untouched.
static inline uint16_t
fp16_normalise(uint16_t mnt, int *exp)
{
    if (!mnt) {
        return 0;
    }

    // Binary-search the leading-zero count with shifts of 8, 4, 2, 1:
    for (int step = 8; step > 0; step >>= 1) {
        if (!(mnt >> (16 - step))) {
            mnt <<= step;
            *exp -= step;
        }
    }
    return mnt;
}
235
// Shift mnt left until its top bit (bit 31) is set, decrementing *exp by
// the shift distance. A zero mantissa returns 0 and leaves *exp untouched.
static inline uint32_t
fp32_normalise(uint32_t mnt, int *exp)
{
    if (!mnt) {
        return 0;
    }

    // Binary-search the leading-zero count with shifts of 16, 8, 4, 2, 1:
    for (int step = 16; step > 0; step >>= 1) {
        if (!(mnt >> (32 - step))) {
            mnt <<= step;
            *exp -= step;
        }
    }
    return mnt;
}
253
// Shift mnt left until its top bit (bit 63) is set, decrementing *exp by
// the shift distance. A zero mantissa returns 0 and leaves *exp untouched.
static inline uint64_t
fp64_normalise(uint64_t mnt, int *exp)
{
    if (!mnt) {
        return 0;
    }

    // Binary-search the leading-zero count with shifts of 32 down to 1:
    for (int step = 32; step > 0; step >>= 1) {
        if (!(mnt >> (64 - step))) {
            mnt <<= step;
            *exp -= step;
        }
    }
    return mnt;
}
271
// Normalise a 128-bit mantissa held in mnt1:mnt0 (mnt1 = high word) so
// that bit 63 of the high word is set, decrementing *exp by the total
// shift. An all-zero mantissa is left untouched.
static inline void
fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp)
{
    uint64_t lo = *mnt0;
    uint64_t hi = *mnt1;

    if (!lo && !hi) {
        return;
    }

    // If the high word is empty, promote the low word wholesale:
    if (!hi) {
        hi = lo;
        lo = 0;
        *exp -= 64;
    }

    // Binary-search the remaining leading zeros (hi is non-zero here):
    for (int step = 32; step > 0; step >>= 1) {
        if (!(hi >> (64 - step))) {
            hi = hi << step | lo >> (64 - step);
            lo <<= step;
            *exp -= step;
        }
    }

    *mnt0 = lo;
    *mnt1 = hi;
}
300
301static inline uint16_t
302fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt)
303{
304 return sgn << (FP16_BITS - 1) | exp << FP16_MANT_BITS | FP16_MANT(mnt);
305}
306
307static inline uint32_t
308fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt)
309{
310 return sgn << (FP32_BITS - 1) | exp << FP32_MANT_BITS | FP32_MANT(mnt);
311}
312
313static inline uint64_t
314fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt)
315{
316 return sgn << (FP64_BITS - 1) | exp << FP64_MANT_BITS | FP64_MANT(mnt);
317}
318
319static inline uint16_t
320fp16_zero(int sgn)
321{
322 return fp16_pack(sgn, 0, 0);
323}
324
325static inline uint32_t
326fp32_zero(int sgn)
327{
328 return fp32_pack(sgn, 0, 0);
329}
330
331static inline uint64_t
332fp64_zero(int sgn)
333{
334 return fp64_pack(sgn, 0, 0);
335}
336
337static inline uint16_t
339{
340 return fp16_pack(sgn, FP16_EXP_INF - 1, -1);
341}
342
343static inline uint32_t
345{
346 return fp32_pack(sgn, FP32_EXP_INF - 1, -1);
347}
348
349static inline uint64_t
351{
352 return fp64_pack(sgn, FP64_EXP_INF - 1, -1);
353}
354
355static inline uint16_t
357{
358 return fp16_pack(sgn, FP16_EXP_INF, 0);
359}
360
361static inline uint32_t
363{
364 return fp32_pack(sgn, FP32_EXP_INF, 0);
365}
366
367static inline uint64_t
369{
370 return fp64_pack(sgn, FP64_EXP_INF, 0);
371}
372
373static inline uint16_t
375{
376 return fp16_pack(0, FP16_EXP_INF, 1ULL << (FP16_MANT_BITS - 1));
377}
378
379static inline uint32_t
381{
382 return fp32_pack(0, FP32_EXP_INF, 1ULL << (FP32_MANT_BITS - 1));
383}
384
385static inline uint64_t
387{
388 return fp64_pack(0, FP64_EXP_INF, 1ULL << (FP64_MANT_BITS - 1));
389}
390
// Split a packed fp16 into sign, biased exponent and mantissa, restoring
// the implicit leading 1 for normal numbers. Under FZ16, subnormal inputs
// are flushed to zero; unlike fp32/fp64, no IDC flag is raised.
static inline void
fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode,
            int *flags)
{
    *sgn = x >> (FP16_BITS - 1);
    *exp = FP16_EXP(x);
    *mnt = FP16_MANT(x);

    if (*exp) {
        *mnt |= 1ULL << FP16_MANT_BITS;
    } else {
        // Handle subnormals:
        // IDC (Input Denormal) is not set in this case.
        if (*mnt) {
            if (mode & FPLIB_FZ16) {
                *mnt = 0;
            } else {
                // Keep the subnormal: bump the exponent so the value is
                // 0.mnt * 2^(1 - bias) rather than 2^(0 - bias).
                ++*exp;
            }
        }
    }
}

// Split a packed fp32 into sign, biased exponent and mantissa, restoring
// the implicit leading 1 for normal numbers. Under FZ, subnormal inputs
// are flushed to zero and Input Denormal (IDC) is raised.
static inline void
fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode,
            int *flags)
{
    *sgn = x >> (FP32_BITS - 1);
    *exp = FP32_EXP(x);
    *mnt = FP32_MANT(x);

    if (*exp) {
        *mnt |= 1ULL << FP32_MANT_BITS;
    } else {
        // Handle subnormals:
        if (*mnt) {
            if (mode & FPLIB_FZ) {
                *flags |= FPLIB_IDC;
                *mnt = 0;
            } else {
                ++*exp;
            }
        }
    }
}

// Split a packed fp64 into sign, biased exponent and mantissa, restoring
// the implicit leading 1 for normal numbers. Under FZ, subnormal inputs
// are flushed to zero and Input Denormal (IDC) is raised.
static inline void
fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode,
            int *flags)
{
    *sgn = x >> (FP64_BITS - 1);
    *exp = FP64_EXP(x);
    *mnt = FP64_MANT(x);

    if (*exp) {
        *mnt |= 1ULL << FP64_MANT_BITS;
    } else {
        // Handle subnormals:
        if (*mnt) {
            if (mode & FPLIB_FZ) {
                *flags |= FPLIB_IDC;
                *mnt = 0;
            } else {
                ++*exp;
            }
        }
    }
}
461
// Classification predicates. All take the *unpacked* exponent and mantissa
// (the mantissa may carry the implicit bit, which the FP*_MANT masks strip
// where needed). NaN: all-ones exponent with non-zero fraction; the top
// fraction bit distinguishes quiet (1) from signalling (0) NaNs, per the
// Arm (IEEE 754) convention.

static inline int
fp16_is_NaN(int exp, uint16_t mnt)
{
    return exp == FP16_EXP_INF && FP16_MANT(mnt);
}

static inline int
fp32_is_NaN(int exp, uint32_t mnt)
{
    return exp == FP32_EXP_INF && FP32_MANT(mnt);
}

static inline int
fp64_is_NaN(int exp, uint64_t mnt)
{
    return exp == FP64_EXP_INF && FP64_MANT(mnt);
}

// Signalling NaN: a NaN whose top fraction bit is clear.
static inline int
fp16_is_signalling_NaN(int exp, uint16_t mnt)
{
    return fp16_is_NaN(exp, mnt) && !(mnt >> (FP16_MANT_BITS - 1) & 1);
}

static inline int
fp32_is_signalling_NaN(int exp, uint32_t mnt)
{
    return fp32_is_NaN(exp, mnt) && !(mnt >> (FP32_MANT_BITS - 1) & 1);
}

static inline int
fp64_is_signalling_NaN(int exp, uint64_t mnt)
{
    return fp64_is_NaN(exp, mnt) && !(mnt >> (FP64_MANT_BITS - 1) & 1);
}

// Quiet NaN: all-ones exponent with the top fraction bit set (the rest of
// the fraction need not be checked, the quiet bit alone implies NaN).
static inline int
fp16_is_quiet_NaN(int exp, uint16_t mnt)
{
    return exp == FP16_EXP_INF && (mnt >> (FP16_MANT_BITS - 1) & 1);
}

static inline int
fp32_is_quiet_NaN(int exp, uint32_t mnt)
{
    return exp == FP32_EXP_INF && (mnt >> (FP32_MANT_BITS - 1) & 1);
}

static inline int
fp64_is_quiet_NaN(int exp, uint64_t mnt)
{
    return exp == FP64_EXP_INF && (mnt >> (FP64_MANT_BITS - 1) & 1);
}

// Infinity: all-ones exponent with zero fraction.
static inline int
fp16_is_infinity(int exp, uint16_t mnt)
{
    return exp == FP16_EXP_INF && !FP16_MANT(mnt);
}

static inline int
fp32_is_infinity(int exp, uint32_t mnt)
{
    return exp == FP32_EXP_INF && !FP32_MANT(mnt);
}

static inline int
fp64_is_infinity(int exp, uint64_t mnt)
{
    return exp == FP64_EXP_INF && !FP64_MANT(mnt);
}
533
// Turn a NaN operand into the NaN to return: a signalling NaN raises
// Invalid Operation (IOC) and is quietened by setting the top fraction
// bit; in Default-NaN mode the default NaN is substituted instead.
static inline uint16_t
fp16_process_NaN(uint16_t a, int mode, int *flags)
{
    if (!(a >> (FP16_MANT_BITS - 1) & 1)) {
        *flags |= FPLIB_IOC;
        a |= 1ULL << (FP16_MANT_BITS - 1);
    }
    return mode & FPLIB_DN ? fp16_defaultNaN() : a;
}

// As fp16_process_NaN, for single precision.
static inline uint32_t
fp32_process_NaN(uint32_t a, int mode, int *flags)
{
    if (!(a >> (FP32_MANT_BITS - 1) & 1)) {
        *flags |= FPLIB_IOC;
        a |= 1ULL << (FP32_MANT_BITS - 1);
    }
    return mode & FPLIB_DN ? fp32_defaultNaN() : a;
}

// As fp16_process_NaN, for double precision.
static inline uint64_t
fp64_process_NaN(uint64_t a, int mode, int *flags)
{
    if (!(a >> (FP64_MANT_BITS - 1) & 1)) {
        *flags |= FPLIB_IOC;
        a |= 1ULL << (FP64_MANT_BITS - 1);
    }
    return mode & FPLIB_DN ? fp64_defaultNaN() : a;
}
563
// Two-operand NaN propagation. Priority order: signalling NaNs before
// quiet NaNs, and operand 'a' before 'b' within each class. Returns the
// processed NaN, or 0 when neither operand is a NaN (0 is not a valid NaN
// encoding, so it is safe as the "no NaN" sentinel).
static uint16_t
fp16_process_NaNs(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_exp = FP16_EXP(a);
    uint16_t a_mnt = FP16_MANT(a);
    int b_exp = FP16_EXP(b);
    uint16_t b_mnt = FP16_MANT(b);

    // Handle signalling NaNs:
    if (fp16_is_signalling_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_signalling_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (fp16_is_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);

    return 0;
}

// As fp16_process_NaNs, for single precision.
static uint32_t
fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_exp = FP32_EXP(a);
    uint32_t a_mnt = FP32_MANT(a);
    int b_exp = FP32_EXP(b);
    uint32_t b_mnt = FP32_MANT(b);

    // Handle signalling NaNs:
    if (fp32_is_signalling_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_signalling_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (fp32_is_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);

    return 0;
}

// As fp16_process_NaNs, for double precision.
static uint64_t
fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_exp = FP64_EXP(a);
    uint64_t a_mnt = FP64_MANT(a);
    int b_exp = FP64_EXP(b);
    uint64_t b_mnt = FP64_MANT(b);

    // Handle signalling NaNs:
    if (fp64_is_signalling_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_signalling_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (fp64_is_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);

    return 0;
}
632
// Three-operand NaN propagation (used by fused multiply-add). Priority
// order: signalling NaNs before quiet NaNs, and a before b before c within
// each class. Returns the processed NaN, or 0 when no operand is a NaN.
static uint16_t
fp16_process_NaNs3(uint16_t a, uint16_t b, uint16_t c, int mode, int *flags)
{
    int a_exp = FP16_EXP(a);
    uint16_t a_mnt = FP16_MANT(a);
    int b_exp = FP16_EXP(b);
    uint16_t b_mnt = FP16_MANT(b);
    int c_exp = FP16_EXP(c);
    uint16_t c_mnt = FP16_MANT(c);

    // Handle signalling NaNs:
    if (fp16_is_signalling_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_signalling_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);
    if (fp16_is_signalling_NaN(c_exp, c_mnt))
        return fp16_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (fp16_is_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);
    if (fp16_is_NaN(c_exp, c_mnt))
        return fp16_process_NaN(c, mode, flags);

    return 0;
}

// As fp16_process_NaNs3, for single precision.
static uint32_t
fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags)
{
    int a_exp = FP32_EXP(a);
    uint32_t a_mnt = FP32_MANT(a);
    int b_exp = FP32_EXP(b);
    uint32_t b_mnt = FP32_MANT(b);
    int c_exp = FP32_EXP(c);
    uint32_t c_mnt = FP32_MANT(c);

    // Handle signalling NaNs:
    if (fp32_is_signalling_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_signalling_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);
    if (fp32_is_signalling_NaN(c_exp, c_mnt))
        return fp32_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (fp32_is_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);
    if (fp32_is_NaN(c_exp, c_mnt))
        return fp32_process_NaN(c, mode, flags);

    return 0;
}

// As fp16_process_NaNs3, for double precision.
static uint64_t
fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags)
{
    int a_exp = FP64_EXP(a);
    uint64_t a_mnt = FP64_MANT(a);
    int b_exp = FP64_EXP(b);
    uint64_t b_mnt = FP64_MANT(b);
    int c_exp = FP64_EXP(c);
    uint64_t c_mnt = FP64_MANT(c);

    // Handle signalling NaNs:
    if (fp64_is_signalling_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_signalling_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);
    if (fp64_is_signalling_NaN(c_exp, c_mnt))
        return fp64_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (fp64_is_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);
    if (fp64_is_NaN(c_exp, c_mnt))
        return fp64_process_NaN(c, mode, flags);

    return 0;
}
719
720static uint32_t
722{
723 uint32_t sgn = op >> (FP16_BITS - 1);
724 uint32_t mnt = FP16_MANT(op);
725
726 mnt = mnt << (FP32_MANT_BITS - FP16_MANT_BITS);
727
728 return fp32_pack(sgn, FP32_EXP_INF, mnt);
729}
730
731static uint32_t
732fp32_process_NaNs3H(uint32_t a, uint16_t b, uint16_t c, int mode, int *flags)
733{
734 int a_exp = FP32_EXP(a);
735 uint32_t a_mnt = FP32_MANT(a);
736 int b_exp = FP16_EXP(b);
737 uint16_t b_mnt = FP16_MANT(b);
738 int c_exp = FP16_EXP(c);
739 uint16_t c_mnt = FP16_MANT(c);
740
741 // Handle signalling NaNs:
742 if (fp32_is_signalling_NaN(a_exp, a_mnt))
743 return fp32_process_NaN(a, mode, flags);
744 if (fp16_is_signalling_NaN(b_exp, b_mnt))
746 if (fp16_is_signalling_NaN(c_exp, c_mnt))
748
749 // Handle quiet NaNs:
750 if (fp32_is_NaN(a_exp, a_mnt))
751 return fp32_process_NaN(a, mode, flags);
752 if (fp16_is_NaN(b_exp, b_mnt))
754 if (fp16_is_NaN(c_exp, c_mnt))
756
757 return 0;
758}
759
760static uint16_t
761fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags)
762{
763 int biased_exp; // non-negative exponent value for result
764 uint16_t int_mant; // mantissa for result, less than (2 << FP16_MANT_BITS)
765 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5
766
767 assert(rm != FPRounding_TIEAWAY);
768
769 // Flush to zero:
770 if ((mode & FPLIB_FZ16) && exp < 1) {
771 *flags |= FPLIB_UFC;
772 return fp16_zero(sgn);
773 }
774
775 // The bottom FP16_EXP_BITS bits of mnt are orred together:
776 mnt = (4ULL << FP16_MANT_BITS | mnt >> (FP16_EXP_BITS - 1) |
777 ((mnt & ((1ULL << FP16_EXP_BITS) - 1)) != 0));
778
779 if (exp > 0) {
780 biased_exp = exp;
781 int_mant = mnt >> 2;
782 error = mnt & 3;
783 } else {
784 biased_exp = 0;
785 int_mant = lsr16(mnt, 3 - exp);
786 error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1));
787 }
788
789 if (!biased_exp && error) { // xx should also check fpscr_val<11>
790 *flags |= FPLIB_UFC;
791 }
792
793 // Round up:
794 if ((rm == FPLIB_RN && (error == 3 ||
795 (error == 2 && (int_mant & 1)))) ||
796 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
797 ++int_mant;
798 if (int_mant == 1ULL << FP16_MANT_BITS) {
799 // Rounded up from denormalized to normalized
800 biased_exp = 1;
801 }
802 if (int_mant == 2ULL << FP16_MANT_BITS) {
803 // Rounded up to next exponent
804 ++biased_exp;
805 int_mant >>= 1;
806 }
807 }
808
809 // Handle rounding to odd aka Von Neumann rounding:
810 if (error && rm == FPRounding_ODD)
811 int_mant |= 1;
812
813 // Handle overflow:
814 if (!(mode & FPLIB_AHP)) {
815 if (biased_exp >= (int)FP16_EXP_INF) {
816 *flags |= FPLIB_OFC | FPLIB_IXC;
817 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
818 (rm == FPLIB_RM && sgn)) {
819 return fp16_infinity(sgn);
820 } else {
821 return fp16_max_normal(sgn);
822 }
823 }
824 } else {
825 if (biased_exp >= (int)FP16_EXP_INF + 1) {
826 *flags |= FPLIB_IOC;
827 return fp16_pack(sgn, FP16_EXP_INF, -1);
828 }
829 }
830
831 if (error) {
832 *flags |= FPLIB_IXC;
833 }
834
835 return fp16_pack(sgn, biased_exp, int_mant);
836}
837
838static uint16_t
839fp16_round(int sgn, int exp, uint16_t mnt, int mode, int *flags)
840{
841 return fp16_round_(sgn, exp, mnt, mode & 3, mode, flags);
842}
843
// Round a decomposed single-precision value to a packed fp32 result.
// 'exp' is the biased exponent; 'mnt' carries the mantissa with guard
// bits plus sticky information in its low FP32_EXP_BITS bits. 'rm' is the
// rounding mode (must not be TIEAWAY). Exception flags accumulate into
// *flags. Honours FZ (flush-to-zero) from 'mode'.
static uint32_t
fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint32_t int_mant; // mantissa for result, less than (2 << FP32_MANT_BITS)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp32_zero(sgn);
    }

    // The bottom FP32_EXP_BITS bits of mnt are orred together:
    mnt = (4ULL << FP32_MANT_BITS | mnt >> (FP32_EXP_BITS - 1) |
           ((mnt & ((1ULL << FP32_EXP_BITS) - 1)) != 0));

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        // Subnormal result: shift the mantissa down, folding every
        // shifted-out bit into the sticky part of 'error'.
        biased_exp = 0;
        int_mant = lsr32(mnt, 3 - exp);
        error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == 1ULL << FP32_MANT_BITS) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == 2ULL << FP32_MANT_BITS) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (biased_exp >= (int)FP32_EXP_INF) {
        *flags |= FPLIB_OFC | FPLIB_IXC;
        if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
            (rm == FPLIB_RM && sgn)) {
            return fp32_infinity(sgn);
        } else {
            return fp32_max_normal(sgn);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp32_pack(sgn, biased_exp, int_mant);
}
914
915static uint32_t
916fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags)
917{
918 return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags);
919}
920
// Round a decomposed double-precision value to a packed fp64 result.
// 'exp' is the biased exponent; 'mnt' carries the mantissa with guard
// bits plus sticky information in its low FP64_EXP_BITS bits. 'rm' is the
// rounding mode (must not be TIEAWAY). Exception flags accumulate into
// *flags. Honours FZ (flush-to-zero) from 'mode'.
static uint64_t
fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint64_t int_mant; // mantissa for result, less than (2 << FP64_MANT_BITS)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp64_zero(sgn);
    }

    // The bottom FP64_EXP_BITS bits of mnt are orred together:
    mnt = (4ULL << FP64_MANT_BITS | mnt >> (FP64_EXP_BITS - 1) |
           ((mnt & ((1ULL << FP64_EXP_BITS) - 1)) != 0));

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        // Subnormal result: shift the mantissa down, folding every
        // shifted-out bit into the sticky part of 'error'.
        biased_exp = 0;
        int_mant = lsr64(mnt, 3 - exp);
        error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == 1ULL << FP64_MANT_BITS) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == 2ULL << FP64_MANT_BITS) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (biased_exp >= (int)FP64_EXP_INF) {
        *flags |= FPLIB_OFC | FPLIB_IXC;
        if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
            (rm == FPLIB_RM && sgn)) {
            return fp64_infinity(sgn);
        } else {
            return fp64_max_normal(sgn);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp64_pack(sgn, biased_exp, int_mant);
}
991
992static uint64_t
993fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags)
994{
995 return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags);
996}
997
// Half-precision comparisons. All unpack both operands (so FZ16 flushing
// applies first). eq/un are quiet: only signalling NaNs raise IOC;
// ge/gt raise IOC for any NaN. +0 and -0 compare equal.

// Equality: any NaN operand compares unequal.
static int
fp16_compare_eq(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        if (fp16_is_signalling_NaN(a_exp, a_mnt) ||
            fp16_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 0;
    }
    // Bit-identical values are equal; so are +0 and -0.
    return a == b || (!a_mnt && !b_mnt);
}

// Greater-than-or-equal: NaNs are unordered and raise IOC.
static int
fp16_compare_ge(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    // Same sign: compare magnitude, inverting the result for negatives.
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

// Strict greater-than: NaNs are unordered and raise IOC.
static int
fp16_compare_gt(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

// Unordered: true iff either operand is a NaN; only signalling NaNs
// raise IOC.
static int
fp16_compare_un(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        if (fp16_is_signalling_NaN(a_exp, a_mnt) ||
            fp16_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 1;
    }
    return 0;
}
1085
// Single-precision comparisons; same semantics as the fp16 versions
// (quiet eq/un, signalling ge/gt; +0 == -0).

static int
fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        if (fp32_is_signalling_NaN(a_exp, a_mnt) ||
            fp32_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 0;
    }
    // Bit-identical values are equal; so are +0 and -0.
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    // Same sign: compare magnitude, inverting the result for negatives.
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

static int
fp32_compare_un(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        if (fp32_is_signalling_NaN(a_exp, a_mnt) ||
            fp32_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 1;
    }
    return 0;
}
1173
// Double-precision comparisons; same semantics as the fp16 versions
// (quiet eq/un, signalling ge/gt; +0 == -0).

static int
fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        if (fp64_is_signalling_NaN(a_exp, a_mnt) ||
            fp64_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 0;
    }
    // Bit-identical values are equal; so are +0 and -0.
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    // Same sign: compare magnitude, inverting the result for negatives.
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

static int
fp64_compare_un(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        if (fp64_is_signalling_NaN(a_exp, a_mnt) ||
            fp64_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 1;
    }
    return 0;
}
1261
// Half-precision addition: returns a + b (or a - b when 'neg' is set),
// rounded per 'mode', with exception flags accumulated in '*flags'.
static uint16_t
fp16_add(uint16_t a, uint16_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint16_t a_mnt, b_mnt, x, x_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    // Propagate NaN operands:
    if ((x = fp16_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Subtraction is addition with b's sign flipped:
    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == FP16_EXP_INF && b_exp == FP16_EXP_INF && a_sgn != b_sgn) {
        // Adding infinities of opposite sign is invalid:
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    } else if (a_exp == FP16_EXP_INF) {
        return fp16_infinity(a_sgn);
    } else if (b_exp == FP16_EXP_INF) {
        return fp16_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp16_zero(a_sgn);
    }

    // Align the mantissas, folding shifted-out bits into the bottom
    // (sticky) bit so rounding still sees an inexact result:
    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr16(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl16(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr16(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl16(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        // Result takes the sign of the larger magnitude:
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp16_zero((mode & 3) == 2);
    }

    x_mnt = fp16_normalise(x_mnt, &x_exp);

    return fp16_round(x_sgn, x_exp + FP16_EXP_BITS - 3, x_mnt << 1,
                      mode, flags);
}
1321
// Single-precision addition: returns a + b (or a - b when 'neg' is set),
// rounded per 'mode', with exception flags accumulated in '*flags'.
static uint32_t
fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x, x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    // Propagate NaN operands:
    if ((x = fp32_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Subtraction is addition with b's sign flipped:
    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == FP32_EXP_INF && b_exp == FP32_EXP_INF && a_sgn != b_sgn) {
        // Adding infinities of opposite sign is invalid:
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    } else if (a_exp == FP32_EXP_INF) {
        return fp32_infinity(a_sgn);
    } else if (b_exp == FP32_EXP_INF) {
        return fp32_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp32_zero(a_sgn);
    }

    // Align the mantissas, folding shifted-out bits into the bottom
    // (sticky) bit so rounding still sees an inexact result:
    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr32(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr32(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        // Result takes the sign of the larger magnitude:
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp32_zero((mode & 3) == 2);
    }

    x_mnt = fp32_normalise(x_mnt, &x_exp);

    return fp32_round(x_sgn, x_exp + FP32_EXP_BITS - 3, x_mnt << 1,
                      mode, flags);
}
1381
// Double-precision addition: returns a + b (or a - b when 'neg' is set),
// rounded per 'mode', with exception flags accumulated in '*flags'.
static uint64_t
fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint64_t a_mnt, b_mnt, x, x_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    // Propagate NaN operands:
    if ((x = fp64_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Subtraction is addition with b's sign flipped:
    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == FP64_EXP_INF && b_exp == FP64_EXP_INF && a_sgn != b_sgn) {
        // Adding infinities of opposite sign is invalid:
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    } else if (a_exp == FP64_EXP_INF) {
        return fp64_infinity(a_sgn);
    } else if (b_exp == FP64_EXP_INF) {
        return fp64_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp64_zero(a_sgn);
    }

    // Align the mantissas, folding shifted-out bits into the bottom
    // (sticky) bit so rounding still sees an inexact result:
    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr64(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr64(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        // Result takes the sign of the larger magnitude:
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp64_zero((mode & 3) == 2);
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp + FP64_EXP_BITS - 3, x_mnt << 1,
                      mode, flags);
}
1441
// Half-precision halved addition: returns (a + b) / 2 (or (a - b) / 2
// when 'neg' is set) with a single rounding; the halving is applied to
// the exact intermediate via the exponent decrement before rounding.
static uint16_t
fp16_halved_add(uint16_t a, uint16_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint16_t a_mnt, b_mnt, x, x_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    // Propagate NaN operands:
    if ((x = fp16_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Subtraction is addition with b's sign flipped:
    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == FP16_EXP_INF && b_exp == FP16_EXP_INF && a_sgn != b_sgn) {
        // Adding infinities of opposite sign is invalid:
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    } else if (a_exp == FP16_EXP_INF) {
        return fp16_infinity(a_sgn);
    } else if (b_exp == FP16_EXP_INF) {
        return fp16_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp16_zero(a_sgn);
    }

    // Align the mantissas, folding shifted-out bits into the bottom
    // (sticky) bit so rounding still sees an inexact result:
    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr16(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl16(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr16(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl16(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        // Result takes the sign of the larger magnitude:
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp16_zero((mode & 3) == 2);
    }

    // Halve the exact result by dropping one from the exponent:
    x_exp -= 1; // halved
    x_mnt = fp16_normalise(x_mnt, &x_exp);

    return fp16_round(x_sgn, x_exp + FP16_EXP_BITS - 3, x_mnt << 1,
                      mode, flags);
}
1502
// Half-precision multiplication with a single rounding at the end.
static uint16_t
fp16_mul(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint16_t a_mnt, b_mnt, x;
    uint32_t x_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    // Propagate NaN operands:
    if ((x = fp16_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == FP16_EXP_INF && !b_mnt) ||
        (b_exp == FP16_EXP_INF && !a_mnt)) {
        // Infinity times zero is invalid:
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    } else if (a_exp == FP16_EXP_INF || b_exp == FP16_EXP_INF) {
        return fp16_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp16_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1;
    x_mnt = (uint32_t)a_mnt * b_mnt;
    x_mnt = fp32_normalise(x_mnt, &x_exp);

    // Convert to FP16_BITS bits, collapsing error into bottom bit:
    x_mnt = lsr32(x_mnt, FP16_BITS - 1) | !!lsl32(x_mnt, FP16_BITS + 1);

    return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
}
1539
// Single-precision multiplication with a single rounding at the end.
static uint32_t
fp32_mul(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x;
    uint64_t x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    // Propagate NaN operands:
    if ((x = fp32_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == FP32_EXP_INF && !b_mnt) ||
        (b_exp == FP32_EXP_INF && !a_mnt)) {
        // Infinity times zero is invalid:
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    } else if (a_exp == FP32_EXP_INF || b_exp == FP32_EXP_INF) {
        return fp32_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp32_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1;
    x_mnt = (uint64_t)a_mnt * b_mnt;
    x_mnt = fp64_normalise(x_mnt, &x_exp);

    // Convert to FP32_BITS bits, collapsing error into bottom bit:
    x_mnt = lsr64(x_mnt, FP32_BITS - 1) | !!lsl64(x_mnt, FP32_BITS + 1);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}
1576
// Double-precision multiplication with a single rounding at the end.
// Uses a 128-bit intermediate product (mul62x62 / fp128_normalise).
static uint64_t
fp64_mul(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint64_t a_mnt, b_mnt, x;
    uint64_t x0_mnt, x1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    // Propagate NaN operands:
    if ((x = fp64_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == FP64_EXP_INF && !b_mnt) ||
        (b_exp == FP64_EXP_INF && !a_mnt)) {
        // Infinity times zero is invalid:
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    } else if (a_exp == FP64_EXP_INF || b_exp == FP64_EXP_INF) {
        return fp64_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp64_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1;
    mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt);
    fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);

    // Convert to FP64_BITS bits, collapsing error into bottom bit:
    x0_mnt = x1_mnt << 1 | !!x0_mnt;

    return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags);
}
1613
// Half-precision fused multiply-add: computes (a + b * c) * 2^scale with
// a single rounding at the end.
static uint16_t
fp16_muladd(uint16_t a, uint16_t b, uint16_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint16_t a_mnt, b_mnt, c_mnt, x;
    uint32_t x_mnt, y_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp16_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp16_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (fp16_is_quiet_NaN(a_exp, a_mnt) &&
        ((!b_mnt && fp16_is_infinity(c_exp, c_mnt)) ||
         (!c_mnt && fp16_is_infinity(b_exp, b_mnt)))) {
        x = fp16_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == FP16_EXP_INF && !c_mnt) ||
        (c_exp == FP16_EXP_INF && !b_mnt) ||
        (a_exp == FP16_EXP_INF &&
         (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    }
    if (a_exp == FP16_EXP_INF)
        return fp16_infinity(a_sgn);
    if (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF)
        return fp16_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp16_zero(a_sgn);

    // Addend in the common fixed-point form:
    x_sgn = a_sgn;
    x_exp = a_exp + 2 * FP16_EXP_BITS - 3;
    x_mnt = (uint32_t)a_mnt << (FP16_MANT_BITS + 4);

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1 - 3;
    y_mnt = (uint32_t)b_mnt * c_mnt << 3;
    if (!y_mnt) {
        y_exp = x_exp;
    }

    // Add, aligning mantissas and folding shifted-out bits into the
    // bottom (sticky) bit:
    if (x_exp >= y_exp) {
        y_mnt = (lsr32(y_mnt, x_exp - y_exp) |
                 !!(y_mnt & (lsl32(1, x_exp - y_exp) - 1)));
        y_exp = x_exp;
    } else {
        x_mnt = (lsr32(x_mnt, y_exp - x_exp) |
                 !!(x_mnt & (lsl32(1, y_exp - x_exp) - 1)));
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        x_mnt = x_mnt + y_mnt;
    } else if (x_mnt >= y_mnt) {
        x_mnt = x_mnt - y_mnt;
    } else {
        // Result takes the sign of the larger magnitude:
        x_sgn ^= 1;
        x_mnt = y_mnt - x_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp16_zero((mode & 3) == 2);
    }

    // Normalise into FP16_BITS bits, collapsing error into bottom bit:
    x_mnt = fp32_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);

    return fp16_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
}
1698
// Single-precision fused multiply-add: computes (a + b * c) * 2^scale with
// a single rounding at the end.
static uint32_t
fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint32_t a_mnt, b_mnt, c_mnt, x;
    uint64_t x_mnt, y_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp32_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (fp32_is_quiet_NaN(a_exp, a_mnt) &&
        ((!b_mnt && fp32_is_infinity(c_exp, c_mnt)) ||
         (!c_mnt && fp32_is_infinity(b_exp, b_mnt)))) {
        x = fp32_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == FP32_EXP_INF && !c_mnt) ||
        (c_exp == FP32_EXP_INF && !b_mnt) ||
        (a_exp == FP32_EXP_INF &&
         (b_exp == FP32_EXP_INF || c_exp == FP32_EXP_INF) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }
    if (a_exp == FP32_EXP_INF)
        return fp32_infinity(a_sgn);
    if (b_exp == FP32_EXP_INF || c_exp == FP32_EXP_INF)
        return fp32_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp32_zero(a_sgn);

    // Addend in the common fixed-point form:
    x_sgn = a_sgn;
    x_exp = a_exp + 2 * FP32_EXP_BITS - 3;
    x_mnt = (uint64_t)a_mnt << (FP32_MANT_BITS + 4);

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1 - 3;
    y_mnt = (uint64_t)b_mnt * c_mnt << 3;
    if (!y_mnt) {
        y_exp = x_exp;
    }

    // Add, aligning mantissas and folding shifted-out bits into the
    // bottom (sticky) bit:
    if (x_exp >= y_exp) {
        y_mnt = (lsr64(y_mnt, x_exp - y_exp) |
                 !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1)));
        y_exp = x_exp;
    } else {
        x_mnt = (lsr64(x_mnt, y_exp - x_exp) |
                 !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1)));
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        x_mnt = x_mnt + y_mnt;
    } else if (x_mnt >= y_mnt) {
        x_mnt = x_mnt - y_mnt;
    } else {
        // Result takes the sign of the larger magnitude:
        x_sgn ^= 1;
        x_mnt = y_mnt - x_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp32_zero((mode & 3) == 2);
    }

    // Normalise into FP32_BITS bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
}
1783
// Double-precision fused multiply-add: computes (a + b * c) * 2^scale with
// a single rounding at the end, using a 128-bit intermediate.
static uint64_t
fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint64_t a_mnt, b_mnt, c_mnt, x;
    uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp64_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (fp64_is_quiet_NaN(a_exp, a_mnt) &&
        ((!b_mnt && fp64_is_infinity(c_exp, c_mnt)) ||
         (!c_mnt && fp64_is_infinity(b_exp, b_mnt)))) {
        x = fp64_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == FP64_EXP_INF && !c_mnt) ||
        (c_exp == FP64_EXP_INF && !b_mnt) ||
        (a_exp == FP64_EXP_INF &&
         (b_exp == FP64_EXP_INF || c_exp == FP64_EXP_INF) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }
    if (a_exp == FP64_EXP_INF)
        return fp64_infinity(a_sgn);
    if (b_exp == FP64_EXP_INF || c_exp == FP64_EXP_INF)
        return fp64_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp64_zero(a_sgn);

    // Addend as a 128-bit fixed-point value:
    x_sgn = a_sgn;
    x_exp = a_exp + FP64_EXP_BITS;
    x0_mnt = 0;
    x1_mnt = a_mnt;

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1 - 3;
    mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3);
    if (!y0_mnt && !y1_mnt) {
        y_exp = x_exp;
    }

    // Add, aligning the 128-bit mantissas and folding shifted-out bits
    // into the bottom (sticky) bit:
    if (x_exp >= y_exp) {
        uint64_t t0, t1;
        lsl128(&t0, &t1, y0_mnt, y1_mnt,
               x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0);
        lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp);
        y0_mnt |= !!(t0 | t1);
        y_exp = x_exp;
    } else {
        uint64_t t0, t1;
        lsl128(&t0, &t1, x0_mnt, x1_mnt,
               y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0);
        lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp);
        x0_mnt |= !!(t0 | t1);
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
    } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) {
        sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
    } else {
        // Result takes the sign of the larger magnitude:
        x_sgn ^= 1;
        sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt);
    }

    if (!x0_mnt && !x1_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp64_zero((mode & 3) == 2);
    }

    // Normalise into FP64_BITS bits, collapsing error into bottom bit:
    fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
    x0_mnt = x1_mnt << 1 | !!x0_mnt;

    return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags);
}
1875
1876static uint32_t
1877fp32_muladdh(uint32_t a, uint16_t b, uint16_t c, int scale,
1878 int mode, int *flags, bool rm_odd=false)
1879{
1880 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1881 uint16_t b_mnt, c_mnt;
1882 uint32_t a_mnt, x;
1883 uint64_t x_mnt, y_mnt;
1884
1885 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1886 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1887 fp16_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);
1888
1889 x = fp32_process_NaNs3H(a, b, c, mode, flags);
1890
1891 // Quiet NaN added to product of zero and infinity:
1892 if (fp32_is_quiet_NaN(a_exp, a_mnt) &&
1893 ((!b_mnt && fp16_is_infinity(c_exp, c_mnt)) ||
1894 (!c_mnt && fp16_is_infinity(b_exp, b_mnt)))) {
1895 x = fp32_defaultNaN();
1896 *flags |= FPLIB_IOC;
1897 }
1898
1899 if (x) {
1900 return x;
1901 }
1902
1903 // Handle infinities and zeroes:
1904 if ((b_exp == FP16_EXP_INF && !c_mnt) ||
1905 (c_exp == FP16_EXP_INF && !b_mnt) ||
1906 (a_exp == FP32_EXP_INF &&
1907 (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF) &&
1908 (a_sgn != (b_sgn ^ c_sgn)))) {
1909 *flags |= FPLIB_IOC;
1910 return fp32_defaultNaN();
1911 }
1912 if (a_exp == FP32_EXP_INF)
1913 return fp32_infinity(a_sgn);
1914 if (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF)
1915 return fp32_infinity(b_sgn ^ c_sgn);
1916 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1917 return fp32_zero(a_sgn);
1918
1919 x_sgn = a_sgn;
1920 x_exp = a_exp + 2 * FP32_EXP_BITS - 3;
1921 x_mnt = (uint64_t)a_mnt << (FP32_MANT_BITS + 4);
1922
1923 // Multiply:
1924 y_sgn = b_sgn ^ c_sgn;
1925 y_exp = b_exp + c_exp - 2 * FP16_EXP_BIAS + 2 * FP32_EXP_BIAS -
1926 FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1 - 3;
1927 y_mnt = (uint64_t)b_mnt * c_mnt << (3 +
1929 if (!y_mnt) {
1930 y_exp = x_exp;
1931 }
1932
1933 // Add:
1934 if (x_exp >= y_exp) {
1935 y_mnt = (lsr64(y_mnt, x_exp - y_exp) |
1936 !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1)));
1937 y_exp = x_exp;
1938 } else {
1939 x_mnt = (lsr64(x_mnt, y_exp - x_exp) |
1940 !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1)));
1941 x_exp = y_exp;
1942 }
1943 if (x_sgn == y_sgn) {
1944 x_mnt = x_mnt + y_mnt;
1945 } else if (x_mnt >= y_mnt) {
1946 x_mnt = x_mnt - y_mnt;
1947 } else {
1948 x_sgn ^= 1;
1949 x_mnt = y_mnt - x_mnt;
1950 }
1951
1952 if (!x_mnt) {
1953 // Sign of exact zero result depends on rounding mode
1954 if (rm_odd) {
1955 return fp32_zero(x_sgn);
1956 } else {
1957 return fp32_zero((mode & 3) == 2);
1958 }
1959 }
1960
1961 // Normalise into FP32_BITS bits, collapsing error into bottom bit:
1962 x_mnt = fp64_normalise(x_mnt, &x_exp);
1963 x_mnt = x_mnt >> (FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);
1964
1965 if (rm_odd) {
1966 return fp32_round_(x_sgn, x_exp + scale, x_mnt,
1967 FPRounding_ODD, mode, flags);
1968 } else {
1969 return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
1970 }
1971}
1972
// Half-precision division, rounded per 'mode'.
static uint16_t
fp16_div(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint16_t a_mnt, b_mnt, x;
    uint32_t x_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    // Propagate NaN operands:
    if ((x = fp16_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == FP16_EXP_INF && b_exp == FP16_EXP_INF) ||
        (!a_mnt && !b_mnt)) {
        // inf/inf and 0/0 are invalid:
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    }
    if (a_exp == FP16_EXP_INF || !b_mnt) {
        // Finite, non-zero dividend over zero raises Divide-by-Zero:
        if (a_exp != FP16_EXP_INF)
            *flags |= FPLIB_DZC;
        return fp16_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == FP16_EXP_INF)
        return fp16_zero(a_sgn ^ b_sgn);

    // Divide, setting bottom bit if inexact:
    a_mnt = fp16_normalise(a_mnt, &a_exp);
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + (FP16_EXP_BIAS + FP16_BITS + 2 * FP16_EXP_BITS - 3);
    x_mnt = ((uint32_t)a_mnt << (FP16_MANT_BITS - FP16_EXP_BITS + 3)) / b_mnt;
    x_mnt |= (x_mnt * b_mnt !=
              (uint32_t)a_mnt << (FP16_MANT_BITS - FP16_EXP_BITS + 3));

    // Normalise into FP16_BITS bits, collapsing error into bottom bit:
    x_mnt = fp32_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);

    return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
}
2014
// Single-precision division, rounded per 'mode'.
static uint32_t
fp32_div(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x;
    uint64_t x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    // Propagate NaN operands:
    if ((x = fp32_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == FP32_EXP_INF && b_exp == FP32_EXP_INF) ||
        (!a_mnt && !b_mnt)) {
        // inf/inf and 0/0 are invalid:
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }
    if (a_exp == FP32_EXP_INF || !b_mnt) {
        // Finite, non-zero dividend over zero raises Divide-by-Zero:
        if (a_exp != FP32_EXP_INF)
            *flags |= FPLIB_DZC;
        return fp32_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == FP32_EXP_INF)
        return fp32_zero(a_sgn ^ b_sgn);

    // Divide, setting bottom bit if inexact:
    a_mnt = fp32_normalise(a_mnt, &a_exp);
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + (FP32_EXP_BIAS + FP32_BITS + 2 * FP32_EXP_BITS - 3);
    x_mnt = ((uint64_t)a_mnt << (FP32_MANT_BITS - FP32_EXP_BITS + 3)) / b_mnt;
    x_mnt |= (x_mnt * b_mnt !=
              (uint64_t)a_mnt << (FP32_MANT_BITS - FP32_EXP_BITS + 3));

    // Normalise into FP32_BITS bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}
2056
// Double-precision division. A 64-bit quotient cannot be formed by a
// single hardware divide, so the divisor's reciprocal is refined with
// Newton-Raphson and the quotient corrected by an exact check.
static uint64_t
fp64_div(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c;
    uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    // Propagate NaN operands:
    if ((x = fp64_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == FP64_EXP_INF && b_exp == FP64_EXP_INF) ||
        (!a_mnt && !b_mnt)) {
        // inf/inf and 0/0 are invalid:
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }
    if (a_exp == FP64_EXP_INF || !b_mnt) {
        // Finite, non-zero dividend over zero raises Divide-by-Zero:
        if (a_exp != FP64_EXP_INF)
            *flags |= FPLIB_DZC;
        return fp64_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == FP64_EXP_INF)
        return fp64_zero(a_sgn ^ b_sgn);

    // Find reciprocal of divisor with Newton-Raphson:
    a_mnt = fp64_normalise(a_mnt, &a_exp);
    b_mnt = fp64_normalise(b_mnt, &b_exp);
    x_mnt = ~(uint64_t)0 / (b_mnt >> 31);
    mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt);
    sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32);
    mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33);

    // Multiply by dividend:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + FP64_EXP_BIAS + 8;
    mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4);
    x_mnt = x1_mnt;

    // This is an underestimate, so try adding one:
    mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1);
    c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11);
    if (c <= 0) {
        ++x_mnt;
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    // 'c' is non-zero exactly when the quotient is inexact; fold it
    // into the sticky bit for rounding:
    return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
}
2111
2112static void
2113set_fpscr0(FPSCR &fpscr, int flags)
2114{
2115 if (flags & FPLIB_IDC) {
2116 fpscr.idc = 1;
2117 }
2118 if (flags & FPLIB_IOC) {
2119 fpscr.ioc = 1;
2120 }
2121 if (flags & FPLIB_DZC) {
2122 fpscr.dzc = 1;
2123 }
2124 if (flags & FPLIB_OFC) {
2125 fpscr.ofc = 1;
2126 }
2127 if (flags & FPLIB_UFC) {
2128 fpscr.ufc = 1;
2129 }
2130 if (flags & FPLIB_IXC) {
2131 fpscr.ixc = 1;
2132 }
2133}
2134
2135static uint16_t
2136fp16_scale(uint16_t a, int16_t b, int mode, int *flags)
2137{
2138 int a_sgn, a_exp;
2139 uint16_t a_mnt;
2140
2141 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2142
2143 // Handle NaNs:
2144 if (fp16_is_NaN(a_exp, a_mnt)) {
2145 return fp16_process_NaN(a, mode, flags);
2146 }
2147
2148 // Handle zeroes:
2149 if (!a_mnt) {
2150 return fp16_zero(a_sgn);
2151 }
2152
2153 // Handle infinities:
2154 if (a_exp == FP16_EXP_INF) {
2155 return fp16_infinity(a_sgn);
2156 }
2157
2158 b = b < -300 ? -300 : b;
2159 b = b > 300 ? 300 : b;
2160 a_exp += b;
2161 a_mnt <<= 3;
2162
2163 a_mnt = fp16_normalise(a_mnt, &a_exp);
2164
2165 return fp16_round(a_sgn, a_exp + FP16_EXP_BITS - 3, a_mnt << 1,
2166 mode, flags);
2167}
2168
2169static uint32_t
2170fp32_scale(uint32_t a, int32_t b, int mode, int *flags)
2171{
2172 int a_sgn, a_exp;
2173 uint32_t a_mnt;
2174
2175 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2176
2177 // Handle NaNs:
2178 if (fp32_is_NaN(a_exp, a_mnt)) {
2179 return fp32_process_NaN(a, mode, flags);
2180 }
2181
2182 // Handle zeroes:
2183 if (!a_mnt) {
2184 return fp32_zero(a_sgn);
2185 }
2186
2187 // Handle infinities:
2188 if (a_exp == FP32_EXP_INF) {
2189 return fp32_infinity(a_sgn);
2190 }
2191
2192 b = b < -300 ? -300 : b;
2193 b = b > 300 ? 300 : b;
2194 a_exp += b;
2195 a_mnt <<= 3;
2196
2197 a_mnt = fp32_normalise(a_mnt, &a_exp);
2198
2199 return fp32_round(a_sgn, a_exp + FP32_EXP_BITS - 3, a_mnt << 1,
2200 mode, flags);
2201}
2202
2203static uint64_t
2204fp64_scale(uint64_t a, int64_t b, int mode, int *flags)
2205{
2206 int a_sgn, a_exp;
2207 uint64_t a_mnt;
2208
2209 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2210
2211 // Handle NaNs:
2212 if (fp64_is_NaN(a_exp, a_mnt)) {
2213 return fp64_process_NaN(a, mode, flags);
2214 }
2215
2216 // Handle zeroes:
2217 if (!a_mnt) {
2218 return fp64_zero(a_sgn);
2219 }
2220
2221 // Handle infinities:
2222 if (a_exp == FP64_EXP_INF) {
2223 return fp64_infinity(a_sgn);
2224 }
2225
2226 b = b < -3000 ? -3000 : b;
2227 b = b > 3000 ? 3000 : b;
2228 a_exp += b;
2229 a_mnt <<= 3;
2230
2231 a_mnt = fp64_normalise(a_mnt, &a_exp);
2232
2233 return fp64_round(a_sgn, a_exp + FP64_EXP_BITS - 3, a_mnt << 1,
2234 mode, flags);
2235}
2236
// Half-precision square root: an initial linear estimate is refined by
// two "x = (a / x + x) / 2" iterations, then corrected exactly by
// squaring the candidate result.
static uint16_t
fp16_sqrt(uint16_t a, int mode, int *flags)
{
    int a_sgn, a_exp, x_sgn, x_exp;
    uint16_t a_mnt, x_mnt;
    uint32_t x, t0, t1;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (fp16_is_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);

    // Handle infinities and zeroes:
    if (!a_mnt)
        return fp16_zero(a_sgn);
    if (a_exp == FP16_EXP_INF && !a_sgn)
        return fp16_infinity(a_sgn);
    if (a_sgn) {
        // Square root of a negative number is invalid:
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    }

    // Adjust exponent parity so halving it later is exact:
    a_mnt = fp16_normalise(a_mnt, &a_exp);
    if (a_exp & 1) {
        ++a_exp;
        a_mnt >>= 1;
    }

    // x = (a * 3 + 5) / 8
    x = ((uint32_t)a_mnt << 14) + ((uint32_t)a_mnt << 13) + ((uint32_t)5 << 28);

    // x = (a / x + x) / 2; // 8-bit accuracy
    x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15;

    x_sgn = 0;
    x_exp = (a_exp + 27) >> 1;
    x_mnt = ((x - (1 << 18)) >> 19) + 1;
    // Verify by squaring the candidate; back off by one on overshoot:
    t1 = (uint32_t)x_mnt * x_mnt;
    t0 = (uint32_t)a_mnt << 9;
    if (t1 > t0) {
        --x_mnt;
    }

    x_mnt = fp16_normalise(x_mnt, &x_exp);

    // (t1 != t0) is the inexact/sticky bit for rounding:
    return fp16_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
}
2288
// Single-precision square root: an initial linear estimate is refined by
// three "x = (a / x + x) / 2" iterations, then corrected exactly by
// squaring the candidate result.
static uint32_t
fp32_sqrt(uint32_t a, int mode, int *flags)
{
    int a_sgn, a_exp, x_sgn, x_exp;
    uint32_t a_mnt, x, x_mnt;
    uint64_t t0, t1;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (fp32_is_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);

    // Handle infinities and zeroes:
    if (!a_mnt)
        return fp32_zero(a_sgn);
    if (a_exp == FP32_EXP_INF && !a_sgn)
        return fp32_infinity(a_sgn);
    if (a_sgn) {
        // Square root of a negative number is invalid:
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }

    // Adjust exponent parity so halving it later is exact:
    a_mnt = fp32_normalise(a_mnt, &a_exp);
    if (!(a_exp & 1)) {
        ++a_exp;
        a_mnt >>= 1;
    }

    // x = (a * 3 + 5) / 8
    x = (a_mnt >> 2) + (a_mnt >> 3) + ((uint32_t)5 << 28);

    // x = (a / x + x) / 2; // 8-bit accuracy
    x = (a_mnt / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = (a_mnt / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 32-bit accuracy
    x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1);

    x_sgn = 0;
    x_exp = (a_exp + 147) >> 1;
    x_mnt = ((x - (1 << 5)) >> 6) + 1;
    // Verify by squaring the candidate; back off by one on overshoot:
    t1 = (uint64_t)x_mnt * x_mnt;
    t0 = (uint64_t)a_mnt << 19;
    if (t1 > t0) {
        --x_mnt;
    }

    x_mnt = fp32_normalise(x_mnt, &x_exp);

    // (t1 != t0) is the inexact/sticky bit for rounding:
    return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
}
2343
// Double-precision square root.
//
// Same scheme as fp32_sqrt: normalise, fix the exponent parity, start
// from x ~= (3a + 5)/8 and run Newton-Raphson to 32-bit accuracy in a
// uint32.  The 32-bit result is then extended to 64-bit accuracy via a
// Newton-Raphson reciprocal refinement using the 128-bit helper
// arithmetic (mul64x32 / mul62x62 / lsl128 / lsr128), and the last bit
// is fixed by an exact 128-bit comparison of the squared candidate
// against the operand, which also provides the sticky bit.
static uint64_t
fp64_sqrt(uint64_t a, int mode, int *flags)
{
    int a_sgn, a_exp, x_sgn, x_exp, c;
    uint64_t a_mnt, x_mnt, r, x0, x1;
    uint32_t x;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (fp64_is_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);

    // Handle infinities and zeroes:
    if (!a_mnt)
        return fp64_zero(a_sgn);
    if (a_exp == FP64_EXP_INF && !a_sgn)
        return fp64_infinity(a_sgn);
    if (a_sgn) {
        // Square root of a negative, non-zero operand: invalid op.
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }

    a_mnt = fp64_normalise(a_mnt, &a_exp);
    // Fix the exponent parity so the >> 1 of the biased exponent
    // below loses no information (note the parity target differs from
    // fp32_sqrt because the bias constant 1053 differs):
    if (a_exp & 1) {
        ++a_exp;
        a_mnt >>= 1;
    }

    // x = (a * 3 + 5) / 8
    x = (a_mnt >> 34) + (a_mnt >> 35) + ((uint32_t)5 << 28);

    // x = (a / x + x) / 2; // 8-bit accuracy
    x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 32-bit accuracy
    x = ((a_mnt / x) >> 2) + (x >> 1);

    // r = 1 / x; // 32-bit accuracy
    r = ((uint64_t)1 << 62) / x;

    // r = r * (2 - x * r); // 64-bit accuracy
    mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r);
    lsr128(&x0, &x1, x0, x1, 31);

    // x = (x + a * r) / 2; // 64-bit accuracy
    mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2);
    lsl128(&x0, &x1, x0, x1, 5);
    lsr128(&x0, &x1, x0, x1, 56);

    x0 = ((uint64_t)x << 31) + (x0 >> 1);

    x_sgn = 0;
    x_exp = (a_exp + 1053) >> 1;
    x_mnt = x0;
    x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1;
    // Exact correction: compare x_mnt^2 (as a 128-bit value) with the
    // operand and step the mantissa down if the estimate overshot.
    mul62x62(&x0, &x1, x_mnt, x_mnt);
    lsl128(&x0, &x1, x0, x1, 19);
    c = cmp128(x0, x1, 0, a_mnt);
    if (c > 0)
        --x_mnt;

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
}
2413
2414static int
2415modeConv(FPSCR fpscr)
2416{
2417 uint32_t x = (uint32_t)fpscr;
2418 return (x >> 22 & 0xf) | (x >> 19 & 1 ? FPLIB_FZ16 : 0);
2419 // AHP bit is ignored. Only fplibConvert uses AHP.
2420}
2421
// Fold the accumulated fplib exception flags back into the FPSCR
// cumulative status bits (IDC/IOC/DZC/OFC/UFC/IXC).
static void
set_fpscr(FPSCR &fpscr, int flags)
{
    // translate back to FPSCR
    bool underflow = false;
    if (flags & FPLIB_IDC) {
        fpscr.idc = 1;
    }
    if (flags & FPLIB_IOC) {
        fpscr.ioc = 1;
    }
    if (flags & FPLIB_DZC) {
        fpscr.dzc = 1;
    }
    if (flags & FPLIB_OFC) {
        fpscr.ofc = 1;
    }
    if (flags & FPLIB_UFC) {
        underflow = true; //xx Why is this required?
        fpscr.ufc = 1;
    }
    // Inexact is suppressed when the operation underflowed while
    // flush-to-zero is enabled.
    if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) {
        fpscr.ixc = 1;
    }
}
2447
2448template <>
2449bool
2450fplibCompareEQ(uint16_t a, uint16_t b, FPSCR &fpscr)
2451{
2452 int flags = 0;
2453 int x = fp16_compare_eq(a, b, modeConv(fpscr), &flags);
2454 set_fpscr(fpscr, flags);
2455 return x;
2456}
2457
2458template <>
2459bool
2460fplibCompareGE(uint16_t a, uint16_t b, FPSCR &fpscr)
2461{
2462 int flags = 0;
2463 int x = fp16_compare_ge(a, b, modeConv(fpscr), &flags);
2464 set_fpscr(fpscr, flags);
2465 return x;
2466}
2467
2468template <>
2469bool
2470fplibCompareGT(uint16_t a, uint16_t b, FPSCR &fpscr)
2471{
2472 int flags = 0;
2473 int x = fp16_compare_gt(a, b, modeConv(fpscr), &flags);
2474 set_fpscr(fpscr, flags);
2475 return x;
2476}
2477
2478template <>
2479bool
2480fplibCompareUN(uint16_t a, uint16_t b, FPSCR &fpscr)
2481{
2482 int flags = 0;
2483 int x = fp16_compare_un(a, b, modeConv(fpscr), &flags);
2484 set_fpscr(fpscr, flags);
2485 return x;
2486}
2487
2488template <>
2489bool
2490fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr)
2491{
2492 int flags = 0;
2493 int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags);
2494 set_fpscr(fpscr, flags);
2495 return x;
2496}
2497
2498template <>
2499bool
2500fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr)
2501{
2502 int flags = 0;
2503 int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags);
2504 set_fpscr(fpscr, flags);
2505 return x;
2506}
2507
2508template <>
2509bool
2510fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr)
2511{
2512 int flags = 0;
2513 int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags);
2514 set_fpscr(fpscr, flags);
2515 return x;
2516}
2517
2518template <>
2519bool
2520fplibCompareUN(uint32_t a, uint32_t b, FPSCR &fpscr)
2521{
2522 int flags = 0;
2523 int x = fp32_compare_un(a, b, modeConv(fpscr), &flags);
2524 set_fpscr(fpscr, flags);
2525 return x;
2526}
2527
2528template <>
2529bool
2530fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr)
2531{
2532 int flags = 0;
2533 int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags);
2534 set_fpscr(fpscr, flags);
2535 return x;
2536}
2537
2538template <>
2539bool
2540fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr)
2541{
2542 int flags = 0;
2543 int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags);
2544 set_fpscr(fpscr, flags);
2545 return x;
2546}
2547
2548template <>
2549bool
2550fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr)
2551{
2552 int flags = 0;
2553 int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags);
2554 set_fpscr(fpscr, flags);
2555 return x;
2556}
2557
2558template <>
2559bool
2560fplibCompareUN(uint64_t a, uint64_t b, FPSCR &fpscr)
2561{
2562 int flags = 0;
2563 int x = fp64_compare_un(a, b, modeConv(fpscr), &flags);
2564 set_fpscr(fpscr, flags);
2565 return x;
2566}
2567
2568template <>
2569uint16_t
2570fplibAbs(uint16_t op)
2571{
2572 return op & ~(1ULL << (FP16_BITS - 1));
2573}
2574
2575template <>
2576uint32_t
2577fplibAbs(uint32_t op)
2578{
2579 return op & ~(1ULL << (FP32_BITS - 1));
2580}
2581
2582template <>
2583uint64_t
2584fplibAbs(uint64_t op)
2585{
2586 return op & ~(1ULL << (FP64_BITS - 1));
2587}
2588
2589template <>
2590uint16_t
2591fplibAdd(uint16_t op1, uint16_t op2, FPSCR &fpscr)
2592{
2593 int flags = 0;
2594 uint16_t result = fp16_add(op1, op2, 0, modeConv(fpscr), &flags);
2595 set_fpscr0(fpscr, flags);
2596 return result;
2597}
2598
2599template <>
2600uint32_t
2601fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2602{
2603 int flags = 0;
2604 uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags);
2605 set_fpscr0(fpscr, flags);
2606 return result;
2607}
2608
2609template <>
2610uint64_t
2611fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2612{
2613 int flags = 0;
2614 uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags);
2615 set_fpscr0(fpscr, flags);
2616 return result;
2617}
2618
// Three-way floating-point compare.  The result is an NZCV-style
// nibble: 6 (Z|C) for equal, 8 (N) for less-than, 2 (C) for
// greater-than, 3 (C|V) for unordered.  A signalling NaN always raises
// Invalid Operation; a quiet NaN does so only when signal_nans is set.
template <>
int
fplibCompare(uint16_t op1, uint16_t op2, bool signal_nans, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2, result;
    uint16_t mnt1, mnt2;

    fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if (fp16_is_NaN(exp1, mnt1) || fp16_is_NaN(exp2, mnt2)) {
        result = 3; // unordered
        if (fp16_is_signalling_NaN(exp1, mnt1) ||
            fp16_is_signalling_NaN(exp2, mnt2) || signal_nans)
            flags |= FPLIB_IOC;
    } else {
        // Identical encodings, or +0 compared with -0, are equal:
        if (op1 == op2 || (!mnt1 && !mnt2)) {
            result = 6;
        } else if (sgn1 != sgn2) {
            result = sgn1 ? 8 : 2;
        } else if (exp1 != exp2) {
            // Same sign: compare exponents, inverting for negatives.
            result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
        } else {
            result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

// Single-precision variant of the three-way compare above.
template <>
int
fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2, result;
    uint32_t mnt1, mnt2;

    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if (fp32_is_NaN(exp1, mnt1) || fp32_is_NaN(exp2, mnt2)) {
        result = 3; // unordered
        if (fp32_is_signalling_NaN(exp1, mnt1) ||
            fp32_is_signalling_NaN(exp2, mnt2) || signal_nans)
            flags |= FPLIB_IOC;
    } else {
        // Identical encodings, or +0 compared with -0, are equal:
        if (op1 == op2 || (!mnt1 && !mnt2)) {
            result = 6;
        } else if (sgn1 != sgn2) {
            result = sgn1 ? 8 : 2;
        } else if (exp1 != exp2) {
            result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
        } else {
            result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

// Double-precision variant of the three-way compare above.
template <>
int
fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2, result;
    uint64_t mnt1, mnt2;

    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if (fp64_is_NaN(exp1, mnt1) || fp64_is_NaN(exp2, mnt2)) {
        result = 3; // unordered
        if (fp64_is_signalling_NaN(exp1, mnt1) ||
            fp64_is_signalling_NaN(exp2, mnt2) || signal_nans)
            flags |= FPLIB_IOC;
    } else {
        // Identical encodings, or +0 compared with -0, are equal:
        if (op1 == op2 || (!mnt1 && !mnt2)) {
            result = 6;
        } else if (sgn1 != sgn2) {
            result = sgn1 ? 8 : 2;
        } else if (exp1 != exp2) {
            result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
        } else {
            result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}
2720
2721static uint16_t
2723{
2724 return fp16_pack(op >> (FP32_BITS - 1), FP16_EXP_INF,
2725 1ULL << (FP16_MANT_BITS - 1) |
2727}
2728
2729static uint16_t
2731{
2732 return fp16_pack(op >> (FP64_BITS - 1), FP16_EXP_INF,
2733 1ULL << (FP16_MANT_BITS - 1) |
2735}
2736
2737static uint32_t
2739{
2740 return fp32_pack(op >> (FP16_BITS - 1), FP32_EXP_INF,
2741 1ULL << (FP32_MANT_BITS - 1) |
2742 (uint32_t)op << (FP32_MANT_BITS - FP16_MANT_BITS));
2743}
2744
2745static uint32_t
2747{
2748 return fp32_pack(op >> (FP64_BITS - 1), FP32_EXP_INF,
2749 1ULL << (FP32_MANT_BITS - 1) |
2751}
2752
2753static uint64_t
2755{
2756 return fp64_pack(op >> (FP16_BITS - 1), FP64_EXP_INF,
2757 1ULL << (FP64_MANT_BITS - 1) |
2758 (uint64_t)op << (FP64_MANT_BITS - FP16_MANT_BITS));
2759}
2760
2761static uint64_t
2763{
2764 return fp64_pack(op >> (FP32_BITS - 1), FP64_EXP_INF,
2765 1ULL << (FP64_MANT_BITS - 1) |
2766 (uint64_t)op << (FP64_MANT_BITS - FP32_MANT_BITS));
2767}
2768
2769static uint16_t
2771{
2772 return fp16_pack(sgn, FP16_EXP_BIAS, 1ULL << (FP16_MANT_BITS - 1));
2773}
2774
2775static uint32_t
2777{
2778 return fp32_pack(sgn, FP32_EXP_BIAS, 1ULL << (FP32_MANT_BITS - 1));
2779}
2780
2781static uint64_t
2783{
2784 return fp64_pack(sgn, FP64_EXP_BIAS, 1ULL << (FP64_MANT_BITS - 1));
2785}
2786
2787static uint16_t
2789{
2790 return fp16_pack(sgn, FP16_EXP_BIAS + 1, 1ULL << (FP16_MANT_BITS - 1));
2791}
2792
2793static uint32_t
2795{
2796 return fp32_pack(sgn, FP32_EXP_BIAS + 1, 1ULL << (FP32_MANT_BITS - 1));
2797}
2798
2799static uint64_t
2801{
2802 return fp64_pack(sgn, FP64_EXP_BIAS + 1, 1ULL << (FP64_MANT_BITS - 1));
2803}
2804
2805static uint16_t
2807{
2808 return fp16_pack(sgn, FP16_EXP_BIAS + 1, 0);
2809}
2810
2811static uint32_t
2813{
2814 return fp32_pack(sgn, FP32_EXP_BIAS + 1, 0);
2815}
2816
2817static uint64_t
2819{
2820 return fp64_pack(sgn, FP64_EXP_BIAS + 1, 0);
2821}
2822
2823template <>
2824uint16_t
2825fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
2826{
2827 int mode = modeConv(fpscr);
2828 int flags = 0;
2829 int sgn, exp;
2830 uint32_t mnt;
2831 uint16_t result;
2832
2833 // Unpack floating-point operand optionally with flush-to-zero:
2834 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2835
2836 bool alt_hp = fpscr.ahp;
2837
2838 if (fp32_is_NaN(exp, mnt)) {
2839 if (alt_hp) {
2840 result = fp16_zero(sgn);
2841 } else if (fpscr.dn) {
2842 result = fp16_defaultNaN();
2843 } else {
2844 result = fp16_FPConvertNaN_32(op);
2845 }
2846 if (!(mnt >> (FP32_MANT_BITS - 1) & 1) || alt_hp) {
2847 flags |= FPLIB_IOC;
2848 }
2849 } else if (exp == FP32_EXP_INF) {
2850 if (alt_hp) {
2851 result = ((uint16_t)sgn << (FP16_BITS - 1) |
2852 ((1ULL << (FP16_BITS - 1)) - 1));
2853 flags |= FPLIB_IOC;
2854 } else {
2855 result = fp16_infinity(sgn);
2856 }
2857 } else if (!mnt) {
2858 result = fp16_zero(sgn);
2859 } else {
2860 result =
2862 mnt >> (FP32_MANT_BITS - FP16_BITS) |
2863 !!(mnt & ((1ULL << (FP32_MANT_BITS - FP16_BITS)) - 1)),
2864 rounding, (mode & 0xf) | alt_hp << 4, &flags);
2865 }
2866
2867 set_fpscr0(fpscr, flags);
2868
2869 return result;
2870}
2871
2872template <>
2873uint16_t
2874fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
2875{
2876 int mode = modeConv(fpscr);
2877 int flags = 0;
2878 int sgn, exp;
2879 uint64_t mnt;
2880 uint16_t result;
2881
2882 // Unpack floating-point operand optionally with flush-to-zero:
2883 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2884
2885 bool alt_hp = fpscr.ahp;
2886
2887 if (fp64_is_NaN(exp, mnt)) {
2888 if (alt_hp) {
2889 result = fp16_zero(sgn);
2890 } else if (fpscr.dn) {
2891 result = fp16_defaultNaN();
2892 } else {
2893 result = fp16_FPConvertNaN_64(op);
2894 }
2895 if (!(mnt >> (FP64_MANT_BITS - 1) & 1) || alt_hp) {
2896 flags |= FPLIB_IOC;
2897 }
2898 } else if (exp == FP64_EXP_INF) {
2899 if (alt_hp) {
2900 result = ((uint16_t)sgn << (FP16_BITS - 1) |
2901 ((1ULL << (FP16_BITS - 1)) - 1));
2902 flags |= FPLIB_IOC;
2903 } else {
2904 result = fp16_infinity(sgn);
2905 }
2906 } else if (!mnt) {
2907 result = fp16_zero(sgn);
2908 } else {
2909 result =
2911 mnt >> (FP64_MANT_BITS - FP16_BITS) |
2912 !!(mnt & ((1ULL << (FP64_MANT_BITS - FP16_BITS)) - 1)),
2913 rounding, (mode & 0xf) | alt_hp << 4, &flags);
2914 }
2915
2916 set_fpscr0(fpscr, flags);
2917
2918 return result;
2919}
2920
2921template <>
2922uint32_t
2923fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
2924{
2925 int mode = modeConv(fpscr);
2926 int flags = 0;
2927 int sgn, exp;
2928 uint16_t mnt;
2929 uint32_t result;
2930
2931 // Unpack floating-point operand optionally with flush-to-zero:
2932 fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags);
2933
2934 if (fp16_is_NaN(exp, mnt) && !fpscr.ahp) {
2935 if (fpscr.dn) {
2936 result = fp32_defaultNaN();
2937 } else {
2938 result = fp32_FPConvertNaN_16(op);
2939 }
2940 if (!(mnt >> (FP16_MANT_BITS - 1) & 1)) {
2941 flags |= FPLIB_IOC;
2942 }
2943 } else if (exp == FP16_EXP_INF && !fpscr.ahp) {
2944 result = fp32_infinity(sgn);
2945 } else if (!mnt) {
2946 result = fp32_zero(sgn);
2947 } else {
2948 mnt = fp16_normalise(mnt, &exp);
2949 result = fp32_pack(sgn, (exp - FP16_EXP_BIAS +
2951 (uint32_t)mnt << (FP32_MANT_BITS - FP16_BITS + 1));
2952 }
2953
2954 set_fpscr0(fpscr, flags);
2955
2956 return result;
2957}
2958
2959template <>
2960uint32_t
2961fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
2962{
2963 int mode = modeConv(fpscr);
2964 int flags = 0;
2965 int sgn, exp;
2966 uint64_t mnt;
2967 uint32_t result;
2968
2969 // Unpack floating-point operand optionally with flush-to-zero:
2970 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2971
2972 if (fp64_is_NaN(exp, mnt)) {
2973 if (fpscr.dn) {
2974 result = fp32_defaultNaN();
2975 } else {
2976 result = fp32_FPConvertNaN_64(op);
2977 }
2978 if (!(mnt >> (FP64_MANT_BITS - 1) & 1)) {
2979 flags |= FPLIB_IOC;
2980 }
2981 } else if (exp == FP64_EXP_INF) {
2982 result = fp32_infinity(sgn);
2983 } else if (!mnt) {
2984 result = fp32_zero(sgn);
2985 } else {
2986 result =
2988 mnt >> (FP64_MANT_BITS - FP32_BITS) |
2989 !!(mnt & ((1ULL << (FP64_MANT_BITS - FP32_BITS)) - 1)),
2990 rounding, mode, &flags);
2991 }
2992
2993 set_fpscr0(fpscr, flags);
2994
2995 return result;
2996}
2997
2998template <>
2999uint64_t
3000fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
3001{
3002 int mode = modeConv(fpscr);
3003 int flags = 0;
3004 int sgn, exp;
3005 uint16_t mnt;
3006 uint64_t result;
3007
3008 // Unpack floating-point operand optionally with flush-to-zero:
3009 fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags);
3010
3011 if (fp16_is_NaN(exp, mnt) && !fpscr.ahp) {
3012 if (fpscr.dn) {
3013 result = fp64_defaultNaN();
3014 } else {
3015 result = fp64_FPConvertNaN_16(op);
3016 }
3017 if (!(mnt >> (FP16_MANT_BITS - 1) & 1)) {
3018 flags |= FPLIB_IOC;
3019 }
3020 } else if (exp == FP16_EXP_INF && !fpscr.ahp) {
3021 result = fp64_infinity(sgn);
3022 } else if (!mnt) {
3023 result = fp64_zero(sgn);
3024 } else {
3025 mnt = fp16_normalise(mnt, &exp);
3026 result = fp64_pack(sgn, (exp - FP16_EXP_BIAS +
3028 (uint64_t)mnt << (FP64_MANT_BITS - FP16_BITS + 1));
3029 }
3030
3031 set_fpscr0(fpscr, flags);
3032
3033 return result;
3034}
3035
3036template <>
3037uint64_t
3038fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
3039{
3040 int mode = modeConv(fpscr);
3041 int flags = 0;
3042 int sgn, exp;
3043 uint32_t mnt;
3044 uint64_t result;
3045
3046 // Unpack floating-point operand optionally with flush-to-zero:
3047 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3048
3049 if (fp32_is_NaN(exp, mnt)) {
3050 if (fpscr.dn) {
3051 result = fp64_defaultNaN();
3052 } else {
3053 result = fp64_FPConvertNaN_32(op);
3054 }
3055 if (!(mnt >> (FP32_MANT_BITS - 1) & 1)) {
3056 flags |= FPLIB_IOC;
3057 }
3058 } else if (exp == FP32_EXP_INF) {
3059 result = fp64_infinity(sgn);
3060 } else if (!mnt) {
3061 result = fp64_zero(sgn);
3062 } else {
3063 mnt = fp32_normalise(mnt, &exp);
3064 result = fp64_pack(sgn, (exp - FP32_EXP_BIAS +
3066 (uint64_t)mnt << (FP64_MANT_BITS - FP32_BITS + 1));
3067 }
3068
3069 set_fpscr0(fpscr, flags);
3070
3071 return result;
3072}
3073
3074template <>
3075uint16_t
3076fplibMulAdd(uint16_t addend, uint16_t op1, uint16_t op2, FPSCR &fpscr)
3077{
3078 int flags = 0;
3079 uint16_t result = fp16_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
3080 set_fpscr0(fpscr, flags);
3081 return result;
3082}
3083
3084template <>
3085uint32_t
3086fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr)
3087{
3088 int flags = 0;
3089 uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
3090 set_fpscr0(fpscr, flags);
3091 return result;
3092}
3093
3094template <>
3095uint64_t
3096fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr)
3097{
3098 int flags = 0;
3099 uint64_t result = fp64_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
3100 set_fpscr0(fpscr, flags);
3101 return result;
3102}
3103
3104template <>
3105uint32_t
3106fplibMulAddH(uint32_t addend, uint16_t op1, uint16_t op2, FPSCR &fpscr)
3107{
3108 int flags = 0;
3109 uint32_t result = fp32_muladdh(addend, op1, op2, 0,
3110 modeConv(fpscr), &flags);
3111 set_fpscr0(fpscr, flags);
3112 return result;
3113}
3114
3115template <>
3116uint16_t
3117fplibDiv(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3118{
3119 int flags = 0;
3120 uint16_t result = fp16_div(op1, op2, modeConv(fpscr), &flags);
3121 set_fpscr0(fpscr, flags);
3122 return result;
3123}
3124
3125template <>
3126uint32_t
3127fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3128{
3129 int flags = 0;
3130 uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags);
3131 set_fpscr0(fpscr, flags);
3132 return result;
3133}
3134
3135template <>
3136uint64_t
3137fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3138{
3139 int flags = 0;
3140 uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags);
3141 set_fpscr0(fpscr, flags);
3142 return result;
3143}
3144
// Exponential accelerator (FEXPA) lookup, half precision: the low 5
// bits of the operand index the coefficient table, and the next
// FP16_EXP_BITS bits are placed directly into the exponent field.  The
// coefficients appear to be the fractional mantissa bits of 2^(i/32)
// — TODO(review): confirm against the Arm FEXPA pseudocode.
template <>
uint16_t
fplibExpA(uint16_t op)
{
    static uint16_t coeff[32] = {
        0x0000,
        0x0016,
        0x002d,
        0x0045,
        0x005d,
        0x0075,
        0x008e,
        0x00a8,
        0x00c2,
        0x00dc,
        0x00f8,
        0x0114,
        0x0130,
        0x014d,
        0x016b,
        0x0189,
        0x01a8,
        0x01c8,
        0x01e8,
        0x0209,
        0x022b,
        0x024e,
        0x0271,
        0x0295,
        0x02ba,
        0x02e0,
        0x0306,
        0x032e,
        0x0356,
        0x037f,
        0x03a9,
        0x03d4
    };
    return ((((op >> 5) & ((1 << FP16_EXP_BITS) - 1)) << FP16_MANT_BITS) |
            coeff[op & ((1 << 5) - 1)]);
}
3186
// Exponential accelerator (FEXPA) lookup, single precision: the low 6
// bits of the operand index the coefficient table, and the next
// FP32_EXP_BITS bits are placed directly into the exponent field.  The
// coefficients appear to be the fractional mantissa bits of 2^(i/64)
// — TODO(review): confirm against the Arm FEXPA pseudocode.
template <>
uint32_t
fplibExpA(uint32_t op)
{
    static uint32_t coeff[64] = {
        0x000000,
        0x0164d2,
        0x02cd87,
        0x043a29,
        0x05aac3,
        0x071f62,
        0x08980f,
        0x0a14d5,
        0x0b95c2,
        0x0d1adf,
        0x0ea43a,
        0x1031dc,
        0x11c3d3,
        0x135a2b,
        0x14f4f0,
        0x16942d,
        0x1837f0,
        0x19e046,
        0x1b8d3a,
        0x1d3eda,
        0x1ef532,
        0x20b051,
        0x227043,
        0x243516,
        0x25fed7,
        0x27cd94,
        0x29a15b,
        0x2b7a3a,
        0x2d583f,
        0x2f3b79,
        0x3123f6,
        0x3311c4,
        0x3504f3,
        0x36fd92,
        0x38fbaf,
        0x3aff5b,
        0x3d08a4,
        0x3f179a,
        0x412c4d,
        0x4346cd,
        0x45672a,
        0x478d75,
        0x49b9be,
        0x4bec15,
        0x4e248c,
        0x506334,
        0x52a81e,
        0x54f35b,
        0x5744fd,
        0x599d16,
        0x5bfbb8,
        0x5e60f5,
        0x60ccdf,
        0x633f89,
        0x65b907,
        0x68396a,
        0x6ac0c7,
        0x6d4f30,
        0x6fe4ba,
        0x728177,
        0x75257d,
        0x77d0df,
        0x7a83b3,
        0x7d3e0c
    };
    return ((((op >> 6) & ((1 << FP32_EXP_BITS) - 1)) << FP32_MANT_BITS) |
            coeff[op & ((1 << 6) - 1)]);
}
3260
// Exponential accelerator (FEXPA) lookup, double precision: the low 6
// bits of the operand index the coefficient table, and the next
// FP64_EXP_BITS bits are placed directly into the exponent field.  The
// coefficients appear to be the fractional mantissa bits of 2^(i/64)
// — TODO(review): confirm against the Arm FEXPA pseudocode.
template <>
uint64_t
fplibExpA(uint64_t op)
{
    static uint64_t coeff[64] = {
        0x0000000000000ULL,
        0x02c9a3e778061ULL,
        0x059b0d3158574ULL,
        0x0874518759bc8ULL,
        0x0b5586cf9890fULL,
        0x0e3ec32d3d1a2ULL,
        0x11301d0125b51ULL,
        0x1429aaea92de0ULL,
        0x172b83c7d517bULL,
        0x1a35beb6fcb75ULL,
        0x1d4873168b9aaULL,
        0x2063b88628cd6ULL,
        0x2387a6e756238ULL,
        0x26b4565e27cddULL,
        0x29e9df51fdee1ULL,
        0x2d285a6e4030bULL,
        0x306fe0a31b715ULL,
        0x33c08b26416ffULL,
        0x371a7373aa9cbULL,
        0x3a7db34e59ff7ULL,
        0x3dea64c123422ULL,
        0x4160a21f72e2aULL,
        0x44e086061892dULL,
        0x486a2b5c13cd0ULL,
        0x4bfdad5362a27ULL,
        0x4f9b2769d2ca7ULL,
        0x5342b569d4f82ULL,
        0x56f4736b527daULL,
        0x5ab07dd485429ULL,
        0x5e76f15ad2148ULL,
        0x6247eb03a5585ULL,
        0x6623882552225ULL,
        0x6a09e667f3bcdULL,
        0x6dfb23c651a2fULL,
        0x71f75e8ec5f74ULL,
        0x75feb564267c9ULL,
        0x7a11473eb0187ULL,
        0x7e2f336cf4e62ULL,
        0x82589994cce13ULL,
        0x868d99b4492edULL,
        0x8ace5422aa0dbULL,
        0x8f1ae99157736ULL,
        0x93737b0cdc5e5ULL,
        0x97d829fde4e50ULL,
        0x9c49182a3f090ULL,
        0xa0c667b5de565ULL,
        0xa5503b23e255dULL,
        0xa9e6b5579fdbfULL,
        0xae89f995ad3adULL,
        0xb33a2b84f15fbULL,
        0xb7f76f2fb5e47ULL,
        0xbcc1e904bc1d2ULL,
        0xc199bdd85529cULL,
        0xc67f12e57d14bULL,
        0xcb720dcef9069ULL,
        0xd072d4a07897cULL,
        0xd5818dcfba487ULL,
        0xda9e603db3285ULL,
        0xdfc97337b9b5fULL,
        0xe502ee78b3ff6ULL,
        0xea4afa2a490daULL,
        0xefa1bee615a27ULL,
        0xf50765b6e4540ULL,
        0xfa7c1819e90d8ULL
    };
    return ((((op >> 6) & ((1 << FP64_EXP_BITS) - 1)) << FP64_MANT_BITS) |
            coeff[op & ((1 << 6) - 1)]);
}
3334
3335static uint16_t
3336fp16_repack(int sgn, int exp, uint16_t mnt)
3337{
3338 return fp16_pack(sgn, mnt >> FP16_MANT_BITS ? exp : 0, mnt);
3339}
3340
3341static uint32_t
3342fp32_repack(int sgn, int exp, uint32_t mnt)
3343{
3344 return fp32_pack(sgn, mnt >> FP32_MANT_BITS ? exp : 0, mnt);
3345}
3346
3347static uint64_t
3348fp64_repack(int sgn, int exp, uint64_t mnt)
3349{
3350 return fp64_pack(sgn, mnt >> FP64_MANT_BITS ? exp : 0, mnt);
3351}
3352
// maxNum/minNum pre-pass: if exactly one operand is a quiet NaN,
// replace it with +/-Infinity (sign chosen by the caller: 1 for max,
// 0 for min) so the ordinary max/min then selects the numeric operand.
// The test "!(~(op << 1) >> MANT_BITS)" drops the sign bit and is true
// when the exponent field and the top (quiet) mantissa bit are all
// ones, i.e. when op is a quiet NaN regardless of payload.

static void
fp16_minmaxnum(uint16_t *op1, uint16_t *op2, int sgn)
{
    // Treat a single quiet-NaN as +Infinity/-Infinity
    if (!((uint16_t)~(*op1 << 1) >> FP16_MANT_BITS) &&
        (uint16_t)~(*op2 << 1) >> FP16_MANT_BITS)
        *op1 = fp16_infinity(sgn);
    if (!((uint16_t)~(*op2 << 1) >> FP16_MANT_BITS) &&
        (uint16_t)~(*op1 << 1) >> FP16_MANT_BITS)
        *op2 = fp16_infinity(sgn);
}

// Single-precision variant of the quiet-NaN substitution above.
static void
fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn)
{
    // Treat a single quiet-NaN as +Infinity/-Infinity
    if (!((uint32_t)~(*op1 << 1) >> FP32_MANT_BITS) &&
        (uint32_t)~(*op2 << 1) >> FP32_MANT_BITS)
        *op1 = fp32_infinity(sgn);
    if (!((uint32_t)~(*op2 << 1) >> FP32_MANT_BITS) &&
        (uint32_t)~(*op1 << 1) >> FP32_MANT_BITS)
        *op2 = fp32_infinity(sgn);
}

// Double-precision variant of the quiet-NaN substitution above.
static void
fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn)
{
    // Treat a single quiet-NaN as +Infinity/-Infinity
    if (!((uint64_t)~(*op1 << 1) >> FP64_MANT_BITS) &&
        (uint64_t)~(*op2 << 1) >> FP64_MANT_BITS)
        *op1 = fp64_infinity(sgn);
    if (!((uint64_t)~(*op2 << 1) >> FP64_MANT_BITS) &&
        (uint64_t)~(*op1 << 1) >> FP64_MANT_BITS)
        *op2 = fp64_infinity(sgn);
}
3388
// Maximum of two operands with NaN propagation.  The selection ternary
// reads: if signs differ, take op1 exactly when op2 is negative;
// otherwise compare the raw encodings (monotonic for same-signed
// values), inverting the sense when both are negative.  The winner is
// re-packed from its unpacked fields rather than returned verbatim.
template <>
uint16_t
fplibMax(uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint16_t mnt1, mnt2, x, result;

    fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp16_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
                  fp16_repack(sgn1, exp1, mnt1) :
                  fp16_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}

// Single-precision variant of fplibMax above.
template <>
uint32_t
fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, x, result;

    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
                  fp32_repack(sgn1, exp1, mnt1) :
                  fp32_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}

// Double-precision variant of fplibMax above.
template <>
uint64_t
fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, x, result;

    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
                  fp64_repack(sgn1, exp1, mnt1) :
                  fp64_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}
3457
// maxNum: like fplibMax except that a single quiet-NaN operand is first
// replaced by -Infinity (sgn = 1), so the numeric operand is returned.

template <>
uint16_t
fplibMaxNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    fp16_minmaxnum(&op1, &op2, 1);
    return fplibMax<uint16_t>(op1, op2, fpscr);
}

template <>
uint32_t
fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    fp32_minmaxnum(&op1, &op2, 1);
    return fplibMax<uint32_t>(op1, op2, fpscr);
}

template <>
uint64_t
fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    fp64_minmaxnum(&op1, &op2, 1);
    return fplibMax<uint64_t>(op1, op2, fpscr);
}
3481
// Minimum of two operands with NaN propagation.  The selection ternary
// mirrors fplibMax: if signs differ, take op1 exactly when op1 is
// negative; otherwise compare the raw encodings, inverting the sense
// when both are negative.  The winner is re-packed from its unpacked
// fields rather than returned verbatim.
template <>
uint16_t
fplibMin(uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint16_t mnt1, mnt2, x, result;

    fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp16_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
                  fp16_repack(sgn1, exp1, mnt1) :
                  fp16_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}

// Single-precision variant of fplibMin above.
template <>
uint32_t
fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, x, result;

    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
                  fp32_repack(sgn1, exp1, mnt1) :
                  fp32_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}

// Double-precision variant of fplibMin above.
template <>
uint64_t
fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, x, result;

    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
                  fp64_repack(sgn1, exp1, mnt1) :
                  fp64_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}
3550
// minNum: like fplibMin except that a single quiet-NaN operand is first
// replaced by +Infinity (sgn = 0), so the numeric operand is returned.

template <>
uint16_t
fplibMinNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    fp16_minmaxnum(&op1, &op2, 0);
    return fplibMin<uint16_t>(op1, op2, fpscr);
}

template <>
uint32_t
fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    fp32_minmaxnum(&op1, &op2, 0);
    return fplibMin<uint32_t>(op1, op2, fpscr);
}

template <>
uint64_t
fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    fp64_minmaxnum(&op1, &op2, 0);
    return fplibMin<uint64_t>(op1, op2, fpscr);
}
3574
3575template <>
3576uint16_t
3577fplibMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3578{
3579 int flags = 0;
3580 uint16_t result = fp16_mul(op1, op2, modeConv(fpscr), &flags);
3581 set_fpscr0(fpscr, flags);
3582 return result;
3583}
3584
3585template <>
3586uint32_t
3587fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3588{
3589 int flags = 0;
3590 uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags);
3591 set_fpscr0(fpscr, flags);
3592 return result;
3593}
3594
3595template <>
3596uint64_t
3597fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3598{
3599 int flags = 0;
3600 uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags);
3601 set_fpscr0(fpscr, flags);
3602 return result;
3603}
3604
3605template <>
3606uint16_t
3607fplibMulX(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3608{
3609 int mode = modeConv(fpscr);
3610 int flags = 0;
3611 int sgn1, exp1, sgn2, exp2;
3612 uint16_t mnt1, mnt2, result;
3613
3614 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3615 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3616
3617 result = fp16_process_NaNs(op1, op2, mode, &flags);
3618 if (!result) {
3619 if ((exp1 == FP16_EXP_INF && !mnt2) ||
3620 (exp2 == FP16_EXP_INF && !mnt1)) {
3621 result = fp16_FPTwo(sgn1 ^ sgn2);
3622 } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
3623 result = fp16_infinity(sgn1 ^ sgn2);
3624 } else if (!mnt1 || !mnt2) {
3625 result = fp16_zero(sgn1 ^ sgn2);
3626 } else {
3627 result = fp16_mul(op1, op2, mode, &flags);
3628 }
3629 }
3630
3631 set_fpscr0(fpscr, flags);
3632
3633 return result;
3634}
3635
template <>
uint32_t
fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    // FMULX (single precision): like FMUL, except infinity * zero
    // returns +/-2.0 rather than an Invalid Operation default NaN.
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, result;

    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    // NaN operands take priority; a zero result here means "no NaN".
    result = fp32_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == FP32_EXP_INF && !mnt2) ||
            (exp2 == FP32_EXP_INF && !mnt1)) {
            // infinity * zero: FMULX-specific result of +/-2.0
            result = fp32_FPTwo(sgn1 ^ sgn2);
        } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
            result = fp32_infinity(sgn1 ^ sgn2);
        } else if (!mnt1 || !mnt2) {
            result = fp32_zero(sgn1 ^ sgn2);
        } else {
            result = fp32_mul(op1, op2, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}
3666
template <>
uint64_t
fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    // FMULX (double precision): like FMUL, except infinity * zero
    // returns +/-2.0 rather than an Invalid Operation default NaN.
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, result;

    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    // NaN operands take priority; a zero result here means "no NaN".
    result = fp64_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == FP64_EXP_INF && !mnt2) ||
            (exp2 == FP64_EXP_INF && !mnt1)) {
            // infinity * zero: FMULX-specific result of +/-2.0
            result = fp64_FPTwo(sgn1 ^ sgn2);
        } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
            result = fp64_infinity(sgn1 ^ sgn2);
        } else if (!mnt1 || !mnt2) {
            result = fp64_zero(sgn1 ^ sgn2);
        } else {
            result = fp64_mul(op1, op2, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}
3697
3698template <>
3699uint16_t
3700fplibNeg(uint16_t op)
3701{
3702 return op ^ 1ULL << (FP16_BITS - 1);
3703}
3704
3705template <>
3706uint32_t
3707fplibNeg(uint32_t op)
3708{
3709 return op ^ 1ULL << (FP32_BITS - 1);
3710}
3711
3712template <>
3713uint64_t
3714fplibNeg(uint64_t op)
3715{
3716 return op ^ 1ULL << (FP64_BITS - 1);
3717}
3718
// 256-entry lookup table used by fplibRSqrtEstimate below. The index
// is built from the parity of the (adjusted) exponent (bit 7) and the
// top seven fraction bits of the normalised significand (bits 6:0);
// the entry is an 8-bit estimate of the reciprocal square root.
static const uint8_t recip_sqrt_estimate[256] = {
    255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228,
    226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203,
    201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181,
    180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163,
    162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146,
    145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131,
    131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118,
    118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106,
    105, 104, 103, 101, 100,  99,  97,  96,  95,  93,  92,  91,  90,  88,  87,  86,
     85,  84,  82,  81,  80,  79,  78,  77,  76,  75,  74,  72,  71,  70,  69,  68,
     67,  66,  65,  64,  63,  62,  61,  60,  60,  59,  58,  57,  56,  55,  54,  53,
     52,  51,  51,  50,  49,  48,  47,  46,  46,  45,  44,  43,  42,  42,  41,  40,
     39,  38,  38,  37,  36,  35,  35,  34,  33,  33,  32,  31,  30,  30,  29,  28,
     28,  27,  26,  26,  25,  24,  24,  23,  22,  22,  21,  20,  20,  19,  19,  18,
     17,  17,  16,  16,  15,  14,  14,  13,  13,  12,  11,  11,  10,  10,   9,   9,
      8,   8,   7,   6,   6,   5,   5,   4,   4,   3,   3,   2,   2,   1,   1,   0
};
3737
template <>
uint16_t
fplibRSqrtEstimate(uint16_t op, FPSCR &fpscr)
{
    // FRSQRTE (half precision): table-based estimate of 1/sqrt(op).
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint16_t mnt, result;

    fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (fp16_is_NaN(exp, mnt)) {
        result = fp16_process_NaN(op, mode, &flags);
    } else if (!mnt) {
        // +/-0 -> +/-infinity with Division by Zero
        result = fp16_infinity(sgn);
        flags |= FPLIB_DZC;
    } else if (sgn) {
        // Negative operand -> default NaN with Invalid Operation
        result = fp16_defaultNaN();
        flags |= FPLIB_IOC;
    } else if (exp == FP16_EXP_INF) {
        // +infinity -> +0
        result = fp16_zero(0);
    } else {
        // Normalise, then look up an 8-bit estimate indexed by the
        // exponent's parity and the top 7 fraction bits; rebuild a
        // half-precision value with the estimate as the fraction.
        exp += FP16_EXP_BITS;
        mnt = fp16_normalise(mnt, &exp);
        mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
                                  (mnt >> (FP16_BITS - 8) & 127)];
        result = fp16_pack(0, (3 * FP16_EXP_BIAS - exp - 1) >> 1,
                           mnt << (FP16_MANT_BITS - 8));
    }

    set_fpscr0(fpscr, flags);

    return result;
}
3772
template <>
uint32_t
fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr)
{
    // FRSQRTE (single precision): table-based estimate of 1/sqrt(op).
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (fp32_is_NaN(exp, mnt)) {
        result = fp32_process_NaN(op, mode, &flags);
    } else if (!mnt) {
        // +/-0 -> +/-infinity with Division by Zero
        result = fp32_infinity(sgn);
        flags |= FPLIB_DZC;
    } else if (sgn) {
        // Negative operand -> default NaN with Invalid Operation
        result = fp32_defaultNaN();
        flags |= FPLIB_IOC;
    } else if (exp == FP32_EXP_INF) {
        // +infinity -> +0
        result = fp32_zero(0);
    } else {
        // Normalise, then look up an 8-bit estimate indexed by the
        // exponent's parity and the top 7 fraction bits; rebuild a
        // single-precision value with the estimate as the fraction.
        exp += FP32_EXP_BITS;
        mnt = fp32_normalise(mnt, &exp);
        mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
                                  (mnt >> (FP32_BITS - 8) & 127)];
        result = fp32_pack(0, (3 * FP32_EXP_BIAS - exp - 1) >> 1,
                           mnt << (FP32_MANT_BITS - 8));
    }

    set_fpscr0(fpscr, flags);

    return result;
}
3807
3808template <>
3809uint64_t
3810fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr)
3811{
3812 int mode = modeConv(fpscr);
3813 int flags = 0;
3814 int sgn, exp;
3815 uint64_t mnt, result;
3816
3817 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3818
3819 if (fp64_is_NaN(exp, mnt)) {
3820 result = fp64_process_NaN(op, mode, &flags);
3821 } else if (!mnt) {
3822 result = fp64_infinity(sgn);
3823 flags |= FPLIB_DZC;
3824 } else if (sgn) {
3825 result = fp64_defaultNaN();
3826 flags |= FPLIB_IOC;
3827 } else if (exp == FP64_EXP_INF) {
3828 result = fp32_zero(0);
3829 } else {
3830 exp += FP64_EXP_BITS;
3831 mnt = fp64_normalise(mnt, &exp);
3832 mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3833 (mnt >> (FP64_BITS - 8) & 127)];
3834 result = fp64_pack(0, (3 * FP64_EXP_BIAS - exp - 1) >> 1,
3835 mnt << (FP64_MANT_BITS - 8));
3836 }
3837
3838 set_fpscr0(fpscr, flags);
3839
3840 return result;
3841}
3842
template <>
uint16_t
fplibRSqrtStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    // FRSQRTS (half precision): fused (3.0 - op1 * op2) / 2.0, the
    // Newton-Raphson refinement step for reciprocal square root.
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint16_t mnt1, mnt2, result;

    // Negate op1 so the muladd below computes 3.0 + (-op1) * op2.
    op1 = fplibNeg<uint16_t>(op1);
    fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp16_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == FP16_EXP_INF && !mnt2) ||
            (exp2 == FP16_EXP_INF && !mnt1)) {
            // infinity * zero -> +1.5 for this step (no Invalid Op)
            result = fp16_FPOnePointFive(0);
        } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
            result = fp16_infinity(sgn1 ^ sgn2);
        } else {
            // The -1 scale argument presumably halves the fused
            // result, yielding (3 - op1*op2)/2 — matches FRSQRTS.
            result = fp16_muladd(fp16_FPThree(0), op1, op2, -1, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}
3872
template <>
uint32_t
fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    // FRSQRTS (single precision): fused (3.0 - op1 * op2) / 2.0, the
    // Newton-Raphson refinement step for reciprocal square root.
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, result;

    // Negate op1 so the muladd below computes 3.0 + (-op1) * op2.
    op1 = fplibNeg<uint32_t>(op1);
    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp32_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == FP32_EXP_INF && !mnt2) ||
            (exp2 == FP32_EXP_INF && !mnt1)) {
            // infinity * zero -> +1.5 for this step (no Invalid Op)
            result = fp32_FPOnePointFive(0);
        } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
            result = fp32_infinity(sgn1 ^ sgn2);
        } else {
            // The -1 scale argument presumably halves the fused
            // result, yielding (3 - op1*op2)/2 — matches FRSQRTS.
            result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}
3902
template <>
uint64_t
fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    // FRSQRTS (double precision): fused (3.0 - op1 * op2) / 2.0, the
    // Newton-Raphson refinement step for reciprocal square root.
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, result;

    // Negate op1 so the muladd below computes 3.0 + (-op1) * op2.
    op1 = fplibNeg<uint64_t>(op1);
    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp64_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == FP64_EXP_INF && !mnt2) ||
            (exp2 == FP64_EXP_INF && !mnt1)) {
            // infinity * zero -> +1.5 for this step (no Invalid Op)
            result = fp64_FPOnePointFive(0);
        } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
            result = fp64_infinity(sgn1 ^ sgn2);
        } else {
            // The -1 scale argument presumably halves the fused
            // result, yielding (3 - op1*op2)/2 — matches FRSQRTS.
            result = fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}
3932
template <>
uint16_t
fplibRecipEstimate(uint16_t op, FPSCR &fpscr)
{
    // FRECPE (half precision): estimate of 1/op.
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint16_t mnt, result;

    fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (fp16_is_NaN(exp, mnt)) {
        result = fp16_process_NaN(op, mode, &flags);
    } else if (exp == FP16_EXP_INF) {
        // +/-infinity -> +/-0
        result = fp16_zero(sgn);
    } else if (!mnt) {
        // +/-0 -> +/-infinity with Division by Zero
        result = fp16_infinity(sgn);
        flags |= FPLIB_DZC;
    } else if (!((uint16_t)(op << 1) >> (FP16_MANT_BITS - 1))) {
        // Operand magnitude too small for a representable reciprocal:
        // overflow to infinity or max-normal per the rounding mode.
        bool overflow_to_inf = false;
        switch (FPCRRounding(fpscr)) {
          case FPRounding_TIEEVEN:
            overflow_to_inf = true;
            break;
          case FPRounding_POSINF:
            overflow_to_inf = !sgn;
            break;
          case FPRounding_NEGINF:
            overflow_to_inf = sgn;
            break;
          case FPRounding_ZERO:
            overflow_to_inf = false;
            break;
          default:
            panic("Unrecognized FP rounding mode");
        }
        result = overflow_to_inf ? fp16_infinity(sgn) : fp16_max_normal(sgn);
        flags |= FPLIB_OFC | FPLIB_IXC;
    } else if (fpscr.fz16 && exp >= 2 * FP16_EXP_BIAS - 1) {
        // Result would be subnormal and FZ16 is set: flush to zero.
        result = fp16_zero(sgn);
        flags |= FPLIB_UFC;
    } else {
        // Normalise and compute an 8-bit reciprocal estimate from the
        // top 10 significand bits; denormalise if the result exponent
        // lands at or just below the bottom of the normal range.
        exp += FP16_EXP_BITS;
        mnt = fp16_normalise(mnt, &exp);
        int result_exp = 2 * FP16_EXP_BIAS - 1 - exp;
        uint16_t fraction = (((uint32_t)1 << 19) /
                             (mnt >> (FP16_BITS - 10) | 1) + 1) >> 1;
        fraction <<= FP16_MANT_BITS - 8;
        if (result_exp == 0) {
            fraction >>= 1;
        } else if (result_exp == -1) {
            fraction >>= 2;
            result_exp = 0;
        }
        result = fp16_pack(sgn, result_exp, fraction);
    }

    set_fpscr0(fpscr, flags);

    return result;
}
3994
template <>
uint32_t
fplibRecipEstimate(uint32_t op, FPSCR &fpscr)
{
    // FRECPE (single precision): estimate of 1/op.
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (fp32_is_NaN(exp, mnt)) {
        result = fp32_process_NaN(op, mode, &flags);
    } else if (exp == FP32_EXP_INF) {
        // +/-infinity -> +/-0
        result = fp32_zero(sgn);
    } else if (!mnt) {
        // +/-0 -> +/-infinity with Division by Zero
        result = fp32_infinity(sgn);
        flags |= FPLIB_DZC;
    } else if (!((uint32_t)(op << 1) >> (FP32_MANT_BITS - 1))) {
        // Operand magnitude too small for a representable reciprocal:
        // overflow to infinity or max-normal per the rounding mode.
        bool overflow_to_inf = false;
        switch (FPCRRounding(fpscr)) {
          case FPRounding_TIEEVEN:
            overflow_to_inf = true;
            break;
          case FPRounding_POSINF:
            overflow_to_inf = !sgn;
            break;
          case FPRounding_NEGINF:
            overflow_to_inf = sgn;
            break;
          case FPRounding_ZERO:
            overflow_to_inf = false;
            break;
          default:
            panic("Unrecognized FP rounding mode");
        }
        result = overflow_to_inf ? fp32_infinity(sgn) : fp32_max_normal(sgn);
        flags |= FPLIB_OFC | FPLIB_IXC;
    } else if (fpscr.fz && exp >= 2 * FP32_EXP_BIAS - 1) {
        // Result would be subnormal and FZ is set: flush to zero.
        result = fp32_zero(sgn);
        flags |= FPLIB_UFC;
    } else {
        // Normalise and compute an 8-bit reciprocal estimate from the
        // top 10 significand bits; denormalise if the result exponent
        // lands at or just below the bottom of the normal range.
        exp += FP32_EXP_BITS;
        mnt = fp32_normalise(mnt, &exp);
        int result_exp = 2 * FP32_EXP_BIAS - 1 - exp;
        uint32_t fraction = (((uint32_t)1 << 19) /
                             (mnt >> (FP32_BITS - 10) | 1) + 1) >> 1;
        fraction <<= FP32_MANT_BITS - 8;
        if (result_exp == 0) {
            fraction >>= 1;
        } else if (result_exp == -1) {
            fraction >>= 2;
            result_exp = 0;
        }
        result = fp32_pack(sgn, result_exp, fraction);
    }

    set_fpscr0(fpscr, flags);

    return result;
}
4056
template <>
uint64_t
fplibRecipEstimate(uint64_t op, FPSCR &fpscr)
{
    // FRECPE (double precision): estimate of 1/op.
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt, result;

    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (fp64_is_NaN(exp, mnt)) {
        result = fp64_process_NaN(op, mode, &flags);
    } else if (exp == FP64_EXP_INF) {
        // +/-infinity -> +/-0
        result = fp64_zero(sgn);
    } else if (!mnt) {
        // +/-0 -> +/-infinity with Division by Zero
        result = fp64_infinity(sgn);
        flags |= FPLIB_DZC;
    } else if (!((uint64_t)(op << 1) >> (FP64_MANT_BITS - 1))) {
        // Operand magnitude too small for a representable reciprocal:
        // overflow to infinity or max-normal per the rounding mode.
        bool overflow_to_inf = false;
        switch (FPCRRounding(fpscr)) {
          case FPRounding_TIEEVEN:
            overflow_to_inf = true;
            break;
          case FPRounding_POSINF:
            overflow_to_inf = !sgn;
            break;
          case FPRounding_NEGINF:
            overflow_to_inf = sgn;
            break;
          case FPRounding_ZERO:
            overflow_to_inf = false;
            break;
          default:
            panic("Unrecognized FP rounding mode");
        }
        result = overflow_to_inf ? fp64_infinity(sgn) : fp64_max_normal(sgn);
        flags |= FPLIB_OFC | FPLIB_IXC;
    } else if (fpscr.fz && exp >= 2 * FP64_EXP_BIAS - 1) {
        // Result would be subnormal and FZ is set: flush to zero.
        result = fp64_zero(sgn);
        flags |= FPLIB_UFC;
    } else {
        // Normalise and compute an 8-bit reciprocal estimate from the
        // top 10 significand bits; denormalise if the result exponent
        // lands at or just below the bottom of the normal range.
        exp += FP64_EXP_BITS;
        mnt = fp64_normalise(mnt, &exp);
        int result_exp = 2 * FP64_EXP_BIAS - 1 - exp;
        uint64_t fraction = (((uint32_t)1 << 19) /
                             (mnt >> (FP64_BITS - 10) | 1) + 1) >> 1;
        fraction <<= FP64_MANT_BITS - 8;
        if (result_exp == 0) {
            fraction >>= 1;
        } else if (result_exp == -1) {
            fraction >>= 2;
            result_exp = 0;
        }
        result = fp64_pack(sgn, result_exp, fraction);
    }

    set_fpscr0(fpscr, flags);

    return result;
}
4118
template <>
uint16_t
fplibRecipStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    // FRECPS (half precision): fused 2.0 - op1 * op2, the
    // Newton-Raphson refinement step for a reciprocal estimate.
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint16_t mnt1, mnt2, result;

    // Negate op1 so the muladd below computes 2.0 + (-op1) * op2.
    op1 = fplibNeg<uint16_t>(op1);
    fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp16_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == FP16_EXP_INF && !mnt2) ||
            (exp2 == FP16_EXP_INF && !mnt1)) {
            // infinity * zero -> +2.0 for this step (no Invalid Op)
            result = fp16_FPTwo(0);
        } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
            result = fp16_infinity(sgn1 ^ sgn2);
        } else {
            result = fp16_muladd(fp16_FPTwo(0), op1, op2, 0, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}
4148
template <>
uint32_t
fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    // FRECPS (single precision): fused 2.0 - op1 * op2, the
    // Newton-Raphson refinement step for a reciprocal estimate.
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, result;

    // Negate op1 so the muladd below computes 2.0 + (-op1) * op2.
    op1 = fplibNeg<uint32_t>(op1);
    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp32_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == FP32_EXP_INF && !mnt2) ||
            (exp2 == FP32_EXP_INF && !mnt1)) {
            // infinity * zero -> +2.0 for this step (no Invalid Op)
            result = fp32_FPTwo(0);
        } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
            result = fp32_infinity(sgn1 ^ sgn2);
        } else {
            result = fp32_muladd(fp32_FPTwo(0), op1, op2, 0, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}
4178
template <>
uint64_t
fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    // FRECPS (double precision): fused 2.0 - op1 * op2, the
    // Newton-Raphson refinement step for a reciprocal estimate.
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, result;

    // Negate op1 so the muladd below computes 2.0 + (-op1) * op2.
    op1 = fplibNeg<uint64_t>(op1);
    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp64_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == FP64_EXP_INF && !mnt2) ||
            (exp2 == FP64_EXP_INF && !mnt1)) {
            // infinity * zero -> +2.0 for this step (no Invalid Op)
            result = fp64_FPTwo(0);
        } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
            result = fp64_infinity(sgn1 ^ sgn2);
        } else {
            result = fp64_muladd(fp64_FPTwo(0), op1, op2, 0, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}
4208
4209template <>
4210uint16_t
4211fplibRecpX(uint16_t op, FPSCR &fpscr)
4212{
4213 int mode = modeConv(fpscr);
4214 int flags = 0;
4215 int sgn, exp;
4216 uint16_t mnt, result;
4217
4218 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4219
4220 if (fp16_is_NaN(exp, mnt)) {
4221 result = fp16_process_NaN(op, mode, &flags);
4222 }
4223 else {
4224 if (!mnt) { // Zero and denormals
4225 result = fp16_pack(sgn, FP16_EXP_INF - 1, 0);
4226 } else { // Infinities and normals
4227 result = fp16_pack(sgn, exp ^ FP16_EXP_INF, 0);
4228 }
4229 }
4230
4231 set_fpscr0(fpscr, flags);
4232
4233 return result;
4234}
4235
4236template <>
4237uint32_t
4238fplibRecpX(uint32_t op, FPSCR &fpscr)
4239{
4240 int mode = modeConv(fpscr);
4241 int flags = 0;
4242 int sgn, exp;
4243 uint32_t mnt, result;
4244
4245 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4246
4247 if (fp32_is_NaN(exp, mnt)) {
4248 result = fp32_process_NaN(op, mode, &flags);
4249 }
4250 else {
4251 if (!mnt) { // Zero and denormals
4252 result = fp32_pack(sgn, FP32_EXP_INF - 1, 0);
4253 } else { // Infinities and normals
4254 result = fp32_pack(sgn, exp ^ FP32_EXP_INF, 0);
4255 }
4256 }
4257
4258 set_fpscr0(fpscr, flags);
4259
4260 return result;
4261}
4262
4263template <>
4264uint64_t
4265fplibRecpX(uint64_t op, FPSCR &fpscr)
4266{
4267 int mode = modeConv(fpscr);
4268 int flags = 0;
4269 int sgn, exp;
4270 uint64_t mnt, result;
4271
4272 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4273
4274 if (fp64_is_NaN(exp, mnt)) {
4275 result = fp64_process_NaN(op, mode, &flags);
4276 }
4277 else {
4278 if (!mnt) { // Zero and denormals
4279 result = fp64_pack(sgn, FP64_EXP_INF - 1, 0);
4280 } else { // Infinities and normals
4281 result = fp64_pack(sgn, exp ^ FP64_EXP_INF, 0);
4282 }
4283 }
4284
4285 set_fpscr0(fpscr, flags);
4286
4287 return result;
4288}
4289
template <>
uint16_t
fplibRoundInt(uint16_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
{
    // FRINT* (half precision): round to an integral-valued FP number.
    // 'exact' selects whether an inexact result raises IXC.
    int expint = FP16_EXP_BIAS + FP16_MANT_BITS; // exponent at which no fraction bits remain
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint16_t mnt, result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    // Handle NaNs, infinities and zeroes:
    if (fp16_is_NaN(exp, mnt)) {
        result = fp16_process_NaN(op, mode, &flags);
    } else if (exp == FP16_EXP_INF) {
        result = fp16_infinity(sgn);
    } else if (!mnt) {
        result = fp16_zero(sgn);
    } else if (exp >= expint) {
        // There are no fractional bits
        result = op;
    } else {
        // Truncate towards zero:
        uint16_t x = expint - exp >= FP16_BITS ? 0 : mnt >> (expint - exp);
        // err encodes the discarded fraction: bit 1 = round bit,
        // bit 0 = sticky (any lower bit set).
        int err = exp < expint - FP16_BITS ? 1 :
            ((mnt << 1 >> (expint - exp - 1) & 3) |
             ((uint16_t)(mnt << 2 << (FP16_BITS + exp - expint)) != 0));
        switch (rounding) {
          case FPRounding_TIEEVEN:
            x += (err == 3 || (err == 2 && (x & 1)));
            break;
          case FPRounding_POSINF:
            x += err && !sgn;
            break;
          case FPRounding_NEGINF:
            x += err && sgn;
            break;
          case FPRounding_ZERO:
            break;
          case FPRounding_TIEAWAY:
            x += err >> 1;
            break;
          default:
            panic("Unrecognized FP rounding mode");
        }

        if (x == 0) {
            result = fp16_zero(sgn);
        } else {
            // Repack the rounded integer as a half-precision value.
            exp = expint;
            mnt = fp16_normalise(x, &exp);
            result = fp16_pack(sgn, exp + FP16_EXP_BITS, mnt >> FP16_EXP_BITS);
        }

        if (err && exact)
            flags |= FPLIB_IXC;
    }

    set_fpscr0(fpscr, flags);

    return result;
}
4354
template <>
uint32_t
fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
{
    // FRINT* (single precision): round to an integral-valued FP number.
    // 'exact' selects whether an inexact result raises IXC.
    int expint = FP32_EXP_BIAS + FP32_MANT_BITS; // exponent at which no fraction bits remain
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    // Handle NaNs, infinities and zeroes:
    if (fp32_is_NaN(exp, mnt)) {
        result = fp32_process_NaN(op, mode, &flags);
    } else if (exp == FP32_EXP_INF) {
        result = fp32_infinity(sgn);
    } else if (!mnt) {
        result = fp32_zero(sgn);
    } else if (exp >= expint) {
        // There are no fractional bits
        result = op;
    } else {
        // Truncate towards zero:
        uint32_t x = expint - exp >= FP32_BITS ? 0 : mnt >> (expint - exp);
        // err encodes the discarded fraction: bit 1 = round bit,
        // bit 0 = sticky (any lower bit set).
        int err = exp < expint - FP32_BITS ? 1 :
            ((mnt << 1 >> (expint - exp - 1) & 3) |
             ((uint32_t)(mnt << 2 << (FP32_BITS + exp - expint)) != 0));
        switch (rounding) {
          case FPRounding_TIEEVEN:
            x += (err == 3 || (err == 2 && (x & 1)));
            break;
          case FPRounding_POSINF:
            x += err && !sgn;
            break;
          case FPRounding_NEGINF:
            x += err && sgn;
            break;
          case FPRounding_ZERO:
            break;
          case FPRounding_TIEAWAY:
            x += err >> 1;
            break;
          default:
            panic("Unrecognized FP rounding mode");
        }

        if (x == 0) {
            result = fp32_zero(sgn);
        } else {
            // Repack the rounded integer as a single-precision value.
            exp = expint;
            mnt = fp32_normalise(x, &exp);
            result = fp32_pack(sgn, exp + FP32_EXP_BITS, mnt >> FP32_EXP_BITS);
        }

        if (err && exact)
            flags |= FPLIB_IXC;
    }

    set_fpscr0(fpscr, flags);

    return result;
}
4419
template <>
uint64_t
fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
{
    // FRINT* (double precision): round to an integral-valued FP number.
    // 'exact' selects whether an inexact result raises IXC.
    int expint = FP64_EXP_BIAS + FP64_MANT_BITS; // exponent at which no fraction bits remain
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt, result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    // Handle NaNs, infinities and zeroes:
    if (fp64_is_NaN(exp, mnt)) {
        result = fp64_process_NaN(op, mode, &flags);
    } else if (exp == FP64_EXP_INF) {
        result = fp64_infinity(sgn);
    } else if (!mnt) {
        result = fp64_zero(sgn);
    } else if (exp >= expint) {
        // There are no fractional bits
        result = op;
    } else {
        // Truncate towards zero:
        uint64_t x = expint - exp >= FP64_BITS ? 0 : mnt >> (expint - exp);
        // err encodes the discarded fraction: bit 1 = round bit,
        // bit 0 = sticky (any lower bit set).
        int err = exp < expint - FP64_BITS ? 1 :
            ((mnt << 1 >> (expint - exp - 1) & 3) |
             ((uint64_t)(mnt << 2 << (FP64_BITS + exp - expint)) != 0));
        switch (rounding) {
          case FPRounding_TIEEVEN:
            x += (err == 3 || (err == 2 && (x & 1)));
            break;
          case FPRounding_POSINF:
            x += err && !sgn;
            break;
          case FPRounding_NEGINF:
            x += err && sgn;
            break;
          case FPRounding_ZERO:
            break;
          case FPRounding_TIEAWAY:
            x += err >> 1;
            break;
          default:
            panic("Unrecognized FP rounding mode");
        }

        if (x == 0) {
            result = fp64_zero(sgn);
        } else {
            // Repack the rounded integer as a double-precision value.
            exp = expint;
            mnt = fp64_normalise(x, &exp);
            result = fp64_pack(sgn, exp + FP64_EXP_BITS, mnt >> FP64_EXP_BITS);
        }

        if (err && exact)
            flags |= FPLIB_IXC;
    }

    set_fpscr0(fpscr, flags);

    return result;
}
4484
template <>
uint32_t
fplibRoundIntN(uint32_t op, FPRounding rounding, bool exact, int intsize,
               FPSCR &fpscr)
{
    // FRINT32/64 (single precision): round to an integral-valued FP
    // number representable as a signed 'intsize'-bit integer. Values
    // that cannot be represented (NaN, infinity, overflow) saturate
    // to -2^(intsize-1) and raise Invalid Operation.
    int expint = FP32_EXP_BIAS + FP32_MANT_BITS;
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    // Handle NaNs, infinities and zeroes:
    if (fp32_is_NaN(exp, mnt)) {
        // NaN -> -2^(intsize-1) with Invalid Operation
        result = fp32_pack(1, FP32_EXP_BIAS + intsize - 1, 0);
        flags |= FPLIB_IOC;
    } else if (exp == FP32_EXP_INF) {
        // infinity -> -2^(intsize-1) with Invalid Operation
        result = fp32_pack(1, FP32_EXP_BIAS + intsize - 1, 0);
        flags |= FPLIB_IOC;
    } else if (!mnt) {
        result = fp32_zero(sgn);
    } else if (exp >= expint) {
        // There are no fractional bits
        result = op;
        // Check the already-integral value still fits in intsize bits.
        bool overflow = (exp > (FP32_EXP_BIAS + intsize - 2) && !sgn) ||
            (exp > (FP32_EXP_BIAS + intsize - 1) && sgn) ||
            (exp > (FP32_EXP_BIAS + intsize - 2) &&
             FP32_MANT(op) > 0 && sgn);
        if (overflow) {
            result = fp32_pack(1, FP32_EXP_BIAS + intsize - 1, 0);
            flags |= FPLIB_IOC;
        }
    } else {
        // Truncate towards zero:
        uint32_t x = expint - exp >= FP32_BITS ? 0 : mnt >> (expint - exp);
        // err encodes the discarded fraction: bit 1 = round bit,
        // bit 0 = sticky (any lower bit set).
        int err = exp < expint - FP32_BITS ? 1 :
            ((mnt << 1 >> (expint - exp - 1) & 3) |
             ((uint32_t)(mnt << 2 << (FP32_BITS + exp - expint)) != 0));
        switch (rounding) {
          case FPRounding_TIEEVEN:
            x += (err == 3 || (err == 2 && (x & 1)));
            break;
          case FPRounding_POSINF:
            x += err && !sgn;
            break;
          case FPRounding_NEGINF:
            x += err && sgn;
            break;
          case FPRounding_ZERO:
            break;
          case FPRounding_TIEAWAY:
            x += err >> 1;
            break;
          default:
            panic("Unrecognized FP rounding mode");
        }

        // Saturate if the rounded magnitude exceeds the signed range.
        bool overflow = (x > (((uint32_t)1 << (intsize - 1)) - 1) && !sgn) ||
            (x > ((uint32_t)1 << (intsize - 1)) && sgn);
        if (overflow) {
            result = fp32_pack(1, FP32_EXP_BIAS + intsize - 1, 0);
            flags |= FPLIB_IOC;
        } else {
            if (x == 0) {
                result = fp32_zero(sgn);
            } else {
                // Repack the rounded integer as an FP value.
                exp = expint;
                mnt = fp32_normalise(x, &exp);
                result = fp32_pack(sgn, exp + FP32_EXP_BITS,
                                   mnt >> FP32_EXP_BITS);
            }

            if (err && exact)
                flags |= FPLIB_IXC;
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}
4568
4569template <>
4570uint64_t
4571fplibRoundIntN(uint64_t op, FPRounding rounding, bool exact, int intsize,
4572 FPSCR &fpscr)
4573{
4574 int expint = FP64_EXP_BIAS + FP64_MANT_BITS;
4575 int mode = modeConv(fpscr);
4576 int flags = 0;
4577 int sgn, exp;
4578 uint64_t mnt, result;
4579
4580 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4581 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4582
4583 // Handle NaNs, infinities and zeroes:
4584 if (fp64_is_NaN(exp, mnt)) {
4585 result = fp64_pack(1, FP64_EXP_BIAS + intsize - 1, 0);
4586 flags |= FPLIB_IOC;
4587 } else if (exp == FP64_EXP_INF) {
4588 result = fp64_pack(1, FP64_EXP_BIAS + intsize - 1, 0);
4589 flags |= FPLIB_IOC;
4590 } else if (!mnt) {
4591 result = fp64_zero(sgn);
4592 } else if (exp >= expint) {
4593 // There are no fractional bits
4594 result = op;
4595 bool overflow = (exp > (FP64_EXP_BIAS + intsize - 2) && !sgn) ||
4596 (exp > (FP64_EXP_BIAS + intsize - 1) && sgn) ||
4597 (exp > (FP64_EXP_BIAS + intsize - 2) &&
4598 FP64_MANT(op) > 0 && sgn);
4599 if (overflow && intsize >= 0) {
4600 result = fp64_pack(1, FP64_EXP_BIAS + intsize - 1, 0);
4601 flags |= FPLIB_IOC;
4602 }
4603 } else {
4604 // Truncate towards zero:
4605 uint64_t x = expint - exp >= FP64_BITS ? 0 : mnt >> (expint - exp);
4606 int err = exp < expint - FP64_BITS ? 1 :
4607 ((mnt << 1 >> (expint - exp - 1) & 3) |
4608 ((uint64_t)(mnt << 2 << (FP64_BITS + exp - expint)) != 0));
4609 switch (rounding) {
4610 case FPRounding_TIEEVEN:
4611 x += (err == 3 || (err == 2 && (x & 1)));
4612 break;
4613 case FPRounding_POSINF:
4614 x += err && !sgn;
4615 break;
4616 case FPRounding_NEGINF:
4617 x += err && sgn;
4618 break;
4619 case FPRounding_ZERO:
4620 break;
4621 case FPRounding_TIEAWAY:
4622 x += err >> 1;
4623 break;
4624 default:
4625 panic("Unrecognized FP rounding mode");
4626 }
4627
4628 bool overflow = (x > (((uint64_t)1 << (intsize - 1)) - 1) && !sgn) ||
4629 (x > ((uint64_t)1 << (intsize - 1)) && sgn);
4630 if (overflow) {
4631 result = fp64_pack(1, FP64_EXP_BIAS + intsize - 1, 0);
4632 flags |= FPLIB_IOC;
4633 } else {
4634 if (x == 0) {
4635 result = fp64_zero(sgn);
4636 } else {
4637 exp = expint;
4638 mnt = fp64_normalise(x, &exp);
4639 result = fp64_pack(sgn, exp + FP64_EXP_BITS,
4640 mnt >> FP64_EXP_BITS);
4641 }
4642
4643 if (err && exact)
4644 flags |= FPLIB_IXC;
4645 }
4646 }
4647
4648 set_fpscr0(fpscr, flags);
4649
4650 return result;
4651}
4652
4653template <>
4654uint16_t
4655fplibScale(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4656{
4657 int flags = 0;
4658 uint16_t result = fp16_scale(op1, (int16_t)op2, modeConv(fpscr), &flags);
4659 set_fpscr0(fpscr, flags);
4660 return result;
4661}
4662
4663template <>
4664uint32_t
4665fplibScale(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4666{
4667 int flags = 0;
4668 uint32_t result = fp32_scale(op1, (int32_t)op2, modeConv(fpscr), &flags);
4669 set_fpscr0(fpscr, flags);
4670 return result;
4671}
4672
4673template <>
4674uint64_t
4675fplibScale(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4676{
4677 int flags = 0;
4678 uint64_t result = fp64_scale(op1, (int64_t)op2, modeConv(fpscr), &flags);
4679 set_fpscr0(fpscr, flags);
4680 return result;
4681}
4682
4683template <>
4684uint16_t
4685fplibSqrt(uint16_t op, FPSCR &fpscr)
4686{
4687 int flags = 0;
4688 uint16_t result = fp16_sqrt(op, modeConv(fpscr), &flags);
4689 set_fpscr0(fpscr, flags);
4690 return result;
4691}
4692
4693template <>
4694uint32_t
4695fplibSqrt(uint32_t op, FPSCR &fpscr)
4696{
4697 int flags = 0;
4698 uint32_t result = fp32_sqrt(op, modeConv(fpscr), &flags);
4699 set_fpscr0(fpscr, flags);
4700 return result;
4701}
4702
4703template <>
4704uint64_t
4705fplibSqrt(uint64_t op, FPSCR &fpscr)
4706{
4707 int flags = 0;
4708 uint64_t result = fp64_sqrt(op, modeConv(fpscr), &flags);
4709 set_fpscr0(fpscr, flags);
4710 return result;
4711}
4712
4713template <>
4714uint16_t
4715fplibSub(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4716{
4717 int flags = 0;
4718 uint16_t result = fp16_add(op1, op2, 1, modeConv(fpscr), &flags);
4719 set_fpscr0(fpscr, flags);
4720 return result;
4721}
4722
4723template <>
4724uint32_t
4725fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4726{
4727 int flags = 0;
4728 uint32_t result = fp32_add(op1, op2, 1, modeConv(fpscr), &flags);
4729 set_fpscr0(fpscr, flags);
4730 return result;
4731}
4732
4733template <>
4734uint64_t
4735fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4736{
4737 int flags = 0;
4738 uint64_t result = fp64_add(op1, op2, 1, modeConv(fpscr), &flags);
4739 set_fpscr0(fpscr, flags);
4740 return result;
4741}
4742
template <>
uint16_t
fplibTrigMulAdd(uint8_t coeff_index, uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    // FTMAD (half precision): coeff + op1 * |op2|, where the
    // coefficient row is chosen by op2's sign bit and the entry by
    // coeff_index. The two rows hold half-precision polynomial
    // coefficients (presumably the sin/cos series — see Arm ARM
    // FPTrigMAdd).
    static uint16_t coeff[2][8] = {
        {
            0x3c00,
            0xb155,
            0x2030,
            0x0000,
            0x0000,
            0x0000,
            0x0000,
            0x0000,
        },
        {
            0x3c00,
            0xb800,
            0x293a,
            0x0000,
            0x0000,
            0x0000,
            0x0000,
            0x0000
        }
    };
    int flags = 0;
    uint16_t result =
        fp16_muladd(coeff[op2 >> (FP16_BITS - 1)][coeff_index], op1,
                    fplibAbs(op2), 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}
4776
template <>
uint32_t
fplibTrigMulAdd(uint8_t coeff_index, uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    // FTMAD (single precision): coeff + op1 * |op2|, where the
    // coefficient row is chosen by op2's sign bit and the entry by
    // coeff_index. The two rows hold single-precision polynomial
    // coefficients (presumably the sin/cos series — see Arm ARM
    // FPTrigMAdd).
    static uint32_t coeff[2][8] = {
        {
            0x3f800000,
            0xbe2aaaab,
            0x3c088886,
            0xb95008b9,
            0x36369d6d,
            0x00000000,
            0x00000000,
            0x00000000
        },
        {
            0x3f800000,
            0xbf000000,
            0x3d2aaaa6,
            0xbab60705,
            0x37cd37cc,
            0x00000000,
            0x00000000,
            0x00000000
        }
    };
    int flags = 0;
    uint32_t result =
        fp32_muladd(coeff[op2 >> (FP32_BITS - 1)][coeff_index], op1,
                    fplibAbs(op2), 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}
4810
template <>
uint64_t
fplibTrigMulAdd(uint8_t coeff_index, uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    // FTMAD (double precision): coeff + op1 * |op2|, where the
    // coefficient row is chosen by op2's sign bit and the entry by
    // coeff_index. The two rows hold double-precision polynomial
    // coefficients (presumably the sin/cos series — see Arm ARM
    // FPTrigMAdd).
    static uint64_t coeff[2][8] = {
        {
            0x3ff0000000000000ULL,
            0xbfc5555555555543ULL,
            0x3f8111111110f30cULL,
            0xbf2a01a019b92fc6ULL,
            0x3ec71de351f3d22bULL,
            0xbe5ae5e2b60f7b91ULL,
            0x3de5d8408868552fULL,
            0x0000000000000000ULL
        },
        {
            0x3ff0000000000000ULL,
            0xbfe0000000000000ULL,
            0x3fa5555555555536ULL,
            0xbf56c16c16c13a0bULL,
            0x3efa01a019b1e8d8ULL,
            0xbe927e4f7282f468ULL,
            0x3e21ee96d2641b13ULL,
            0xbda8f76380fbb401ULL
        }
    };
    int flags = 0;
    uint64_t result =
        fp64_muladd(coeff[op2 >> (FP64_BITS - 1)][coeff_index], op1,
                    fplibAbs(op2), 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}
4844
template <>
uint16_t
fplibTrigSMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
{
    // FTSMUL (half precision): op1 squared, with the result's sign
    // replaced by bit 0 of op2 unless the product is a NaN.
    int flags = 0;
    int sgn, exp;
    uint16_t mnt;

    int mode = modeConv(fpscr);
    uint16_t result = fp16_mul(op1, op1, mode, &flags);
    // Flags from the multiply are committed before the sign fix-up.
    set_fpscr0(fpscr, flags);

    fp16_unpack(&sgn, &exp, &mnt, result, mode, &flags);
    if (!fp16_is_NaN(exp, mnt)) {
        // Clear the sign bit, then insert bit 0 of op2 as the sign.
        result = (result & ~(1ULL << (FP16_BITS - 1))) |
            op2 << (FP16_BITS - 1);
    }
    return result;
}
4864
4865template <>
4866uint32_t
4867fplibTrigSMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4868{
4869 int flags = 0;
4870 int sgn, exp;
4871 uint32_t mnt;
4872
4873 int mode = modeConv(fpscr);
4874 uint32_t result = fp32_mul(op1, op1, mode, &flags);
4875 set_fpscr0(fpscr, flags);
4876
4877 fp32_unpack(&sgn, &exp, &mnt, result, mode, &flags);
4878 if (!fp32_is_NaN(exp, mnt)) {
4879 result = (result & ~(1ULL << (FP32_BITS - 1))) | op2 << (FP32_BITS - 1);
4880 }
4881 return result;
4882}
4883
4884template <>
4885uint64_t
4886fplibTrigSMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4887{
4888 int flags = 0;
4889 int sgn, exp;
4890 uint64_t mnt;
4891
4892 int mode = modeConv(fpscr);
4893 uint64_t result = fp64_mul(op1, op1, mode, &flags);
4894 set_fpscr0(fpscr, flags);
4895
4896 fp64_unpack(&sgn, &exp, &mnt, result, mode, &flags);
4897 if (!fp64_is_NaN(exp, mnt)) {
4898 result = (result & ~(1ULL << (FP64_BITS - 1))) | op2 << (FP64_BITS - 1);
4899 }
4900 return result;
4901}
4902
4903template <>
4904uint16_t
4905fplibTrigSSel(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4906{
4907 static constexpr uint16_t fpOne =
4908 (uint16_t)FP16_EXP_BIAS << FP16_MANT_BITS; // 1.0
4909 if (op2 & 1)
4910 op1 = fpOne;
4911 return op1 ^ ((op2 >> 1) << (FP16_BITS - 1));
4912}
4913
4914template <>
4915uint32_t
4916fplibTrigSSel(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4917{
4918 static constexpr uint32_t fpOne =
4919 (uint32_t)FP32_EXP_BIAS << FP32_MANT_BITS; // 1.0
4920 if (op2 & 1)
4921 op1 = fpOne;
4922 return op1 ^ ((op2 >> 1) << (FP32_BITS - 1));
4923}
4924
4925template <>
4926uint64_t
4927fplibTrigSSel(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4928{
4929 static constexpr uint64_t fpOne =
4930 (uint64_t)FP64_EXP_BIAS << FP64_MANT_BITS; // 1.0
4931 if (op2 & 1)
4932 op1 = fpOne;
4933 return op1 ^ ((op2 >> 1) << (FP64_BITS - 1));
4934}
4935
// Convert an unpacked floating-point value (sign, biased exponent and
// mantissa already aligned to the FP64 layout) to a 64-bit fixed-point
// integer. 'u' selects an unsigned destination. Saturates and sets
// FPLIB_IOC in *flags on overflow; sets FPLIB_IXC when rounding discards
// any bits. Returns the (possibly negated) integer result.
static uint64_t
FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
             int *flags)
{
    // Biased exponent at which the value reaches 2^63:
    int expmax = FP64_EXP_BIAS + FP64_BITS - 1;
    uint64_t x;
    int err;

    // Magnitude certainly too large to represent — saturate immediately:
    if (exp > expmax) {
        *flags = FPLIB_IOC;
        // Saturation value: 2^63 (signed) or 2^64-1 / 0 (unsigned),
        // adjusted by the sign.
        return ((uint64_t)!u << (FP64_BITS - 1)) - !sgn;
    }

    // Truncated integer magnitude, plus a 2-bit error term: err bit 1 is
    // the first discarded (round) bit, err bit 0 is the OR of all lower
    // (sticky) bits. err == 0 means the conversion is exact so far.
    x = lsr64(mnt << FP64_EXP_BITS, expmax - exp);
    err = (exp > expmax - 2 ? 0 :
           (lsr64(mnt << FP64_EXP_BITS, expmax - 2 - exp) & 3) |
           !!(mnt << FP64_EXP_BITS & (lsl64(1, expmax - 2 - exp) - 1)));

    // Apply the requested rounding to the truncated magnitude:
    switch (rounding) {
      case FPRounding_TIEEVEN:
        x += (err == 3 || (err == 2 && (x & 1)));
        break;
      case FPRounding_POSINF:
        x += err && !sgn;
        break;
      case FPRounding_NEGINF:
        x += err && sgn;
        break;
      case FPRounding_ZERO:
        break;
      case FPRounding_TIEAWAY:
        x += err >> 1;
        break;
      default:
        panic("Unrecognized FP rounding mode");
    }

    // Post-rounding overflow check (x is the magnitude here):
    if (u ? sgn && x : x > (1ULL << (FP64_BITS - 1)) - !sgn) {
        *flags = FPLIB_IOC;
        return ((uint64_t)!u << (FP64_BITS - 1)) - !sgn;
    }

    // Any discarded bits make the result inexact:
    if (err) {
        *flags = FPLIB_IXC;
    }

    return sgn ? -x : x;
}
4984
// Convert to 32-bit fixed point by first converting to 64 bits and then
// narrowing, saturating and setting FPLIB_IOC if the value does not fit.
static uint32_t
FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
             int *flags)
{
    uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
    // Range check on the 64-bit two's-complement value: unsigned results
    // must be < 2^32; signed results must lie in [-2^31, 2^31).
    if (u ? x >= 1ULL << FP32_BITS :
        !(x < 1ULL << (FP32_BITS - 1) ||
          (uint64_t)-x <= (uint64_t)1 << (FP32_BITS - 1))) {
        *flags = FPLIB_IOC;
        // Saturate to INT32_MIN/INT32_MAX or 0/UINT32_MAX as appropriate:
        x = ((uint32_t)!u << (FP32_BITS - 1)) - !sgn;
    }
    return x;
}
4998
// Convert to 16-bit fixed point by first converting to 64 bits and then
// narrowing, saturating and setting FPLIB_IOC if the value does not fit.
static uint16_t
FPToFixed_16(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
             int *flags)
{
    uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
    // Range check on the 64-bit two's-complement value: unsigned results
    // must be < 2^16; signed results must lie in [-2^15, 2^15).
    if (u ? x >= 1ULL << FP16_BITS :
        !(x < 1ULL << (FP16_BITS - 1) ||
          (uint64_t)-x <= (uint64_t)1 << (FP16_BITS - 1))) {
        *flags = FPLIB_IOC;
        // Saturate to INT16_MIN/INT16_MAX or 0/UINT16_MAX as appropriate:
        x = ((uint16_t)!u << (FP16_BITS - 1)) - !sgn;
    }
    return x;
}
5012
5013template <>
5014uint16_t
5015fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
5016 FPSCR &fpscr)
5017{
5018 int flags = 0;
5019 int sgn, exp;
5020 uint16_t mnt, result;
5021
5022 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
5023 fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
5024
5025 // If NaN, set cumulative flag or take exception:
5026 if (fp16_is_NaN(exp, mnt)) {
5027 flags = FPLIB_IOC;
5028 result = 0;
5029 } else {
5030 assert(fbits >= 0);
5031 // Infinity is treated as an ordinary normalised number that saturates.
5032 result =
5033 FPToFixed_16(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits,
5034 (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS),
5035 u, rounding, &flags);
5036 }
5037
5038 set_fpscr0(fpscr, flags);
5039
5040 return result;
5041}
5042
5043template <>
5044uint32_t
5045fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
5046 FPSCR &fpscr)
5047{
5048 int flags = 0;
5049 int sgn, exp;
5050 uint16_t mnt;
5051 uint32_t result;
5052
5053 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
5054 fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
5055
5056 // If NaN, set cumulative flag or take exception:
5057 if (fp16_is_NaN(exp, mnt)) {
5058 flags = FPLIB_IOC;
5059 result = 0;
5060 } else {
5061 assert(fbits >= 0);
5062 if (exp == FP16_EXP_INF)
5063 exp = 255; // infinity: make it big enough to saturate
5064 result =
5065 FPToFixed_32(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits,
5066 (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS),
5067 u, rounding, &flags);
5068 }
5069
5070 set_fpscr0(fpscr, flags);
5071
5072 return result;
5073}
5074
5075template <>
5076uint32_t
5077fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
5078{
5079 int flags = 0;
5080 int sgn, exp;
5081 uint32_t mnt, result;
5082
5083 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
5084 fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
5085
5086 // If NaN, set cumulative flag or take exception:
5087 if (fp32_is_NaN(exp, mnt)) {
5088 flags = FPLIB_IOC;
5089 result = 0;
5090 } else {
5091 assert(fbits >= 0);
5092 // Infinity is treated as an ordinary normalised number that saturates.
5093 result =
5094 FPToFixed_32(sgn, exp + FP64_EXP_BIAS - FP32_EXP_BIAS + fbits,
5095 (uint64_t)mnt << (FP64_MANT_BITS - FP32_MANT_BITS),
5096 u, rounding, &flags);
5097 }
5098
5099 set_fpscr0(fpscr, flags);
5100
5101 return result;
5102}
5103
5104template <>
5105uint32_t
5106fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
5107{
5108 int flags = 0;
5109 int sgn, exp;
5110 uint64_t mnt;
5111 uint32_t result;
5112
5113 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
5114 fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
5115
5116 // If NaN, set cumulative flag or take exception:
5117 if (fp64_is_NaN(exp, mnt)) {
5118 flags = FPLIB_IOC;
5119 result = 0;
5120 } else {
5121 assert(fbits >= 0);
5122 // Infinity is treated as an ordinary normalised number that saturates.
5123 result = FPToFixed_32(sgn, exp + fbits, mnt, u, rounding, &flags);
5124 }
5125
5126 set_fpscr0(fpscr, flags);
5127
5128 return result;
5129}
5130
// Floating-point JavaScript convert to signed 32-bit integer with
// round-towards-zero (FJCVTZS). 'op' is an FP64 value. 'Z' records
// whether the conversion was exact and in range; it is reported via 'nz'
// when is64 is set, otherwise via the FPSCR NZCV flags. Cumulative
// exception flags are merged into fpscr by set_fpscr0().
uint32_t
fplibFPToFixedJS(uint64_t op, FPSCR &fpscr, bool is64, uint8_t& nz)
{
    int flags = 0;
    uint32_t result;
    bool Z = true;

    // Split the IEEE-754 double into sign / biased exponent / fraction:
    uint32_t sgn = bits(op, 63);
    int32_t exp = bits(op, 62, 52);
    uint64_t mnt = bits(op, 51, 0);

    if (exp == 0) {
        // Zero or subnormal input:
        if (mnt != 0) {
            if (fpscr.fz) {
                // Flush-to-zero: treated as exact zero, Input Denormal.
                flags |= FPLIB_IDC;
            } else {
                // Subnormal truncates to 0 — inexact, not exact (Z=0).
                flags |= FPLIB_IXC;
                Z = 0;
            }
        }
        result = 0;
    } else if (exp == 0x7ff) {
        // Infinity or NaN: Invalid Operation, result 0, not exact.
        flags |= FPLIB_IOC;
        result = 0;
        Z = 0;
    } else {
        // Normal number: restore the implicit integer bit and compute
        // how far the 53-bit mantissa must shift to become an integer.
        mnt |= 1ULL << FP64_MANT_BITS;
        int mnt_shft = exp - FP64_EXP_BIAS - 52;
        // err == true means "inexact"; note the mnt_shft >= 0 path leaves
        // it true, but that path implies exp - bias >= 52 > 31 so the
        // overflow (IOC) branch below fires first and err is never used.
        bool err = true;

        if (abs(mnt_shft) >= FP64_BITS) {
            // Shift exceeds the 64-bit range: magnitude rounds to 0.
            result = 0;
            Z = 0;
        } else if (mnt_shft >= 0) {
            result = lsl64(mnt, mnt_shft);
        } else if (mnt_shft < 0) {
            // Right shift: the bits shifted out determine exactness.
            err = lsl64(mnt, mnt_shft+FP64_BITS) != 0;
            result = lsr64(mnt, abs(mnt_shft));
        }
        // Largest representable magnitude: 2^31 for negative inputs,
        // 2^31 - 1 for positive ones.
        uint64_t max_result = (1UL << (FP32_BITS - 1)) -!sgn;
        if ((exp - FP64_EXP_BIAS) > 31 || result > max_result) {
            // Out of int32 range: Invalid Operation, not exact.
            flags |= FPLIB_IOC;
            Z = false;
        } else if (err) {
            flags |= FPLIB_IXC;
            Z = false;
        }
        result = sgn ? -result : result;
    }
    // -0.0 (and negative values that rounded to 0) are not "exact zero":
    if (sgn == 1 && result == 0)
        Z = false;

    if (is64) {
        // AArch64: report exactness through the output parameter.
        nz = Z? 0x1: 0x0;
    } else {
        // AArch32: report exactness through the NZCV flags (Z only).
        fpscr.n = 0;
        fpscr.z = (int)Z;
        fpscr.c = 0;
        fpscr.v = 0;
    }

    set_fpscr0(fpscr, flags);
    return result;
}
5195
5196template <>
5197uint64_t
5198fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
5199 FPSCR &fpscr)
5200{
5201 int flags = 0;
5202 int sgn, exp;
5203 uint16_t mnt;
5204 uint64_t result;
5205
5206 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
5207 fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
5208
5209 // If NaN, set cumulative flag or take exception:
5210 if (fp16_is_NaN(exp, mnt)) {
5211 flags = FPLIB_IOC;
5212 result = 0;
5213 } else {
5214 assert(fbits >= 0);
5215 if (exp == FP16_EXP_INF)
5216 exp = 255; // infinity: make it big enough to saturate
5217 result =
5218 FPToFixed_64(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits,
5219 (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS),
5220 u, rounding, &flags);
5221 }
5222
5223 set_fpscr0(fpscr, flags);
5224
5225 return result;
5226}
5227
5228template <>
5229uint64_t
5230fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
5231{
5232 int flags = 0;
5233 int sgn, exp;
5234 uint32_t mnt;
5235 uint64_t result;
5236
5237 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
5238 fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
5239
5240 // If NaN, set cumulative flag or take exception:
5241 if (fp32_is_NaN(exp, mnt)) {
5242 flags = FPLIB_IOC;
5243 result = 0;
5244 } else {
5245 assert(fbits >= 0);
5246 // Infinity is treated as an ordinary normalised number that saturates.
5247 result =
5248 FPToFixed_64(sgn, exp + FP64_EXP_BIAS - FP32_EXP_BIAS + fbits,
5249 (uint64_t)mnt << (FP64_MANT_BITS - FP32_MANT_BITS),
5250 u, rounding, &flags);
5251 }
5252
5253 set_fpscr0(fpscr, flags);
5254
5255 return result;
5256}
5257
5258template <>
5259uint64_t
5260fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
5261{
5262 int flags = 0;
5263 int sgn, exp;
5264 uint64_t mnt, result;
5265
5266 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
5267 fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
5268
5269 // If NaN, set cumulative flag or take exception:
5270 if (fp64_is_NaN(exp, mnt)) {
5271 flags = FPLIB_IOC;
5272 result = 0;
5273 } else {
5274 assert(fbits >= 0);
5275 // Infinity is treated as an ordinary normalised number that saturates.
5276 result = FPToFixed_64(sgn, exp + fbits, mnt, u, rounding, &flags);
5277 }
5278
5279 set_fpscr0(fpscr, flags);
5280
5281 return result;
5282}
5283
5284static uint16_t
5285fp16_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
5286{
5287 int x_sgn = !u && a >> (FP64_BITS - 1);
5288 int x_exp = FP16_EXP_BIAS + FP64_BITS - 1 - fbits;
5289 uint64_t x_mnt = x_sgn ? -a : a;
5290
5291 // Handle zero:
5292 if (!x_mnt) {
5293 return fp16_zero(0);
5294 }
5295
5296 // Normalise into FP16_BITS bits, collapsing error into bottom bit:
5297 x_mnt = fp64_normalise(x_mnt, &x_exp);
5298 x_mnt = (x_mnt >> (FP64_BITS - FP16_BITS - 1) |
5299 !!(x_mnt & ((1ULL << (FP64_BITS - FP16_BITS - 1)) - 1)));
5300
5301 return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
5302}
5303
5304static uint32_t
5305fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
5306{
5307 int x_sgn = !u && a >> (FP64_BITS - 1);
5308 int x_exp = FP32_EXP_BIAS + FP64_BITS - 1 - fbits;
5309 uint64_t x_mnt = x_sgn ? -a : a;
5310
5311 // Handle zero:
5312 if (!x_mnt) {
5313 return fp32_zero(0);
5314 }
5315
5316 // Normalise into FP32_BITS bits, collapsing error into bottom bit:
5317 x_mnt = fp64_normalise(x_mnt, &x_exp);
5318 x_mnt = (x_mnt >> (FP64_BITS - FP32_BITS - 1) |
5319 !!(x_mnt & ((1ULL << (FP64_BITS - FP32_BITS - 1)) - 1)));
5320
5321 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
5322}
5323
5324static uint64_t
5325fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
5326{
5327 int x_sgn = !u && a >> (FP64_BITS - 1);
5328 int x_exp = FP64_EXP_BIAS + FP64_BITS - 1 - fbits;
5329 uint64_t x_mnt = x_sgn ? -a : a;
5330
5331 // Handle zero:
5332 if (!x_mnt) {
5333 return fp64_zero(0);
5334 }
5335
5336 x_mnt = fp64_normalise(x_mnt, &x_exp);
5337
5338 return fp64_round(x_sgn, x_exp, x_mnt << 1, mode, flags);
5339}
5340
5341template <>
5342uint16_t
5343fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding,
5344 FPSCR &fpscr)
5345{
5346 int flags = 0;
5347 uint16_t res = fp16_cvtf(op, fbits, u,
5348 (int)rounding | (modeConv(fpscr) & 0xFC),
5349 &flags);
5350 set_fpscr0(fpscr, flags);
5351 return res;
5352}
5353
5354template <>
5355uint32_t
5356fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
5357{
5358 int flags = 0;
5359 uint32_t res = fp32_cvtf(op, fbits, u,
5360 (int)rounding | (modeConv(fpscr) & 0xFC),
5361 &flags);
5362 set_fpscr0(fpscr, flags);
5363 return res;
5364}
5365
5366template <>
5367uint64_t
5368fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
5369{
5370 int flags = 0;
5371 uint64_t res = fp64_cvtf(op, fbits, u,
5372 (int)rounding | (modeConv(fpscr) & 0xFC),
5373 &flags);
5374 set_fpscr0(fpscr, flags);
5375 return res;
5376}
5377
5378template <>
5379uint16_t
5381{
5382 return fp16_infinity(sgn);
5383}
5384
5385template <>
5386uint32_t
5388{
5389 return fp32_infinity(sgn);
5390}
5391
5392template <>
5393uint64_t
5395{
5396 return fp64_infinity(sgn);
5397}
5398
5399template <>
5400uint16_t
5402{
5403 return fp16_defaultNaN();
5404}
5405
5406template <>
5407uint32_t
5409{
5410 return fp32_defaultNaN();
5411}
5412
5413template <>
5414uint64_t
5416{
5417 return fp64_defaultNaN();
5418}
5419
5420
5421template <>
5422uint16_t
5423fplib32RSqrtStep(uint16_t op1, uint16_t op2, FPSCR &fpscr)
5424{
5425 int mode = modeConv(fpscr);
5426 int flags = 0;
5427 int sgn1, exp1, sgn2, exp2;
5428 uint16_t mnt1, mnt2, result;
5429
5430 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
5431 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
5432
5433 result = fp16_process_NaNs(op1, op2, mode, &flags);
5434 if (!result) {
5435 if ((exp1 == FP16_EXP_INF && !mnt2) ||
5436 (exp2 == FP16_EXP_INF && !mnt1)) {
5437 result = fp16_FPOnePointFive(0);
5438 } else {
5439 uint16_t product = fp16_mul(op1, op2, mode, &flags);
5440 result = fp16_halved_add(fp16_FPThree(0), product, 1, mode,
5441 &flags);
5442 }
5443 }
5444
5445 set_fpscr0(fpscr, flags);
5446
5447 return result;
5448}
5449
5450template <>
5451uint16_t
5452fplib32RecipStep(uint16_t op1, uint16_t op2, FPSCR &fpscr)
5453{
5454 int mode = modeConv(fpscr);
5455 int flags = 0;
5456 int sgn1, exp1, sgn2, exp2;
5457 uint16_t mnt1, mnt2, result;
5458
5459 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
5460 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
5461
5462 result = fp16_process_NaNs(op1, op2, mode, &flags);
5463 if (!result) {
5464 if ((exp1 == FP16_EXP_INF && !mnt2) ||
5465 (exp2 == FP16_EXP_INF && !mnt1)) {
5466 result = fp16_FPTwo(0);
5467 } else {
5468 uint16_t product = fp16_mul(op1, op2, mode, &flags);
5469 result = fp16_add(fp16_FPTwo(0), product, 1, mode, &flags);
5470 }
5471 }
5472
5473 set_fpscr0(fpscr, flags);
5474
5475 return result;
5476}
5477
5478} // namespace ArmISA
5479} // namespace gem5
#define FPLIB_RN
Definition fplib.cc:53
#define FPLIB_DN
Definition fplib.cc:58
#define FPLIB_FZ16
Definition fplib.cc:60
#define FPLIB_DZC
Definition fplib.cc:66
#define FP32_EXP_BITS
Definition fplib.cc:74
#define FP16_MANT(x)
Definition fplib.cc:93
#define FP32_BITS
Definition fplib.cc:70
#define FP64_EXP_INF
Definition fplib.cc:83
#define FP32_MANT_BITS
Definition fplib.cc:86
#define FPLIB_RP
Definition fplib.cc:54
#define FPLIB_IOC
Definition fplib.cc:67
#define FP16_MANT_BITS
Definition fplib.cc:85
#define FP16_EXP_BITS
Definition fplib.cc:73
#define FP64_EXP_BITS
Definition fplib.cc:75
#define FP64_MANT_BITS
Definition fplib.cc:87
#define FPLIB_AHP
Definition fplib.cc:59
#define FPLIB_OFC
Definition fplib.cc:65
#define FP64_MANT(x)
Definition fplib.cc:95
#define FP16_EXP_INF
Definition fplib.cc:81
#define FP32_EXP(x)
Definition fplib.cc:90
#define FP64_EXP_BIAS
Definition fplib.cc:79
#define FP16_EXP(x)
Definition fplib.cc:89
#define FPLIB_IDC
Definition fplib.cc:62
#define FPLIB_FZ
Definition fplib.cc:57
#define FP16_BITS
Definition fplib.cc:69
#define FP32_MANT(x)
Definition fplib.cc:94
#define FP32_EXP_BIAS
Definition fplib.cc:78
#define FPLIB_IXC
Definition fplib.cc:63
#define FP32_EXP_INF
Definition fplib.cc:82
#define FP64_EXP(x)
Definition fplib.cc:91
#define FPLIB_UFC
Definition fplib.cc:64
#define FP64_BITS
Definition fplib.cc:71
#define FP16_EXP_BIAS
Definition fplib.cc:77
#define FPLIB_RM
Definition fplib.cc:55
Floating-point library code, which will gradually replace vfp.hh.
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Definition bitfield.hh:79
#define panic(...)
This implements a cprintf based panic() function.
Definition logging.hh:220
uint16_t fplibMax(uint16_t op1, uint16_t op2, FPSCR &fpscr)
Definition fplib.cc:3391
static uint64_t fp64_FPOnePointFive(int sgn)
Definition fplib.cc:2782
uint16_t fplib32RSqrtStep(uint16_t op1, uint16_t op2, FPSCR &fpscr)
Definition fplib.cc:5423
uint32_t fplibFPToFixedJS(uint64_t op, FPSCR &fpscr, bool is64, uint8_t &nz)
Floating-point JS convert to a signed integer, with rounding to zero.
Definition fplib.cc:5132
uint16_t fplib32RecipStep(uint16_t op1, uint16_t op2, FPSCR &fpscr)
Definition fplib.cc:5452
static int fp64_is_infinity(int exp, uint64_t mnt)
Definition fplib.cc:529
static uint32_t fp32_FPConvertNaN_16(uint16_t op)
Definition fplib.cc:2738
static FPRounding FPCRRounding(FPSCR &fpscr)
Definition fplib.hh:71
static uint64_t fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags)
Definition fplib.cc:611
uint16_t fplibMinNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
Definition fplib.cc:3553
static uint16_t fp16_normalise(uint16_t mnt, int *exp)
Definition fplib.cc:219
static int fp32_compare_un(uint32_t a, uint32_t b, int mode, int *flags)
Definition fplib.cc:1156
static uint32_t fp32_scale(uint32_t a, int32_t b, int mode, int *flags)
Definition fplib.cc:2170
static void fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode, int *flags)
Definition fplib.cc:415
uint16_t fplibMaxNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
Definition fplib.cc:3460
static uint64_t fp64_FPConvertNaN_32(uint32_t op)
Definition fplib.cc:2762
static int modeConv(FPSCR fpscr)
Definition fplib.cc:2415
static uint64_t fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags)
Definition fplib.cc:993
uint16_t fplibDiv(uint16_t op1, uint16_t op2, FPSCR &fpscr)
Definition fplib.cc:3117
static int fp16_compare_ge(uint16_t a, uint16_t b, int mode, int *flags)
Definition fplib.cc:1018
Bitfield< 22 > a1
static uint32_t fp32_FPThree(int sgn)
Definition fplib.cc:2794
static uint64_t fp64_process_NaN(uint64_t a, int mode, int *flags)
Definition fplib.cc:555
static uint64_t fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt)
Definition fplib.cc:314
static void fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode, int *flags)
Definition fplib.cc:438
static uint16_t fp16_scale(uint16_t a, int16_t b, int mode, int *flags)
Definition fplib.cc:2136
static uint64_t fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags)
Definition fplib.cc:692
uint16_t fplibRSqrtEstimate(uint16_t op, FPSCR &fpscr)
Definition fplib.cc:3740
static uint16_t fp16_zero(int sgn)
Definition fplib.cc:320
uint16_t fplibNeg(uint16_t op)
Definition fplib.cc:3700
Bitfield< 3, 0 > mask
Definition pcstate.hh:63
static uint16_t fp16_max_normal(int sgn)
Definition fplib.cc:338
bool fplibCompareGT(uint16_t a, uint16_t b, FPSCR &fpscr)
Definition fplib.cc:2470
static uint16_t lsl16(uint16_t x, uint32_t shift)
Definition fplib.cc:98
Bitfield< 4, 0 > mode
Definition misc_types.hh:74
static int fp64_compare_un(uint64_t a, uint64_t b, int mode, int *flags)
Definition fplib.cc:1244
uint16_t fplibRecipEstimate(uint16_t op, FPSCR &fpscr)
Definition fplib.cc:3935
Bitfield< 3, 0 > rm
Definition types.hh:118
static uint16_t fp16_process_NaN(uint16_t a, int mode, int *flags)
Definition fplib.cc:535
static uint16_t fp16_FPConvertNaN_32(uint32_t op)
Definition fplib.cc:2722
static int fp16_is_quiet_NaN(int exp, uint16_t mnt)
Definition fplib.cc:499
static uint32_t fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags)
Definition fplib.cc:845
static void fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode, int *flags)
Definition fplib.cc:392
static uint32_t fp32_defaultNaN()
Definition fplib.cc:380
static uint32_t FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, int *flags)
Definition fplib.cc:4986
static int fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags)
Definition fplib.cc:1194
static uint32_t lsr32(uint32_t x, uint32_t shift)
Definition fplib.cc:116
static uint64_t FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, int *flags)
Definition fplib.cc:4937
static uint32_t fp32_convert_default_nan(uint16_t op)
Definition fplib.cc:721
static uint16_t fp16_div(uint16_t a, uint16_t b, int mode, int *flags)
Definition fplib.cc:1974
uint16_t fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
Definition fplib.cc:2825
Bitfield< 7 > b
uint16_t fplibExpA(uint16_t op)
Definition fplib.cc:3147
static uint32_t fp32_FPTwo(int sgn)
Definition fplib.cc:2812
uint16_t fplibTrigSSel(uint16_t op1, uint16_t op2, FPSCR &fpscr)
Definition fplib.cc:4905
static int fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags)
Definition fplib.cc:1131
static uint64_t fp64_FPConvertNaN_16(uint16_t op)
Definition fplib.cc:2754
uint16_t fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
Definition fplib.cc:5343
static uint32_t fp32_muladdh(uint32_t a, uint16_t b, uint16_t c, int scale, int mode, int *flags, bool rm_odd=false)
Definition fplib.cc:1877
Bitfield< 6 > err
static uint16_t fp16_FPOnePointFive(int sgn)
Definition fplib.cc:2770
static uint16_t fp16_add(uint16_t a, uint16_t b, int neg, int mode, int *flags)
Definition fplib.cc:1263
static int fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags)
Definition fplib.cc:1087
uint16_t fplibSqrt(uint16_t op, FPSCR &fpscr)
Definition fplib.cc:4685
static uint16_t fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt)
Definition fplib.cc:302
static uint64_t fp64_div(uint64_t a, uint64_t b, int mode, int *flags)
Definition fplib.cc:2058
static uint32_t fp32_normalise(uint32_t mnt, int *exp)
Definition fplib.cc:237
static uint32_t fp32_infinity(int sgn)
Definition fplib.cc:362
static uint64_t lsr64(uint64_t x, uint32_t shift)
Definition fplib.cc:128
bool fplibCompareEQ(uint16_t a, uint16_t b, FPSCR &fpscr)
Definition fplib.cc:2450
static uint32_t fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags)
Definition fplib.cc:663
uint32_t fplibMulAddH(uint32_t addend, uint16_t op1, uint16_t op2, FPSCR &fpscr)
Definition fplib.cc:3106
static int fp64_is_NaN(int exp, uint64_t mnt)
Definition fplib.cc:475
uint16_t fplibAdd(uint16_t op1, uint16_t op2, FPSCR &fpscr)
Definition fplib.cc:2591
static uint16_t fp16_FPTwo(int sgn)
Definition fplib.cc:2806
static uint16_t fp16_sqrt(uint16_t a, int mode, int *flags)
Definition fplib.cc:2238
static int fp32_is_NaN(int exp, uint32_t mnt)
Definition fplib.cc:469
static uint64_t fp64_FPTwo(int sgn)
Definition fplib.cc:2818
static uint16_t lsr16(uint16_t x, uint32_t shift)
Definition fplib.cc:104
static int fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags)
Definition fplib.cc:1175
static uint16_t fp16_infinity(int sgn)
Definition fplib.cc:356
uint16_t fplibMulX(uint16_t op1, uint16_t op2, FPSCR &fpscr)
Definition fplib.cc:3607
static uint32_t fp32_FPConvertNaN_64(uint64_t op)
Definition fplib.cc:2746
static uint16_t fp16_muladd(uint16_t a, uint16_t b, uint16_t c, int scale, int mode, int *flags)
Definition fplib.cc:1615
static void set_fpscr0(FPSCR &fpscr, int flags)
Definition fplib.cc:2113
static uint64_t fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
Definition fplib.cc:5325
uint16_t fplibAbs(uint16_t op)
Definition fplib.cc:2570
static uint16_t FPToFixed_16(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, int *flags)
Definition fplib.cc:5000
static uint16_t fp16_defaultNaN()
Definition fplib.cc:374
static uint32_t lsl32(uint32_t x, uint32_t shift)
Definition fplib.cc:110
static void add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
Definition fplib.cc:197
static uint32_t fp32_FPOnePointFive(int sgn)
Definition fplib.cc:2776
bool fplibCompareGE(uint16_t a, uint16_t b, FPSCR &fpscr)
Definition fplib.cc:2460
static uint64_t fp64_infinity(int sgn)
Definition fplib.cc:368
static int fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags)
Definition fplib.cc:1219
static int fp16_is_infinity(int exp, uint16_t mnt)
Definition fplib.cc:517
uint16_t fplibMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
Definition fplib.cc:3577
static uint64_t fp64_FPThree(int sgn)
Definition fplib.cc:2800
static uint64_t fp64_sqrt(uint64_t a, int mode, int *flags)
Definition fplib.cc:2345
static void fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn)
Definition fplib.cc:3378
static int fp16_compare_eq(uint16_t a, uint16_t b, int mode, int *flags)
Definition fplib.cc:999
static uint16_t fp16_process_NaNs(uint16_t a, uint16_t b, int mode, int *flags)
Definition fplib.cc:565
uint16_t fplibScale(uint16_t op1, uint16_t op2, FPSCR &fpscr)
Definition fplib.cc:4655
static int fp32_is_signalling_NaN(int exp, uint32_t mnt)
Definition fplib.cc:487
static int fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags)
Definition fplib.cc:1106
static uint16_t fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags)
Definition fplib.cc:761
static int fp16_is_NaN(int exp, uint16_t mnt)
Definition fplib.cc:463
Bitfield< 29 > c
Definition misc_types.hh:53
static void fp16_minmaxnum(uint16_t *op1, uint16_t *op2, int sgn)
Definition fplib.cc:3354
static uint16_t fp16_mul(uint16_t a, uint16_t b, int mode, int *flags)
Definition fplib.cc:1504
static void lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
Definition fplib.cc:134
static uint32_t fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags)
Definition fplib.cc:916
Bitfield< 8 > a
Definition misc_types.hh:66
static uint32_t fp32_zero(int sgn)
Definition fplib.cc:326
static void lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
Definition fplib.cc:152
static uint32_t fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt)
Definition fplib.cc:308
static void mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b)
Definition fplib.cc:188
Bitfield< 22 > u
uint32_t fplibRoundIntN(uint32_t op, FPRounding rounding, bool exact, int intsize, FPSCR &fpscr)
Definition fplib.cc:4487
uint16_t fplibRecpX(uint16_t op, FPSCR &fpscr)
Definition fplib.cc:4211
static uint16_t fp16_repack(int sgn, int exp, uint16_t mnt)
Definition fplib.cc:3336
static uint64_t fp64_scale(uint64_t a, int64_t b, int mode, int *flags)
Definition fplib.cc:2204
static int fp16_is_signalling_NaN(int exp, uint16_t mnt)
Definition fplib.cc:481
static uint64_t fp64_repack(int sgn, int exp, uint64_t mnt)
Definition fplib.cc:3348
uint16_t fplibRecipStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr)
Definition fplib.cc:4121
uint16_t fplibTrigSMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
Definition fplib.cc:4847
uint16_t fplibTrigMulAdd(uint8_t coeff_index, uint16_t op1, uint16_t op2, FPSCR &fpscr)
Definition fplib.cc:4745
uint16_t fplibRSqrtStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr)
Definition fplib.cc:3845
static void fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn)
Definition fplib.cc:3366
int fplibCompare(uint16_t op1, uint16_t op2, bool signal_nans, FPSCR &fpscr)
Definition fplib.cc:2621
static uint64_t fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags)
Definition fplib.cc:1383
uint16_t fplibDefaultNaN()
Definition fplib.cc:5401
static uint32_t fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags)
Definition fplib.cc:588
static uint64_t fp64_defaultNaN()
Definition fplib.cc:386
static const uint8_t recip_sqrt_estimate[256]
Definition fplib.cc:3719
static uint32_t fp32_process_NaN(uint32_t a, int mode, int *flags)
Definition fplib.cc:545
uint16_t fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
Definition fplib.cc:5015
uint16_t fplibSub(uint16_t op1, uint16_t op2, FPSCR &fpscr)
Definition fplib.cc:4715
static int fp16_compare_gt(uint16_t a, uint16_t b, int mode, int *flags)
Definition fplib.cc:1043
static uint16_t fp16_process_NaNs3(uint16_t a, uint16_t b, uint16_t c, int mode, int *flags)
Definition fplib.cc:634
uint16_t fplibMulAdd(uint16_t addend, uint16_t op1, uint16_t op2, FPSCR &fpscr)
Definition fplib.cc:3076
static uint64_t lsl64(uint64_t x, uint32_t shift)
Definition fplib.cc:122
static int fp32_is_infinity(int exp, uint32_t mnt)
Definition fplib.cc:523
Bitfield< 1 > t1
static uint64_t fp64_zero(int sgn)
Definition fplib.cc:332
static uint32_t fp32_sqrt(uint32_t a, int mode, int *flags)
Definition fplib.cc:2290
static int fp64_is_quiet_NaN(int exp, uint64_t mnt)
Definition fplib.cc:511
static uint16_t fp16_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
Definition fplib.cc:5285
static int fp16_compare_un(uint16_t a, uint16_t b, int mode, int *flags)
Definition fplib.cc:1068
static uint16_t fp16_FPThree(int sgn)
Definition fplib.cc:2788
static uint32_t fp32_process_NaNs3H(uint32_t a, uint16_t b, uint16_t c, int mode, int *flags)
Definition fplib.cc:732
static uint32_t fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale, int mode, int *flags)
Definition fplib.cc:1700
static uint32_t fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags)
Definition fplib.cc:1323
static void sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
Definition fplib.cc:205
static uint16_t fp16_halved_add(uint16_t a, uint16_t b, int neg, int mode, int *flags)
Definition fplib.cc:1443
static int fp32_is_quiet_NaN(int exp, uint32_t mnt)
Definition fplib.cc:505
static uint64_t fp64_max_normal(int sgn)
Definition fplib.cc:350
static uint32_t fp32_div(uint32_t a, uint32_t b, int mode, int *flags)
Definition fplib.cc:2016
uint16_t fplibInfinity(int sgn)
Definition fplib.cc:5380
uint16_t fplibRoundInt(uint16_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
Definition fplib.cc:4292
static uint16_t fp16_round(int sgn, int exp, uint16_t mnt, int mode, int *flags)
Definition fplib.cc:839
static void set_fpscr(FPSCR &fpscr, int flags)
Definition fplib.cc:2423
static uint64_t fp64_mul(uint64_t a, uint64_t b, int mode, int *flags)
Definition fplib.cc:1578
Bitfield< 6, 5 > shift
Definition types.hh:117
static uint16_t fp16_FPConvertNaN_64(uint64_t op)
Definition fplib.cc:2730
static uint32_t fp32_repack(int sgn, int exp, uint32_t mnt)
Definition fplib.cc:3342
static uint64_t fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags)
Definition fplib.cc:922
@ FPRounding_POSINF
Definition fplib.hh:63
@ FPRounding_ZERO
Definition fplib.hh:65
@ FPRounding_TIEEVEN
Definition fplib.hh:62
@ FPRounding_TIEAWAY
Definition fplib.hh:66
@ FPRounding_ODD
Definition fplib.hh:67
@ FPRounding_NEGINF
Definition fplib.hh:64
uint16_t fplibMin(uint16_t op1, uint16_t op2, FPSCR &fpscr)
Definition fplib.cc:3484
static uint64_t fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale, int mode, int *flags)
Definition fplib.cc:1785
static uint32_t fp32_mul(uint32_t a, uint32_t b, int mode, int *flags)
Definition fplib.cc:1541
static int fp64_is_signalling_NaN(int exp, uint64_t mnt)
Definition fplib.cc:493
bool fplibCompareUN(uint16_t a, uint16_t b, FPSCR &fpscr)
Definition fplib.cc:2480
static uint32_t fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
Definition fplib.cc:5305
static void fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp)
Definition fplib.cc:273
static uint64_t fp64_normalise(uint64_t mnt, int *exp)
Definition fplib.cc:255
Bitfield< 0 > t0
static uint32_t fp32_max_normal(int sgn)
Definition fplib.cc:344
static void mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
Definition fplib.cc:170
static int cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
Definition fplib.cc:213
Bitfield< 20, 18 > a0
Bitfield< 3 > r0
Bitfield< 7, 4 > b1
Definition qarma.hh:65
Bitfield< 3, 0 > b0
Definition qarma.hh:66
Bitfield< 3 > x
Definition pagetable.hh:78
Bitfield< 4 > op
Definition types.hh:83
Copyright (c) 2024 Arm Limited All rights reserved.
Definition binary32.hh:36

Generated on Sat Oct 18 2025 08:06:37 for gem5 by doxygen 1.14.0