gem5 v24.0.0.0
Loading...
Searching...
No Matches
fplib.cc
Go to the documentation of this file.
1/*
2* Copyright (c) 2012-2013, 2017-2018, 2020 ARM Limited
3* Copyright (c) 2020 Metempsy Technology Consulting
4* All rights reserved
5*
6* The license below extends only to copyright in the software and shall
7* not be construed as granting a license to any other intellectual
8* property including but not limited to intellectual property relating
9* to a hardware implementation of the functionality of the software
10* licensed hereunder. You may use the software subject to the license
11* terms below provided that you ensure that this notice is replicated
12* unmodified and in its entirety in all distributions of the software,
13* modified or unmodified, in source code or in binary form.
14*
15* Redistribution and use in source and binary forms, with or without
16* modification, are permitted provided that the following conditions are
17* met: redistributions of source code must retain the above copyright
18* notice, this list of conditions and the following disclaimer;
19* redistributions in binary form must reproduce the above copyright
20* notice, this list of conditions and the following disclaimer in the
21* documentation and/or other materials provided with the distribution;
22* neither the name of the copyright holders nor the names of its
23* contributors may be used to endorse or promote products derived from
24* this software without specific prior written permission.
25*
26* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37*/
38
39#include <stdint.h>
40
41#include <cassert>
42#include <cmath>
43
44#include "base/logging.hh"
45#include "fplib.hh"
46
47namespace gem5
48{
49
50namespace ArmISA
51{
52
53#define FPLIB_RN 0
54#define FPLIB_RP 1
55#define FPLIB_RM 2
56#define FPLIB_RZ 3
57#define FPLIB_FZ 4
58#define FPLIB_DN 8
59#define FPLIB_AHP 16
60#define FPLIB_FZ16 32
61
62#define FPLIB_IDC 128 // Input Denormal
63#define FPLIB_IXC 16 // Inexact
64#define FPLIB_UFC 8 // Underflow
65#define FPLIB_OFC 4 // Overflow
66#define FPLIB_DZC 2 // Division by Zero
67#define FPLIB_IOC 1 // Invalid Operation
68
69#define FP16_BITS 16
70#define FP32_BITS 32
71#define FP64_BITS 64
72
73#define FP16_EXP_BITS 5
74#define FP32_EXP_BITS 8
75#define FP64_EXP_BITS 11
76
77#define FP16_EXP_BIAS 15
78#define FP32_EXP_BIAS 127
79#define FP64_EXP_BIAS 1023
80
81#define FP16_EXP_INF ((1ULL << FP16_EXP_BITS) - 1)
82#define FP32_EXP_INF ((1ULL << FP32_EXP_BITS) - 1)
83#define FP64_EXP_INF ((1ULL << FP64_EXP_BITS) - 1)
84
85#define FP16_MANT_BITS (FP16_BITS - FP16_EXP_BITS - 1)
86#define FP32_MANT_BITS (FP32_BITS - FP32_EXP_BITS - 1)
87#define FP64_MANT_BITS (FP64_BITS - FP64_EXP_BITS - 1)
88
89#define FP16_EXP(x) ((x) >> FP16_MANT_BITS & ((1ULL << FP16_EXP_BITS) - 1))
90#define FP32_EXP(x) ((x) >> FP32_MANT_BITS & ((1ULL << FP32_EXP_BITS) - 1))
91#define FP64_EXP(x) ((x) >> FP64_MANT_BITS & ((1ULL << FP64_EXP_BITS) - 1))
92
93#define FP16_MANT(x) ((x) & ((1ULL << FP16_MANT_BITS) - 1))
94#define FP32_MANT(x) ((x) & ((1ULL << FP32_MANT_BITS) - 1))
95#define FP64_MANT(x) ((x) & ((1ULL << FP64_MANT_BITS) - 1))
96
97static inline uint16_t
98lsl16(uint16_t x, uint32_t shift)
99{
100 return shift < 16 ? x << shift : 0;
101}
102
103static inline uint16_t
104lsr16(uint16_t x, uint32_t shift)
105{
106 return shift < 16 ? x >> shift : 0;
107}
108
109static inline uint32_t
110lsl32(uint32_t x, uint32_t shift)
111{
112 return shift < 32 ? x << shift : 0;
113}
114
115static inline uint32_t
116lsr32(uint32_t x, uint32_t shift)
117{
118 return shift < 32 ? x >> shift : 0;
119}
120
121static inline uint64_t
122lsl64(uint64_t x, uint32_t shift)
123{
124 return shift < 64 ? x << shift : 0;
125}
126
127static inline uint64_t
128lsr64(uint64_t x, uint32_t shift)
129{
130 return shift < 64 ? x >> shift : 0;
131}
132
133static inline void
134lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
135{
136 if (shift == 0) {
137 *r1 = x1;
138 *r0 = x0;
139 } else if (shift < 64) {
140 *r1 = x1 << shift | x0 >> (64 - shift);
141 *r0 = x0 << shift;
142 } else if (shift < 128) {
143 *r1 = x0 << (shift - 64);
144 *r0 = 0;
145 } else {
146 *r1 = 0;
147 *r0 = 0;
148 }
149}
150
151static inline void
152lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
153{
154 if (shift == 0) {
155 *r1 = x1;
156 *r0 = x0;
157 } else if (shift < 64) {
158 *r0 = x0 >> shift | x1 << (64 - shift);
159 *r1 = x1 >> shift;
160 } else if (shift < 128) {
161 *r0 = x1 >> (shift - 64);
162 *r1 = 0;
163 } else {
164 *r0 = 0;
165 *r1 = 0;
166 }
167}
168
169static inline void
170mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
171{
172 uint32_t mask = ((uint32_t)1 << 31) - 1;
173 uint64_t a0 = a & mask;
174 uint64_t a1 = a >> 31 & mask;
175 uint64_t b0 = b & mask;
176 uint64_t b1 = b >> 31 & mask;
177 uint64_t p0 = a0 * b0;
178 uint64_t p2 = a1 * b1;
179 uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2;
180 uint64_t s0 = p0;
181 uint64_t s1 = (s0 >> 31) + p1;
182 uint64_t s2 = (s1 >> 31) + p2;
183 *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62;
184 *x1 = s2 >> 2;
185}
186
187static inline
188void mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b)
189{
190 uint64_t t0 = (uint64_t)(uint32_t)a * b;
191 uint64_t t1 = (t0 >> 32) + (a >> 32) * b;
192 *x0 = t1 << 32 | (uint32_t)t0;
193 *x1 = t1 >> 32;
194}
195
196static inline void
197add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
198 uint64_t b1)
199{
200 *x0 = a0 + b0;
201 *x1 = a1 + b1 + (*x0 < a0);
202}
203
204static inline void
205sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
206 uint64_t b1)
207{
208 *x0 = a0 - b0;
209 *x1 = a1 - b1 - (*x0 > a0);
210}
211
212static inline int
213cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
214{
215 return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 1 : 0);
216}
217
218static inline uint16_t
219fp16_normalise(uint16_t mnt, int *exp)
220{
221 int shift;
222
223 if (!mnt) {
224 return 0;
225 }
226
227 for (shift = 8; shift; shift >>= 1) {
228 if (!(mnt >> (16 - shift))) {
229 mnt <<= shift;
230 *exp -= shift;
231 }
232 }
233 return mnt;
234}
235
236static inline uint32_t
237fp32_normalise(uint32_t mnt, int *exp)
238{
239 int shift;
240
241 if (!mnt) {
242 return 0;
243 }
244
245 for (shift = 16; shift; shift >>= 1) {
246 if (!(mnt >> (32 - shift))) {
247 mnt <<= shift;
248 *exp -= shift;
249 }
250 }
251 return mnt;
252}
253
254static inline uint64_t
255fp64_normalise(uint64_t mnt, int *exp)
256{
257 int shift;
258
259 if (!mnt) {
260 return 0;
261 }
262
263 for (shift = 32; shift; shift >>= 1) {
264 if (!(mnt >> (64 - shift))) {
265 mnt <<= shift;
266 *exp -= shift;
267 }
268 }
269 return mnt;
270}
271
272static inline void
273fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp)
274{
275 uint64_t x0 = *mnt0;
276 uint64_t x1 = *mnt1;
277 int shift;
278
279 if (!x0 && !x1) {
280 return;
281 }
282
283 if (!x1) {
284 x1 = x0;
285 x0 = 0;
286 *exp -= 64;
287 }
288
289 for (shift = 32; shift; shift >>= 1) {
290 if (!(x1 >> (64 - shift))) {
291 x1 = x1 << shift | x0 >> (64 - shift);
292 x0 <<= shift;
293 *exp -= shift;
294 }
295 }
296
297 *mnt0 = x0;
298 *mnt1 = x1;
299}
300
301static inline uint16_t
302fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt)
303{
304 return sgn << (FP16_BITS - 1) | exp << FP16_MANT_BITS | FP16_MANT(mnt);
305}
306
307static inline uint32_t
308fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt)
309{
310 return sgn << (FP32_BITS - 1) | exp << FP32_MANT_BITS | FP32_MANT(mnt);
311}
312
313static inline uint64_t
314fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt)
315{
316 return sgn << (FP64_BITS - 1) | exp << FP64_MANT_BITS | FP64_MANT(mnt);
317}
318
319static inline uint16_t
320fp16_zero(int sgn)
321{
322 return fp16_pack(sgn, 0, 0);
323}
324
325static inline uint32_t
326fp32_zero(int sgn)
327{
328 return fp32_pack(sgn, 0, 0);
329}
330
331static inline uint64_t
332fp64_zero(int sgn)
333{
334 return fp64_pack(sgn, 0, 0);
335}
336
337static inline uint16_t
339{
340 return fp16_pack(sgn, FP16_EXP_INF - 1, -1);
341}
342
343static inline uint32_t
345{
346 return fp32_pack(sgn, FP32_EXP_INF - 1, -1);
347}
348
349static inline uint64_t
351{
352 return fp64_pack(sgn, FP64_EXP_INF - 1, -1);
353}
354
355static inline uint16_t
357{
358 return fp16_pack(sgn, FP16_EXP_INF, 0);
359}
360
361static inline uint32_t
363{
364 return fp32_pack(sgn, FP32_EXP_INF, 0);
365}
366
367static inline uint64_t
369{
370 return fp64_pack(sgn, FP64_EXP_INF, 0);
371}
372
373static inline uint16_t
375{
376 return fp16_pack(0, FP16_EXP_INF, 1ULL << (FP16_MANT_BITS - 1));
377}
378
379static inline uint32_t
381{
382 return fp32_pack(0, FP32_EXP_INF, 1ULL << (FP32_MANT_BITS - 1));
383}
384
385static inline uint64_t
387{
388 return fp64_pack(0, FP64_EXP_INF, 1ULL << (FP64_MANT_BITS - 1));
389}
390
391static inline void
392fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode,
393 int *flags)
394{
395 *sgn = x >> (FP16_BITS - 1);
396 *exp = FP16_EXP(x);
397 *mnt = FP16_MANT(x);
398
399 if (*exp) {
400 *mnt |= 1ULL << FP16_MANT_BITS;
401 } else {
402 // Handle subnormals:
403 // IDC (Input Denormal) is not set in this case.
404 if (*mnt) {
405 if (mode & FPLIB_FZ16) {
406 *mnt = 0;
407 } else {
408 ++*exp;
409 }
410 }
411 }
412}
413
414static inline void
415fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode,
416 int *flags)
417{
418 *sgn = x >> (FP32_BITS - 1);
419 *exp = FP32_EXP(x);
420 *mnt = FP32_MANT(x);
421
422 if (*exp) {
423 *mnt |= 1ULL << FP32_MANT_BITS;
424 } else {
425 // Handle subnormals:
426 if (*mnt) {
427 if (mode & FPLIB_FZ) {
428 *flags |= FPLIB_IDC;
429 *mnt = 0;
430 } else {
431 ++*exp;
432 }
433 }
434 }
435}
436
437static inline void
438fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode,
439 int *flags)
440{
441
442
443 *sgn = x >> (FP64_BITS - 1);
444 *exp = FP64_EXP(x);
445 *mnt = FP64_MANT(x);
446
447 if (*exp) {
448 *mnt |= 1ULL << FP64_MANT_BITS;
449 } else {
450 // Handle subnormals:
451 if (*mnt) {
452 if (mode & FPLIB_FZ) {
453 *flags |= FPLIB_IDC;
454 *mnt = 0;
455 } else {
456 ++*exp;
457 }
458 }
459 }
460}
461
462static inline int
463fp16_is_NaN(int exp, uint16_t mnt)
464{
465 return exp == FP16_EXP_INF && FP16_MANT(mnt);
466}
467
468static inline int
469fp32_is_NaN(int exp, uint32_t mnt)
470{
471 return exp == FP32_EXP_INF && FP32_MANT(mnt);
472}
473
474static inline int
475fp64_is_NaN(int exp, uint64_t mnt)
476{
477 return exp == FP64_EXP_INF && FP64_MANT(mnt);
478}
479
480static inline int
481fp16_is_signalling_NaN(int exp, uint16_t mnt)
482{
483 return fp16_is_NaN(exp, mnt) && !(mnt >> (FP16_MANT_BITS - 1) & 1);
484}
485
486static inline int
487fp32_is_signalling_NaN(int exp, uint32_t mnt)
488{
489 return fp32_is_NaN(exp, mnt) && !(mnt >> (FP32_MANT_BITS - 1) & 1);
490}
491
492static inline int
493fp64_is_signalling_NaN(int exp, uint64_t mnt)
494{
495 return fp64_is_NaN(exp, mnt) && !(mnt >> (FP64_MANT_BITS - 1) & 1);
496}
497
498static inline int
499fp16_is_quiet_NaN(int exp, uint16_t mnt)
500{
501 return exp == FP16_EXP_INF && (mnt >> (FP16_MANT_BITS - 1) & 1);
502}
503
504static inline int
505fp32_is_quiet_NaN(int exp, uint32_t mnt)
506{
507 return exp == FP32_EXP_INF && (mnt >> (FP32_MANT_BITS - 1) & 1);
508}
509
510static inline int
511fp64_is_quiet_NaN(int exp, uint64_t mnt)
512{
513 return exp == FP64_EXP_INF && (mnt >> (FP64_MANT_BITS - 1) & 1);
514}
515
516static inline int
517fp16_is_infinity(int exp, uint16_t mnt)
518{
519 return exp == FP16_EXP_INF && !FP16_MANT(mnt);
520}
521
522static inline int
523fp32_is_infinity(int exp, uint32_t mnt)
524{
525 return exp == FP32_EXP_INF && !FP32_MANT(mnt);
526}
527
528static inline int
529fp64_is_infinity(int exp, uint64_t mnt)
530{
531 return exp == FP64_EXP_INF && !FP64_MANT(mnt);
532}
533
534static inline uint16_t
535fp16_process_NaN(uint16_t a, int mode, int *flags)
536{
537 if (!(a >> (FP16_MANT_BITS - 1) & 1)) {
538 *flags |= FPLIB_IOC;
539 a |= 1ULL << (FP16_MANT_BITS - 1);
540 }
541 return mode & FPLIB_DN ? fp16_defaultNaN() : a;
542}
543
544static inline uint32_t
545fp32_process_NaN(uint32_t a, int mode, int *flags)
546{
547 if (!(a >> (FP32_MANT_BITS - 1) & 1)) {
548 *flags |= FPLIB_IOC;
549 a |= 1ULL << (FP32_MANT_BITS - 1);
550 }
551 return mode & FPLIB_DN ? fp32_defaultNaN() : a;
552}
553
554static inline uint64_t
555fp64_process_NaN(uint64_t a, int mode, int *flags)
556{
557 if (!(a >> (FP64_MANT_BITS - 1) & 1)) {
558 *flags |= FPLIB_IOC;
559 a |= 1ULL << (FP64_MANT_BITS - 1);
560 }
561 return mode & FPLIB_DN ? fp64_defaultNaN() : a;
562}
563
564static uint16_t
565fp16_process_NaNs(uint16_t a, uint16_t b, int mode, int *flags)
566{
567 int a_exp = FP16_EXP(a);
568 uint16_t a_mnt = FP16_MANT(a);
569 int b_exp = FP16_EXP(b);
570 uint16_t b_mnt = FP16_MANT(b);
571
572 // Handle signalling NaNs:
573 if (fp16_is_signalling_NaN(a_exp, a_mnt))
574 return fp16_process_NaN(a, mode, flags);
575 if (fp16_is_signalling_NaN(b_exp, b_mnt))
576 return fp16_process_NaN(b, mode, flags);
577
578 // Handle quiet NaNs:
579 if (fp16_is_NaN(a_exp, a_mnt))
580 return fp16_process_NaN(a, mode, flags);
581 if (fp16_is_NaN(b_exp, b_mnt))
582 return fp16_process_NaN(b, mode, flags);
583
584 return 0;
585}
586
587static uint32_t
588fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags)
589{
590 int a_exp = FP32_EXP(a);
591 uint32_t a_mnt = FP32_MANT(a);
592 int b_exp = FP32_EXP(b);
593 uint32_t b_mnt = FP32_MANT(b);
594
595 // Handle signalling NaNs:
596 if (fp32_is_signalling_NaN(a_exp, a_mnt))
597 return fp32_process_NaN(a, mode, flags);
598 if (fp32_is_signalling_NaN(b_exp, b_mnt))
599 return fp32_process_NaN(b, mode, flags);
600
601 // Handle quiet NaNs:
602 if (fp32_is_NaN(a_exp, a_mnt))
603 return fp32_process_NaN(a, mode, flags);
604 if (fp32_is_NaN(b_exp, b_mnt))
605 return fp32_process_NaN(b, mode, flags);
606
607 return 0;
608}
609
610static uint64_t
611fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags)
612{
613 int a_exp = FP64_EXP(a);
614 uint64_t a_mnt = FP64_MANT(a);
615 int b_exp = FP64_EXP(b);
616 uint64_t b_mnt = FP64_MANT(b);
617
618 // Handle signalling NaNs:
619 if (fp64_is_signalling_NaN(a_exp, a_mnt))
620 return fp64_process_NaN(a, mode, flags);
621 if (fp64_is_signalling_NaN(b_exp, b_mnt))
622 return fp64_process_NaN(b, mode, flags);
623
624 // Handle quiet NaNs:
625 if (fp64_is_NaN(a_exp, a_mnt))
626 return fp64_process_NaN(a, mode, flags);
627 if (fp64_is_NaN(b_exp, b_mnt))
628 return fp64_process_NaN(b, mode, flags);
629
630 return 0;
631}
632
633static uint16_t
634fp16_process_NaNs3(uint16_t a, uint16_t b, uint16_t c, int mode, int *flags)
635{
636 int a_exp = FP16_EXP(a);
637 uint16_t a_mnt = FP16_MANT(a);
638 int b_exp = FP16_EXP(b);
639 uint16_t b_mnt = FP16_MANT(b);
640 int c_exp = FP16_EXP(c);
641 uint16_t c_mnt = FP16_MANT(c);
642
643 // Handle signalling NaNs:
644 if (fp16_is_signalling_NaN(a_exp, a_mnt))
645 return fp16_process_NaN(a, mode, flags);
646 if (fp16_is_signalling_NaN(b_exp, b_mnt))
647 return fp16_process_NaN(b, mode, flags);
648 if (fp16_is_signalling_NaN(c_exp, c_mnt))
649 return fp16_process_NaN(c, mode, flags);
650
651 // Handle quiet NaNs:
652 if (fp16_is_NaN(a_exp, a_mnt))
653 return fp16_process_NaN(a, mode, flags);
654 if (fp16_is_NaN(b_exp, b_mnt))
655 return fp16_process_NaN(b, mode, flags);
656 if (fp16_is_NaN(c_exp, c_mnt))
657 return fp16_process_NaN(c, mode, flags);
658
659 return 0;
660}
661
662static uint32_t
663fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags)
664{
665 int a_exp = FP32_EXP(a);
666 uint32_t a_mnt = FP32_MANT(a);
667 int b_exp = FP32_EXP(b);
668 uint32_t b_mnt = FP32_MANT(b);
669 int c_exp = FP32_EXP(c);
670 uint32_t c_mnt = FP32_MANT(c);
671
672 // Handle signalling NaNs:
673 if (fp32_is_signalling_NaN(a_exp, a_mnt))
674 return fp32_process_NaN(a, mode, flags);
675 if (fp32_is_signalling_NaN(b_exp, b_mnt))
676 return fp32_process_NaN(b, mode, flags);
677 if (fp32_is_signalling_NaN(c_exp, c_mnt))
678 return fp32_process_NaN(c, mode, flags);
679
680 // Handle quiet NaNs:
681 if (fp32_is_NaN(a_exp, a_mnt))
682 return fp32_process_NaN(a, mode, flags);
683 if (fp32_is_NaN(b_exp, b_mnt))
684 return fp32_process_NaN(b, mode, flags);
685 if (fp32_is_NaN(c_exp, c_mnt))
686 return fp32_process_NaN(c, mode, flags);
687
688 return 0;
689}
690
691static uint64_t
692fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags)
693{
694 int a_exp = FP64_EXP(a);
695 uint64_t a_mnt = FP64_MANT(a);
696 int b_exp = FP64_EXP(b);
697 uint64_t b_mnt = FP64_MANT(b);
698 int c_exp = FP64_EXP(c);
699 uint64_t c_mnt = FP64_MANT(c);
700
701 // Handle signalling NaNs:
702 if (fp64_is_signalling_NaN(a_exp, a_mnt))
703 return fp64_process_NaN(a, mode, flags);
704 if (fp64_is_signalling_NaN(b_exp, b_mnt))
705 return fp64_process_NaN(b, mode, flags);
706 if (fp64_is_signalling_NaN(c_exp, c_mnt))
707 return fp64_process_NaN(c, mode, flags);
708
709 // Handle quiet NaNs:
710 if (fp64_is_NaN(a_exp, a_mnt))
711 return fp64_process_NaN(a, mode, flags);
712 if (fp64_is_NaN(b_exp, b_mnt))
713 return fp64_process_NaN(b, mode, flags);
714 if (fp64_is_NaN(c_exp, c_mnt))
715 return fp64_process_NaN(c, mode, flags);
716
717 return 0;
718}
719
720static uint16_t
721fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags)
722{
723 int biased_exp; // non-negative exponent value for result
724 uint16_t int_mant; // mantissa for result, less than (2 << FP16_MANT_BITS)
725 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5
726
727 assert(rm != FPRounding_TIEAWAY);
728
729 // Flush to zero:
730 if ((mode & FPLIB_FZ16) && exp < 1) {
731 *flags |= FPLIB_UFC;
732 return fp16_zero(sgn);
733 }
734
735 // The bottom FP16_EXP_BITS bits of mnt are orred together:
736 mnt = (4ULL << FP16_MANT_BITS | mnt >> (FP16_EXP_BITS - 1) |
737 ((mnt & ((1ULL << FP16_EXP_BITS) - 1)) != 0));
738
739 if (exp > 0) {
740 biased_exp = exp;
741 int_mant = mnt >> 2;
742 error = mnt & 3;
743 } else {
744 biased_exp = 0;
745 int_mant = lsr16(mnt, 3 - exp);
746 error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1));
747 }
748
749 if (!biased_exp && error) { // xx should also check fpscr_val<11>
750 *flags |= FPLIB_UFC;
751 }
752
753 // Round up:
754 if ((rm == FPLIB_RN && (error == 3 ||
755 (error == 2 && (int_mant & 1)))) ||
756 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
757 ++int_mant;
758 if (int_mant == 1ULL << FP16_MANT_BITS) {
759 // Rounded up from denormalized to normalized
760 biased_exp = 1;
761 }
762 if (int_mant == 2ULL << FP16_MANT_BITS) {
763 // Rounded up to next exponent
764 ++biased_exp;
765 int_mant >>= 1;
766 }
767 }
768
769 // Handle rounding to odd aka Von Neumann rounding:
770 if (error && rm == FPRounding_ODD)
771 int_mant |= 1;
772
773 // Handle overflow:
774 if (!(mode & FPLIB_AHP)) {
775 if (biased_exp >= (int)FP16_EXP_INF) {
777 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
778 (rm == FPLIB_RM && sgn)) {
779 return fp16_infinity(sgn);
780 } else {
781 return fp16_max_normal(sgn);
782 }
783 }
784 } else {
785 if (biased_exp >= (int)FP16_EXP_INF + 1) {
786 *flags |= FPLIB_IOC;
787 return fp16_pack(sgn, FP16_EXP_INF, -1);
788 }
789 }
790
791 if (error) {
792 *flags |= FPLIB_IXC;
793 }
794
795 return fp16_pack(sgn, biased_exp, int_mant);
796}
797
798static uint16_t
799fp16_round(int sgn, int exp, uint16_t mnt, int mode, int *flags)
800{
801 return fp16_round_(sgn, exp, mnt, mode & 3, mode, flags);
802}
803
804static uint32_t
805fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags)
806{
807 int biased_exp; // non-negative exponent value for result
808 uint32_t int_mant; // mantissa for result, less than (2 << FP32_MANT_BITS)
809 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5
810
811 assert(rm != FPRounding_TIEAWAY);
812
813 // Flush to zero:
814 if ((mode & FPLIB_FZ) && exp < 1) {
815 *flags |= FPLIB_UFC;
816 return fp32_zero(sgn);
817 }
818
819 // The bottom FP32_EXP_BITS bits of mnt are orred together:
820 mnt = (4ULL << FP32_MANT_BITS | mnt >> (FP32_EXP_BITS - 1) |
821 ((mnt & ((1ULL << FP32_EXP_BITS) - 1)) != 0));
822
823 if (exp > 0) {
824 biased_exp = exp;
825 int_mant = mnt >> 2;
826 error = mnt & 3;
827 } else {
828 biased_exp = 0;
829 int_mant = lsr32(mnt, 3 - exp);
830 error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1));
831 }
832
833 if (!biased_exp && error) { // xx should also check fpscr_val<11>
834 *flags |= FPLIB_UFC;
835 }
836
837 // Round up:
838 if ((rm == FPLIB_RN && (error == 3 ||
839 (error == 2 && (int_mant & 1)))) ||
840 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
841 ++int_mant;
842 if (int_mant == 1ULL << FP32_MANT_BITS) {
843 // Rounded up from denormalized to normalized
844 biased_exp = 1;
845 }
846 if (int_mant == 2ULL << FP32_MANT_BITS) {
847 // Rounded up to next exponent
848 ++biased_exp;
849 int_mant >>= 1;
850 }
851 }
852
853 // Handle rounding to odd aka Von Neumann rounding:
854 if (error && rm == FPRounding_ODD)
855 int_mant |= 1;
856
857 // Handle overflow:
858 if (biased_exp >= (int)FP32_EXP_INF) {
860 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
861 (rm == FPLIB_RM && sgn)) {
862 return fp32_infinity(sgn);
863 } else {
864 return fp32_max_normal(sgn);
865 }
866 }
867
868 if (error) {
869 *flags |= FPLIB_IXC;
870 }
871
872 return fp32_pack(sgn, biased_exp, int_mant);
873}
874
875static uint32_t
876fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags)
877{
878 return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags);
879}
880
881static uint64_t
882fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags)
883{
884 int biased_exp; // non-negative exponent value for result
885 uint64_t int_mant; // mantissa for result, less than (2 << FP64_MANT_BITS)
886 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5
887
888 assert(rm != FPRounding_TIEAWAY);
889
890 // Flush to zero:
891 if ((mode & FPLIB_FZ) && exp < 1) {
892 *flags |= FPLIB_UFC;
893 return fp64_zero(sgn);
894 }
895
896 // The bottom FP64_EXP_BITS bits of mnt are orred together:
897 mnt = (4ULL << FP64_MANT_BITS | mnt >> (FP64_EXP_BITS - 1) |
898 ((mnt & ((1ULL << FP64_EXP_BITS) - 1)) != 0));
899
900 if (exp > 0) {
901 biased_exp = exp;
902 int_mant = mnt >> 2;
903 error = mnt & 3;
904 } else {
905 biased_exp = 0;
906 int_mant = lsr64(mnt, 3 - exp);
907 error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1));
908 }
909
910 if (!biased_exp && error) { // xx should also check fpscr_val<11>
911 *flags |= FPLIB_UFC;
912 }
913
914 // Round up:
915 if ((rm == FPLIB_RN && (error == 3 ||
916 (error == 2 && (int_mant & 1)))) ||
917 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
918 ++int_mant;
919 if (int_mant == 1ULL << FP64_MANT_BITS) {
920 // Rounded up from denormalized to normalized
921 biased_exp = 1;
922 }
923 if (int_mant == 2ULL << FP64_MANT_BITS) {
924 // Rounded up to next exponent
925 ++biased_exp;
926 int_mant >>= 1;
927 }
928 }
929
930 // Handle rounding to odd aka Von Neumann rounding:
931 if (error && rm == FPRounding_ODD)
932 int_mant |= 1;
933
934 // Handle overflow:
935 if (biased_exp >= (int)FP64_EXP_INF) {
937 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
938 (rm == FPLIB_RM && sgn)) {
939 return fp64_infinity(sgn);
940 } else {
941 return fp64_max_normal(sgn);
942 }
943 }
944
945 if (error) {
946 *flags |= FPLIB_IXC;
947 }
948
949 return fp64_pack(sgn, biased_exp, int_mant);
950}
951
952static uint64_t
953fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags)
954{
955 return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags);
956}
957
958static int
959fp16_compare_eq(uint16_t a, uint16_t b, int mode, int *flags)
960{
961 int a_sgn, a_exp, b_sgn, b_exp;
962 uint16_t a_mnt, b_mnt;
963
964 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
965 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
966
967 if (fp16_is_NaN(a_exp, a_mnt) ||
968 fp16_is_NaN(b_exp, b_mnt)) {
969 if (fp16_is_signalling_NaN(a_exp, a_mnt) ||
970 fp16_is_signalling_NaN(b_exp, b_mnt))
971 *flags |= FPLIB_IOC;
972 return 0;
973 }
974 return a == b || (!a_mnt && !b_mnt);
975}
976
977static int
978fp16_compare_ge(uint16_t a, uint16_t b, int mode, int *flags)
979{
980 int a_sgn, a_exp, b_sgn, b_exp;
981 uint16_t a_mnt, b_mnt;
982
983 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
984 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
985
986 if (fp16_is_NaN(a_exp, a_mnt) ||
987 fp16_is_NaN(b_exp, b_mnt)) {
988 *flags |= FPLIB_IOC;
989 return 0;
990 }
991 if (!a_mnt && !b_mnt)
992 return 1;
993 if (a_sgn != b_sgn)
994 return b_sgn;
995 if (a_exp != b_exp)
996 return a_sgn ^ (a_exp > b_exp);
997 if (a_mnt != b_mnt)
998 return a_sgn ^ (a_mnt > b_mnt);
999 return 1;
1000}
1001
1002static int
1003fp16_compare_gt(uint16_t a, uint16_t b, int mode, int *flags)
1004{
1005 int a_sgn, a_exp, b_sgn, b_exp;
1006 uint16_t a_mnt, b_mnt;
1007
1008 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1009 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1010
1011 if (fp16_is_NaN(a_exp, a_mnt) ||
1012 fp16_is_NaN(b_exp, b_mnt)) {
1013 *flags |= FPLIB_IOC;
1014 return 0;
1015 }
1016 if (!a_mnt && !b_mnt)
1017 return 0;
1018 if (a_sgn != b_sgn)
1019 return b_sgn;
1020 if (a_exp != b_exp)
1021 return a_sgn ^ (a_exp > b_exp);
1022 if (a_mnt != b_mnt)
1023 return a_sgn ^ (a_mnt > b_mnt);
1024 return 0;
1025}
1026
1027static int
1028fp16_compare_un(uint16_t a, uint16_t b, int mode, int *flags)
1029{
1030 int a_sgn, a_exp, b_sgn, b_exp;
1031 uint16_t a_mnt, b_mnt;
1032
1033 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1034 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1035
1036 if (fp16_is_NaN(a_exp, a_mnt) ||
1037 fp16_is_NaN(b_exp, b_mnt)) {
1038 if (fp16_is_signalling_NaN(a_exp, a_mnt) ||
1039 fp16_is_signalling_NaN(b_exp, b_mnt))
1040 *flags |= FPLIB_IOC;
1041 return 1;
1042 }
1043 return 0;
1044}
1045
1046static int
1047fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags)
1048{
1049 int a_sgn, a_exp, b_sgn, b_exp;
1050 uint32_t a_mnt, b_mnt;
1051
1052 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1053 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1054
1055 if (fp32_is_NaN(a_exp, a_mnt) ||
1056 fp32_is_NaN(b_exp, b_mnt)) {
1057 if (fp32_is_signalling_NaN(a_exp, a_mnt) ||
1058 fp32_is_signalling_NaN(b_exp, b_mnt))
1059 *flags |= FPLIB_IOC;
1060 return 0;
1061 }
1062 return a == b || (!a_mnt && !b_mnt);
1063}
1064
1065static int
1066fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags)
1067{
1068 int a_sgn, a_exp, b_sgn, b_exp;
1069 uint32_t a_mnt, b_mnt;
1070
1071 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1072 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1073
1074 if (fp32_is_NaN(a_exp, a_mnt) ||
1075 fp32_is_NaN(b_exp, b_mnt)) {
1076 *flags |= FPLIB_IOC;
1077 return 0;
1078 }
1079 if (!a_mnt && !b_mnt)
1080 return 1;
1081 if (a_sgn != b_sgn)
1082 return b_sgn;
1083 if (a_exp != b_exp)
1084 return a_sgn ^ (a_exp > b_exp);
1085 if (a_mnt != b_mnt)
1086 return a_sgn ^ (a_mnt > b_mnt);
1087 return 1;
1088}
1089
1090static int
1091fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags)
1092{
1093 int a_sgn, a_exp, b_sgn, b_exp;
1094 uint32_t a_mnt, b_mnt;
1095
1096 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1097 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1098
1099 if (fp32_is_NaN(a_exp, a_mnt) ||
1100 fp32_is_NaN(b_exp, b_mnt)) {
1101 *flags |= FPLIB_IOC;
1102 return 0;
1103 }
1104 if (!a_mnt && !b_mnt)
1105 return 0;
1106 if (a_sgn != b_sgn)
1107 return b_sgn;
1108 if (a_exp != b_exp)
1109 return a_sgn ^ (a_exp > b_exp);
1110 if (a_mnt != b_mnt)
1111 return a_sgn ^ (a_mnt > b_mnt);
1112 return 0;
1113}
1114
1115static int
1116fp32_compare_un(uint32_t a, uint32_t b, int mode, int *flags)
1117{
1118 int a_sgn, a_exp, b_sgn, b_exp;
1119 uint32_t a_mnt, b_mnt;
1120
1121 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1122 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1123
1124 if (fp32_is_NaN(a_exp, a_mnt) ||
1125 fp32_is_NaN(b_exp, b_mnt)) {
1126 if (fp32_is_signalling_NaN(a_exp, a_mnt) ||
1127 fp32_is_signalling_NaN(b_exp, b_mnt))
1128 *flags |= FPLIB_IOC;
1129 return 1;
1130 }
1131 return 0;
1132}
1133
1134static int
1135fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags)
1136{
1137 int a_sgn, a_exp, b_sgn, b_exp;
1138 uint64_t a_mnt, b_mnt;
1139
1140 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1141 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1142
1143 if (fp64_is_NaN(a_exp, a_mnt) ||
1144 fp64_is_NaN(b_exp, b_mnt)) {
1145 if (fp64_is_signalling_NaN(a_exp, a_mnt) ||
1146 fp64_is_signalling_NaN(b_exp, b_mnt))
1147 *flags |= FPLIB_IOC;
1148 return 0;
1149 }
1150 return a == b || (!a_mnt && !b_mnt);
1151}
1152
1153static int
1154fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags)
1155{
1156 int a_sgn, a_exp, b_sgn, b_exp;
1157 uint64_t a_mnt, b_mnt;
1158
1159 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1160 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1161
1162 if (fp64_is_NaN(a_exp, a_mnt) ||
1163 fp64_is_NaN(b_exp, b_mnt)) {
1164 *flags |= FPLIB_IOC;
1165 return 0;
1166 }
1167 if (!a_mnt && !b_mnt)
1168 return 1;
1169 if (a_sgn != b_sgn)
1170 return b_sgn;
1171 if (a_exp != b_exp)
1172 return a_sgn ^ (a_exp > b_exp);
1173 if (a_mnt != b_mnt)
1174 return a_sgn ^ (a_mnt > b_mnt);
1175 return 1;
1176}
1177
1178static int
1179fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags)
1180{
1181 int a_sgn, a_exp, b_sgn, b_exp;
1182 uint64_t a_mnt, b_mnt;
1183
1184 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1185 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1186
1187 if (fp64_is_NaN(a_exp, a_mnt) ||
1188 fp64_is_NaN(b_exp, b_mnt)) {
1189 *flags |= FPLIB_IOC;
1190 return 0;
1191 }
1192 if (!a_mnt && !b_mnt)
1193 return 0;
1194 if (a_sgn != b_sgn)
1195 return b_sgn;
1196 if (a_exp != b_exp)
1197 return a_sgn ^ (a_exp > b_exp);
1198 if (a_mnt != b_mnt)
1199 return a_sgn ^ (a_mnt > b_mnt);
1200 return 0;
1201}
1202
1203static int
1204fp64_compare_un(uint64_t a, uint64_t b, int mode, int *flags)
1205{
1206 int a_sgn, a_exp, b_sgn, b_exp;
1207 uint64_t a_mnt, b_mnt;
1208
1209 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1210 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1211
1212 if (fp64_is_NaN(a_exp, a_mnt) ||
1213 fp64_is_NaN(b_exp, b_mnt)) {
1214 if (fp64_is_signalling_NaN(a_exp, a_mnt) ||
1215 fp64_is_signalling_NaN(b_exp, b_mnt))
1216 *flags |= FPLIB_IOC;
1217 return 1;
1218 }
1219 return 0;
1220}
1221
1222static uint16_t
1223fp16_add(uint16_t a, uint16_t b, int neg, int mode, int *flags)
1224{
1225 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1226 uint16_t a_mnt, b_mnt, x, x_mnt;
1227
1228 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1229 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1230
1231 if ((x = fp16_process_NaNs(a, b, mode, flags))) {
1232 return x;
1233 }
1234
1235 b_sgn ^= neg;
1236
1237 // Handle infinities and zeroes:
1238 if (a_exp == FP16_EXP_INF && b_exp == FP16_EXP_INF && a_sgn != b_sgn) {
1239 *flags |= FPLIB_IOC;
1240 return fp16_defaultNaN();
1241 } else if (a_exp == FP16_EXP_INF) {
1242 return fp16_infinity(a_sgn);
1243 } else if (b_exp == FP16_EXP_INF) {
1244 return fp16_infinity(b_sgn);
1245 } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
1246 return fp16_zero(a_sgn);
1247 }
1248
1249 a_mnt <<= 3;
1250 b_mnt <<= 3;
1251 if (a_exp >= b_exp) {
1252 b_mnt = (lsr16(b_mnt, a_exp - b_exp) |
1253 !!(b_mnt & (lsl16(1, a_exp - b_exp) - 1)));
1254 b_exp = a_exp;
1255 } else {
1256 a_mnt = (lsr16(a_mnt, b_exp - a_exp) |
1257 !!(a_mnt & (lsl16(1, b_exp - a_exp) - 1)));
1258 a_exp = b_exp;
1259 }
1260 x_sgn = a_sgn;
1261 x_exp = a_exp;
1262 if (a_sgn == b_sgn) {
1263 x_mnt = a_mnt + b_mnt;
1264 } else if (a_mnt >= b_mnt) {
1265 x_mnt = a_mnt - b_mnt;
1266 } else {
1267 x_sgn ^= 1;
1268 x_mnt = b_mnt - a_mnt;
1269 }
1270
1271 if (!x_mnt) {
1272 // Sign of exact zero result depends on rounding mode
1273 return fp16_zero((mode & 3) == 2);
1274 }
1275
1276 x_mnt = fp16_normalise(x_mnt, &x_exp);
1277
1278 return fp16_round(x_sgn, x_exp + FP16_EXP_BITS - 3, x_mnt << 1,
1279 mode, flags);
1280}
1281
1282static uint32_t
1283fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags)
1284{
1285 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1286 uint32_t a_mnt, b_mnt, x, x_mnt;
1287
1288 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1289 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1290
1291 if ((x = fp32_process_NaNs(a, b, mode, flags))) {
1292 return x;
1293 }
1294
1295 b_sgn ^= neg;
1296
1297 // Handle infinities and zeroes:
1298 if (a_exp == FP32_EXP_INF && b_exp == FP32_EXP_INF && a_sgn != b_sgn) {
1299 *flags |= FPLIB_IOC;
1300 return fp32_defaultNaN();
1301 } else if (a_exp == FP32_EXP_INF) {
1302 return fp32_infinity(a_sgn);
1303 } else if (b_exp == FP32_EXP_INF) {
1304 return fp32_infinity(b_sgn);
1305 } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
1306 return fp32_zero(a_sgn);
1307 }
1308
1309 a_mnt <<= 3;
1310 b_mnt <<= 3;
1311 if (a_exp >= b_exp) {
1312 b_mnt = (lsr32(b_mnt, a_exp - b_exp) |
1313 !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1)));
1314 b_exp = a_exp;
1315 } else {
1316 a_mnt = (lsr32(a_mnt, b_exp - a_exp) |
1317 !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1)));
1318 a_exp = b_exp;
1319 }
1320 x_sgn = a_sgn;
1321 x_exp = a_exp;
1322 if (a_sgn == b_sgn) {
1323 x_mnt = a_mnt + b_mnt;
1324 } else if (a_mnt >= b_mnt) {
1325 x_mnt = a_mnt - b_mnt;
1326 } else {
1327 x_sgn ^= 1;
1328 x_mnt = b_mnt - a_mnt;
1329 }
1330
1331 if (!x_mnt) {
1332 // Sign of exact zero result depends on rounding mode
1333 return fp32_zero((mode & 3) == 2);
1334 }
1335
1336 x_mnt = fp32_normalise(x_mnt, &x_exp);
1337
1338 return fp32_round(x_sgn, x_exp + FP32_EXP_BITS - 3, x_mnt << 1,
1339 mode, flags);
1340}
1341
1342static uint64_t
1343fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags)
1344{
1345 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1346 uint64_t a_mnt, b_mnt, x, x_mnt;
1347
1348 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1349 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1350
1351 if ((x = fp64_process_NaNs(a, b, mode, flags))) {
1352 return x;
1353 }
1354
1355 b_sgn ^= neg;
1356
1357 // Handle infinities and zeroes:
1358 if (a_exp == FP64_EXP_INF && b_exp == FP64_EXP_INF && a_sgn != b_sgn) {
1359 *flags |= FPLIB_IOC;
1360 return fp64_defaultNaN();
1361 } else if (a_exp == FP64_EXP_INF) {
1362 return fp64_infinity(a_sgn);
1363 } else if (b_exp == FP64_EXP_INF) {
1364 return fp64_infinity(b_sgn);
1365 } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
1366 return fp64_zero(a_sgn);
1367 }
1368
1369 a_mnt <<= 3;
1370 b_mnt <<= 3;
1371 if (a_exp >= b_exp) {
1372 b_mnt = (lsr64(b_mnt, a_exp - b_exp) |
1373 !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1)));
1374 b_exp = a_exp;
1375 } else {
1376 a_mnt = (lsr64(a_mnt, b_exp - a_exp) |
1377 !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1)));
1378 a_exp = b_exp;
1379 }
1380 x_sgn = a_sgn;
1381 x_exp = a_exp;
1382 if (a_sgn == b_sgn) {
1383 x_mnt = a_mnt + b_mnt;
1384 } else if (a_mnt >= b_mnt) {
1385 x_mnt = a_mnt - b_mnt;
1386 } else {
1387 x_sgn ^= 1;
1388 x_mnt = b_mnt - a_mnt;
1389 }
1390
1391 if (!x_mnt) {
1392 // Sign of exact zero result depends on rounding mode
1393 return fp64_zero((mode & 3) == 2);
1394 }
1395
1396 x_mnt = fp64_normalise(x_mnt, &x_exp);
1397
1398 return fp64_round(x_sgn, x_exp + FP64_EXP_BITS - 3, x_mnt << 1,
1399 mode, flags);
1400}
1401
1402static uint16_t
1403fp16_mul(uint16_t a, uint16_t b, int mode, int *flags)
1404{
1405 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1406 uint16_t a_mnt, b_mnt, x;
1407 uint32_t x_mnt;
1408
1409 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1410 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1411
1412 if ((x = fp16_process_NaNs(a, b, mode, flags))) {
1413 return x;
1414 }
1415
1416 // Handle infinities and zeroes:
1417 if ((a_exp == FP16_EXP_INF && !b_mnt) ||
1418 (b_exp == FP16_EXP_INF && !a_mnt)) {
1419 *flags |= FPLIB_IOC;
1420 return fp16_defaultNaN();
1421 } else if (a_exp == FP16_EXP_INF || b_exp == FP16_EXP_INF) {
1422 return fp16_infinity(a_sgn ^ b_sgn);
1423 } else if (!a_mnt || !b_mnt) {
1424 return fp16_zero(a_sgn ^ b_sgn);
1425 }
1426
1427 // Multiply and normalise:
1428 x_sgn = a_sgn ^ b_sgn;
1429 x_exp = a_exp + b_exp - FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1;
1430 x_mnt = (uint32_t)a_mnt * b_mnt;
1431 x_mnt = fp32_normalise(x_mnt, &x_exp);
1432
1433 // Convert to FP16_BITS bits, collapsing error into bottom bit:
1434 x_mnt = lsr32(x_mnt, FP16_BITS - 1) | !!lsl32(x_mnt, FP16_BITS + 1);
1435
1436 return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
1437}
1438
1439static uint32_t
1440fp32_mul(uint32_t a, uint32_t b, int mode, int *flags)
1441{
1442 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1443 uint32_t a_mnt, b_mnt, x;
1444 uint64_t x_mnt;
1445
1446 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1447 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1448
1449 if ((x = fp32_process_NaNs(a, b, mode, flags))) {
1450 return x;
1451 }
1452
1453 // Handle infinities and zeroes:
1454 if ((a_exp == FP32_EXP_INF && !b_mnt) ||
1455 (b_exp == FP32_EXP_INF && !a_mnt)) {
1456 *flags |= FPLIB_IOC;
1457 return fp32_defaultNaN();
1458 } else if (a_exp == FP32_EXP_INF || b_exp == FP32_EXP_INF) {
1459 return fp32_infinity(a_sgn ^ b_sgn);
1460 } else if (!a_mnt || !b_mnt) {
1461 return fp32_zero(a_sgn ^ b_sgn);
1462 }
1463
1464 // Multiply and normalise:
1465 x_sgn = a_sgn ^ b_sgn;
1466 x_exp = a_exp + b_exp - FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1;
1467 x_mnt = (uint64_t)a_mnt * b_mnt;
1468 x_mnt = fp64_normalise(x_mnt, &x_exp);
1469
1470 // Convert to FP32_BITS bits, collapsing error into bottom bit:
1471 x_mnt = lsr64(x_mnt, FP32_BITS - 1) | !!lsl64(x_mnt, FP32_BITS + 1);
1472
1473 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
1474}
1475
1476static uint64_t
1477fp64_mul(uint64_t a, uint64_t b, int mode, int *flags)
1478{
1479 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1480 uint64_t a_mnt, b_mnt, x;
1481 uint64_t x0_mnt, x1_mnt;
1482
1483 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1484 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1485
1486 if ((x = fp64_process_NaNs(a, b, mode, flags))) {
1487 return x;
1488 }
1489
1490 // Handle infinities and zeroes:
1491 if ((a_exp == FP64_EXP_INF && !b_mnt) ||
1492 (b_exp == FP64_EXP_INF && !a_mnt)) {
1493 *flags |= FPLIB_IOC;
1494 return fp64_defaultNaN();
1495 } else if (a_exp == FP64_EXP_INF || b_exp == FP64_EXP_INF) {
1496 return fp64_infinity(a_sgn ^ b_sgn);
1497 } else if (!a_mnt || !b_mnt) {
1498 return fp64_zero(a_sgn ^ b_sgn);
1499 }
1500
1501 // Multiply and normalise:
1502 x_sgn = a_sgn ^ b_sgn;
1503 x_exp = a_exp + b_exp - FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1;
1504 mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt);
1505 fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
1506
1507 // Convert to FP64_BITS bits, collapsing error into bottom bit:
1508 x0_mnt = x1_mnt << 1 | !!x0_mnt;
1509
1510 return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags);
1511}
1512
1513static uint16_t
1514fp16_muladd(uint16_t a, uint16_t b, uint16_t c, int scale,
1515 int mode, int *flags)
1516{
1517 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1518 uint16_t a_mnt, b_mnt, c_mnt, x;
1519 uint32_t x_mnt, y_mnt;
1520
1521 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1522 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1523 fp16_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);
1524
1526
1527 // Quiet NaN added to product of zero and infinity:
1528 if (fp16_is_quiet_NaN(a_exp, a_mnt) &&
1529 ((!b_mnt && fp16_is_infinity(c_exp, c_mnt)) ||
1530 (!c_mnt && fp16_is_infinity(b_exp, b_mnt)))) {
1531 x = fp16_defaultNaN();
1532 *flags |= FPLIB_IOC;
1533 }
1534
1535 if (x) {
1536 return x;
1537 }
1538
1539 // Handle infinities and zeroes:
1540 if ((b_exp == FP16_EXP_INF && !c_mnt) ||
1541 (c_exp == FP16_EXP_INF && !b_mnt) ||
1542 (a_exp == FP16_EXP_INF &&
1543 (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF) &&
1544 (a_sgn != (b_sgn ^ c_sgn)))) {
1545 *flags |= FPLIB_IOC;
1546 return fp16_defaultNaN();
1547 }
1548 if (a_exp == FP16_EXP_INF)
1549 return fp16_infinity(a_sgn);
1550 if (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF)
1551 return fp16_infinity(b_sgn ^ c_sgn);
1552 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1553 return fp16_zero(a_sgn);
1554
1555 x_sgn = a_sgn;
1556 x_exp = a_exp + 2 * FP16_EXP_BITS - 3;
1557 x_mnt = (uint32_t)a_mnt << (FP16_MANT_BITS + 4);
1558
1559 // Multiply:
1560 y_sgn = b_sgn ^ c_sgn;
1561 y_exp = b_exp + c_exp - FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1 - 3;
1562 y_mnt = (uint32_t)b_mnt * c_mnt << 3;
1563 if (!y_mnt) {
1564 y_exp = x_exp;
1565 }
1566
1567 // Add:
1568 if (x_exp >= y_exp) {
1569 y_mnt = (lsr32(y_mnt, x_exp - y_exp) |
1570 !!(y_mnt & (lsl32(1, x_exp - y_exp) - 1)));
1571 y_exp = x_exp;
1572 } else {
1573 x_mnt = (lsr32(x_mnt, y_exp - x_exp) |
1574 !!(x_mnt & (lsl32(1, y_exp - x_exp) - 1)));
1575 x_exp = y_exp;
1576 }
1577 if (x_sgn == y_sgn) {
1578 x_mnt = x_mnt + y_mnt;
1579 } else if (x_mnt >= y_mnt) {
1580 x_mnt = x_mnt - y_mnt;
1581 } else {
1582 x_sgn ^= 1;
1583 x_mnt = y_mnt - x_mnt;
1584 }
1585
1586 if (!x_mnt) {
1587 // Sign of exact zero result depends on rounding mode
1588 return fp16_zero((mode & 3) == 2);
1589 }
1590
1591 // Normalise into FP16_BITS bits, collapsing error into bottom bit:
1592 x_mnt = fp32_normalise(x_mnt, &x_exp);
1593 x_mnt = x_mnt >> (FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);
1594
1595 return fp16_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
1596}
1597
1598static uint32_t
1599fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale,
1600 int mode, int *flags)
1601{
1602 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1603 uint32_t a_mnt, b_mnt, c_mnt, x;
1604 uint64_t x_mnt, y_mnt;
1605
1606 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1607 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1608 fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);
1609
1611
1612 // Quiet NaN added to product of zero and infinity:
1613 if (fp32_is_quiet_NaN(a_exp, a_mnt) &&
1614 ((!b_mnt && fp32_is_infinity(c_exp, c_mnt)) ||
1615 (!c_mnt && fp32_is_infinity(b_exp, b_mnt)))) {
1616 x = fp32_defaultNaN();
1617 *flags |= FPLIB_IOC;
1618 }
1619
1620 if (x) {
1621 return x;
1622 }
1623
1624 // Handle infinities and zeroes:
1625 if ((b_exp == FP32_EXP_INF && !c_mnt) ||
1626 (c_exp == FP32_EXP_INF && !b_mnt) ||
1627 (a_exp == FP32_EXP_INF &&
1628 (b_exp == FP32_EXP_INF || c_exp == FP32_EXP_INF) &&
1629 (a_sgn != (b_sgn ^ c_sgn)))) {
1630 *flags |= FPLIB_IOC;
1631 return fp32_defaultNaN();
1632 }
1633 if (a_exp == FP32_EXP_INF)
1634 return fp32_infinity(a_sgn);
1635 if (b_exp == FP32_EXP_INF || c_exp == FP32_EXP_INF)
1636 return fp32_infinity(b_sgn ^ c_sgn);
1637 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1638 return fp32_zero(a_sgn);
1639
1640 x_sgn = a_sgn;
1641 x_exp = a_exp + 2 * FP32_EXP_BITS - 3;
1642 x_mnt = (uint64_t)a_mnt << (FP32_MANT_BITS + 4);
1643
1644 // Multiply:
1645 y_sgn = b_sgn ^ c_sgn;
1646 y_exp = b_exp + c_exp - FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1 - 3;
1647 y_mnt = (uint64_t)b_mnt * c_mnt << 3;
1648 if (!y_mnt) {
1649 y_exp = x_exp;
1650 }
1651
1652 // Add:
1653 if (x_exp >= y_exp) {
1654 y_mnt = (lsr64(y_mnt, x_exp - y_exp) |
1655 !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1)));
1656 y_exp = x_exp;
1657 } else {
1658 x_mnt = (lsr64(x_mnt, y_exp - x_exp) |
1659 !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1)));
1660 x_exp = y_exp;
1661 }
1662 if (x_sgn == y_sgn) {
1663 x_mnt = x_mnt + y_mnt;
1664 } else if (x_mnt >= y_mnt) {
1665 x_mnt = x_mnt - y_mnt;
1666 } else {
1667 x_sgn ^= 1;
1668 x_mnt = y_mnt - x_mnt;
1669 }
1670
1671 if (!x_mnt) {
1672 // Sign of exact zero result depends on rounding mode
1673 return fp32_zero((mode & 3) == 2);
1674 }
1675
1676 // Normalise into FP32_BITS bits, collapsing error into bottom bit:
1677 x_mnt = fp64_normalise(x_mnt, &x_exp);
1678 x_mnt = x_mnt >> (FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);
1679
1680 return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
1681}
1682
1683static uint64_t
1684fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale,
1685 int mode, int *flags)
1686{
1687 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1688 uint64_t a_mnt, b_mnt, c_mnt, x;
1689 uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt;
1690
1691 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1692 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1693 fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);
1694
1696
1697 // Quiet NaN added to product of zero and infinity:
1698 if (fp64_is_quiet_NaN(a_exp, a_mnt) &&
1699 ((!b_mnt && fp64_is_infinity(c_exp, c_mnt)) ||
1700 (!c_mnt && fp64_is_infinity(b_exp, b_mnt)))) {
1701 x = fp64_defaultNaN();
1702 *flags |= FPLIB_IOC;
1703 }
1704
1705 if (x) {
1706 return x;
1707 }
1708
1709 // Handle infinities and zeroes:
1710 if ((b_exp == FP64_EXP_INF && !c_mnt) ||
1711 (c_exp == FP64_EXP_INF && !b_mnt) ||
1712 (a_exp == FP64_EXP_INF &&
1713 (b_exp == FP64_EXP_INF || c_exp == FP64_EXP_INF) &&
1714 (a_sgn != (b_sgn ^ c_sgn)))) {
1715 *flags |= FPLIB_IOC;
1716 return fp64_defaultNaN();
1717 }
1718 if (a_exp == FP64_EXP_INF)
1719 return fp64_infinity(a_sgn);
1720 if (b_exp == FP64_EXP_INF || c_exp == FP64_EXP_INF)
1721 return fp64_infinity(b_sgn ^ c_sgn);
1722 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1723 return fp64_zero(a_sgn);
1724
1725 x_sgn = a_sgn;
1726 x_exp = a_exp + FP64_EXP_BITS;
1727 x0_mnt = 0;
1728 x1_mnt = a_mnt;
1729
1730 // Multiply:
1731 y_sgn = b_sgn ^ c_sgn;
1732 y_exp = b_exp + c_exp - FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1 - 3;
1733 mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3);
1734 if (!y0_mnt && !y1_mnt) {
1735 y_exp = x_exp;
1736 }
1737
1738 // Add:
1739 if (x_exp >= y_exp) {
1740 uint64_t t0, t1;
1741 lsl128(&t0, &t1, y0_mnt, y1_mnt,
1742 x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0);
1743 lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp);
1744 y0_mnt |= !!(t0 | t1);
1745 y_exp = x_exp;
1746 } else {
1747 uint64_t t0, t1;
1748 lsl128(&t0, &t1, x0_mnt, x1_mnt,
1749 y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0);
1750 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp);
1751 x0_mnt |= !!(t0 | t1);
1752 x_exp = y_exp;
1753 }
1754 if (x_sgn == y_sgn) {
1755 add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
1756 } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) {
1757 sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
1758 } else {
1759 x_sgn ^= 1;
1760 sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt);
1761 }
1762
1763 if (!x0_mnt && !x1_mnt) {
1764 // Sign of exact zero result depends on rounding mode
1765 return fp64_zero((mode & 3) == 2);
1766 }
1767
1768 // Normalise into FP64_BITS bits, collapsing error into bottom bit:
1769 fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
1770 x0_mnt = x1_mnt << 1 | !!x0_mnt;
1771
1772 return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags);
1773}
1774
1775static uint16_t
1776fp16_div(uint16_t a, uint16_t b, int mode, int *flags)
1777{
1778 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1779 uint16_t a_mnt, b_mnt, x;
1780 uint32_t x_mnt;
1781
1782 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1783 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1784
1785 if ((x = fp16_process_NaNs(a, b, mode, flags)))
1786 return x;
1787
1788 // Handle infinities and zeroes:
1789 if ((a_exp == FP16_EXP_INF && b_exp == FP16_EXP_INF) ||
1790 (!a_mnt && !b_mnt)) {
1791 *flags |= FPLIB_IOC;
1792 return fp16_defaultNaN();
1793 }
1794 if (a_exp == FP16_EXP_INF || !b_mnt) {
1795 if (a_exp != FP16_EXP_INF)
1796 *flags |= FPLIB_DZC;
1797 return fp16_infinity(a_sgn ^ b_sgn);
1798 }
1799 if (!a_mnt || b_exp == FP16_EXP_INF)
1800 return fp16_zero(a_sgn ^ b_sgn);
1801
1802 // Divide, setting bottom bit if inexact:
1803 a_mnt = fp16_normalise(a_mnt, &a_exp);
1804 x_sgn = a_sgn ^ b_sgn;
1805 x_exp = a_exp - b_exp + (FP16_EXP_BIAS + FP16_BITS + 2 * FP16_EXP_BITS - 3);
1806 x_mnt = ((uint32_t)a_mnt << (FP16_MANT_BITS - FP16_EXP_BITS + 3)) / b_mnt;
1807 x_mnt |= (x_mnt * b_mnt !=
1808 (uint32_t)a_mnt << (FP16_MANT_BITS - FP16_EXP_BITS + 3));
1809
1810 // Normalise into FP16_BITS bits, collapsing error into bottom bit:
1811 x_mnt = fp32_normalise(x_mnt, &x_exp);
1812 x_mnt = x_mnt >> (FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);
1813
1814 return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
1815}
1816
1817static uint32_t
1818fp32_div(uint32_t a, uint32_t b, int mode, int *flags)
1819{
1820 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1821 uint32_t a_mnt, b_mnt, x;
1822 uint64_t x_mnt;
1823
1824 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1825 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1826
1827 if ((x = fp32_process_NaNs(a, b, mode, flags)))
1828 return x;
1829
1830 // Handle infinities and zeroes:
1831 if ((a_exp == FP32_EXP_INF && b_exp == FP32_EXP_INF) ||
1832 (!a_mnt && !b_mnt)) {
1833 *flags |= FPLIB_IOC;
1834 return fp32_defaultNaN();
1835 }
1836 if (a_exp == FP32_EXP_INF || !b_mnt) {
1837 if (a_exp != FP32_EXP_INF)
1838 *flags |= FPLIB_DZC;
1839 return fp32_infinity(a_sgn ^ b_sgn);
1840 }
1841 if (!a_mnt || b_exp == FP32_EXP_INF)
1842 return fp32_zero(a_sgn ^ b_sgn);
1843
1844 // Divide, setting bottom bit if inexact:
1845 a_mnt = fp32_normalise(a_mnt, &a_exp);
1846 x_sgn = a_sgn ^ b_sgn;
1847 x_exp = a_exp - b_exp + (FP32_EXP_BIAS + FP32_BITS + 2 * FP32_EXP_BITS - 3);
1848 x_mnt = ((uint64_t)a_mnt << (FP32_MANT_BITS - FP32_EXP_BITS + 3)) / b_mnt;
1849 x_mnt |= (x_mnt * b_mnt !=
1850 (uint64_t)a_mnt << (FP32_MANT_BITS - FP32_EXP_BITS + 3));
1851
1852 // Normalise into FP32_BITS bits, collapsing error into bottom bit:
1853 x_mnt = fp64_normalise(x_mnt, &x_exp);
1854 x_mnt = x_mnt >> (FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);
1855
1856 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
1857}
1858
1859static uint64_t
1860fp64_div(uint64_t a, uint64_t b, int mode, int *flags)
1861{
1862 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c;
1863 uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt;
1864
1865 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1866 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1867
1868 if ((x = fp64_process_NaNs(a, b, mode, flags)))
1869 return x;
1870
1871 // Handle infinities and zeroes:
1872 if ((a_exp == FP64_EXP_INF && b_exp == FP64_EXP_INF) ||
1873 (!a_mnt && !b_mnt)) {
1874 *flags |= FPLIB_IOC;
1875 return fp64_defaultNaN();
1876 }
1877 if (a_exp == FP64_EXP_INF || !b_mnt) {
1878 if (a_exp != FP64_EXP_INF)
1879 *flags |= FPLIB_DZC;
1880 return fp64_infinity(a_sgn ^ b_sgn);
1881 }
1882 if (!a_mnt || b_exp == FP64_EXP_INF)
1883 return fp64_zero(a_sgn ^ b_sgn);
1884
1885 // Find reciprocal of divisor with Newton-Raphson:
1886 a_mnt = fp64_normalise(a_mnt, &a_exp);
1887 b_mnt = fp64_normalise(b_mnt, &b_exp);
1888 x_mnt = ~(uint64_t)0 / (b_mnt >> 31);
1889 mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt);
1890 sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt);
1891 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32);
1892 mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt);
1893 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33);
1894
1895 // Multiply by dividend:
1896 x_sgn = a_sgn ^ b_sgn;
1897 x_exp = a_exp - b_exp + FP64_EXP_BIAS + 8;
1898 mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2);
1899 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4);
1900 x_mnt = x1_mnt;
1901
1902 // This is an underestimate, so try adding one:
1903 mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1);
1904 c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11);
1905 if (c <= 0) {
1906 ++x_mnt;
1907 }
1908
1909 x_mnt = fp64_normalise(x_mnt, &x_exp);
1910
1911 return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
1912}
1913
1914static void
1915set_fpscr0(FPSCR &fpscr, int flags)
1916{
1917 if (flags & FPLIB_IDC) {
1918 fpscr.idc = 1;
1919 }
1920 if (flags & FPLIB_IOC) {
1921 fpscr.ioc = 1;
1922 }
1923 if (flags & FPLIB_DZC) {
1924 fpscr.dzc = 1;
1925 }
1926 if (flags & FPLIB_OFC) {
1927 fpscr.ofc = 1;
1928 }
1929 if (flags & FPLIB_UFC) {
1930 fpscr.ufc = 1;
1931 }
1932 if (flags & FPLIB_IXC) {
1933 fpscr.ixc = 1;
1934 }
1935}
1936
1937static uint16_t
1938fp16_scale(uint16_t a, int16_t b, int mode, int *flags)
1939{
1940 int a_sgn, a_exp;
1941 uint16_t a_mnt;
1942
1943 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1944
1945 // Handle NaNs:
1946 if (fp16_is_NaN(a_exp, a_mnt)) {
1947 return fp16_process_NaN(a, mode, flags);
1948 }
1949
1950 // Handle zeroes:
1951 if (!a_mnt) {
1952 return fp16_zero(a_sgn);
1953 }
1954
1955 // Handle infinities:
1956 if (a_exp == FP16_EXP_INF) {
1957 return fp16_infinity(a_sgn);
1958 }
1959
1960 b = b < -300 ? -300 : b;
1961 b = b > 300 ? 300 : b;
1962 a_exp += b;
1963 a_mnt <<= 3;
1964
1965 a_mnt = fp16_normalise(a_mnt, &a_exp);
1966
1967 return fp16_round(a_sgn, a_exp + FP16_EXP_BITS - 3, a_mnt << 1,
1968 mode, flags);
1969}
1970
1971static uint32_t
1972fp32_scale(uint32_t a, int32_t b, int mode, int *flags)
1973{
1974 int a_sgn, a_exp;
1975 uint32_t a_mnt;
1976
1977 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1978
1979 // Handle NaNs:
1980 if (fp32_is_NaN(a_exp, a_mnt)) {
1981 return fp32_process_NaN(a, mode, flags);
1982 }
1983
1984 // Handle zeroes:
1985 if (!a_mnt) {
1986 return fp32_zero(a_sgn);
1987 }
1988
1989 // Handle infinities:
1990 if (a_exp == FP32_EXP_INF) {
1991 return fp32_infinity(a_sgn);
1992 }
1993
1994 b = b < -300 ? -300 : b;
1995 b = b > 300 ? 300 : b;
1996 a_exp += b;
1997 a_mnt <<= 3;
1998
1999 a_mnt = fp32_normalise(a_mnt, &a_exp);
2000
2001 return fp32_round(a_sgn, a_exp + FP32_EXP_BITS - 3, a_mnt << 1,
2002 mode, flags);
2003}
2004
2005static uint64_t
2006fp64_scale(uint64_t a, int64_t b, int mode, int *flags)
2007{
2008 int a_sgn, a_exp;
2009 uint64_t a_mnt;
2010
2011 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2012
2013 // Handle NaNs:
2014 if (fp64_is_NaN(a_exp, a_mnt)) {
2015 return fp64_process_NaN(a, mode, flags);
2016 }
2017
2018 // Handle zeroes:
2019 if (!a_mnt) {
2020 return fp64_zero(a_sgn);
2021 }
2022
2023 // Handle infinities:
2024 if (a_exp == FP64_EXP_INF) {
2025 return fp64_infinity(a_sgn);
2026 }
2027
2028 b = b < -3000 ? -3000 : b;
2029 b = b > 3000 ? 3000 : b;
2030 a_exp += b;
2031 a_mnt <<= 3;
2032
2033 a_mnt = fp64_normalise(a_mnt, &a_exp);
2034
2035 return fp64_round(a_sgn, a_exp + FP64_EXP_BITS - 3, a_mnt << 1,
2036 mode, flags);
2037}
2038
2039static uint16_t
2040fp16_sqrt(uint16_t a, int mode, int *flags)
2041{
2042 int a_sgn, a_exp, x_sgn, x_exp;
2043 uint16_t a_mnt, x_mnt;
2044 uint32_t x, t0, t1;
2045
2046 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2047
2048 // Handle NaNs:
2049 if (fp16_is_NaN(a_exp, a_mnt))
2050 return fp16_process_NaN(a, mode, flags);
2051
2052 // Handle infinities and zeroes:
2053 if (!a_mnt)
2054 return fp16_zero(a_sgn);
2055 if (a_exp == FP16_EXP_INF && !a_sgn)
2056 return fp16_infinity(a_sgn);
2057 if (a_sgn) {
2058 *flags |= FPLIB_IOC;
2059 return fp16_defaultNaN();
2060 }
2061
2062 a_mnt = fp16_normalise(a_mnt, &a_exp);
2063 if (a_exp & 1) {
2064 ++a_exp;
2065 a_mnt >>= 1;
2066 }
2067
2068 // x = (a * 3 + 5) / 8
2069 x = ((uint32_t)a_mnt << 14) + ((uint32_t)a_mnt << 13) + ((uint32_t)5 << 28);
2070
2071 // x = (a / x + x) / 2; // 8-bit accuracy
2072 x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15;
2073
2074 // x = (a / x + x) / 2; // 16-bit accuracy
2075 x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15;
2076
2077 x_sgn = 0;
2078 x_exp = (a_exp + 27) >> 1;
2079 x_mnt = ((x - (1 << 18)) >> 19) + 1;
2080 t1 = (uint32_t)x_mnt * x_mnt;
2081 t0 = (uint32_t)a_mnt << 9;
2082 if (t1 > t0) {
2083 --x_mnt;
2084 }
2085
2086 x_mnt = fp16_normalise(x_mnt, &x_exp);
2087
2088 return fp16_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
2089}
2090
2091static uint32_t
2092fp32_sqrt(uint32_t a, int mode, int *flags)
2093{
2094 int a_sgn, a_exp, x_sgn, x_exp;
2095 uint32_t a_mnt, x, x_mnt;
2096 uint64_t t0, t1;
2097
2098 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2099
2100 // Handle NaNs:
2101 if (fp32_is_NaN(a_exp, a_mnt))
2102 return fp32_process_NaN(a, mode, flags);
2103
2104 // Handle infinities and zeroes:
2105 if (!a_mnt)
2106 return fp32_zero(a_sgn);
2107 if (a_exp == FP32_EXP_INF && !a_sgn)
2108 return fp32_infinity(a_sgn);
2109 if (a_sgn) {
2110 *flags |= FPLIB_IOC;
2111 return fp32_defaultNaN();
2112 }
2113
2114 a_mnt = fp32_normalise(a_mnt, &a_exp);
2115 if (!(a_exp & 1)) {
2116 ++a_exp;
2117 a_mnt >>= 1;
2118 }
2119
2120 // x = (a * 3 + 5) / 8
2121 x = (a_mnt >> 2) + (a_mnt >> 3) + ((uint32_t)5 << 28);
2122
2123 // x = (a / x + x) / 2; // 8-bit accuracy
2124 x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
2125
2126 // x = (a / x + x) / 2; // 16-bit accuracy
2127 x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
2128
2129 // x = (a / x + x) / 2; // 32-bit accuracy
2130 x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1);
2131
2132 x_sgn = 0;
2133 x_exp = (a_exp + 147) >> 1;
2134 x_mnt = ((x - (1 << 5)) >> 6) + 1;
2135 t1 = (uint64_t)x_mnt * x_mnt;
2136 t0 = (uint64_t)a_mnt << 19;
2137 if (t1 > t0) {
2138 --x_mnt;
2139 }
2140
2141 x_mnt = fp32_normalise(x_mnt, &x_exp);
2142
2143 return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
2144}
2145
2146static uint64_t
2147fp64_sqrt(uint64_t a, int mode, int *flags)
2148{
2149 int a_sgn, a_exp, x_sgn, x_exp, c;
2150 uint64_t a_mnt, x_mnt, r, x0, x1;
2151 uint32_t x;
2152
2153 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2154
2155 // Handle NaNs:
2156 if (fp64_is_NaN(a_exp, a_mnt))
2157 return fp64_process_NaN(a, mode, flags);
2158
2159 // Handle infinities and zeroes:
2160 if (!a_mnt)
2161 return fp64_zero(a_sgn);
2162 if (a_exp == FP64_EXP_INF && !a_sgn)
2163 return fp64_infinity(a_sgn);
2164 if (a_sgn) {
2165 *flags |= FPLIB_IOC;
2166 return fp64_defaultNaN();
2167 }
2168
2169 a_mnt = fp64_normalise(a_mnt, &a_exp);
2170 if (a_exp & 1) {
2171 ++a_exp;
2172 a_mnt >>= 1;
2173 }
2174
2175 // x = (a * 3 + 5) / 8
2176 x = (a_mnt >> 34) + (a_mnt >> 35) + ((uint32_t)5 << 28);
2177
2178 // x = (a / x + x) / 2; // 8-bit accuracy
2179 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
2180
2181 // x = (a / x + x) / 2; // 16-bit accuracy
2182 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
2183
2184 // x = (a / x + x) / 2; // 32-bit accuracy
2185 x = ((a_mnt / x) >> 2) + (x >> 1);
2186
2187 // r = 1 / x; // 32-bit accuracy
2188 r = ((uint64_t)1 << 62) / x;
2189
2190 // r = r * (2 - x * r); // 64-bit accuracy
2191 mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r);
2192 lsr128(&x0, &x1, x0, x1, 31);
2193
2194 // x = (x + a * r) / 2; // 64-bit accuracy
2195 mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2);
2196 lsl128(&x0, &x1, x0, x1, 5);
2197 lsr128(&x0, &x1, x0, x1, 56);
2198
2199 x0 = ((uint64_t)x << 31) + (x0 >> 1);
2200
2201 x_sgn = 0;
2202 x_exp = (a_exp + 1053) >> 1;
2203 x_mnt = x0;
2204 x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1;
2205 mul62x62(&x0, &x1, x_mnt, x_mnt);
2206 lsl128(&x0, &x1, x0, x1, 19);
2207 c = cmp128(x0, x1, 0, a_mnt);
2208 if (c > 0)
2209 --x_mnt;
2210
2211 x_mnt = fp64_normalise(x_mnt, &x_exp);
2212
2213 return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
2214}
2215
2216static int
2217modeConv(FPSCR fpscr)
2218{
2219 uint32_t x = (uint32_t)fpscr;
2220 return (x >> 22 & 0xf) | (x >> 19 & 1 ? FPLIB_FZ16 : 0);
2221 // AHP bit is ignored. Only fplibConvert uses AHP.
2222}
2223
2224static void
2225set_fpscr(FPSCR &fpscr, int flags)
2226{
2227 // translate back to FPSCR
2228 bool underflow = false;
2229 if (flags & FPLIB_IDC) {
2230 fpscr.idc = 1;
2231 }
2232 if (flags & FPLIB_IOC) {
2233 fpscr.ioc = 1;
2234 }
2235 if (flags & FPLIB_DZC) {
2236 fpscr.dzc = 1;
2237 }
2238 if (flags & FPLIB_OFC) {
2239 fpscr.ofc = 1;
2240 }
2241 if (flags & FPLIB_UFC) {
2242 underflow = true; //xx Why is this required?
2243 fpscr.ufc = 1;
2244 }
2245 if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) {
2246 fpscr.ixc = 1;
2247 }
2248}
2249
2250template <>
2251bool
2252fplibCompareEQ(uint16_t a, uint16_t b, FPSCR &fpscr)
2253{
2254 int flags = 0;
2255 int x = fp16_compare_eq(a, b, modeConv(fpscr), &flags);
2256 set_fpscr(fpscr, flags);
2257 return x;
2258}
2259
2260template <>
2261bool
2262fplibCompareGE(uint16_t a, uint16_t b, FPSCR &fpscr)
2263{
2264 int flags = 0;
2265 int x = fp16_compare_ge(a, b, modeConv(fpscr), &flags);
2266 set_fpscr(fpscr, flags);
2267 return x;
2268}
2269
2270template <>
2271bool
2272fplibCompareGT(uint16_t a, uint16_t b, FPSCR &fpscr)
2273{
2274 int flags = 0;
2275 int x = fp16_compare_gt(a, b, modeConv(fpscr), &flags);
2276 set_fpscr(fpscr, flags);
2277 return x;
2278}
2279
2280template <>
2281bool
2282fplibCompareUN(uint16_t a, uint16_t b, FPSCR &fpscr)
2283{
2284 int flags = 0;
2285 int x = fp16_compare_un(a, b, modeConv(fpscr), &flags);
2286 set_fpscr(fpscr, flags);
2287 return x;
2288}
2289
2290template <>
2291bool
2292fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr)
2293{
2294 int flags = 0;
2295 int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags);
2296 set_fpscr(fpscr, flags);
2297 return x;
2298}
2299
2300template <>
2301bool
2302fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr)
2303{
2304 int flags = 0;
2305 int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags);
2306 set_fpscr(fpscr, flags);
2307 return x;
2308}
2309
2310template <>
2311bool
2312fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr)
2313{
2314 int flags = 0;
2315 int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags);
2316 set_fpscr(fpscr, flags);
2317 return x;
2318}
2319
2320template <>
2321bool
2322fplibCompareUN(uint32_t a, uint32_t b, FPSCR &fpscr)
2323{
2324 int flags = 0;
2325 int x = fp32_compare_un(a, b, modeConv(fpscr), &flags);
2326 set_fpscr(fpscr, flags);
2327 return x;
2328}
2329
2330template <>
2331bool
2332fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr)
2333{
2334 int flags = 0;
2335 int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags);
2336 set_fpscr(fpscr, flags);
2337 return x;
2338}
2339
2340template <>
2341bool
2342fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr)
2343{
2344 int flags = 0;
2345 int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags);
2346 set_fpscr(fpscr, flags);
2347 return x;
2348}
2349
2350template <>
2351bool
2352fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr)
2353{
2354 int flags = 0;
2355 int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags);
2356 set_fpscr(fpscr, flags);
2357 return x;
2358}
2359
2360template <>
2361bool
2362fplibCompareUN(uint64_t a, uint64_t b, FPSCR &fpscr)
2363{
2364 int flags = 0;
2365 int x = fp64_compare_un(a, b, modeConv(fpscr), &flags);
2366 set_fpscr(fpscr, flags);
2367 return x;
2368}
2369
2370template <>
2371uint16_t
2372fplibAbs(uint16_t op)
2373{
2374 return op & ~(1ULL << (FP16_BITS - 1));
2375}
2376
2377template <>
2378uint32_t
2379fplibAbs(uint32_t op)
2380{
2381 return op & ~(1ULL << (FP32_BITS - 1));
2382}
2383
2384template <>
2385uint64_t
2386fplibAbs(uint64_t op)
2387{
2388 return op & ~(1ULL << (FP64_BITS - 1));
2389}
2390
2391template <>
2392uint16_t
2393fplibAdd(uint16_t op1, uint16_t op2, FPSCR &fpscr)
2394{
2395 int flags = 0;
2396 uint16_t result = fp16_add(op1, op2, 0, modeConv(fpscr), &flags);
2397 set_fpscr0(fpscr, flags);
2398 return result;
2399}
2400
2401template <>
2402uint32_t
2403fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2404{
2405 int flags = 0;
2406 uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags);
2407 set_fpscr0(fpscr, flags);
2408 return result;
2409}
2410
2411template <>
2412uint64_t
2413fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2414{
2415 int flags = 0;
2416 uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags);
2417 set_fpscr0(fpscr, flags);
2418 return result;
2419}
2420
2421template <>
2422int
2423fplibCompare(uint16_t op1, uint16_t op2, bool signal_nans, FPSCR &fpscr)
2424{
2425 int mode = modeConv(fpscr);
2426 int flags = 0;
2427 int sgn1, exp1, sgn2, exp2, result;
2428 uint16_t mnt1, mnt2;
2429
2430 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2431 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2432
2433 if (fp16_is_NaN(exp1, mnt1) || fp16_is_NaN(exp2, mnt2)) {
2434 result = 3;
2435 if (fp16_is_signalling_NaN(exp1, mnt1) ||
2436 fp16_is_signalling_NaN(exp2, mnt2) || signal_nans)
2437 flags |= FPLIB_IOC;
2438 } else {
2439 if (op1 == op2 || (!mnt1 && !mnt2)) {
2440 result = 6;
2441 } else if (sgn1 != sgn2) {
2442 result = sgn1 ? 8 : 2;
2443 } else if (exp1 != exp2) {
2444 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2445 } else {
2446 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2447 }
2448 }
2449
2450 set_fpscr0(fpscr, flags);
2451
2452 return result;
2453}
2454
2455template <>
2456int
2457fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr)
2458{
2459 int mode = modeConv(fpscr);
2460 int flags = 0;
2461 int sgn1, exp1, sgn2, exp2, result;
2462 uint32_t mnt1, mnt2;
2463
2464 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2465 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2466
2467 if (fp32_is_NaN(exp1, mnt1) || fp32_is_NaN(exp2, mnt2)) {
2468 result = 3;
2469 if (fp32_is_signalling_NaN(exp1, mnt1) ||
2470 fp32_is_signalling_NaN(exp2, mnt2) || signal_nans)
2471 flags |= FPLIB_IOC;
2472 } else {
2473 if (op1 == op2 || (!mnt1 && !mnt2)) {
2474 result = 6;
2475 } else if (sgn1 != sgn2) {
2476 result = sgn1 ? 8 : 2;
2477 } else if (exp1 != exp2) {
2478 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2479 } else {
2480 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2481 }
2482 }
2483
2484 set_fpscr0(fpscr, flags);
2485
2486 return result;
2487}
2488
2489template <>
2490int
2491fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr)
2492{
2493 int mode = modeConv(fpscr);
2494 int flags = 0;
2495 int sgn1, exp1, sgn2, exp2, result;
2496 uint64_t mnt1, mnt2;
2497
2498 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2499 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2500
2501 if (fp64_is_NaN(exp1, mnt1) || fp64_is_NaN(exp2, mnt2)) {
2502 result = 3;
2503 if (fp64_is_signalling_NaN(exp1, mnt1) ||
2504 fp64_is_signalling_NaN(exp2, mnt2) || signal_nans)
2505 flags |= FPLIB_IOC;
2506 } else {
2507 if (op1 == op2 || (!mnt1 && !mnt2)) {
2508 result = 6;
2509 } else if (sgn1 != sgn2) {
2510 result = sgn1 ? 8 : 2;
2511 } else if (exp1 != exp2) {
2512 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2513 } else {
2514 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2515 }
2516 }
2517
2518 set_fpscr0(fpscr, flags);
2519
2520 return result;
2521}
2522
2523static uint16_t
2525{
2526 return fp16_pack(op >> (FP32_BITS - 1), FP16_EXP_INF,
2527 1ULL << (FP16_MANT_BITS - 1) |
2529}
2530
2531static uint16_t
2533{
2534 return fp16_pack(op >> (FP64_BITS - 1), FP16_EXP_INF,
2535 1ULL << (FP16_MANT_BITS - 1) |
2537}
2538
2539static uint32_t
2541{
2542 return fp32_pack(op >> (FP16_BITS - 1), FP32_EXP_INF,
2543 1ULL << (FP32_MANT_BITS - 1) |
2544 (uint32_t)op << (FP32_MANT_BITS - FP16_MANT_BITS));
2545}
2546
2547static uint32_t
2549{
2550 return fp32_pack(op >> (FP64_BITS - 1), FP32_EXP_INF,
2551 1ULL << (FP32_MANT_BITS - 1) |
2553}
2554
2555static uint64_t
2557{
2558 return fp64_pack(op >> (FP16_BITS - 1), FP64_EXP_INF,
2559 1ULL << (FP64_MANT_BITS - 1) |
2560 (uint64_t)op << (FP64_MANT_BITS - FP16_MANT_BITS));
2561}
2562
2563static uint64_t
2565{
2566 return fp64_pack(op >> (FP32_BITS - 1), FP64_EXP_INF,
2567 1ULL << (FP64_MANT_BITS - 1) |
2568 (uint64_t)op << (FP64_MANT_BITS - FP32_MANT_BITS));
2569}
2570
2571static uint16_t
2573{
2574 return fp16_pack(sgn, FP16_EXP_BIAS, 1ULL << (FP16_MANT_BITS - 1));
2575}
2576
2577static uint32_t
2579{
2580 return fp32_pack(sgn, FP32_EXP_BIAS, 1ULL << (FP32_MANT_BITS - 1));
2581}
2582
2583static uint64_t
2585{
2586 return fp64_pack(sgn, FP64_EXP_BIAS, 1ULL << (FP64_MANT_BITS - 1));
2587}
2588
2589static uint16_t
2591{
2592 return fp16_pack(sgn, FP16_EXP_BIAS + 1, 1ULL << (FP16_MANT_BITS - 1));
2593}
2594
2595static uint32_t
2597{
2598 return fp32_pack(sgn, FP32_EXP_BIAS + 1, 1ULL << (FP32_MANT_BITS - 1));
2599}
2600
2601static uint64_t
2603{
2604 return fp64_pack(sgn, FP64_EXP_BIAS + 1, 1ULL << (FP64_MANT_BITS - 1));
2605}
2606
2607static uint16_t
2609{
2610 return fp16_pack(sgn, FP16_EXP_BIAS + 1, 0);
2611}
2612
2613static uint32_t
2615{
2616 return fp32_pack(sgn, FP32_EXP_BIAS + 1, 0);
2617}
2618
2619static uint64_t
2621{
2622 return fp64_pack(sgn, FP64_EXP_BIAS + 1, 0);
2623}
2624
2625template <>
2626uint16_t
2627fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
2628{
2629 int mode = modeConv(fpscr);
2630 int flags = 0;
2631 int sgn, exp;
2632 uint32_t mnt;
2633 uint16_t result;
2634
2635 // Unpack floating-point operand optionally with flush-to-zero:
2636 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2637
2638 bool alt_hp = fpscr.ahp;
2639
2640 if (fp32_is_NaN(exp, mnt)) {
2641 if (alt_hp) {
2642 result = fp16_zero(sgn);
2643 } else if (fpscr.dn) {
2644 result = fp16_defaultNaN();
2645 } else {
2646 result = fp16_FPConvertNaN_32(op);
2647 }
2648 if (!(mnt >> (FP32_MANT_BITS - 1) & 1) || alt_hp) {
2649 flags |= FPLIB_IOC;
2650 }
2651 } else if (exp == FP32_EXP_INF) {
2652 if (alt_hp) {
2653 result = ((uint16_t)sgn << (FP16_BITS - 1) |
2654 ((1ULL << (FP16_BITS - 1)) - 1));
2655 flags |= FPLIB_IOC;
2656 } else {
2657 result = fp16_infinity(sgn);
2658 }
2659 } else if (!mnt) {
2660 result = fp16_zero(sgn);
2661 } else {
2662 result =
2664 mnt >> (FP32_MANT_BITS - FP16_BITS) |
2665 !!(mnt & ((1ULL << (FP32_MANT_BITS - FP16_BITS)) - 1)),
2666 rounding, (mode & 0xf) | alt_hp << 4, &flags);
2667 }
2668
2669 set_fpscr0(fpscr, flags);
2670
2671 return result;
2672}
2673
2674template <>
2675uint16_t
2676fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
2677{
2678 int mode = modeConv(fpscr);
2679 int flags = 0;
2680 int sgn, exp;
2681 uint64_t mnt;
2682 uint16_t result;
2683
2684 // Unpack floating-point operand optionally with flush-to-zero:
2685 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2686
2687 bool alt_hp = fpscr.ahp;
2688
2689 if (fp64_is_NaN(exp, mnt)) {
2690 if (alt_hp) {
2691 result = fp16_zero(sgn);
2692 } else if (fpscr.dn) {
2693 result = fp16_defaultNaN();
2694 } else {
2695 result = fp16_FPConvertNaN_64(op);
2696 }
2697 if (!(mnt >> (FP64_MANT_BITS - 1) & 1) || alt_hp) {
2698 flags |= FPLIB_IOC;
2699 }
2700 } else if (exp == FP64_EXP_INF) {
2701 if (alt_hp) {
2702 result = ((uint16_t)sgn << (FP16_BITS - 1) |
2703 ((1ULL << (FP16_BITS - 1)) - 1));
2704 flags |= FPLIB_IOC;
2705 } else {
2706 result = fp16_infinity(sgn);
2707 }
2708 } else if (!mnt) {
2709 result = fp16_zero(sgn);
2710 } else {
2711 result =
2713 mnt >> (FP64_MANT_BITS - FP16_BITS) |
2714 !!(mnt & ((1ULL << (FP64_MANT_BITS - FP16_BITS)) - 1)),
2715 rounding, (mode & 0xf) | alt_hp << 4, &flags);
2716 }
2717
2718 set_fpscr0(fpscr, flags);
2719
2720 return result;
2721}
2722
2723template <>
2724uint32_t
2725fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
2726{
2727 int mode = modeConv(fpscr);
2728 int flags = 0;
2729 int sgn, exp;
2730 uint16_t mnt;
2731 uint32_t result;
2732
2733 // Unpack floating-point operand optionally with flush-to-zero:
2734 fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags);
2735
2736 if (fp16_is_NaN(exp, mnt) && !fpscr.ahp) {
2737 if (fpscr.dn) {
2738 result = fp32_defaultNaN();
2739 } else {
2740 result = fp32_FPConvertNaN_16(op);
2741 }
2742 if (!(mnt >> (FP16_MANT_BITS - 1) & 1)) {
2743 flags |= FPLIB_IOC;
2744 }
2745 } else if (exp == FP16_EXP_INF && !fpscr.ahp) {
2746 result = fp32_infinity(sgn);
2747 } else if (!mnt) {
2748 result = fp32_zero(sgn);
2749 } else {
2750 mnt = fp16_normalise(mnt, &exp);
2751 result = fp32_pack(sgn, (exp - FP16_EXP_BIAS +
2753 (uint32_t)mnt << (FP32_MANT_BITS - FP16_BITS + 1));
2754 }
2755
2756 set_fpscr0(fpscr, flags);
2757
2758 return result;
2759}
2760
2761template <>
2762uint32_t
2763fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
2764{
2765 int mode = modeConv(fpscr);
2766 int flags = 0;
2767 int sgn, exp;
2768 uint64_t mnt;
2769 uint32_t result;
2770
2771 // Unpack floating-point operand optionally with flush-to-zero:
2772 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2773
2774 if (fp64_is_NaN(exp, mnt)) {
2775 if (fpscr.dn) {
2776 result = fp32_defaultNaN();
2777 } else {
2778 result = fp32_FPConvertNaN_64(op);
2779 }
2780 if (!(mnt >> (FP64_MANT_BITS - 1) & 1)) {
2781 flags |= FPLIB_IOC;
2782 }
2783 } else if (exp == FP64_EXP_INF) {
2784 result = fp32_infinity(sgn);
2785 } else if (!mnt) {
2786 result = fp32_zero(sgn);
2787 } else {
2788 result =
2790 mnt >> (FP64_MANT_BITS - FP32_BITS) |
2791 !!(mnt & ((1ULL << (FP64_MANT_BITS - FP32_BITS)) - 1)),
2792 rounding, mode, &flags);
2793 }
2794
2795 set_fpscr0(fpscr, flags);
2796
2797 return result;
2798}
2799
2800template <>
2801uint64_t
2802fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
2803{
2804 int mode = modeConv(fpscr);
2805 int flags = 0;
2806 int sgn, exp;
2807 uint16_t mnt;
2808 uint64_t result;
2809
2810 // Unpack floating-point operand optionally with flush-to-zero:
2811 fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags);
2812
2813 if (fp16_is_NaN(exp, mnt) && !fpscr.ahp) {
2814 if (fpscr.dn) {
2815 result = fp64_defaultNaN();
2816 } else {
2817 result = fp64_FPConvertNaN_16(op);
2818 }
2819 if (!(mnt >> (FP16_MANT_BITS - 1) & 1)) {
2820 flags |= FPLIB_IOC;
2821 }
2822 } else if (exp == FP16_EXP_INF && !fpscr.ahp) {
2823 result = fp64_infinity(sgn);
2824 } else if (!mnt) {
2825 result = fp64_zero(sgn);
2826 } else {
2827 mnt = fp16_normalise(mnt, &exp);
2828 result = fp64_pack(sgn, (exp - FP16_EXP_BIAS +
2830 (uint64_t)mnt << (FP64_MANT_BITS - FP16_BITS + 1));
2831 }
2832
2833 set_fpscr0(fpscr, flags);
2834
2835 return result;
2836}
2837
2838template <>
2839uint64_t
2840fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
2841{
2842 int mode = modeConv(fpscr);
2843 int flags = 0;
2844 int sgn, exp;
2845 uint32_t mnt;
2846 uint64_t result;
2847
2848 // Unpack floating-point operand optionally with flush-to-zero:
2849 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2850
2851 if (fp32_is_NaN(exp, mnt)) {
2852 if (fpscr.dn) {
2853 result = fp64_defaultNaN();
2854 } else {
2855 result = fp64_FPConvertNaN_32(op);
2856 }
2857 if (!(mnt >> (FP32_MANT_BITS - 1) & 1)) {
2858 flags |= FPLIB_IOC;
2859 }
2860 } else if (exp == FP32_EXP_INF) {
2861 result = fp64_infinity(sgn);
2862 } else if (!mnt) {
2863 result = fp64_zero(sgn);
2864 } else {
2865 mnt = fp32_normalise(mnt, &exp);
2866 result = fp64_pack(sgn, (exp - FP32_EXP_BIAS +
2868 (uint64_t)mnt << (FP64_MANT_BITS - FP32_BITS + 1));
2869 }
2870
2871 set_fpscr0(fpscr, flags);
2872
2873 return result;
2874}
2875
2876template <>
2877uint16_t
2878fplibMulAdd(uint16_t addend, uint16_t op1, uint16_t op2, FPSCR &fpscr)
2879{
2880 int flags = 0;
2881 uint16_t result = fp16_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2882 set_fpscr0(fpscr, flags);
2883 return result;
2884}
2885
2886template <>
2887uint32_t
2888fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr)
2889{
2890 int flags = 0;
2891 uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2892 set_fpscr0(fpscr, flags);
2893 return result;
2894}
2895
2896template <>
2897uint64_t
2898fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr)
2899{
2900 int flags = 0;
2901 uint64_t result = fp64_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2902 set_fpscr0(fpscr, flags);
2903 return result;
2904}
2905
2906template <>
2907uint16_t
2908fplibDiv(uint16_t op1, uint16_t op2, FPSCR &fpscr)
2909{
2910 int flags = 0;
2911 uint16_t result = fp16_div(op1, op2, modeConv(fpscr), &flags);
2912 set_fpscr0(fpscr, flags);
2913 return result;
2914}
2915
2916template <>
2917uint32_t
2918fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2919{
2920 int flags = 0;
2921 uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags);
2922 set_fpscr0(fpscr, flags);
2923 return result;
2924}
2925
2926template <>
2927uint64_t
2928fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2929{
2930 int flags = 0;
2931 uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags);
2932 set_fpscr0(fpscr, flags);
2933 return result;
2934}
2935
2936template <>
2937uint16_t
2938fplibExpA(uint16_t op)
2939{
2940 static uint16_t coeff[32] = {
2941 0x0000,
2942 0x0016,
2943 0x002d,
2944 0x0045,
2945 0x005d,
2946 0x0075,
2947 0x008e,
2948 0x00a8,
2949 0x00c2,
2950 0x00dc,
2951 0x00f8,
2952 0x0114,
2953 0x0130,
2954 0x014d,
2955 0x016b,
2956 0x0189,
2957 0x01a8,
2958 0x01c8,
2959 0x01e8,
2960 0x0209,
2961 0x022b,
2962 0x024e,
2963 0x0271,
2964 0x0295,
2965 0x02ba,
2966 0x02e0,
2967 0x0306,
2968 0x032e,
2969 0x0356,
2970 0x037f,
2971 0x03a9,
2972 0x03d4
2973 };
2974 return ((((op >> 5) & ((1 << FP16_EXP_BITS) - 1)) << FP16_MANT_BITS) |
2975 coeff[op & ((1 << 5) - 1)]);
2976}
2977
2978template <>
2979uint32_t
2980fplibExpA(uint32_t op)
2981{
2982 static uint32_t coeff[64] = {
2983 0x000000,
2984 0x0164d2,
2985 0x02cd87,
2986 0x043a29,
2987 0x05aac3,
2988 0x071f62,
2989 0x08980f,
2990 0x0a14d5,
2991 0x0b95c2,
2992 0x0d1adf,
2993 0x0ea43a,
2994 0x1031dc,
2995 0x11c3d3,
2996 0x135a2b,
2997 0x14f4f0,
2998 0x16942d,
2999 0x1837f0,
3000 0x19e046,
3001 0x1b8d3a,
3002 0x1d3eda,
3003 0x1ef532,
3004 0x20b051,
3005 0x227043,
3006 0x243516,
3007 0x25fed7,
3008 0x27cd94,
3009 0x29a15b,
3010 0x2b7a3a,
3011 0x2d583f,
3012 0x2f3b79,
3013 0x3123f6,
3014 0x3311c4,
3015 0x3504f3,
3016 0x36fd92,
3017 0x38fbaf,
3018 0x3aff5b,
3019 0x3d08a4,
3020 0x3f179a,
3021 0x412c4d,
3022 0x4346cd,
3023 0x45672a,
3024 0x478d75,
3025 0x49b9be,
3026 0x4bec15,
3027 0x4e248c,
3028 0x506334,
3029 0x52a81e,
3030 0x54f35b,
3031 0x5744fd,
3032 0x599d16,
3033 0x5bfbb8,
3034 0x5e60f5,
3035 0x60ccdf,
3036 0x633f89,
3037 0x65b907,
3038 0x68396a,
3039 0x6ac0c7,
3040 0x6d4f30,
3041 0x6fe4ba,
3042 0x728177,
3043 0x75257d,
3044 0x77d0df,
3045 0x7a83b3,
3046 0x7d3e0c
3047 };
3048 return ((((op >> 6) & ((1 << FP32_EXP_BITS) - 1)) << FP32_MANT_BITS) |
3049 coeff[op & ((1 << 6) - 1)]);
3050}
3051
3052template <>
3053uint64_t
3054fplibExpA(uint64_t op)
3055{
3056 static uint64_t coeff[64] = {
3057 0x0000000000000ULL,
3058 0x02c9a3e778061ULL,
3059 0x059b0d3158574ULL,
3060 0x0874518759bc8ULL,
3061 0x0b5586cf9890fULL,
3062 0x0e3ec32d3d1a2ULL,
3063 0x11301d0125b51ULL,
3064 0x1429aaea92de0ULL,
3065 0x172b83c7d517bULL,
3066 0x1a35beb6fcb75ULL,
3067 0x1d4873168b9aaULL,
3068 0x2063b88628cd6ULL,
3069 0x2387a6e756238ULL,
3070 0x26b4565e27cddULL,
3071 0x29e9df51fdee1ULL,
3072 0x2d285a6e4030bULL,
3073 0x306fe0a31b715ULL,
3074 0x33c08b26416ffULL,
3075 0x371a7373aa9cbULL,
3076 0x3a7db34e59ff7ULL,
3077 0x3dea64c123422ULL,
3078 0x4160a21f72e2aULL,
3079 0x44e086061892dULL,
3080 0x486a2b5c13cd0ULL,
3081 0x4bfdad5362a27ULL,
3082 0x4f9b2769d2ca7ULL,
3083 0x5342b569d4f82ULL,
3084 0x56f4736b527daULL,
3085 0x5ab07dd485429ULL,
3086 0x5e76f15ad2148ULL,
3087 0x6247eb03a5585ULL,
3088 0x6623882552225ULL,
3089 0x6a09e667f3bcdULL,
3090 0x6dfb23c651a2fULL,
3091 0x71f75e8ec5f74ULL,
3092 0x75feb564267c9ULL,
3093 0x7a11473eb0187ULL,
3094 0x7e2f336cf4e62ULL,
3095 0x82589994cce13ULL,
3096 0x868d99b4492edULL,
3097 0x8ace5422aa0dbULL,
3098 0x8f1ae99157736ULL,
3099 0x93737b0cdc5e5ULL,
3100 0x97d829fde4e50ULL,
3101 0x9c49182a3f090ULL,
3102 0xa0c667b5de565ULL,
3103 0xa5503b23e255dULL,
3104 0xa9e6b5579fdbfULL,
3105 0xae89f995ad3adULL,
3106 0xb33a2b84f15fbULL,
3107 0xb7f76f2fb5e47ULL,
3108 0xbcc1e904bc1d2ULL,
3109 0xc199bdd85529cULL,
3110 0xc67f12e57d14bULL,
3111 0xcb720dcef9069ULL,
3112 0xd072d4a07897cULL,
3113 0xd5818dcfba487ULL,
3114 0xda9e603db3285ULL,
3115 0xdfc97337b9b5fULL,
3116 0xe502ee78b3ff6ULL,
3117 0xea4afa2a490daULL,
3118 0xefa1bee615a27ULL,
3119 0xf50765b6e4540ULL,
3120 0xfa7c1819e90d8ULL
3121 };
3122 return ((((op >> 6) & ((1 << FP64_EXP_BITS) - 1)) << FP64_MANT_BITS) |
3123 coeff[op & ((1 << 6) - 1)]);
3124}
3125
3126static uint16_t
3127fp16_repack(int sgn, int exp, uint16_t mnt)
3128{
3129 return fp16_pack(sgn, mnt >> FP16_MANT_BITS ? exp : 0, mnt);
3130}
3131
3132static uint32_t
3133fp32_repack(int sgn, int exp, uint32_t mnt)
3134{
3135 return fp32_pack(sgn, mnt >> FP32_MANT_BITS ? exp : 0, mnt);
3136}
3137
3138static uint64_t
3139fp64_repack(int sgn, int exp, uint64_t mnt)
3140{
3141 return fp64_pack(sgn, mnt >> FP64_MANT_BITS ? exp : 0, mnt);
3142}
3143
3144static void
3145fp16_minmaxnum(uint16_t *op1, uint16_t *op2, int sgn)
3146{
3147 // Treat a single quiet-NaN as +Infinity/-Infinity
3148 if (!((uint16_t)~(*op1 << 1) >> FP16_MANT_BITS) &&
3149 (uint16_t)~(*op2 << 1) >> FP16_MANT_BITS)
3150 *op1 = fp16_infinity(sgn);
3151 if (!((uint16_t)~(*op2 << 1) >> FP16_MANT_BITS) &&
3152 (uint16_t)~(*op1 << 1) >> FP16_MANT_BITS)
3153 *op2 = fp16_infinity(sgn);
3154}
3155
3156static void
3157fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn)
3158{
3159 // Treat a single quiet-NaN as +Infinity/-Infinity
3160 if (!((uint32_t)~(*op1 << 1) >> FP32_MANT_BITS) &&
3161 (uint32_t)~(*op2 << 1) >> FP32_MANT_BITS)
3162 *op1 = fp32_infinity(sgn);
3163 if (!((uint32_t)~(*op2 << 1) >> FP32_MANT_BITS) &&
3164 (uint32_t)~(*op1 << 1) >> FP32_MANT_BITS)
3165 *op2 = fp32_infinity(sgn);
3166}
3167
3168static void
3169fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn)
3170{
3171 // Treat a single quiet-NaN as +Infinity/-Infinity
3172 if (!((uint64_t)~(*op1 << 1) >> FP64_MANT_BITS) &&
3173 (uint64_t)~(*op2 << 1) >> FP64_MANT_BITS)
3174 *op1 = fp64_infinity(sgn);
3175 if (!((uint64_t)~(*op2 << 1) >> FP64_MANT_BITS) &&
3176 (uint64_t)~(*op1 << 1) >> FP64_MANT_BITS)
3177 *op2 = fp64_infinity(sgn);
3178}
3179
3180template <>
3181uint16_t
3182fplibMax(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3183{
3184 int mode = modeConv(fpscr);
3185 int flags = 0;
3186 int sgn1, exp1, sgn2, exp2;
3187 uint16_t mnt1, mnt2, x, result;
3188
3189 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3190 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3191
3192 if ((x = fp16_process_NaNs(op1, op2, mode, &flags))) {
3193 result = x;
3194 } else {
3195 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3196 fp16_repack(sgn1, exp1, mnt1) :
3197 fp16_repack(sgn2, exp2, mnt2));
3198 }
3199 set_fpscr0(fpscr, flags);
3200 return result;
3201}
3202
3203template <>
3204uint32_t
3205fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3206{
3207 int mode = modeConv(fpscr);
3208 int flags = 0;
3209 int sgn1, exp1, sgn2, exp2;
3210 uint32_t mnt1, mnt2, x, result;
3211
3212 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3213 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3214
3215 if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
3216 result = x;
3217 } else {
3218 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3219 fp32_repack(sgn1, exp1, mnt1) :
3220 fp32_repack(sgn2, exp2, mnt2));
3221 }
3222 set_fpscr0(fpscr, flags);
3223 return result;
3224}
3225
3226template <>
3227uint64_t
3228fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3229{
3230 int mode = modeConv(fpscr);
3231 int flags = 0;
3232 int sgn1, exp1, sgn2, exp2;
3233 uint64_t mnt1, mnt2, x, result;
3234
3235 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3236 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3237
3238 if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
3239 result = x;
3240 } else {
3241 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3242 fp64_repack(sgn1, exp1, mnt1) :
3243 fp64_repack(sgn2, exp2, mnt2));
3244 }
3245 set_fpscr0(fpscr, flags);
3246 return result;
3247}
3248
3249template <>
3250uint16_t
3251fplibMaxNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3252{
3253 fp16_minmaxnum(&op1, &op2, 1);
3254 return fplibMax<uint16_t>(op1, op2, fpscr);
3255}
3256
3257template <>
3258uint32_t
3259fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3260{
3261 fp32_minmaxnum(&op1, &op2, 1);
3262 return fplibMax<uint32_t>(op1, op2, fpscr);
3263}
3264
3265template <>
3266uint64_t
3267fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3268{
3269 fp64_minmaxnum(&op1, &op2, 1);
3270 return fplibMax<uint64_t>(op1, op2, fpscr);
3271}
3272
3273template <>
3274uint16_t
3275fplibMin(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3276{
3277 int mode = modeConv(fpscr);
3278 int flags = 0;
3279 int sgn1, exp1, sgn2, exp2;
3280 uint16_t mnt1, mnt2, x, result;
3281
3282 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3283 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3284
3285 if ((x = fp16_process_NaNs(op1, op2, mode, &flags))) {
3286 result = x;
3287 } else {
3288 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3289 fp16_repack(sgn1, exp1, mnt1) :
3290 fp16_repack(sgn2, exp2, mnt2));
3291 }
3292 set_fpscr0(fpscr, flags);
3293 return result;
3294}
3295
3296template <>
3297uint32_t
3298fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3299{
3300 int mode = modeConv(fpscr);
3301 int flags = 0;
3302 int sgn1, exp1, sgn2, exp2;
3303 uint32_t mnt1, mnt2, x, result;
3304
3305 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3306 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3307
3308 if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
3309 result = x;
3310 } else {
3311 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3312 fp32_repack(sgn1, exp1, mnt1) :
3313 fp32_repack(sgn2, exp2, mnt2));
3314 }
3315 set_fpscr0(fpscr, flags);
3316 return result;
3317}
3318
3319template <>
3320uint64_t
3321fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3322{
3323 int mode = modeConv(fpscr);
3324 int flags = 0;
3325 int sgn1, exp1, sgn2, exp2;
3326 uint64_t mnt1, mnt2, x, result;
3327
3328 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3329 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3330
3331 if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
3332 result = x;
3333 } else {
3334 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3335 fp64_repack(sgn1, exp1, mnt1) :
3336 fp64_repack(sgn2, exp2, mnt2));
3337 }
3338 set_fpscr0(fpscr, flags);
3339 return result;
3340}
3341
3342template <>
3343uint16_t
3344fplibMinNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3345{
3346 fp16_minmaxnum(&op1, &op2, 0);
3347 return fplibMin<uint16_t>(op1, op2, fpscr);
3348}
3349
3350template <>
3351uint32_t
3352fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3353{
3354 fp32_minmaxnum(&op1, &op2, 0);
3355 return fplibMin<uint32_t>(op1, op2, fpscr);
3356}
3357
3358template <>
3359uint64_t
3360fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3361{
3362 fp64_minmaxnum(&op1, &op2, 0);
3363 return fplibMin<uint64_t>(op1, op2, fpscr);
3364}
3365
3366template <>
3367uint16_t
3368fplibMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3369{
3370 int flags = 0;
3371 uint16_t result = fp16_mul(op1, op2, modeConv(fpscr), &flags);
3372 set_fpscr0(fpscr, flags);
3373 return result;
3374}
3375
3376template <>
3377uint32_t
3378fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3379{
3380 int flags = 0;
3381 uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags);
3382 set_fpscr0(fpscr, flags);
3383 return result;
3384}
3385
3386template <>
3387uint64_t
3388fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3389{
3390 int flags = 0;
3391 uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags);
3392 set_fpscr0(fpscr, flags);
3393 return result;
3394}
3395
3396template <>
3397uint16_t
3398fplibMulX(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3399{
3400 int mode = modeConv(fpscr);
3401 int flags = 0;
3402 int sgn1, exp1, sgn2, exp2;
3403 uint16_t mnt1, mnt2, result;
3404
3405 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3406 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3407
3408 result = fp16_process_NaNs(op1, op2, mode, &flags);
3409 if (!result) {
3410 if ((exp1 == FP16_EXP_INF && !mnt2) ||
3411 (exp2 == FP16_EXP_INF && !mnt1)) {
3412 result = fp16_FPTwo(sgn1 ^ sgn2);
3413 } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
3414 result = fp16_infinity(sgn1 ^ sgn2);
3415 } else if (!mnt1 || !mnt2) {
3416 result = fp16_zero(sgn1 ^ sgn2);
3417 } else {
3418 result = fp16_mul(op1, op2, mode, &flags);
3419 }
3420 }
3421
3422 set_fpscr0(fpscr, flags);
3423
3424 return result;
3425}
3426
3427template <>
3428uint32_t
3429fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3430{
3431 int mode = modeConv(fpscr);
3432 int flags = 0;
3433 int sgn1, exp1, sgn2, exp2;
3434 uint32_t mnt1, mnt2, result;
3435
3436 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3437 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3438
3439 result = fp32_process_NaNs(op1, op2, mode, &flags);
3440 if (!result) {
3441 if ((exp1 == FP32_EXP_INF && !mnt2) ||
3442 (exp2 == FP32_EXP_INF && !mnt1)) {
3443 result = fp32_FPTwo(sgn1 ^ sgn2);
3444 } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
3445 result = fp32_infinity(sgn1 ^ sgn2);
3446 } else if (!mnt1 || !mnt2) {
3447 result = fp32_zero(sgn1 ^ sgn2);
3448 } else {
3449 result = fp32_mul(op1, op2, mode, &flags);
3450 }
3451 }
3452
3453 set_fpscr0(fpscr, flags);
3454
3455 return result;
3456}
3457
3458template <>
3459uint64_t
3460fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3461{
3462 int mode = modeConv(fpscr);
3463 int flags = 0;
3464 int sgn1, exp1, sgn2, exp2;
3465 uint64_t mnt1, mnt2, result;
3466
3467 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3468 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3469
3470 result = fp64_process_NaNs(op1, op2, mode, &flags);
3471 if (!result) {
3472 if ((exp1 == FP64_EXP_INF && !mnt2) ||
3473 (exp2 == FP64_EXP_INF && !mnt1)) {
3474 result = fp64_FPTwo(sgn1 ^ sgn2);
3475 } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
3476 result = fp64_infinity(sgn1 ^ sgn2);
3477 } else if (!mnt1 || !mnt2) {
3478 result = fp64_zero(sgn1 ^ sgn2);
3479 } else {
3480 result = fp64_mul(op1, op2, mode, &flags);
3481 }
3482 }
3483
3484 set_fpscr0(fpscr, flags);
3485
3486 return result;
3487}
3488
3489template <>
3490uint16_t
3491fplibNeg(uint16_t op)
3492{
3493 return op ^ 1ULL << (FP16_BITS - 1);
3494}
3495
3496template <>
3497uint32_t
3498fplibNeg(uint32_t op)
3499{
3500 return op ^ 1ULL << (FP32_BITS - 1);
3501}
3502
3503template <>
3504uint64_t
3505fplibNeg(uint64_t op)
3506{
3507 return op ^ 1ULL << (FP64_BITS - 1);
3508}
3509
3510static const uint8_t recip_sqrt_estimate[256] = {
3511 255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228,
3512 226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203,
3513 201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181,
3514 180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163,
3515 162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146,
3516 145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131,
3517 131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118,
3518 118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106,
3519 105, 104, 103, 101, 100, 99, 97, 96, 95, 93, 92, 91, 90, 88, 87, 86,
3520 85, 84, 82, 81, 80, 79, 78, 77, 76, 75, 74, 72, 71, 70, 69, 68,
3521 67, 66, 65, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 55, 54, 53,
3522 52, 51, 51, 50, 49, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 40,
3523 39, 38, 38, 37, 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28,
3524 28, 27, 26, 26, 25, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 18,
3525 17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 10, 10, 9, 9,
3526 8, 8, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0
3527};
3528
3529template <>
3530uint16_t
3531fplibRSqrtEstimate(uint16_t op, FPSCR &fpscr)
3532{
3533 int mode = modeConv(fpscr);
3534 int flags = 0;
3535 int sgn, exp;
3536 uint16_t mnt, result;
3537
3538 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3539
3540 if (fp16_is_NaN(exp, mnt)) {
3541 result = fp16_process_NaN(op, mode, &flags);
3542 } else if (!mnt) {
3543 result = fp16_infinity(sgn);
3544 flags |= FPLIB_DZC;
3545 } else if (sgn) {
3546 result = fp16_defaultNaN();
3547 flags |= FPLIB_IOC;
3548 } else if (exp == FP16_EXP_INF) {
3549 result = fp16_zero(0);
3550 } else {
3551 exp += FP16_EXP_BITS;
3552 mnt = fp16_normalise(mnt, &exp);
3553 mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3554 (mnt >> (FP16_BITS - 8) & 127)];
3555 result = fp16_pack(0, (3 * FP16_EXP_BIAS - exp - 1) >> 1,
3556 mnt << (FP16_MANT_BITS - 8));
3557 }
3558
3559 set_fpscr0(fpscr, flags);
3560
3561 return result;
3562}
3563
3564template <>
3565uint32_t
3566fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr)
3567{
3568 int mode = modeConv(fpscr);
3569 int flags = 0;
3570 int sgn, exp;
3571 uint32_t mnt, result;
3572
3573 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3574
3575 if (fp32_is_NaN(exp, mnt)) {
3576 result = fp32_process_NaN(op, mode, &flags);
3577 } else if (!mnt) {
3578 result = fp32_infinity(sgn);
3579 flags |= FPLIB_DZC;
3580 } else if (sgn) {
3581 result = fp32_defaultNaN();
3582 flags |= FPLIB_IOC;
3583 } else if (exp == FP32_EXP_INF) {
3584 result = fp32_zero(0);
3585 } else {
3586 exp += FP32_EXP_BITS;
3587 mnt = fp32_normalise(mnt, &exp);
3588 mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3589 (mnt >> (FP32_BITS - 8) & 127)];
3590 result = fp32_pack(0, (3 * FP32_EXP_BIAS - exp - 1) >> 1,
3591 mnt << (FP32_MANT_BITS - 8));
3592 }
3593
3594 set_fpscr0(fpscr, flags);
3595
3596 return result;
3597}
3598
3599template <>
3600uint64_t
3601fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr)
3602{
3603 int mode = modeConv(fpscr);
3604 int flags = 0;
3605 int sgn, exp;
3606 uint64_t mnt, result;
3607
3608 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3609
3610 if (fp64_is_NaN(exp, mnt)) {
3611 result = fp64_process_NaN(op, mode, &flags);
3612 } else if (!mnt) {
3613 result = fp64_infinity(sgn);
3614 flags |= FPLIB_DZC;
3615 } else if (sgn) {
3616 result = fp64_defaultNaN();
3617 flags |= FPLIB_IOC;
3618 } else if (exp == FP64_EXP_INF) {
3619 result = fp32_zero(0);
3620 } else {
3621 exp += FP64_EXP_BITS;
3622 mnt = fp64_normalise(mnt, &exp);
3623 mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3624 (mnt >> (FP64_BITS - 8) & 127)];
3625 result = fp64_pack(0, (3 * FP64_EXP_BIAS - exp - 1) >> 1,
3626 mnt << (FP64_MANT_BITS - 8));
3627 }
3628
3629 set_fpscr0(fpscr, flags);
3630
3631 return result;
3632}
3633
3634template <>
3635uint16_t
3636fplibRSqrtStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3637{
3638 int mode = modeConv(fpscr);
3639 int flags = 0;
3640 int sgn1, exp1, sgn2, exp2;
3641 uint16_t mnt1, mnt2, result;
3642
3643 op1 = fplibNeg<uint16_t>(op1);
3644 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3645 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3646
3647 result = fp16_process_NaNs(op1, op2, mode, &flags);
3648 if (!result) {
3649 if ((exp1 == FP16_EXP_INF && !mnt2) ||
3650 (exp2 == FP16_EXP_INF && !mnt1)) {
3651 result = fp16_FPOnePointFive(0);
3652 } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
3653 result = fp16_infinity(sgn1 ^ sgn2);
3654 } else {
3655 result = fp16_muladd(fp16_FPThree(0), op1, op2, -1, mode, &flags);
3656 }
3657 }
3658
3659 set_fpscr0(fpscr, flags);
3660
3661 return result;
3662}
3663
3664template <>
3665uint32_t
3666fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3667{
3668 int mode = modeConv(fpscr);
3669 int flags = 0;
3670 int sgn1, exp1, sgn2, exp2;
3671 uint32_t mnt1, mnt2, result;
3672
3673 op1 = fplibNeg<uint32_t>(op1);
3674 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3675 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3676
3677 result = fp32_process_NaNs(op1, op2, mode, &flags);
3678 if (!result) {
3679 if ((exp1 == FP32_EXP_INF && !mnt2) ||
3680 (exp2 == FP32_EXP_INF && !mnt1)) {
3681 result = fp32_FPOnePointFive(0);
3682 } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
3683 result = fp32_infinity(sgn1 ^ sgn2);
3684 } else {
3685 result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags);
3686 }
3687 }
3688
3689 set_fpscr0(fpscr, flags);
3690
3691 return result;
3692}
3693
3694template <>
3695uint64_t
3696fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3697{
3698 int mode = modeConv(fpscr);
3699 int flags = 0;
3700 int sgn1, exp1, sgn2, exp2;
3701 uint64_t mnt1, mnt2, result;
3702
3703 op1 = fplibNeg<uint64_t>(op1);
3704 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3705 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3706
3707 result = fp64_process_NaNs(op1, op2, mode, &flags);
3708 if (!result) {
3709 if ((exp1 == FP64_EXP_INF && !mnt2) ||
3710 (exp2 == FP64_EXP_INF && !mnt1)) {
3711 result = fp64_FPOnePointFive(0);
3712 } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
3713 result = fp64_infinity(sgn1 ^ sgn2);
3714 } else {
3715 result = fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags);
3716 }
3717 }
3718
3719 set_fpscr0(fpscr, flags);
3720
3721 return result;
3722}
3723
3724template <>
3725uint16_t
3726fplibRecipEstimate(uint16_t op, FPSCR &fpscr)
3727{
3728 int mode = modeConv(fpscr);
3729 int flags = 0;
3730 int sgn, exp;
3731 uint16_t mnt, result;
3732
3733 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3734
3735 if (fp16_is_NaN(exp, mnt)) {
3736 result = fp16_process_NaN(op, mode, &flags);
3737 } else if (exp == FP16_EXP_INF) {
3738 result = fp16_zero(sgn);
3739 } else if (!mnt) {
3740 result = fp16_infinity(sgn);
3741 flags |= FPLIB_DZC;
3742 } else if (!((uint16_t)(op << 1) >> (FP16_MANT_BITS - 1))) {
3743 bool overflow_to_inf = false;
3744 switch (FPCRRounding(fpscr)) {
3745 case FPRounding_TIEEVEN:
3746 overflow_to_inf = true;
3747 break;
3748 case FPRounding_POSINF:
3749 overflow_to_inf = !sgn;
3750 break;
3751 case FPRounding_NEGINF:
3752 overflow_to_inf = sgn;
3753 break;
3754 case FPRounding_ZERO:
3755 overflow_to_inf = false;
3756 break;
3757 default:
3758 panic("Unrecognized FP rounding mode");
3759 }
3760 result = overflow_to_inf ? fp16_infinity(sgn) : fp16_max_normal(sgn);
3762 } else if (fpscr.fz16 && exp >= 2 * FP16_EXP_BIAS - 1) {
3763 result = fp16_zero(sgn);
3764 flags |= FPLIB_UFC;
3765 } else {
3766 exp += FP16_EXP_BITS;
3767 mnt = fp16_normalise(mnt, &exp);
3768 int result_exp = 2 * FP16_EXP_BIAS - 1 - exp;
3769 uint16_t fraction = (((uint32_t)1 << 19) /
3770 (mnt >> (FP16_BITS - 10) | 1) + 1) >> 1;
3771 fraction <<= FP16_MANT_BITS - 8;
3772 if (result_exp == 0) {
3773 fraction >>= 1;
3774 } else if (result_exp == -1) {
3775 fraction >>= 2;
3776 result_exp = 0;
3777 }
3778 result = fp16_pack(sgn, result_exp, fraction);
3779 }
3780
3781 set_fpscr0(fpscr, flags);
3782
3783 return result;
3784}
3785
3786template <>
3787uint32_t
3788fplibRecipEstimate(uint32_t op, FPSCR &fpscr)
3789{
3790 int mode = modeConv(fpscr);
3791 int flags = 0;
3792 int sgn, exp;
3793 uint32_t mnt, result;
3794
3795 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3796
3797 if (fp32_is_NaN(exp, mnt)) {
3798 result = fp32_process_NaN(op, mode, &flags);
3799 } else if (exp == FP32_EXP_INF) {
3800 result = fp32_zero(sgn);
3801 } else if (!mnt) {
3802 result = fp32_infinity(sgn);
3803 flags |= FPLIB_DZC;
3804 } else if (!((uint32_t)(op << 1) >> (FP32_MANT_BITS - 1))) {
3805 bool overflow_to_inf = false;
3806 switch (FPCRRounding(fpscr)) {
3807 case FPRounding_TIEEVEN:
3808 overflow_to_inf = true;
3809 break;
3810 case FPRounding_POSINF:
3811 overflow_to_inf = !sgn;
3812 break;
3813 case FPRounding_NEGINF:
3814 overflow_to_inf = sgn;
3815 break;
3816 case FPRounding_ZERO:
3817 overflow_to_inf = false;
3818 break;
3819 default:
3820 panic("Unrecognized FP rounding mode");
3821 }
3822 result = overflow_to_inf ? fp32_infinity(sgn) : fp32_max_normal(sgn);
3824 } else if (fpscr.fz && exp >= 2 * FP32_EXP_BIAS - 1) {
3825 result = fp32_zero(sgn);
3826 flags |= FPLIB_UFC;
3827 } else {
3828 exp += FP32_EXP_BITS;
3829 mnt = fp32_normalise(mnt, &exp);
3830 int result_exp = 2 * FP32_EXP_BIAS - 1 - exp;
3831 uint32_t fraction = (((uint32_t)1 << 19) /
3832 (mnt >> (FP32_BITS - 10) | 1) + 1) >> 1;
3833 fraction <<= FP32_MANT_BITS - 8;
3834 if (result_exp == 0) {
3835 fraction >>= 1;
3836 } else if (result_exp == -1) {
3837 fraction >>= 2;
3838 result_exp = 0;
3839 }
3840 result = fp32_pack(sgn, result_exp, fraction);
3841 }
3842
3843 set_fpscr0(fpscr, flags);
3844
3845 return result;
3846}
3847
3848template <>
3849uint64_t
3850fplibRecipEstimate(uint64_t op, FPSCR &fpscr)
3851{
3852 int mode = modeConv(fpscr);
3853 int flags = 0;
3854 int sgn, exp;
3855 uint64_t mnt, result;
3856
3857 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3858
3859 if (fp64_is_NaN(exp, mnt)) {
3860 result = fp64_process_NaN(op, mode, &flags);
3861 } else if (exp == FP64_EXP_INF) {
3862 result = fp64_zero(sgn);
3863 } else if (!mnt) {
3864 result = fp64_infinity(sgn);
3865 flags |= FPLIB_DZC;
3866 } else if (!((uint64_t)(op << 1) >> (FP64_MANT_BITS - 1))) {
3867 bool overflow_to_inf = false;
3868 switch (FPCRRounding(fpscr)) {
3869 case FPRounding_TIEEVEN:
3870 overflow_to_inf = true;
3871 break;
3872 case FPRounding_POSINF:
3873 overflow_to_inf = !sgn;
3874 break;
3875 case FPRounding_NEGINF:
3876 overflow_to_inf = sgn;
3877 break;
3878 case FPRounding_ZERO:
3879 overflow_to_inf = false;
3880 break;
3881 default:
3882 panic("Unrecognized FP rounding mode");
3883 }
3884 result = overflow_to_inf ? fp64_infinity(sgn) : fp64_max_normal(sgn);
3886 } else if (fpscr.fz && exp >= 2 * FP64_EXP_BIAS - 1) {
3887 result = fp64_zero(sgn);
3888 flags |= FPLIB_UFC;
3889 } else {
3890 exp += FP64_EXP_BITS;
3891 mnt = fp64_normalise(mnt, &exp);
3892 int result_exp = 2 * FP64_EXP_BIAS - 1 - exp;
3893 uint64_t fraction = (((uint32_t)1 << 19) /
3894 (mnt >> (FP64_BITS - 10) | 1) + 1) >> 1;
3895 fraction <<= FP64_MANT_BITS - 8;
3896 if (result_exp == 0) {
3897 fraction >>= 1;
3898 } else if (result_exp == -1) {
3899 fraction >>= 2;
3900 result_exp = 0;
3901 }
3902 result = fp64_pack(sgn, result_exp, fraction);
3903 }
3904
3905 set_fpscr0(fpscr, flags);
3906
3907 return result;
3908}
3909
3910template <>
3911uint16_t
3912fplibRecipStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3913{
3914 int mode = modeConv(fpscr);
3915 int flags = 0;
3916 int sgn1, exp1, sgn2, exp2;
3917 uint16_t mnt1, mnt2, result;
3918
3919 op1 = fplibNeg<uint16_t>(op1);
3920 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3921 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3922
3923 result = fp16_process_NaNs(op1, op2, mode, &flags);
3924 if (!result) {
3925 if ((exp1 == FP16_EXP_INF && !mnt2) ||
3926 (exp2 == FP16_EXP_INF && !mnt1)) {
3927 result = fp16_FPTwo(0);
3928 } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
3929 result = fp16_infinity(sgn1 ^ sgn2);
3930 } else {
3931 result = fp16_muladd(fp16_FPTwo(0), op1, op2, 0, mode, &flags);
3932 }
3933 }
3934
3935 set_fpscr0(fpscr, flags);
3936
3937 return result;
3938}
3939
3940template <>
3941uint32_t
3942fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3943{
3944 int mode = modeConv(fpscr);
3945 int flags = 0;
3946 int sgn1, exp1, sgn2, exp2;
3947 uint32_t mnt1, mnt2, result;
3948
3949 op1 = fplibNeg<uint32_t>(op1);
3950 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3951 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3952
3953 result = fp32_process_NaNs(op1, op2, mode, &flags);
3954 if (!result) {
3955 if ((exp1 == FP32_EXP_INF && !mnt2) ||
3956 (exp2 == FP32_EXP_INF && !mnt1)) {
3957 result = fp32_FPTwo(0);
3958 } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
3959 result = fp32_infinity(sgn1 ^ sgn2);
3960 } else {
3961 result = fp32_muladd(fp32_FPTwo(0), op1, op2, 0, mode, &flags);
3962 }
3963 }
3964
3965 set_fpscr0(fpscr, flags);
3966
3967 return result;
3968}
3969
3970template <>
3971uint64_t
3972fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3973{
3974 int mode = modeConv(fpscr);
3975 int flags = 0;
3976 int sgn1, exp1, sgn2, exp2;
3977 uint64_t mnt1, mnt2, result;
3978
3979 op1 = fplibNeg<uint64_t>(op1);
3980 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3981 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3982
3983 result = fp64_process_NaNs(op1, op2, mode, &flags);
3984 if (!result) {
3985 if ((exp1 == FP64_EXP_INF && !mnt2) ||
3986 (exp2 == FP64_EXP_INF && !mnt1)) {
3987 result = fp64_FPTwo(0);
3988 } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
3989 result = fp64_infinity(sgn1 ^ sgn2);
3990 } else {
3991 result = fp64_muladd(fp64_FPTwo(0), op1, op2, 0, mode, &flags);
3992 }
3993 }
3994
3995 set_fpscr0(fpscr, flags);
3996
3997 return result;
3998}
3999
4000template <>
4001uint16_t
4002fplibRecpX(uint16_t op, FPSCR &fpscr)
4003{
4004 int mode = modeConv(fpscr);
4005 int flags = 0;
4006 int sgn, exp;
4007 uint16_t mnt, result;
4008
4009 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4010
4011 if (fp16_is_NaN(exp, mnt)) {
4012 result = fp16_process_NaN(op, mode, &flags);
4013 }
4014 else {
4015 if (!mnt) { // Zero and denormals
4016 result = fp16_pack(sgn, FP16_EXP_INF - 1, 0);
4017 } else { // Infinities and normals
4018 result = fp16_pack(sgn, exp ^ FP16_EXP_INF, 0);
4019 }
4020 }
4021
4022 set_fpscr0(fpscr, flags);
4023
4024 return result;
4025}
4026
4027template <>
4028uint32_t
4029fplibRecpX(uint32_t op, FPSCR &fpscr)
4030{
4031 int mode = modeConv(fpscr);
4032 int flags = 0;
4033 int sgn, exp;
4034 uint32_t mnt, result;
4035
4036 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4037
4038 if (fp32_is_NaN(exp, mnt)) {
4039 result = fp32_process_NaN(op, mode, &flags);
4040 }
4041 else {
4042 if (!mnt) { // Zero and denormals
4043 result = fp32_pack(sgn, FP32_EXP_INF - 1, 0);
4044 } else { // Infinities and normals
4045 result = fp32_pack(sgn, exp ^ FP32_EXP_INF, 0);
4046 }
4047 }
4048
4049 set_fpscr0(fpscr, flags);
4050
4051 return result;
4052}
4053
4054template <>
4055uint64_t
4056fplibRecpX(uint64_t op, FPSCR &fpscr)
4057{
4058 int mode = modeConv(fpscr);
4059 int flags = 0;
4060 int sgn, exp;
4061 uint64_t mnt, result;
4062
4063 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4064
4065 if (fp64_is_NaN(exp, mnt)) {
4066 result = fp64_process_NaN(op, mode, &flags);
4067 }
4068 else {
4069 if (!mnt) { // Zero and denormals
4070 result = fp64_pack(sgn, FP64_EXP_INF - 1, 0);
4071 } else { // Infinities and normals
4072 result = fp64_pack(sgn, exp ^ FP64_EXP_INF, 0);
4073 }
4074 }
4075
4076 set_fpscr0(fpscr, flags);
4077
4078 return result;
4079}
4080
4081template <>
4082uint16_t
4083fplibRoundInt(uint16_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
4084{
4085 int expint = FP16_EXP_BIAS + FP16_MANT_BITS;
4086 int mode = modeConv(fpscr);
4087 int flags = 0;
4088 int sgn, exp;
4089 uint16_t mnt, result;
4090
4091 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4092 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4093
4094 // Handle NaNs, infinities and zeroes:
4095 if (fp16_is_NaN(exp, mnt)) {
4096 result = fp16_process_NaN(op, mode, &flags);
4097 } else if (exp == FP16_EXP_INF) {
4098 result = fp16_infinity(sgn);
4099 } else if (!mnt) {
4100 result = fp16_zero(sgn);
4101 } else if (exp >= expint) {
4102 // There are no fractional bits
4103 result = op;
4104 } else {
4105 // Truncate towards zero:
4106 uint16_t x = expint - exp >= FP16_BITS ? 0 : mnt >> (expint - exp);
4107 int err = exp < expint - FP16_BITS ? 1 :
4108 ((mnt << 1 >> (expint - exp - 1) & 3) |
4109 ((uint16_t)(mnt << 2 << (FP16_BITS + exp - expint)) != 0));
4110 switch (rounding) {
4111 case FPRounding_TIEEVEN:
4112 x += (err == 3 || (err == 2 && (x & 1)));
4113 break;
4114 case FPRounding_POSINF:
4115 x += err && !sgn;
4116 break;
4117 case FPRounding_NEGINF:
4118 x += err && sgn;
4119 break;
4120 case FPRounding_ZERO:
4121 break;
4122 case FPRounding_TIEAWAY:
4123 x += err >> 1;
4124 break;
4125 default:
4126 panic("Unrecognized FP rounding mode");
4127 }
4128
4129 if (x == 0) {
4130 result = fp16_zero(sgn);
4131 } else {
4132 exp = expint;
4133 mnt = fp16_normalise(x, &exp);
4134 result = fp16_pack(sgn, exp + FP16_EXP_BITS, mnt >> FP16_EXP_BITS);
4135 }
4136
4137 if (err && exact)
4138 flags |= FPLIB_IXC;
4139 }
4140
4141 set_fpscr0(fpscr, flags);
4142
4143 return result;
4144}
4145
4146template <>
4147uint32_t
4148fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
4149{
4150 int expint = FP32_EXP_BIAS + FP32_MANT_BITS;
4151 int mode = modeConv(fpscr);
4152 int flags = 0;
4153 int sgn, exp;
4154 uint32_t mnt, result;
4155
4156 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4157 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4158
4159 // Handle NaNs, infinities and zeroes:
4160 if (fp32_is_NaN(exp, mnt)) {
4161 result = fp32_process_NaN(op, mode, &flags);
4162 } else if (exp == FP32_EXP_INF) {
4163 result = fp32_infinity(sgn);
4164 } else if (!mnt) {
4165 result = fp32_zero(sgn);
4166 } else if (exp >= expint) {
4167 // There are no fractional bits
4168 result = op;
4169 } else {
4170 // Truncate towards zero:
4171 uint32_t x = expint - exp >= FP32_BITS ? 0 : mnt >> (expint - exp);
4172 int err = exp < expint - FP32_BITS ? 1 :
4173 ((mnt << 1 >> (expint - exp - 1) & 3) |
4174 ((uint32_t)(mnt << 2 << (FP32_BITS + exp - expint)) != 0));
4175 switch (rounding) {
4176 case FPRounding_TIEEVEN:
4177 x += (err == 3 || (err == 2 && (x & 1)));
4178 break;
4179 case FPRounding_POSINF:
4180 x += err && !sgn;
4181 break;
4182 case FPRounding_NEGINF:
4183 x += err && sgn;
4184 break;
4185 case FPRounding_ZERO:
4186 break;
4187 case FPRounding_TIEAWAY:
4188 x += err >> 1;
4189 break;
4190 default:
4191 panic("Unrecognized FP rounding mode");
4192 }
4193
4194 if (x == 0) {
4195 result = fp32_zero(sgn);
4196 } else {
4197 exp = expint;
4198 mnt = fp32_normalise(x, &exp);
4199 result = fp32_pack(sgn, exp + FP32_EXP_BITS, mnt >> FP32_EXP_BITS);
4200 }
4201
4202 if (err && exact)
4203 flags |= FPLIB_IXC;
4204 }
4205
4206 set_fpscr0(fpscr, flags);
4207
4208 return result;
4209}
4210
4211template <>
4212uint64_t
4213fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
4214{
4215 int expint = FP64_EXP_BIAS + FP64_MANT_BITS;
4216 int mode = modeConv(fpscr);
4217 int flags = 0;
4218 int sgn, exp;
4219 uint64_t mnt, result;
4220
4221 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4222 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4223
4224 // Handle NaNs, infinities and zeroes:
4225 if (fp64_is_NaN(exp, mnt)) {
4226 result = fp64_process_NaN(op, mode, &flags);
4227 } else if (exp == FP64_EXP_INF) {
4228 result = fp64_infinity(sgn);
4229 } else if (!mnt) {
4230 result = fp64_zero(sgn);
4231 } else if (exp >= expint) {
4232 // There are no fractional bits
4233 result = op;
4234 } else {
4235 // Truncate towards zero:
4236 uint64_t x = expint - exp >= FP64_BITS ? 0 : mnt >> (expint - exp);
4237 int err = exp < expint - FP64_BITS ? 1 :
4238 ((mnt << 1 >> (expint - exp - 1) & 3) |
4239 ((uint64_t)(mnt << 2 << (FP64_BITS + exp - expint)) != 0));
4240 switch (rounding) {
4241 case FPRounding_TIEEVEN:
4242 x += (err == 3 || (err == 2 && (x & 1)));
4243 break;
4244 case FPRounding_POSINF:
4245 x += err && !sgn;
4246 break;
4247 case FPRounding_NEGINF:
4248 x += err && sgn;
4249 break;
4250 case FPRounding_ZERO:
4251 break;
4252 case FPRounding_TIEAWAY:
4253 x += err >> 1;
4254 break;
4255 default:
4256 panic("Unrecognized FP rounding mode");
4257 }
4258
4259 if (x == 0) {
4260 result = fp64_zero(sgn);
4261 } else {
4262 exp = expint;
4263 mnt = fp64_normalise(x, &exp);
4264 result = fp64_pack(sgn, exp + FP64_EXP_BITS, mnt >> FP64_EXP_BITS);
4265 }
4266
4267 if (err && exact)
4268 flags |= FPLIB_IXC;
4269 }
4270
4271 set_fpscr0(fpscr, flags);
4272
4273 return result;
4274}
4275
4276template <>
4277uint16_t
4278fplibScale(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4279{
4280 int flags = 0;
4281 uint16_t result = fp16_scale(op1, (int16_t)op2, modeConv(fpscr), &flags);
4282 set_fpscr0(fpscr, flags);
4283 return result;
4284}
4285
4286template <>
4287uint32_t
4288fplibScale(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4289{
4290 int flags = 0;
4291 uint32_t result = fp32_scale(op1, (int32_t)op2, modeConv(fpscr), &flags);
4292 set_fpscr0(fpscr, flags);
4293 return result;
4294}
4295
4296template <>
4297uint64_t
4298fplibScale(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4299{
4300 int flags = 0;
4301 uint64_t result = fp64_scale(op1, (int64_t)op2, modeConv(fpscr), &flags);
4302 set_fpscr0(fpscr, flags);
4303 return result;
4304}
4305
4306template <>
4307uint16_t
4308fplibSqrt(uint16_t op, FPSCR &fpscr)
4309{
4310 int flags = 0;
4311 uint16_t result = fp16_sqrt(op, modeConv(fpscr), &flags);
4312 set_fpscr0(fpscr, flags);
4313 return result;
4314}
4315
4316template <>
4317uint32_t
4318fplibSqrt(uint32_t op, FPSCR &fpscr)
4319{
4320 int flags = 0;
4321 uint32_t result = fp32_sqrt(op, modeConv(fpscr), &flags);
4322 set_fpscr0(fpscr, flags);
4323 return result;
4324}
4325
4326template <>
4327uint64_t
4328fplibSqrt(uint64_t op, FPSCR &fpscr)
4329{
4330 int flags = 0;
4331 uint64_t result = fp64_sqrt(op, modeConv(fpscr), &flags);
4332 set_fpscr0(fpscr, flags);
4333 return result;
4334}
4335
4336template <>
4337uint16_t
4338fplibSub(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4339{
4340 int flags = 0;
4341 uint16_t result = fp16_add(op1, op2, 1, modeConv(fpscr), &flags);
4342 set_fpscr0(fpscr, flags);
4343 return result;
4344}
4345
4346template <>
4347uint32_t
4348fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4349{
4350 int flags = 0;
4351 uint32_t result = fp32_add(op1, op2, 1, modeConv(fpscr), &flags);
4352 set_fpscr0(fpscr, flags);
4353 return result;
4354}
4355
4356template <>
4357uint64_t
4358fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4359{
4360 int flags = 0;
4361 uint64_t result = fp64_add(op1, op2, 1, modeConv(fpscr), &flags);
4362 set_fpscr0(fpscr, flags);
4363 return result;
4364}
4365
4366template <>
4367uint16_t
4368fplibTrigMulAdd(uint8_t coeff_index, uint16_t op1, uint16_t op2, FPSCR &fpscr)
4369{
4370 static uint16_t coeff[2][8] = {
4371 {
4372 0x3c00,
4373 0xb155,
4374 0x2030,
4375 0x0000,
4376 0x0000,
4377 0x0000,
4378 0x0000,
4379 0x0000,
4380 },
4381 {
4382 0x3c00,
4383 0xb800,
4384 0x293a,
4385 0x0000,
4386 0x0000,
4387 0x0000,
4388 0x0000,
4389 0x0000
4390 }
4391 };
4392 int flags = 0;
4393 uint16_t result =
4394 fp16_muladd(coeff[op2 >> (FP16_BITS - 1)][coeff_index], op1,
4395 fplibAbs(op2), 0, modeConv(fpscr), &flags);
4396 set_fpscr0(fpscr, flags);
4397 return result;
4398}
4399
4400template <>
4401uint32_t
4402fplibTrigMulAdd(uint8_t coeff_index, uint32_t op1, uint32_t op2, FPSCR &fpscr)
4403{
4404 static uint32_t coeff[2][8] = {
4405 {
4406 0x3f800000,
4407 0xbe2aaaab,
4408 0x3c088886,
4409 0xb95008b9,
4410 0x36369d6d,
4411 0x00000000,
4412 0x00000000,
4413 0x00000000
4414 },
4415 {
4416 0x3f800000,
4417 0xbf000000,
4418 0x3d2aaaa6,
4419 0xbab60705,
4420 0x37cd37cc,
4421 0x00000000,
4422 0x00000000,
4423 0x00000000
4424 }
4425 };
4426 int flags = 0;
4427 uint32_t result =
4428 fp32_muladd(coeff[op2 >> (FP32_BITS - 1)][coeff_index], op1,
4429 fplibAbs(op2), 0, modeConv(fpscr), &flags);
4430 set_fpscr0(fpscr, flags);
4431 return result;
4432}
4433
4434template <>
4435uint64_t
4436fplibTrigMulAdd(uint8_t coeff_index, uint64_t op1, uint64_t op2, FPSCR &fpscr)
4437{
4438 static uint64_t coeff[2][8] = {
4439 {
4440 0x3ff0000000000000ULL,
4441 0xbfc5555555555543ULL,
4442 0x3f8111111110f30cULL,
4443 0xbf2a01a019b92fc6ULL,
4444 0x3ec71de351f3d22bULL,
4445 0xbe5ae5e2b60f7b91ULL,
4446 0x3de5d8408868552fULL,
4447 0x0000000000000000ULL
4448 },
4449 {
4450 0x3ff0000000000000ULL,
4451 0xbfe0000000000000ULL,
4452 0x3fa5555555555536ULL,
4453 0xbf56c16c16c13a0bULL,
4454 0x3efa01a019b1e8d8ULL,
4455 0xbe927e4f7282f468ULL,
4456 0x3e21ee96d2641b13ULL,
4457 0xbda8f76380fbb401ULL
4458 }
4459 };
4460 int flags = 0;
4461 uint64_t result =
4462 fp64_muladd(coeff[op2 >> (FP64_BITS - 1)][coeff_index], op1,
4463 fplibAbs(op2), 0, modeConv(fpscr), &flags);
4464 set_fpscr0(fpscr, flags);
4465 return result;
4466}
4467
4468template <>
4469uint16_t
4470fplibTrigSMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4471{
4472 int flags = 0;
4473 int sgn, exp;
4474 uint16_t mnt;
4475
4476 int mode = modeConv(fpscr);
4477 uint16_t result = fp16_mul(op1, op1, mode, &flags);
4478 set_fpscr0(fpscr, flags);
4479
4480 fp16_unpack(&sgn, &exp, &mnt, result, mode, &flags);
4481 if (!fp16_is_NaN(exp, mnt)) {
4482 result = (result & ~(1ULL << (FP16_BITS - 1))) |
4483 op2 << (FP16_BITS - 1);
4484 }
4485 return result;
4486}
4487
4488template <>
4489uint32_t
4490fplibTrigSMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4491{
4492 int flags = 0;
4493 int sgn, exp;
4494 uint32_t mnt;
4495
4496 int mode = modeConv(fpscr);
4497 uint32_t result = fp32_mul(op1, op1, mode, &flags);
4498 set_fpscr0(fpscr, flags);
4499
4500 fp32_unpack(&sgn, &exp, &mnt, result, mode, &flags);
4501 if (!fp32_is_NaN(exp, mnt)) {
4502 result = (result & ~(1ULL << (FP32_BITS - 1))) | op2 << (FP32_BITS - 1);
4503 }
4504 return result;
4505}
4506
4507template <>
4508uint64_t
4509fplibTrigSMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4510{
4511 int flags = 0;
4512 int sgn, exp;
4513 uint64_t mnt;
4514
4515 int mode = modeConv(fpscr);
4516 uint64_t result = fp64_mul(op1, op1, mode, &flags);
4517 set_fpscr0(fpscr, flags);
4518
4519 fp64_unpack(&sgn, &exp, &mnt, result, mode, &flags);
4520 if (!fp64_is_NaN(exp, mnt)) {
4521 result = (result & ~(1ULL << (FP64_BITS - 1))) | op2 << (FP64_BITS - 1);
4522 }
4523 return result;
4524}
4525
4526template <>
4527uint16_t
4528fplibTrigSSel(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4529{
4530 static constexpr uint16_t fpOne =
4531 (uint16_t)FP16_EXP_BIAS << FP16_MANT_BITS; // 1.0
4532 if (op2 & 1)
4533 op1 = fpOne;
4534 return op1 ^ ((op2 >> 1) << (FP16_BITS - 1));
4535}
4536
4537template <>
4538uint32_t
4539fplibTrigSSel(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4540{
4541 static constexpr uint32_t fpOne =
4542 (uint32_t)FP32_EXP_BIAS << FP32_MANT_BITS; // 1.0
4543 if (op2 & 1)
4544 op1 = fpOne;
4545 return op1 ^ ((op2 >> 1) << (FP32_BITS - 1));
4546}
4547
4548template <>
4549uint64_t
4550fplibTrigSSel(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4551{
4552 static constexpr uint64_t fpOne =
4553 (uint64_t)FP64_EXP_BIAS << FP64_MANT_BITS; // 1.0
4554 if (op2 & 1)
4555 op1 = fpOne;
4556 return op1 ^ ((op2 >> 1) << (FP64_BITS - 1));
4557}
4558
4559static uint64_t
4560FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
4561 int *flags)
4562{
4563 int expmax = FP64_EXP_BIAS + FP64_BITS - 1;
4564 uint64_t x;
4565 int err;
4566
4567 if (exp > expmax) {
4568 *flags = FPLIB_IOC;
4569 return ((uint64_t)!u << (FP64_BITS - 1)) - !sgn;
4570 }
4571
4572 x = lsr64(mnt << FP64_EXP_BITS, expmax - exp);
4573 err = (exp > expmax - 2 ? 0 :
4574 (lsr64(mnt << FP64_EXP_BITS, expmax - 2 - exp) & 3) |
4575 !!(mnt << FP64_EXP_BITS & (lsl64(1, expmax - 2 - exp) - 1)));
4576
4577 switch (rounding) {
4578 case FPRounding_TIEEVEN:
4579 x += (err == 3 || (err == 2 && (x & 1)));
4580 break;
4581 case FPRounding_POSINF:
4582 x += err && !sgn;
4583 break;
4584 case FPRounding_NEGINF:
4585 x += err && sgn;
4586 break;
4587 case FPRounding_ZERO:
4588 break;
4589 case FPRounding_TIEAWAY:
4590 x += err >> 1;
4591 break;
4592 default:
4593 panic("Unrecognized FP rounding mode");
4594 }
4595
4596 if (u ? sgn && x : x > (1ULL << (FP64_BITS - 1)) - !sgn) {
4597 *flags = FPLIB_IOC;
4598 return ((uint64_t)!u << (FP64_BITS - 1)) - !sgn;
4599 }
4600
4601 if (err) {
4602 *flags = FPLIB_IXC;
4603 }
4604
4605 return sgn ? -x : x;
4606}
4607
4608static uint32_t
4609FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
4610 int *flags)
4611{
4612 uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
4613 if (u ? x >= 1ULL << FP32_BITS :
4614 !(x < 1ULL << (FP32_BITS - 1) ||
4615 (uint64_t)-x <= (uint64_t)1 << (FP32_BITS - 1))) {
4616 *flags = FPLIB_IOC;
4617 x = ((uint32_t)!u << (FP32_BITS - 1)) - !sgn;
4618 }
4619 return x;
4620}
4621
4622static uint16_t
4623FPToFixed_16(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
4624 int *flags)
4625{
4626 uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
4627 if (u ? x >= 1ULL << FP16_BITS :
4628 !(x < 1ULL << (FP16_BITS - 1) ||
4629 (uint64_t)-x <= (uint64_t)1 << (FP16_BITS - 1))) {
4630 *flags = FPLIB_IOC;
4631 x = ((uint16_t)!u << (FP16_BITS - 1)) - !sgn;
4632 }
4633 return x;
4634}
4635
4636template <>
4637uint16_t
4638fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
4639 FPSCR &fpscr)
4640{
4641 int flags = 0;
4642 int sgn, exp;
4643 uint16_t mnt, result;
4644
4645 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4646 fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4647
4648 // If NaN, set cumulative flag or take exception:
4649 if (fp16_is_NaN(exp, mnt)) {
4650 flags = FPLIB_IOC;
4651 result = 0;
4652 } else {
4653 assert(fbits >= 0);
4654 // Infinity is treated as an ordinary normalised number that saturates.
4655 result =
4656 FPToFixed_16(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits,
4657 (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS),
4658 u, rounding, &flags);
4659 }
4660
4661 set_fpscr0(fpscr, flags);
4662
4663 return result;
4664}
4665
4666template <>
4667uint32_t
4668fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
4669 FPSCR &fpscr)
4670{
4671 int flags = 0;
4672 int sgn, exp;
4673 uint16_t mnt;
4674 uint32_t result;
4675
4676 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4677 fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4678
4679 // If NaN, set cumulative flag or take exception:
4680 if (fp16_is_NaN(exp, mnt)) {
4681 flags = FPLIB_IOC;
4682 result = 0;
4683 } else {
4684 assert(fbits >= 0);
4685 if (exp == FP16_EXP_INF)
4686 exp = 255; // infinity: make it big enough to saturate
4687 result =
4688 FPToFixed_32(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits,
4689 (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS),
4690 u, rounding, &flags);
4691 }
4692
4693 set_fpscr0(fpscr, flags);
4694
4695 return result;
4696}
4697
4698template <>
4699uint32_t
4700fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4701{
4702 int flags = 0;
4703 int sgn, exp;
4704 uint32_t mnt, result;
4705
4706 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4707 fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4708
4709 // If NaN, set cumulative flag or take exception:
4710 if (fp32_is_NaN(exp, mnt)) {
4711 flags = FPLIB_IOC;
4712 result = 0;
4713 } else {
4714 assert(fbits >= 0);
4715 // Infinity is treated as an ordinary normalised number that saturates.
4716 result =
4717 FPToFixed_32(sgn, exp + FP64_EXP_BIAS - FP32_EXP_BIAS + fbits,
4718 (uint64_t)mnt << (FP64_MANT_BITS - FP32_MANT_BITS),
4719 u, rounding, &flags);
4720 }
4721
4722 set_fpscr0(fpscr, flags);
4723
4724 return result;
4725}
4726
4727template <>
4728uint32_t
4729fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4730{
4731 int flags = 0;
4732 int sgn, exp;
4733 uint64_t mnt;
4734 uint32_t result;
4735
4736 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4737 fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4738
4739 // If NaN, set cumulative flag or take exception:
4740 if (fp64_is_NaN(exp, mnt)) {
4741 flags = FPLIB_IOC;
4742 result = 0;
4743 } else {
4744 assert(fbits >= 0);
4745 // Infinity is treated as an ordinary normalised number that saturates.
4746 result = FPToFixed_32(sgn, exp + fbits, mnt, u, rounding, &flags);
4747 }
4748
4749 set_fpscr0(fpscr, flags);
4750
4751 return result;
4752}
4753
4754uint32_t
4755fplibFPToFixedJS(uint64_t op, FPSCR &fpscr, bool is64, uint8_t& nz)
4756{
4757 int flags = 0;
4758 uint32_t result;
4759 bool Z = true;
4760
4761 uint32_t sgn = bits(op, 63);
4762 int32_t exp = bits(op, 62, 52);
4763 uint64_t mnt = bits(op, 51, 0);
4764
4765 if (exp == 0) {
4766 if (mnt != 0) {
4767 if (fpscr.fz) {
4768 flags |= FPLIB_IDC;
4769 } else {
4770 flags |= FPLIB_IXC;
4771 Z = 0;
4772 }
4773 }
4774 result = 0;
4775 } else if (exp == 0x7ff) {
4776 flags |= FPLIB_IOC;
4777 result = 0;
4778 Z = 0;
4779 } else {
4780 mnt |= 1ULL << FP64_MANT_BITS;
4781 int mnt_shft = exp - FP64_EXP_BIAS - 52;
4782 bool err = true;
4783
4784 if (abs(mnt_shft) >= FP64_BITS) {
4785 result = 0;
4786 Z = 0;
4787 } else if (mnt_shft >= 0) {
4788 result = lsl64(mnt, mnt_shft);
4789 } else if (mnt_shft < 0) {
4790 err = lsl64(mnt, mnt_shft+FP64_BITS) != 0;
4791 result = lsr64(mnt, abs(mnt_shft));
4792 }
4793 uint64_t max_result = (1UL << (FP32_BITS - 1)) -!sgn;
4794 if ((exp - FP64_EXP_BIAS) > 31 || result > max_result) {
4795 flags |= FPLIB_IOC;
4796 Z = false;
4797 } else if (err) {
4798 flags |= FPLIB_IXC;
4799 Z = false;
4800 }
4801 result = sgn ? -result : result;
4802 }
4803 if (sgn == 1 && result == 0)
4804 Z = false;
4805
4806 if (is64) {
4807 nz = Z? 0x1: 0x0;
4808 } else {
4809 fpscr.n = 0;
4810 fpscr.z = (int)Z;
4811 fpscr.c = 0;
4812 fpscr.v = 0;
4813 }
4814
4815 set_fpscr0(fpscr, flags);
4816 return result;
4817}
4818
4819template <>
4820uint64_t
4821fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
4822 FPSCR &fpscr)
4823{
4824 int flags = 0;
4825 int sgn, exp;
4826 uint16_t mnt;
4827 uint64_t result;
4828
4829 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4830 fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4831
4832 // If NaN, set cumulative flag or take exception:
4833 if (fp16_is_NaN(exp, mnt)) {
4834 flags = FPLIB_IOC;
4835 result = 0;
4836 } else {
4837 assert(fbits >= 0);
4838 if (exp == FP16_EXP_INF)
4839 exp = 255; // infinity: make it big enough to saturate
4840 result =
4841 FPToFixed_64(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits,
4842 (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS),
4843 u, rounding, &flags);
4844 }
4845
4846 set_fpscr0(fpscr, flags);
4847
4848 return result;
4849}
4850
4851template <>
4852uint64_t
4853fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4854{
4855 int flags = 0;
4856 int sgn, exp;
4857 uint32_t mnt;
4858 uint64_t result;
4859
4860 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4861 fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4862
4863 // If NaN, set cumulative flag or take exception:
4864 if (fp32_is_NaN(exp, mnt)) {
4865 flags =