gem5 v19.0.0.0
fplib.cc
/*
 * Copyright (c) 2012-2013, 2017-2018 ARM Limited
 * Copyright (c) 2020 Metempsy Technology Consulting
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Edmund Grimley Evans
 *          Thomas Grocutt
 */
#include <stdint.h>

#include <cassert>
#include <cmath>

#include "base/logging.hh"
#include "fplib.hh"

namespace ArmISA
{

#define FPLIB_RN 0
#define FPLIB_RP 1
#define FPLIB_RM 2
#define FPLIB_RZ 3
#define FPLIB_FZ 4
#define FPLIB_DN 8
#define FPLIB_AHP 16
#define FPLIB_FZ16 32

#define FPLIB_IDC 128 // Input Denormal
#define FPLIB_IXC 16  // Inexact
#define FPLIB_UFC 8   // Underflow
#define FPLIB_OFC 4   // Overflow
#define FPLIB_DZC 2   // Division by Zero
#define FPLIB_IOC 1   // Invalid Operation

#define FP16_BITS 16
#define FP32_BITS 32
#define FP64_BITS 64

#define FP16_EXP_BITS 5
#define FP32_EXP_BITS 8
#define FP64_EXP_BITS 11

#define FP16_EXP_BIAS 15
#define FP32_EXP_BIAS 127
#define FP64_EXP_BIAS 1023

#define FP16_EXP_INF ((1ULL << FP16_EXP_BITS) - 1)
#define FP32_EXP_INF ((1ULL << FP32_EXP_BITS) - 1)
#define FP64_EXP_INF ((1ULL << FP64_EXP_BITS) - 1)

#define FP16_MANT_BITS (FP16_BITS - FP16_EXP_BITS - 1)
#define FP32_MANT_BITS (FP32_BITS - FP32_EXP_BITS - 1)
#define FP64_MANT_BITS (FP64_BITS - FP64_EXP_BITS - 1)

#define FP16_EXP(x) ((x) >> FP16_MANT_BITS & ((1ULL << FP16_EXP_BITS) - 1))
#define FP32_EXP(x) ((x) >> FP32_MANT_BITS & ((1ULL << FP32_EXP_BITS) - 1))
#define FP64_EXP(x) ((x) >> FP64_MANT_BITS & ((1ULL << FP64_EXP_BITS) - 1))

#define FP16_MANT(x) ((x) & ((1ULL << FP16_MANT_BITS) - 1))
#define FP32_MANT(x) ((x) & ((1ULL << FP32_MANT_BITS) - 1))
#define FP64_MANT(x) ((x) & ((1ULL << FP64_MANT_BITS) - 1))
static inline uint16_t
lsl16(uint16_t x, uint32_t shift)
{
    return shift < 16 ? x << shift : 0;
}

static inline uint16_t
lsr16(uint16_t x, uint32_t shift)
{
    return shift < 16 ? x >> shift : 0;
}

static inline uint32_t
lsl32(uint32_t x, uint32_t shift)
{
    return shift < 32 ? x << shift : 0;
}

static inline uint32_t
lsr32(uint32_t x, uint32_t shift)
{
    return shift < 32 ? x >> shift : 0;
}

static inline uint64_t
lsl64(uint64_t x, uint32_t shift)
{
    return shift < 64 ? x << shift : 0;
}

static inline uint64_t
lsr64(uint64_t x, uint32_t shift)
{
    return shift < 64 ? x >> shift : 0;
}

static inline void
lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift == 0) {
        *r1 = x1;
        *r0 = x0;
    } else if (shift < 64) {
        *r1 = x1 << shift | x0 >> (64 - shift);
        *r0 = x0 << shift;
    } else if (shift < 128) {
        *r1 = x0 << (shift - 64);
        *r0 = 0;
    } else {
        *r1 = 0;
        *r0 = 0;
    }
}

static inline void
lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift == 0) {
        *r1 = x1;
        *r0 = x0;
    } else if (shift < 64) {
        *r0 = x0 >> shift | x1 << (64 - shift);
        *r1 = x1 >> shift;
    } else if (shift < 128) {
        *r0 = x1 >> (shift - 64);
        *r1 = 0;
    } else {
        *r0 = 0;
        *r1 = 0;
    }
}
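
/*
 * Illustrative note (not part of the original source): unlike the raw
 * C++ shift operators, whose behaviour is undefined when the shift
 * count equals or exceeds the operand width, these helpers saturate to
 * zero, e.g.:
 *
 *     assert(lsr32(0x80000000, 32) == 0); // plain ">> 32" would be UB
 *     assert(lsl64(1, 64) == 0);          // plain "<< 64" would be UB
 *     assert(lsr32(0x80000000, 1) == 0x40000000);
 */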

static inline void
mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
{
    uint32_t mask = ((uint32_t)1 << 31) - 1;
    uint64_t a0 = a & mask;
    uint64_t a1 = a >> 31 & mask;
    uint64_t b0 = b & mask;
    uint64_t b1 = b >> 31 & mask;
    uint64_t p0 = a0 * b0;
    uint64_t p2 = a1 * b1;
    uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2;
    uint64_t s0 = p0;
    uint64_t s1 = (s0 >> 31) + p1;
    uint64_t s2 = (s1 >> 31) + p2;
    *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62;
    *x1 = s2 >> 2;
}
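
/*
 * Illustrative sketch (not part of the original source): mul62x62 splits
 * each operand (which must fit in 62 bits, hence the name) into two
 * 31-bit halves and uses the Karatsuba identity
 * a1*b0 + a0*b1 == (a0 + a1)*(b0 + b1) - a0*b0 - a1*b1, so the 124-bit
 * product costs three 64-bit multiplies instead of four. On compilers
 * with the (non-standard) unsigned __int128 extension the result can be
 * cross-checked directly:
 *
 *     uint64_t lo, hi;
 *     uint64_t a = 0x2aaaaaaaaaaaaaaaULL, b = 0x1555555555555555ULL;
 *     mul62x62(&lo, &hi, a, b);
 *     unsigned __int128 p = (unsigned __int128)a * b;
 *     assert(lo == (uint64_t)p && hi == (uint64_t)(p >> 64));
 */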

static inline void
mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b)
{
    uint64_t t0 = (uint64_t)(uint32_t)a * b;
    uint64_t t1 = (t0 >> 32) + (a >> 32) * b;
    *x0 = t1 << 32 | (uint32_t)t0;
    *x1 = t1 >> 32;
}

static inline void
add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    *x0 = a0 + b0;
    *x1 = a1 + b1 + (*x0 < a0);
}

static inline void
sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    *x0 = a0 - b0;
    *x1 = a1 - b1 - (*x0 > a0);
}

static inline int
cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
{
    return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 1 : 0);
}
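
/*
 * Illustrative sketch (not part of the original source): add128/sub128
 * derive the carry/borrow from the comparison against the original low
 * word, and cmp128 orders values by the high word first:
 *
 *     uint64_t lo, hi;
 *     add128(&lo, &hi, ~(uint64_t)0, 0, 1, 0);   // (2^64 - 1) + 1
 *     assert(lo == 0 && hi == 1);                // == 2^64
 *     sub128(&lo, &hi, 0, 1, 1, 0);              // 2^64 - 1
 *     assert(lo == ~(uint64_t)0 && hi == 0);
 *     assert(cmp128(0, 1, ~(uint64_t)0, 0) > 0); // high word dominates
 */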

static inline uint16_t
fp16_normalise(uint16_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 8; shift; shift >>= 1) {
        if (!(mnt >> (16 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline uint32_t
fp32_normalise(uint32_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 16; shift; shift >>= 1) {
        if (!(mnt >> (32 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline uint64_t
fp64_normalise(uint64_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 32; shift; shift >>= 1) {
        if (!(mnt >> (64 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline void
fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp)
{
    uint64_t x0 = *mnt0;
    uint64_t x1 = *mnt1;
    int shift;

    if (!x0 && !x1) {
        return;
    }

    if (!x1) {
        x1 = x0;
        x0 = 0;
        *exp -= 64;
    }

    for (shift = 32; shift; shift >>= 1) {
        if (!(x1 >> (64 - shift))) {
            x1 = x1 << shift | x0 >> (64 - shift);
            x0 <<= shift;
            *exp -= shift;
        }
    }

    *mnt0 = x0;
    *mnt1 = x1;
}
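
/*
 * Illustrative note (not part of the original source): the normalise
 * helpers shift the mantissa left until its top bit is set, adjusting
 * the exponent by the shift count, using a binary search over shift
 * widths (8, 4, 2, 1 for 16 bits, and so on). A minimal check:
 *
 *     int exp = 0;
 *     assert(fp16_normalise(1, &exp) == 0x8000 && exp == -15);
 */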

static inline uint16_t
fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt)
{
    return sgn << (FP16_BITS - 1) | exp << FP16_MANT_BITS | FP16_MANT(mnt);
}

static inline uint32_t
fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt)
{
    return sgn << (FP32_BITS - 1) | exp << FP32_MANT_BITS | FP32_MANT(mnt);
}

static inline uint64_t
fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt)
{
    return sgn << (FP64_BITS - 1) | exp << FP64_MANT_BITS | FP64_MANT(mnt);
}

static inline uint16_t
fp16_zero(int sgn)
{
    return fp16_pack(sgn, 0, 0);
}

static inline uint32_t
fp32_zero(int sgn)
{
    return fp32_pack(sgn, 0, 0);
}

static inline uint64_t
fp64_zero(int sgn)
{
    return fp64_pack(sgn, 0, 0);
}

static inline uint16_t
fp16_max_normal(int sgn)
{
    return fp16_pack(sgn, FP16_EXP_INF - 1, -1);
}

static inline uint32_t
fp32_max_normal(int sgn)
{
    return fp32_pack(sgn, FP32_EXP_INF - 1, -1);
}

static inline uint64_t
fp64_max_normal(int sgn)
{
    return fp64_pack(sgn, FP64_EXP_INF - 1, -1);
}

static inline uint16_t
fp16_infinity(int sgn)
{
    return fp16_pack(sgn, FP16_EXP_INF, 0);
}

static inline uint32_t
fp32_infinity(int sgn)
{
    return fp32_pack(sgn, FP32_EXP_INF, 0);
}

static inline uint64_t
fp64_infinity(int sgn)
{
    return fp64_pack(sgn, FP64_EXP_INF, 0);
}

static inline uint16_t
fp16_defaultNaN()
{
    return fp16_pack(0, FP16_EXP_INF, 1ULL << (FP16_MANT_BITS - 1));
}

static inline uint32_t
fp32_defaultNaN()
{
    return fp32_pack(0, FP32_EXP_INF, 1ULL << (FP32_MANT_BITS - 1));
}

static inline uint64_t
fp64_defaultNaN()
{
    return fp64_pack(0, FP64_EXP_INF, 1ULL << (FP64_MANT_BITS - 1));
}
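
/*
 * Illustrative note (not part of the original source): the constructors
 * above produce the usual IEEE-754 special encodings, e.g. for single
 * precision:
 *
 *     assert(fp32_zero(0) == 0x00000000);
 *     assert(fp32_infinity(0) == 0x7f800000);
 *     assert(fp32_infinity(1) == 0xff800000);
 *     assert(fp32_max_normal(0) == 0x7f7fffff); // (2 - 2^-23) * 2^127
 *     assert(fp32_defaultNaN() == 0x7fc00000);  // quiet NaN
 */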

static inline void
fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode,
            int *flags)
{
    *sgn = x >> (FP16_BITS - 1);
    *exp = FP16_EXP(x);
    *mnt = FP16_MANT(x);

    // Handle subnormals:
    if (*exp) {
        *mnt |= 1ULL << FP16_MANT_BITS;
    } else {
        ++*exp;
        // IDC (Input Denormal) is not set in this case.
        if (mode & FPLIB_FZ16)
            *mnt = 0;
    }
}

static inline void
fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode,
            int *flags)
{
    *sgn = x >> (FP32_BITS - 1);
    *exp = FP32_EXP(x);
    *mnt = FP32_MANT(x);

    // Handle subnormals:
    if (*exp) {
        *mnt |= 1ULL << FP32_MANT_BITS;
    } else {
        ++*exp;
        if ((mode & FPLIB_FZ) && *mnt) {
            *flags |= FPLIB_IDC;
            *mnt = 0;
        }
    }
}

static inline void
fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode,
            int *flags)
{
    *sgn = x >> (FP64_BITS - 1);
    *exp = FP64_EXP(x);
    *mnt = FP64_MANT(x);

    // Handle subnormals:
    if (*exp) {
        *mnt |= 1ULL << FP64_MANT_BITS;
    } else {
        ++*exp;
        if ((mode & FPLIB_FZ) && *mnt) {
            *flags |= FPLIB_IDC;
            *mnt = 0;
        }
    }
}

static inline int
fp16_is_NaN(int exp, uint16_t mnt)
{
    return exp == FP16_EXP_INF && FP16_MANT(mnt);
}

static inline int
fp32_is_NaN(int exp, uint32_t mnt)
{
    return exp == FP32_EXP_INF && FP32_MANT(mnt);
}

static inline int
fp64_is_NaN(int exp, uint64_t mnt)
{
    return exp == FP64_EXP_INF && FP64_MANT(mnt);
}

static inline int
fp16_is_signalling_NaN(int exp, uint16_t mnt)
{
    return fp16_is_NaN(exp, mnt) && !(mnt >> (FP16_MANT_BITS - 1) & 1);
}

static inline int
fp32_is_signalling_NaN(int exp, uint32_t mnt)
{
    return fp32_is_NaN(exp, mnt) && !(mnt >> (FP32_MANT_BITS - 1) & 1);
}

static inline int
fp64_is_signalling_NaN(int exp, uint64_t mnt)
{
    return fp64_is_NaN(exp, mnt) && !(mnt >> (FP64_MANT_BITS - 1) & 1);
}

static inline int
fp16_is_quiet_NaN(int exp, uint16_t mnt)
{
    return exp == FP16_EXP_INF && (mnt >> (FP16_MANT_BITS - 1) & 1);
}

static inline int
fp32_is_quiet_NaN(int exp, uint32_t mnt)
{
    return exp == FP32_EXP_INF && (mnt >> (FP32_MANT_BITS - 1) & 1);
}

static inline int
fp64_is_quiet_NaN(int exp, uint64_t mnt)
{
    return exp == FP64_EXP_INF && (mnt >> (FP64_MANT_BITS - 1) & 1);
}

static inline int
fp16_is_infinity(int exp, uint16_t mnt)
{
    return exp == FP16_EXP_INF && !FP16_MANT(mnt);
}

static inline int
fp32_is_infinity(int exp, uint32_t mnt)
{
    return exp == FP32_EXP_INF && !FP32_MANT(mnt);
}

static inline int
fp64_is_infinity(int exp, uint64_t mnt)
{
    return exp == FP64_EXP_INF && !FP64_MANT(mnt);
}
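
/*
 * Illustrative note (not part of the original source): a NaN has an
 * all-ones exponent and a non-zero mantissa field; the top mantissa bit
 * then distinguishes quiet (1) from signalling (0) NaNs, e.g.:
 *
 *     assert(fp32_is_quiet_NaN(FP32_EXP(0x7fc00000),
 *                              FP32_MANT(0x7fc00000)));
 *     assert(fp32_is_signalling_NaN(FP32_EXP(0x7f800001),
 *                                   FP32_MANT(0x7f800001)));
 *     assert(!fp32_is_NaN(FP32_EXP(0x7f800000),
 *                         FP32_MANT(0x7f800000))); // infinity, not NaN
 */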

static inline uint16_t
fp16_process_NaN(uint16_t a, int mode, int *flags)
{
    if (!(a >> (FP16_MANT_BITS - 1) & 1)) {
        *flags |= FPLIB_IOC;
        a |= 1ULL << (FP16_MANT_BITS - 1);
    }
    return mode & FPLIB_DN ? fp16_defaultNaN() : a;
}

static inline uint32_t
fp32_process_NaN(uint32_t a, int mode, int *flags)
{
    if (!(a >> (FP32_MANT_BITS - 1) & 1)) {
        *flags |= FPLIB_IOC;
        a |= 1ULL << (FP32_MANT_BITS - 1);
    }
    return mode & FPLIB_DN ? fp32_defaultNaN() : a;
}

static inline uint64_t
fp64_process_NaN(uint64_t a, int mode, int *flags)
{
    if (!(a >> (FP64_MANT_BITS - 1) & 1)) {
        *flags |= FPLIB_IOC;
        a |= 1ULL << (FP64_MANT_BITS - 1);
    }
    return mode & FPLIB_DN ? fp64_defaultNaN() : a;
}

static uint16_t
fp16_process_NaNs(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_exp = FP16_EXP(a);
    uint16_t a_mnt = FP16_MANT(a);
    int b_exp = FP16_EXP(b);
    uint16_t b_mnt = FP16_MANT(b);

    // Handle signalling NaNs:
    if (fp16_is_signalling_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_signalling_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (fp16_is_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);

    return 0;
}

static uint32_t
fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_exp = FP32_EXP(a);
    uint32_t a_mnt = FP32_MANT(a);
    int b_exp = FP32_EXP(b);
    uint32_t b_mnt = FP32_MANT(b);

    // Handle signalling NaNs:
    if (fp32_is_signalling_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_signalling_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (fp32_is_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);

    return 0;
}

static uint64_t
fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_exp = FP64_EXP(a);
    uint64_t a_mnt = FP64_MANT(a);
    int b_exp = FP64_EXP(b);
    uint64_t b_mnt = FP64_MANT(b);

    // Handle signalling NaNs:
    if (fp64_is_signalling_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_signalling_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (fp64_is_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);

    return 0;
}

static uint16_t
fp16_process_NaNs3(uint16_t a, uint16_t b, uint16_t c, int mode, int *flags)
{
    int a_exp = FP16_EXP(a);
    uint16_t a_mnt = FP16_MANT(a);
    int b_exp = FP16_EXP(b);
    uint16_t b_mnt = FP16_MANT(b);
    int c_exp = FP16_EXP(c);
    uint16_t c_mnt = FP16_MANT(c);

    // Handle signalling NaNs:
    if (fp16_is_signalling_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_signalling_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);
    if (fp16_is_signalling_NaN(c_exp, c_mnt))
        return fp16_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (fp16_is_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);
    if (fp16_is_NaN(c_exp, c_mnt))
        return fp16_process_NaN(c, mode, flags);

    return 0;
}

static uint32_t
fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags)
{
    int a_exp = FP32_EXP(a);
    uint32_t a_mnt = FP32_MANT(a);
    int b_exp = FP32_EXP(b);
    uint32_t b_mnt = FP32_MANT(b);
    int c_exp = FP32_EXP(c);
    uint32_t c_mnt = FP32_MANT(c);

    // Handle signalling NaNs:
    if (fp32_is_signalling_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_signalling_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);
    if (fp32_is_signalling_NaN(c_exp, c_mnt))
        return fp32_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (fp32_is_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);
    if (fp32_is_NaN(c_exp, c_mnt))
        return fp32_process_NaN(c, mode, flags);

    return 0;
}

static uint64_t
fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags)
{
    int a_exp = FP64_EXP(a);
    uint64_t a_mnt = FP64_MANT(a);
    int b_exp = FP64_EXP(b);
    uint64_t b_mnt = FP64_MANT(b);
    int c_exp = FP64_EXP(c);
    uint64_t c_mnt = FP64_MANT(c);

    // Handle signalling NaNs:
    if (fp64_is_signalling_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_signalling_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);
    if (fp64_is_signalling_NaN(c_exp, c_mnt))
        return fp64_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (fp64_is_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);
    if (fp64_is_NaN(c_exp, c_mnt))
        return fp64_process_NaN(c, mode, flags);

    return 0;
}
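
/*
 * Illustrative sketch (not part of the original source): signalling NaNs
 * take precedence over quiet NaNs, and earlier operands over later ones;
 * the selected NaN is quietened and FPLIB_IOC is raised. For example,
 * with a quiet NaN in a and a signalling NaN in b:
 *
 *     int flags = 0;
 *     uint32_t r = fp32_process_NaNs(0x7fc00002, 0x7f800001, 0, &flags);
 *     assert(r == 0x7fc00001 && (flags & FPLIB_IOC)); // quietened b wins
 */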

static uint16_t
fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint16_t int_mant; // mantissa for result, less than (2 << FP16_MANT_BITS)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ16) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp16_zero(sgn);
    }

    // The bottom FP16_EXP_BITS bits of mnt are orred together:
    mnt = (4ULL << FP16_MANT_BITS | mnt >> (FP16_EXP_BITS - 1) |
           ((mnt & ((1ULL << FP16_EXP_BITS) - 1)) != 0));

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr16(mnt, 3 - exp);
        error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == 1ULL << FP16_MANT_BITS) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == 2ULL << FP16_MANT_BITS) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (!(mode & FPLIB_AHP)) {
        if (biased_exp >= (int)FP16_EXP_INF) {
            *flags |= FPLIB_OFC | FPLIB_IXC;
            if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
                (rm == FPLIB_RM && sgn)) {
                return fp16_infinity(sgn);
            } else {
                return fp16_max_normal(sgn);
            }
        }
    } else {
        if (biased_exp >= (int)FP16_EXP_INF + 1) {
            *flags |= FPLIB_IOC;
            return fp16_pack(sgn, FP16_EXP_INF, -1);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp16_pack(sgn, biased_exp, int_mant);
}

static uint16_t
fp16_round(int sgn, int exp, uint16_t mnt, int mode, int *flags)
{
    return fp16_round_(sgn, exp, mnt, mode & 3, mode, flags);
}

static uint32_t
fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint32_t int_mant; // mantissa for result, less than (2 << FP32_MANT_BITS)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp32_zero(sgn);
    }

    // The bottom FP32_EXP_BITS bits of mnt are orred together:
    mnt = (4ULL << FP32_MANT_BITS | mnt >> (FP32_EXP_BITS - 1) |
           ((mnt & ((1ULL << FP32_EXP_BITS) - 1)) != 0));

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr32(mnt, 3 - exp);
        error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == 1ULL << FP32_MANT_BITS) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == 2ULL << FP32_MANT_BITS) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (biased_exp >= (int)FP32_EXP_INF) {
        *flags |= FPLIB_OFC | FPLIB_IXC;
        if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
            (rm == FPLIB_RM && sgn)) {
            return fp32_infinity(sgn);
        } else {
            return fp32_max_normal(sgn);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp32_pack(sgn, biased_exp, int_mant);
}

static uint32_t
fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags)
{
    return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags);
}

static uint64_t
fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint64_t int_mant; // mantissa for result, less than (2 << FP64_MANT_BITS)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp64_zero(sgn);
    }

    // The bottom FP64_EXP_BITS bits of mnt are orred together:
    mnt = (4ULL << FP64_MANT_BITS | mnt >> (FP64_EXP_BITS - 1) |
           ((mnt & ((1ULL << FP64_EXP_BITS) - 1)) != 0));

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr64(mnt, 3 - exp);
        error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == 1ULL << FP64_MANT_BITS) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == 2ULL << FP64_MANT_BITS) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (biased_exp >= (int)FP64_EXP_INF) {
        *flags |= FPLIB_OFC | FPLIB_IXC;
        if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
            (rm == FPLIB_RM && sgn)) {
            return fp64_infinity(sgn);
        } else {
            return fp64_max_normal(sgn);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp64_pack(sgn, biased_exp, int_mant);
}

static uint64_t
fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags)
{
    return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags);
}
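
/*
 * Illustrative sketch (not part of the original source): in the default
 * FPLIB_RN mode, error == 2 (an exact tie) rounds to the nearest even
 * mantissa. Exercised through fp32_add with raw bit patterns:
 *
 *     int flags = 0;
 *     // 1.0 + 2^-24 is an exact tie and rounds down to 1.0 (even):
 *     assert(fp32_add(0x3f800000, 0x33800000, 0, 0, &flags)
 *            == 0x3f800000);
 *     // (1 + 2^-23) + 2^-24 ties and rounds up to 1 + 2^-22 (even):
 *     assert(fp32_add(0x3f800001, 0x33800000, 0, 0, &flags)
 *            == 0x3f800002);
 *     assert(flags & FPLIB_IXC); // both results were inexact
 */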

static int
fp16_compare_eq(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        if (fp16_is_signalling_NaN(a_exp, a_mnt) ||
            fp16_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp16_compare_ge(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp16_compare_gt(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

static int
fp16_compare_un(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        if (fp16_is_signalling_NaN(a_exp, a_mnt) ||
            fp16_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 1;
    }
    return 0;
}

static int
fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        if (fp32_is_signalling_NaN(a_exp, a_mnt) ||
            fp32_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

static int
fp32_compare_un(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        if (fp32_is_signalling_NaN(a_exp, a_mnt) ||
            fp32_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 1;
    }
    return 0;
}

static int
fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        if (fp64_is_signalling_NaN(a_exp, a_mnt) ||
            fp64_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

static int
fp64_compare_un(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        if (fp64_is_signalling_NaN(a_exp, a_mnt) ||
            fp64_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 1;
    }
    return 0;
}
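
/*
 * Illustrative note (not part of the original source): the four
 * predicates mirror the IEEE-754 comparison relations: _eq treats +0 and
 * -0 as equal and is quiet (only signalling NaNs raise IOC), _ge and _gt
 * raise IOC for any NaN operand and return 0, and _un reports the
 * unordered case. For example:
 *
 *     int flags = 0;
 *     assert(fp32_compare_eq(0x00000000, 0x80000000, 0, &flags)); // +0 == -0
 *     assert(fp32_compare_un(0x7fc00000, 0x3f800000, 0, &flags)); // unordered
 *     assert(!(flags & FPLIB_IOC)); // a quiet NaN raises no IOC here
 */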

static uint16_t
fp16_add(uint16_t a, uint16_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint16_t a_mnt, b_mnt, x, x_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp16_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == FP16_EXP_INF && b_exp == FP16_EXP_INF && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    } else if (a_exp == FP16_EXP_INF) {
        return fp16_infinity(a_sgn);
    } else if (b_exp == FP16_EXP_INF) {
        return fp16_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp16_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr16(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl16(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr16(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl16(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp16_zero((mode & 3) == 2);
    }

    x_mnt = fp16_normalise(x_mnt, &x_exp);

    return fp16_round(x_sgn, x_exp + FP16_EXP_BITS - 3, x_mnt << 1,
                      mode, flags);
}

static uint32_t
fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x, x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == FP32_EXP_INF && b_exp == FP32_EXP_INF && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    } else if (a_exp == FP32_EXP_INF) {
        return fp32_infinity(a_sgn);
    } else if (b_exp == FP32_EXP_INF) {
        return fp32_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp32_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr32(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr32(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp32_zero((mode & 3) == 2);
    }

    x_mnt = fp32_normalise(x_mnt, &x_exp);

    return fp32_round(x_sgn, x_exp + FP32_EXP_BITS - 3, x_mnt << 1,
                      mode, flags);
}

static uint64_t
fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint64_t a_mnt, b_mnt, x, x_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == FP64_EXP_INF && b_exp == FP64_EXP_INF && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    } else if (a_exp == FP64_EXP_INF) {
        return fp64_infinity(a_sgn);
    } else if (b_exp == FP64_EXP_INF) {
        return fp64_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp64_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr64(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr64(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp64_zero((mode & 3) == 2);
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp + FP64_EXP_BITS - 3, x_mnt << 1,
                      mode, flags);
}
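
/*
 * Illustrative sketch (not part of the original source): the adders work
 * on raw IEEE-754 bit patterns; neg == 1 turns the operation into a
 * subtraction. In double precision, 1.0 + 2.0 == 3.0 exactly:
 *
 *     int flags = 0;
 *     assert(fp64_add(0x3ff0000000000000ULL, 0x4000000000000000ULL,
 *                     0, 0, &flags) == 0x4008000000000000ULL);
 *     assert(!(flags & FPLIB_IXC)); // exact, no inexact flag
 */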

static uint16_t
fp16_mul(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint16_t a_mnt, b_mnt, x;
    uint32_t x_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp16_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == FP16_EXP_INF && !b_mnt) ||
        (b_exp == FP16_EXP_INF && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    } else if (a_exp == FP16_EXP_INF || b_exp == FP16_EXP_INF) {
        return fp16_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp16_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1;
    x_mnt = (uint32_t)a_mnt * b_mnt;
    x_mnt = fp32_normalise(x_mnt, &x_exp);

    // Convert to FP16_BITS bits, collapsing error into bottom bit:
    x_mnt = lsr32(x_mnt, FP16_BITS - 1) | !!lsl32(x_mnt, FP16_BITS + 1);

    return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint32_t
fp32_mul(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x;
    uint64_t x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == FP32_EXP_INF && !b_mnt) ||
        (b_exp == FP32_EXP_INF && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    } else if (a_exp == FP32_EXP_INF || b_exp == FP32_EXP_INF) {
        return fp32_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp32_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1;
    x_mnt = (uint64_t)a_mnt * b_mnt;
    x_mnt = fp64_normalise(x_mnt, &x_exp);

    // Convert to FP32_BITS bits, collapsing error into bottom bit:
    x_mnt = lsr64(x_mnt, FP32_BITS - 1) | !!lsl64(x_mnt, FP32_BITS + 1);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint64_t
fp64_mul(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint64_t a_mnt, b_mnt, x;
    uint64_t x0_mnt, x1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == FP64_EXP_INF && !b_mnt) ||
        (b_exp == FP64_EXP_INF && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    } else if (a_exp == FP64_EXP_INF || b_exp == FP64_EXP_INF) {
        return fp64_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp64_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1;
    mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt);
    fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);

    // Convert to FP64_BITS bits, collapsing error into bottom bit:
    x0_mnt = x1_mnt << 1 | !!x0_mnt;

    return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags);
}
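
/*
 * Illustrative note (not part of the original source): "collapsing error
 * into bottom bit" above is the classic sticky-bit technique: any
 * non-zero bits shifted out of the double-width product are OR-ed into
 * bit 0, so the rounding step can still distinguish an exact result
 * from an inexact one without keeping the full 2n-bit product around.
 * For instance, keeping 6 bits of a 12-bit product:
 *
 *     0b100000'000001 -> 0b100000 | 1  (inexact: sticky bit set)
 *     0b100000'000000 -> 0b100000 | 0  (exact: sticky bit clear)
 */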

static uint16_t
fp16_muladd(uint16_t a, uint16_t b, uint16_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint16_t a_mnt, b_mnt, c_mnt, x;
    uint32_t x_mnt, y_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp16_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp16_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (fp16_is_quiet_NaN(a_exp, a_mnt) &&
        ((!b_mnt && fp16_is_infinity(c_exp, c_mnt)) ||
         (!c_mnt && fp16_is_infinity(b_exp, b_mnt)))) {
        x = fp16_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == FP16_EXP_INF && !c_mnt) ||
        (c_exp == FP16_EXP_INF && !b_mnt) ||
        (a_exp == FP16_EXP_INF &&
         (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    }
    if (a_exp == FP16_EXP_INF)
        return fp16_infinity(a_sgn);
    if (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF)
        return fp16_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp16_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + 2 * FP16_EXP_BITS - 3;
    x_mnt = (uint32_t)a_mnt << (FP16_MANT_BITS + 4);

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1 - 3;
    y_mnt = (uint32_t)b_mnt * c_mnt << 3;
    if (!y_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        y_mnt = (lsr32(y_mnt, x_exp - y_exp) |
                 !!(y_mnt & (lsl32(1, x_exp - y_exp) - 1)));
        y_exp = x_exp;
    } else {
        x_mnt = (lsr32(x_mnt, y_exp - x_exp) |
                 !!(x_mnt & (lsl32(1, y_exp - x_exp) - 1)));
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        x_mnt = x_mnt + y_mnt;
    } else if (x_mnt >= y_mnt) {
        x_mnt = x_mnt - y_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = y_mnt - x_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp16_zero((mode & 3) == 2);
    }

    // Normalise into FP16_BITS bits, collapsing error into bottom bit:
    x_mnt = fp32_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);

    return fp16_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
}

static uint32_t
fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint32_t a_mnt, b_mnt, c_mnt, x;
    uint64_t x_mnt, y_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp32_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (fp32_is_quiet_NaN(a_exp, a_mnt) &&
        ((!b_mnt && fp32_is_infinity(c_exp, c_mnt)) ||
         (!c_mnt && fp32_is_infinity(b_exp, b_mnt)))) {
        x = fp32_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == FP32_EXP_INF && !c_mnt) ||
        (c_exp == FP32_EXP_INF && !b_mnt) ||
        (a_exp == FP32_EXP_INF &&
         (b_exp == FP32_EXP_INF || c_exp == FP32_EXP_INF) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }
    if (a_exp == FP32_EXP_INF)
        return fp32_infinity(a_sgn);
    if (b_exp == FP32_EXP_INF || c_exp == FP32_EXP_INF)
        return fp32_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp32_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + 2 * FP32_EXP_BITS - 3;
    x_mnt = (uint64_t)a_mnt << (FP32_MANT_BITS + 4);

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1 - 3;
    y_mnt = (uint64_t)b_mnt * c_mnt << 3;
    if (!y_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        y_mnt = (lsr64(y_mnt, x_exp - y_exp) |
                 !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1)));
        y_exp = x_exp;
    } else {
        x_mnt = (lsr64(x_mnt, y_exp - x_exp) |
                 !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1)));
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        x_mnt = x_mnt + y_mnt;
    } else if (x_mnt >= y_mnt) {
        x_mnt = x_mnt - y_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = y_mnt - x_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp32_zero((mode & 3) == 2);
    }

    // Normalise into FP32_BITS bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
}

static uint64_t
fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint64_t a_mnt, b_mnt, c_mnt, x;
    uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp64_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (fp64_is_quiet_NaN(a_exp, a_mnt) &&
        ((!b_mnt && fp64_is_infinity(c_exp, c_mnt)) ||
         (!c_mnt && fp64_is_infinity(b_exp, b_mnt)))) {
        x = fp64_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == FP64_EXP_INF && !c_mnt) ||
        (c_exp == FP64_EXP_INF && !b_mnt) ||
        (a_exp == FP64_EXP_INF &&
         (b_exp == FP64_EXP_INF || c_exp == FP64_EXP_INF) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }
    if (a_exp == FP64_EXP_INF)
        return fp64_infinity(a_sgn);
    if (b_exp == FP64_EXP_INF || c_exp == FP64_EXP_INF)
        return fp64_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp64_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + FP64_EXP_BITS;
    x0_mnt = 0;
    x1_mnt = a_mnt;

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1 - 3;
    mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3);
    if (!y0_mnt && !y1_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        uint64_t t0, t1;
        lsl128(&t0, &t1, y0_mnt, y1_mnt,
               x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0);
        lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp);
        y0_mnt |= !!(t0 | t1);
        y_exp = x_exp;
    } else {
        uint64_t t0, t1;
        lsl128(&t0, &t1, x0_mnt, x1_mnt,
               y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0);
        lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp);
        x0_mnt |= !!(t0 | t1);
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
    } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) {
        sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
    } else {
        x_sgn ^= 1;
        sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt);
    }

    if (!x0_mnt && !x1_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp64_zero((mode & 3) == 2);
    }

    // Normalise into FP64_BITS bits, collapsing error into bottom bit:
    fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
    x0_mnt = x1_mnt << 1 | !!x0_mnt;

    return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags);
}
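
/*
 * Illustrative sketch (not part of the original source): the product is
 * kept at full width and only the final sum is rounded, so fp64_muladd
 * behaves as a true fused multiply-add computing a + b*c. With
 * b = c = 1 + 2^-52 and a = -(b*c rounded), the exact residual
 * b*c - round(b*c) == 2^-104 survives, where a separate multiply
 * followed by an add would have returned +0:
 *
 *     int flags = 0;
 *     uint64_t b = 0x3ff0000000000001ULL; // 1 + 2^-52
 *     uint64_t p = 0x3ff0000000000002ULL; // b*b rounded = 1 + 2^-51
 *     uint64_t r = fp64_muladd(p | (1ULL << 63), b, b, 0, 0, &flags);
 *     assert(r == 0x3970000000000000ULL); // exactly 2^-104
 */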

static uint16_t
fp16_div(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint16_t a_mnt, b_mnt, x;
    uint32_t x_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp16_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == FP16_EXP_INF && b_exp == FP16_EXP_INF) ||
        (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    }
    if (a_exp == FP16_EXP_INF || !b_mnt) {
        if (a_exp != FP16_EXP_INF)
            *flags |= FPLIB_DZC;
        return fp16_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == FP16_EXP_INF)
        return fp16_zero(a_sgn ^ b_sgn);

    // Divide, setting bottom bit if inexact:
    a_mnt = fp16_normalise(a_mnt, &a_exp);
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + (FP16_EXP_BIAS + FP16_BITS + 2 * FP16_EXP_BITS - 3);
    x_mnt = ((uint32_t)a_mnt << (FP16_MANT_BITS - FP16_EXP_BITS + 3)) / b_mnt;
    x_mnt |= (x_mnt * b_mnt !=
              (uint32_t)a_mnt << (FP16_MANT_BITS - FP16_EXP_BITS + 3));

    // Normalise into FP16_BITS bits, collapsing error into bottom bit:
    x_mnt = fp32_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);

    return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint32_t
fp32_div(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x;
    uint64_t x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == FP32_EXP_INF && b_exp == FP32_EXP_INF) ||
        (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }
    if (a_exp == FP32_EXP_INF || !b_mnt) {
        if (a_exp != FP32_EXP_INF)
            *flags |= FPLIB_DZC;
        return fp32_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == FP32_EXP_INF)
        return fp32_zero(a_sgn ^ b_sgn);

    // Divide, setting bottom bit if inexact:
    a_mnt = fp32_normalise(a_mnt, &a_exp);
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + (FP32_EXP_BIAS + FP32_BITS + 2 * FP32_EXP_BITS - 3);
    x_mnt = ((uint64_t)a_mnt << (FP32_MANT_BITS - FP32_EXP_BITS + 3)) / b_mnt;
    x_mnt |= (x_mnt * b_mnt !=
              (uint64_t)a_mnt << (FP32_MANT_BITS - FP32_EXP_BITS + 3));

    // Normalise into FP32_BITS bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint64_t
fp64_div(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c;
    uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == FP64_EXP_INF && b_exp == FP64_EXP_INF) ||
        (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }
    if (a_exp == FP64_EXP_INF || !b_mnt) {
        if (a_exp != FP64_EXP_INF)
            *flags |= FPLIB_DZC;
        return fp64_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == FP64_EXP_INF)
        return fp64_zero(a_sgn ^ b_sgn);

    // Find reciprocal of divisor with Newton-Raphson:
    a_mnt = fp64_normalise(a_mnt, &a_exp);
    b_mnt = fp64_normalise(b_mnt, &b_exp);
    x_mnt = ~(uint64_t)0 / (b_mnt >> 31);
    mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt);
    sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32);
    mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33);

    // Multiply by dividend:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + FP64_EXP_BIAS + 8;
    mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4);
    x_mnt = x1_mnt;

    // This is an underestimate, so try adding one:
    mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1);
    c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11);
    if (c <= 0) {
        ++x_mnt;
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
}
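
/*
 * Illustrative note (not part of the original source): fp64_div refines
 * an initial ~32-bit reciprocal estimate r of the divisor b with one
 * Newton-Raphson step, r' = r * (2 - b * r), roughly doubling the number
 * of accurate bits; the quotient estimate a * r' can then be at most one
 * ulp low, which the "try adding one" correction above resolves with a
 * single 128-bit compare against the dividend. For example:
 *
 *     int flags = 0;
 *     assert(fp64_div(0x3ff0000000000000ULL, 0x4008000000000000ULL,
 *                     0, &flags) == 0x3fd5555555555555ULL); // 1.0 / 3.0
 */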
1902 
1903 static void
1904 set_fpscr0(FPSCR &fpscr, int flags)
1905 {
1906  if (flags & FPLIB_IDC) {
1907  fpscr.idc = 1;
1908  }
1909  if (flags & FPLIB_IOC) {
1910  fpscr.ioc = 1;
1911  }
1912  if (flags & FPLIB_DZC) {
1913  fpscr.dzc = 1;
1914  }
1915  if (flags & FPLIB_OFC) {
1916  fpscr.ofc = 1;
1917  }
1918  if (flags & FPLIB_UFC) {
1919  fpscr.ufc = 1;
1920  }
1921  if (flags & FPLIB_IXC) {
1922  fpscr.ixc = 1;
1923  }
1924 }
1925 
1926 static uint16_t
1927 fp16_scale(uint16_t a, int16_t b, int mode, int *flags)
1928 {
1929  int a_sgn, a_exp;
1930  uint16_t a_mnt;
1931 
1932  fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1933 
1934  // Handle NaNs:
1935  if (fp16_is_NaN(a_exp, a_mnt)) {
1936  return fp16_process_NaN(a, mode, flags);
1937  }
1938 
1939  // Handle zeroes:
1940  if (!a_mnt) {
1941  return fp16_zero(a_sgn);
1942  }
1943 
1944  // Handle infinities:
1945  if (a_exp == FP16_EXP_INF) {
1946  return fp16_infinity(a_sgn);
1947  }
1948 
1949  b = b < -300 ? -300 : b;
1950  b = b > 300 ? 300 : b;
1951  a_exp += b;
1952  a_mnt <<= 3;
1953 
1954  a_mnt = fp16_normalise(a_mnt, &a_exp);
1955 
1956  return fp16_round(a_sgn, a_exp + FP16_EXP_BITS - 3, a_mnt << 1,
1957  mode, flags);
1958 }
1959 
1960 static uint32_t
1961 fp32_scale(uint32_t a, int32_t b, int mode, int *flags)
1962 {
1963  int a_sgn, a_exp;
1964  uint32_t a_mnt;
1965 
1966  fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1967 
1968  // Handle NaNs:
1969  if (fp32_is_NaN(a_exp, a_mnt)) {
1970  return fp32_process_NaN(a, mode, flags);
1971  }
1972 
1973  // Handle zeroes:
1974  if (!a_mnt) {
1975  return fp32_zero(a_sgn);
1976  }
1977 
1978  // Handle infinities:
1979  if (a_exp == FP32_EXP_INF) {
1980  return fp32_infinity(a_sgn);
1981  }
1982 
1983  b = b < -300 ? -300 : b;
1984  b = b > 300 ? 300 : b;
1985  a_exp += b;
1986  a_mnt <<= 3;
1987 
1988  a_mnt = fp32_normalise(a_mnt, &a_exp);
1989 
1990  return fp32_round(a_sgn, a_exp + FP32_EXP_BITS - 3, a_mnt << 1,
1991  mode, flags);
1992 }
1993 
1994 static uint64_t
1995 fp64_scale(uint64_t a, int64_t b, int mode, int *flags)
1996 {
1997  int a_sgn, a_exp;
1998  uint64_t a_mnt;
1999 
2000  fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2001 
2002  // Handle NaNs:
2003  if (fp64_is_NaN(a_exp, a_mnt)) {
2004  return fp64_process_NaN(a, mode, flags);
2005  }
2006 
2007  // Handle zeroes:
2008  if (!a_mnt) {
2009  return fp64_zero(a_sgn);
2010  }
2011 
2012  // Handle infinities:
2013  if (a_exp == FP64_EXP_INF) {
2014  return fp64_infinity(a_sgn);
2015  }
2016 
2017  b = b < -3000 ? -3000 : b;
2018  b = b > 3000 ? 3000 : b;
2019  a_exp += b;
2020  a_mnt <<= 3;
2021 
2022  a_mnt = fp64_normalise(a_mnt, &a_exp);
2023 
2024  return fp64_round(a_sgn, a_exp + FP64_EXP_BITS - 3, a_mnt << 1,
2025  mode, flags);
2026 }
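// The fp16/fp32/fp64_scale helpers multiply the operand by 2^b simply
// by adding b to the unpacked exponent. The clamp (+/-300 for the 16-
// and 32-bit variants, +/-3000 for 64-bit) is safe because it already
// exceeds each format's full dynamic range: once |b| passes the clamp
// the result has saturated to overflow or underflow, so a larger scale
// cannot change the rounded result or the exception flags.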
2027 
2028 static uint16_t
2029 fp16_sqrt(uint16_t a, int mode, int *flags)
2030 {
2031  int a_sgn, a_exp, x_sgn, x_exp;
2032  uint16_t a_mnt, x_mnt;
2033  uint32_t x, t0, t1;
2034 
2035  fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2036 
2037  // Handle NaNs:
2038  if (fp16_is_NaN(a_exp, a_mnt))
2039  return fp16_process_NaN(a, mode, flags);
2040 
2041  // Handle infinities and zeroes:
2042  if (!a_mnt)
2043  return fp16_zero(a_sgn);
2044  if (a_exp == FP16_EXP_INF && !a_sgn)
2045  return fp16_infinity(a_sgn);
2046  if (a_sgn) {
2047  *flags |= FPLIB_IOC;
2048  return fp16_defaultNaN();
2049  }
2050 
2051  a_mnt = fp16_normalise(a_mnt, &a_exp);
2052  if (a_exp & 1) {
2053  ++a_exp;
2054  a_mnt >>= 1;
2055  }
2056 
2057  // x = (a * 3 + 5) / 8
2058  x = ((uint32_t)a_mnt << 14) + ((uint32_t)a_mnt << 13) + ((uint32_t)5 << 28);
2059 
2060  // x = (a / x + x) / 2; // 8-bit accuracy
2061  x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15;
2062 
2063  // x = (a / x + x) / 2; // 16-bit accuracy
2064  x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15;
2065 
2066  x_sgn = 0;
2067  x_exp = (a_exp + 27) >> 1;
2068  x_mnt = ((x - (1 << 18)) >> 19) + 1;
2069  t1 = (uint32_t)x_mnt * x_mnt;
2070  t0 = (uint32_t)a_mnt << 9;
2071  if (t1 > t0) {
2072  --x_mnt;
2073  }
2074 
2075  x_mnt = fp16_normalise(x_mnt, &x_exp);
2076 
2077  return fp16_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
2078 }
2079 
2080 static uint32_t
2081 fp32_sqrt(uint32_t a, int mode, int *flags)
2082 {
2083  int a_sgn, a_exp, x_sgn, x_exp;
2084  uint32_t a_mnt, x, x_mnt;
2085  uint64_t t0, t1;
2086 
2087  fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2088 
2089  // Handle NaNs:
2090  if (fp32_is_NaN(a_exp, a_mnt))
2091  return fp32_process_NaN(a, mode, flags);
2092 
2093  // Handle infinities and zeroes:
2094  if (!a_mnt)
2095  return fp32_zero(a_sgn);
2096  if (a_exp == FP32_EXP_INF && !a_sgn)
2097  return fp32_infinity(a_sgn);
2098  if (a_sgn) {
2099  *flags |= FPLIB_IOC;
2100  return fp32_defaultNaN();
2101  }
2102 
2103  a_mnt = fp32_normalise(a_mnt, &a_exp);
2104  if (!(a_exp & 1)) {
2105  ++a_exp;
2106  a_mnt >>= 1;
2107  }
2108 
2109  // x = (a * 3 + 5) / 8
2110  x = (a_mnt >> 2) + (a_mnt >> 3) + ((uint32_t)5 << 28);
2111 
2112  // x = (a / x + x) / 2; // 8-bit accuracy
2113  x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
2114 
2115  // x = (a / x + x) / 2; // 16-bit accuracy
2116  x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
2117 
2118  // x = (a / x + x) / 2; // 32-bit accuracy
2119  x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1);
2120 
2121  x_sgn = 0;
2122  x_exp = (a_exp + 147) >> 1;
2123  x_mnt = ((x - (1 << 5)) >> 6) + 1;
2124  t1 = (uint64_t)x_mnt * x_mnt;
2125  t0 = (uint64_t)a_mnt << 19;
2126  if (t1 > t0) {
2127  --x_mnt;
2128  }
2129 
2130  x_mnt = fp32_normalise(x_mnt, &x_exp);
2131 
2132  return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
2133 }
2134 
2135 static uint64_t
2136 fp64_sqrt(uint64_t a, int mode, int *flags)
2137 {
2138  int a_sgn, a_exp, x_sgn, x_exp, c;
2139  uint64_t a_mnt, x_mnt, r, x0, x1;
2140  uint32_t x;
2141 
2142  fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2143 
2144  // Handle NaNs:
2145  if (fp64_is_NaN(a_exp, a_mnt))
2146  return fp64_process_NaN(a, mode, flags);
2147 
2148  // Handle infinities and zeroes:
2149  if (!a_mnt)
2150  return fp64_zero(a_sgn);
2151  if (a_exp == FP64_EXP_INF && !a_sgn)
2152  return fp64_infinity(a_sgn);
2153  if (a_sgn) {
2154  *flags |= FPLIB_IOC;
2155  return fp64_defaultNaN();
2156  }
2157 
2158  a_mnt = fp64_normalise(a_mnt, &a_exp);
2159  if (a_exp & 1) {
2160  ++a_exp;
2161  a_mnt >>= 1;
2162  }
2163 
2164  // x = (a * 3 + 5) / 8
2165  x = (a_mnt >> 34) + (a_mnt >> 35) + ((uint32_t)5 << 28);
2166 
2167  // x = (a / x + x) / 2; // 8-bit accuracy
2168  x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
2169 
2170  // x = (a / x + x) / 2; // 16-bit accuracy
2171  x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
2172 
2173  // x = (a / x + x) / 2; // 32-bit accuracy
2174  x = ((a_mnt / x) >> 2) + (x >> 1);
2175 
2176  // r = 1 / x; // 32-bit accuracy
2177  r = ((uint64_t)1 << 62) / x;
2178 
2179  // r = r * (2 - x * r); // 64-bit accuracy
2180  mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r);
2181  lsr128(&x0, &x1, x0, x1, 31);
2182 
2183  // x = (x + a * r) / 2; // 64-bit accuracy
2184  mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2);
2185  lsl128(&x0, &x1, x0, x1, 5);
2186  lsr128(&x0, &x1, x0, x1, 56);
2187 
2188  x0 = ((uint64_t)x << 31) + (x0 >> 1);
2189 
2190  x_sgn = 0;
2191  x_exp = (a_exp + 1053) >> 1;
2192  x_mnt = x0;
2193  x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1;
2194  mul62x62(&x0, &x1, x_mnt, x_mnt);
2195  lsl128(&x0, &x1, x0, x1, 19);
2196  c = cmp128(x0, x1, 0, a_mnt);
2197  if (c > 0)
2198  --x_mnt;
2199 
2200  x_mnt = fp64_normalise(x_mnt, &x_exp);
2201 
2202  return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
2203 }
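// The three square-root routines share one scheme: adjust the
// exponent's parity so halving it is exact, start from the linear
// estimate x ~= (3a + 5)/8 of sqrt(a), and refine with Heron
// iterations x <- (a/x + x)/2, each of which roughly doubles the
// number of correct bits (the 64-bit variant also refines a
// fixed-point reciprocal so the last step avoids a wide division).
// The candidate mantissa is then verified by squaring it exactly: if
// x^2 exceeds a it is decremented, and the exactness of that
// comparison supplies the sticky bit for the final rounding.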
2204 
2205 static int
2206 modeConv(FPSCR fpscr)
2207 {
2208  uint32_t x = (uint32_t)fpscr;
2209  return (x >> 22 & 0xf) | (x >> 19 & 1 ? FPLIB_FZ16 : 0);
2210  // AHP bit is ignored. Only fplibConvert uses AHP.
2211 }
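// modeConv extracts FPSCR bits [25:22] - the RMode field plus the FZ
// and DN bits - which line up directly with the fplib mode encoding,
// and maps bit 19 (FZ16) onto FPLIB_FZ16 separately.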
2212 
2213 static void
2214 set_fpscr(FPSCR &fpscr, int flags)
2215 {
2216  // translate back to FPSCR
2217  bool underflow = false;
2218  if (flags & FPLIB_IDC) {
2219  fpscr.idc = 1;
2220  }
2221  if (flags & FPLIB_IOC) {
2222  fpscr.ioc = 1;
2223  }
2224  if (flags & FPLIB_DZC) {
2225  fpscr.dzc = 1;
2226  }
2227  if (flags & FPLIB_OFC) {
2228  fpscr.ofc = 1;
2229  }
2230  if (flags & FPLIB_UFC) {
2231  underflow = true; // with flush-to-zero, a flushed result raises UFC but must not also raise IXC (checked below)
2232  fpscr.ufc = 1;
2233  }
2234  if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) {
2235  fpscr.ixc = 1;
2236  }
2237 }
2238 
2239 template <>
2240 bool
2241 fplibCompareEQ(uint16_t a, uint16_t b, FPSCR &fpscr)
2242 {
2243  int flags = 0;
2244  int x = fp16_compare_eq(a, b, modeConv(fpscr), &flags);
2245  set_fpscr(fpscr, flags);
2246  return x;
2247 }
2248 
2249 template <>
2250 bool
2251 fplibCompareGE(uint16_t a, uint16_t b, FPSCR &fpscr)
2252 {
2253  int flags = 0;
2254  int x = fp16_compare_ge(a, b, modeConv(fpscr), &flags);
2255  set_fpscr(fpscr, flags);
2256  return x;
2257 }
2258 
2259 template <>
2260 bool
2261 fplibCompareGT(uint16_t a, uint16_t b, FPSCR &fpscr)
2262 {
2263  int flags = 0;
2264  int x = fp16_compare_gt(a, b, modeConv(fpscr), &flags);
2265  set_fpscr(fpscr, flags);
2266  return x;
2267 }
2268 
2269 template <>
2270 bool
2271 fplibCompareUN(uint16_t a, uint16_t b, FPSCR &fpscr)
2272 {
2273  int flags = 0;
2274  int x = fp16_compare_un(a, b, modeConv(fpscr), &flags);
2275  set_fpscr(fpscr, flags);
2276  return x;
2277 }
2278 
2279 template <>
2280 bool
2281 fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr)
2282 {
2283  int flags = 0;
2284  int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags);
2285  set_fpscr(fpscr, flags);
2286  return x;
2287 }
2288 
2289 template <>
2290 bool
2291 fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr)
2292 {
2293  int flags = 0;
2294  int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags);
2295  set_fpscr(fpscr, flags);
2296  return x;
2297 }
2298 
2299 template <>
2300 bool
2301 fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr)
2302 {
2303  int flags = 0;
2304  int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags);
2305  set_fpscr(fpscr, flags);
2306  return x;
2307 }
2308 
2309 template <>
2310 bool
2311 fplibCompareUN(uint32_t a, uint32_t b, FPSCR &fpscr)
2312 {
2313  int flags = 0;
2314  int x = fp32_compare_un(a, b, modeConv(fpscr), &flags);
2315  set_fpscr(fpscr, flags);
2316  return x;
2317 }
2318 
2319 template <>
2320 bool
2321 fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr)
2322 {
2323  int flags = 0;
2324  int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags);
2325  set_fpscr(fpscr, flags);
2326  return x;
2327 }
2328 
2329 template <>
2330 bool
2331 fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr)
2332 {
2333  int flags = 0;
2334  int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags);
2335  set_fpscr(fpscr, flags);
2336  return x;
2337 }
2338 
2339 template <>
2340 bool
2341 fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr)
2342 {
2343  int flags = 0;
2344  int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags);
2345  set_fpscr(fpscr, flags);
2346  return x;
2347 }
2348 
2349 template <>
2350 bool
2351 fplibCompareUN(uint64_t a, uint64_t b, FPSCR &fpscr)
2352 {
2353  int flags = 0;
2354  int x = fp64_compare_un(a, b, modeConv(fpscr), &flags);
2355  set_fpscr(fpscr, flags);
2356  return x;
2357 }
2358 
2359 template <>
2360 uint16_t
2361 fplibAbs(uint16_t op)
2362 {
2363  return op & ~(1ULL << (FP16_BITS - 1));
2364 }
2365 
2366 template <>
2367 uint32_t
2368 fplibAbs(uint32_t op)
2369 {
2370  return op & ~(1ULL << (FP32_BITS - 1));
2371 }
2372 
2373 template <>
2374 uint64_t
2375 fplibAbs(uint64_t op)
2376 {
2377  return op & ~(1ULL << (FP64_BITS - 1));
2378 }
2379 
2380 template <>
2381 uint16_t
2382 fplibAdd(uint16_t op1, uint16_t op2, FPSCR &fpscr)
2383 {
2384  int flags = 0;
2385  uint16_t result = fp16_add(op1, op2, 0, modeConv(fpscr), &flags);
2386  set_fpscr0(fpscr, flags);
2387  return result;
2388 }
2389 
2390 template <>
2391 uint32_t
2392 fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2393 {
2394  int flags = 0;
2395  uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags);
2396  set_fpscr0(fpscr, flags);
2397  return result;
2398 }
2399 
2400 template <>
2401 uint64_t
2402 fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2403 {
2404  int flags = 0;
2405  uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags);
2406  set_fpscr0(fpscr, flags);
2407  return result;
2408 }
2409 
2410 template <>
2411 int
2412 fplibCompare(uint16_t op1, uint16_t op2, bool signal_nans, FPSCR &fpscr)
2413 {
2414  int mode = modeConv(fpscr);
2415  int flags = 0;
2416  int sgn1, exp1, sgn2, exp2, result;
2417  uint16_t mnt1, mnt2;
2418 
2419  fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2420  fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2421 
2422  if (fp16_is_NaN(exp1, mnt1) || fp16_is_NaN(exp2, mnt2)) {
2423  result = 3;
2424  if (fp16_is_signalling_NaN(exp1, mnt1) ||
2425  fp16_is_signalling_NaN(exp2, mnt2) || signal_nans)
2426  flags |= FPLIB_IOC;
2427  } else {
2428  if (op1 == op2 || (!mnt1 && !mnt2)) {
2429  result = 6;
2430  } else if (sgn1 != sgn2) {
2431  result = sgn1 ? 8 : 2;
2432  } else if (exp1 != exp2) {
2433  result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2434  } else {
2435  result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2436  }
2437  }
2438 
2439  set_fpscr0(fpscr, flags);
2440 
2441  return result;
2442 }
2443 
2444 template <>
2445 int
2446 fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr)
2447 {
2448  int mode = modeConv(fpscr);
2449  int flags = 0;
2450  int sgn1, exp1, sgn2, exp2, result;
2451  uint32_t mnt1, mnt2;
2452 
2453  fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2454  fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2455 
2456  if (fp32_is_NaN(exp1, mnt1) || fp32_is_NaN(exp2, mnt2)) {
2457  result = 3;
2458  if (fp32_is_signalling_NaN(exp1, mnt1) ||
2459  fp32_is_signalling_NaN(exp2, mnt2) || signal_nans)
2460  flags |= FPLIB_IOC;
2461  } else {
2462  if (op1 == op2 || (!mnt1 && !mnt2)) {
2463  result = 6;
2464  } else if (sgn1 != sgn2) {
2465  result = sgn1 ? 8 : 2;
2466  } else if (exp1 != exp2) {
2467  result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2468  } else {
2469  result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2470  }
2471  }
2472 
2473  set_fpscr0(fpscr, flags);
2474 
2475  return result;
2476 }
2477 
2478 template <>
2479 int
2480 fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr)
2481 {
2482  int mode = modeConv(fpscr);
2483  int flags = 0;
2484  int sgn1, exp1, sgn2, exp2, result;
2485  uint64_t mnt1, mnt2;
2486 
2487  fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2488  fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2489 
2490  if (fp64_is_NaN(exp1, mnt1) || fp64_is_NaN(exp2, mnt2)) {
2491  result = 3;
2492  if (fp64_is_signalling_NaN(exp1, mnt1) ||
2493  fp64_is_signalling_NaN(exp2, mnt2) || signal_nans)
2494  flags |= FPLIB_IOC;
2495  } else {
2496  if (op1 == op2 || (!mnt1 && !mnt2)) {
2497  result = 6;
2498  } else if (sgn1 != sgn2) {
2499  result = sgn1 ? 8 : 2;
2500  } else if (exp1 != exp2) {
2501  result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2502  } else {
2503  result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2504  }
2505  }
2506 
2507  set_fpscr0(fpscr, flags);
2508 
2509  return result;
2510 }
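// fplibCompare returns the NZCV nibble that FCMP/FCMPE write to
// PSTATE: N=8, Z=4, C=2, V=1. Hence 6 (ZC) means equal, 8 (N) less
// than, 2 (C) greater than, and 3 (CV) unordered. Signalling NaNs, or
// any NaN when signal_nans is set (the FCMPE case), also raise IOC.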
2511 
2512 static uint16_t
2513 fp16_FPConvertNaN_32(uint32_t op)
2514 {
2515  return fp16_pack(op >> (FP32_BITS - 1), FP16_EXP_INF,
2516  1ULL << (FP16_MANT_BITS - 1) |
2517  op >> (FP32_MANT_BITS - FP16_MANT_BITS));
2518 }
2519 
2520 static uint16_t
2521 fp16_FPConvertNaN_64(uint64_t op)
2522 {
2523  return fp16_pack(op >> (FP64_BITS - 1), FP16_EXP_INF,
2524  1ULL << (FP16_MANT_BITS - 1) |
2525  op >> (FP64_MANT_BITS - FP16_MANT_BITS));
2526 }
2527 
2528 static uint32_t
2529 fp32_FPConvertNaN_16(uint16_t op)
2530 {
2531  return fp32_pack(op >> (FP16_BITS - 1), FP32_EXP_INF,
2532  1ULL << (FP32_MANT_BITS - 1) |
2533  (uint32_t)op << (FP32_MANT_BITS - FP16_MANT_BITS));
2534 }
2535 
2536 static uint32_t
2537 fp32_FPConvertNaN_64(uint64_t op)
2538 {
2539  return fp32_pack(op >> (FP64_BITS - 1), FP32_EXP_INF,
2540  1ULL << (FP32_MANT_BITS - 1) |
2541  op >> (FP64_MANT_BITS - FP32_MANT_BITS));
2542 }
2543 
2544 static uint64_t
2545 fp64_FPConvertNaN_16(uint16_t op)
2546 {
2547  return fp64_pack(op >> (FP16_BITS - 1), FP64_EXP_INF,
2548  1ULL << (FP64_MANT_BITS - 1) |
2549  (uint64_t)op << (FP64_MANT_BITS - FP16_MANT_BITS));
2550 }
2551 
2552 static uint64_t
2553 fp64_FPConvertNaN_32(uint32_t op)
2554 {
2555  return fp64_pack(op >> (FP32_BITS - 1), FP64_EXP_INF,
2556  1ULL << (FP64_MANT_BITS - 1) |
2557  (uint64_t)op << (FP64_MANT_BITS - FP32_MANT_BITS));
2558 }
2559 
2560 static uint16_t
2561 fp16_FPOnePointFive(int sgn)
2562 {
2563  return fp16_pack(sgn, FP16_EXP_BIAS, 1ULL << (FP16_MANT_BITS - 1));
2564 }
2565 
2566 static uint32_t
2567 fp32_FPOnePointFive(int sgn)
2568 {
2569  return fp32_pack(sgn, FP32_EXP_BIAS, 1ULL << (FP32_MANT_BITS - 1));
2570 }
2571 
2572 static uint64_t
2573 fp64_FPOnePointFive(int sgn)
2574 {
2575  return fp64_pack(sgn, FP64_EXP_BIAS, 1ULL << (FP64_MANT_BITS - 1));
2576 }
2577 
2578 static uint16_t
2579 fp16_FPThree(int sgn)
2580 {
2581  return fp16_pack(sgn, FP16_EXP_BIAS + 1, 1ULL << (FP16_MANT_BITS - 1));
2582 }
2583 
2584 static uint32_t
2585 fp32_FPThree(int sgn)
2586 {
2587  return fp32_pack(sgn, FP32_EXP_BIAS + 1, 1ULL << (FP32_MANT_BITS - 1));
2588 }
2589 
2590 static uint64_t
2591 fp64_FPThree(int sgn)
2592 {
2593  return fp64_pack(sgn, FP64_EXP_BIAS + 1, 1ULL << (FP64_MANT_BITS - 1));
2594 }
2595 
2596 static uint16_t
2597 fp16_FPTwo(int sgn)
2598 {
2599  return fp16_pack(sgn, FP16_EXP_BIAS + 1, 0);
2600 }
2601 
2602 static uint32_t
2603 fp32_FPTwo(int sgn)
2604 {
2605  return fp32_pack(sgn, FP32_EXP_BIAS + 1, 0);
2606 }
2607 
2608 static uint64_t
2609 fp64_FPTwo(int sgn)
2610 {
2611  return fp64_pack(sgn, FP64_EXP_BIAS + 1, 0);
2612 }
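// The constructors above build the exact constants 1.5, 3.0 and 2.0
// needed by the fused reciprocal-step operations further down: FRSQRTS
// uses 1.5 and 3.0, while FRECPS and FMULX use 2.0.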
2613 
2614 template <>
2615 uint16_t
2616 fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
2617 {
2618  int mode = modeConv(fpscr);
2619  int flags = 0;
2620  int sgn, exp;
2621  uint32_t mnt;
2622  uint16_t result;
2623 
2624  // Unpack floating-point operand optionally with flush-to-zero:
2625  fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2626 
2627  bool alt_hp = fpscr.ahp;
2628 
2629  if (fp32_is_NaN(exp, mnt)) {
2630  if (alt_hp) {
2631  result = fp16_zero(sgn);
2632  } else if (fpscr.dn) {
2633  result = fp16_defaultNaN();
2634  } else {
2635  result = fp16_FPConvertNaN_32(op);
2636  }
2637  if (!(mnt >> (FP32_MANT_BITS - 1) & 1) || alt_hp) {
2638  flags |= FPLIB_IOC;
2639  }
2640  } else if (exp == FP32_EXP_INF) {
2641  if (alt_hp) {
2642  result = ((uint16_t)sgn << (FP16_BITS - 1) |
2643  ((1ULL << (FP16_BITS - 1)) - 1));
2644  flags |= FPLIB_IOC;
2645  } else {
2646  result = fp16_infinity(sgn);
2647  }
2648  } else if (!mnt) {
2649  result = fp16_zero(sgn);
2650  } else {
2651  result =
2652  fp16_round_(sgn, exp - FP32_EXP_BIAS + FP16_EXP_BIAS,
2653  mnt >> (FP32_MANT_BITS - FP16_BITS) |
2654  !!(mnt & ((1ULL << (FP32_MANT_BITS - FP16_BITS)) - 1)),
2655  rounding, (mode & 0xf) | alt_hp << 4, &flags);
2656  }
2657 
2658  set_fpscr0(fpscr, flags);
2659 
2660  return result;
2661 }
2662 
2663 template <>
2664 uint16_t
2665 fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
2666 {
2667  int mode = modeConv(fpscr);
2668  int flags = 0;
2669  int sgn, exp;
2670  uint64_t mnt;
2671  uint16_t result;
2672 
2673  // Unpack floating-point operand optionally with flush-to-zero:
2674  fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2675 
2676  bool alt_hp = fpscr.ahp;
2677 
2678  if (fp64_is_NaN(exp, mnt)) {
2679  if (alt_hp) {
2680  result = fp16_zero(sgn);
2681  } else if (fpscr.dn) {
2682  result = fp16_defaultNaN();
2683  } else {
2684  result = fp16_FPConvertNaN_64(op);
2685  }
2686  if (!(mnt >> (FP64_MANT_BITS - 1) & 1) || alt_hp) {
2687  flags |= FPLIB_IOC;
2688  }
2689  } else if (exp == FP64_EXP_INF) {
2690  if (alt_hp) {
2691  result = ((uint16_t)sgn << (FP16_BITS - 1) |
2692  ((1ULL << (FP16_BITS - 1)) - 1));
2693  flags |= FPLIB_IOC;
2694  } else {
2695  result = fp16_infinity(sgn);
2696  }
2697  } else if (!mnt) {
2698  result = fp16_zero(sgn);
2699  } else {
2700  result =
2701  fp16_round_(sgn, exp - FP64_EXP_BIAS + FP16_EXP_BIAS,
2702  mnt >> (FP64_MANT_BITS - FP16_BITS) |
2703  !!(mnt & ((1ULL << (FP64_MANT_BITS - FP16_BITS)) - 1)),
2704  rounding, (mode & 0xf) | alt_hp << 4, &flags);
2705  }
2706 
2707  set_fpscr0(fpscr, flags);
2708 
2709  return result;
2710 }
2711 
2712 template <>
2713 uint32_t
2714 fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
2715 {
2716  int mode = modeConv(fpscr);
2717  int flags = 0;
2718  int sgn, exp;
2719  uint16_t mnt;
2720  uint32_t result;
2721 
2722  // Unpack floating-point operand optionally with flush-to-zero:
2723  fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags);
2724 
2725  if (fp16_is_NaN(exp, mnt) && !fpscr.ahp) {
2726  if (fpscr.dn) {
2727  result = fp32_defaultNaN();
2728  } else {
2729  result = fp32_FPConvertNaN_16(op);
2730  }
2731  if (!(mnt >> (FP16_MANT_BITS - 1) & 1)) {
2732  flags |= FPLIB_IOC;
2733  }
2734  } else if (exp == FP16_EXP_INF && !fpscr.ahp) {
2735  result = fp32_infinity(sgn);
2736  } else if (!mnt) {
2737  result = fp32_zero(sgn);
2738  } else {
2739  mnt = fp16_normalise(mnt, &exp);
2740  result = fp32_pack(sgn, (exp - FP16_EXP_BIAS +
2741  FP32_EXP_BIAS + FP16_EXP_BITS),
2742  (uint32_t)mnt << (FP32_MANT_BITS - FP16_BITS + 1));
2743  }
2744 
2745  set_fpscr0(fpscr, flags);
2746 
2747  return result;
2748 }
2749 
2750 template <>
2751 uint32_t
2752 fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
2753 {
2754  int mode = modeConv(fpscr);
2755  int flags = 0;
2756  int sgn, exp;
2757  uint64_t mnt;
2758  uint32_t result;
2759 
2760  // Unpack floating-point operand optionally with flush-to-zero:
2761  fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2762 
2763  if (fp64_is_NaN(exp, mnt)) {
2764  if (fpscr.dn) {
2765  result = fp32_defaultNaN();
2766  } else {
2767  result = fp32_FPConvertNaN_64(op);
2768  }
2769  if (!(mnt >> (FP64_MANT_BITS - 1) & 1)) {
2770  flags |= FPLIB_IOC;
2771  }
2772  } else if (exp == FP64_EXP_INF) {
2773  result = fp32_infinity(sgn);
2774  } else if (!mnt) {
2775  result = fp32_zero(sgn);
2776  } else {
2777  result =
2778  fp32_round_(sgn, exp - FP64_EXP_BIAS + FP32_EXP_BIAS,
2779  mnt >> (FP64_MANT_BITS - FP32_BITS) |
2780  !!(mnt & ((1ULL << (FP64_MANT_BITS - FP32_BITS)) - 1)),
2781  rounding, mode, &flags);
2782  }
2783 
2784  set_fpscr0(fpscr, flags);
2785 
2786  return result;
2787 }
2788 
2789 template <>
2790 uint64_t
2791 fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
2792 {
2793  int mode = modeConv(fpscr);
2794  int flags = 0;
2795  int sgn, exp;
2796  uint16_t mnt;
2797  uint64_t result;
2798 
2799  // Unpack floating-point operand optionally with flush-to-zero:
2800  fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags);
2801 
2802  if (fp16_is_NaN(exp, mnt) && !fpscr.ahp) {
2803  if (fpscr.dn) {
2804  result = fp64_defaultNaN();
2805  } else {
2806  result = fp64_FPConvertNaN_16(op);
2807  }
2808  if (!(mnt >> (FP16_MANT_BITS - 1) & 1)) {
2809  flags |= FPLIB_IOC;
2810  }
2811  } else if (exp == FP16_EXP_INF && !fpscr.ahp) {
2812  result = fp64_infinity(sgn);
2813  } else if (!mnt) {
2814  result = fp64_zero(sgn);
2815  } else {
2816  mnt = fp16_normalise(mnt, &exp);
2817  result = fp64_pack(sgn, (exp - FP16_EXP_BIAS +
2818  FP64_EXP_BIAS + FP16_EXP_BITS),
2819  (uint64_t)mnt << (FP64_MANT_BITS - FP16_BITS + 1));
2820  }
2821 
2822  set_fpscr0(fpscr, flags);
2823 
2824  return result;
2825 }
2826 
2827 template <>
2828 uint64_t
2829 fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
2830 {
2831  int mode = modeConv(fpscr);
2832  int flags = 0;
2833  int sgn, exp;
2834  uint32_t mnt;
2835  uint64_t result;
2836 
2837  // Unpack floating-point operand optionally with flush-to-zero:
2838  fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2839 
2840  if (fp32_is_NaN(exp, mnt)) {
2841  if (fpscr.dn) {
2842  result = fp64_defaultNaN();
2843  } else {
2844  result = fp64_FPConvertNaN_32(op);
2845  }
2846  if (!(mnt >> (FP32_MANT_BITS - 1) & 1)) {
2847  flags |= FPLIB_IOC;
2848  }
2849  } else if (exp == FP32_EXP_INF) {
2850  result = fp64_infinity(sgn);
2851  } else if (!mnt) {
2852  result = fp64_zero(sgn);
2853  } else {
2854  mnt = fp32_normalise(mnt, &exp);
2855  result = fp64_pack(sgn, (exp - FP32_EXP_BIAS +
2856  FP64_EXP_BIAS + FP32_EXP_BITS),
2857  (uint64_t)mnt << (FP64_MANT_BITS - FP32_BITS + 1));
2858  }
2859 
2860  set_fpscr0(fpscr, flags);
2861 
2862  return result;
2863 }
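// Note the asymmetry between the conversions: widening (16->32, 16->64,
// 32->64) is always exact, so the mantissa is simply repositioned and
// the bias difference added to the exponent, while narrowing must fold
// the discarded low mantissa bits into a sticky bit and go through the
// rounding routine. The narrowing-to-half paths also forward AHP
// (alternative half-precision, a format with no infinities or NaNs),
// which is why their NaN and infinity cases differ from the others.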
2864 
2865 template <>
2866 uint16_t
2867 fplibMulAdd(uint16_t addend, uint16_t op1, uint16_t op2, FPSCR &fpscr)
2868 {
2869  int flags = 0;
2870  uint16_t result = fp16_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2871  set_fpscr0(fpscr, flags);
2872  return result;
2873 }
2874 
2875 template <>
2876 uint32_t
2877 fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr)
2878 {
2879  int flags = 0;
2880  uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2881  set_fpscr0(fpscr, flags);
2882  return result;
2883 }
2884 
2885 template <>
2886 uint64_t
2887 fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr)
2888 {
2889  int flags = 0;
2890  uint64_t result = fp64_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2891  set_fpscr0(fpscr, flags);
2892  return result;
2893 }
2894 
2895 template <>
2896 uint16_t
2897 fplibDiv(uint16_t op1, uint16_t op2, FPSCR &fpscr)
2898 {
2899  int flags = 0;
2900  uint16_t result = fp16_div(op1, op2, modeConv(fpscr), &flags);
2901  set_fpscr0(fpscr, flags);
2902  return result;
2903 }
2904 
2905 template <>
2906 uint32_t
2907 fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2908 {
2909  int flags = 0;
2910  uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags);
2911  set_fpscr0(fpscr, flags);
2912  return result;
2913 }
2914 
2915 template <>
2916 uint64_t
2917 fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2918 {
2919  int flags = 0;
2920  uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags);
2921  set_fpscr0(fpscr, flags);
2922  return result;
2923 }
2924 
2925 template <>
2926 uint16_t
2927 fplibExpA(uint16_t op)
2928 {
2929  static uint16_t coeff[32] = {
2930  0x0000,
2931  0x0016,
2932  0x002d,
2933  0x0045,
2934  0x005d,
2935  0x0075,
2936  0x008e,
2937  0x00a8,
2938  0x00c2,
2939  0x00dc,
2940  0x00f8,
2941  0x0114,
2942  0x0130,
2943  0x014d,
2944  0x016b,
2945  0x0189,
2946  0x01a8,
2947  0x01c8,
2948  0x01e8,
2949  0x0209,
2950  0x022b,
2951  0x024e,
2952  0x0271,
2953  0x0295,
2954  0x02ba,
2955  0x02e0,
2956  0x0306,
2957  0x032e,
2958  0x0356,
2959  0x037f,
2960  0x03a9,
2961  0x03d4
2962  };
2963  return ((((op >> 5) & ((1 << FP16_EXP_BITS) - 1)) << FP16_MANT_BITS) |
2964  coeff[op & ((1 << 5) - 1)]);
2965 }
2966 
2967 template <>
2968 uint32_t
2969 fplibExpA(uint32_t op)
2970 {
2971  static uint32_t coeff[64] = {
2972  0x000000,
2973  0x0164d2,
2974  0x02cd87,
2975  0x043a29,
2976  0x05aac3,
2977  0x071f62,
2978  0x08980f,
2979  0x0a14d5,
2980  0x0b95c2,
2981  0x0d1adf,
2982  0x0ea43a,
2983  0x1031dc,
2984  0x11c3d3,
2985  0x135a2b,
2986  0x14f4f0,
2987  0x16942d,
2988  0x1837f0,
2989  0x19e046,
2990  0x1b8d3a,
2991  0x1d3eda,
2992  0x1ef532,
2993  0x20b051,
2994  0x227043,
2995  0x243516,
2996  0x25fed7,
2997  0x27cd94,
2998  0x29a15b,
2999  0x2b7a3a,
3000  0x2d583f,
3001  0x2f3b79,
3002  0x3123f6,
3003  0x3311c4,
3004  0x3504f3,
3005  0x36fd92,
3006  0x38fbaf,
3007  0x3aff5b,
3008  0x3d08a4,
3009  0x3f179a,
3010  0x412c4d,
3011  0x4346cd,
3012  0x45672a,
3013  0x478d75,
3014  0x49b9be,
3015  0x4bec15,
3016  0x4e248c,
3017  0x506334,
3018  0x52a81e,
3019  0x54f35b,
3020  0x5744fd,
3021  0x599d16,
3022  0x5bfbb8,
3023  0x5e60f5,
3024  0x60ccdf,
3025  0x633f89,
3026  0x65b907,
3027  0x68396a,
3028  0x6ac0c7,
3029  0x6d4f30,
3030  0x6fe4ba,
3031  0x728177,
3032  0x75257d,
3033  0x77d0df,
3034  0x7a83b3,
3035  0x7d3e0c
3036  };
3037  return ((((op >> 6) & ((1 << FP32_EXP_BITS) - 1)) << FP32_MANT_BITS) |
3038  coeff[op & ((1 << 6) - 1)]);
3039 }
3040 
3041 template <>
3042 uint64_t
3043 fplibExpA(uint64_t op)
3044 {
3045  static uint64_t coeff[64] = {
3046  0x0000000000000ULL,
3047  0x02c9a3e778061ULL,
3048  0x059b0d3158574ULL,
3049  0x0874518759bc8ULL,
3050  0x0b5586cf9890fULL,
3051  0x0e3ec32d3d1a2ULL,
3052  0x11301d0125b51ULL,
3053  0x1429aaea92de0ULL,
3054  0x172b83c7d517bULL,
3055  0x1a35beb6fcb75ULL,
3056  0x1d4873168b9aaULL,
3057  0x2063b88628cd6ULL,
3058  0x2387a6e756238ULL,
3059  0x26b4565e27cddULL,
3060  0x29e9df51fdee1ULL,
3061  0x2d285a6e4030bULL,
3062  0x306fe0a31b715ULL,
3063  0x33c08b26416ffULL,
3064  0x371a7373aa9cbULL,
3065  0x3a7db34e59ff7ULL,
3066  0x3dea64c123422ULL,
3067  0x4160a21f72e2aULL,
3068  0x44e086061892dULL,
3069  0x486a2b5c13cd0ULL,
3070  0x4bfdad5362a27ULL,
3071  0x4f9b2769d2ca7ULL,
3072  0x5342b569d4f82ULL,
3073  0x56f4736b527daULL,
3074  0x5ab07dd485429ULL,
3075  0x5e76f15ad2148ULL,
3076  0x6247eb03a5585ULL,
3077  0x6623882552225ULL,
3078  0x6a09e667f3bcdULL,
3079  0x6dfb23c651a2fULL,
3080  0x71f75e8ec5f74ULL,
3081  0x75feb564267c9ULL,
3082  0x7a11473eb0187ULL,
3083  0x7e2f336cf4e62ULL,
3084  0x82589994cce13ULL,
3085  0x868d99b4492edULL,
3086  0x8ace5422aa0dbULL,
3087  0x8f1ae99157736ULL,
3088  0x93737b0cdc5e5ULL,
3089  0x97d829fde4e50ULL,
3090  0x9c49182a3f090ULL,
3091  0xa0c667b5de565ULL,
3092  0xa5503b23e255dULL,
3093  0xa9e6b5579fdbfULL,
3094  0xae89f995ad3adULL,
3095  0xb33a2b84f15fbULL,
3096  0xb7f76f2fb5e47ULL,
3097  0xbcc1e904bc1d2ULL,
3098  0xc199bdd85529cULL,
3099  0xc67f12e57d14bULL,
3100  0xcb720dcef9069ULL,
3101  0xd072d4a07897cULL,
3102  0xd5818dcfba487ULL,
3103  0xda9e603db3285ULL,
3104  0xdfc97337b9b5fULL,
3105  0xe502ee78b3ff6ULL,
3106  0xea4afa2a490daULL,
3107  0xefa1bee615a27ULL,
3108  0xf50765b6e4540ULL,
3109  0xfa7c1819e90d8ULL
3110  };
3111  return ((((op >> 6) & ((1 << FP64_EXP_BITS) - 1)) << FP64_MANT_BITS) |
3112  coeff[op & ((1 << 6) - 1)]);
3113 }
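// fplibExpA implements the FEXPA table lookup: the low index bits of
// the operand (5 for half precision, 6 for single and double) select a
// table entry holding the fraction bits of 2^(i/32) or 2^(i/64), and
// the remaining operand bits are placed directly in the exponent field,
// so the result approximates 2^x. For example, the middle
// double-precision entry 0x6a09e667f3bcdULL is the 52-bit fraction of
// 2^(32/64) = sqrt(2) = 1.6a09e667f3bcd... in hex.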
3114 
3115 static uint16_t
3116 fp16_repack(int sgn, int exp, uint16_t mnt)
3117 {
3118  return fp16_pack(sgn, mnt >> FP16_MANT_BITS ? exp : 0, mnt);
3119 }
3120 
3121 static uint32_t
3122 fp32_repack(int sgn, int exp, uint32_t mnt)
3123 {
3124  return fp32_pack(sgn, mnt >> FP32_MANT_BITS ? exp : 0, mnt);
3125 }
3126 
3127 static uint64_t
3128 fp64_repack(int sgn, int exp, uint64_t mnt)
3129 {
3130  return fp64_pack(sgn, mnt >> FP64_MANT_BITS ? exp : 0, mnt);
3131 }
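// fp*_repack re-encodes an unpacked sign/exponent/mantissa triple
// without rounding: if the mantissa's implicit bit is clear the value
// is subnormal, so the stored exponent field must be zero; otherwise
// the original exponent is kept.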
3132 
3133 static void
3134 fp16_minmaxnum(uint16_t *op1, uint16_t *op2, int sgn)
3135 {
3136  // Treat a single quiet-NaN as +Infinity/-Infinity
3137  if (!((uint16_t)~(*op1 << 1) >> FP16_MANT_BITS) &&
3138  (uint16_t)~(*op2 << 1) >> FP16_MANT_BITS)
3139  *op1 = fp16_infinity(sgn);
3140  if (!((uint16_t)~(*op2 << 1) >> FP16_MANT_BITS) &&
3141  (uint16_t)~(*op1 << 1) >> FP16_MANT_BITS)
3142  *op2 = fp16_infinity(sgn);
3143 }
3144 
3145 static void
3146 fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn)
3147 {
3148  // Treat a single quiet-NaN as +Infinity/-Infinity
3149  if (!((uint32_t)~(*op1 << 1) >> FP32_MANT_BITS) &&
3150  (uint32_t)~(*op2 << 1) >> FP32_MANT_BITS)
3151  *op1 = fp32_infinity(sgn);
3152  if (!((uint32_t)~(*op2 << 1) >> FP32_MANT_BITS) &&
3153  (uint32_t)~(*op1 << 1) >> FP32_MANT_BITS)
3154  *op2 = fp32_infinity(sgn);
3155 }
3156 
3157 static void
3158 fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn)
3159 {
3160  // Treat a single quiet-NaN as +Infinity/-Infinity
3161  if (!((uint64_t)~(*op1 << 1) >> FP64_MANT_BITS) &&
3162  (uint64_t)~(*op2 << 1) >> FP64_MANT_BITS)
3163  *op1 = fp64_infinity(sgn);
3164  if (!((uint64_t)~(*op2 << 1) >> FP64_MANT_BITS) &&
3165  (uint64_t)~(*op1 << 1) >> FP64_MANT_BITS)
3166  *op2 = fp64_infinity(sgn);
3167 }
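// The test ~(op << 1) >> MANT_BITS is zero exactly when the exponent
// field and the top mantissa bit are all ones, i.e. when op is a quiet
// NaN of either sign. The minmaxnum helpers therefore replace a single
// quiet-NaN operand with the infinity that loses the comparison
// (-Infinity for max, +Infinity for min), so the subsequent
// fplibMax/fplibMin selects the numeric operand - the IEEE-754
// maxNum/minNum behaviour. Signalling NaNs are left alone and still
// propagate (with IOC) through the NaN processing in fplibMax/fplibMin.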
3168 
3169 template <>
3170 uint16_t
3171 fplibMax(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3172 {
3173  int mode = modeConv(fpscr);
3174  int flags = 0;
3175  int sgn1, exp1, sgn2, exp2;
3176  uint16_t mnt1, mnt2, x, result;
3177 
3178  fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3179  fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3180 
3181  if ((x = fp16_process_NaNs(op1, op2, mode, &flags))) {
3182  result = x;
3183  } else {
3184  result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3185  fp16_repack(sgn1, exp1, mnt1) :
3186  fp16_repack(sgn2, exp2, mnt2));
3187  }
3188  set_fpscr0(fpscr, flags);
3189  return result;
3190 }
3191 
3192 template <>
3193 uint32_t
3194 fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3195 {
3196  int mode = modeConv(fpscr);
3197  int flags = 0;
3198  int sgn1, exp1, sgn2, exp2;
3199  uint32_t mnt1, mnt2, x, result;
3200 
3201  fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3202  fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3203 
3204  if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
3205  result = x;
3206  } else {
3207  result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3208  fp32_repack(sgn1, exp1, mnt1) :
3209  fp32_repack(sgn2, exp2, mnt2));
3210  }
3211  set_fpscr0(fpscr, flags);
3212  return result;
3213 }
3214 
3215 template <>
3216 uint64_t
3217 fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3218 {
3219  int mode = modeConv(fpscr);
3220  int flags = 0;
3221  int sgn1, exp1, sgn2, exp2;
3222  uint64_t mnt1, mnt2, x, result;
3223 
3224  fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3225  fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3226 
3227  if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
3228  result = x;
3229  } else {
3230  result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3231  fp64_repack(sgn1, exp1, mnt1) :
3232  fp64_repack(sgn2, exp2, mnt2));
3233  }
3234  set_fpscr0(fpscr, flags);
3235  return result;
3236 }
3237 
3238 template <>
3239 uint16_t
3240 fplibMaxNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3241 {
3242  fp16_minmaxnum(&op1, &op2, 1);
3243  return fplibMax<uint16_t>(op1, op2, fpscr);
3244 }
3245 
3246 template <>
3247 uint32_t
3248 fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3249 {
3250  fp32_minmaxnum(&op1, &op2, 1);
3251  return fplibMax<uint32_t>(op1, op2, fpscr);
3252 }
3253 
3254 template <>
3255 uint64_t
3256 fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3257 {
3258  fp64_minmaxnum(&op1, &op2, 1);
3259  return fplibMax<uint64_t>(op1, op2, fpscr);
3260 }
3261 
3262 template <>
3263 uint16_t
3264 fplibMin(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3265 {
3266  int mode = modeConv(fpscr);
3267  int flags = 0;
3268  int sgn1, exp1, sgn2, exp2;
3269  uint16_t mnt1, mnt2, x, result;
3270 
3271  fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3272  fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3273 
3274  if ((x = fp16_process_NaNs(op1, op2, mode, &flags))) {
3275  result = x;
3276  } else {
3277  result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3278  fp16_repack(sgn1, exp1, mnt1) :
3279  fp16_repack(sgn2, exp2, mnt2));
3280  }
3281  set_fpscr0(fpscr, flags);
3282  return result;
3283 }
3284 
3285 template <>
3286 uint32_t
3287 fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3288 {
3289  int mode = modeConv(fpscr);
3290  int flags = 0;
3291  int sgn1, exp1, sgn2, exp2;
3292  uint32_t mnt1, mnt2, x, result;
3293 
3294  fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3295  fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3296 
3297  if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
3298  result = x;
3299  } else {
3300  result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3301  fp32_repack(sgn1, exp1, mnt1) :
3302  fp32_repack(sgn2, exp2, mnt2));
3303  }
3304  set_fpscr0(fpscr, flags);
3305  return result;
3306 }
3307 
3308 template <>
3309 uint64_t
3310 fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3311 {
3312  int mode = modeConv(fpscr);
3313  int flags = 0;
3314  int sgn1, exp1, sgn2, exp2;
3315  uint64_t mnt1, mnt2, x, result;
3316 
3317  fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3318  fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3319 
3320  if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
3321  result = x;
3322  } else {
3323  result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3324  fp64_repack(sgn1, exp1, mnt1) :
3325  fp64_repack(sgn2, exp2, mnt2));
3326  }
3327  set_fpscr0(fpscr, flags);
3328  return result;
3329 }
3330 
3331 template <>
3332 uint16_t
3333 fplibMinNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3334 {
3335  fp16_minmaxnum(&op1, &op2, 0);
3336  return fplibMin<uint16_t>(op1, op2, fpscr);
3337 }
3338 
3339 template <>
3340 uint32_t
3341 fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3342 {
3343  fp32_minmaxnum(&op1, &op2, 0);
3344  return fplibMin<uint32_t>(op1, op2, fpscr);
3345 }
3346 
3347 template <>
3348 uint64_t
3349 fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3350 {
3351  fp64_minmaxnum(&op1, &op2, 0);
3352  return fplibMin<uint64_t>(op1, op2, fpscr);
3353 }
3354 
3355 template <>
3356 uint16_t
3357 fplibMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3358 {
3359  int flags = 0;
3360  uint16_t result = fp16_mul(op1, op2, modeConv(fpscr), &flags);
3361  set_fpscr0(fpscr, flags);
3362  return result;
3363 }
3364 
3365 template <>
3366 uint32_t
3367 fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3368 {
3369  int flags = 0;
3370  uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags);
3371  set_fpscr0(fpscr, flags);
3372  return result;
3373 }
3374 
3375 template <>
3376 uint64_t
3377 fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3378 {
3379  int flags = 0;
3380  uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags);
3381  set_fpscr0(fpscr, flags);
3382  return result;
3383 }
3384 
3385 template <>
3386 uint16_t
3387 fplibMulX(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3388 {
3389  int mode = modeConv(fpscr);
3390  int flags = 0;
3391  int sgn1, exp1, sgn2, exp2;
3392  uint16_t mnt1, mnt2, result;
3393 
3394  fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3395  fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3396 
3397  result = fp16_process_NaNs(op1, op2, mode, &flags);
3398  if (!result) {
3399  if ((exp1 == FP16_EXP_INF && !mnt2) ||
3400  (exp2 == FP16_EXP_INF && !mnt1)) {
3401  result = fp16_FPTwo(sgn1 ^ sgn2);
3402  } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
3403  result = fp16_infinity(sgn1 ^ sgn2);
3404  } else if (!mnt1 || !mnt2) {
3405  result = fp16_zero(sgn1 ^ sgn2);
3406  } else {
3407  result = fp16_mul(op1, op2, mode, &flags);
3408  }
3409  }
3410 
3411  set_fpscr0(fpscr, flags);
3412 
3413  return result;
3414 }
3415 
3416 template <>
3417 uint32_t
3418 fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3419 {
3420  int mode = modeConv(fpscr);
3421  int flags = 0;
3422  int sgn1, exp1, sgn2, exp2;
3423  uint32_t mnt1, mnt2, result;
3424 
3425  fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3426  fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3427 
3428  result = fp32_process_NaNs(op1, op2, mode, &flags);
3429  if (!result) {
3430  if ((exp1 == FP32_EXP_INF && !mnt2) ||
3431  (exp2 == FP32_EXP_INF && !mnt1)) {
3432  result = fp32_FPTwo(sgn1 ^ sgn2);
3433  } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
3434  result = fp32_infinity(sgn1 ^ sgn2);
3435  } else if (!mnt1 || !mnt2) {
3436  result = fp32_zero(sgn1 ^ sgn2);
3437  } else {
3438  result = fp32_mul(op1, op2, mode, &flags);
3439  }
3440  }
3441 
3442  set_fpscr0(fpscr, flags);
3443 
3444  return result;
3445 }
3446 
3447 template <>
3448 uint64_t
3449 fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3450 {
3451  int mode = modeConv(fpscr);
3452  int flags = 0;
3453  int sgn1, exp1, sgn2, exp2;
3454  uint64_t mnt1, mnt2, result;
3455 
3456  fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3457  fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3458 
3459  result = fp64_process_NaNs(op1, op2, mode, &flags);
3460  if (!result) {
3461  if ((exp1 == FP64_EXP_INF && !mnt2) ||
3462  (exp2 == FP64_EXP_INF && !mnt1)) {
3463  result = fp64_FPTwo(sgn1 ^ sgn2);
3464  } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
3465  result = fp64_infinity(sgn1 ^ sgn2);
3466  } else if (!mnt1 || !mnt2) {
3467  result = fp64_zero(sgn1 ^ sgn2);
3468  } else {
3469  result = fp64_mul(op1, op2, mode, &flags);
3470  }
3471  }
3472 
3473  set_fpscr0(fpscr, flags);
3474 
3475  return result;
3476 }
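// FMULX differs from an ordinary multiply only in the
// infinity-times-zero case, which returns +/-2.0 (with the usual XOR
// of the signs) instead of raising Invalid Operation and producing the
// default NaN.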
3477 
3478 template <>
3479 uint16_t
3480 fplibNeg(uint16_t op)
3481 {
3482  return op ^ 1ULL << (FP16_BITS - 1);
3483 }
3484 
3485 template <>
3486 uint32_t
3487 fplibNeg(uint32_t op)
3488 {
3489  return op ^ 1ULL << (FP32_BITS - 1);
3490 }
3491 
3492 template <>
3493 uint64_t
3494 fplibNeg(uint64_t op)
3495 {
3496  return op ^ 1ULL << (FP64_BITS - 1);
3497 }
3498 
3499 static const uint8_t recip_sqrt_estimate[256] = {
3500  255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228,
3501  226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203,
3502  201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181,
3503  180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163,
3504  162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146,
3505  145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131,
3506  131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118,
3507  118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106,
3508  105, 104, 103, 101, 100, 99, 97, 96, 95, 93, 92, 91, 90, 88, 87, 86,
3509  85, 84, 82, 81, 80, 79, 78, 77, 76, 75, 74, 72, 71, 70, 69, 68,
3510  67, 66, 65, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 55, 54, 53,
3511  52, 51, 51, 50, 49, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 40,
3512  39, 38, 38, 37, 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28,
3513  28, 27, 26, 26, 25, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 18,
3514  17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 10, 10, 9, 9,
3515  8, 8, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0
3516 };
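// recip_sqrt_estimate holds the 8-bit fraction of 1/sqrt(x) as defined
// by the Arm ARM's RecipSqrtEstimate pseudocode: the index's top bit is
// the inverted exponent parity, selecting between arguments normalised
// to [0.25, 0.5) and [0.5, 1.0), and the remaining seven index bits
// come from the top of the mantissa.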
3517 
3518 template <>
3519 uint16_t
3520 fplibRSqrtEstimate(uint16_t op, FPSCR &fpscr)
3521 {
3522  int mode = modeConv(fpscr);
3523  int flags = 0;
3524  int sgn, exp;
3525  uint16_t mnt, result;
3526 
3527  fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3528 
3529  if (fp16_is_NaN(exp, mnt)) {
3530  result = fp16_process_NaN(op, mode, &flags);
3531  } else if (!mnt) {
3532  result = fp16_infinity(sgn);
3533  flags |= FPLIB_DZC;
3534  } else if (sgn) {
3535  result = fp16_defaultNaN();
3536  flags |= FPLIB_IOC;
3537  } else if (exp == FP16_EXP_INF) {
3538  result = fp16_zero(0);
3539  } else {
3540  exp += FP16_EXP_BITS;
3541  mnt = fp16_normalise(mnt, &exp);
3542  mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3543  (mnt >> (FP16_BITS - 8) & 127)];
3544  result = fp16_pack(0, (3 * FP16_EXP_BIAS - exp - 1) >> 1,
3545  mnt << (FP16_MANT_BITS - 8));
3546  }
3547 
3548  set_fpscr0(fpscr, flags);
3549 
3550  return result;
3551 }
3552 
3553 template <>
3554 uint32_t
3555 fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr)
3556 {
3557  int mode = modeConv(fpscr);
3558  int flags = 0;
3559  int sgn, exp;
3560  uint32_t mnt, result;
3561 
3562  fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3563 
3564  if (fp32_is_NaN(exp, mnt)) {
3565  result = fp32_process_NaN(op, mode, &flags);
3566  } else if (!mnt) {
3567  result = fp32_infinity(sgn);
3568  flags |= FPLIB_DZC;
3569  } else if (sgn) {
3570  result = fp32_defaultNaN();
3571  flags |= FPLIB_IOC;
3572  } else if (exp == FP32_EXP_INF) {
3573  result = fp32_zero(0);
3574  } else {
3575  exp += FP32_EXP_BITS;
3576  mnt = fp32_normalise(mnt, &exp);
3577  mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3578  (mnt >> (FP32_BITS - 8) & 127)];
3579  result = fp32_pack(0, (3 * FP32_EXP_BIAS - exp - 1) >> 1,
3580  mnt << (FP32_MANT_BITS - 8));
3581  }
3582 
3583  set_fpscr0(fpscr, flags);
3584 
3585  return result;
3586 }
3587 
3588 template <>
3589 uint64_t
3590 fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr)
3591 {
3592  int mode = modeConv(fpscr);
3593  int flags = 0;
3594  int sgn, exp;
3595  uint64_t mnt, result;
3596 
3597  fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3598 
3599  if (fp64_is_NaN(exp, mnt)) {
3600  result = fp64_process_NaN(op, mode, &flags);
3601  } else if (!mnt) {
3602  result = fp64_infinity(sgn);
3603  flags |= FPLIB_DZC;
3604  } else if (sgn) {
3605  result = fp64_defaultNaN();
3606  flags |= FPLIB_IOC;
3607  } else if (exp == FP64_EXP_INF) {
3608  result = fp64_zero(0);
3609  } else {
3610  exp += FP64_EXP_BITS;
3611  mnt = fp64_normalise(mnt, &exp);
3612  mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3613  (mnt >> (FP64_BITS - 8) & 127)];
3614  result = fp64_pack(0, (3 * FP64_EXP_BIAS - exp - 1) >> 1,
3615  mnt << (FP64_MANT_BITS - 8));
3616  }
3617 
3618  set_fpscr0(fpscr, flags);
3619 
3620  return result;
3621 }
3622 
3623 template <>
3624 uint16_t
3625 fplibRSqrtStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3626 {
3627  int mode = modeConv(fpscr);
3628  int flags = 0;
3629  int sgn1, exp1, sgn2, exp2;
3630  uint16_t mnt1, mnt2, result;
3631 
3632  op1 = fplibNeg<uint16_t>(op1);
3633  fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3634  fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3635 
3636  result = fp16_process_NaNs(op1, op2, mode, &flags);
3637  if (!result) {
3638  if ((exp1 == FP16_EXP_INF && !mnt2) ||
3639  (exp2 == FP16_EXP_INF && !mnt1)) {
3640  result = fp16_FPOnePointFive(0);
3641  } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
3642  result = fp16_infinity(sgn1 ^ sgn2);
3643  } else {
3644  result = fp16_muladd(fp16_FPThree(0), op1, op2, -1, mode, &flags);
3645  }
3646  }
3647 
3648  set_fpscr0(fpscr, flags);
3649 
3650  return result;
3651 }
3652 
3653 template <>
3654 uint32_t
3655 fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3656 {
3657  int mode = modeConv(fpscr);
3658  int flags = 0;
3659  int sgn1, exp1, sgn2, exp2;
3660  uint32_t mnt1, mnt2, result;
3661 
3662  op1 = fplibNeg<uint32_t>(op1);
3663  fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3664  fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3665 
3666  result = fp32_process_NaNs(op1, op2, mode, &flags);
3667  if (!result) {
3668  if ((exp1 == FP32_EXP_INF && !mnt2) ||
3669  (exp2 == FP32_EXP_INF && !mnt1)) {
3670  result = fp32_FPOnePointFive(0);
3671  } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
3672  result = fp32_infinity(sgn1 ^ sgn2);
3673  } else {
3674  result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags);
3675  }
3676  }
3677 
3678  set_fpscr0(fpscr, flags);
3679 
3680  return result;
3681 }
3682 
3683 template <>
3684 uint64_t
3685 fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3686 {
3687  int mode = modeConv(fpscr);
3688  int flags = 0;
3689  int sgn1, exp1, sgn2, exp2;
3690  uint64_t mnt1, mnt2, result;
3691 
3692  op1 = fplibNeg<uint64_t>(op1);
3693  fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3694  fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3695 
3696  result = fp64_process_NaNs(op1, op2, mode, &flags);
3697  if (!result) {
3698  if ((exp1 == FP64_EXP_INF && !mnt2) ||
3699  (exp2 == FP64_EXP_INF && !mnt1)) {
3700  result = fp64_FPOnePointFive(0);
3701  } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
3702  result = fp64_infinity(sgn1 ^ sgn2);
3703  } else {
3704  result = fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags);
3705  }
3706  }
3707 
3708  set_fpscr0(fpscr, flags);
3709 
3710  return result;
3711 }
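// FRSQRTS computes (3 - op1*op2) / 2 with a single rounding: op1 is
// negated up front so the fused multiply-add produces 3 + (-op1)*op2,
// and the final scale argument of -1 halves the fused result. The
// infinity-times-zero special case returns exactly 1.5.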
3712 
3713 template <>
3714 uint16_t
3715 fplibRecipEstimate(uint16_t op, FPSCR &fpscr)
3716 {
3717  int mode = modeConv(fpscr);
3718  int flags = 0;
3719  int sgn, exp;
3720  uint16_t mnt, result;
3721 
3722  fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3723 
3724  if (fp16_is_NaN(exp, mnt)) {
3725  result = fp16_process_NaN(op, mode, &flags);
3726  } else if (exp == FP16_EXP_INF) {
3727  result = fp16_zero(sgn);
3728  } else if (!mnt) {
3729  result = fp16_infinity(sgn);
3730  flags |= FPLIB_DZC;
3731  } else if (!((uint16_t)(op << 1) >> (FP16_MANT_BITS - 1))) {
3732  bool overflow_to_inf = false;
3733  switch (FPCRRounding(fpscr)) {
3734  case FPRounding_TIEEVEN:
3735  overflow_to_inf = true;
3736  break;
3737  case FPRounding_POSINF:
3738  overflow_to_inf = !sgn;
3739  break;
3740  case FPRounding_NEGINF:
3741  overflow_to_inf = sgn;
3742  break;
3743  case FPRounding_ZERO:
3744  overflow_to_inf = false;
3745  break;
3746  default:
3747  panic("Unrecognized FP rounding mode");
3748  }
3749  result = overflow_to_inf ? fp16_infinity(sgn) : fp16_max_normal(sgn);
3750  flags |= FPLIB_OFC | FPLIB_IXC;
3751  } else if (fpscr.fz16 && exp >= 2 * FP16_EXP_BIAS - 1) {
3752  result = fp16_zero(sgn);
3753  flags |= FPLIB_UFC;
3754  } else {
3755  exp += FP16_EXP_BITS;
3756  mnt = fp16_normalise(mnt, &exp);
3757  int result_exp = 2 * FP16_EXP_BIAS - 1 - exp;
3758  uint16_t fraction = (((uint32_t)1 << 19) /
3759  (mnt >> (FP16_BITS - 10) | 1) + 1) >> 1;
3760  fraction <<= FP16_MANT_BITS - 8;
3761  if (result_exp == 0) {
3762  fraction >>= 1;
3763  } else if (result_exp == -1) {
3764  fraction >>= 2;
3765  result_exp = 0;
3766  }
3767  result = fp16_pack(sgn, result_exp, fraction);
3768  }
3769 
3770  set_fpscr0(fpscr, flags);
3771 
3772  return result;
3773 }
3774 
3775 template <>
3776 uint32_t
3777 fplibRecipEstimate(uint32_t op, FPSCR &fpscr)
3778 {
3779  int mode = modeConv(fpscr);
3780  int flags = 0;
3781  int sgn, exp;
3782  uint32_t mnt, result;
3783 
3784  fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3785 
3786  if (fp32_is_NaN(exp, mnt)) {
3787  result = fp32_process_NaN(op, mode, &flags);
3788  } else if (exp == FP32_EXP_INF) {
3789  result = fp32_zero(sgn);
3790  } else if (!mnt) {
3791  result = fp32_infinity(sgn);
3792  flags |= FPLIB_DZC;
3793  } else if (!((uint32_t)(op << 1) >> (FP32_MANT_BITS - 1))) {
3794  bool overflow_to_inf = false;
3795  switch (FPCRRounding(fpscr)) {
3796  case FPRounding_TIEEVEN:
3797  overflow_to_inf = true;
3798  break;
3799  case FPRounding_POSINF:
3800  overflow_to_inf = !sgn;
3801  break;
3802  case FPRounding_NEGINF:
3803  overflow_to_inf = sgn;
3804  break;
3805  case FPRounding_ZERO:
3806  overflow_to_inf = false;
3807  break;
3808  default:
3809  panic("Unrecognized FP rounding mode");
3810  }
3811  result = overflow_to_inf ? fp32_infinity(sgn) : fp32_max_normal(sgn);
3812  flags |= FPLIB_OFC | FPLIB_IXC;
3813  } else if (fpscr.fz && exp >= 2 * FP32_EXP_BIAS - 1) {
3814  result = fp32_zero(sgn);
3815  flags |= FPLIB_UFC;
3816  } else {
3817  exp += FP32_EXP_BITS;
3818  mnt = fp32_normalise(mnt, &exp);
3819  int result_exp = 2 * FP32_EXP_BIAS - 1 - exp;
3820  uint32_t fraction = (((uint32_t)1 << 19) /
3821  (mnt >> (FP32_BITS - 10) | 1) + 1) >> 1;
3822  fraction <<= FP32_MANT_BITS - 8;
3823  if (result_exp == 0) {
3824  fraction >>= 1;
3825  } else if (result_exp == -1) {
3826  fraction >>= 2;
3827  result_exp = 0;
3828  }
3829  result = fp32_pack(sgn, result_exp, fraction);
3830  }
3831 
3832  set_fpscr0(fpscr, flags);
3833 
3834  return result;
3835 }
3836 
3837 template <>
3838 uint64_t
3839 fplibRecipEstimate(uint64_t op, FPSCR &fpscr)
3840 {
3841  int mode = modeConv(fpscr);
3842  int flags = 0;
3843  int sgn, exp;
3844  uint64_t mnt, result;
3845 
3846  fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3847 
3848  if (fp64_is_NaN(exp, mnt)) {
3849  result = fp64_process_NaN(op, mode, &flags);
3850  } else if (exp == FP64_EXP_INF) {
3851  result = fp64_zero(sgn);
3852  } else if (!mnt) {
3853  result = fp64_infinity(sgn);
3854  flags |= FPLIB_DZC;
3855  } else if (!((uint64_t)(op << 1) >> (FP64_MANT_BITS - 1))) {
3856  bool overflow_to_inf = false;
3857  switch (FPCRRounding(fpscr)) {
3858  case FPRounding_TIEEVEN:
3859  overflow_to_inf = true;
3860  break;
3861  case FPRounding_POSINF:
3862  overflow_to_inf = !sgn;
3863  break;
3864  case FPRounding_NEGINF:
3865  overflow_to_inf = sgn;
3866  break;
3867  case FPRounding_ZERO:
3868  overflow_to_inf = false;
3869  break;
3870  default:
3871  panic("Unrecognized FP rounding mode");
3872  }
3873  result = overflow_to_inf ? fp64_infinity(sgn) : fp64_max_normal(sgn);
3874  flags |= FPLIB_OFC | FPLIB_IXC;
3875  } else if (fpscr.fz && exp >= 2 * FP64_EXP_BIAS - 1) {
3876  result = fp64_zero(sgn);
3877  flags |= FPLIB_UFC;
3878  } else {
3879  exp += FP64_EXP_BITS;
3880  mnt = fp64_normalise(mnt, &exp);
3881  int result_exp = 2 * FP64_EXP_BIAS - 1 - exp;
3882  uint64_t fraction = (((uint32_t)1 << 19) /
3883  (mnt >> (FP64_BITS - 10) | 1) + 1) >> 1;
3884  fraction <<= FP64_MANT_BITS - 8;
3885  if (result_exp == 0) {
3886  fraction >>= 1;
3887  } else if (result_exp == -1) {
3888  fraction >>= 2;
3889  result_exp = 0;
3890  }
3891  result = fp64_pack(sgn, result_exp, fraction);
3892  }
3893 
3894  set_fpscr0(fpscr, flags);
3895 
3896  return result;
3897 }
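// The estimate path mirrors the Arm ARM's RecipEstimate pseudocode:
// the fraction is a rounded quotient 2^19 / m of the operand's top
// mantissa bits, giving about eight fraction bits, and result_exp
// values of 0 and -1 mark outputs that must be encoded as subnormals,
// handled by shifting the fraction right one or two places. The earlier
// branches cover overflow (an operand so small that its reciprocal
// exceeds the format, resolved to infinity or max-normal by the
// rounding mode) and flush-to-zero of results that would underflow.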
3898 
3899 template <>
3900 uint16_t
3901 fplibRecipStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3902 {
3903  int mode = modeConv(fpscr);
3904  int flags = 0;
3905  int sgn1, exp1, sgn2, exp2;
3906  uint16_t mnt1, mnt2, result;
3907 
3908  op1 = fplibNeg<uint16_t>(op1);
3909  fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3910  fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3911 
3912  result = fp16_process_NaNs(op1, op2, mode, &flags);
3913  if (!result) {
3914  if ((exp1 == FP16_EXP_INF && !mnt2) ||
3915  (exp2 == FP16_EXP_INF && !mnt1)) {
3916  result = fp16_FPTwo(0);
3917  } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
3918  result = fp16_infinity(sgn1 ^ sgn2);
3919  } else {
3920  result = fp16_muladd(fp16_FPTwo(0), op1, op2, 0, mode, &flags);
3921  }
3922  }
3923 
3924  set_fpscr0(fpscr, flags);
3925 
3926  return result;
3927 }
3928 
3929 template <>
3930 uint32_t
3931 fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3932 {
3933  int mode = modeConv(fpscr);
3934  int flags = 0;
3935  int sgn1, exp1, sgn2, exp2;
3936  uint32_t mnt1, mnt2, result;
3937 
3938  op1 = fplibNeg<uint32_t>(op1);
3939  fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3940  fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3941 
3942  result = fp32_process_NaNs(op1, op2, mode, &flags);
3943  if (!result) {
3944  if ((exp1 == FP32_EXP_INF && !mnt2) ||
3945  (exp2 == FP32_EXP_INF && !mnt1)) {
3946  result = fp32_FPTwo(0);
3947  } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
3948  result = fp32_infinity(sgn1 ^ sgn2);
3949  } else {
3950  result = fp32_muladd(fp32_FPTwo(0), op1, op2, 0, mode, &flags);
3951  }
3952  }
3953 
3954  set_fpscr0(fpscr, flags);
3955 
3956  return result;
3957 }
3958 
3959 template <>
3960 uint64_t
3961 fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3962 {
3963  int mode = modeConv(fpscr);
3964  int flags = 0;
3965  int sgn1, exp1, sgn2, exp2;
3966  uint64_t mnt1, mnt2, result;
3967 
3968  op1 = fplibNeg<uint64_t>(op1);
3969  fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3970  fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3971 
3972  result = fp64_process_NaNs(op1, op2, mode, &flags);
3973  if (!result) {
3974  if ((exp1 == FP64_EXP_INF && !mnt2) ||
3975  (exp2 == FP64_EXP_INF && !mnt1)) {
3976  result = fp64_FPTwo(0);
3977  } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
3978  result = fp64_infinity(sgn1 ^ sgn2);
3979  } else {
3980  result = fp64_muladd(fp64_FPTwo(0), op1, op2, 0, mode, &flags);
3981  }
3982  }
3983 
3984  set_fpscr0(fpscr, flags);
3985 
3986  return result;
3987 }
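// FRECPS computes 2 - op1*op2 with a single rounding via the fused
// multiply-add on the negated op1; infinity times zero returns exactly
// 2.0 rather than a NaN.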
3988 
3989 template <>
3990 uint16_t
3991 fplibRecpX(uint16_t op, FPSCR &fpscr)
3992 {
3993  int mode = modeConv(fpscr);
3994  int flags = 0;
3995  int sgn, exp;
3996  uint16_t mnt, result;
3997 
3998  fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3999 
4000  if (fp16_is_NaN(exp, mnt)) {
4001  result = fp16_process_NaN(op, mode, &flags);
4002  }
4003  else {
4004  if (!mnt) { // Zero and denormals
4005  result = fp16_pack(sgn, FP16_EXP_INF - 1, 0);
4006  } else { // Infinities and normals
4007  result = fp16_pack(sgn, exp ^ FP16_EXP_INF, 0);
4008  }
4009  }
4010 
4011  set_fpscr0(fpscr, flags);
4012 
4013  return result;
4014 }
4015 
4016 template <>
4017 uint32_t
4018 fplibRecpX(uint32_t op, FPSCR &fpscr)
4019 {
4020  int mode = modeConv(fpscr);
4021  int flags = 0;
4022  int sgn, exp;
4023  uint32_t mnt, result;
4024 
4025  fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4026 
4027  if (fp32_is_NaN(exp, mnt)) {
4028  result = fp32_process_NaN(op, mode, &flags);
4029  }
4030  else {
4031  if (!mnt) { // Zero and denormals
4032  result = fp32_pack(sgn, FP32_EXP_INF - 1, 0);
4033  } else { // Infinities and normals
4034  result = fp32_pack(sgn, exp ^ FP32_EXP_INF, 0);
4035  }
4036  }
4037 
4038  set_fpscr0(fpscr, flags);
4039 
4040  return result;
4041 }
4042 
4043 template <>
4044 uint64_t
4045 fplibRecpX(uint64_t op, FPSCR &fpscr)
4046 {
4047  int mode = modeConv(fpscr);
4048  int flags = 0;
4049  int sgn, exp;
4050  uint64_t mnt, result;
4051 
4052  fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4053 
4054  if (fp64_is_NaN(exp, mnt)) {
4055  result = fp64_process_NaN(op, mode, &flags);
4056  }
4057  else {
4058  if (!mnt) { // Zero and denormals
4059  result = fp64_pack(sgn, FP64_EXP_INF - 1, 0);
4060  } else { // Infinities and normals
4061  result = fp64_pack(sgn, exp ^ FP64_EXP_INF, 0);
4062  }
4063  }
4064 
4065  set_fpscr0(fpscr, flags);
4066 
4067  return result;
4068 }
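// FRECPX produces a crude reciprocal used for argument reduction: it
// keeps the sign, complements the exponent field (exp ^ EXP_INF is
// bitwise NOT within the field width) and clears the mantissa. Zero and
// flushed-denormal inputs map to the largest exponent value below
// infinity, so no exception is raised.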
4069 
4070 template <>
4071 uint16_t
4072 fplibRoundInt(uint16_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
4073 {
4074  int expint = FP16_EXP_BIAS + FP16_MANT_BITS;
4075  int mode = modeConv(fpscr);
4076  int flags = 0;
4077  int sgn, exp;
4078  uint16_t mnt, result;
4079 
4080  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4081  fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4082 
4083  // Handle NaNs, infinities and zeroes:
4084  if (fp16_is_NaN(exp, mnt)) {
4085  result = fp16_process_NaN(op, mode, &flags);
4086  } else if (exp == FP16_EXP_INF) {
4087  result = fp16_infinity(sgn);
4088  } else if (!mnt) {
4089  result = fp16_zero(sgn);
4090  } else if (exp >= expint) {
4091  // There are no fractional bits
4092  result = op;
4093  } else {
4094  // Truncate towards zero:
4095  uint16_t x = expint - exp >= FP16_BITS ? 0 : mnt >> (expint - exp);
4096  int err = exp < expint - FP16_BITS ? 1 :
4097  ((mnt << 1 >> (expint - exp - 1) & 3) |
4098  ((uint16_t)(mnt << 2 << (FP16_BITS + exp - expint)) != 0));
4099  switch (rounding) {
4100  case FPRounding_TIEEVEN:
4101  x += (err == 3 || (err == 2 && (x & 1)));
4102  break;
4103  case FPRounding_POSINF:
4104  x += err && !sgn;
4105  break;
4106  case FPRounding_NEGINF:
4107  x += err && sgn;
4108  break;
4109  case FPRounding_ZERO:
4110  break;
4111  case FPRounding_TIEAWAY:
4112  x += err >> 1;
4113  break;
4114  default:
4115  panic("Unrecognized FP rounding mode");
4116  }
4117 
4118  if (x == 0) {
4119  result = fp16_zero(sgn);
4120  } else {
4121  exp = expint;
4122  mnt = fp16_normalise(x, &exp);
4123  result = fp16_pack(sgn, exp + FP16_EXP_BITS, mnt >> FP16_EXP_BITS);
4124  }
4125 
4126  if (err && exact)
4127  flags |= FPLIB_IXC;
4128  }
4129 
4130  set_fpscr0(fpscr, flags);
4131 
4132  return result;
4133 }
4134 
4135 template <>
4136 uint32_t
4137 fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
4138 {
4139  int expint = FP32_EXP_BIAS + FP32_MANT_BITS;
4140  int mode = modeConv(fpscr);
4141  int flags = 0;
4142  int sgn, exp;
4143  uint32_t mnt, result;
4144 
4145  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4146  fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4147 
4148  // Handle NaNs, infinities and zeroes:
4149  if (fp32_is_NaN(exp, mnt)) {
4150  result = fp32_process_NaN(op, mode, &flags);
4151  } else if (exp == FP32_EXP_INF) {
4152  result = fp32_infinity(sgn);
4153  } else if (!mnt) {
4154  result = fp32_zero(sgn);
4155  } else if (exp >= expint) {
4156  // There are no fractional bits
4157  result = op;
4158  } else {
4159  // Truncate towards zero:
4160  uint32_t x = expint - exp >= FP32_BITS ? 0 : mnt >> (expint - exp);
4161  int err = exp < expint - FP32_BITS ? 1 :
4162  ((mnt << 1 >> (expint - exp - 1) & 3) |
4163  ((uint32_t)(mnt << 2 << (FP32_BITS + exp - expint)) != 0));
4164  switch (rounding) {
4165  case FPRounding_TIEEVEN:
4166  x += (err == 3 || (err == 2 && (x & 1)));
4167  break;
4168  case FPRounding_POSINF:
4169  x += err && !sgn;
4170  break;
4171  case FPRounding_NEGINF:
4172  x += err && sgn;
4173  break;
4174  case FPRounding_ZERO:
4175  break;
4176  case FPRounding_TIEAWAY:
4177  x += err >> 1;
4178  break;
4179  default:
4180  panic("Unrecognized FP rounding mode");
4181  }
4182 
4183  if (x == 0) {
4184  result = fp32_zero(sgn);
4185  } else {
4186  exp = expint;
4187  mnt = fp32_normalise(x, &exp);
4188  result = fp32_pack(sgn, exp + FP32_EXP_BITS, mnt >> FP32_EXP_BITS);
4189  }
4190 
4191  if (err && exact)
4192  flags |= FPLIB_IXC;
4193  }
4194 
4195  set_fpscr0(fpscr, flags);
4196 
4197  return result;
4198 }
4199 
4200 template <>
4201 uint64_t
4202 fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
4203 {
4204  int expint = FP64_EXP_BIAS + FP64_MANT_BITS;
4205  int mode = modeConv(fpscr);
4206  int flags = 0;
4207  int sgn, exp;
4208  uint64_t mnt, result;
4209 
4210  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4211  fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4212 
4213  // Handle NaNs, infinities and zeroes:
4214  if (fp64_is_NaN(exp, mnt)) {
4215  result = fp64_process_NaN(op, mode, &flags);
4216  } else if (exp == FP64_EXP_INF) {
4217  result = fp64_infinity(sgn);
4218  } else if (!mnt) {
4219  result = fp64_zero(sgn);
4220  } else if (exp >= expint) {
4221  // There are no fractional bits
4222  result = op;
4223  } else {
4224  // Truncate towards zero:
4225  uint64_t x = expint - exp >= FP64_BITS ? 0 : mnt >> (expint - exp);
4226  int err = exp < expint - FP64_BITS ? 1 :
4227  ((mnt << 1 >> (expint - exp - 1) & 3) |
4228  ((uint64_t)(mnt << 2 << (FP64_BITS + exp - expint)) != 0));
4229  switch (rounding) {
4230  case FPRounding_TIEEVEN:
4231  x += (err == 3 || (err == 2 && (x & 1)));
4232  break;
4233  case FPRounding_POSINF:
4234  x += err && !sgn;
4235  break;
4236  case FPRounding_NEGINF:
4237  x += err && sgn;
4238  break;
4239  case FPRounding_ZERO:
4240  break;
4241  case FPRounding_TIEAWAY:
4242  x += err >> 1;
4243  break;
4244  default:
4245  panic("Unrecognized FP rounding mode");
4246  }
4247 
4248  if (x == 0) {
4249  result = fp64_zero(sgn);
4250  } else {
4251  exp = expint;
4252  mnt = fp64_normalise(x, &exp);
4253  result = fp64_pack(sgn, exp + FP64_EXP_BITS, mnt >> FP64_EXP_BITS);
4254  }
4255 
4256  if (err && exact)
4257  flags |= FPLIB_IXC;
4258  }
4259 
4260  set_fpscr0(fpscr, flags);
4261 
4262  return result;
4263 }
4264 
4265 template <>
4266 uint16_t
4267 fplibScale(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4268 {
4269  int flags = 0;
4270  uint16_t result = fp16_scale(op1, (int16_t)op2, modeConv(fpscr), &flags);
4271  set_fpscr0(fpscr, flags);
4272  return result;
4273 }
4274 
4275 template <>
4276 uint32_t
4277 fplibScale(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4278 {
4279  int flags = 0;
4280  uint32_t result = fp32_scale(op1, (int32_t)op2, modeConv(fpscr), &flags);
4281  set_fpscr0(fpscr, flags);
4282  return result;
4283 }
4284 
4285 template <>
4286 uint64_t
4287 fplibScale(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4288 {
4289  int flags = 0;
4290  uint64_t result = fp64_scale(op1, (int64_t)op2, modeConv(fpscr), &flags);
4291  set_fpscr0(fpscr, flags);
4292  return result;
4293 }
4294 
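// fplibScale implements FSCALE: op1 * 2^op2, with op2 reinterpreted as
// a signed integer of the same width. Illustrative examples (not part
// of the source):
//   fplibScale<uint32_t>(0x3f800000 /* 1.0f */, 3, fpscr)
//       == 0x41000000 /* 8.0f */
//   fplibScale<uint32_t>(0x3f800000 /* 1.0f */, (uint32_t)-1, fpscr)
//       == 0x3f000000 /* 0.5f */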
4295 template <>
4296 uint16_t
4297 fplibSqrt(uint16_t op, FPSCR &fpscr)
4298 {
4299  int flags = 0;
4300  uint16_t result = fp16_sqrt(op, modeConv(fpscr), &flags);
4301  set_fpscr0(fpscr, flags);
4302  return result;
4303 }
4304 
4305 template <>
4306 uint32_t
4307 fplibSqrt(uint32_t op, FPSCR &fpscr)
4308 {
4309  int flags = 0;
4310  uint32_t result = fp32_sqrt(op, modeConv(fpscr), &flags);
4311  set_fpscr0(fpscr, flags);
4312  return result;
4313 }
4314 
4315 template <>
4316 uint64_t
4317 fplibSqrt(uint64_t op, FPSCR &fpscr)
4318 {
4319  int flags = 0;
4320  uint64_t result = fp64_sqrt(op, modeConv(fpscr), &flags);
4321  set_fpscr0(fpscr, flags);
4322  return result;
4323 }
4324 
4325 template <>
4326 uint16_t
4327 fplibSub(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4328 {
4329  int flags = 0;
4330  uint16_t result = fp16_add(op1, op2, 1, modeConv(fpscr), &flags);
4331  set_fpscr0(fpscr, flags);
4332  return result;
4333 }
4334 
4335 template <>
4336 uint32_t
4337 fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4338 {
4339  int flags = 0;
4340  uint32_t result = fp32_add(op1, op2, 1, modeConv(fpscr), &flags);
4341  set_fpscr0(fpscr, flags);
4342  return result;
4343 }
4344 
4345 template <>
4346 uint64_t
4347 fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4348 {
4349  int flags = 0;
4350  uint64_t result = fp64_add(op1, op2, 1, modeConv(fpscr), &flags);
4351  set_fpscr0(fpscr, flags);
4352  return result;
4353 }
4354 
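// fplibSub reuses the shared adder with the negate flag set:
// fpN_add(op1, op2, 1, ...) flips op2's sign before the magnitude
// add/subtract, so subtraction inherits exactly the same rounding, NaN
// propagation and exception-flag behaviour as fplibAdd.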
4355 template <>
4356 uint16_t
4357 fplibTrigMulAdd(uint8_t coeff_index, uint16_t op1, uint16_t op2, FPSCR &fpscr)
4358 {
4359  static uint16_t coeff[2][8] = {
4360  {
4361  0x3c00,
4362  0xb155,
4363  0x2030,
4364  0x0000,
4365  0x0000,
4366  0x0000,
4367  0x0000,
4368  0x0000,
4369  },
4370  {
4371  0x3c00,
4372  0xb800,
4373  0x293a,
4374  0x0000,
4375  0x0000,
4376  0x0000,
4377  0x0000,
4378  0x0000
4379  }
4380  };
4381  int flags = 0;
4382  uint16_t result =
4383  fp16_muladd(coeff[op2 >> (FP16_BITS - 1)][coeff_index], op1,
4384  fplibAbs(op2), 0, modeConv(fpscr), &flags);
4385  set_fpscr0(fpscr, flags);
4386  return result;
4387 }
4388 
4389 template <>
4390 uint32_t
4391 fplibTrigMulAdd(uint8_t coeff_index, uint32_t op1, uint32_t op2, FPSCR &fpscr)
4392 {
4393  static uint32_t coeff[2][8] = {
4394  {
4395  0x3f800000,
4396  0xbe2aaaab,
4397  0x3c088886,
4398  0xb95008b9,
4399  0x36369d6d,
4400  0x00000000,
4401  0x00000000,
4402  0x00000000
4403  },
4404  {
4405  0x3f800000,
4406  0xbf000000,
4407  0x3d2aaaa6,
4408  0xbab60705,
4409  0x37cd37cc,
4410  0x00000000,
4411  0x00000000,
4412  0x00000000
4413  }
4414  };
4415  int flags = 0;
4416  uint32_t result =
4417  fp32_muladd(coeff[op2 >> (FP32_BITS - 1)][coeff_index], op1,
4418  fplibAbs(op2), 0, modeConv(fpscr), &flags);
4419  set_fpscr0(fpscr, flags);
4420  return result;
4421 }
4422 
4423 template <>
4424 uint64_t
4425 fplibTrigMulAdd(uint8_t coeff_index, uint64_t op1, uint64_t op2, FPSCR &fpscr)
4426 {
4427  static uint64_t coeff[2][8] = {
4428  {
4429  0x3ff0000000000000ULL,
4430  0xbfc5555555555543ULL,
4431  0x3f8111111110f30cULL,
4432  0xbf2a01a019b92fc6ULL,
4433  0x3ec71de351f3d22bULL,
4434  0xbe5ae5e2b60f7b91ULL,
4435  0x3de5d8408868552fULL,
4436  0x0000000000000000ULL
4437  },
4438  {
4439  0x3ff0000000000000ULL,
4440  0xbfe0000000000000ULL,
4441  0x3fa5555555555536ULL,
4442  0xbf56c16c16c13a0bULL,
4443  0x3efa01a019b1e8d8ULL,
4444  0xbe927e4f7282f468ULL,
4445  0x3e21ee96d2641b13ULL,
4446  0xbda8f76380fbb401ULL
4447  }
4448  };
4449  int flags = 0;
4450  uint64_t result =
4451  fp64_muladd(coeff[op2 >> (FP64_BITS - 1)][coeff_index], op1,
4452  fplibAbs(op2), 0, modeConv(fpscr), &flags);
4453  set_fpscr0(fpscr, flags);
4454  return result;
4455 }
4456 
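// The coefficient tables above hold the Taylor-series terms used by
// FTMAD, encoded as IEEE bit patterns: row 0 is the sine series
// (1, -1/6, 1/120, -1/5040, ... -- e.g. 0xbfc5555555555543 is about
// -0.16667) and row 1 the cosine series (1, -1/2, 1/24, -1/720, ...).
// The sign bit of op2 selects the row, and each call computes
// coeff + op1 * |op2|; iterating coeff_index with op1 carrying the
// running sum evaluates the polynomial in Horner form.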
4457 template <>
4458 uint16_t
4459 fplibTrigSMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4460 {
4461  int flags = 0;
4462  int sgn, exp;
4463  uint16_t mnt;
4464 
4465  int mode = modeConv(fpscr);
4466  uint16_t result = fp16_mul(op1, op1, mode, &flags);
4467  set_fpscr0(fpscr, flags);
4468 
4469  fp16_unpack(&sgn, &exp, &mnt, result, mode, &flags);
4470  if (!fp16_is_NaN(exp, mnt)) {
4471  result = (result & ~(1ULL << (FP16_BITS - 1))) |
4472  op2 << (FP16_BITS - 1);
4473  }
4474  return result;
4475 }
4476 
4477 template <>
4478 uint32_t
4479 fplibTrigSMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4480 {
4481  int flags = 0;
4482  int sgn, exp;
4483  uint32_t mnt;
4484 
4485  int mode = modeConv(fpscr);
4486  uint32_t result = fp32_mul(op1, op1, mode, &flags);
4487  set_fpscr0(fpscr, flags);
4488 
4489  fp32_unpack(&sgn, &exp, &mnt, result, mode, &flags);
4490  if (!fp32_is_NaN(exp, mnt)) {
4491  result = (result & ~(1ULL << (FP32_BITS - 1))) | op2 << (FP32_BITS - 1);
4492  }
4493  return result;
4494 }
4495 
4496 template <>
4497 uint64_t
4498 fplibTrigSMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4499 {
4500  int flags = 0;
4501  int sgn, exp;
4502  uint64_t mnt;
4503 
4504  int mode = modeConv(fpscr);
4505  uint64_t result = fp64_mul(op1, op1, mode, &flags);
4506  set_fpscr0(fpscr, flags);
4507 
4508  fp64_unpack(&sgn, &exp, &mnt, result, mode, &flags);
4509  if (!fp64_is_NaN(exp, mnt)) {
4510  result = (result & ~(1ULL << (FP64_BITS - 1))) | op2 << (FP64_BITS - 1);
4511  }
4512  return result;
4513 }
4514 
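// fplibTrigSMul implements FTSMUL: the result is op1 * op1 with its
// sign bit replaced by bit 0 of op2, unless the squaring produced a
// NaN, whose payload is left untouched. This supplies the x^2 operand,
// with quadrant-derived sign, consumed by fplibTrigMulAdd above.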
4515 template <>
4516 uint16_t
4517 fplibTrigSSel(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4518 {
4519  static constexpr uint16_t fpOne =
4520  (uint16_t)FP16_EXP_BIAS << FP16_MANT_BITS; // 1.0
4521  if (op2 & 1)
4522  op1 = fpOne;
4523  return op1 ^ ((op2 >> 1) << (FP16_BITS - 1));
4524 }
4525 
4526 template <>
4527 uint32_t
4528 fplibTrigSSel(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4529 {
4530  static constexpr uint32_t fpOne =
4531  (uint32_t)FP32_EXP_BIAS << FP32_MANT_BITS; // 1.0
4532  if (op2 & 1)
4533  op1 = fpOne;
4534  return op1 ^ ((op2 >> 1) << (FP32_BITS - 1));
4535 }
4536 
4537 template <>
4538 uint64_t
4539 fplibTrigSSel(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4540 {
4541  static constexpr uint64_t fpOne =
4542  (uint64_t)FP64_EXP_BIAS << FP64_MANT_BITS; // 1.0
4543  if (op2 & 1)
4544  op1 = fpOne;
4545  return op1 ^ ((op2 >> 1) << (FP64_BITS - 1));
4546 }
4547 
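// fplibTrigSSel implements FTSSEL: bit 0 of op2 selects the constant
// 1.0 in place of op1, and bit 1 of op2 is XORed into the sign bit,
// steering the polynomial between its sine and cosine forms for the
// current quadrant. Illustrative example (not part of the source):
//   fplibTrigSSel<uint32_t>(0x40000000 /* 2.0f */, 3, fpscr)
//       == 0xbf800000 /* -1.0f: bit 0 picked 1.0, bit 1 negated it */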
4548 static uint64_t
4549 FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
4550  int *flags)
4551 {
4552  int expmax = FP64_EXP_BIAS + FP64_BITS - 1;
4553  uint64_t x;
4554  int err;
4555 
4556  if (exp > expmax) {
4557  *flags = FPLIB_IOC;
4558  return ((uint64_t)!u << (FP64_BITS - 1)) - !sgn;
4559  }
4560 
4561  x = lsr64(mnt << FP64_EXP_BITS, expmax - exp);
4562  err = (exp > expmax - 2 ? 0 :
4563  (lsr64(mnt << FP64_EXP_BITS, expmax - 2 - exp) & 3) |
4564  !!(mnt << FP64_EXP_BITS & (lsl64(1, expmax - 2 - exp) - 1)));
4565 
4566  switch (rounding) {
4567  case FPRounding_TIEEVEN:
4568  x += (err == 3 || (err == 2 && (x & 1)));
4569  break;
4570  case FPRounding_POSINF:
4571  x += err && !sgn;
4572  break;
4573  case FPRounding_NEGINF:
4574  x += err && sgn;
4575  break;
4576  case FPRounding_ZERO:
4577  break;
4578  case FPRounding_TIEAWAY:
4579  x += err >> 1;
4580  break;
4581  default:
4582  panic("Unrecognized FP rounding mode");
4583  }
4584 
4585  if (u ? sgn && x : x > (1ULL << (FP64_BITS - 1)) - !sgn) {
4586  *flags = FPLIB_IOC;
4587  return ((uint64_t)!u << (FP64_BITS - 1)) - !sgn;
4588  }
4589 
4590  if (err) {
4591  *flags = FPLIB_IXC;
4592  }
4593 
4594  return sgn ? -x : x;
4595 }
4596 
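// FPToFixed_64 aligns the mantissa so the binary point lands at bit 0,
// derives the same {round, sticky} err code as fplibRoundInt, rounds,
// and saturates anything out of range while raising IOC. The
// saturation constant ((uint64_t)!u << 63) - !sgn evaluates to
// INT64_MAX, INT64_MIN, UINT64_MAX or 0 as appropriate. FPToFixed_32
// and FPToFixed_16 below reuse the 64-bit conversion and merely
// re-check the narrower range.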
4597 static uint32_t
4598 FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
4599  int *flags)
4600 {
4601  uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
4602  if (u ? x >= 1ULL << FP32_BITS :
4603  !(x < 1ULL << (FP32_BITS - 1) ||
4604  (uint64_t)-x <= (uint64_t)1 << (FP32_BITS - 1))) {
4605  *flags = FPLIB_IOC;
4606  x = ((uint32_t)!u << (FP32_BITS - 1)) - !sgn;
4607  }
4608  return x;
4609 }
4610 
4611 static uint16_t
4612 FPToFixed_16(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
4613  int *flags)
4614 {
4615  uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
4616  if (u ? x >= 1ULL << FP16_BITS :
4617  !(x < 1ULL << (FP16_BITS - 1) ||
4618  (uint64_t)-x <= (uint64_t)1 << (FP16_BITS - 1))) {
4619  *flags = FPLIB_IOC;
4620  x = ((uint16_t)!u << (FP16_BITS - 1)) - !sgn;
4621  }
4622  return x;
4623 }
4624 
4625 template <>
4626 uint16_t
4627 fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
4628  FPSCR &fpscr)
4629 {
4630  int flags = 0;
4631  int sgn, exp;
4632  uint16_t mnt, result;
4633 
4634  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4635  fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4636 
4637  // If NaN, set cumulative flag or take exception:
4638  if (fp16_is_NaN(exp, mnt)) {
4639  flags = FPLIB_IOC;
4640  result = 0;
4641  } else {
4642  assert(fbits >= 0);
4643  // Infinity is treated as an ordinary normalised number that saturates.
4644  result =
4645  FPToFixed_16(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits,
4646  (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS),
4647  u, rounding, &flags);
4648  }
4649 
4650  set_fpscr0(fpscr, flags);
4651 
4652  return result;
4653 }
4654 
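// The narrower fplibFPToFixed specialisations rebias the exponent into
// FP64 terms (exp + FP64_EXP_BIAS - FPn_EXP_BIAS) and left-align the
// mantissa into the FP64 fraction field, so the shared FPToFixed_*
// helpers handle every source width; fbits is added to the exponent to
// fold the fixed-point scaling by 2^fbits into the same shift.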
4655 template <>
4656 uint32_t
4657 fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
4658  FPSCR &fpscr)
4659 {
4660  int flags = 0;
4661  int sgn, exp;
4662  uint16_t mnt;
4663  uint32_t result;
4664 
4665  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4666  fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4667 
4668  // If NaN, set cumulative flag or take exception:
4669  if (fp16_is_NaN(exp, mnt)) {
4670  flags = FPLIB_IOC;
4671  result = 0;
4672  } else {
4673  assert(fbits >= 0);
4674  if (exp == FP16_EXP_INF)
4675  exp = 255; // infinity: make it big enough to saturate
4676  result =
4677  FPToFixed_32(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits,
4678  (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS),
4679  u, rounding, &flags);
4680  }
4681 
4682  set_fpscr0(fpscr, flags);
4683 
4684  return result;
4685 }
4686 
4687 template <>
4688 uint32_t
4689 fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4690 {
4691  int flags = 0;
4692  int sgn, exp;
4693  uint32_t mnt, result;
4694 
4695  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4696  fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4697 
4698  // If NaN, set cumulative flag or take exception:
4699  if (fp32_is_NaN(exp, mnt)) {
4700  flags = FPLIB_IOC;
4701  result = 0;
4702  } else {
4703  assert(fbits >= 0);
4704  // Infinity is treated as an ordinary normalised number that saturates.
4705  result =
4706  FPToFixed_32(sgn, exp + FP64_EXP_BIAS - FP32_EXP_BIAS + fbits,
4707  (uint64_t)mnt << (FP64_MANT_BITS - FP32_MANT_BITS),
4708  u, rounding, &flags);
4709  }
4710 
4711  set_fpscr0(fpscr, flags);
4712 
4713  return result;
4714 }
4715 
4716 template <>
4717 uint32_t
4718 fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4719 {
4720  int flags = 0;
4721  int sgn, exp;
4722  uint64_t mnt;
4723  uint32_t result;
4724 
4725  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4726  fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4727 
4728  // If NaN, set cumulative flag or take exception:
4729  if (fp64_is_NaN(exp, mnt)) {
4730  flags = FPLIB_IOC;
4731  result = 0;
4732  } else {
4733  assert(fbits >= 0);
4734  // Infinity is treated as an ordinary normalised number that saturates.
4735  result = FPToFixed_32(sgn, exp + fbits, mnt, u, rounding, &flags);
4736  }
4737 
4738  set_fpscr0(fpscr, flags);
4739 
4740  return result;
4741 }
4742 
4743 uint32_t
4744 fplibFPToFixedJS(uint64_t op, FPSCR &fpscr, bool is64, uint8_t& nz)
4745 {
4746  int flags = 0;
4747  uint32_t result;
4748  bool Z = true;
4749 
4750  uint32_t sgn = bits(op, 63);
4751  int32_t exp = bits(op, 62, 52);
4752  uint64_t mnt = bits(op, 51, 0);
4753 
4754  if (exp == 0) {
4755  if (mnt != 0) {
4756  if (fpscr.fz) {
4757  flags |= FPLIB_IDC;
4758  } else {
4759  flags |= FPLIB_IXC;
4760  Z = false;
4761  }
4762  }
4763  result = 0;
4764  } else if (exp == 0x7ff) {
4765  flags |= FPLIB_IOC;
4766  result = 0;
4767  Z = false;
4768  } else {
4769  mnt |= 1ULL << FP64_MANT_BITS;
4770  int mnt_shft = exp - FP64_EXP_BIAS - FP64_MANT_BITS;
4771  bool err = true;
4772 
4773  if (abs(mnt_shft) >= FP64_BITS) {
4774  result = 0;
4775  Z = false;
4776  } else if (mnt_shft >= 0) {
4777  result = lsl64(mnt, mnt_shft);
4778  } else if (mnt_shft < 0) {
4779  err = lsl64(mnt, mnt_shft + FP64_BITS) != 0;
4780  result = lsr64(mnt, abs(mnt_shft));
4781  }
4782  uint64_t max_result = (1ULL << (FP32_BITS - 1)) - !sgn;
4783  if ((exp - FP64_EXP_BIAS) > 31 || result > max_result) {
4784  flags |= FPLIB_IOC;
4785  Z = false;
4786  } else if (err) {
4787  flags |= FPLIB_IXC;
4788  Z = false;
4789  }
4790  result = sgn ? -result : result;
4791  }
4792  if (sgn == 1 && result == 0)
4793  Z = false;
4794 
4795  if (is64) {
4796  nz = Z ? 0x1 : 0x0;
4797  } else {
4798  fpscr.n = 0;
4799  fpscr.z = (int)Z;
4800  fpscr.c = 0;
4801  fpscr.v = 0;
4802  }
4803 
4804  set_fpscr0(fpscr, flags);
4805  return result;
4806 }
4807 
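// fplibFPToFixedJS implements FJCVTZS (the JavaScript convert): the
// double is converted with round-toward-zero and, unlike the saturating
// conversions above, an out-of-range value wraps modulo 2^32 as in
// ECMAScript ToInt32, while NaN and infinity produce 0 with IOC. Z
// records whether the conversion was exact and in range; on the
// AArch32 path it is written straight into the FPSCR condition flags,
// and on the AArch64 path it is returned through nz for the caller to
// place in NZCV.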
4808 template <>
4809 uint64_t
4810 fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
4811  FPSCR &fpscr)
4812 {
4813  int flags = 0;
4814  int sgn, exp;
4815  uint16_t mnt;
4816  uint64_t result;
4817 
4818  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4819  fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4820 
4821  // If NaN, set cumulative flag or take exception:
4822  if (fp16_is_NaN(exp, mnt)) {
4823  flags = FPLIB_IOC;
4824  result = 0;
4825  } else {
4826  assert(fbits >= 0);
4827  if (exp == FP16_EXP_INF)
4828  exp = 255; // infinity: make it big enough to saturate
4829  result =
4830  FPToFixed_64(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits,
4831  (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS),
4832  u, rounding, &flags);
4833  }
4834 
4835  set_fpscr0(fpscr, flags);
4836 
4837  return result;
4838 }
4839 
4840 template <>
4841 uint64_t
4842 fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4843 {
4844  int flags = 0;
4845  int sgn, exp;
4846  uint32_t mnt;
4847  uint64_t result;
4848 
4849  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4850  fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4851 
4852  // If NaN, set cumulative flag or take exception:
4853  if (fp32_is_NaN(exp, mnt)) {
4854  flags = FPLIB_IOC;
4855  result = 0;
4856  } else {
4857  assert(fbits >= 0);
4858  // Infinity is treated as an ordinary normalised number that saturates.
4859  result =
4860  FPToFixed_64(sgn, exp + FP64_EXP_BIAS - FP32_EXP_BIAS + fbits,
4861  (uint64_t)mnt << (FP64_MANT_BITS - FP32_MANT_BITS),
4862  u, rounding, &flags);
4863  }
4864 
4865  set_fpscr0(fpscr, flags);
4866 
4867  return result;
4868 }
4869 
4870 template <>
4871 uint64_t
4872 fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4873 {
4874  int flags = 0;
4875  int sgn, exp;
4876  uint64_t mnt, result;
4877 
4878  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4879  fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4880 
4881  // If NaN, set cumulative flag or take exception:
4882  if (fp64_is_NaN(exp, mnt)) {
4883  flags = FPLIB_IOC;
4884  result = 0;
4885  } else {
4886  assert(fbits >= 0);
4887  // Infinity is treated as an ordinary normalised number that saturates.
4888  result = FPToFixed_64(sgn, exp + fbits, mnt, u, rounding, &flags);
4889  }
4890 
4891  set_fpscr0(fpscr, flags);
4892 
4893  return result;
4894 }
4895 
4896 static uint16_t
4897 fp16_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
4898 {
4899  int x_sgn = !u && a >> (FP64_BITS - 1);
4900  int x_exp = FP16_EXP_BIAS + FP64_BITS - 1 - fbits;
4901  uint64_t x_mnt = x_sgn ? -a : a;
4902 
4903  // Handle zero:
4904  if (!x_mnt) {
4905  return fp16_zero(0);
4906  }
4907 
4908  // Normalise into FP16_BITS bits, collapsing error into bottom bit:
4909  x_mnt = fp64_normalise(x_mnt, &x_exp);
4910  x_mnt = (x_mnt >> (FP64_BITS - FP16_BITS - 1) |
4911  !!(x_mnt & ((1ULL << (FP64_BITS - FP16_BITS - 1)) - 1)));
4912 
4913  return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
4914 }
4915 
4916 static uint32_t
4917 fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
4918 {
4919  int x_sgn = !u && a >> (FP64_BITS - 1);
4920  int x_exp = FP32_EXP_BIAS + FP64_BITS - 1 - fbits;
4921  uint64_t x_mnt = x_sgn ? -a : a;
4922 
4923  // Handle zero:
4924  if (!x_mnt) {
4925  return fp32_zero(0);
4926  }
4927 
4928  // Normalise into FP32_BITS bits, collapsing error into bottom bit:
4929  x_mnt = fp64_normalise(x_mnt, &x_exp);
4930  x_mnt = (x_mnt >> (FP64_BITS - FP32_BITS - 1) |
4931  !!(x_mnt & ((1ULL << (FP64_BITS - FP32_BITS - 1)) - 1)));
4932 
4933  return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
4934 }
4935 
4936 static uint64_t
4937 fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
4938 {
4939  int x_sgn = !u && a >> (FP64_BITS - 1);
4940  int x_exp = FP64_EXP_BIAS + FP64_BITS - 1 - fbits;
4941  uint64_t x_mnt = x_sgn ? -a : a;
4942 
4943  // Handle zero:
4944  if (!x_mnt) {
4945  return fp64_zero(0);
4946  }
4947 
4948  x_mnt = fp64_normalise(x_mnt, &x_exp);
4949 
4950  return fp64_round(x_sgn, x_exp, x_mnt << 1, mode, flags);
4951 }
4952 
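// The fpN_cvtf helpers convert a 64-bit fixed-point value with fbits
// fraction bits to floating point: negative signed inputs are
// magnitude-converted, fp64_normalise shifts the integer to the top of
// the word (adjusting the exponent), discarded low bits are collapsed
// into a sticky bit, and fpN_round applies the requested rounding.
// Illustrative example (not part of the source): fp32_cvtf(1, 0, 1,
// FPLIB_RN, &flags) yields 0x3f800000 /* 1.0f */, while fbits = 1
// scales the same input down to 0x3f000000 /* 0.5f */.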
4953 template <>
4954 uint16_t
4955 fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding,
4956  FPSCR &fpscr)
4957 {
4958  int flags = 0;
4959  uint16_t res = fp16_cvtf(op, fbits, u,
4960  (int)rounding | ((uint32_t)fpscr >> 22 & 12),
4961  &flags);
4962  set_fpscr0(fpscr, flags);
4963  return res;
4964 }
4965 
4966 template <>
4967 uint32_t
4968 fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4969 {
4970  int flags = 0;
4971  uint32_t res = fp32_cvtf(op, fbits, u,
4972  (int)rounding | ((uint32_t)fpscr >> 22 & 12),
4973  &flags);
4974  set_fpscr0(fpscr, flags);
4975  return res;
4976 }
4977 
4978 template <>
4979 uint64_t
4980 fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4981 {
4982  int flags = 0;
4983  uint64_t res = fp64_cvtf(op, fbits, u,
4984  (int)rounding | ((uint32_t)fpscr >> 22 & 12),
4985  &flags);
4986  set_fpscr0(fpscr, flags);
4987  return res;
4988 }
4989 
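// The mode word handed to fpN_cvtf merges the explicit rounding
// attribute (its low two bits) with the flush-to-zero and default-NaN
// controls extracted from the FPSCR: shifting right by 22 moves
// FPSCR.FZ (bit 24) and FPSCR.DN (bit 25) to bit positions 2 and 3,
// and the mask 12 keeps exactly those, matching FPLIB_FZ and FPLIB_DN.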
4990 template <>
4991 uint16_t
4992 fplibInfinity(int sgn)
4993 {
4994  return fp16_infinity(sgn);
4995 }
4996 
4997 template <>
4998 uint32_t
4999 fplibInfinity(int sgn)
5000 {
5001  return fp32_infinity(sgn);
5002 }
5003 
5004 template <>
5005 uint64_t
5006 fplibInfinity(int sgn)
5007 {
5008  return fp64_infinity(sgn);
5009 }
5010 
5011 template <>
5012 uint16_t
5013 fplibDefaultNaN()
5014 {
5015  return fp16_defaultNaN();
5016 }
5017 
5018 template <>
5019 uint32_t
5020 fplibDefaultNaN()
5021 {
5022  return fp32_defaultNaN();
5023 }
5024 
5025 template <>
5026 uint64_t
5027 fplibDefaultNaN()
5028 {
5029  return fp64_defaultNaN();
5030 }
5031 
5032 } // namespace ArmISA