gem5  v20.1.0.0
fplib.cc
/*
 * Copyright (c) 2012-2013, 2017-2018, 2020 ARM Limited
 * Copyright (c) 2020 Metempsy Technology Consulting
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdint.h>

#include <cassert>
#include <cmath>

#include "base/logging.hh"
#include "fplib.hh"

namespace ArmISA
{

#define FPLIB_RN 0
#define FPLIB_RP 1
#define FPLIB_RM 2
#define FPLIB_RZ 3
#define FPLIB_FZ 4
#define FPLIB_DN 8
#define FPLIB_AHP 16
#define FPLIB_FZ16 32

#define FPLIB_IDC 128 // Input Denormal
#define FPLIB_IXC 16 // Inexact
#define FPLIB_UFC 8 // Underflow
#define FPLIB_OFC 4 // Overflow
#define FPLIB_DZC 2 // Division by Zero
#define FPLIB_IOC 1 // Invalid Operation

#define FP16_BITS 16
#define FP32_BITS 32
#define FP64_BITS 64

#define FP16_EXP_BITS 5
#define FP32_EXP_BITS 8
#define FP64_EXP_BITS 11

#define FP16_EXP_BIAS 15
#define FP32_EXP_BIAS 127
#define FP64_EXP_BIAS 1023

#define FP16_EXP_INF ((1ULL << FP16_EXP_BITS) - 1)
#define FP32_EXP_INF ((1ULL << FP32_EXP_BITS) - 1)
#define FP64_EXP_INF ((1ULL << FP64_EXP_BITS) - 1)

#define FP16_MANT_BITS (FP16_BITS - FP16_EXP_BITS - 1)
#define FP32_MANT_BITS (FP32_BITS - FP32_EXP_BITS - 1)
#define FP64_MANT_BITS (FP64_BITS - FP64_EXP_BITS - 1)

#define FP16_EXP(x) ((x) >> FP16_MANT_BITS & ((1ULL << FP16_EXP_BITS) - 1))
#define FP32_EXP(x) ((x) >> FP32_MANT_BITS & ((1ULL << FP32_EXP_BITS) - 1))
#define FP64_EXP(x) ((x) >> FP64_MANT_BITS & ((1ULL << FP64_EXP_BITS) - 1))

#define FP16_MANT(x) ((x) & ((1ULL << FP16_MANT_BITS) - 1))
#define FP32_MANT(x) ((x) & ((1ULL << FP32_MANT_BITS) - 1))
#define FP64_MANT(x) ((x) & ((1ULL << FP64_MANT_BITS) - 1))
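
// Illustrative note (not part of the original source): a quick worked
// example of how these field macros decompose an IEEE-754 encoding.
// For the single-precision bit pattern 0x3F800000 (1.0f):
//   FP32_EXP(0x3F800000)  = (0x3F800000 >> 23) & 0xFF = 127 (the bias)
//   FP32_MANT(0x3F800000) = 0x3F800000 & 0x7FFFFF     = 0
// and for the half-precision pattern 0x3C00 (1.0):
//   FP16_EXP(0x3C00)  = (0x3C00 >> 10) & 0x1F = 15 (the bias)
//   FP16_MANT(0x3C00) = 0x3C00 & 0x3FF        = 0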

static inline uint16_t
lsl16(uint16_t x, uint32_t shift)
{
    return shift < 16 ? x << shift : 0;
}

static inline uint16_t
lsr16(uint16_t x, uint32_t shift)
{
    return shift < 16 ? x >> shift : 0;
}

static inline uint32_t
lsl32(uint32_t x, uint32_t shift)
{
    return shift < 32 ? x << shift : 0;
}

static inline uint32_t
lsr32(uint32_t x, uint32_t shift)
{
    return shift < 32 ? x >> shift : 0;
}

static inline uint64_t
lsl64(uint64_t x, uint32_t shift)
{
    return shift < 64 ? x << shift : 0;
}

static inline uint64_t
lsr64(uint64_t x, uint32_t shift)
{
    return shift < 64 ? x >> shift : 0;
}

static inline void
lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift == 0) {
        *r1 = x1;
        *r0 = x0;
    } else if (shift < 64) {
        *r1 = x1 << shift | x0 >> (64 - shift);
        *r0 = x0 << shift;
    } else if (shift < 128) {
        *r1 = x0 << (shift - 64);
        *r0 = 0;
    } else {
        *r1 = 0;
        *r0 = 0;
    }
}

static inline void
lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift == 0) {
        *r1 = x1;
        *r0 = x0;
    } else if (shift < 64) {
        *r0 = x0 >> shift | x1 << (64 - shift);
        *r1 = x1 >> shift;
    } else if (shift < 128) {
        *r0 = x1 >> (shift - 64);
        *r1 = 0;
    } else {
        *r0 = 0;
        *r1 = 0;
    }
}

static inline void
mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
{
    uint32_t mask = ((uint32_t)1 << 31) - 1;
    uint64_t a0 = a & mask;
    uint64_t a1 = a >> 31 & mask;
    uint64_t b0 = b & mask;
    uint64_t b1 = b >> 31 & mask;
    uint64_t p0 = a0 * b0;
    uint64_t p2 = a1 * b1;
    uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2;
    uint64_t s0 = p0;
    uint64_t s1 = (s0 >> 31) + p1;
    uint64_t s2 = (s1 >> 31) + p2;
    *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62;
    *x1 = s2 >> 2;
}
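
// Illustrative note (not part of the original source): mul62x62() is a
// Karatsuba-style multiply that splits each operand into two 31-bit
// halves, so it is only valid for operands below 2^62 (hence the name).
// For example, squaring 2^61 yields 2^122, which comes back as
//   *x1 == 1ULL << 58, *x0 == 0   (i.e. x1 * 2^64 + x0 == 2^122)
// since a0 = b0 = 0 and a1 = b1 = 2^30, giving p2 = 2^60, p0 = p1 = 0.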

static inline
void mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b)
{
    uint64_t t0 = (uint64_t)(uint32_t)a * b;
    uint64_t t1 = (t0 >> 32) + (a >> 32) * b;
    *x0 = t1 << 32 | (uint32_t)t0;
    *x1 = t1 >> 32;
}

static inline void
add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    *x0 = a0 + b0;
    *x1 = a1 + b1 + (*x0 < a0);
}

static inline void
sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    *x0 = a0 - b0;
    *x1 = a1 - b1 - (*x0 > a0);
}

static inline int
cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
{
    return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 1 : 0);
}
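
// Illustrative note (not part of the original source): the 128-bit
// helpers represent a value as (x1 * 2^64 + x0), deriving the carry or
// borrow from unsigned wrap-around. For example:
//   uint64_t r0, r1;
//   add128(&r0, &r1, ~0ULL, 0, 1, 0);  // r0 == 0, r1 == 1
// because the low-word sum wraps to 0 and (*x0 < a0) detects the carry
// into the high word.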

static inline uint16_t
fp16_normalise(uint16_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 8; shift; shift >>= 1) {
        if (!(mnt >> (16 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}
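
// Illustrative note (not part of the original source): the normalise
// helpers perform a binary-search shift until the top bit of the
// mantissa is set, decrementing the exponent by the total shift. For
// example, with mnt = 0x0001 the loop shifts by 8, 4, 2 and 1 (15
// places in total), returning 0x8000 and subtracting 15 from *exp.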

static inline uint32_t
fp32_normalise(uint32_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 16; shift; shift >>= 1) {
        if (!(mnt >> (32 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline uint64_t
fp64_normalise(uint64_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 32; shift; shift >>= 1) {
        if (!(mnt >> (64 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline void
fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp)
{
    uint64_t x0 = *mnt0;
    uint64_t x1 = *mnt1;
    int shift;

    if (!x0 && !x1) {
        return;
    }

    if (!x1) {
        x1 = x0;
        x0 = 0;
        *exp -= 64;
    }

    for (shift = 32; shift; shift >>= 1) {
        if (!(x1 >> (64 - shift))) {
            x1 = x1 << shift | x0 >> (64 - shift);
            x0 <<= shift;
            *exp -= shift;
        }
    }

    *mnt0 = x0;
    *mnt1 = x1;
}

static inline uint16_t
fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt)
{
    return sgn << (FP16_BITS - 1) | exp << FP16_MANT_BITS | FP16_MANT(mnt);
}

static inline uint32_t
fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt)
{
    return sgn << (FP32_BITS - 1) | exp << FP32_MANT_BITS | FP32_MANT(mnt);
}

static inline uint64_t
fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt)
{
    return sgn << (FP64_BITS - 1) | exp << FP64_MANT_BITS | FP64_MANT(mnt);
}

static inline uint16_t
fp16_zero(int sgn)
{
    return fp16_pack(sgn, 0, 0);
}

static inline uint32_t
fp32_zero(int sgn)
{
    return fp32_pack(sgn, 0, 0);
}

static inline uint64_t
fp64_zero(int sgn)
{
    return fp64_pack(sgn, 0, 0);
}

static inline uint16_t
fp16_max_normal(int sgn)
{
    return fp16_pack(sgn, FP16_EXP_INF - 1, -1);
}

static inline uint32_t
fp32_max_normal(int sgn)
{
    return fp32_pack(sgn, FP32_EXP_INF - 1, -1);
}

static inline uint64_t
fp64_max_normal(int sgn)
{
    return fp64_pack(sgn, FP64_EXP_INF - 1, -1);
}

static inline uint16_t
fp16_infinity(int sgn)
{
    return fp16_pack(sgn, FP16_EXP_INF, 0);
}

static inline uint32_t
fp32_infinity(int sgn)
{
    return fp32_pack(sgn, FP32_EXP_INF, 0);
}

static inline uint64_t
fp64_infinity(int sgn)
{
    return fp64_pack(sgn, FP64_EXP_INF, 0);
}

static inline uint16_t
fp16_defaultNaN()
{
    return fp16_pack(0, FP16_EXP_INF, 1ULL << (FP16_MANT_BITS - 1));
}

static inline uint32_t
fp32_defaultNaN()
{
    return fp32_pack(0, FP32_EXP_INF, 1ULL << (FP32_MANT_BITS - 1));
}

static inline uint64_t
fp64_defaultNaN()
{
    return fp64_pack(0, FP64_EXP_INF, 1ULL << (FP64_MANT_BITS - 1));
}
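
// Illustrative note (not part of the original source): concrete
// encodings produced by the constructors above, for reference:
//   fp16_infinity(0)   == 0x7C00        fp16_defaultNaN() == 0x7E00
//   fp16_max_normal(0) == 0x7BFF (65504, the largest finite half)
//   fp32_defaultNaN()  == 0x7FC00000
//   fp64_defaultNaN()  == 0x7FF8000000000000ULL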

static inline void
fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode,
            int *flags)
{
    *sgn = x >> (FP16_BITS - 1);
    *exp = FP16_EXP(x);
    *mnt = FP16_MANT(x);

    if (*exp) {
        *mnt |= 1ULL << FP16_MANT_BITS;
    } else {
        // Handle subnormals:
        // IDC (Input Denormal) is not set in this case.
        if (*mnt) {
            if (mode & FPLIB_FZ16) {
                *mnt = 0;
            } else {
                ++*exp;
            }
        }
    }
}

static inline void
fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode,
            int *flags)
{
    *sgn = x >> (FP32_BITS - 1);
    *exp = FP32_EXP(x);
    *mnt = FP32_MANT(x);

    if (*exp) {
        *mnt |= 1ULL << FP32_MANT_BITS;
    } else {
        // Handle subnormals:
        if (*mnt) {
            if (mode & FPLIB_FZ) {
                *flags |= FPLIB_IDC;
                *mnt = 0;
            } else {
                ++*exp;
            }
        }
    }
}

static inline void
fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode,
            int *flags)
{
    *sgn = x >> (FP64_BITS - 1);
    *exp = FP64_EXP(x);
    *mnt = FP64_MANT(x);

    if (*exp) {
        *mnt |= 1ULL << FP64_MANT_BITS;
    } else {
        // Handle subnormals:
        if (*mnt) {
            if (mode & FPLIB_FZ) {
                *flags |= FPLIB_IDC;
                *mnt = 0;
            } else {
                ++*exp;
            }
        }
    }
}

static inline int
fp16_is_NaN(int exp, uint16_t mnt)
{
    return exp == FP16_EXP_INF && FP16_MANT(mnt);
}

static inline int
fp32_is_NaN(int exp, uint32_t mnt)
{
    return exp == FP32_EXP_INF && FP32_MANT(mnt);
}

static inline int
fp64_is_NaN(int exp, uint64_t mnt)
{
    return exp == FP64_EXP_INF && FP64_MANT(mnt);
}

static inline int
fp16_is_signalling_NaN(int exp, uint16_t mnt)
{
    return fp16_is_NaN(exp, mnt) && !(mnt >> (FP16_MANT_BITS - 1) & 1);
}

static inline int
fp32_is_signalling_NaN(int exp, uint32_t mnt)
{
    return fp32_is_NaN(exp, mnt) && !(mnt >> (FP32_MANT_BITS - 1) & 1);
}

static inline int
fp64_is_signalling_NaN(int exp, uint64_t mnt)
{
    return fp64_is_NaN(exp, mnt) && !(mnt >> (FP64_MANT_BITS - 1) & 1);
}

static inline int
fp16_is_quiet_NaN(int exp, uint16_t mnt)
{
    return exp == FP16_EXP_INF && (mnt >> (FP16_MANT_BITS - 1) & 1);
}

static inline int
fp32_is_quiet_NaN(int exp, uint32_t mnt)
{
    return exp == FP32_EXP_INF && (mnt >> (FP32_MANT_BITS - 1) & 1);
}

static inline int
fp64_is_quiet_NaN(int exp, uint64_t mnt)
{
    return exp == FP64_EXP_INF && (mnt >> (FP64_MANT_BITS - 1) & 1);
}

static inline int
fp16_is_infinity(int exp, uint16_t mnt)
{
    return exp == FP16_EXP_INF && !FP16_MANT(mnt);
}

static inline int
fp32_is_infinity(int exp, uint32_t mnt)
{
    return exp == FP32_EXP_INF && !FP32_MANT(mnt);
}

static inline int
fp64_is_infinity(int exp, uint64_t mnt)
{
    return exp == FP64_EXP_INF && !FP64_MANT(mnt);
}

static inline uint16_t
fp16_process_NaN(uint16_t a, int mode, int *flags)
{
    if (!(a >> (FP16_MANT_BITS - 1) & 1)) {
        *flags |= FPLIB_IOC;
        a |= 1ULL << (FP16_MANT_BITS - 1);
    }
    return mode & FPLIB_DN ? fp16_defaultNaN() : a;
}

static inline uint32_t
fp32_process_NaN(uint32_t a, int mode, int *flags)
{
    if (!(a >> (FP32_MANT_BITS - 1) & 1)) {
        *flags |= FPLIB_IOC;
        a |= 1ULL << (FP32_MANT_BITS - 1);
    }
    return mode & FPLIB_DN ? fp32_defaultNaN() : a;
}

static inline uint64_t
fp64_process_NaN(uint64_t a, int mode, int *flags)
{
    if (!(a >> (FP64_MANT_BITS - 1) & 1)) {
        *flags |= FPLIB_IOC;
        a |= 1ULL << (FP64_MANT_BITS - 1);
    }
    return mode & FPLIB_DN ? fp64_defaultNaN() : a;
}

static uint16_t
fp16_process_NaNs(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_exp = FP16_EXP(a);
    uint16_t a_mnt = FP16_MANT(a);
    int b_exp = FP16_EXP(b);
    uint16_t b_mnt = FP16_MANT(b);

    // Handle signalling NaNs:
    if (fp16_is_signalling_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_signalling_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (fp16_is_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);

    return 0;
}

static uint32_t
fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_exp = FP32_EXP(a);
    uint32_t a_mnt = FP32_MANT(a);
    int b_exp = FP32_EXP(b);
    uint32_t b_mnt = FP32_MANT(b);

    // Handle signalling NaNs:
    if (fp32_is_signalling_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_signalling_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (fp32_is_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);

    return 0;
}

static uint64_t
fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_exp = FP64_EXP(a);
    uint64_t a_mnt = FP64_MANT(a);
    int b_exp = FP64_EXP(b);
    uint64_t b_mnt = FP64_MANT(b);

    // Handle signalling NaNs:
    if (fp64_is_signalling_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_signalling_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (fp64_is_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);

    return 0;
}

static uint16_t
fp16_process_NaNs3(uint16_t a, uint16_t b, uint16_t c, int mode, int *flags)
{
    int a_exp = FP16_EXP(a);
    uint16_t a_mnt = FP16_MANT(a);
    int b_exp = FP16_EXP(b);
    uint16_t b_mnt = FP16_MANT(b);
    int c_exp = FP16_EXP(c);
    uint16_t c_mnt = FP16_MANT(c);

    // Handle signalling NaNs:
    if (fp16_is_signalling_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_signalling_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);
    if (fp16_is_signalling_NaN(c_exp, c_mnt))
        return fp16_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (fp16_is_NaN(a_exp, a_mnt))
        return fp16_process_NaN(a, mode, flags);
    if (fp16_is_NaN(b_exp, b_mnt))
        return fp16_process_NaN(b, mode, flags);
    if (fp16_is_NaN(c_exp, c_mnt))
        return fp16_process_NaN(c, mode, flags);

    return 0;
}

static uint32_t
fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags)
{
    int a_exp = FP32_EXP(a);
    uint32_t a_mnt = FP32_MANT(a);
    int b_exp = FP32_EXP(b);
    uint32_t b_mnt = FP32_MANT(b);
    int c_exp = FP32_EXP(c);
    uint32_t c_mnt = FP32_MANT(c);

    // Handle signalling NaNs:
    if (fp32_is_signalling_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_signalling_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);
    if (fp32_is_signalling_NaN(c_exp, c_mnt))
        return fp32_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (fp32_is_NaN(a_exp, a_mnt))
        return fp32_process_NaN(a, mode, flags);
    if (fp32_is_NaN(b_exp, b_mnt))
        return fp32_process_NaN(b, mode, flags);
    if (fp32_is_NaN(c_exp, c_mnt))
        return fp32_process_NaN(c, mode, flags);

    return 0;
}

static uint64_t
fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags)
{
    int a_exp = FP64_EXP(a);
    uint64_t a_mnt = FP64_MANT(a);
    int b_exp = FP64_EXP(b);
    uint64_t b_mnt = FP64_MANT(b);
    int c_exp = FP64_EXP(c);
    uint64_t c_mnt = FP64_MANT(c);

    // Handle signalling NaNs:
    if (fp64_is_signalling_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_signalling_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);
    if (fp64_is_signalling_NaN(c_exp, c_mnt))
        return fp64_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (fp64_is_NaN(a_exp, a_mnt))
        return fp64_process_NaN(a, mode, flags);
    if (fp64_is_NaN(b_exp, b_mnt))
        return fp64_process_NaN(b, mode, flags);
    if (fp64_is_NaN(c_exp, c_mnt))
        return fp64_process_NaN(c, mode, flags);

    return 0;
}

static uint16_t
fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint16_t int_mant; // mantissa for result, less than (2 << FP16_MANT_BITS)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ16) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp16_zero(sgn);
    }

    // The bottom FP16_EXP_BITS bits of mnt are orred together:
    mnt = (4ULL << FP16_MANT_BITS | mnt >> (FP16_EXP_BITS - 1) |
           ((mnt & ((1ULL << FP16_EXP_BITS) - 1)) != 0));

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr16(mnt, 3 - exp);
        error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == 1ULL << FP16_MANT_BITS) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == 2ULL << FP16_MANT_BITS) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (!(mode & FPLIB_AHP)) {
        if (biased_exp >= (int)FP16_EXP_INF) {
            *flags |= FPLIB_OFC | FPLIB_IXC;
            if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
                (rm == FPLIB_RM && sgn)) {
                return fp16_infinity(sgn);
            } else {
                return fp16_max_normal(sgn);
            }
        }
    } else {
        if (biased_exp >= (int)FP16_EXP_INF + 1) {
            *flags |= FPLIB_IOC;
            return fp16_pack(sgn, FP16_EXP_INF, -1);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp16_pack(sgn, biased_exp, int_mant);
}

static uint16_t
fp16_round(int sgn, int exp, uint16_t mnt, int mode, int *flags)
{
    return fp16_round_(sgn, exp, mnt, mode & 3, mode, flags);
}
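
// Illustrative note (not part of the original source): the two-bit
// "error" value is a guard/sticky encoding of the discarded fraction:
// 0 = exact, 1 = below half an ulp, 2 = exactly half, 3 = above half.
// Under FPLIB_RN (round to nearest, ties to even), error == 2 rounds up
// only when int_mant is odd, so a mantissa ending ...01 plus half an
// ulp becomes ...10, while one ending ...10 stays ...10.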

static uint32_t
fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint32_t int_mant; // mantissa for result, less than (2 << FP32_MANT_BITS)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp32_zero(sgn);
    }

    // The bottom FP32_EXP_BITS bits of mnt are orred together:
    mnt = (4ULL << FP32_MANT_BITS | mnt >> (FP32_EXP_BITS - 1) |
           ((mnt & ((1ULL << FP32_EXP_BITS) - 1)) != 0));

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr32(mnt, 3 - exp);
        error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == 1ULL << FP32_MANT_BITS) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == 2ULL << FP32_MANT_BITS) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (biased_exp >= (int)FP32_EXP_INF) {
        *flags |= FPLIB_OFC | FPLIB_IXC;
        if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
            (rm == FPLIB_RM && sgn)) {
            return fp32_infinity(sgn);
        } else {
            return fp32_max_normal(sgn);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp32_pack(sgn, biased_exp, int_mant);
}

static uint32_t
fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags)
{
    return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags);
}

static uint64_t
fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint64_t int_mant; // mantissa for result, less than (2 << FP64_MANT_BITS)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp64_zero(sgn);
    }

    // The bottom FP64_EXP_BITS bits of mnt are orred together:
    mnt = (4ULL << FP64_MANT_BITS | mnt >> (FP64_EXP_BITS - 1) |
           ((mnt & ((1ULL << FP64_EXP_BITS) - 1)) != 0));

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr64(mnt, 3 - exp);
        error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == 1ULL << FP64_MANT_BITS) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == 2ULL << FP64_MANT_BITS) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (biased_exp >= (int)FP64_EXP_INF) {
        *flags |= FPLIB_OFC | FPLIB_IXC;
        if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
            (rm == FPLIB_RM && sgn)) {
            return fp64_infinity(sgn);
        } else {
            return fp64_max_normal(sgn);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp64_pack(sgn, biased_exp, int_mant);
}

static uint64_t
fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags)
{
    return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags);
}

static int
fp16_compare_eq(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        if (fp16_is_signalling_NaN(a_exp, a_mnt) ||
            fp16_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp16_compare_ge(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp16_compare_gt(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

static int
fp16_compare_un(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint16_t a_mnt, b_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp16_is_NaN(a_exp, a_mnt) ||
        fp16_is_NaN(b_exp, b_mnt)) {
        if (fp16_is_signalling_NaN(a_exp, a_mnt) ||
            fp16_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 1;
    }
    return 0;
}

static int
fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        if (fp32_is_signalling_NaN(a_exp, a_mnt) ||
            fp32_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

static int
fp32_compare_un(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp32_is_NaN(a_exp, a_mnt) ||
        fp32_is_NaN(b_exp, b_mnt)) {
        if (fp32_is_signalling_NaN(a_exp, a_mnt) ||
            fp32_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 1;
    }
    return 0;
}

static int
fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        if (fp64_is_signalling_NaN(a_exp, a_mnt) ||
            fp64_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}

static int
fp64_compare_un(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if (fp64_is_NaN(a_exp, a_mnt) ||
        fp64_is_NaN(b_exp, b_mnt)) {
        if (fp64_is_signalling_NaN(a_exp, a_mnt) ||
            fp64_is_signalling_NaN(b_exp, b_mnt))
            *flags |= FPLIB_IOC;
        return 1;
    }
    return 0;
}

static uint16_t
fp16_add(uint16_t a, uint16_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint16_t a_mnt, b_mnt, x, x_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp16_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == FP16_EXP_INF && b_exp == FP16_EXP_INF && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    } else if (a_exp == FP16_EXP_INF) {
        return fp16_infinity(a_sgn);
    } else if (b_exp == FP16_EXP_INF) {
        return fp16_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp16_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr16(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl16(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr16(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl16(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp16_zero((mode & 3) == 2);
    }

    x_mnt = fp16_normalise(x_mnt, &x_exp);

    return fp16_round(x_sgn, x_exp + FP16_EXP_BITS - 3, x_mnt << 1,
                      mode, flags);
}
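
// Illustrative note (not part of the original source): before alignment
// the add routines shift both mantissas left by 3 to make room for
// guard bits, then shift the operand with the smaller exponent right,
// ORing every shifted-out bit into the bottom (sticky) bit. No
// inexactness is lost before fp16_round()/fp32_round()/fp64_round()
// makes the single final rounding decision.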

static uint32_t
fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x, x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == FP32_EXP_INF && b_exp == FP32_EXP_INF && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    } else if (a_exp == FP32_EXP_INF) {
        return fp32_infinity(a_sgn);
    } else if (b_exp == FP32_EXP_INF) {
        return fp32_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp32_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr32(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr32(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp32_zero((mode & 3) == 2);
    }

    x_mnt = fp32_normalise(x_mnt, &x_exp);

    return fp32_round(x_sgn, x_exp + FP32_EXP_BITS - 3, x_mnt << 1,
                      mode, flags);
}

static uint64_t
fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint64_t a_mnt, b_mnt, x, x_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == FP64_EXP_INF && b_exp == FP64_EXP_INF && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    } else if (a_exp == FP64_EXP_INF) {
        return fp64_infinity(a_sgn);
    } else if (b_exp == FP64_EXP_INF) {
        return fp64_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp64_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr64(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr64(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp64_zero((mode & 3) == 2);
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp + FP64_EXP_BITS - 3, x_mnt << 1,
                      mode, flags);
}

static uint16_t
fp16_mul(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint16_t a_mnt, b_mnt, x;
    uint32_t x_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp16_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == FP16_EXP_INF && !b_mnt) ||
        (b_exp == FP16_EXP_INF && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    } else if (a_exp == FP16_EXP_INF || b_exp == FP16_EXP_INF) {
        return fp16_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp16_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1;
    x_mnt = (uint32_t)a_mnt * b_mnt;
    x_mnt = fp32_normalise(x_mnt, &x_exp);

    // Convert to FP16_BITS bits, collapsing error into bottom bit:
    x_mnt = lsr32(x_mnt, FP16_BITS - 1) | !!lsl32(x_mnt, FP16_BITS + 1);

    return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
}
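
// Illustrative note (not part of the original source): the "collapsing
// error into bottom bit" step narrows the double-width product to
// FP16_BITS + 1 significant bits without losing inexactness:
//   lsr32(x_mnt, FP16_BITS - 1)   keeps the top 17 bits, and
//   !!lsl32(x_mnt, FP16_BITS + 1) ORs in a sticky 1 if any discarded
// low bit was set, which is all fp16_round() needs to derive its
// 0/1/2/3 error value.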

static uint32_t
fp32_mul(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x;
    uint64_t x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == FP32_EXP_INF && !b_mnt) ||
        (b_exp == FP32_EXP_INF && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    } else if (a_exp == FP32_EXP_INF || b_exp == FP32_EXP_INF) {
        return fp32_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp32_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1;
    x_mnt = (uint64_t)a_mnt * b_mnt;
    x_mnt = fp64_normalise(x_mnt, &x_exp);

    // Convert to FP32_BITS bits, collapsing error into bottom bit:
    x_mnt = lsr64(x_mnt, FP32_BITS - 1) | !!lsl64(x_mnt, FP32_BITS + 1);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint64_t
fp64_mul(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint64_t a_mnt, b_mnt, x;
    uint64_t x0_mnt, x1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == FP64_EXP_INF && !b_mnt) ||
        (b_exp == FP64_EXP_INF && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    } else if (a_exp == FP64_EXP_INF || b_exp == FP64_EXP_INF) {
        return fp64_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp64_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1;
    mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt);
    fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);

    // Convert to FP64_BITS bits, collapsing error into bottom bit:
    x0_mnt = x1_mnt << 1 | !!x0_mnt;

    return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags);
}

static uint16_t
fp16_muladd(uint16_t a, uint16_t b, uint16_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint16_t a_mnt, b_mnt, c_mnt, x;
    uint32_t x_mnt, y_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp16_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp16_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (fp16_is_quiet_NaN(a_exp, a_mnt) &&
        ((!b_mnt && fp16_is_infinity(c_exp, c_mnt)) ||
         (!c_mnt && fp16_is_infinity(b_exp, b_mnt)))) {
        x = fp16_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == FP16_EXP_INF && !c_mnt) ||
        (c_exp == FP16_EXP_INF && !b_mnt) ||
        (a_exp == FP16_EXP_INF &&
         (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    }
    if (a_exp == FP16_EXP_INF)
        return fp16_infinity(a_sgn);
    if (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF)
        return fp16_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp16_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + 2 * FP16_EXP_BITS - 3;
    x_mnt = (uint32_t)a_mnt << (FP16_MANT_BITS + 4);

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1 - 3;
    y_mnt = (uint32_t)b_mnt * c_mnt << 3;
    if (!y_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        y_mnt = (lsr32(y_mnt, x_exp - y_exp) |
                 !!(y_mnt & (lsl32(1, x_exp - y_exp) - 1)));
        y_exp = x_exp;
    } else {
        x_mnt = (lsr32(x_mnt, y_exp - x_exp) |
                 !!(x_mnt & (lsl32(1, y_exp - x_exp) - 1)));
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        x_mnt = x_mnt + y_mnt;
    } else if (x_mnt >= y_mnt) {
        x_mnt = x_mnt - y_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = y_mnt - x_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp16_zero((mode & 3) == 2);
    }

    // Normalise into FP16_BITS bits, collapsing error into bottom bit:
    x_mnt = fp32_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);

    return fp16_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
}
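
// Illustrative note (not part of the original source): the muladd
// routines behave as a fused multiply-add: the product b * c is formed
// exactly in a double-width mantissa, the addend is aligned against it
// with a sticky bit, and rounding happens only once, at the very end.
// That single-rounding behaviour is what distinguishes a fused
// multiply-add from a separate multiply followed by an add.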

static uint32_t
fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint32_t a_mnt, b_mnt, c_mnt, x;
    uint64_t x_mnt, y_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp32_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (fp32_is_quiet_NaN(a_exp, a_mnt) &&
        ((!b_mnt && fp32_is_infinity(c_exp, c_mnt)) ||
         (!c_mnt && fp32_is_infinity(b_exp, b_mnt)))) {
        x = fp32_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == FP32_EXP_INF && !c_mnt) ||
        (c_exp == FP32_EXP_INF && !b_mnt) ||
        (a_exp == FP32_EXP_INF &&
         (b_exp == FP32_EXP_INF || c_exp == FP32_EXP_INF) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }
    if (a_exp == FP32_EXP_INF)
        return fp32_infinity(a_sgn);
    if (b_exp == FP32_EXP_INF || c_exp == FP32_EXP_INF)
        return fp32_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp32_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + 2 * FP32_EXP_BITS - 3;
    x_mnt = (uint64_t)a_mnt << (FP32_MANT_BITS + 4);

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1 - 3;
    y_mnt = (uint64_t)b_mnt * c_mnt << 3;
    if (!y_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        y_mnt = (lsr64(y_mnt, x_exp - y_exp) |
                 !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1)));
        y_exp = x_exp;
    } else {
        x_mnt = (lsr64(x_mnt, y_exp - x_exp) |
                 !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1)));
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        x_mnt = x_mnt + y_mnt;
    } else if (x_mnt >= y_mnt) {
        x_mnt = x_mnt - y_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = y_mnt - x_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp32_zero((mode & 3) == 2);
    }

    // Normalise into FP32_BITS bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
}

static uint64_t
fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint64_t a_mnt, b_mnt, c_mnt, x;
    uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp64_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (fp64_is_quiet_NaN(a_exp, a_mnt) &&
        ((!b_mnt && fp64_is_infinity(c_exp, c_mnt)) ||
         (!c_mnt && fp64_is_infinity(b_exp, b_mnt)))) {
        x = fp64_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == FP64_EXP_INF && !c_mnt) ||
        (c_exp == FP64_EXP_INF && !b_mnt) ||
        (a_exp == FP64_EXP_INF &&
         (b_exp == FP64_EXP_INF || c_exp == FP64_EXP_INF) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }
    if (a_exp == FP64_EXP_INF)
        return fp64_infinity(a_sgn);
    if (b_exp == FP64_EXP_INF || c_exp == FP64_EXP_INF)
        return fp64_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp64_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + FP64_EXP_BITS;
    x0_mnt = 0;
    x1_mnt = a_mnt;

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1 - 3;
    mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3);
    if (!y0_mnt && !y1_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        uint64_t t0, t1;
        lsl128(&t0, &t1, y0_mnt, y1_mnt,
               x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0);
        lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp);
        y0_mnt |= !!(t0 | t1);
        y_exp = x_exp;
    } else {
        uint64_t t0, t1;
        lsl128(&t0, &t1, x0_mnt, x1_mnt,
               y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0);
        lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp);
        x0_mnt |= !!(t0 | t1);
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
    } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) {
        sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
    } else {
        x_sgn ^= 1;
        sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt);
    }

    if (!x0_mnt && !x1_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp64_zero((mode & 3) == 2);
    }

    // Normalise into FP64_BITS bits, collapsing error into bottom bit:
    fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
    x0_mnt = x1_mnt << 1 | !!x0_mnt;

    return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags);
}

static uint16_t
fp16_div(uint16_t a, uint16_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint16_t a_mnt, b_mnt, x;
    uint32_t x_mnt;

    fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp16_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == FP16_EXP_INF && b_exp == FP16_EXP_INF) ||
        (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp16_defaultNaN();
    }
    if (a_exp == FP16_EXP_INF || !b_mnt) {
        if (a_exp != FP16_EXP_INF)
            *flags |= FPLIB_DZC;
        return fp16_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == FP16_EXP_INF)
        return fp16_zero(a_sgn ^ b_sgn);

    // Divide, setting bottom bit if inexact:
    a_mnt = fp16_normalise(a_mnt, &a_exp);
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + (FP16_EXP_BIAS + FP16_BITS + 2 * FP16_EXP_BITS - 3);
    x_mnt = ((uint32_t)a_mnt << (FP16_MANT_BITS - FP16_EXP_BITS + 3)) / b_mnt;
    x_mnt |= (x_mnt * b_mnt !=
              (uint32_t)a_mnt << (FP16_MANT_BITS - FP16_EXP_BITS + 3));

    // Normalise into FP16_BITS bits, collapsing error into bottom bit:
    x_mnt = fp32_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);

    return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint32_t
fp32_div(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x;
    uint64_t x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == FP32_EXP_INF && b_exp == FP32_EXP_INF) ||
        (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }
    if (a_exp == FP32_EXP_INF || !b_mnt) {
        if (a_exp != FP32_EXP_INF)
            *flags |= FPLIB_DZC;
        return fp32_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == FP32_EXP_INF)
        return fp32_zero(a_sgn ^ b_sgn);

    // Divide, setting bottom bit if inexact:
    a_mnt = fp32_normalise(a_mnt, &a_exp);
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + (FP32_EXP_BIAS + FP32_BITS + 2 * FP32_EXP_BITS - 3);
    x_mnt = ((uint64_t)a_mnt << (FP32_MANT_BITS - FP32_EXP_BITS + 3)) / b_mnt;
    x_mnt |= (x_mnt * b_mnt !=
              (uint64_t)a_mnt << (FP32_MANT_BITS - FP32_EXP_BITS + 3));

    // Normalise into FP32_BITS bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> (FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}

static uint64_t
fp64_div(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c;
    uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == FP64_EXP_INF && b_exp == FP64_EXP_INF) ||
        (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }
    if (a_exp == FP64_EXP_INF || !b_mnt) {
        if (a_exp != FP64_EXP_INF)
            *flags |= FPLIB_DZC;
        return fp64_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == FP64_EXP_INF)
        return fp64_zero(a_sgn ^ b_sgn);

    // Find reciprocal of divisor with Newton-Raphson:
    a_mnt = fp64_normalise(a_mnt, &a_exp);
    b_mnt = fp64_normalise(b_mnt, &b_exp);
    x_mnt = ~(uint64_t)0 / (b_mnt >> 31);
    mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt);
    sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32);
    mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33);

    // Multiply by dividend:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + FP64_EXP_BIAS + 8;
    mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4);
    x_mnt = x1_mnt;

    // This is an underestimate, so try adding one:
    mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1);
    c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11);
    if (c <= 0) {
        ++x_mnt;
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
}
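
// Illustrative note (not part of the original source): fp64_div() uses
// the standard Newton-Raphson reciprocal iteration. Starting from a
// roughly 32-bit estimate x0 = ~0 / (b >> 31), one step of
//   x1 = x0 * (2 - b * x0)
// (carried out above in 128-bit fixed point) roughly doubles the number
// of correct bits, leaving the quotient estimate low by at most one
// unit; the final cmp128() check ("try adding one") corrects that last
// unit, and !!c supplies the sticky bit, zero only when the division is
// exact.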
1910 
1911 static void
1912 set_fpscr0(FPSCR &fpscr, int flags)
1913 {
1914  if (flags & FPLIB_IDC) {
1915  fpscr.idc = 1;
1916  }
1917  if (flags & FPLIB_IOC) {
1918  fpscr.ioc = 1;
1919  }
1920  if (flags & FPLIB_DZC) {
1921  fpscr.dzc = 1;
1922  }
1923  if (flags & FPLIB_OFC) {
1924  fpscr.ofc = 1;
1925  }
1926  if (flags & FPLIB_UFC) {
1927  fpscr.ufc = 1;
1928  }
1929  if (flags & FPLIB_IXC) {
1930  fpscr.ixc = 1;
1931  }
1932 }
1933 
1934 static uint16_t
1935 fp16_scale(uint16_t a, int16_t b, int mode, int *flags)
1936 {
1937  int a_sgn, a_exp;
1938  uint16_t a_mnt;
1939 
1940  fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1941 
1942  // Handle NaNs:
1943  if (fp16_is_NaN(a_exp, a_mnt)) {
1944  return fp16_process_NaN(a, mode, flags);
1945  }
1946 
1947  // Handle zeroes:
1948  if (!a_mnt) {
1949  return fp16_zero(a_sgn);
1950  }
1951 
1952  // Handle infinities:
1953  if (a_exp == FP16_EXP_INF) {
1954  return fp16_infinity(a_sgn);
1955  }
1956 
1957  b = b < -300 ? -300 : b;
1958  b = b > 300 ? 300 : b;
1959  a_exp += b;
1960  a_mnt <<= 3;
1961 
1962  a_mnt = fp16_normalise(a_mnt, &a_exp);
1963 
1964  return fp16_round(a_sgn, a_exp + FP16_EXP_BITS - 3, a_mnt << 1,
1965  mode, flags);
1966 }
1967 
1968 static uint32_t
1969 fp32_scale(uint32_t a, int32_t b, int mode, int *flags)
1970 {
1971  int a_sgn, a_exp;
1972  uint32_t a_mnt;
1973 
1974  fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1975 
1976  // Handle NaNs:
1977  if (fp32_is_NaN(a_exp, a_mnt)) {
1978  return fp32_process_NaN(a, mode, flags);
1979  }
1980 
1981  // Handle zeroes:
1982  if (!a_mnt) {
1983  return fp32_zero(a_sgn);
1984  }
1985 
1986  // Handle infinities:
1987  if (a_exp == FP32_EXP_INF) {
1988  return fp32_infinity(a_sgn);
1989  }
1990 
1991  b = b < -300 ? -300 : b;
1992  b = b > 300 ? 300 : b;
1993  a_exp += b;
1994  a_mnt <<= 3;
1995 
1996  a_mnt = fp32_normalise(a_mnt, &a_exp);
1997 
1998  return fp32_round(a_sgn, a_exp + FP32_EXP_BITS - 3, a_mnt << 1,
1999  mode, flags);
2000 }
2001 
2002 static uint64_t
2003 fp64_scale(uint64_t a, int64_t b, int mode, int *flags)
2004 {
2005  int a_sgn, a_exp;
2006  uint64_t a_mnt;
2007 
2008  fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2009 
2010  // Handle NaNs:
2011  if (fp64_is_NaN(a_exp, a_mnt)) {
2012  return fp64_process_NaN(a, mode, flags);
2013  }
2014 
2015  // Handle zeroes:
2016  if (!a_mnt) {
2017  return fp64_zero(a_sgn);
2018  }
2019 
2020  // Handle infinities:
2021  if (a_exp == FP64_EXP_INF) {
2022  return fp64_infinity(a_sgn);
2023  }
2024 
2025  b = b < -3000 ? -3000 : b;
2026  b = b > 3000 ? 3000 : b;
2027  a_exp += b;
2028  a_mnt <<= 3;
2029 
2030  a_mnt = fp64_normalise(a_mnt, &a_exp);
2031 
2032  return fp64_round(a_sgn, a_exp + FP64_EXP_BITS - 3, a_mnt << 1,
2033  mode, flags);
2034 }
2035 
2036 static uint16_t
2037 fp16_sqrt(uint16_t a, int mode, int *flags)
2038 {
2039  int a_sgn, a_exp, x_sgn, x_exp;
2040  uint16_t a_mnt, x_mnt;
2041  uint32_t x, t0, t1;
2042 
2043  fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2044 
2045  // Handle NaNs:
2046  if (fp16_is_NaN(a_exp, a_mnt))
2047  return fp16_process_NaN(a, mode, flags);
2048 
2049  // Handle infinities and zeroes:
2050  if (!a_mnt)
2051  return fp16_zero(a_sgn);
2052  if (a_exp == FP16_EXP_INF && !a_sgn)
2053  return fp16_infinity(a_sgn);
2054  if (a_sgn) {
2055  *flags |= FPLIB_IOC;
2056  return fp16_defaultNaN();
2057  }
2058 
2059  a_mnt = fp16_normalise(a_mnt, &a_exp);
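      // Make the exponent even, compensating in the mantissa, so that it
      // halves exactly under the square root.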
2060  if (a_exp & 1) {
2061  ++a_exp;
2062  a_mnt >>= 1;
2063  }
2064 
2065  // x = (a * 3 + 5) / 8
2066  x = ((uint32_t)a_mnt << 14) + ((uint32_t)a_mnt << 13) + ((uint32_t)5 << 28);
2067 
2068  // x = (a / x + x) / 2; // 8-bit accuracy
2069  x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15;
2070 
2071  // x = (a / x + x) / 2; // 16-bit accuracy
2072  x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15;
2073 
2074  x_sgn = 0;
2075  x_exp = (a_exp + 27) >> 1;
2076  x_mnt = ((x - (1 << 18)) >> 19) + 1;
2077  t1 = (uint32_t)x_mnt * x_mnt;
2078  t0 = (uint32_t)a_mnt << 9;
2079  if (t1 > t0) {
2080  --x_mnt;
2081  }
2082 
2083  x_mnt = fp16_normalise(x_mnt, &x_exp);
2084 
2085  return fp16_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
2086 }
2087 
2088 static uint32_t
2089 fp32_sqrt(uint32_t a, int mode, int *flags)
2090 {
2091  int a_sgn, a_exp, x_sgn, x_exp;
2092  uint32_t a_mnt, x, x_mnt;
2093  uint64_t t0, t1;
2094 
2095  fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2096 
2097  // Handle NaNs:
2098  if (fp32_is_NaN(a_exp, a_mnt))
2099  return fp32_process_NaN(a, mode, flags);
2100 
2101  // Handle infinities and zeroes:
2102  if (!a_mnt)
2103  return fp32_zero(a_sgn);
2104  if (a_exp == FP32_EXP_INF && !a_sgn)
2105  return fp32_infinity(a_sgn);
2106  if (a_sgn) {
2107  *flags |= FPLIB_IOC;
2108  return fp32_defaultNaN();
2109  }
2110 
2111  a_mnt = fp32_normalise(a_mnt, &a_exp);
2112  if (!(a_exp & 1)) {
2113  ++a_exp;
2114  a_mnt >>= 1;
2115  }
2116 
2117  // x = (a * 3 + 5) / 8
2118  x = (a_mnt >> 2) + (a_mnt >> 3) + ((uint32_t)5 << 28);
2119 
2120  // x = (a / x + x) / 2; // 8-bit accuracy
2121  x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
2122 
2123  // x = (a / x + x) / 2; // 16-bit accuracy
2124  x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
2125 
2126  // x = (a / x + x) / 2; // 32-bit accuracy
2127  x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1);
2128 
2129  x_sgn = 0;
2130  x_exp = (a_exp + 147) >> 1;
2131  x_mnt = ((x - (1 << 5)) >> 6) + 1;
2132  t1 = (uint64_t)x_mnt * x_mnt;
2133  t0 = (uint64_t)a_mnt << 19;
2134  if (t1 > t0) {
2135  --x_mnt;
2136  }
2137 
2138  x_mnt = fp32_normalise(x_mnt, &x_exp);
2139 
2140  return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
2141 }
2142 
2143 static uint64_t
2144 fp64_sqrt(uint64_t a, int mode, int *flags)
2145 {
2146  int a_sgn, a_exp, x_sgn, x_exp, c;
2147  uint64_t a_mnt, x_mnt, r, x0, x1;
2148  uint32_t x;
2149 
2150  fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2151 
2152  // Handle NaNs:
2153  if (fp64_is_NaN(a_exp, a_mnt))
2154  return fp64_process_NaN(a, mode, flags);
2155 
2156  // Handle infinities and zeroes:
2157  if (!a_mnt)
2158  return fp64_zero(a_sgn);
2159  if (a_exp == FP64_EXP_INF && !a_sgn)
2160  return fp64_infinity(a_sgn);
2161  if (a_sgn) {
2162  *flags |= FPLIB_IOC;
2163  return fp64_defaultNaN();
2164  }
2165 
2166  a_mnt = fp64_normalise(a_mnt, &a_exp);
2167  if (a_exp & 1) {
2168  ++a_exp;
2169  a_mnt >>= 1;
2170  }
2171 
2172  // x = (a * 3 + 5) / 8
2173  x = (a_mnt >> 34) + (a_mnt >> 35) + ((uint32_t)5 << 28);
2174 
2175  // x = (a / x + x) / 2; // 8-bit accuracy
2176  x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
2177 
2178  // x = (a / x + x) / 2; // 16-bit accuracy
2179  x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
2180 
2181  // x = (a / x + x) / 2; // 32-bit accuracy
2182  x = ((a_mnt / x) >> 2) + (x >> 1);
2183 
2184  // r = 1 / x; // 32-bit accuracy
2185  r = ((uint64_t)1 << 62) / x;
2186 
2187  // r = r * (2 - x * r); // 64-bit accuracy
2188  mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r);
2189  lsr128(&x0, &x1, x0, x1, 31);
2190 
2191  // x = (x + a * r) / 2; // 64-bit accuracy
2192  mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2);
2193  lsl128(&x0, &x1, x0, x1, 5);
2194  lsr128(&x0, &x1, x0, x1, 56);
2195 
2196  x0 = ((uint64_t)x << 31) + (x0 >> 1);
2197 
2198  x_sgn = 0;
2199  x_exp = (a_exp + 1053) >> 1;
2200  x_mnt = x0;
2201  x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1;
2202  mul62x62(&x0, &x1, x_mnt, x_mnt);
2203  lsl128(&x0, &x1, x0, x1, 19);
2204  c = cmp128(x0, x1, 0, a_mnt);
2205  if (c > 0)
2206  --x_mnt;
2207 
2208  x_mnt = fp64_normalise(x_mnt, &x_exp);
2209 
2210  return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
2211 }
2212 
2213 static int
2214 modeConv(FPSCR fpscr)
2215 {
2216  uint32_t x = (uint32_t)fpscr;
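      // FPSCR bits 23:22 (RMode), 24 (FZ) and 25 (DN) map directly onto
      // fplib mode bits 0-3; FPSCR bit 19 (FZ16) maps to FPLIB_FZ16.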
2217  return (x >> 22 & 0xf) | (x >> 19 & 1 ? FPLIB_FZ16 : 0);
2218  // AHP bit is ignored. Only fplibConvert uses AHP.
2219 }
2220 
2221 static void
2222 set_fpscr(FPSCR &fpscr, int flags)
2223 {
2224  // translate back to FPSCR
2225  bool underflow = false;
2226  if (flags & FPLIB_IDC) {
2227  fpscr.idc = 1;
2228  }
2229  if (flags & FPLIB_IOC) {
2230  fpscr.ioc = 1;
2231  }
2232  if (flags & FPLIB_DZC) {
2233  fpscr.dzc = 1;
2234  }
2235  if (flags & FPLIB_OFC) {
2236  fpscr.ofc = 1;
2237  }
2238  if (flags & FPLIB_UFC) {
 2239  underflow = true; // noted so that IXC is suppressed below when flush-to-zero underflow occurs
2240  fpscr.ufc = 1;
2241  }
2242  if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) {
2243  fpscr.ixc = 1;
2244  }
2245 }
2246 
2247 template <>
2248 bool
2249 fplibCompareEQ(uint16_t a, uint16_t b, FPSCR &fpscr)
2250 {
2251  int flags = 0;
2252  int x = fp16_compare_eq(a, b, modeConv(fpscr), &flags);
2253  set_fpscr(fpscr, flags);
2254  return x;
2255 }
2256 
2257 template <>
2258 bool
2259 fplibCompareGE(uint16_t a, uint16_t b, FPSCR &fpscr)
2260 {
2261  int flags = 0;
2262  int x = fp16_compare_ge(a, b, modeConv(fpscr), &flags);
2263  set_fpscr(fpscr, flags);
2264  return x;
2265 }
2266 
2267 template <>
2268 bool
2269 fplibCompareGT(uint16_t a, uint16_t b, FPSCR &fpscr)
2270 {
2271  int flags = 0;
2272  int x = fp16_compare_gt(a, b, modeConv(fpscr), &flags);
2273  set_fpscr(fpscr, flags);
2274  return x;
2275 }
2276 
2277 template <>
2278 bool
2279 fplibCompareUN(uint16_t a, uint16_t b, FPSCR &fpscr)
2280 {
2281  int flags = 0;
2282  int x = fp16_compare_un(a, b, modeConv(fpscr), &flags);
2283  set_fpscr(fpscr, flags);
2284  return x;
2285 }
2286 
2287 template <>
2288 bool
2289 fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr)
2290 {
2291  int flags = 0;
2292  int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags);
2293  set_fpscr(fpscr, flags);
2294  return x;
2295 }
2296 
2297 template <>
2298 bool
2299 fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr)
2300 {
2301  int flags = 0;
2302  int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags);
2303  set_fpscr(fpscr, flags);
2304  return x;
2305 }
2306 
2307 template <>
2308 bool
2309 fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr)
2310 {
2311  int flags = 0;
2312  int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags);
2313  set_fpscr(fpscr, flags);
2314  return x;
2315 }
2316 
2317 template <>
2318 bool
2319 fplibCompareUN(uint32_t a, uint32_t b, FPSCR &fpscr)
2320 {
2321  int flags = 0;
2322  int x = fp32_compare_un(a, b, modeConv(fpscr), &flags);
2323  set_fpscr(fpscr, flags);
2324  return x;
2325 }
2326 
2327 template <>
2328 bool
2329 fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr)
2330 {
2331  int flags = 0;
2332  int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags);
2333  set_fpscr(fpscr, flags);
2334  return x;
2335 }
2336 
2337 template <>
2338 bool
2339 fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr)
2340 {
2341  int flags = 0;
2342  int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags);
2343  set_fpscr(fpscr, flags);
2344  return x;
2345 }
2346 
2347 template <>
2348 bool
2349 fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr)
2350 {
2351  int flags = 0;
2352  int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags);
2353  set_fpscr(fpscr, flags);
2354  return x;
2355 }
2356 
2357 template <>
2358 bool
2359 fplibCompareUN(uint64_t a, uint64_t b, FPSCR &fpscr)
2360 {
2361  int flags = 0;
2362  int x = fp64_compare_un(a, b, modeConv(fpscr), &flags);
2363  set_fpscr(fpscr, flags);
2364  return x;
2365 }
2366 
2367 template <>
2368 uint16_t
2369 fplibAbs(uint16_t op)
2370 {
2371  return op & ~(1ULL << (FP16_BITS - 1));
2372 }
2373 
2374 template <>
2375 uint32_t
2376 fplibAbs(uint32_t op)
2377 {
2378  return op & ~(1ULL << (FP32_BITS - 1));
2379 }
2380 
2381 template <>
2382 uint64_t
2383 fplibAbs(uint64_t op)
2384 {
2385  return op & ~(1ULL << (FP64_BITS - 1));
2386 }
2387 
2388 template <>
2389 uint16_t
2390 fplibAdd(uint16_t op1, uint16_t op2, FPSCR &fpscr)
2391 {
2392  int flags = 0;
2393  uint16_t result = fp16_add(op1, op2, 0, modeConv(fpscr), &flags);
2394  set_fpscr0(fpscr, flags);
2395  return result;
2396 }
2397 
2398 template <>
2399 uint32_t
2400 fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2401 {
2402  int flags = 0;
2403  uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags);
2404  set_fpscr0(fpscr, flags);
2405  return result;
2406 }
2407 
2408 template <>
2409 uint64_t
2410 fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2411 {
2412  int flags = 0;
2413  uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags);
2414  set_fpscr0(fpscr, flags);
2415  return result;
2416 }
2417 
2418 template <>
2419 int
2420 fplibCompare(uint16_t op1, uint16_t op2, bool signal_nans, FPSCR &fpscr)
2421 {
2422  int mode = modeConv(fpscr);
2423  int flags = 0;
2424  int sgn1, exp1, sgn2, exp2, result;
2425  uint16_t mnt1, mnt2;
2426 
2427  fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2428  fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2429 
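      // The result is an NZCV encoding: 8 (0b1000) = less than, 6 (0b0110)
      // = equal, 2 (0b0010) = greater than, 3 (0b0011) = unordered.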
2430  if (fp16_is_NaN(exp1, mnt1) || fp16_is_NaN(exp2, mnt2)) {
2431  result = 3;
2432  if (fp16_is_signalling_NaN(exp1, mnt1) ||
2433  fp16_is_signalling_NaN(exp2, mnt2) || signal_nans)
2434  flags |= FPLIB_IOC;
2435  } else {
2436  if (op1 == op2 || (!mnt1 && !mnt2)) {
2437  result = 6;
2438  } else if (sgn1 != sgn2) {
2439  result = sgn1 ? 8 : 2;
2440  } else if (exp1 != exp2) {
2441  result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2442  } else {
2443  result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2444  }
2445  }
2446 
2447  set_fpscr0(fpscr, flags);
2448 
2449  return result;
2450 }
2451 
2452 template <>
2453 int
2454 fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr)
2455 {
2456  int mode = modeConv(fpscr);
2457  int flags = 0;
2458  int sgn1, exp1, sgn2, exp2, result;
2459  uint32_t mnt1, mnt2;
2460 
2461  fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2462  fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2463 
2464  if (fp32_is_NaN(exp1, mnt1) || fp32_is_NaN(exp2, mnt2)) {
2465  result = 3;
2466  if (fp32_is_signalling_NaN(exp1, mnt1) ||
2467  fp32_is_signalling_NaN(exp2, mnt2) || signal_nans)
2468  flags |= FPLIB_IOC;
2469  } else {
2470  if (op1 == op2 || (!mnt1 && !mnt2)) {
2471  result = 6;
2472  } else if (sgn1 != sgn2) {
2473  result = sgn1 ? 8 : 2;
2474  } else if (exp1 != exp2) {
2475  result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2476  } else {
2477  result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2478  }
2479  }
2480 
2481  set_fpscr0(fpscr, flags);
2482 
2483  return result;
2484 }
2485 
2486 template <>
2487 int
2488 fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr)
2489 {
2490  int mode = modeConv(fpscr);
2491  int flags = 0;
2492  int sgn1, exp1, sgn2, exp2, result;
2493  uint64_t mnt1, mnt2;
2494 
2495  fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2496  fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2497 
2498  if (fp64_is_NaN(exp1, mnt1) || fp64_is_NaN(exp2, mnt2)) {
2499  result = 3;
2500  if (fp64_is_signalling_NaN(exp1, mnt1) ||
2501  fp64_is_signalling_NaN(exp2, mnt2) || signal_nans)
2502  flags |= FPLIB_IOC;
2503  } else {
2504  if (op1 == op2 || (!mnt1 && !mnt2)) {
2505  result = 6;
2506  } else if (sgn1 != sgn2) {
2507  result = sgn1 ? 8 : 2;
2508  } else if (exp1 != exp2) {
2509  result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2510  } else {
2511  result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2512  }
2513  }
2514 
2515  set_fpscr0(fpscr, flags);
2516 
2517  return result;
2518 }
2519 
 2520 static uint16_t
 2521 fp16_FPConvertNaN_32(uint32_t op)
 2522 {
 2523  return fp16_pack(op >> (FP32_BITS - 1), FP16_EXP_INF,
 2524  1ULL << (FP16_MANT_BITS - 1) |
 2525  op >> (FP32_MANT_BITS - FP16_MANT_BITS));
 2526 }
2527 
 2528 static uint16_t
 2529 fp16_FPConvertNaN_64(uint64_t op)
 2530 {
 2531  return fp16_pack(op >> (FP64_BITS - 1), FP16_EXP_INF,
 2532  1ULL << (FP16_MANT_BITS - 1) |
 2533  op >> (FP64_MANT_BITS - FP16_MANT_BITS));
 2534 }
2535 
 2536 static uint32_t
 2537 fp32_FPConvertNaN_16(uint16_t op)
 2538 {
2539  return fp32_pack(op >> (FP16_BITS - 1), FP32_EXP_INF,
2540  1ULL << (FP32_MANT_BITS - 1) |
2541  (uint32_t)op << (FP32_MANT_BITS - FP16_MANT_BITS));
2542 }
2543 
 2544 static uint32_t
 2545 fp32_FPConvertNaN_64(uint64_t op)
 2546 {
 2547  return fp32_pack(op >> (FP64_BITS - 1), FP32_EXP_INF,
 2548  1ULL << (FP32_MANT_BITS - 1) |
 2549  op >> (FP64_MANT_BITS - FP32_MANT_BITS));
 2550 }
2551 
 2552 static uint64_t
 2553 fp64_FPConvertNaN_16(uint16_t op)
 2554 {
2555  return fp64_pack(op >> (FP16_BITS - 1), FP64_EXP_INF,
2556  1ULL << (FP64_MANT_BITS - 1) |
2557  (uint64_t)op << (FP64_MANT_BITS - FP16_MANT_BITS));
2558 }
2559 
 2560 static uint64_t
 2561 fp64_FPConvertNaN_32(uint32_t op)
 2562 {
2563  return fp64_pack(op >> (FP32_BITS - 1), FP64_EXP_INF,
2564  1ULL << (FP64_MANT_BITS - 1) |
2565  (uint64_t)op << (FP64_MANT_BITS - FP32_MANT_BITS));
2566 }
2567 
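      // The helpers below construct the exact constants 1.5, 3.0 and 2.0
      // needed by the FRSQRTS/FRECPS fused steps and the FMULX special
      // cases further down.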
 2568 static uint16_t
 2569 fp16_FPOnePointFive(int sgn)
 2570 {
2571  return fp16_pack(sgn, FP16_EXP_BIAS, 1ULL << (FP16_MANT_BITS - 1));
2572 }
2573 
 2574 static uint32_t
 2575 fp32_FPOnePointFive(int sgn)
 2576 {
2577  return fp32_pack(sgn, FP32_EXP_BIAS, 1ULL << (FP32_MANT_BITS - 1));
2578 }
2579 
 2580 static uint64_t
 2581 fp64_FPOnePointFive(int sgn)
 2582 {
2583  return fp64_pack(sgn, FP64_EXP_BIAS, 1ULL << (FP64_MANT_BITS - 1));
2584 }
2585 
 2586 static uint16_t
 2587 fp16_FPThree(int sgn)
 2588 {
2589  return fp16_pack(sgn, FP16_EXP_BIAS + 1, 1ULL << (FP16_MANT_BITS - 1));
2590 }
2591 
 2592 static uint32_t
 2593 fp32_FPThree(int sgn)
 2594 {
2595  return fp32_pack(sgn, FP32_EXP_BIAS + 1, 1ULL << (FP32_MANT_BITS - 1));
2596 }
2597 
 2598 static uint64_t
 2599 fp64_FPThree(int sgn)
 2600 {
2601  return fp64_pack(sgn, FP64_EXP_BIAS + 1, 1ULL << (FP64_MANT_BITS - 1));
2602 }
2603 
2604 static uint16_t
2605 fp16_FPTwo(int sgn)
2606 {
2607  return fp16_pack(sgn, FP16_EXP_BIAS + 1, 0);
2608 }
2609 
2610 static uint32_t
2611 fp32_FPTwo(int sgn)
2612 {
2613  return fp32_pack(sgn, FP32_EXP_BIAS + 1, 0);
2614 }
2615 
2616 static uint64_t
2617 fp64_FPTwo(int sgn)
2618 {
2619  return fp64_pack(sgn, FP64_EXP_BIAS + 1, 0);
2620 }
2621 
2622 template <>
2623 uint16_t
2624 fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
2625 {
2626  int mode = modeConv(fpscr);
2627  int flags = 0;
2628  int sgn, exp;
2629  uint32_t mnt;
2630  uint16_t result;
2631 
2632  // Unpack floating-point operand optionally with flush-to-zero:
2633  fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2634 
2635  bool alt_hp = fpscr.ahp;
2636 
2637  if (fp32_is_NaN(exp, mnt)) {
2638  if (alt_hp) {
2639  result = fp16_zero(sgn);
2640  } else if (fpscr.dn) {
2641  result = fp16_defaultNaN();
2642  } else {
2643  result = fp16_FPConvertNaN_32(op);
2644  }
2645  if (!(mnt >> (FP32_MANT_BITS - 1) & 1) || alt_hp) {
2646  flags |= FPLIB_IOC;
2647  }
2648  } else if (exp == FP32_EXP_INF) {
2649  if (alt_hp) {
2650  result = ((uint16_t)sgn << (FP16_BITS - 1) |
2651  ((1ULL << (FP16_BITS - 1)) - 1));
2652  flags |= FPLIB_IOC;
2653  } else {
2654  result = fp16_infinity(sgn);
2655  }
2656  } else if (!mnt) {
2657  result = fp16_zero(sgn);
2658  } else {
 2659  result =
 2660  fp16_round_(sgn, exp - FP32_EXP_BIAS + FP16_EXP_BIAS,
 2661  mnt >> (FP32_MANT_BITS - FP16_BITS) |
2662  !!(mnt & ((1ULL << (FP32_MANT_BITS - FP16_BITS)) - 1)),
2663  rounding, (mode & 0xf) | alt_hp << 4, &flags);
2664  }
2665 
2666  set_fpscr0(fpscr, flags);
2667 
2668  return result;
2669 }
2670 
2671 template <>
2672 uint16_t
2673 fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
2674 {
2675  int mode = modeConv(fpscr);
2676  int flags = 0;
2677  int sgn, exp;
2678  uint64_t mnt;
2679  uint16_t result;
2680 
2681  // Unpack floating-point operand optionally with flush-to-zero:
2682  fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2683 
2684  bool alt_hp = fpscr.ahp;
2685 
2686  if (fp64_is_NaN(exp, mnt)) {
2687  if (alt_hp) {
2688  result = fp16_zero(sgn);
2689  } else if (fpscr.dn) {
2690  result = fp16_defaultNaN();
2691  } else {
2692  result = fp16_FPConvertNaN_64(op);
2693  }
2694  if (!(mnt >> (FP64_MANT_BITS - 1) & 1) || alt_hp) {
2695  flags |= FPLIB_IOC;
2696  }
2697  } else if (exp == FP64_EXP_INF) {
2698  if (alt_hp) {
2699  result = ((uint16_t)sgn << (FP16_BITS - 1) |
2700  ((1ULL << (FP16_BITS - 1)) - 1));
2701  flags |= FPLIB_IOC;
2702  } else {
2703  result = fp16_infinity(sgn);
2704  }
2705  } else if (!mnt) {
2706  result = fp16_zero(sgn);
2707  } else {
 2708  result =
 2709  fp16_round_(sgn, exp - FP64_EXP_BIAS + FP16_EXP_BIAS,
 2710  mnt >> (FP64_MANT_BITS - FP16_BITS) |
2711  !!(mnt & ((1ULL << (FP64_MANT_BITS - FP16_BITS)) - 1)),
2712  rounding, (mode & 0xf) | alt_hp << 4, &flags);
2713  }
2714 
2715  set_fpscr0(fpscr, flags);
2716 
2717  return result;
2718 }
2719 
2720 template <>
2721 uint32_t
2722 fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
2723 {
2724  int mode = modeConv(fpscr);
2725  int flags = 0;
2726  int sgn, exp;
2727  uint16_t mnt;
2728  uint32_t result;
2729 
2730  // Unpack floating-point operand optionally with flush-to-zero:
2731  fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags);
2732 
2733  if (fp16_is_NaN(exp, mnt) && !fpscr.ahp) {
2734  if (fpscr.dn) {
2735  result = fp32_defaultNaN();
2736  } else {
2737  result = fp32_FPConvertNaN_16(op);
2738  }
2739  if (!(mnt >> (FP16_MANT_BITS - 1) & 1)) {
2740  flags |= FPLIB_IOC;
2741  }
2742  } else if (exp == FP16_EXP_INF && !fpscr.ahp) {
2743  result = fp32_infinity(sgn);
2744  } else if (!mnt) {
2745  result = fp32_zero(sgn);
2746  } else {
2747  mnt = fp16_normalise(mnt, &exp);
 2748  result = fp32_pack(sgn, (exp - FP16_EXP_BIAS +
 2749  FP32_EXP_BIAS + FP16_EXP_BITS),
 2750  (uint32_t)mnt << (FP32_MANT_BITS - FP16_BITS + 1));
2751  }
2752 
2753  set_fpscr0(fpscr, flags);
2754 
2755  return result;
2756 }
2757 
2758 template <>
2759 uint32_t
2760 fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
2761 {
2762  int mode = modeConv(fpscr);
2763  int flags = 0;
2764  int sgn, exp;
2765  uint64_t mnt;
2766  uint32_t result;
2767 
2768  // Unpack floating-point operand optionally with flush-to-zero:
2769  fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2770 
2771  if (fp64_is_NaN(exp, mnt)) {
2772  if (fpscr.dn) {
2773  result = fp32_defaultNaN();
2774  } else {
2775  result = fp32_FPConvertNaN_64(op);
2776  }
2777  if (!(mnt >> (FP64_MANT_BITS - 1) & 1)) {
2778  flags |= FPLIB_IOC;
2779  }
2780  } else if (exp == FP64_EXP_INF) {
2781  result = fp32_infinity(sgn);
2782  } else if (!mnt) {
2783  result = fp32_zero(sgn);
2784  } else {
 2785  result =
 2786  fp32_round_(sgn, exp - FP64_EXP_BIAS + FP32_EXP_BIAS,
 2787  mnt >> (FP64_MANT_BITS - FP32_BITS) |
2788  !!(mnt & ((1ULL << (FP64_MANT_BITS - FP32_BITS)) - 1)),
2789  rounding, mode, &flags);
2790  }
2791 
2792  set_fpscr0(fpscr, flags);
2793 
2794  return result;
2795 }
2796 
2797 template <>
2798 uint64_t
2799 fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
2800 {
2801  int mode = modeConv(fpscr);
2802  int flags = 0;
2803  int sgn, exp;
2804  uint16_t mnt;
2805  uint64_t result;
2806 
2807  // Unpack floating-point operand optionally with flush-to-zero:
2808  fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags);
2809 
2810  if (fp16_is_NaN(exp, mnt) && !fpscr.ahp) {
2811  if (fpscr.dn) {
2812  result = fp64_defaultNaN();
2813  } else {
2814  result = fp64_FPConvertNaN_16(op);
2815  }
2816  if (!(mnt >> (FP16_MANT_BITS - 1) & 1)) {
2817  flags |= FPLIB_IOC;
2818  }
2819  } else if (exp == FP16_EXP_INF && !fpscr.ahp) {
2820  result = fp64_infinity(sgn);
2821  } else if (!mnt) {
2822  result = fp64_zero(sgn);
2823  } else {
2824  mnt = fp16_normalise(mnt, &exp);
 2825  result = fp64_pack(sgn, (exp - FP16_EXP_BIAS +
 2826  FP64_EXP_BIAS + FP16_EXP_BITS),
 2827  (uint64_t)mnt << (FP64_MANT_BITS - FP16_BITS + 1));
2828  }
2829 
2830  set_fpscr0(fpscr, flags);
2831 
2832  return result;
2833 }
2834 
2835 template <>
2836 uint64_t
2837 fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
2838 {
2839  int mode = modeConv(fpscr);
2840  int flags = 0;
2841  int sgn, exp;
2842  uint32_t mnt;
2843  uint64_t result;
2844 
2845  // Unpack floating-point operand optionally with flush-to-zero:
2846  fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2847 
2848  if (fp32_is_NaN(exp, mnt)) {
2849  if (fpscr.dn) {
2850  result = fp64_defaultNaN();
2851  } else {
2852  result = fp64_FPConvertNaN_32(op);
2853  }
2854  if (!(mnt >> (FP32_MANT_BITS - 1) & 1)) {
2855  flags |= FPLIB_IOC;
2856  }
2857  } else if (exp == FP32_EXP_INF) {
2858  result = fp64_infinity(sgn);
2859  } else if (!mnt) {
2860  result = fp64_zero(sgn);
2861  } else {
2862  mnt = fp32_normalise(mnt, &exp);
 2863  result = fp64_pack(sgn, (exp - FP32_EXP_BIAS +
 2864  FP64_EXP_BIAS + FP32_EXP_BITS),
 2865  (uint64_t)mnt << (FP64_MANT_BITS - FP32_BITS + 1));
2866  }
2867 
2868  set_fpscr0(fpscr, flags);
2869 
2870  return result;
2871 }
2872 
2873 template <>
2874 uint16_t
2875 fplibMulAdd(uint16_t addend, uint16_t op1, uint16_t op2, FPSCR &fpscr)
2876 {
2877  int flags = 0;
2878  uint16_t result = fp16_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2879  set_fpscr0(fpscr, flags);
2880  return result;
2881 }
2882 
2883 template <>
2884 uint32_t
2885 fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr)
2886 {
2887  int flags = 0;
2888  uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2889  set_fpscr0(fpscr, flags);
2890  return result;
2891 }
2892 
2893 template <>
2894 uint64_t
2895 fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr)
2896 {
2897  int flags = 0;
2898  uint64_t result = fp64_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2899  set_fpscr0(fpscr, flags);
2900  return result;
2901 }
2902 
2903 template <>
2904 uint16_t
2905 fplibDiv(uint16_t op1, uint16_t op2, FPSCR &fpscr)
2906 {
2907  int flags = 0;
2908  uint16_t result = fp16_div(op1, op2, modeConv(fpscr), &flags);
2909  set_fpscr0(fpscr, flags);
2910  return result;
2911 }
2912 
2913 template <>
2914 uint32_t
2915 fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2916 {
2917  int flags = 0;
2918  uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags);
2919  set_fpscr0(fpscr, flags);
2920  return result;
2921 }
2922 
2923 template <>
2924 uint64_t
2925 fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2926 {
2927  int flags = 0;
2928  uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags);
2929  set_fpscr0(fpscr, flags);
2930  return result;
2931 }
2932 
2933 template <>
2934 uint16_t
2935 fplibExpA(uint16_t op)
2936 {
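      // FEXPA: coeff[i] holds the FP16_MANT_BITS-bit fraction field of
      // 2^(i/32); the low five operand bits index the table and the next
      // five become the exponent field, yielding 2^(op/32) directly.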
2937  static uint16_t coeff[32] = {
2938  0x0000,
2939  0x0016,
2940  0x002d,
2941  0x0045,
2942  0x005d,
2943  0x0075,
2944  0x008e,
2945  0x00a8,
2946  0x00c2,
2947  0x00dc,
2948  0x00f8,
2949  0x0114,
2950  0x0130,
2951  0x014d,
2952  0x016b,
2953  0x0189,
2954  0x01a8,
2955  0x01c8,
2956  0x01e8,
2957  0x0209,
2958  0x022b,
2959  0x024e,
2960  0x0271,
2961  0x0295,
2962  0x02ba,
2963  0x02e0,
2964  0x0306,
2965  0x032e,
2966  0x0356,
2967  0x037f,
2968  0x03a9,
2969  0x03d4
2970  };
2971  return ((((op >> 5) & ((1 << FP16_EXP_BITS) - 1)) << FP16_MANT_BITS) |
2972  coeff[op & ((1 << 5) - 1)]);
2973 }
2974 
2975 template <>
2976 uint32_t
2977 fplibExpA(uint32_t op)
2978 {
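      // As for fp16, but coeff[i] holds the FP32_MANT_BITS-bit fraction
      // field of 2^(i/64), indexed by the low six operand bits.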
2979  static uint32_t coeff[64] = {
2980  0x000000,
2981  0x0164d2,
2982  0x02cd87,
2983  0x043a29,
2984  0x05aac3,
2985  0x071f62,
2986  0x08980f,
2987  0x0a14d5,
2988  0x0b95c2,
2989  0x0d1adf,
2990  0x0ea43a,
2991  0x1031dc,
2992  0x11c3d3,
2993  0x135a2b,
2994  0x14f4f0,
2995  0x16942d,
2996  0x1837f0,
2997  0x19e046,
2998  0x1b8d3a,
2999  0x1d3eda,
3000  0x1ef532,
3001  0x20b051,
3002  0x227043,
3003  0x243516,
3004  0x25fed7,
3005  0x27cd94,
3006  0x29a15b,
3007  0x2b7a3a,
3008  0x2d583f,
3009  0x2f3b79,
3010  0x3123f6,
3011  0x3311c4,
3012  0x3504f3,
3013  0x36fd92,
3014  0x38fbaf,
3015  0x3aff5b,
3016  0x3d08a4,
3017  0x3f179a,
3018  0x412c4d,
3019  0x4346cd,
3020  0x45672a,
3021  0x478d75,
3022  0x49b9be,
3023  0x4bec15,
3024  0x4e248c,
3025  0x506334,
3026  0x52a81e,
3027  0x54f35b,
3028  0x5744fd,
3029  0x599d16,
3030  0x5bfbb8,
3031  0x5e60f5,
3032  0x60ccdf,
3033  0x633f89,
3034  0x65b907,
3035  0x68396a,
3036  0x6ac0c7,
3037  0x6d4f30,
3038  0x6fe4ba,
3039  0x728177,
3040  0x75257d,
3041  0x77d0df,
3042  0x7a83b3,
3043  0x7d3e0c
3044  };
3045  return ((((op >> 6) & ((1 << FP32_EXP_BITS) - 1)) << FP32_MANT_BITS) |
3046  coeff[op & ((1 << 6) - 1)]);
3047 }
3048 
3049 template <>
3050 uint64_t
3051 fplibExpA(uint64_t op)
3052 {
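      // As above: coeff[i] holds the FP64_MANT_BITS-bit fraction field of
      // 2^(i/64) (coeff[32] is the mantissa of sqrt(2)).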
3053  static uint64_t coeff[64] = {
3054  0x0000000000000ULL,
3055  0x02c9a3e778061ULL,
3056  0x059b0d3158574ULL,
3057  0x0874518759bc8ULL,
3058  0x0b5586cf9890fULL,
3059  0x0e3ec32d3d1a2ULL,
3060  0x11301d0125b51ULL,
3061  0x1429aaea92de0ULL,
3062  0x172b83c7d517bULL,
3063  0x1a35beb6fcb75ULL,
3064  0x1d4873168b9aaULL,
3065  0x2063b88628cd6ULL,
3066  0x2387a6e756238ULL,
3067  0x26b4565e27cddULL,
3068  0x29e9df51fdee1ULL,
3069  0x2d285a6e4030bULL,
3070  0x306fe0a31b715ULL,
3071  0x33c08b26416ffULL,
3072  0x371a7373aa9cbULL,
3073  0x3a7db34e59ff7ULL,
3074  0x3dea64c123422ULL,
3075  0x4160a21f72e2aULL,
3076  0x44e086061892dULL,
3077  0x486a2b5c13cd0ULL,
3078  0x4bfdad5362a27ULL,
3079  0x4f9b2769d2ca7ULL,
3080  0x5342b569d4f82ULL,
3081  0x56f4736b527daULL,
3082  0x5ab07dd485429ULL,
3083  0x5e76f15ad2148ULL,
3084  0x6247eb03a5585ULL,
3085  0x6623882552225ULL,
3086  0x6a09e667f3bcdULL,
3087  0x6dfb23c651a2fULL,
3088  0x71f75e8ec5f74ULL,
3089  0x75feb564267c9ULL,
3090  0x7a11473eb0187ULL,
3091  0x7e2f336cf4e62ULL,
3092  0x82589994cce13ULL,
3093  0x868d99b4492edULL,
3094  0x8ace5422aa0dbULL,
3095  0x8f1ae99157736ULL,
3096  0x93737b0cdc5e5ULL,
3097  0x97d829fde4e50ULL,
3098  0x9c49182a3f090ULL,
3099  0xa0c667b5de565ULL,
3100  0xa5503b23e255dULL,
3101  0xa9e6b5579fdbfULL,
3102  0xae89f995ad3adULL,
3103  0xb33a2b84f15fbULL,
3104  0xb7f76f2fb5e47ULL,
3105  0xbcc1e904bc1d2ULL,
3106  0xc199bdd85529cULL,
3107  0xc67f12e57d14bULL,
3108  0xcb720dcef9069ULL,
3109  0xd072d4a07897cULL,
3110  0xd5818dcfba487ULL,
3111  0xda9e603db3285ULL,
3112  0xdfc97337b9b5fULL,
3113  0xe502ee78b3ff6ULL,
3114  0xea4afa2a490daULL,
3115  0xefa1bee615a27ULL,
3116  0xf50765b6e4540ULL,
3117  0xfa7c1819e90d8ULL
3118  };
3119  return ((((op >> 6) & ((1 << FP64_EXP_BITS) - 1)) << FP64_MANT_BITS) |
3120  coeff[op & ((1 << 6) - 1)]);
3121 }
3122 
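      // Repack a previously unpacked operand without rounding: if the
      // implicit leading bit is set the value was normal and the exponent
      // is kept, otherwise it was subnormal (or zero) and the exponent
      // field is forced to zero.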
3123 static uint16_t
3124 fp16_repack(int sgn, int exp, uint16_t mnt)
3125 {
3126  return fp16_pack(sgn, mnt >> FP16_MANT_BITS ? exp : 0, mnt);
3127 }
3128 
3129 static uint32_t
3130 fp32_repack(int sgn, int exp, uint32_t mnt)
3131 {
3132  return fp32_pack(sgn, mnt >> FP32_MANT_BITS ? exp : 0, mnt);
3133 }
3134 
3135 static uint64_t
3136 fp64_repack(int sgn, int exp, uint64_t mnt)
3137 {
3138  return fp64_pack(sgn, mnt >> FP64_MANT_BITS ? exp : 0, mnt);
3139 }
3140 
3141 static void
3142 fp16_minmaxnum(uint16_t *op1, uint16_t *op2, int sgn)
3143 {
3144  // Treat a single quiet-NaN as +Infinity/-Infinity
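      // The test below is true exactly when every exponent bit and the top
      // mantissa bit are set, i.e. for a quiet NaN.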
3145  if (!((uint16_t)~(*op1 << 1) >> FP16_MANT_BITS) &&
3146  (uint16_t)~(*op2 << 1) >> FP16_MANT_BITS)
3147  *op1 = fp16_infinity(sgn);
3148  if (!((uint16_t)~(*op2 << 1) >> FP16_MANT_BITS) &&
3149  (uint16_t)~(*op1 << 1) >> FP16_MANT_BITS)
3150  *op2 = fp16_infinity(sgn);
3151 }
3152 
3153 static void
3154 fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn)
3155 {
3156  // Treat a single quiet-NaN as +Infinity/-Infinity
3157  if (!((uint32_t)~(*op1 << 1) >> FP32_MANT_BITS) &&
3158  (uint32_t)~(*op2 << 1) >> FP32_MANT_BITS)
3159  *op1 = fp32_infinity(sgn);
3160  if (!((uint32_t)~(*op2 << 1) >> FP32_MANT_BITS) &&
3161  (uint32_t)~(*op1 << 1) >> FP32_MANT_BITS)
3162  *op2 = fp32_infinity(sgn);
3163 }
3164 
3165 static void
3166 fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn)
3167 {
3168  // Treat a single quiet-NaN as +Infinity/-Infinity
3169  if (!((uint64_t)~(*op1 << 1) >> FP64_MANT_BITS) &&
3170  (uint64_t)~(*op2 << 1) >> FP64_MANT_BITS)
3171  *op1 = fp64_infinity(sgn);
3172  if (!((uint64_t)~(*op2 << 1) >> FP64_MANT_BITS) &&
3173  (uint64_t)~(*op1 << 1) >> FP64_MANT_BITS)
3174  *op2 = fp64_infinity(sgn);
3175 }
3176 
3177 template <>
3178 uint16_t
3179 fplibMax(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3180 {
3181  int mode = modeConv(fpscr);
3182  int flags = 0;
3183  int sgn1, exp1, sgn2, exp2;
3184  uint16_t mnt1, mnt2, x, result;
3185 
3186  fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3187  fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3188 
3189  if ((x = fp16_process_NaNs(op1, op2, mode, &flags))) {
3190  result = x;
3191  } else {
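      // With NaNs ruled out: if the signs differ take the non-negative
      // operand; otherwise compare the raw encodings, inverting the
      // ordering for two negative values via the XOR with the sign.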
3192  result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3193  fp16_repack(sgn1, exp1, mnt1) :
3194  fp16_repack(sgn2, exp2, mnt2));
3195  }
3196  set_fpscr0(fpscr, flags);
3197  return result;
3198 }
3199 
3200 template <>
3201 uint32_t
3202 fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3203 {
3204  int mode = modeConv(fpscr);
3205  int flags = 0;
3206  int sgn1, exp1, sgn2, exp2;
3207  uint32_t mnt1, mnt2, x, result;
3208 
3209  fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3210  fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3211 
3212  if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
3213  result = x;
3214  } else {
3215  result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3216  fp32_repack(sgn1, exp1, mnt1) :
3217  fp32_repack(sgn2, exp2, mnt2));
3218  }
3219  set_fpscr0(fpscr, flags);
3220  return result;
3221 }
3222 
3223 template <>
3224 uint64_t
3225 fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3226 {
3227  int mode = modeConv(fpscr);
3228  int flags = 0;
3229  int sgn1, exp1, sgn2, exp2;
3230  uint64_t mnt1, mnt2, x, result;
3231 
3232  fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3233  fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3234 
3235  if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
3236  result = x;
3237  } else {
3238  result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3239  fp64_repack(sgn1, exp1, mnt1) :
3240  fp64_repack(sgn2, exp2, mnt2));
3241  }
3242  set_fpscr0(fpscr, flags);
3243  return result;
3244 }
3245 
3246 template <>
3247 uint16_t
3248 fplibMaxNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3249 {
3250  fp16_minmaxnum(&op1, &op2, 1);
3251  return fplibMax<uint16_t>(op1, op2, fpscr);
3252 }
3253 
3254 template <>
3255 uint32_t
3256 fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3257 {
3258  fp32_minmaxnum(&op1, &op2, 1);
3259  return fplibMax<uint32_t>(op1, op2, fpscr);
3260 }
3261 
3262 template <>
3263 uint64_t
3264 fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3265 {
3266  fp64_minmaxnum(&op1, &op2, 1);
3267  return fplibMax<uint64_t>(op1, op2, fpscr);
3268 }
3269 
3270 template <>
3271 uint16_t
3272 fplibMin(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3273 {
3274  int mode = modeConv(fpscr);
3275  int flags = 0;
3276  int sgn1, exp1, sgn2, exp2;
3277  uint16_t mnt1, mnt2, x, result;
3278 
3279  fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3280  fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3281 
3282  if ((x = fp16_process_NaNs(op1, op2, mode, &flags))) {
3283  result = x;
3284  } else {
3285  result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3286  fp16_repack(sgn1, exp1, mnt1) :
3287  fp16_repack(sgn2, exp2, mnt2));
3288  }
3289  set_fpscr0(fpscr, flags);
3290  return result;
3291 }
3292 
3293 template <>
3294 uint32_t
3295 fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3296 {
3297  int mode = modeConv(fpscr);
3298  int flags = 0;
3299  int sgn1, exp1, sgn2, exp2;
3300  uint32_t mnt1, mnt2, x, result;
3301 
3302  fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3303  fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3304 
3305  if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
3306  result = x;
3307  } else {
3308  result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3309  fp32_repack(sgn1, exp1, mnt1) :
3310  fp32_repack(sgn2, exp2, mnt2));
3311  }
3312  set_fpscr0(fpscr, flags);
3313  return result;
3314 }
3315 
3316 template <>
3317 uint64_t
3318 fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3319 {
3320  int mode = modeConv(fpscr);
3321  int flags = 0;
3322  int sgn1, exp1, sgn2, exp2;
3323  uint64_t mnt1, mnt2, x, result;
3324 
3325  fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3326  fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3327 
3328  if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
3329  result = x;
3330  } else {
3331  result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3332  fp64_repack(sgn1, exp1, mnt1) :
3333  fp64_repack(sgn2, exp2, mnt2));
3334  }
3335  set_fpscr0(fpscr, flags);
3336  return result;
3337 }
3338 
3339 template <>
3340 uint16_t
3341 fplibMinNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3342 {
3343  fp16_minmaxnum(&op1, &op2, 0);
3344  return fplibMin<uint16_t>(op1, op2, fpscr);
3345 }
3346 
3347 template <>
3348 uint32_t
3349 fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3350 {
3351  fp32_minmaxnum(&op1, &op2, 0);
3352  return fplibMin<uint32_t>(op1, op2, fpscr);
3353 }
3354 
3355 template <>
3356 uint64_t
3357 fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3358 {
3359  fp64_minmaxnum(&op1, &op2, 0);
3360  return fplibMin<uint64_t>(op1, op2, fpscr);
3361 }
3362 
3363 template <>
3364 uint16_t
3365 fplibMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3366 {
3367  int flags = 0;
3368  uint16_t result = fp16_mul(op1, op2, modeConv(fpscr), &flags);
3369  set_fpscr0(fpscr, flags);
3370  return result;
3371 }
3372 
3373 template <>
3374 uint32_t
3375 fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3376 {
3377  int flags = 0;
3378  uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags);
3379  set_fpscr0(fpscr, flags);
3380  return result;
3381 }
3382 
3383 template <>
3384 uint64_t
3385 fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3386 {
3387  int flags = 0;
3388  uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags);
3389  set_fpscr0(fpscr, flags);
3390  return result;
3391 }
3392 
3393 template <>
3394 uint16_t
3395 fplibMulX(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3396 {
3397  int mode = modeConv(fpscr);
3398  int flags = 0;
3399  int sgn1, exp1, sgn2, exp2;
3400  uint16_t mnt1, mnt2, result;
3401 
3402  fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3403  fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3404 
3405  result = fp16_process_NaNs(op1, op2, mode, &flags);
3406  if (!result) {
3407  if ((exp1 == FP16_EXP_INF && !mnt2) ||
3408  (exp2 == FP16_EXP_INF && !mnt1)) {
3409  result = fp16_FPTwo(sgn1 ^ sgn2);
3410  } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
3411  result = fp16_infinity(sgn1 ^ sgn2);
3412  } else if (!mnt1 || !mnt2) {
3413  result = fp16_zero(sgn1 ^ sgn2);
3414  } else {
3415  result = fp16_mul(op1, op2, mode, &flags);
3416  }
3417  }
3418 
3419  set_fpscr0(fpscr, flags);
3420 
3421  return result;
3422 }
3423 
3424 template <>
3425 uint32_t
3426 fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3427 {
3428  int mode = modeConv(fpscr);
3429  int flags = 0;
3430  int sgn1, exp1, sgn2, exp2;
3431  uint32_t mnt1, mnt2, result;
3432 
3433  fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3434  fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3435 
3436  result = fp32_process_NaNs(op1, op2, mode, &flags);
3437  if (!result) {
3438  if ((exp1 == FP32_EXP_INF && !mnt2) ||
3439  (exp2 == FP32_EXP_INF && !mnt1)) {
3440  result = fp32_FPTwo(sgn1 ^ sgn2);
3441  } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
3442  result = fp32_infinity(sgn1 ^ sgn2);
3443  } else if (!mnt1 || !mnt2) {
3444  result = fp32_zero(sgn1 ^ sgn2);
3445  } else {
3446  result = fp32_mul(op1, op2, mode, &flags);
3447  }
3448  }
3449 
3450  set_fpscr0(fpscr, flags);
3451 
3452  return result;
3453 }
3454 
3455 template <>
3456 uint64_t
3457 fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3458 {
3459  int mode = modeConv(fpscr);
3460  int flags = 0;
3461  int sgn1, exp1, sgn2, exp2;
3462  uint64_t mnt1, mnt2, result;
3463 
3464  fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3465  fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3466 
3467  result = fp64_process_NaNs(op1, op2, mode, &flags);
3468  if (!result) {
3469  if ((exp1 == FP64_EXP_INF && !mnt2) ||
3470  (exp2 == FP64_EXP_INF && !mnt1)) {
3471  result = fp64_FPTwo(sgn1 ^ sgn2);
3472  } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
3473  result = fp64_infinity(sgn1 ^ sgn2);
3474  } else if (!mnt1 || !mnt2) {
3475  result = fp64_zero(sgn1 ^ sgn2);
3476  } else {
3477  result = fp64_mul(op1, op2, mode, &flags);
3478  }
3479  }
3480 
3481  set_fpscr0(fpscr, flags);
3482 
3483  return result;
3484 }
3485 
3486 template <>
3487 uint16_t
3488 fplibNeg(uint16_t op)
3489 {
3490  return op ^ 1ULL << (FP16_BITS - 1);
3491 }
3492 
3493 template <>
3494 uint32_t
3495 fplibNeg(uint32_t op)
3496 {
3497  return op ^ 1ULL << (FP32_BITS - 1);
3498 }
3499 
3500 template <>
3501 uint64_t
3502 fplibNeg(uint64_t op)
3503 {
3504  return op ^ 1ULL << (FP64_BITS - 1);
3505 }
3506 
3507 static const uint8_t recip_sqrt_estimate[256] = {
3508  255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228,
3509  226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203,
3510  201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181,
3511  180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163,
3512  162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146,
3513  145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131,
3514  131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118,
3515  118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106,
3516  105, 104, 103, 101, 100, 99, 97, 96, 95, 93, 92, 91, 90, 88, 87, 86,
3517  85, 84, 82, 81, 80, 79, 78, 77, 76, 75, 74, 72, 71, 70, 69, 68,
3518  67, 66, 65, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 55, 54, 53,
3519  52, 51, 51, 50, 49, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 40,
3520  39, 38, 38, 37, 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28,
3521  28, 27, 26, 26, 25, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 18,
3522  17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 10, 10, 9, 9,
3523  8, 8, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0
3524 };
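      // recip_sqrt_estimate[] is the 256-entry table from the Arm ARM
      // pseudocode: the index is the low exponent bit (selecting the range
      // [0.25, 0.5) or [0.5, 1.0)) concatenated with the top seven
      // mantissa bits, and each entry gives the eight fraction bits of the
      // 1/sqrt(x) estimate.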
3525 
3526 template <>
3527 uint16_t
3528 fplibRSqrtEstimate(uint16_t op, FPSCR &fpscr)
3529 {
3530  int mode = modeConv(fpscr);
3531  int flags = 0;
3532  int sgn, exp;
3533  uint16_t mnt, result;
3534 
3535  fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3536 
3537  if (fp16_is_NaN(exp, mnt)) {
3538  result = fp16_process_NaN(op, mode, &flags);
3539  } else if (!mnt) {
3540  result = fp16_infinity(sgn);
3541  flags |= FPLIB_DZC;
3542  } else if (sgn) {
3543  result = fp16_defaultNaN();
3544  flags |= FPLIB_IOC;
3545  } else if (exp == FP16_EXP_INF) {
3546  result = fp16_zero(0);
3547  } else {
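        // Table-based estimate: the exponent's parity selects one half of
        // recip_sqrt_estimate[], the top mantissa bits select the entry,
        // and the result exponent is the biased encoding of roughly -exp/2.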
3548  exp += FP16_EXP_BITS;
3549  mnt = fp16_normalise(mnt, &exp);
3550  mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3551  (mnt >> (FP16_BITS - 8) & 127)];
3552  result = fp16_pack(0, (3 * FP16_EXP_BIAS - exp - 1) >> 1,
3553  mnt << (FP16_MANT_BITS - 8));
3554  }
3555 
3556  set_fpscr0(fpscr, flags);
3557 
3558  return result;
3559 }
3560 
3561 template <>
3562 uint32_t
3563 fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr)
3564 {
3565  int mode = modeConv(fpscr);
3566  int flags = 0;
3567  int sgn, exp;
3568  uint32_t mnt, result;
3569 
3570  fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3571 
3572  if (fp32_is_NaN(exp, mnt)) {
3573  result = fp32_process_NaN(op, mode, &flags);
3574  } else if (!mnt) {
3575  result = fp32_infinity(sgn);
3576  flags |= FPLIB_DZC;
3577  } else if (sgn) {
3578  result = fp32_defaultNaN();
3579  flags |= FPLIB_IOC;
3580  } else if (exp == FP32_EXP_INF) {
3581  result = fp32_zero(0);
3582  } else {
3583  exp += FP32_EXP_BITS;
3584  mnt = fp32_normalise(mnt, &exp);
3585  mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3586  (mnt >> (FP32_BITS - 8) & 127)];
3587  result = fp32_pack(0, (3 * FP32_EXP_BIAS - exp - 1) >> 1,
3588  mnt << (FP32_MANT_BITS - 8));
3589  }
3590 
3591  set_fpscr0(fpscr, flags);
3592 
3593  return result;
3594 }
3595 
3596 template <>
3597 uint64_t
3598 fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr)
3599 {
3600  int mode = modeConv(fpscr);
3601  int flags = 0;
3602  int sgn, exp;
3603  uint64_t mnt, result;
3604 
3605  fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3606 
3607  if (fp64_is_NaN(exp, mnt)) {
3608  result = fp64_process_NaN(op, mode, &flags);
3609  } else if (!mnt) {
3610  result = fp64_infinity(sgn);
3611  flags |= FPLIB_DZC;
3612  } else if (sgn) {
3613  result = fp64_defaultNaN();
3614  flags |= FPLIB_IOC;
3615  } else if (exp == FP64_EXP_INF) {
 3616  result = fp64_zero(0);
3617  } else {
3618  exp += FP64_EXP_BITS;
3619  mnt = fp64_normalise(mnt, &exp);
3620  mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3621  (mnt >> (FP64_BITS - 8) & 127)];
3622  result = fp64_pack(0, (3 * FP64_EXP_BIAS - exp - 1) >> 1,
3623  mnt << (FP64_MANT_BITS - 8));
3624  }
3625 
3626  set_fpscr0(fpscr, flags);
3627 
3628  return result;
3629 }
3630 
3631 template <>
3632 uint16_t
3633 fplibRSqrtStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3634 {
3635  int mode = modeConv(fpscr);
3636  int flags = 0;
3637  int sgn1, exp1, sgn2, exp2;
3638  uint16_t mnt1, mnt2, result;
3639 
3640  op1 = fplibNeg<uint16_t>(op1);
3641  fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3642  fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3643 
3644  result = fp16_process_NaNs(op1, op2, mode, &flags);
3645  if (!result) {
3646  if ((exp1 == FP16_EXP_INF && !mnt2) ||
3647  (exp2 == FP16_EXP_INF && !mnt1)) {
3648  result = fp16_FPOnePointFive(0);
3649  } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
3650  result = fp16_infinity(sgn1 ^ sgn2);
3651  } else {
3652  result = fp16_muladd(fp16_FPThree(0), op1, op2, -1, mode, &flags);
3653  }
3654  }
3655 
3656  set_fpscr0(fpscr, flags);
3657 
3658  return result;
3659 }
3660 
3661 template <>
3662 uint32_t
3663 fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3664 {
3665  int mode = modeConv(fpscr);
3666  int flags = 0;
3667  int sgn1, exp1, sgn2, exp2;
3668  uint32_t mnt1, mnt2, result;
3669 
3670  op1 = fplibNeg<uint32_t>(op1);
3671  fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3672  fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3673 
3674  result = fp32_process_NaNs(op1, op2, mode, &flags);
3675  if (!result) {
3676  if ((exp1 == FP32_EXP_INF && !mnt2) ||
3677  (exp2 == FP32_EXP_INF && !mnt1)) {
3678  result = fp32_FPOnePointFive(0);
3679  } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
3680  result = fp32_infinity(sgn1 ^ sgn2);
3681  } else {
3682  result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags);
3683  }
3684  }
3685 
3686  set_fpscr0(fpscr, flags);
3687 
3688  return result;
3689 }
3690 
3691 template <>
3692 uint64_t
3693 fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3694 {
3695  int mode = modeConv(fpscr);
3696  int flags = 0;
3697  int sgn1, exp1, sgn2, exp2;
3698  uint64_t mnt1, mnt2, result;
3699 
3700  op1 = fplibNeg<uint64_t>(op1);
3701  fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3702  fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3703 
3704  result = fp64_process_NaNs(op1, op2, mode, &flags);
3705  if (!result) {
3706  if ((exp1 == FP64_EXP_INF && !mnt2) ||
3707  (exp2 == FP64_EXP_INF && !mnt1)) {
3708  result = fp64_FPOnePointFive(0);
3709  } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
3710  result = fp64_infinity(sgn1 ^ sgn2);
3711  } else {
3712  result = fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags);
3713  }
3714  }
3715 
3716  set_fpscr0(fpscr, flags);
3717 
3718  return result;
3719 }
3720 
3721 template <>
3722 uint16_t
3723 fplibRecipEstimate(uint16_t op, FPSCR &fpscr)
3724 {
3725  int mode = modeConv(fpscr);
3726  int flags = 0;
3727  int sgn, exp;
3728  uint16_t mnt, result;
3729 
3730  fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3731 
3732  if (fp16_is_NaN(exp, mnt)) {
3733  result = fp16_process_NaN(op, mode, &flags);
3734  } else if (exp == FP16_EXP_INF) {
3735  result = fp16_zero(sgn);
3736  } else if (!mnt) {
3737  result = fp16_infinity(sgn);
3738  flags |= FPLIB_DZC;
3739  } else if (!((uint16_t)(op << 1) >> (FP16_MANT_BITS - 1))) {
3740  bool overflow_to_inf = false;
3741  switch (FPCRRounding(fpscr)) {
3742  case FPRounding_TIEEVEN:
3743  overflow_to_inf = true;
3744  break;
3745  case FPRounding_POSINF:
3746  overflow_to_inf = !sgn;
3747  break;
3748  case FPRounding_NEGINF:
3749  overflow_to_inf = sgn;
3750  break;
3751  case FPRounding_ZERO:
3752  overflow_to_inf = false;
3753  break;
3754  default:
3755  panic("Unrecognized FP rounding mode");
3756  }
3757  result = overflow_to_inf ? fp16_infinity(sgn) : fp16_max_normal(sgn);
3758  flags |= FPLIB_OFC | FPLIB_IXC;
3759  } else if (fpscr.fz16 && exp >= 2 * FP16_EXP_BIAS - 1) {
3760  result = fp16_zero(sgn);
3761  flags |= FPLIB_UFC;
3762  } else {
3763  exp += FP16_EXP_BITS;
3764  mnt = fp16_normalise(mnt, &exp);
3765  int result_exp = 2 * FP16_EXP_BIAS - 1 - exp;
3766  uint16_t fraction = (((uint32_t)1 << 19) /
3767  (mnt >> (FP16_BITS - 10) | 1) + 1) >> 1;
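        // This is the Arm ARM RecipEstimate calculation: a 9-bit mantissa
        // slice is widened to the odd bucket midpoint d, and
        // (2^19 / d + 1) / 2 gives a reciprocal estimate with eight
        // fraction bits.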
3768  fraction <<= FP16_MANT_BITS - 8;
3769  if (result_exp == 0) {
3770  fraction >>= 1;
3771  } else if (result_exp == -1) {
3772  fraction >>= 2;
3773  result_exp = 0;
3774  }
3775  result = fp16_pack(sgn, result_exp, fraction);
3776  }
3777 
3778  set_fpscr0(fpscr, flags);
3779 
3780  return result;
3781 }
3782 
3783 template <>
3784 uint32_t
3785 fplibRecipEstimate(uint32_t op, FPSCR &fpscr)
3786 {
3787  int mode = modeConv(fpscr);
3788  int flags = 0;
3789  int sgn, exp;
3790  uint32_t mnt, result;
3791 
3792  fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3793 
3794  if (fp32_is_NaN(exp, mnt)) {
3795  result = fp32_process_NaN(op, mode, &flags);
3796  } else if (exp == FP32_EXP_INF) {
3797  result = fp32_zero(sgn);
3798  } else if (!mnt) {
3799  result = fp32_infinity(sgn);
3800  flags |= FPLIB_DZC;
3801  } else if (!((uint32_t)(op << 1) >> (FP32_MANT_BITS - 1))) {
3802  bool overflow_to_inf = false;
3803  switch (FPCRRounding(fpscr)) {
3804  case FPRounding_TIEEVEN:
3805  overflow_to_inf = true;
3806  break;
3807  case FPRounding_POSINF:
3808  overflow_to_inf = !sgn;
3809  break;
3810  case FPRounding_NEGINF:
3811  overflow_to_inf = sgn;
3812  break;
3813  case FPRounding_ZERO:
3814  overflow_to_inf = false;
3815  break;
3816  default:
3817  panic("Unrecognized FP rounding mode");
3818  }
3819  result = overflow_to_inf ? fp32_infinity(sgn) : fp32_max_normal(sgn);
3820  flags |= FPLIB_OFC | FPLIB_IXC;
3821  } else if (fpscr.fz && exp >= 2 * FP32_EXP_BIAS - 1) {
3822  result = fp32_zero(sgn);
3823  flags |= FPLIB_UFC;
3824  } else {
3825  exp += FP32_EXP_BITS;
3826  mnt = fp32_normalise(mnt, &exp);
3827  int result_exp = 2 * FP32_EXP_BIAS - 1 - exp;
3828  uint32_t fraction = (((uint32_t)1 << 19) /
3829  (mnt >> (FP32_BITS - 10) | 1) + 1) >> 1;
3830  fraction <<= FP32_MANT_BITS - 8;
3831  if (result_exp == 0) {
3832  fraction >>= 1;
3833  } else if (result_exp == -1) {
3834  fraction >>= 2;
3835  result_exp = 0;
3836  }
3837  result = fp32_pack(sgn, result_exp, fraction);
3838  }
3839 
3840  set_fpscr0(fpscr, flags);
3841 
3842  return result;
3843 }
3844 
3845 template <>
3846 uint64_t
3847 fplibRecipEstimate(uint64_t op, FPSCR &fpscr)
3848 {
3849  int mode = modeConv(fpscr);
3850  int flags = 0;
3851  int sgn, exp;
3852  uint64_t mnt, result;
3853 
3854  fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3855 
3856  if (fp64_is_NaN(exp, mnt)) {
3857  result = fp64_process_NaN(op, mode, &flags);
3858  } else if (exp == FP64_EXP_INF) {
3859  result = fp64_zero(sgn);
3860  } else if (!mnt) {
3861  result = fp64_infinity(sgn);
3862  flags |= FPLIB_DZC;
3863  } else if (!((uint64_t)(op << 1) >> (FP64_MANT_BITS - 1))) {
3864  bool overflow_to_inf = false;
3865  switch (FPCRRounding(fpscr)) {
3866  case FPRounding_TIEEVEN:
3867  overflow_to_inf = true;
3868  break;
3869  case FPRounding_POSINF:
3870  overflow_to_inf = !sgn;
3871  break;
3872  case FPRounding_NEGINF:
3873  overflow_to_inf = sgn;
3874  break;
3875  case FPRounding_ZERO:
3876  overflow_to_inf = false;
3877  break;
3878  default:
3879  panic("Unrecognized FP rounding mode");
3880  }
3881  result = overflow_to_inf ? fp64_infinity(sgn) : fp64_max_normal(sgn);
3882  flags |= FPLIB_OFC | FPLIB_IXC;
3883  } else if (fpscr.fz && exp >= 2 * FP64_EXP_BIAS - 1) {
3884  result = fp64_zero(sgn);
3885  flags |= FPLIB_UFC;
3886  } else {
3887  exp += FP64_EXP_BITS;
3888  mnt = fp64_normalise(mnt, &exp);
3889  int result_exp = 2 * FP64_EXP_BIAS - 1 - exp;
3890  uint64_t fraction = (((uint32_t)1 << 19) /
3891  (mnt >> (FP64_BITS - 10) | 1) + 1) >> 1;
3892  fraction <<= FP64_MANT_BITS - 8;
3893  if (result_exp == 0) {
3894  fraction >>= 1;
3895  } else if (result_exp == -1) {
3896  fraction >>= 2;
3897  result_exp = 0;
3898  }
3899  result = fp64_pack(sgn, result_exp, fraction);
3900  }
3901 
3902  set_fpscr0(fpscr, flags);
3903 
3904  return result;
3905 }
3906 
3907 template <>
3908 uint16_t
3909 fplibRecipStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3910 {
3911  int mode = modeConv(fpscr);
3912  int flags = 0;
3913  int sgn1, exp1, sgn2, exp2;
3914  uint16_t mnt1, mnt2, result;
3915 
3916  op1 = fplibNeg<uint16_t>(op1);
3917  fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3918  fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3919 
3920  result = fp16_process_NaNs(op1, op2, mode, &flags);
3921  if (!result) {
3922  if ((exp1 == FP16_EXP_INF && !mnt2) ||
3923  (exp2 == FP16_EXP_INF && !mnt1)) {
3924  result = fp16_FPTwo(0);
3925  } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
3926  result = fp16_infinity(sgn1 ^ sgn2);
3927  } else {
3928  result = fp16_muladd(fp16_FPTwo(0), op1, op2, 0, mode, &flags);
3929  }
3930  }
3931 
3932  set_fpscr0(fpscr, flags);
3933 
3934  return result;
3935 }
3936 
3937 template <>
3938 uint32_t
3939 fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3940 {
3941  int mode = modeConv(fpscr);
3942  int flags = 0;
3943  int sgn1, exp1, sgn2, exp2;
3944  uint32_t mnt1, mnt2, result;
3945 
3946  op1 = fplibNeg<uint32_t>(op1);
3947  fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3948  fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3949 
3950  result = fp32_process_NaNs(op1, op2, mode, &flags);
3951  if (!result) {
3952  if ((exp1 == FP32_EXP_INF && !mnt2) ||
3953  (exp2 == FP32_EXP_INF && !mnt1)) {
3954  result = fp32_FPTwo(0);
3955  } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
3956  result = fp32_infinity(sgn1 ^ sgn2);
3957  } else {
3958  result = fp32_muladd(fp32_FPTwo(0), op1, op2, 0, mode, &flags);
3959  }
3960  }
3961 
3962  set_fpscr0(fpscr, flags);
3963 
3964  return result;
3965 }
3966 
3967 template <>
3968 uint64_t
3969 fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3970 {
3971  int mode = modeConv(fpscr);
3972  int flags = 0;
3973  int sgn1, exp1, sgn2, exp2;
3974  uint64_t mnt1, mnt2, result;
3975 
3976  op1 = fplibNeg<uint64_t>(op1);
3977  fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3978  fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3979 
3980  result = fp64_process_NaNs(op1, op2, mode, &flags);
3981  if (!result) {
3982  if ((exp1 == FP64_EXP_INF && !mnt2) ||
3983  (exp2 == FP64_EXP_INF && !mnt1)) {
3984  result = fp64_FPTwo(0);
3985  } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
3986  result = fp64_infinity(sgn1 ^ sgn2);
3987  } else {
3988  result = fp64_muladd(fp64_FPTwo(0), op1, op2, 0, mode, &flags);
3989  }
3990  }
3991 
3992  set_fpscr0(fpscr, flags);
3993 
3994  return result;
3995 }
3996 
3997 template <>
3998 uint16_t
3999 fplibRecpX(uint16_t op, FPSCR &fpscr)
4000 {
4001  int mode = modeConv(fpscr);
4002  int flags = 0;
4003  int sgn, exp;
4004  uint16_t mnt, result;
4005 
4006  fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4007 
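      // FRECPX: invert the exponent field bitwise (a cheap approximation
      // of negating the exponent) and clear the mantissa; zeros and
      // flushed denormals instead get the largest finite exponent.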
4008  if (fp16_is_NaN(exp, mnt)) {
4009  result = fp16_process_NaN(op, mode, &flags);
4010  }
4011  else {
4012  if (!mnt) { // Zero and denormals
4013  result = fp16_pack(sgn, FP16_EXP_INF - 1, 0);
4014  } else { // Infinities and normals
4015  result = fp16_pack(sgn, exp ^ FP16_EXP_INF, 0);
4016  }
4017  }
4018 
4019  set_fpscr0(fpscr, flags);
4020 
4021  return result;
4022 }
4023 
4024 template <>
4025 uint32_t
4026 fplibRecpX(uint32_t op, FPSCR &fpscr)
4027 {
4028  int mode = modeConv(fpscr);
4029  int flags = 0;
4030  int sgn, exp;
4031  uint32_t mnt, result;
4032 
4033  fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4034 
4035  if (fp32_is_NaN(exp, mnt)) {
4036  result = fp32_process_NaN(op, mode, &flags);
4037  }
4038  else {
4039  if (!mnt) { // Zero and denormals
4040  result = fp32_pack(sgn, FP32_EXP_INF - 1, 0);
4041  } else { // Infinities and normals
4042  result = fp32_pack(sgn, exp ^ FP32_EXP_INF, 0);
4043  }
4044  }
4045 
4046  set_fpscr0(fpscr, flags);
4047 
4048  return result;
4049 }
4050 
4051 template <>
4052 uint64_t
4053 fplibRecpX(uint64_t op, FPSCR &fpscr)
4054 {
4055  int mode = modeConv(fpscr);
4056  int flags = 0;
4057  int sgn, exp;
4058  uint64_t mnt, result;
4059 
4060  fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4061 
4062  if (fp64_is_NaN(exp, mnt)) {
4063  result = fp64_process_NaN(op, mode, &flags);
4064  }
4065  else {
4066  if (!mnt) { // Zero and denormals
4067  result = fp64_pack(sgn, FP64_EXP_INF - 1, 0);
4068  } else { // Infinities and normals
4069  result = fp64_pack(sgn, exp ^ FP64_EXP_INF, 0);
4070  }
4071  }
4072 
4073  set_fpscr0(fpscr, flags);
4074 
4075  return result;
4076 }
4077 
4078 template <>
4079 uint16_t
4080 fplibRoundInt(uint16_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
4081 {
4082  int expint = FP16_EXP_BIAS + FP16_MANT_BITS;
4083  int mode = modeConv(fpscr);
4084  int flags = 0;
4085  int sgn, exp;
4086  uint16_t mnt, result;
4087 
4088  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4089  fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4090 
4091  // Handle NaNs, infinities and zeroes:
4092  if (fp16_is_NaN(exp, mnt)) {
4093  result = fp16_process_NaN(op, mode, &flags);
4094  } else if (exp == FP16_EXP_INF) {
4095  result = fp16_infinity(sgn);
4096  } else if (!mnt) {
4097  result = fp16_zero(sgn);
4098  } else if (exp >= expint) {
4099  // There are no fractional bits
4100  result = op;
4101  } else {
4102  // Truncate towards zero:
4103  uint16_t x = expint - exp >= FP16_BITS ? 0 : mnt >> (expint - exp);
4104  int err = exp < expint - FP16_BITS ? 1 :
4105  ((mnt << 1 >> (expint - exp - 1) & 3) |
4106  ((uint16_t)(mnt << 2 << (FP16_BITS + exp - expint)) != 0));
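        // err is a guard/sticky code for the discarded fraction: bit 1 is
        // the first discarded bit and bit 0 the OR of all lower bits, so
        // 0 = exact, 1 = below half, 2 = exactly half, 3 = above half.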
4107  switch (rounding) {
4108  case FPRounding_TIEEVEN:
4109  x += (err == 3 || (err == 2 && (x & 1)));
4110  break;
4111  case FPRounding_POSINF:
4112  x += err && !sgn;
4113  break;
4114  case FPRounding_NEGINF:
4115  x += err && sgn;
4116  break;
4117  case FPRounding_ZERO:
4118  break;
4119  case FPRounding_TIEAWAY:
4120  x += err >> 1;
4121  break;
4122  default:
4123  panic("Unrecognized FP rounding mode");
4124  }
4125 
4126  if (x == 0) {
4127  result = fp16_zero(sgn);
4128  } else {
4129  exp = expint;
4130  mnt = fp16_normalise(x, &exp);
4131  result = fp16_pack(sgn, exp + FP16_EXP_BITS, mnt >> FP16_EXP_BITS);
4132  }
4133 
4134  if (err && exact)
4135  flags |= FPLIB_IXC;
4136  }
4137 
4138  set_fpscr0(fpscr, flags);
4139 
4140  return result;
4141 }
4142 
4143 template <>
4144 uint32_t
4145 fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
4146 {
4147  int expint = FP32_EXP_BIAS + FP32_MANT_BITS;
4148  int mode = modeConv(fpscr);
4149  int flags = 0;
4150  int sgn, exp;
4151  uint32_t mnt, result;
4152 
4153  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4154  fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4155 
4156  // Handle NaNs, infinities and zeroes:
4157  if (fp32_is_NaN(exp, mnt)) {
4158  result = fp32_process_NaN(op, mode, &flags);
4159  } else if (exp == FP32_EXP_INF) {
4160  result = fp32_infinity(sgn);
4161  } else if (!mnt) {
4162  result = fp32_zero(sgn);
4163  } else if (exp >= expint) {
4164  // There are no fractional bits
4165  result = op;
4166  } else {
4167  // Truncate towards zero:
4168  uint32_t x = expint - exp >= FP32_BITS ? 0 : mnt >> (expint - exp);
4169  int err = exp < expint - FP32_BITS ? 1 :
4170  ((mnt << 1 >> (expint - exp - 1) & 3) |
4171  ((uint32_t)(mnt << 2 << (FP32_BITS + exp - expint)) != 0));
4172  switch (rounding) {
4173  case FPRounding_TIEEVEN:
4174  x += (err == 3 || (err == 2 && (x & 1)));
4175  break;
4176  case FPRounding_POSINF:
4177  x += err && !sgn;
4178  break;
4179  case FPRounding_NEGINF:
4180  x += err && sgn;
4181  break;
4182  case FPRounding_ZERO:
4183  break;
4184  case FPRounding_TIEAWAY:
4185  x += err >> 1;
4186  break;
4187  default:
4188  panic("Unrecognized FP rounding mode");
4189  }
4190 
4191  if (x == 0) {
4192  result = fp32_zero(sgn);
4193  } else {
4194  exp = expint;
4195  mnt = fp32_normalise(x, &exp);
4196  result = fp32_pack(sgn, exp + FP32_EXP_BITS, mnt >> FP32_EXP_BITS);
4197  }
4198 
4199  if (err && exact)
4200  flags |= FPLIB_IXC;
4201  }
4202 
4203  set_fpscr0(fpscr, flags);
4204 
4205  return result;
4206 }
4207 
4208 template <>
4209 uint64_t
4210 fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
4211 {
4212  int expint = FP64_EXP_BIAS + FP64_MANT_BITS;
4213  int mode = modeConv(fpscr);
4214  int flags = 0;
4215  int sgn, exp;
4216  uint64_t mnt, result;
4217 
4218  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4219  fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4220 
4221  // Handle NaNs, infinities and zeroes:
4222  if (fp64_is_NaN(exp, mnt)) {
4223  result = fp64_process_NaN(op, mode, &flags);
4224  } else if (exp == FP64_EXP_INF) {
4225  result = fp64_infinity(sgn);
4226  } else if (!mnt) {
4227  result = fp64_zero(sgn);
4228  } else if (exp >= expint) {
4229  // There are no fractional bits
4230  result = op;
4231  } else {
4232  // Truncate towards zero:
4233  uint64_t x = expint - exp >= FP64_BITS ? 0 : mnt >> (expint - exp);
4234  int err = exp < expint - FP64_BITS ? 1 :
4235  ((mnt << 1 >> (expint - exp - 1) & 3) |
4236  ((uint64_t)(mnt << 2 << (FP64_BITS + exp - expint)) != 0));
4237  switch (rounding) {
4238  case FPRounding_TIEEVEN:
4239  x += (err == 3 || (err == 2 && (x & 1)));
4240  break;
4241  case FPRounding_POSINF:
4242  x += err && !sgn;
4243  break;
4244  case FPRounding_NEGINF:
4245  x += err && sgn;
4246  break;
4247  case FPRounding_ZERO:
4248  break;
4249  case FPRounding_TIEAWAY:
4250  x += err >> 1;
4251  break;
4252  default:
4253  panic("Unrecognized FP rounding mode");
4254  }
4255 
4256  if (x == 0) {
4257  result = fp64_zero(sgn);
4258  } else {
4259  exp = expint;
4260  mnt = fp64_normalise(x, &exp);
4261  result = fp64_pack(sgn, exp + FP64_EXP_BITS, mnt >> FP64_EXP_BITS);
4262  }
4263 
4264  if (err && exact)
4265  flags |= FPLIB_IXC;
4266  }
4267 
4268  set_fpscr0(fpscr, flags);
4269 
4270  return result;
4271 }
4272 
4273 template <>
4274 uint16_t
4275 fplibScale(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4276 {
4277  int flags = 0;
4278  uint16_t result = fp16_scale(op1, (int16_t)op2, modeConv(fpscr), &flags);
4279  set_fpscr0(fpscr, flags);
4280  return result;
4281 }
4282 
4283 template <>
4284 uint32_t
4285 fplibScale(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4286 {
4287  int flags = 0;
4288  uint32_t result = fp32_scale(op1, (int32_t)op2, modeConv(fpscr), &flags);
4289  set_fpscr0(fpscr, flags);
4290  return result;
4291 }
4292 
4293 template <>
4294 uint64_t
4295 fplibScale(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4296 {
4297  int flags = 0;
4298  uint64_t result = fp64_scale(op1, (int64_t)op2, modeConv(fpscr), &flags);
4299  set_fpscr0(fpscr, flags);
4300  return result;
4301 }
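// FScale multiplies op1 by two raised to the signed integer held in op2's
// bits, i.e. the same arithmetic as ldexp, with IEEE flag handling done
// inside fp*_scale. A quick standalone illustration (not part of fplib):

#include <cmath>
#include <cstdio>

int main()
{
    std::printf("%g\n", std::ldexp(1.5, 3)); // 1.5 * 2^3 = 12
    return 0;
}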
4302 
4303 template <>
4304 uint16_t
4305 fplibSqrt(uint16_t op, FPSCR &fpscr)
4306 {
4307  int flags = 0;
4308  uint16_t result = fp16_sqrt(op, modeConv(fpscr), &flags);
4309  set_fpscr0(fpscr, flags);
4310  return result;
4311 }
4312 
4313 template <>
4314 uint32_t
4315 fplibSqrt(uint32_t op, FPSCR &fpscr)
4316 {
4317  int flags = 0;
4318  uint32_t result = fp32_sqrt(op, modeConv(fpscr), &flags);
4319  set_fpscr0(fpscr, flags);
4320  return result;
4321 }
4322 
4323 template <>
4324 uint64_t
4325 fplibSqrt(uint64_t op, FPSCR &fpscr)
4326 {
4327  int flags = 0;
4328  uint64_t result = fp64_sqrt(op, modeConv(fpscr), &flags);
4329  set_fpscr0(fpscr, flags);
4330  return result;
4331 }
4332 
4333 template <>
4334 uint16_t
4335 fplibSub(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4336 {
4337  int flags = 0;
4338  uint16_t result = fp16_add(op1, op2, 1, modeConv(fpscr), &flags);
4339  set_fpscr0(fpscr, flags);
4340  return result;
4341 }
4342 
4343 template <>
4344 uint32_t
4345 fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4346 {
4347  int flags = 0;
4348  uint32_t result = fp32_add(op1, op2, 1, modeConv(fpscr), &flags);
4349  set_fpscr0(fpscr, flags);
4350  return result;
4351 }
4352 
4353 template <>
4354 uint64_t
4355 fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4356 {
4357  int flags = 0;
4358  uint64_t result = fp64_add(op1, op2, 1, modeConv(fpscr), &flags);
4359  set_fpscr0(fpscr, flags);
4360  return result;
4361 }
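// Subtraction is implemented as addition with the 'neg' argument set, which
// flips the sign of op2 inside fp*_add; there is no separate subtract path.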
4362 
4363 template <>
4364 uint16_t
4365 fplibTrigMulAdd(uint8_t coeff_index, uint16_t op1, uint16_t op2, FPSCR &fpscr)
4366 {
4367  static uint16_t coeff[2][8] = {
4368  {
4369  0x3c00,
4370  0xb155,
4371  0x2030,
4372  0x0000,
4373  0x0000,
4374  0x0000,
4375  0x0000,
4376  0x0000,
4377  },
4378  {
4379  0x3c00,
4380  0xb800,
4381  0x293a,
4382  0x0000,
4383  0x0000,
4384  0x0000,
4385  0x0000,
4386  0x0000
4387  }
4388  };
4389  int flags = 0;
4390  uint16_t result =
4391  fp16_muladd(coeff[op2 >> (FP16_BITS - 1)][coeff_index], op1,
4392  fplibAbs(op2), 0, modeConv(fpscr), &flags);
4393  set_fpscr0(fpscr, flags);
4394  return result;
4395 }
4396 
4397 template <>
4398 uint32_t
4399 fplibTrigMulAdd(uint8_t coeff_index, uint32_t op1, uint32_t op2, FPSCR &fpscr)
4400 {
4401  static uint32_t coeff[2][8] = {
4402  {
4403  0x3f800000,
4404  0xbe2aaaab,
4405  0x3c088886,
4406  0xb95008b9,
4407  0x36369d6d,
4408  0x00000000,
4409  0x00000000,
4410  0x00000000
4411  },
4412  {
4413  0x3f800000,
4414  0xbf000000,
4415  0x3d2aaaa6,
4416  0xbab60705,
4417  0x37cd37cc,
4418  0x00000000,
4419  0x00000000,
4420  0x00000000
4421  }
4422  };
4423  int flags = 0;
4424  uint32_t result =
4425  fp32_muladd(coeff[op2 >> (FP32_BITS - 1)][coeff_index], op1,
4426  fplibAbs(op2), 0, modeConv(fpscr), &flags);
4427  set_fpscr0(fpscr, flags);
4428  return result;
4429 }
4430 
4431 template <>
4432 uint64_t
4433 fplibTrigMulAdd(uint8_t coeff_index, uint64_t op1, uint64_t op2, FPSCR &fpscr)
4434 {
4435  static uint64_t coeff[2][8] = {
4436  {
4437  0x3ff0000000000000ULL,
4438  0xbfc5555555555543ULL,
4439  0x3f8111111110f30cULL,
4440  0xbf2a01a019b92fc6ULL,
4441  0x3ec71de351f3d22bULL,
4442  0xbe5ae5e2b60f7b91ULL,
4443  0x3de5d8408868552fULL,
4444  0x0000000000000000ULL
4445  },
4446  {
4447  0x3ff0000000000000ULL,
4448  0xbfe0000000000000ULL,
4449  0x3fa5555555555536ULL,
4450  0xbf56c16c16c13a0bULL,
4451  0x3efa01a019b1e8d8ULL,
4452  0xbe927e4f7282f468ULL,
4453  0x3e21ee96d2641b13ULL,
4454  0xbda8f76380fbb401ULL
4455  }
4456  };
4457  int flags = 0;
4458  uint64_t result =
4459  fp64_muladd(coeff[op2 >> (FP64_BITS - 1)][coeff_index], op1,
4460  fplibAbs(op2), 0, modeConv(fpscr), &flags);
4461  set_fpscr0(fpscr, flags);
4462  return result;
4463 }
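// The two rows of each coefficient table are, to within final-bit tweaks,
// the Maclaurin-series coefficients of sine (1, -1/3!, 1/5!, ...) and cosine
// (1, -1/2!, 1/4!, ...); the sign bit of op2 selects the row. A standalone
// spot-check of one entry (illustrative, not part of fplib):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main()
{
    uint64_t bits = 0xbfc5555555555543ULL; // second sine-row coefficient
    double c;
    std::memcpy(&c, &bits, sizeof(c));
    std::printf("%.17g vs %.17g\n", c, -1.0 / 6.0); // agree to ~14 digits
    return 0;
}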
4464 
4465 template <>
4466 uint16_t
4467 fplibTrigSMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4468 {
4469  int flags = 0;
4470  int sgn, exp;
4471  uint16_t mnt;
4472 
4473  int mode = modeConv(fpscr);
4474  uint16_t result = fp16_mul(op1, op1, mode, &flags);
4475  set_fpscr0(fpscr, flags);
4476 
4477  fp16_unpack(&sgn, &exp, &mnt, result, mode, &flags);
4478  if (!fp16_is_NaN(exp, mnt)) {
4479  result = (result & ~(1ULL << (FP16_BITS - 1))) |
4480  op2 << (FP16_BITS - 1);
4481  }
4482  return result;
4483 }
4484 
4485 template <>
4486 uint32_t
4487 fplibTrigSMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4488 {
4489  int flags = 0;
4490  int sgn, exp;
4491  uint32_t mnt;
4492 
4493  int mode = modeConv(fpscr);
4494  uint32_t result = fp32_mul(op1, op1, mode, &flags);
4495  set_fpscr0(fpscr, flags);
4496 
4497  fp32_unpack(&sgn, &exp, &mnt, result, mode, &flags);
4498  if (!fp32_is_NaN(exp, mnt)) {
4499  result = (result & ~(1ULL << (FP32_BITS - 1))) | op2 << (FP32_BITS - 1);
4500  }
4501  return result;
4502 }
4503 
4504 template <>
4505 uint64_t
4506 fplibTrigSMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4507 {
4508  int flags = 0;
4509  int sgn, exp;
4510  uint64_t mnt;
4511 
4512  int mode = modeConv(fpscr);
4513  uint64_t result = fp64_mul(op1, op1, mode, &flags);
4514  set_fpscr0(fpscr, flags);
4515 
4516  fp64_unpack(&sgn, &exp, &mnt, result, mode, &flags);
4517  if (!fp64_is_NaN(exp, mnt)) {
4518  result = (result & ~(1ULL << (FP64_BITS - 1))) | op2 << (FP64_BITS - 1);
4519  }
4520  return result;
4521 }
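// FTSMUL: square op1, then (unless the square is a NaN) replace its sign bit
// with bit 0 of op2. Flags are committed right after the multiply, so the
// sign splice itself never raises exceptions.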
4522 
4523 template <>
4524 uint16_t
4525 fplibTrigSSel(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4526 {
4527  static constexpr uint16_t fpOne =
4528  (uint16_t)FP16_EXP_BIAS << FP16_MANT_BITS; // 1.0
4529  if (op2 & 1)
4530  op1 = fpOne;
4531  return op1 ^ ((op2 >> 1) << (FP16_BITS - 1));
4532 }
4533 
4534 template <>
4535 uint32_t
4536 fplibTrigSSel(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4537 {
4538  static constexpr uint32_t fpOne =
4539  (uint32_t)FP32_EXP_BIAS << FP32_MANT_BITS; // 1.0
4540  if (op2 & 1)
4541  op1 = fpOne;
4542  return op1 ^ ((op2 >> 1) << (FP32_BITS - 1));
4543 }
4544 
4545 template <>
4546 uint64_t
4547 fplibTrigSSel(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4548 {
4549  static constexpr uint64_t fpOne =
4550  (uint64_t)FP64_EXP_BIAS << FP64_MANT_BITS; // 1.0
4551  if (op2 & 1)
4552  op1 = fpOne;
4553  return op1 ^ ((op2 >> 1) << (FP64_BITS - 1));
4554 }
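// FTSSEL: bit 0 of op2 selects between op1 and the constant 1.0 (built by
// placing the exponent bias in the exponent field over a zero fraction), and
// bit 1 of op2 is XORed into the sign bit of whichever value was selected.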
4555 
4556 static uint64_t
4557 FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
4558  int *flags)
4559 {
4560  int expmax = FP64_EXP_BIAS + FP64_BITS - 1;
4561  uint64_t x;
4562  int err;
4563 
4564  if (exp > expmax) {
4565  *flags = FPLIB_IOC;
4566  return ((uint64_t)!u << (FP64_BITS - 1)) - !sgn;
4567  }
4568 
4569  x = lsr64(mnt << FP64_EXP_BITS, expmax - exp);
4570  err = (exp > expmax - 2 ? 0 :
4571  (lsr64(mnt << FP64_EXP_BITS, expmax - 2 - exp) & 3) |
4572  !!(mnt << FP64_EXP_BITS & (lsl64(1, expmax - 2 - exp) - 1)));
4573 
4574  switch (rounding) {
4575  case FPRounding_TIEEVEN:
4576  x += (err == 3 || (err == 2 && (x & 1)));
4577  break;
4578  case FPRounding_POSINF:
4579  x += err && !sgn;
4580  break;
4581  case FPRounding_NEGINF:
4582  x += err && sgn;
4583  break;
4584  case FPRounding_ZERO:
4585  break;
4586  case FPRounding_TIEAWAY:
4587  x += err >> 1;
4588  break;
4589  default:
4590  panic("Unrecognized FP rounding mode");
4591  }
4592 
4593  if (u ? sgn && x : x > (1ULL << (FP64_BITS - 1)) - !sgn) {
4594  *flags = FPLIB_IOC;
4595  return ((uint64_t)!u << (FP64_BITS - 1)) - !sgn;
4596  }
4597 
4598  if (err) {
4599  *flags = FPLIB_IXC;
4600  }
4601 
4602  return sgn ? -x : x;
4603 }
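// Note the saturation value: ((uint64_t)!u << 63) - !sgn evaluates to
// INT64_MAX (signed positive overflow), INT64_MIN (signed negative),
// UINT64_MAX (unsigned positive) or 0 (unsigned negative), matching the
// AArch64 out-of-range conversion results.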
4604 
4605 static uint32_t
4606 FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
4607  int *flags)
4608 {
4609  uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
4610  if (u ? x >= 1ULL << FP32_BITS :
4611  !(x < 1ULL << (FP32_BITS - 1) ||
4612  (uint64_t)-x <= (uint64_t)1 << (FP32_BITS - 1))) {
4613  *flags = FPLIB_IOC;
4614  x = ((uint32_t)!u << (FP32_BITS - 1)) - !sgn;
4615  }
4616  return x;
4617 }
4618 
4619 static uint16_t
4620 FPToFixed_16(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
4621  int *flags)
4622 {
4623  uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
4624  if (u ? x >= 1ULL << FP16_BITS :
4625  !(x < 1ULL << (FP16_BITS - 1) ||
4626  (uint64_t)-x <= (uint64_t)1 << (FP16_BITS - 1))) {
4627  *flags = FPLIB_IOC;
4628  x = ((uint16_t)!u << (FP16_BITS - 1)) - !sgn;
4629  }
4630  return x;
4631 }
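// The 32- and 16-bit variants convert at 64-bit width and then check that
// the value survives narrowing: an unsigned result must be below 2^N, and a
// signed one must lie in [-2^(N-1), 2^(N-1) - 1]; anything else saturates
// with IOC, mirroring FPToFixed_64.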
4632 
4633 template <>
4634 uint16_t
4635 fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
4636  FPSCR &fpscr)
4637 {
4638  int flags = 0;
4639  int sgn, exp;
4640  uint16_t mnt, result;
4641 
4642  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4643  fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4644 
4645  // If NaN, set cumulative flag or take exception:
4646  if (fp16_is_NaN(exp, mnt)) {
4647  flags = FPLIB_IOC;
4648  result = 0;
4649  } else {
4650  assert(fbits >= 0);
4651  // Infinity is treated as an ordinary normalised number that saturates.
4652  result =
4653  FPToFixed_16(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits,
4654  (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS),
4655  u, rounding, &flags);
4656  }
4657 
4658  set_fpscr0(fpscr, flags);
4659 
4660  return result;
4661 }
4662 
4663 template <>
4664 uint32_t
4665 fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
4666  FPSCR &fpscr)
4667 {
4668  int flags = 0;
4669  int sgn, exp;
4670  uint16_t mnt;
4671  uint32_t result;
4672 
4673  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4674  fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4675 
4676  // If NaN, set cumulative flag or take exception:
4677  if (fp16_is_NaN(exp, mnt)) {
4678  flags = FPLIB_IOC;
4679  result = 0;
4680  } else {
4681  assert(fbits >= 0);
4682  if (exp == FP16_EXP_INF)
4683  exp = 255; // infinity: make it big enough to saturate
4684  result =
4685  FPToFixed_32(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits,
4686  (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS),
4687  u, rounding, &flags);
4688  }
4689 
4690  set_fpscr0(fpscr, flags);
4691 
4692  return result;
4693 }
4694 
4695 template <>
4696 uint32_t
4697 fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4698 {
4699  int flags = 0;
4700  int sgn, exp;
4701  uint32_t mnt, result;
4702 
4703  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4704  fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4705 
4706  // If NaN, set cumulative flag or take exception:
4707  if (fp32_is_NaN(exp, mnt)) {
4708  flags = FPLIB_IOC;
4709  result = 0;
4710  } else {
4711  assert(fbits >= 0);
4712  // Infinity is treated as an ordinary normalised number that saturates.
4713  result =
4714  FPToFixed_32(sgn, exp + FP64_EXP_BIAS - FP32_EXP_BIAS + fbits,
4715  (uint64_t)mnt << (FP64_MANT_BITS - FP32_MANT_BITS),
4716  u, rounding, &flags);
4717  }
4718 
4719  set_fpscr0(fpscr, flags);
4720 
4721  return result;
4722 }
4723 
4724 template <>
4725 uint32_t
4726 fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4727 {
4728  int flags = 0;
4729  int sgn, exp;
4730  uint64_t mnt;
4731  uint32_t result;
4732 
4733  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4734  fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4735 
4736  // If NaN, set cumulative flag or take exception:
4737  if (fp64_is_NaN(exp, mnt)) {
4738  flags = FPLIB_IOC;
4739  result = 0;
4740  } else {
4741  assert(fbits >= 0);
4742  // Infinity is treated as an ordinary normalised number that saturates.
4743  result = FPToFixed_32(sgn, exp + fbits, mnt, u, rounding, &flags);
4744  }
4745 
4746  set_fpscr0(fpscr, flags);
4747 
4748  return result;
4749 }
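// Each fplibFPToFixed specialization rebases the operand's exponent to the
// binary64 bias and left-aligns the mantissa to the binary64 fraction width,
// so the shared FPToFixed_* helpers only ever see one format. Adding fbits
// to the exponent multiplies the operand by 2^fbits, which is exactly the
// fixed-point scaling the instruction requires.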
4750 
4751 uint32_t
4752 fplibFPToFixedJS(uint64_t op, FPSCR &fpscr, bool is64, uint8_t& nz)
4753 {
4754  int flags = 0;
4755  uint32_t result;
4756  bool Z = true;
4757 
4758  uint32_t sgn = bits(op, 63);
4759  int32_t exp = bits(op, 62, 52);
4760  uint64_t mnt = bits(op, 51, 0);
4761 
4762  if (exp == 0) {
4763  if (mnt != 0) {
4764  if (fpscr.fz) {
4765  flags |= FPLIB_IDC;
4766  } else {
4767  flags |= FPLIB_IXC;
4768  Z = false;
4769  }
4770  }
4771  result = 0;
4772  } else if (exp == 0x7ff) {
4773  flags |= FPLIB_IOC;
4774  result = 0;
4775  Z = false;
4776  } else {
4777  mnt |= 1ULL << FP64_MANT_BITS;
4778  int mnt_shft = exp - FP64_EXP_BIAS - 52;
4779  bool err = true;
4780 
4781  if (abs(mnt_shft) >= FP64_BITS) {
4782  result = 0;
4783  Z = false;
4784  } else if (mnt_shft >= 0) {
4785  result = lsl64(mnt, mnt_shft);
4786  } else { // mnt_shft < 0
4787  err = lsl64(mnt, mnt_shft + FP64_BITS) != 0;
4788  result = lsr64(mnt, abs(mnt_shft));
4789  }
4790  uint64_t max_result = (1ULL << (FP32_BITS - 1)) - !sgn;
4791  if ((exp - FP64_EXP_BIAS) > 31 || result > max_result) {
4792  flags |= FPLIB_IOC;
4793  Z = false;
4794  } else if (err) {
4795  flags |= FPLIB_IXC;
4796  Z = false;
4797  }
4798  result = sgn ? -result : result;
4799  }
4800  if (sgn == 1 && result == 0)
4801  Z = false;
4802 
4803  if (is64) {
4804  nz = Z ? 0x1 : 0x0;
4805  } else {
4806  fpscr.n = 0;
4807  fpscr.z = (int)Z;
4808  fpscr.c = 0;
4809  fpscr.v = 0;
4810  }
4811 
4812  set_fpscr0(fpscr, flags);
4813  return result;
4814 }
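// FJCVTZS support: convert binary64 to a 32-bit signed integer, rounding
// toward zero. Out-of-range inputs keep the truncated low-order bits of the
// result (flagging IOC) rather than saturating, and exactness is reported
// through the Z flag, via the nz output when is64 is set or FPSCR.NZCV
// otherwise.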
4815 
4816 template <>
4817 uint64_t
4818 fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
4819  FPSCR &fpscr)
4820 {
4821  int flags = 0;
4822  int sgn, exp;
4823  uint16_t mnt;
4824  uint64_t result;
4825 
4826  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4827  fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4828 
4829  // If NaN, set cumulative flag or take exception:
4830  if (fp16_is_NaN(exp, mnt)) {
4831  flags = FPLIB_IOC;
4832  result = 0;
4833  } else {
4834  assert(fbits >= 0);
4835  if (exp == FP16_EXP_INF)
4836  exp = 255; // infinity: make it big enough to saturate
4837  result =
4838  FPToFixed_64(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits,
4839  (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS),
4840  u, rounding, &flags);
4841  }
4842 
4843  set_fpscr0(fpscr, flags);
4844 
4845  return result;
4846 }
4847 
4848 template <>
4849 uint64_t
4850 fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4851 {
4852  int flags = 0;
4853  int sgn, exp;
4854  uint32_t mnt;
4855  uint64_t result;
4856 
4857  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4858  fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4859 
4860  // If NaN, set cumulative flag or take exception:
4861  if (fp32_is_NaN(exp, mnt)) {
4862  flags = FPLIB_IOC;
4863  result = 0;
4864  } else {
4865  assert(fbits >= 0);
4866  // Infinity is treated as an ordinary normalised number that saturates.
4867  result =
4868  FPToFixed_64(sgn, exp + FP64_EXP_BIAS - FP32_EXP_BIAS + fbits,
4869  (uint64_t)mnt << (FP64_MANT_BITS - FP32_MANT_BITS),
4870  u, rounding, &flags);
4871  }
4872 
4873  set_fpscr0(fpscr, flags);
4874 
4875  return result;
4876 }
4877 
4878 template <>
4879 uint64_t
4880 fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4881 {
4882  int flags = 0;
4883  int sgn, exp;
4884  uint64_t mnt, result;
4885 
4886  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4887  fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4888 
4889  // If NaN, set cumulative flag or take exception:
4890  if (fp64_is_NaN(exp, mnt)) {
4891  flags = FPLIB_IOC;
4892  result = 0;
4893  } else {
4894  assert(fbits >= 0);
4895  // Infinity is treated as an ordinary normalised number that saturates.
4896  result = FPToFixed_64(sgn, exp + fbits, mnt, u, rounding, &flags);
4897  }
4898 
4899  set_fpscr0(fpscr, flags);
4900 
4901  return result;
4902 }
4903 
4904 static uint16_t
4905 fp16_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
4906 {
4907  int x_sgn = !u && a >> (FP64_BITS - 1);
4908  int x_exp = FP16_EXP_BIAS + FP64_BITS - 1 - fbits;
4909  uint64_t x_mnt = x_sgn ? -a : a;
4910 
4911  // Handle zero:
4912  if (!x_mnt) {
4913  return fp16_zero(0);
4914  }
4915 
4916  // Normalise into FP16_BITS bits, collapsing error into bottom bit:
4917  x_mnt = fp64_normalise(x_mnt, &x_exp);
4918  x_mnt = (x_mnt >> (FP64_BITS - FP16_BITS - 1) |
4919  !!(x_mnt & ((1ULL << (FP64_BITS - FP16_BITS - 1)) - 1)));
4920 
4921  return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
4922 }
4923 
4924 static uint32_t
4925 fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
4926 {
4927  int x_sgn = !u && a >> (FP64_BITS - 1);
4928  int x_exp = FP32_EXP_BIAS + FP64_BITS - 1 - fbits;
4929  uint64_t x_mnt = x_sgn ? -a : a;
4930 
4931  // Handle zero:
4932  if (!x_mnt) {
4933  return fp32_zero(0);
4934  }
4935 
4936  // Normalise into FP32_BITS bits, collapsing error into bottom bit:
4937  x_mnt = fp64_normalise(x_mnt, &x_exp);
4938  x_mnt = (x_mnt >> (FP64_BITS - FP32_BITS - 1) |
4939  !!(x_mnt & ((1ULL << (FP64_BITS - FP32_BITS - 1)) - 1)));
4940 
4941  return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
4942 }
4943 
4944 static uint64_t
4945 fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
4946 {
4947  int x_sgn = !u && a >> (FP64_BITS - 1);
4948  int x_exp = FP64_EXP_BIAS + FP64_BITS - 1 - fbits;
4949  uint64_t x_mnt = x_sgn ? -a : a;
4950 
4951  // Handle zero:
4952  if (!x_mnt) {
4953  return fp64_zero(0);
4954  }
4955 
4956  x_mnt = fp64_normalise(x_mnt, &x_exp);
4957 
4958  return fp64_round(x_sgn, x_exp, x_mnt << 1, mode, flags);
4959 }
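// In the three cvtf helpers above, normalisation can push significant bits
// below the narrower target width; the 16- and 32-bit versions OR those lost
// bits into the lowest kept bit (a sticky bit) so fp*_round still sees an
// inexact value, while the 64-bit version simply passes the normalised
// mantissa, shifted left by one, to fp64_round.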
4960 
4961 template <>
4962 uint16_t
4963 fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding,
4964  FPSCR &fpscr)
4965 {
4966  int flags = 0;
4967  uint16_t res = fp16_cvtf(op, fbits, u,
4968  (int)rounding | ((uint32_t)fpscr >> 22 & 12),
4969  &flags);
4970  set_fpscr0(fpscr, flags);
4971  return res;
4972 }
4973 
4974 template <>
4975 uint32_t
4976 fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4977 {
4978  int flags = 0;
4979  uint32_t res = fp32_cvtf(op, fbits, u,
4980  (int)rounding | ((uint32_t)fpscr >> 22 & 12),
4981  &flags);
4982  set_fpscr0(fpscr, flags);
4983  return res;
4984 }
4985 
4986 template <>
4987 uint64_t
4988 fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4989 {
4990  int flags = 0;
4991  uint64_t res = fp64_cvtf(op, fbits, u,
4992  (int)rounding | ((uint32_t)fpscr >> 22 & 12),
4993  &flags);
4994  set_fpscr0(fpscr, flags);
4995  return res;
4996 }
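// fplibFixedToFP interprets op as the fixed-point value op * 2^-fbits (two's
// complement when u is false); the expression
// (int)rounding | ((uint32_t)fpscr >> 22 & 12) merges the caller's rounding
// mode with the FPSCR FZ and DN bits, which land on FPLIB_FZ and FPLIB_DN.
// A standalone sketch of the scaling only, ignoring rounding-mode and flag
// handling (illustrative, not part of fplib):

#include <cstdio>

int main()
{
    long long op = 3; // fixed-point value with one fractional bit
    int fbits = 1;
    std::printf("%g\n", (double)op / (double)(1LL << fbits)); // prints 1.5
    return 0;
}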
4997 
4998 template <>
4999 uint16_t
5000 fplibInfinity(int sgn)
5001 {
5002  return fp16_infinity(sgn);
5003 }
5004 
5005 template <>
5006 uint32_t
5007 fplibInfinity(int sgn)
5008 {
5009  return fp32_infinity(sgn);
5010 }
5011 
5012 template <>
5013 uint64_t
5014 fplibInfinity(int sgn)
5015 {
5016  return fp64_infinity(sgn);
5017 }
5018 
5019 template <>
5020 uint16_t
5021 fplibDefaultNaN()
5022 {
5023  return fp16_defaultNaN();
5024 }
5025 
5026 template <>
5027 uint32_t
5028 fplibDefaultNaN()
5029 {
5030  return fp32_defaultNaN();
5031 }
5032 
5033 template <>
5034 uint64_t
5035 fplibDefaultNaN()
5036 {
5037  return fp64_defaultNaN();
5038 }
5039 
5040 }