gem5  v22.0.0.2
fplib.cc
Go to the documentation of this file.
1 /*
2 * Copyright (c) 2012-2013, 2017-2018, 2020 ARM Limited
3 * Copyright (c) 2020 Metempsy Technology Consulting
4 * All rights reserved
5 *
6 * The license below extends only to copyright in the software and shall
7 * not be construed as granting a license to any other intellectual
8 * property including but not limited to intellectual property relating
9 * to a hardware implementation of the functionality of the software
10 * licensed hereunder. You may use the software subject to the license
11 * terms below provided that you ensure that this notice is replicated
12 * unmodified and in its entirety in all distributions of the software,
13 * modified or unmodified, in source code or in binary form.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions are
17 * met: redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer;
19 * redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution;
22 * neither the name of the copyright holders nor the names of its
23 * contributors may be used to endorse or promote products derived from
24 * this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 */
38 
39 #include <stdint.h>
40 
41 #include <cassert>
42 #include <cmath>
43 
44 #include "base/logging.hh"
45 #include "fplib.hh"
46 
47 namespace gem5
48 {
49 
50 namespace ArmISA
51 {
52 
53 #define FPLIB_RN 0
54 #define FPLIB_RP 1
55 #define FPLIB_RM 2
56 #define FPLIB_RZ 3
57 #define FPLIB_FZ 4
58 #define FPLIB_DN 8
59 #define FPLIB_AHP 16
60 #define FPLIB_FZ16 32
61 
62 #define FPLIB_IDC 128 // Input Denormal
63 #define FPLIB_IXC 16 // Inexact
64 #define FPLIB_UFC 8 // Underflow
65 #define FPLIB_OFC 4 // Overflow
66 #define FPLIB_DZC 2 // Division by Zero
67 #define FPLIB_IOC 1 // Invalid Operation
68 
69 #define FP16_BITS 16
70 #define FP32_BITS 32
71 #define FP64_BITS 64
72 
73 #define FP16_EXP_BITS 5
74 #define FP32_EXP_BITS 8
75 #define FP64_EXP_BITS 11
76 
77 #define FP16_EXP_BIAS 15
78 #define FP32_EXP_BIAS 127
79 #define FP64_EXP_BIAS 1023
80 
81 #define FP16_EXP_INF ((1ULL << FP16_EXP_BITS) - 1)
82 #define FP32_EXP_INF ((1ULL << FP32_EXP_BITS) - 1)
83 #define FP64_EXP_INF ((1ULL << FP64_EXP_BITS) - 1)
84 
85 #define FP16_MANT_BITS (FP16_BITS - FP16_EXP_BITS - 1)
86 #define FP32_MANT_BITS (FP32_BITS - FP32_EXP_BITS - 1)
87 #define FP64_MANT_BITS (FP64_BITS - FP64_EXP_BITS - 1)
88 
89 #define FP16_EXP(x) ((x) >> FP16_MANT_BITS & ((1ULL << FP16_EXP_BITS) - 1))
90 #define FP32_EXP(x) ((x) >> FP32_MANT_BITS & ((1ULL << FP32_EXP_BITS) - 1))
91 #define FP64_EXP(x) ((x) >> FP64_MANT_BITS & ((1ULL << FP64_EXP_BITS) - 1))
92 
93 #define FP16_MANT(x) ((x) & ((1ULL << FP16_MANT_BITS) - 1))
94 #define FP32_MANT(x) ((x) & ((1ULL << FP32_MANT_BITS) - 1))
95 #define FP64_MANT(x) ((x) & ((1ULL << FP64_MANT_BITS) - 1))
96 
97 static inline uint16_t
98 lsl16(uint16_t x, uint32_t shift)
99 {
100  return shift < 16 ? x << shift : 0;
101 }
102 
103 static inline uint16_t
104 lsr16(uint16_t x, uint32_t shift)
105 {
106  return shift < 16 ? x >> shift : 0;
107 }
108 
109 static inline uint32_t
110 lsl32(uint32_t x, uint32_t shift)
111 {
112  return shift < 32 ? x << shift : 0;
113 }
114 
115 static inline uint32_t
116 lsr32(uint32_t x, uint32_t shift)
117 {
118  return shift < 32 ? x >> shift : 0;
119 }
120 
121 static inline uint64_t
122 lsl64(uint64_t x, uint32_t shift)
123 {
124  return shift < 64 ? x << shift : 0;
125 }
126 
127 static inline uint64_t
128 lsr64(uint64_t x, uint32_t shift)
129 {
130  return shift < 64 ? x >> shift : 0;
131 }
132 
133 static inline void
134 lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
135 {
136  if (shift == 0) {
137  *r1 = x1;
138  *r0 = x0;
139  } else if (shift < 64) {
140  *r1 = x1 << shift | x0 >> (64 - shift);
141  *r0 = x0 << shift;
142  } else if (shift < 128) {
143  *r1 = x0 << (shift - 64);
144  *r0 = 0;
145  } else {
146  *r1 = 0;
147  *r0 = 0;
148  }
149 }
150 
151 static inline void
152 lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
153 {
154  if (shift == 0) {
155  *r1 = x1;
156  *r0 = x0;
157  } else if (shift < 64) {
158  *r0 = x0 >> shift | x1 << (64 - shift);
159  *r1 = x1 >> shift;
160  } else if (shift < 128) {
161  *r0 = x1 >> (shift - 64);
162  *r1 = 0;
163  } else {
164  *r0 = 0;
165  *r1 = 0;
166  }
167 }
168 
169 static inline void
170 mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
171 {
172  uint32_t mask = ((uint32_t)1 << 31) - 1;
173  uint64_t a0 = a & mask;
174  uint64_t a1 = a >> 31 & mask;
175  uint64_t b0 = b & mask;
176  uint64_t b1 = b >> 31 & mask;
177  uint64_t p0 = a0 * b0;
178  uint64_t p2 = a1 * b1;
179  uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2;
180  uint64_t s0 = p0;
181  uint64_t s1 = (s0 >> 31) + p1;
182  uint64_t s2 = (s1 >> 31) + p2;
183  *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62;
184  *x1 = s2 >> 2;
185 }
186 
187 static inline
188 void mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b)
189 {
190  uint64_t t0 = (uint64_t)(uint32_t)a * b;
191  uint64_t t1 = (t0 >> 32) + (a >> 32) * b;
192  *x0 = t1 << 32 | (uint32_t)t0;
193  *x1 = t1 >> 32;
194 }
195 
196 static inline void
197 add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
198  uint64_t b1)
199 {
200  *x0 = a0 + b0;
201  *x1 = a1 + b1 + (*x0 < a0);
202 }
203 
204 static inline void
205 sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
206  uint64_t b1)
207 {
208  *x0 = a0 - b0;
209  *x1 = a1 - b1 - (*x0 > a0);
210 }
211 
212 static inline int
213 cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
214 {
215  return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 1 : 0);
216 }
217 
218 static inline uint16_t
219 fp16_normalise(uint16_t mnt, int *exp)
220 {
221  int shift;
222 
223  if (!mnt) {
224  return 0;
225  }
226 
227  for (shift = 8; shift; shift >>= 1) {
228  if (!(mnt >> (16 - shift))) {
229  mnt <<= shift;
230  *exp -= shift;
231  }
232  }
233  return mnt;
234 }
235 
236 static inline uint32_t
237 fp32_normalise(uint32_t mnt, int *exp)
238 {
239  int shift;
240 
241  if (!mnt) {
242  return 0;
243  }
244 
245  for (shift = 16; shift; shift >>= 1) {
246  if (!(mnt >> (32 - shift))) {
247  mnt <<= shift;
248  *exp -= shift;
249  }
250  }
251  return mnt;
252 }
253 
254 static inline uint64_t
255 fp64_normalise(uint64_t mnt, int *exp)
256 {
257  int shift;
258 
259  if (!mnt) {
260  return 0;
261  }
262 
263  for (shift = 32; shift; shift >>= 1) {
264  if (!(mnt >> (64 - shift))) {
265  mnt <<= shift;
266  *exp -= shift;
267  }
268  }
269  return mnt;
270 }
271 
272 static inline void
273 fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp)
274 {
275  uint64_t x0 = *mnt0;
276  uint64_t x1 = *mnt1;
277  int shift;
278 
279  if (!x0 && !x1) {
280  return;
281  }
282 
283  if (!x1) {
284  x1 = x0;
285  x0 = 0;
286  *exp -= 64;
287  }
288 
289  for (shift = 32; shift; shift >>= 1) {
290  if (!(x1 >> (64 - shift))) {
291  x1 = x1 << shift | x0 >> (64 - shift);
292  x0 <<= shift;
293  *exp -= shift;
294  }
295  }
296 
297  *mnt0 = x0;
298  *mnt1 = x1;
299 }
300 
301 static inline uint16_t
302 fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt)
303 {
304  return sgn << (FP16_BITS - 1) | exp << FP16_MANT_BITS | FP16_MANT(mnt);
305 }
306 
307 static inline uint32_t
308 fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt)
309 {
310  return sgn << (FP32_BITS - 1) | exp << FP32_MANT_BITS | FP32_MANT(mnt);
311 }
312 
313 static inline uint64_t
314 fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt)
315 {
316  return sgn << (FP64_BITS - 1) | exp << FP64_MANT_BITS | FP64_MANT(mnt);
317 }
318 
319 static inline uint16_t
320 fp16_zero(int sgn)
321 {
322  return fp16_pack(sgn, 0, 0);
323 }
324 
325 static inline uint32_t
326 fp32_zero(int sgn)
327 {
328  return fp32_pack(sgn, 0, 0);
329 }
330 
331 static inline uint64_t
332 fp64_zero(int sgn)
333 {
334  return fp64_pack(sgn, 0, 0);
335 }
336 
337 static inline uint16_t
339 {
340  return fp16_pack(sgn, FP16_EXP_INF - 1, -1);
341 }
342 
343 static inline uint32_t
345 {
346  return fp32_pack(sgn, FP32_EXP_INF - 1, -1);
347 }
348 
349 static inline uint64_t
351 {
352  return fp64_pack(sgn, FP64_EXP_INF - 1, -1);
353 }
354 
355 static inline uint16_t
357 {
358  return fp16_pack(sgn, FP16_EXP_INF, 0);
359 }
360 
361 static inline uint32_t
363 {
364  return fp32_pack(sgn, FP32_EXP_INF, 0);
365 }
366 
367 static inline uint64_t
369 {
370  return fp64_pack(sgn, FP64_EXP_INF, 0);
371 }
372 
373 static inline uint16_t
375 {
376  return fp16_pack(0, FP16_EXP_INF, 1ULL << (FP16_MANT_BITS - 1));
377 }
378 
379 static inline uint32_t
381 {
382  return fp32_pack(0, FP32_EXP_INF, 1ULL << (FP32_MANT_BITS - 1));
383 }
384 
385 static inline uint64_t
387 {
388  return fp64_pack(0, FP64_EXP_INF, 1ULL << (FP64_MANT_BITS - 1));
389 }
390 
391 static inline void
392 fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode,
393  int *flags)
394 {
395  *sgn = x >> (FP16_BITS - 1);
396  *exp = FP16_EXP(x);
397  *mnt = FP16_MANT(x);
398 
399  if (*exp) {
400  *mnt |= 1ULL << FP16_MANT_BITS;
401  } else {
402  // Handle subnormals:
403  // IDC (Input Denormal) is not set in this case.
404  if (*mnt) {
405  if (mode & FPLIB_FZ16) {
406  *mnt = 0;
407  } else {
408  ++*exp;
409  }
410  }
411  }
412 }
413 
414 static inline void
415 fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode,
416  int *flags)
417 {
418  *sgn = x >> (FP32_BITS - 1);
419  *exp = FP32_EXP(x);
420  *mnt = FP32_MANT(x);
421 
422  if (*exp) {
423  *mnt |= 1ULL << FP32_MANT_BITS;
424  } else {
425  // Handle subnormals:
426  if (*mnt) {
427  if (mode & FPLIB_FZ) {
428  *flags |= FPLIB_IDC;
429  *mnt = 0;
430  } else {
431  ++*exp;
432  }
433  }
434  }
435 }
436 
437 static inline void
438 fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode,
439  int *flags)
440 {
441 
442 
443  *sgn = x >> (FP64_BITS - 1);
444  *exp = FP64_EXP(x);
445  *mnt = FP64_MANT(x);
446 
447  if (*exp) {
448  *mnt |= 1ULL << FP64_MANT_BITS;
449  } else {
450  // Handle subnormals:
451  if (*mnt) {
452  if (mode & FPLIB_FZ) {
453  *flags |= FPLIB_IDC;
454  *mnt = 0;
455  } else {
456  ++*exp;
457  }
458  }
459  }
460 }
461 
462 static inline int
463 fp16_is_NaN(int exp, uint16_t mnt)
464 {
465  return exp == FP16_EXP_INF && FP16_MANT(mnt);
466 }
467 
468 static inline int
469 fp32_is_NaN(int exp, uint32_t mnt)
470 {
471  return exp == FP32_EXP_INF && FP32_MANT(mnt);
472 }
473 
474 static inline int
475 fp64_is_NaN(int exp, uint64_t mnt)
476 {
477  return exp == FP64_EXP_INF && FP64_MANT(mnt);
478 }
479 
480 static inline int
481 fp16_is_signalling_NaN(int exp, uint16_t mnt)
482 {
483  return fp16_is_NaN(exp, mnt) && !(mnt >> (FP16_MANT_BITS - 1) & 1);
484 }
485 
486 static inline int
487 fp32_is_signalling_NaN(int exp, uint32_t mnt)
488 {
489  return fp32_is_NaN(exp, mnt) && !(mnt >> (FP32_MANT_BITS - 1) & 1);
490 }
491 
492 static inline int
493 fp64_is_signalling_NaN(int exp, uint64_t mnt)
494 {
495  return fp64_is_NaN(exp, mnt) && !(mnt >> (FP64_MANT_BITS - 1) & 1);
496 }
497 
498 static inline int
499 fp16_is_quiet_NaN(int exp, uint16_t mnt)
500 {
501  return exp == FP16_EXP_INF && (mnt >> (FP16_MANT_BITS - 1) & 1);
502 }
503 
504 static inline int
505 fp32_is_quiet_NaN(int exp, uint32_t mnt)
506 {
507  return exp == FP32_EXP_INF && (mnt >> (FP32_MANT_BITS - 1) & 1);
508 }
509 
510 static inline int
511 fp64_is_quiet_NaN(int exp, uint64_t mnt)
512 {
513  return exp == FP64_EXP_INF && (mnt >> (FP64_MANT_BITS - 1) & 1);
514 }
515 
516 static inline int
517 fp16_is_infinity(int exp, uint16_t mnt)
518 {
519  return exp == FP16_EXP_INF && !FP16_MANT(mnt);
520 }
521 
522 static inline int
523 fp32_is_infinity(int exp, uint32_t mnt)
524 {
525  return exp == FP32_EXP_INF && !FP32_MANT(mnt);
526 }
527 
528 static inline int
529 fp64_is_infinity(int exp, uint64_t mnt)
530 {
531  return exp == FP64_EXP_INF && !FP64_MANT(mnt);
532 }
533 
534 static inline uint16_t
535 fp16_process_NaN(uint16_t a, int mode, int *flags)
536 {
537  if (!(a >> (FP16_MANT_BITS - 1) & 1)) {
538  *flags |= FPLIB_IOC;
539  a |= 1ULL << (FP16_MANT_BITS - 1);
540  }
541  return mode & FPLIB_DN ? fp16_defaultNaN() : a;
542 }
543 
544 static inline uint32_t
545 fp32_process_NaN(uint32_t a, int mode, int *flags)
546 {
547  if (!(a >> (FP32_MANT_BITS - 1) & 1)) {
548  *flags |= FPLIB_IOC;
549  a |= 1ULL << (FP32_MANT_BITS - 1);
550  }
551  return mode & FPLIB_DN ? fp32_defaultNaN() : a;
552 }
553 
554 static inline uint64_t
555 fp64_process_NaN(uint64_t a, int mode, int *flags)
556 {
557  if (!(a >> (FP64_MANT_BITS - 1) & 1)) {
558  *flags |= FPLIB_IOC;
559  a |= 1ULL << (FP64_MANT_BITS - 1);
560  }
561  return mode & FPLIB_DN ? fp64_defaultNaN() : a;
562 }
563 
564 static uint16_t
565 fp16_process_NaNs(uint16_t a, uint16_t b, int mode, int *flags)
566 {
567  int a_exp = FP16_EXP(a);
568  uint16_t a_mnt = FP16_MANT(a);
569  int b_exp = FP16_EXP(b);
570  uint16_t b_mnt = FP16_MANT(b);
571 
572  // Handle signalling NaNs:
573  if (fp16_is_signalling_NaN(a_exp, a_mnt))
574  return fp16_process_NaN(a, mode, flags);
575  if (fp16_is_signalling_NaN(b_exp, b_mnt))
576  return fp16_process_NaN(b, mode, flags);
577 
578  // Handle quiet NaNs:
579  if (fp16_is_NaN(a_exp, a_mnt))
580  return fp16_process_NaN(a, mode, flags);
581  if (fp16_is_NaN(b_exp, b_mnt))
582  return fp16_process_NaN(b, mode, flags);
583 
584  return 0;
585 }
586 
587 static uint32_t
588 fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags)
589 {
590  int a_exp = FP32_EXP(a);
591  uint32_t a_mnt = FP32_MANT(a);
592  int b_exp = FP32_EXP(b);
593  uint32_t b_mnt = FP32_MANT(b);
594 
595  // Handle signalling NaNs:
596  if (fp32_is_signalling_NaN(a_exp, a_mnt))
597  return fp32_process_NaN(a, mode, flags);
598  if (fp32_is_signalling_NaN(b_exp, b_mnt))
599  return fp32_process_NaN(b, mode, flags);
600 
601  // Handle quiet NaNs:
602  if (fp32_is_NaN(a_exp, a_mnt))
603  return fp32_process_NaN(a, mode, flags);
604  if (fp32_is_NaN(b_exp, b_mnt))
605  return fp32_process_NaN(b, mode, flags);
606 
607  return 0;
608 }
609 
610 static uint64_t
611 fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags)
612 {
613  int a_exp = FP64_EXP(a);
614  uint64_t a_mnt = FP64_MANT(a);
615  int b_exp = FP64_EXP(b);
616  uint64_t b_mnt = FP64_MANT(b);
617 
618  // Handle signalling NaNs:
619  if (fp64_is_signalling_NaN(a_exp, a_mnt))
620  return fp64_process_NaN(a, mode, flags);
621  if (fp64_is_signalling_NaN(b_exp, b_mnt))
622  return fp64_process_NaN(b, mode, flags);
623 
624  // Handle quiet NaNs:
625  if (fp64_is_NaN(a_exp, a_mnt))
626  return fp64_process_NaN(a, mode, flags);
627  if (fp64_is_NaN(b_exp, b_mnt))
628  return fp64_process_NaN(b, mode, flags);
629 
630  return 0;
631 }
632 
633 static uint16_t
634 fp16_process_NaNs3(uint16_t a, uint16_t b, uint16_t c, int mode, int *flags)
635 {
636  int a_exp = FP16_EXP(a);
637  uint16_t a_mnt = FP16_MANT(a);
638  int b_exp = FP16_EXP(b);
639  uint16_t b_mnt = FP16_MANT(b);
640  int c_exp = FP16_EXP(c);
641  uint16_t c_mnt = FP16_MANT(c);
642 
643  // Handle signalling NaNs:
644  if (fp16_is_signalling_NaN(a_exp, a_mnt))
645  return fp16_process_NaN(a, mode, flags);
646  if (fp16_is_signalling_NaN(b_exp, b_mnt))
647  return fp16_process_NaN(b, mode, flags);
648  if (fp16_is_signalling_NaN(c_exp, c_mnt))
649  return fp16_process_NaN(c, mode, flags);
650 
651  // Handle quiet NaNs:
652  if (fp16_is_NaN(a_exp, a_mnt))
653  return fp16_process_NaN(a, mode, flags);
654  if (fp16_is_NaN(b_exp, b_mnt))
655  return fp16_process_NaN(b, mode, flags);
656  if (fp16_is_NaN(c_exp, c_mnt))
657  return fp16_process_NaN(c, mode, flags);
658 
659  return 0;
660 }
661 
662 static uint32_t
663 fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags)
664 {
665  int a_exp = FP32_EXP(a);
666  uint32_t a_mnt = FP32_MANT(a);
667  int b_exp = FP32_EXP(b);
668  uint32_t b_mnt = FP32_MANT(b);
669  int c_exp = FP32_EXP(c);
670  uint32_t c_mnt = FP32_MANT(c);
671 
672  // Handle signalling NaNs:
673  if (fp32_is_signalling_NaN(a_exp, a_mnt))
674  return fp32_process_NaN(a, mode, flags);
675  if (fp32_is_signalling_NaN(b_exp, b_mnt))
676  return fp32_process_NaN(b, mode, flags);
677  if (fp32_is_signalling_NaN(c_exp, c_mnt))
678  return fp32_process_NaN(c, mode, flags);
679 
680  // Handle quiet NaNs:
681  if (fp32_is_NaN(a_exp, a_mnt))
682  return fp32_process_NaN(a, mode, flags);
683  if (fp32_is_NaN(b_exp, b_mnt))
684  return fp32_process_NaN(b, mode, flags);
685  if (fp32_is_NaN(c_exp, c_mnt))
686  return fp32_process_NaN(c, mode, flags);
687 
688  return 0;
689 }
690 
691 static uint64_t
692 fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags)
693 {
694  int a_exp = FP64_EXP(a);
695  uint64_t a_mnt = FP64_MANT(a);
696  int b_exp = FP64_EXP(b);
697  uint64_t b_mnt = FP64_MANT(b);
698  int c_exp = FP64_EXP(c);
699  uint64_t c_mnt = FP64_MANT(c);
700 
701  // Handle signalling NaNs:
702  if (fp64_is_signalling_NaN(a_exp, a_mnt))
703  return fp64_process_NaN(a, mode, flags);
704  if (fp64_is_signalling_NaN(b_exp, b_mnt))
705  return fp64_process_NaN(b, mode, flags);
706  if (fp64_is_signalling_NaN(c_exp, c_mnt))
707  return fp64_process_NaN(c, mode, flags);
708 
709  // Handle quiet NaNs:
710  if (fp64_is_NaN(a_exp, a_mnt))
711  return fp64_process_NaN(a, mode, flags);
712  if (fp64_is_NaN(b_exp, b_mnt))
713  return fp64_process_NaN(b, mode, flags);
714  if (fp64_is_NaN(c_exp, c_mnt))
715  return fp64_process_NaN(c, mode, flags);
716 
717  return 0;
718 }
719 
720 static uint16_t
721 fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags)
722 {
723  int biased_exp; // non-negative exponent value for result
724  uint16_t int_mant; // mantissa for result, less than (2 << FP16_MANT_BITS)
725  int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5
726 
727  assert(rm != FPRounding_TIEAWAY);
728 
729  // Flush to zero:
730  if ((mode & FPLIB_FZ16) && exp < 1) {
731  *flags |= FPLIB_UFC;
732  return fp16_zero(sgn);
733  }
734 
735  // The bottom FP16_EXP_BITS bits of mnt are orred together:
736  mnt = (4ULL << FP16_MANT_BITS | mnt >> (FP16_EXP_BITS - 1) |
737  ((mnt & ((1ULL << FP16_EXP_BITS) - 1)) != 0));
738 
739  if (exp > 0) {
740  biased_exp = exp;
741  int_mant = mnt >> 2;
742  error = mnt & 3;
743  } else {
744  biased_exp = 0;
745  int_mant = lsr16(mnt, 3 - exp);
746  error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1));
747  }
748 
749  if (!biased_exp && error) { // xx should also check fpscr_val<11>
750  *flags |= FPLIB_UFC;
751  }
752 
753  // Round up:
754  if ((rm == FPLIB_RN && (error == 3 ||
755  (error == 2 && (int_mant & 1)))) ||
756  (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
757  ++int_mant;
758  if (int_mant == 1ULL << FP16_MANT_BITS) {
759  // Rounded up from denormalized to normalized
760  biased_exp = 1;
761  }
762  if (int_mant == 2ULL << FP16_MANT_BITS) {
763  // Rounded up to next exponent
764  ++biased_exp;
765  int_mant >>= 1;
766  }
767  }
768 
769  // Handle rounding to odd aka Von Neumann rounding:
770  if (error && rm == FPRounding_ODD)
771  int_mant |= 1;
772 
773  // Handle overflow:
774  if (!(mode & FPLIB_AHP)) {
775  if (biased_exp >= (int)FP16_EXP_INF) {
776  *flags |= FPLIB_OFC | FPLIB_IXC;
777  if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
778  (rm == FPLIB_RM && sgn)) {
779  return fp16_infinity(sgn);
780  } else {
781  return fp16_max_normal(sgn);
782  }
783  }
784  } else {
785  if (biased_exp >= (int)FP16_EXP_INF + 1) {
786  *flags |= FPLIB_IOC;
787  return fp16_pack(sgn, FP16_EXP_INF, -1);
788  }
789  }
790 
791  if (error) {
792  *flags |= FPLIB_IXC;
793  }
794 
795  return fp16_pack(sgn, biased_exp, int_mant);
796 }
797 
798 static uint16_t
799 fp16_round(int sgn, int exp, uint16_t mnt, int mode, int *flags)
800 {
801  return fp16_round_(sgn, exp, mnt, mode & 3, mode, flags);
802 }
803 
804 static uint32_t
805 fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags)
806 {
807  int biased_exp; // non-negative exponent value for result
808  uint32_t int_mant; // mantissa for result, less than (2 << FP32_MANT_BITS)
809  int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5
810 
811  assert(rm != FPRounding_TIEAWAY);
812 
813  // Flush to zero:
814  if ((mode & FPLIB_FZ) && exp < 1) {
815  *flags |= FPLIB_UFC;
816  return fp32_zero(sgn);
817  }
818 
819  // The bottom FP32_EXP_BITS bits of mnt are orred together:
820  mnt = (4ULL << FP32_MANT_BITS | mnt >> (FP32_EXP_BITS - 1) |
821  ((mnt & ((1ULL << FP32_EXP_BITS) - 1)) != 0));
822 
823  if (exp > 0) {
824  biased_exp = exp;
825  int_mant = mnt >> 2;
826  error = mnt & 3;
827  } else {
828  biased_exp = 0;
829  int_mant = lsr32(mnt, 3 - exp);
830  error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1));
831  }
832 
833  if (!biased_exp && error) { // xx should also check fpscr_val<11>
834  *flags |= FPLIB_UFC;
835  }
836 
837  // Round up:
838  if ((rm == FPLIB_RN && (error == 3 ||
839  (error == 2 && (int_mant & 1)))) ||
840  (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
841  ++int_mant;
842  if (int_mant == 1ULL << FP32_MANT_BITS) {
843  // Rounded up from denormalized to normalized
844  biased_exp = 1;
845  }
846  if (int_mant == 2ULL << FP32_MANT_BITS) {
847  // Rounded up to next exponent
848  ++biased_exp;
849  int_mant >>= 1;
850  }
851  }
852 
853  // Handle rounding to odd aka Von Neumann rounding:
854  if (error && rm == FPRounding_ODD)
855  int_mant |= 1;
856 
857  // Handle overflow:
858  if (biased_exp >= (int)FP32_EXP_INF) {
859  *flags |= FPLIB_OFC | FPLIB_IXC;
860  if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
861  (rm == FPLIB_RM && sgn)) {
862  return fp32_infinity(sgn);
863  } else {
864  return fp32_max_normal(sgn);
865  }
866  }
867 
868  if (error) {
869  *flags |= FPLIB_IXC;
870  }
871 
872  return fp32_pack(sgn, biased_exp, int_mant);
873 }
874 
875 static uint32_t
876 fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags)
877 {
878  return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags);
879 }
880 
881 static uint64_t
882 fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags)
883 {
884  int biased_exp; // non-negative exponent value for result
885  uint64_t int_mant; // mantissa for result, less than (2 << FP64_MANT_BITS)
886  int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5
887 
888  assert(rm != FPRounding_TIEAWAY);
889 
890  // Flush to zero:
891  if ((mode & FPLIB_FZ) && exp < 1) {
892  *flags |= FPLIB_UFC;
893  return fp64_zero(sgn);
894  }
895 
896  // The bottom FP64_EXP_BITS bits of mnt are orred together:
897  mnt = (4ULL << FP64_MANT_BITS | mnt >> (FP64_EXP_BITS - 1) |
898  ((mnt & ((1ULL << FP64_EXP_BITS) - 1)) != 0));
899 
900  if (exp > 0) {
901  biased_exp = exp;
902  int_mant = mnt >> 2;
903  error = mnt & 3;
904  } else {
905  biased_exp = 0;
906  int_mant = lsr64(mnt, 3 - exp);
907  error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1));
908  }
909 
910  if (!biased_exp && error) { // xx should also check fpscr_val<11>
911  *flags |= FPLIB_UFC;
912  }
913 
914  // Round up:
915  if ((rm == FPLIB_RN && (error == 3 ||
916  (error == 2 && (int_mant & 1)))) ||
917  (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
918  ++int_mant;
919  if (int_mant == 1ULL << FP64_MANT_BITS) {
920  // Rounded up from denormalized to normalized
921  biased_exp = 1;
922  }
923  if (int_mant == 2ULL << FP64_MANT_BITS) {
924  // Rounded up to next exponent
925  ++biased_exp;
926  int_mant >>= 1;
927  }
928  }
929 
930  // Handle rounding to odd aka Von Neumann rounding:
931  if (error && rm == FPRounding_ODD)
932  int_mant |= 1;
933 
934  // Handle overflow:
935  if (biased_exp >= (int)FP64_EXP_INF) {
936  *flags |= FPLIB_OFC | FPLIB_IXC;
937  if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
938  (rm == FPLIB_RM && sgn)) {
939  return fp64_infinity(sgn);
940  } else {
941  return fp64_max_normal(sgn);
942  }
943  }
944 
945  if (error) {
946  *flags |= FPLIB_IXC;
947  }
948 
949  return fp64_pack(sgn, biased_exp, int_mant);
950 }
951 
952 static uint64_t
953 fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags)
954 {
955  return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags);
956 }
957 
958 static int
959 fp16_compare_eq(uint16_t a, uint16_t b, int mode, int *flags)
960 {
961  int a_sgn, a_exp, b_sgn, b_exp;
962  uint16_t a_mnt, b_mnt;
963 
964  fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
965  fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
966 
967  if (fp16_is_NaN(a_exp, a_mnt) ||
968  fp16_is_NaN(b_exp, b_mnt)) {
969  if (fp16_is_signalling_NaN(a_exp, a_mnt) ||
970  fp16_is_signalling_NaN(b_exp, b_mnt))
971  *flags |= FPLIB_IOC;
972  return 0;
973  }
974  return a == b || (!a_mnt && !b_mnt);
975 }
976 
977 static int
978 fp16_compare_ge(uint16_t a, uint16_t b, int mode, int *flags)
979 {
980  int a_sgn, a_exp, b_sgn, b_exp;
981  uint16_t a_mnt, b_mnt;
982 
983  fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
984  fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
985 
986  if (fp16_is_NaN(a_exp, a_mnt) ||
987  fp16_is_NaN(b_exp, b_mnt)) {
988  *flags |= FPLIB_IOC;
989  return 0;
990  }
991  if (!a_mnt && !b_mnt)
992  return 1;
993  if (a_sgn != b_sgn)
994  return b_sgn;
995  if (a_exp != b_exp)
996  return a_sgn ^ (a_exp > b_exp);
997  if (a_mnt != b_mnt)
998  return a_sgn ^ (a_mnt > b_mnt);
999  return 1;
1000 }
1001 
1002 static int
1003 fp16_compare_gt(uint16_t a, uint16_t b, int mode, int *flags)
1004 {
1005  int a_sgn, a_exp, b_sgn, b_exp;
1006  uint16_t a_mnt, b_mnt;
1007 
1008  fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1009  fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1010 
1011  if (fp16_is_NaN(a_exp, a_mnt) ||
1012  fp16_is_NaN(b_exp, b_mnt)) {
1013  *flags |= FPLIB_IOC;
1014  return 0;
1015  }
1016  if (!a_mnt && !b_mnt)
1017  return 0;
1018  if (a_sgn != b_sgn)
1019  return b_sgn;
1020  if (a_exp != b_exp)
1021  return a_sgn ^ (a_exp > b_exp);
1022  if (a_mnt != b_mnt)
1023  return a_sgn ^ (a_mnt > b_mnt);
1024  return 0;
1025 }
1026 
1027 static int
1028 fp16_compare_un(uint16_t a, uint16_t b, int mode, int *flags)
1029 {
1030  int a_sgn, a_exp, b_sgn, b_exp;
1031  uint16_t a_mnt, b_mnt;
1032 
1033  fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1034  fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1035 
1036  if (fp16_is_NaN(a_exp, a_mnt) ||
1037  fp16_is_NaN(b_exp, b_mnt)) {
1038  if (fp16_is_signalling_NaN(a_exp, a_mnt) ||
1039  fp16_is_signalling_NaN(b_exp, b_mnt))
1040  *flags |= FPLIB_IOC;
1041  return 1;
1042  }
1043  return 0;
1044 }
1045 
1046 static int
1047 fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags)
1048 {
1049  int a_sgn, a_exp, b_sgn, b_exp;
1050  uint32_t a_mnt, b_mnt;
1051 
1052  fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1053  fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1054 
1055  if (fp32_is_NaN(a_exp, a_mnt) ||
1056  fp32_is_NaN(b_exp, b_mnt)) {
1057  if (fp32_is_signalling_NaN(a_exp, a_mnt) ||
1058  fp32_is_signalling_NaN(b_exp, b_mnt))
1059  *flags |= FPLIB_IOC;
1060  return 0;
1061  }
1062  return a == b || (!a_mnt && !b_mnt);
1063 }
1064 
1065 static int
1066 fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags)
1067 {
1068  int a_sgn, a_exp, b_sgn, b_exp;
1069  uint32_t a_mnt, b_mnt;
1070 
1071  fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1072  fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1073 
1074  if (fp32_is_NaN(a_exp, a_mnt) ||
1075  fp32_is_NaN(b_exp, b_mnt)) {
1076  *flags |= FPLIB_IOC;
1077  return 0;
1078  }
1079  if (!a_mnt && !b_mnt)
1080  return 1;
1081  if (a_sgn != b_sgn)
1082  return b_sgn;
1083  if (a_exp != b_exp)
1084  return a_sgn ^ (a_exp > b_exp);
1085  if (a_mnt != b_mnt)
1086  return a_sgn ^ (a_mnt > b_mnt);
1087  return 1;
1088 }
1089 
1090 static int
1091 fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags)
1092 {
1093  int a_sgn, a_exp, b_sgn, b_exp;
1094  uint32_t a_mnt, b_mnt;
1095 
1096  fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1097  fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1098 
1099  if (fp32_is_NaN(a_exp, a_mnt) ||
1100  fp32_is_NaN(b_exp, b_mnt)) {
1101  *flags |= FPLIB_IOC;
1102  return 0;
1103  }
1104  if (!a_mnt && !b_mnt)
1105  return 0;
1106  if (a_sgn != b_sgn)
1107  return b_sgn;
1108  if (a_exp != b_exp)
1109  return a_sgn ^ (a_exp > b_exp);
1110  if (a_mnt != b_mnt)
1111  return a_sgn ^ (a_mnt > b_mnt);
1112  return 0;
1113 }
1114 
1115 static int
1116 fp32_compare_un(uint32_t a, uint32_t b, int mode, int *flags)
1117 {
1118  int a_sgn, a_exp, b_sgn, b_exp;
1119  uint32_t a_mnt, b_mnt;
1120 
1121  fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1122  fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1123 
1124  if (fp32_is_NaN(a_exp, a_mnt) ||
1125  fp32_is_NaN(b_exp, b_mnt)) {
1126  if (fp32_is_signalling_NaN(a_exp, a_mnt) ||
1127  fp32_is_signalling_NaN(b_exp, b_mnt))
1128  *flags |= FPLIB_IOC;
1129  return 1;
1130  }
1131  return 0;
1132 }
1133 
1134 static int
1135 fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags)
1136 {
1137  int a_sgn, a_exp, b_sgn, b_exp;
1138  uint64_t a_mnt, b_mnt;
1139 
1140  fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1141  fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1142 
1143  if (fp64_is_NaN(a_exp, a_mnt) ||
1144  fp64_is_NaN(b_exp, b_mnt)) {
1145  if (fp64_is_signalling_NaN(a_exp, a_mnt) ||
1146  fp64_is_signalling_NaN(b_exp, b_mnt))
1147  *flags |= FPLIB_IOC;
1148  return 0;
1149  }
1150  return a == b || (!a_mnt && !b_mnt);
1151 }
1152 
1153 static int
1154 fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags)
1155 {
1156  int a_sgn, a_exp, b_sgn, b_exp;
1157  uint64_t a_mnt, b_mnt;
1158 
1159  fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1160  fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1161 
1162  if (fp64_is_NaN(a_exp, a_mnt) ||
1163  fp64_is_NaN(b_exp, b_mnt)) {
1164  *flags |= FPLIB_IOC;
1165  return 0;
1166  }
1167  if (!a_mnt && !b_mnt)
1168  return 1;
1169  if (a_sgn != b_sgn)
1170  return b_sgn;
1171  if (a_exp != b_exp)
1172  return a_sgn ^ (a_exp > b_exp);
1173  if (a_mnt != b_mnt)
1174  return a_sgn ^ (a_mnt > b_mnt);
1175  return 1;
1176 }
1177 
1178 static int
1179 fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags)
1180 {
1181  int a_sgn, a_exp, b_sgn, b_exp;
1182  uint64_t a_mnt, b_mnt;
1183 
1184  fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1185  fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1186 
1187  if (fp64_is_NaN(a_exp, a_mnt) ||
1188  fp64_is_NaN(b_exp, b_mnt)) {
1189  *flags |= FPLIB_IOC;
1190  return 0;
1191  }
1192  if (!a_mnt && !b_mnt)
1193  return 0;
1194  if (a_sgn != b_sgn)
1195  return b_sgn;
1196  if (a_exp != b_exp)
1197  return a_sgn ^ (a_exp > b_exp);
1198  if (a_mnt != b_mnt)
1199  return a_sgn ^ (a_mnt > b_mnt);
1200  return 0;
1201 }
1202 
1203 static int
1204 fp64_compare_un(uint64_t a, uint64_t b, int mode, int *flags)
1205 {
1206  int a_sgn, a_exp, b_sgn, b_exp;
1207  uint64_t a_mnt, b_mnt;
1208 
1209  fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1210  fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1211 
1212  if (fp64_is_NaN(a_exp, a_mnt) ||
1213  fp64_is_NaN(b_exp, b_mnt)) {
1214  if (fp64_is_signalling_NaN(a_exp, a_mnt) ||
1215  fp64_is_signalling_NaN(b_exp, b_mnt))
1216  *flags |= FPLIB_IOC;
1217  return 1;
1218  }
1219  return 0;
1220 }
1221 
1222 static uint16_t
1223 fp16_add(uint16_t a, uint16_t b, int neg, int mode, int *flags)
1224 {
1225  int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1226  uint16_t a_mnt, b_mnt, x, x_mnt;
1227 
1228  fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1229  fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1230 
1231  if ((x = fp16_process_NaNs(a, b, mode, flags))) {
1232  return x;
1233  }
1234 
1235  b_sgn ^= neg;
1236 
1237  // Handle infinities and zeroes:
1238  if (a_exp == FP16_EXP_INF && b_exp == FP16_EXP_INF && a_sgn != b_sgn) {
1239  *flags |= FPLIB_IOC;
1240  return fp16_defaultNaN();
1241  } else if (a_exp == FP16_EXP_INF) {
1242  return fp16_infinity(a_sgn);
1243  } else if (b_exp == FP16_EXP_INF) {
1244  return fp16_infinity(b_sgn);
1245  } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
1246  return fp16_zero(a_sgn);
1247  }
1248 
1249  a_mnt <<= 3;
1250  b_mnt <<= 3;
1251  if (a_exp >= b_exp) {
1252  b_mnt = (lsr16(b_mnt, a_exp - b_exp) |
1253  !!(b_mnt & (lsl16(1, a_exp - b_exp) - 1)));
1254  b_exp = a_exp;
1255  } else {
1256  a_mnt = (lsr16(a_mnt, b_exp - a_exp) |
1257  !!(a_mnt & (lsl16(1, b_exp - a_exp) - 1)));
1258  a_exp = b_exp;
1259  }
1260  x_sgn = a_sgn;
1261  x_exp = a_exp;
1262  if (a_sgn == b_sgn) {
1263  x_mnt = a_mnt + b_mnt;
1264  } else if (a_mnt >= b_mnt) {
1265  x_mnt = a_mnt - b_mnt;
1266  } else {
1267  x_sgn ^= 1;
1268  x_mnt = b_mnt - a_mnt;
1269  }
1270 
1271  if (!x_mnt) {
1272  // Sign of exact zero result depends on rounding mode
1273  return fp16_zero((mode & 3) == 2);
1274  }
1275 
1276  x_mnt = fp16_normalise(x_mnt, &x_exp);
1277 
1278  return fp16_round(x_sgn, x_exp + FP16_EXP_BITS - 3, x_mnt << 1,
1279  mode, flags);
1280 }
1281 
1282 static uint32_t
1283 fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags)
1284 {
1285  int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1286  uint32_t a_mnt, b_mnt, x, x_mnt;
1287 
1288  fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1289  fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1290 
1291  if ((x = fp32_process_NaNs(a, b, mode, flags))) {
1292  return x;
1293  }
1294 
1295  b_sgn ^= neg;
1296 
1297  // Handle infinities and zeroes:
1298  if (a_exp == FP32_EXP_INF && b_exp == FP32_EXP_INF && a_sgn != b_sgn) {
1299  *flags |= FPLIB_IOC;
1300  return fp32_defaultNaN();
1301  } else if (a_exp == FP32_EXP_INF) {
1302  return fp32_infinity(a_sgn);
1303  } else if (b_exp == FP32_EXP_INF) {
1304  return fp32_infinity(b_sgn);
1305  } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
1306  return fp32_zero(a_sgn);
1307  }
1308 
1309  a_mnt <<= 3;
1310  b_mnt <<= 3;
1311  if (a_exp >= b_exp) {
1312  b_mnt = (lsr32(b_mnt, a_exp - b_exp) |
1313  !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1)));
1314  b_exp = a_exp;
1315  } else {
1316  a_mnt = (lsr32(a_mnt, b_exp - a_exp) |
1317  !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1)));
1318  a_exp = b_exp;
1319  }
1320  x_sgn = a_sgn;
1321  x_exp = a_exp;
1322  if (a_sgn == b_sgn) {
1323  x_mnt = a_mnt + b_mnt;
1324  } else if (a_mnt >= b_mnt) {
1325  x_mnt = a_mnt - b_mnt;
1326  } else {
1327  x_sgn ^= 1;
1328  x_mnt = b_mnt - a_mnt;
1329  }
1330 
1331  if (!x_mnt) {
1332  // Sign of exact zero result depends on rounding mode
1333  return fp32_zero((mode & 3) == 2);
1334  }
1335 
1336  x_mnt = fp32_normalise(x_mnt, &x_exp);
1337 
1338  return fp32_round(x_sgn, x_exp + FP32_EXP_BITS - 3, x_mnt << 1,
1339  mode, flags);
1340 }
1341 
1342 static uint64_t
1343 fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags)
1344 {
1345  int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1346  uint64_t a_mnt, b_mnt, x, x_mnt;
1347 
1348  fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1349  fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1350 
1351  if ((x = fp64_process_NaNs(a, b, mode, flags))) {
1352  return x;
1353  }
1354 
1355  b_sgn ^= neg;
1356 
1357  // Handle infinities and zeroes:
1358  if (a_exp == FP64_EXP_INF && b_exp == FP64_EXP_INF && a_sgn != b_sgn) {
1359  *flags |= FPLIB_IOC;
1360  return fp64_defaultNaN();
1361  } else if (a_exp == FP64_EXP_INF) {
1362  return fp64_infinity(a_sgn);
1363  } else if (b_exp == FP64_EXP_INF) {
1364  return fp64_infinity(b_sgn);
1365  } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
1366  return fp64_zero(a_sgn);
1367  }
1368 
1369  a_mnt <<= 3;
1370  b_mnt <<= 3;
1371  if (a_exp >= b_exp) {
1372  b_mnt = (lsr64(b_mnt, a_exp - b_exp) |
1373  !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1)));
1374  b_exp = a_exp;
1375  } else {
1376  a_mnt = (lsr64(a_mnt, b_exp - a_exp) |
1377  !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1)));
1378  a_exp = b_exp;
1379  }
1380  x_sgn = a_sgn;
1381  x_exp = a_exp;
1382  if (a_sgn == b_sgn) {
1383  x_mnt = a_mnt + b_mnt;
1384  } else if (a_mnt >= b_mnt) {
1385  x_mnt = a_mnt - b_mnt;
1386  } else {
1387  x_sgn ^= 1;
1388  x_mnt = b_mnt - a_mnt;
1389  }
1390 
1391  if (!x_mnt) {
1392  // Sign of exact zero result depends on rounding mode
1393  return fp64_zero((mode & 3) == 2);
1394  }
1395 
1396  x_mnt = fp64_normalise(x_mnt, &x_exp);
1397 
1398  return fp64_round(x_sgn, x_exp + FP64_EXP_BITS - 3, x_mnt << 1,
1399  mode, flags);
1400 }
1401 
1402 static uint16_t
1403 fp16_mul(uint16_t a, uint16_t b, int mode, int *flags)
1404 {
1405  int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1406  uint16_t a_mnt, b_mnt, x;
1407  uint32_t x_mnt;
1408 
1409  fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1410  fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1411 
1412  if ((x = fp16_process_NaNs(a, b, mode, flags))) {
1413  return x;
1414  }
1415 
1416  // Handle infinities and zeroes:
1417  if ((a_exp == FP16_EXP_INF && !b_mnt) ||
1418  (b_exp == FP16_EXP_INF && !a_mnt)) {
1419  *flags |= FPLIB_IOC;
1420  return fp16_defaultNaN();
1421  } else if (a_exp == FP16_EXP_INF || b_exp == FP16_EXP_INF) {
1422  return fp16_infinity(a_sgn ^ b_sgn);
1423  } else if (!a_mnt || !b_mnt) {
1424  return fp16_zero(a_sgn ^ b_sgn);
1425  }
1426 
1427  // Multiply and normalise:
1428  x_sgn = a_sgn ^ b_sgn;
1429  x_exp = a_exp + b_exp - FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1;
1430  x_mnt = (uint32_t)a_mnt * b_mnt;
1431  x_mnt = fp32_normalise(x_mnt, &x_exp);
1432 
1433  // Convert to FP16_BITS bits, collapsing error into bottom bit:
1434  x_mnt = lsr32(x_mnt, FP16_BITS - 1) | !!lsl32(x_mnt, FP16_BITS + 1);
1435 
1436  return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
1437 }
1438 
1439 static uint32_t
1440 fp32_mul(uint32_t a, uint32_t b, int mode, int *flags)
1441 {
1442  int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1443  uint32_t a_mnt, b_mnt, x;
1444  uint64_t x_mnt;
1445 
1446  fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1447  fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1448 
1449  if ((x = fp32_process_NaNs(a, b, mode, flags))) {
1450  return x;
1451  }
1452 
1453  // Handle infinities and zeroes:
1454  if ((a_exp == FP32_EXP_INF && !b_mnt) ||
1455  (b_exp == FP32_EXP_INF && !a_mnt)) {
1456  *flags |= FPLIB_IOC;
1457  return fp32_defaultNaN();
1458  } else if (a_exp == FP32_EXP_INF || b_exp == FP32_EXP_INF) {
1459  return fp32_infinity(a_sgn ^ b_sgn);
1460  } else if (!a_mnt || !b_mnt) {
1461  return fp32_zero(a_sgn ^ b_sgn);
1462  }
1463 
1464  // Multiply and normalise:
1465  x_sgn = a_sgn ^ b_sgn;
1466  x_exp = a_exp + b_exp - FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1;
1467  x_mnt = (uint64_t)a_mnt * b_mnt;
1468  x_mnt = fp64_normalise(x_mnt, &x_exp);
1469 
1470  // Convert to FP32_BITS bits, collapsing error into bottom bit:
1471  x_mnt = lsr64(x_mnt, FP32_BITS - 1) | !!lsl64(x_mnt, FP32_BITS + 1);
1472 
1473  return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
1474 }
1475 
1476 static uint64_t
1477 fp64_mul(uint64_t a, uint64_t b, int mode, int *flags)
1478 {
1479  int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1480  uint64_t a_mnt, b_mnt, x;
1481  uint64_t x0_mnt, x1_mnt;
1482 
1483  fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1484  fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1485 
1486  if ((x = fp64_process_NaNs(a, b, mode, flags))) {
1487  return x;
1488  }
1489 
1490  // Handle infinities and zeroes:
1491  if ((a_exp == FP64_EXP_INF && !b_mnt) ||
1492  (b_exp == FP64_EXP_INF && !a_mnt)) {
1493  *flags |= FPLIB_IOC;
1494  return fp64_defaultNaN();
1495  } else if (a_exp == FP64_EXP_INF || b_exp == FP64_EXP_INF) {
1496  return fp64_infinity(a_sgn ^ b_sgn);
1497  } else if (!a_mnt || !b_mnt) {
1498  return fp64_zero(a_sgn ^ b_sgn);
1499  }
1500 
1501  // Multiply and normalise:
1502  x_sgn = a_sgn ^ b_sgn;
1503  x_exp = a_exp + b_exp - FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1;
1504  mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt);
1505  fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
1506 
1507  // Convert to FP64_BITS bits, collapsing error into bottom bit:
1508  x0_mnt = x1_mnt << 1 | !!x0_mnt;
1509 
1510  return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags);
1511 }
1512 
1513 static uint16_t
1514 fp16_muladd(uint16_t a, uint16_t b, uint16_t c, int scale,
1515  int mode, int *flags)
1516 {
1517  int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1518  uint16_t a_mnt, b_mnt, c_mnt, x;
1519  uint32_t x_mnt, y_mnt;
1520 
1521  fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1522  fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1523  fp16_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);
1524 
1525  x = fp16_process_NaNs3(a, b, c, mode, flags);
1526 
1527  // Quiet NaN added to product of zero and infinity:
1528  if (fp16_is_quiet_NaN(a_exp, a_mnt) &&
1529  ((!b_mnt && fp16_is_infinity(c_exp, c_mnt)) ||
1530  (!c_mnt && fp16_is_infinity(b_exp, b_mnt)))) {
1531  x = fp16_defaultNaN();
1532  *flags |= FPLIB_IOC;
1533  }
1534 
1535  if (x) {
1536  return x;
1537  }
1538 
1539  // Handle infinities and zeroes:
1540  if ((b_exp == FP16_EXP_INF && !c_mnt) ||
1541  (c_exp == FP16_EXP_INF && !b_mnt) ||
1542  (a_exp == FP16_EXP_INF &&
1543  (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF) &&
1544  (a_sgn != (b_sgn ^ c_sgn)))) {
1545  *flags |= FPLIB_IOC;
1546  return fp16_defaultNaN();
1547  }
1548  if (a_exp == FP16_EXP_INF)
1549  return fp16_infinity(a_sgn);
1550  if (b_exp == FP16_EXP_INF || c_exp == FP16_EXP_INF)
1551  return fp16_infinity(b_sgn ^ c_sgn);
1552  if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1553  return fp16_zero(a_sgn);
1554 
1555  x_sgn = a_sgn;
1556  x_exp = a_exp + 2 * FP16_EXP_BITS - 3;
1557  x_mnt = (uint32_t)a_mnt << (FP16_MANT_BITS + 4);
1558 
1559  // Multiply:
1560  y_sgn = b_sgn ^ c_sgn;
1561  y_exp = b_exp + c_exp - FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1 - 3;
1562  y_mnt = (uint32_t)b_mnt * c_mnt << 3;
1563  if (!y_mnt) {
1564  y_exp = x_exp;
1565  }
1566 
1567  // Add:
1568  if (x_exp >= y_exp) {
1569  y_mnt = (lsr32(y_mnt, x_exp - y_exp) |
1570  !!(y_mnt & (lsl32(1, x_exp - y_exp) - 1)));
1571  y_exp = x_exp;
1572  } else {
1573  x_mnt = (lsr32(x_mnt, y_exp - x_exp) |
1574  !!(x_mnt & (lsl32(1, y_exp - x_exp) - 1)));
1575  x_exp = y_exp;
1576  }
1577  if (x_sgn == y_sgn) {
1578  x_mnt = x_mnt + y_mnt;
1579  } else if (x_mnt >= y_mnt) {
1580  x_mnt = x_mnt - y_mnt;
1581  } else {
1582  x_sgn ^= 1;
1583  x_mnt = y_mnt - x_mnt;
1584  }
1585 
1586  if (!x_mnt) {
1587  // Sign of exact zero result depends on rounding mode
1588  return fp16_zero((mode & 3) == 2);
1589  }
1590 
1591  // Normalise into FP16_BITS bits, collapsing error into bottom bit:
1592  x_mnt = fp32_normalise(x_mnt, &x_exp);
1593  x_mnt = x_mnt >> (FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);
1594 
1595  return fp16_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
1596 }
1597 
1598 static uint32_t
1599 fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale,
1600  int mode, int *flags)
1601 {
1602  int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1603  uint32_t a_mnt, b_mnt, c_mnt, x;
1604  uint64_t x_mnt, y_mnt;
1605 
1606  fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1607  fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1608  fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);
1609 
1610  x = fp32_process_NaNs3(a, b, c, mode, flags);
1611 
1612  // Quiet NaN added to product of zero and infinity:
1613  if (fp32_is_quiet_NaN(a_exp, a_mnt) &&
1614  ((!b_mnt && fp32_is_infinity(c_exp, c_mnt)) ||
1615  (!c_mnt && fp32_is_infinity(b_exp, b_mnt)))) {
1616  x = fp32_defaultNaN();
1617  *flags |= FPLIB_IOC;
1618  }
1619 
1620  if (x) {
1621  return x;
1622  }
1623 
1624  // Handle infinities and zeroes:
1625  if ((b_exp == FP32_EXP_INF && !c_mnt) ||
1626  (c_exp == FP32_EXP_INF && !b_mnt) ||
1627  (a_exp == FP32_EXP_INF &&
1628  (b_exp == FP32_EXP_INF || c_exp == FP32_EXP_INF) &&
1629  (a_sgn != (b_sgn ^ c_sgn)))) {
1630  *flags |= FPLIB_IOC;
1631  return fp32_defaultNaN();
1632  }
1633  if (a_exp == FP32_EXP_INF)
1634  return fp32_infinity(a_sgn);
1635  if (b_exp == FP32_EXP_INF || c_exp == FP32_EXP_INF)
1636  return fp32_infinity(b_sgn ^ c_sgn);
1637  if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1638  return fp32_zero(a_sgn);
1639 
1640  x_sgn = a_sgn;
1641  x_exp = a_exp + 2 * FP32_EXP_BITS - 3;
1642  x_mnt = (uint64_t)a_mnt << (FP32_MANT_BITS + 4);
1643 
1644  // Multiply:
1645  y_sgn = b_sgn ^ c_sgn;
1646  y_exp = b_exp + c_exp - FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1 - 3;
1647  y_mnt = (uint64_t)b_mnt * c_mnt << 3;
1648  if (!y_mnt) {
1649  y_exp = x_exp;
1650  }
1651 
1652  // Add:
1653  if (x_exp >= y_exp) {
1654  y_mnt = (lsr64(y_mnt, x_exp - y_exp) |
1655  !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1)));
1656  y_exp = x_exp;
1657  } else {
1658  x_mnt = (lsr64(x_mnt, y_exp - x_exp) |
1659  !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1)));
1660  x_exp = y_exp;
1661  }
1662  if (x_sgn == y_sgn) {
1663  x_mnt = x_mnt + y_mnt;
1664  } else if (x_mnt >= y_mnt) {
1665  x_mnt = x_mnt - y_mnt;
1666  } else {
1667  x_sgn ^= 1;
1668  x_mnt = y_mnt - x_mnt;
1669  }
1670 
1671  if (!x_mnt) {
1672  // Sign of exact zero result depends on rounding mode
1673  return fp32_zero((mode & 3) == 2);
1674  }
1675 
1676  // Normalise into FP32_BITS bits, collapsing error into bottom bit:
1677  x_mnt = fp64_normalise(x_mnt, &x_exp);
1678  x_mnt = x_mnt >> (FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);
1679 
1680  return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
1681 }
1682 
1683 static uint64_t
1684 fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale,
1685  int mode, int *flags)
1686 {
1687  int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1688  uint64_t a_mnt, b_mnt, c_mnt, x;
1689  uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt;
1690 
1691  fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1692  fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1693  fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);
1694 
1695  x = fp64_process_NaNs3(a, b, c, mode, flags);
1696 
1697  // Quiet NaN added to product of zero and infinity:
1698  if (fp64_is_quiet_NaN(a_exp, a_mnt) &&
1699  ((!b_mnt && fp64_is_infinity(c_exp, c_mnt)) ||
1700  (!c_mnt && fp64_is_infinity(b_exp, b_mnt)))) {
1701  x = fp64_defaultNaN();
1702  *flags |= FPLIB_IOC;
1703  }
1704 
1705  if (x) {
1706  return x;
1707  }
1708 
1709  // Handle infinities and zeroes:
1710  if ((b_exp == FP64_EXP_INF && !c_mnt) ||
1711  (c_exp == FP64_EXP_INF && !b_mnt) ||
1712  (a_exp == FP64_EXP_INF &&
1713  (b_exp == FP64_EXP_INF || c_exp == FP64_EXP_INF) &&
1714  (a_sgn != (b_sgn ^ c_sgn)))) {
1715  *flags |= FPLIB_IOC;
1716  return fp64_defaultNaN();
1717  }
1718  if (a_exp == FP64_EXP_INF)
1719  return fp64_infinity(a_sgn);
1720  if (b_exp == FP64_EXP_INF || c_exp == FP64_EXP_INF)
1721  return fp64_infinity(b_sgn ^ c_sgn);
1722  if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1723  return fp64_zero(a_sgn);
1724 
1725  x_sgn = a_sgn;
1726  x_exp = a_exp + FP64_EXP_BITS;
1727  x0_mnt = 0;
1728  x1_mnt = a_mnt;
1729 
1730  // Multiply:
1731  y_sgn = b_sgn ^ c_sgn;
1732  y_exp = b_exp + c_exp - FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1 - 3;
1733  mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3);
1734  if (!y0_mnt && !y1_mnt) {
1735  y_exp = x_exp;
1736  }
1737 
1738  // Add:
1739  if (x_exp >= y_exp) {
1740  uint64_t t0, t1;
1741  lsl128(&t0, &t1, y0_mnt, y1_mnt,
1742  x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0);
1743  lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp);
1744  y0_mnt |= !!(t0 | t1);
1745  y_exp = x_exp;
1746  } else {
1747  uint64_t t0, t1;
1748  lsl128(&t0, &t1, x0_mnt, x1_mnt,
1749  y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0);
1750  lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp);
1751  x0_mnt |= !!(t0 | t1);
1752  x_exp = y_exp;
1753  }
1754  if (x_sgn == y_sgn) {
1755  add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
1756  } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) {
1757  sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
1758  } else {
1759  x_sgn ^= 1;
1760  sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt);
1761  }
1762 
1763  if (!x0_mnt && !x1_mnt) {
1764  // Sign of exact zero result depends on rounding mode
1765  return fp64_zero((mode & 3) == 2);
1766  }
1767 
1768  // Normalise into FP64_BITS bits, collapsing error into bottom bit:
1769  fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
1770  x0_mnt = x1_mnt << 1 | !!x0_mnt;
1771 
1772  return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags);
1773 }
1774 
1775 static uint16_t
1776 fp16_div(uint16_t a, uint16_t b, int mode, int *flags)
1777 {
1778  int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1779  uint16_t a_mnt, b_mnt, x;
1780  uint32_t x_mnt;
1781 
1782  fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1783  fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1784 
1785  if ((x = fp16_process_NaNs(a, b, mode, flags)))
1786  return x;
1787 
1788  // Handle infinities and zeroes:
1789  if ((a_exp == FP16_EXP_INF && b_exp == FP16_EXP_INF) ||
1790  (!a_mnt && !b_mnt)) {
1791  *flags |= FPLIB_IOC;
1792  return fp16_defaultNaN();
1793  }
1794  if (a_exp == FP16_EXP_INF || !b_mnt) {
1795  if (a_exp != FP16_EXP_INF)
1796  *flags |= FPLIB_DZC;
1797  return fp16_infinity(a_sgn ^ b_sgn);
1798  }
1799  if (!a_mnt || b_exp == FP16_EXP_INF)
1800  return fp16_zero(a_sgn ^ b_sgn);
1801 
1802  // Divide, setting bottom bit if inexact:
1803  a_mnt = fp16_normalise(a_mnt, &a_exp);
1804  x_sgn = a_sgn ^ b_sgn;
1805  x_exp = a_exp - b_exp + (FP16_EXP_BIAS + FP16_BITS + 2 * FP16_EXP_BITS - 3);
1806  x_mnt = ((uint32_t)a_mnt << (FP16_MANT_BITS - FP16_EXP_BITS + 3)) / b_mnt;
1807  x_mnt |= (x_mnt * b_mnt !=
1808  (uint32_t)a_mnt << (FP16_MANT_BITS - FP16_EXP_BITS + 3));
1809 
1810  // Normalise into FP16_BITS bits, collapsing error into bottom bit:
1811  x_mnt = fp32_normalise(x_mnt, &x_exp);
1812  x_mnt = x_mnt >> (FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);
1813 
1814  return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
1815 }
1816 
1817 static uint32_t
1818 fp32_div(uint32_t a, uint32_t b, int mode, int *flags)
1819 {
1820  int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1821  uint32_t a_mnt, b_mnt, x;
1822  uint64_t x_mnt;
1823 
1824  fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1825  fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1826 
1827  if ((x = fp32_process_NaNs(a, b, mode, flags)))
1828  return x;
1829 
1830  // Handle infinities and zeroes:
1831  if ((a_exp == FP32_EXP_INF && b_exp == FP32_EXP_INF) ||
1832  (!a_mnt && !b_mnt)) {
1833  *flags |= FPLIB_IOC;
1834  return fp32_defaultNaN();
1835  }
1836  if (a_exp == FP32_EXP_INF || !b_mnt) {
1837  if (a_exp != FP32_EXP_INF)
1838  *flags |= FPLIB_DZC;
1839  return fp32_infinity(a_sgn ^ b_sgn);
1840  }
1841  if (!a_mnt || b_exp == FP32_EXP_INF)
1842  return fp32_zero(a_sgn ^ b_sgn);
1843 
1844  // Divide, setting bottom bit if inexact:
1845  a_mnt = fp32_normalise(a_mnt, &a_exp);
1846  x_sgn = a_sgn ^ b_sgn;
1847  x_exp = a_exp - b_exp + (FP32_EXP_BIAS + FP32_BITS + 2 * FP32_EXP_BITS - 3);
1848  x_mnt = ((uint64_t)a_mnt << (FP32_MANT_BITS - FP32_EXP_BITS + 3)) / b_mnt;
1849  x_mnt |= (x_mnt * b_mnt !=
1850  (uint64_t)a_mnt << (FP32_MANT_BITS - FP32_EXP_BITS + 3));
1851 
1852  // Normalise into FP32_BITS bits, collapsing error into bottom bit:
1853  x_mnt = fp64_normalise(x_mnt, &x_exp);
1854  x_mnt = x_mnt >> (FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);
1855 
1856  return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
1857 }
1858 
1859 static uint64_t
1860 fp64_div(uint64_t a, uint64_t b, int mode, int *flags)
1861 {
1862  int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c;
1863  uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt;
1864 
1865  fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1866  fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1867 
1868  if ((x = fp64_process_NaNs(a, b, mode, flags)))
1869  return x;
1870 
1871  // Handle infinities and zeroes:
1872  if ((a_exp == FP64_EXP_INF && b_exp == FP64_EXP_INF) ||
1873  (!a_mnt && !b_mnt)) {
1874  *flags |= FPLIB_IOC;
1875  return fp64_defaultNaN();
1876  }
1877  if (a_exp == FP64_EXP_INF || !b_mnt) {
1878  if (a_exp != FP64_EXP_INF)
1879  *flags |= FPLIB_DZC;
1880  return fp64_infinity(a_sgn ^ b_sgn);
1881  }
1882  if (!a_mnt || b_exp == FP64_EXP_INF)
1883  return fp64_zero(a_sgn ^ b_sgn);
1884 
1885  // Find reciprocal of divisor with Newton-Raphson:
1886  a_mnt = fp64_normalise(a_mnt, &a_exp);
1887  b_mnt = fp64_normalise(b_mnt, &b_exp);
1888  x_mnt = ~(uint64_t)0 / (b_mnt >> 31);
1889  mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt);
1890  sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt);
1891  lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32);
1892  mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt);
1893  lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33);
1894 
1895  // Multiply by dividend:
1896  x_sgn = a_sgn ^ b_sgn;
1897  x_exp = a_exp - b_exp + FP64_EXP_BIAS + 8;
1898  mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2);
1899  lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4);
1900  x_mnt = x1_mnt;
1901 
1902  // This is an underestimate, so try adding one:
1903  mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1);
1904  c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11);
1905  if (c <= 0) {
1906  ++x_mnt;
1907  }
1908 
1909  x_mnt = fp64_normalise(x_mnt, &x_exp);
1910 
1911  return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
1912 }
1913 
1914 static void
1915 set_fpscr0(FPSCR &fpscr, int flags)
1916 {
1917  if (flags & FPLIB_IDC) {
1918  fpscr.idc = 1;
1919  }
1920  if (flags & FPLIB_IOC) {
1921  fpscr.ioc = 1;
1922  }
1923  if (flags & FPLIB_DZC) {
1924  fpscr.dzc = 1;
1925  }
1926  if (flags & FPLIB_OFC) {
1927  fpscr.ofc = 1;
1928  }
1929  if (flags & FPLIB_UFC) {
1930  fpscr.ufc = 1;
1931  }
1932  if (flags & FPLIB_IXC) {
1933  fpscr.ixc = 1;
1934  }
1935 }
1936 
1937 static uint16_t
1938 fp16_scale(uint16_t a, int16_t b, int mode, int *flags)
1939 {
1940  int a_sgn, a_exp;
1941  uint16_t a_mnt;
1942 
1943  fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1944 
1945  // Handle NaNs:
1946  if (fp16_is_NaN(a_exp, a_mnt)) {
1947  return fp16_process_NaN(a, mode, flags);
1948  }
1949 
1950  // Handle zeroes:
1951  if (!a_mnt) {
1952  return fp16_zero(a_sgn);
1953  }
1954 
1955  // Handle infinities:
1956  if (a_exp == FP16_EXP_INF) {
1957  return fp16_infinity(a_sgn);
1958  }
1959 
1960  b = b < -300 ? -300 : b;
1961  b = b > 300 ? 300 : b;
1962  a_exp += b;
1963  a_mnt <<= 3;
1964 
1965  a_mnt = fp16_normalise(a_mnt, &a_exp);
1966 
1967  return fp16_round(a_sgn, a_exp + FP16_EXP_BITS - 3, a_mnt << 1,
1968  mode, flags);
1969 }
1970 
1971 static uint32_t
1972 fp32_scale(uint32_t a, int32_t b, int mode, int *flags)
1973 {
1974  int a_sgn, a_exp;
1975  uint32_t a_mnt;
1976 
1977  fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1978 
1979  // Handle NaNs:
1980  if (fp32_is_NaN(a_exp, a_mnt)) {
1981  return fp32_process_NaN(a, mode, flags);
1982  }
1983 
1984  // Handle zeroes:
1985  if (!a_mnt) {
1986  return fp32_zero(a_sgn);
1987  }
1988 
1989  // Handle infinities:
1990  if (a_exp == FP32_EXP_INF) {
1991  return fp32_infinity(a_sgn);
1992  }
1993 
1994  b = b < -300 ? -300 : b;
1995  b = b > 300 ? 300 : b;
1996  a_exp += b;
1997  a_mnt <<= 3;
1998 
1999  a_mnt = fp32_normalise(a_mnt, &a_exp);
2000 
2001  return fp32_round(a_sgn, a_exp + FP32_EXP_BITS - 3, a_mnt << 1,
2002  mode, flags);
2003 }
2004 
2005 static uint64_t
2006 fp64_scale(uint64_t a, int64_t b, int mode, int *flags)
2007 {
2008  int a_sgn, a_exp;
2009  uint64_t a_mnt;
2010 
2011  fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2012 
2013  // Handle NaNs:
2014  if (fp64_is_NaN(a_exp, a_mnt)) {
2015  return fp64_process_NaN(a, mode, flags);
2016  }
2017 
2018  // Handle zeroes:
2019  if (!a_mnt) {
2020  return fp64_zero(a_sgn);
2021  }
2022 
2023  // Handle infinities:
2024  if (a_exp == FP64_EXP_INF) {
2025  return fp64_infinity(a_sgn);
2026  }
2027 
2028  b = b < -3000 ? -3000 : b;
2029  b = b > 3000 ? 3000 : b;
2030  a_exp += b;
2031  a_mnt <<= 3;
2032 
2033  a_mnt = fp64_normalise(a_mnt, &a_exp);
2034 
2035  return fp64_round(a_sgn, a_exp + FP64_EXP_BITS - 3, a_mnt << 1,
2036  mode, flags);
2037 }
2038 
2039 static uint16_t
2040 fp16_sqrt(uint16_t a, int mode, int *flags)
2041 {
2042  int a_sgn, a_exp, x_sgn, x_exp;
2043  uint16_t a_mnt, x_mnt;
2044  uint32_t x, t0, t1;
2045 
2046  fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2047 
2048  // Handle NaNs:
2049  if (fp16_is_NaN(a_exp, a_mnt))
2050  return fp16_process_NaN(a, mode, flags);
2051 
2052  // Handle infinities and zeroes:
2053  if (!a_mnt)
2054  return fp16_zero(a_sgn);
2055  if (a_exp == FP16_EXP_INF && !a_sgn)
2056  return fp16_infinity(a_sgn);
2057  if (a_sgn) {
2058  *flags |= FPLIB_IOC;
2059  return fp16_defaultNaN();
2060  }
2061 
2062  a_mnt = fp16_normalise(a_mnt, &a_exp);
2063  if (a_exp & 1) {
2064  ++a_exp;
2065  a_mnt >>= 1;
2066  }
2067 
2068  // x = (a * 3 + 5) / 8
2069  x = ((uint32_t)a_mnt << 14) + ((uint32_t)a_mnt << 13) + ((uint32_t)5 << 28);
2070 
2071  // x = (a / x + x) / 2; // 8-bit accuracy
2072  x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15;
2073 
2074  // x = (a / x + x) / 2; // 16-bit accuracy
2075  x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15;
2076 
2077  x_sgn = 0;
2078  x_exp = (a_exp + 27) >> 1;
2079  x_mnt = ((x - (1 << 18)) >> 19) + 1;
2080  t1 = (uint32_t)x_mnt * x_mnt;
2081  t0 = (uint32_t)a_mnt << 9;
2082  if (t1 > t0) {
2083  --x_mnt;
2084  }
2085 
2086  x_mnt = fp16_normalise(x_mnt, &x_exp);
2087 
2088  return fp16_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
2089 }
2090 
2091 static uint32_t
2092 fp32_sqrt(uint32_t a, int mode, int *flags)
2093 {
2094  int a_sgn, a_exp, x_sgn, x_exp;
2095  uint32_t a_mnt, x, x_mnt;
2096  uint64_t t0, t1;
2097 
2098  fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2099 
2100  // Handle NaNs:
2101  if (fp32_is_NaN(a_exp, a_mnt))
2102  return fp32_process_NaN(a, mode, flags);
2103 
2104  // Handle infinities and zeroes:
2105  if (!a_mnt)
2106  return fp32_zero(a_sgn);
2107  if (a_exp == FP32_EXP_INF && !a_sgn)
2108  return fp32_infinity(a_sgn);
2109  if (a_sgn) {
2110  *flags |= FPLIB_IOC;
2111  return fp32_defaultNaN();
2112  }
2113 
2114  a_mnt = fp32_normalise(a_mnt, &a_exp);
2115  if (!(a_exp & 1)) {
2116  ++a_exp;
2117  a_mnt >>= 1;
2118  }
2119 
2120  // x = (a * 3 + 5) / 8
2121  x = (a_mnt >> 2) + (a_mnt >> 3) + ((uint32_t)5 << 28);
2122 
2123  // x = (a / x + x) / 2; // 8-bit accuracy
2124  x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
2125 
2126  // x = (a / x + x) / 2; // 16-bit accuracy
2127  x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
2128 
2129  // x = (a / x + x) / 2; // 32-bit accuracy
2130  x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1);
2131 
2132  x_sgn = 0;
2133  x_exp = (a_exp + 147) >> 1;
2134  x_mnt = ((x - (1 << 5)) >> 6) + 1;
2135  t1 = (uint64_t)x_mnt * x_mnt;
2136  t0 = (uint64_t)a_mnt << 19;
2137  if (t1 > t0) {
2138  --x_mnt;
2139  }
2140 
2141  x_mnt = fp32_normalise(x_mnt, &x_exp);
2142 
2143  return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
2144 }
2145 
2146 static uint64_t
2147 fp64_sqrt(uint64_t a, int mode, int *flags)
2148 {
2149  int a_sgn, a_exp, x_sgn, x_exp, c;
2150  uint64_t a_mnt, x_mnt, r, x0, x1;
2151  uint32_t x;
2152 
2153  fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2154 
2155  // Handle NaNs:
2156  if (fp64_is_NaN(a_exp, a_mnt))
2157  return fp64_process_NaN(a, mode, flags);
2158 
2159  // Handle infinities and zeroes:
2160  if (!a_mnt)
2161  return fp64_zero(a_sgn);
2162  if (a_exp == FP64_EXP_INF && !a_sgn)
2163  return fp64_infinity(a_sgn);
2164  if (a_sgn) {
2165  *flags |= FPLIB_IOC;
2166  return fp64_defaultNaN();
2167  }
2168 
2169  a_mnt = fp64_normalise(a_mnt, &a_exp);
2170  if (a_exp & 1) {
2171  ++a_exp;
2172  a_mnt >>= 1;
2173  }
2174 
2175  // x = (a * 3 + 5) / 8
2176  x = (a_mnt >> 34) + (a_mnt >> 35) + ((uint32_t)5 << 28);
2177 
2178  // x = (a / x + x) / 2; // 8-bit accuracy
2179  x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
2180 
2181  // x = (a / x + x) / 2; // 16-bit accuracy
2182  x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
2183 
2184  // x = (a / x + x) / 2; // 32-bit accuracy
2185  x = ((a_mnt / x) >> 2) + (x >> 1);
2186 
2187  // r = 1 / x; // 32-bit accuracy
2188  r = ((uint64_t)1 << 62) / x;
2189 
2190  // r = r * (2 - x * r); // 64-bit accuracy
2191  mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r);
2192  lsr128(&x0, &x1, x0, x1, 31);
2193 
2194  // x = (x + a * r) / 2; // 64-bit accuracy
2195  mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2);
2196  lsl128(&x0, &x1, x0, x1, 5);
2197  lsr128(&x0, &x1, x0, x1, 56);
2198 
2199  x0 = ((uint64_t)x << 31) + (x0 >> 1);
2200 
2201  x_sgn = 0;
2202  x_exp = (a_exp + 1053) >> 1;
2203  x_mnt = x0;
2204  x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1;
2205  mul62x62(&x0, &x1, x_mnt, x_mnt);
2206  lsl128(&x0, &x1, x0, x1, 19);
2207  c = cmp128(x0, x1, 0, a_mnt);
2208  if (c > 0)
2209  --x_mnt;
2210 
2211  x_mnt = fp64_normalise(x_mnt, &x_exp);
2212 
2213  return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
2214 }
2215 
2216 static int
2217 modeConv(FPSCR fpscr)
2218 {
2219  uint32_t x = (uint32_t)fpscr;
2220  return (x >> 22 & 0xf) | (x >> 19 & 1 ? FPLIB_FZ16 : 0);
2221  // AHP bit is ignored. Only fplibConvert uses AHP.
2222 }
2223 
2224 static void
2225 set_fpscr(FPSCR &fpscr, int flags)
2226 {
2227  // translate back to FPSCR
2228  bool underflow = false;
2229  if (flags & FPLIB_IDC) {
2230  fpscr.idc = 1;
2231  }
2232  if (flags & FPLIB_IOC) {
2233  fpscr.ioc = 1;
2234  }
2235  if (flags & FPLIB_DZC) {
2236  fpscr.dzc = 1;
2237  }
2238  if (flags & FPLIB_OFC) {
2239  fpscr.ofc = 1;
2240  }
2241  if (flags & FPLIB_UFC) {
2242  underflow = true; //xx Why is this required?
2243  fpscr.ufc = 1;
2244  }
2245  if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) {
2246  fpscr.ixc = 1;
2247  }
2248 }
2249 
2250 template <>
2251 bool
2252 fplibCompareEQ(uint16_t a, uint16_t b, FPSCR &fpscr)
2253 {
2254  int flags = 0;
2255  int x = fp16_compare_eq(a, b, modeConv(fpscr), &flags);
2256  set_fpscr(fpscr, flags);
2257  return x;
2258 }
2259 
2260 template <>
2261 bool
2262 fplibCompareGE(uint16_t a, uint16_t b, FPSCR &fpscr)
2263 {
2264  int flags = 0;
2265  int x = fp16_compare_ge(a, b, modeConv(fpscr), &flags);
2266  set_fpscr(fpscr, flags);
2267  return x;
2268 }
2269 
2270 template <>
2271 bool
2272 fplibCompareGT(uint16_t a, uint16_t b, FPSCR &fpscr)
2273 {
2274  int flags = 0;
2275  int x = fp16_compare_gt(a, b, modeConv(fpscr), &flags);
2276  set_fpscr(fpscr, flags);
2277  return x;
2278 }
2279 
2280 template <>
2281 bool
2282 fplibCompareUN(uint16_t a, uint16_t b, FPSCR &fpscr)
2283 {
2284  int flags = 0;
2285  int x = fp16_compare_un(a, b, modeConv(fpscr), &flags);
2286  set_fpscr(fpscr, flags);
2287  return x;
2288 }
2289 
2290 template <>
2291 bool
2292 fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr)
2293 {
2294  int flags = 0;
2295  int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags);
2296  set_fpscr(fpscr, flags);
2297  return x;
2298 }
2299 
2300 template <>
2301 bool
2302 fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr)
2303 {
2304  int flags = 0;
2305  int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags);
2306  set_fpscr(fpscr, flags);
2307  return x;
2308 }
2309 
2310 template <>
2311 bool
2312 fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr)
2313 {
2314  int flags = 0;
2315  int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags);
2316  set_fpscr(fpscr, flags);
2317  return x;
2318 }
2319 
2320 template <>
2321 bool
2322 fplibCompareUN(uint32_t a, uint32_t b, FPSCR &fpscr)
2323 {
2324  int flags = 0;
2325  int x = fp32_compare_un(a, b, modeConv(fpscr), &flags);
2326  set_fpscr(fpscr, flags);
2327  return x;
2328 }
2329 
2330 template <>
2331 bool
2332 fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr)
2333 {
2334  int flags = 0;
2335  int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags);
2336  set_fpscr(fpscr, flags);
2337  return x;
2338 }
2339 
2340 template <>
2341 bool
2342 fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr)
2343 {
2344  int flags = 0;
2345  int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags);
2346  set_fpscr(fpscr, flags);
2347  return x;
2348 }
2349 
2350 template <>
2351 bool
2352 fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr)
2353 {
2354  int flags = 0;
2355  int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags);
2356  set_fpscr(fpscr, flags);
2357  return x;
2358 }
2359 
2360 template <>
2361 bool
2362 fplibCompareUN(uint64_t a, uint64_t b, FPSCR &fpscr)
2363 {
2364  int flags = 0;
2365  int x = fp64_compare_un(a, b, modeConv(fpscr), &flags);
2366  set_fpscr(fpscr, flags);
2367  return x;
2368 }
2369 
2370 template <>
2371 uint16_t
2372 fplibAbs(uint16_t op)
2373 {
2374  return op & ~(1ULL << (FP16_BITS - 1));
2375 }
2376 
2377 template <>
2378 uint32_t
2379 fplibAbs(uint32_t op)
2380 {
2381  return op & ~(1ULL << (FP32_BITS - 1));
2382 }
2383 
2384 template <>
2385 uint64_t
2386 fplibAbs(uint64_t op)
2387 {
2388  return op & ~(1ULL << (FP64_BITS - 1));
2389 }
2390 
2391 template <>
2392 uint16_t
2393 fplibAdd(uint16_t op1, uint16_t op2, FPSCR &fpscr)
2394 {
2395  int flags = 0;
2396  uint16_t result = fp16_add(op1, op2, 0, modeConv(fpscr), &flags);
2397  set_fpscr0(fpscr, flags);
2398  return result;
2399 }
2400 
2401 template <>
2402 uint32_t
2403 fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2404 {
2405  int flags = 0;
2406  uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags);
2407  set_fpscr0(fpscr, flags);
2408  return result;
2409 }
2410 
2411 template <>
2412 uint64_t
2413 fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2414 {
2415  int flags = 0;
2416  uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags);
2417  set_fpscr0(fpscr, flags);
2418  return result;
2419 }
2420 
2421 template <>
2422 int
2423 fplibCompare(uint16_t op1, uint16_t op2, bool signal_nans, FPSCR &fpscr)
2424 {
2425  int mode = modeConv(fpscr);
2426  int flags = 0;
2427  int sgn1, exp1, sgn2, exp2, result;
2428  uint16_t mnt1, mnt2;
2429 
2430  fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2431  fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2432 
2433  if (fp16_is_NaN(exp1, mnt1) || fp16_is_NaN(exp2, mnt2)) {
2434  result = 3;
2435  if (fp16_is_signalling_NaN(exp1, mnt1) ||
2436  fp16_is_signalling_NaN(exp2, mnt2) || signal_nans)
2437  flags |= FPLIB_IOC;
2438  } else {
2439  if (op1 == op2 || (!mnt1 && !mnt2)) {
2440  result = 6;
2441  } else if (sgn1 != sgn2) {
2442  result = sgn1 ? 8 : 2;
2443  } else if (exp1 != exp2) {
2444  result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2445  } else {
2446  result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2447  }
2448  }
2449 
2450  set_fpscr0(fpscr, flags);
2451 
2452  return result;
2453 }
2454 
2455 template <>
2456 int
2457 fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr)
2458 {
2459  int mode = modeConv(fpscr);
2460  int flags = 0;
2461  int sgn1, exp1, sgn2, exp2, result;
2462  uint32_t mnt1, mnt2;
2463 
2464  fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2465  fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2466 
2467  if (fp32_is_NaN(exp1, mnt1) || fp32_is_NaN(exp2, mnt2)) {
2468  result = 3;
2469  if (fp32_is_signalling_NaN(exp1, mnt1) ||
2470  fp32_is_signalling_NaN(exp2, mnt2) || signal_nans)
2471  flags |= FPLIB_IOC;
2472  } else {
2473  if (op1 == op2 || (!mnt1 && !mnt2)) {
2474  result = 6;
2475  } else if (sgn1 != sgn2) {
2476  result = sgn1 ? 8 : 2;
2477  } else if (exp1 != exp2) {
2478  result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2479  } else {
2480  result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2481  }
2482  }
2483 
2484  set_fpscr0(fpscr, flags);
2485 
2486  return result;
2487 }
2488 
2489 template <>
2490 int
2491 fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr)
2492 {
2493  int mode = modeConv(fpscr);
2494  int flags = 0;
2495  int sgn1, exp1, sgn2, exp2, result;
2496  uint64_t mnt1, mnt2;
2497 
2498  fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2499  fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2500 
2501  if (fp64_is_NaN(exp1, mnt1) || fp64_is_NaN(exp2, mnt2)) {
2502  result = 3;
2503  if (fp64_is_signalling_NaN(exp1, mnt1) ||
2504  fp64_is_signalling_NaN(exp2, mnt2) || signal_nans)
2505  flags |= FPLIB_IOC;
2506  } else {
2507  if (op1 == op2 || (!mnt1 && !mnt2)) {
2508  result = 6;
2509  } else if (sgn1 != sgn2) {
2510  result = sgn1 ? 8 : 2;
2511  } else if (exp1 != exp2) {
2512  result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2513  } else {
2514  result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2515  }
2516  }
2517 
2518  set_fpscr0(fpscr, flags);
2519 
2520  return result;
2521 }
2522 
2523 static uint16_t
2525 {
2526  return fp16_pack(op >> (FP32_BITS - 1), FP16_EXP_INF,
2527  1ULL << (FP16_MANT_BITS - 1) |
2529 }
2530 
2531 static uint16_t
2533 {
2534  return fp16_pack(op >> (FP64_BITS - 1), FP16_EXP_INF,
2535  1ULL << (FP16_MANT_BITS - 1) |
2537 }
2538 
2539 static uint32_t
2541 {
2542  return fp32_pack(op >> (FP16_BITS - 1), FP32_EXP_INF,
2543  1ULL << (FP32_MANT_BITS - 1) |
2544  (uint32_t)op << (FP32_MANT_BITS - FP16_MANT_BITS));
2545 }
2546 
2547 static uint32_t
2549 {
2550  return fp32_pack(op >> (FP64_BITS - 1), FP32_EXP_INF,
2551  1ULL << (FP32_MANT_BITS - 1) |
2553 }
2554 
2555 static uint64_t
2557 {
2558  return fp64_pack(op >> (FP16_BITS - 1), FP64_EXP_INF,
2559  1ULL << (FP64_MANT_BITS - 1) |
2560  (uint64_t)op << (FP64_MANT_BITS - FP16_MANT_BITS));
2561 }
2562 
2563 static uint64_t
2565 {
2566  return fp64_pack(op >> (FP32_BITS - 1), FP64_EXP_INF,
2567  1ULL << (FP64_MANT_BITS - 1) |
2568  (uint64_t)op << (FP64_MANT_BITS - FP32_MANT_BITS));
2569 }
2570 
2571 static uint16_t
2573 {
2574  return fp16_pack(sgn, FP16_EXP_BIAS, 1ULL << (FP16_MANT_BITS - 1));
2575 }
2576 
2577 static uint32_t
2579 {
2580  return fp32_pack(sgn, FP32_EXP_BIAS, 1ULL << (FP32_MANT_BITS - 1));
2581 }
2582 
2583 static uint64_t
2585 {
2586  return fp64_pack(sgn, FP64_EXP_BIAS, 1ULL << (FP64_MANT_BITS - 1));
2587 }
2588 
2589 static uint16_t
2591 {
2592  return fp16_pack(sgn, FP16_EXP_BIAS + 1, 1ULL << (FP16_MANT_BITS - 1));
2593 }
2594 
2595 static uint32_t
2597 {
2598  return fp32_pack(sgn, FP32_EXP_BIAS + 1, 1ULL << (FP32_MANT_BITS - 1));
2599 }
2600 
2601 static uint64_t
2603 {
2604  return fp64_pack(sgn, FP64_EXP_BIAS + 1, 1ULL << (FP64_MANT_BITS - 1));
2605 }
2606 
2607 static uint16_t
2608 fp16_FPTwo(int sgn)
2609 {
2610  return fp16_pack(sgn, FP16_EXP_BIAS + 1, 0);
2611 }
2612 
2613 static uint32_t
2614 fp32_FPTwo(int sgn)
2615 {
2616  return fp32_pack(sgn, FP32_EXP_BIAS + 1, 0);
2617 }
2618 
2619 static uint64_t
2620 fp64_FPTwo(int sgn)
2621 {
2622  return fp64_pack(sgn, FP64_EXP_BIAS + 1, 0);
2623 }
2624 
2625 template <>
2626 uint16_t
2627 fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
2628 {
2629  int mode = modeConv(fpscr);
2630  int flags = 0;
2631  int sgn, exp;
2632  uint32_t mnt;
2633  uint16_t result;
2634 
2635  // Unpack floating-point operand optionally with flush-to-zero:
2636  fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2637 
2638  bool alt_hp = fpscr.ahp;
2639 
2640  if (fp32_is_NaN(exp, mnt)) {
2641  if (alt_hp) {
2642  result = fp16_zero(sgn);
2643  } else if (fpscr.dn) {
2644  result = fp16_defaultNaN();
2645  } else {
2646  result = fp16_FPConvertNaN_32(op);
2647  }
2648  if (!(mnt >> (FP32_MANT_BITS - 1) & 1) || alt_hp) {
2649  flags |= FPLIB_IOC;
2650  }
2651  } else if (exp == FP32_EXP_INF) {
2652  if (alt_hp) {
2653  result = ((uint16_t)sgn << (FP16_BITS - 1) |
2654  ((1ULL << (FP16_BITS - 1)) - 1));
2655  flags |= FPLIB_IOC;
2656  } else {
2657  result = fp16_infinity(sgn);
2658  }
2659  } else if (!mnt) {
2660  result = fp16_zero(sgn);
2661  } else {
2662  result =
2664  mnt >> (FP32_MANT_BITS - FP16_BITS) |
2665  !!(mnt & ((1ULL << (FP32_MANT_BITS - FP16_BITS)) - 1)),
2666  rounding, (mode & 0xf) | alt_hp << 4, &flags);
2667  }
2668 
2669  set_fpscr0(fpscr, flags);
2670 
2671  return result;
2672 }
2673 
2674 template <>
2675 uint16_t
2676 fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
2677 {
2678  int mode = modeConv(fpscr);
2679  int flags = 0;
2680  int sgn, exp;
2681  uint64_t mnt;
2682  uint16_t result;
2683 
2684  // Unpack floating-point operand optionally with flush-to-zero:
2685  fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2686 
2687  bool alt_hp = fpscr.ahp;
2688 
2689  if (fp64_is_NaN(exp, mnt)) {
2690  if (alt_hp) {
2691  result = fp16_zero(sgn);
2692  } else if (fpscr.dn) {
2693  result = fp16_defaultNaN();
2694  } else {
2695  result = fp16_FPConvertNaN_64(op);
2696  }
2697  if (!(mnt >> (FP64_MANT_BITS - 1) & 1) || alt_hp) {
2698  flags |= FPLIB_IOC;
2699  }
2700  } else if (exp == FP64_EXP_INF) {
2701  if (alt_hp) {
2702  result = ((uint16_t)sgn << (FP16_BITS - 1) |
2703  ((1ULL << (FP16_BITS - 1)) - 1));
2704  flags |= FPLIB_IOC;
2705  } else {
2706  result = fp16_infinity(sgn);
2707  }
2708  } else if (!mnt) {
2709  result = fp16_zero(sgn);
2710  } else {
2711  result =
2713  mnt >> (FP64_MANT_BITS - FP16_BITS) |
2714  !!(mnt & ((1ULL << (FP64_MANT_BITS - FP16_BITS)) - 1)),
2715  rounding, (mode & 0xf) | alt_hp << 4, &flags);
2716  }
2717 
2718  set_fpscr0(fpscr, flags);
2719 
2720  return result;
2721 }
2722 
2723 template <>
2724 uint32_t
2725 fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
2726 {
2727  int mode = modeConv(fpscr);
2728  int flags = 0;
2729  int sgn, exp;
2730  uint16_t mnt;
2731  uint32_t result;
2732 
2733  // Unpack floating-point operand optionally with flush-to-zero:
2734  fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags);
2735 
2736  if (fp16_is_NaN(exp, mnt) && !fpscr.ahp) {
2737  if (fpscr.dn) {
2738  result = fp32_defaultNaN();
2739  } else {
2740  result = fp32_FPConvertNaN_16(op);
2741  }
2742  if (!(mnt >> (FP16_MANT_BITS - 1) & 1)) {
2743  flags |= FPLIB_IOC;
2744  }
2745  } else if (exp == FP16_EXP_INF && !fpscr.ahp) {
2746  result = fp32_infinity(sgn);
2747  } else if (!mnt) {
2748  result = fp32_zero(sgn);
2749  } else {
2750  mnt = fp16_normalise(mnt, &exp);
2751  result = fp32_pack(sgn, (exp - FP16_EXP_BIAS +
2753  (uint32_t)mnt << (FP32_MANT_BITS - FP16_BITS + 1));
2754  }
2755 
2756  set_fpscr0(fpscr, flags);
2757 
2758  return result;
2759 }
2760 
2761 template <>
2762 uint32_t
2763 fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
2764 {
2765  int mode = modeConv(fpscr);
2766  int flags = 0;
2767  int sgn, exp;
2768  uint64_t mnt;
2769  uint32_t result;
2770 
2771  // Unpack floating-point operand optionally with flush-to-zero:
2772  fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2773 
2774  if (fp64_is_NaN(exp, mnt)) {
2775  if (fpscr.dn) {
2776  result = fp32_defaultNaN();
2777  } else {
2778  result = fp32_FPConvertNaN_64(op);
2779  }
2780  if (!(mnt >> (FP64_MANT_BITS - 1) & 1)) {
2781  flags |= FPLIB_IOC;
2782  }
2783  } else if (exp == FP64_EXP_INF) {
2784  result = fp32_infinity(sgn);
2785  } else if (!mnt) {
2786  result = fp32_zero(sgn);
2787  } else {
2788  result =
2790  mnt >> (FP64_MANT_BITS - FP32_BITS) |
2791  !!(mnt & ((1ULL << (FP64_MANT_BITS - FP32_BITS)) - 1)),
2792  rounding, mode, &flags);
2793  }
2794 
2795  set_fpscr0(fpscr, flags);
2796 
2797  return result;
2798 }
2799 
2800 template <>
2801 uint64_t
2802 fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
2803 {
2804  int mode = modeConv(fpscr);
2805  int flags = 0;
2806  int sgn, exp;
2807  uint16_t mnt;
2808  uint64_t result;
2809 
2810  // Unpack floating-point operand optionally with flush-to-zero:
2811  fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags);
2812 
2813  if (fp16_is_NaN(exp, mnt) && !fpscr.ahp) {
2814  if (fpscr.dn) {
2815  result = fp64_defaultNaN();
2816  } else {
2817  result = fp64_FPConvertNaN_16(op);
2818  }
2819  if (!(mnt >> (FP16_MANT_BITS - 1) & 1)) {
2820  flags |= FPLIB_IOC;
2821  }
2822  } else if (exp == FP16_EXP_INF && !fpscr.ahp) {
2823  result = fp64_infinity(sgn);
2824  } else if (!mnt) {
2825  result = fp64_zero(sgn);
2826  } else {
2827  mnt = fp16_normalise(mnt, &exp);
2828  result = fp64_pack(sgn, (exp - FP16_EXP_BIAS +
2830  (uint64_t)mnt << (FP64_MANT_BITS - FP16_BITS + 1));
2831  }
2832 
2833  set_fpscr0(fpscr, flags);
2834 
2835  return result;
2836 }
2837 
2838 template <>
2839 uint64_t
2840 fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
2841 {
2842  int mode = modeConv(fpscr);
2843  int flags = 0;
2844  int sgn, exp;
2845  uint32_t mnt;
2846  uint64_t result;
2847 
2848  // Unpack floating-point operand optionally with flush-to-zero:
2849  fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2850 
2851  if (fp32_is_NaN(exp, mnt)) {
2852  if (fpscr.dn) {
2853  result = fp64_defaultNaN();
2854  } else {
2855  result = fp64_FPConvertNaN_32(op);
2856  }
2857  if (!(mnt >> (FP32_MANT_BITS - 1) & 1)) {
2858  flags |= FPLIB_IOC;
2859  }
2860  } else if (exp == FP32_EXP_INF) {
2861  result = fp64_infinity(sgn);
2862  } else if (!mnt) {
2863  result = fp64_zero(sgn);
2864  } else {
2865  mnt = fp32_normalise(mnt, &exp);
2866  result = fp64_pack(sgn, (exp - FP32_EXP_BIAS +
2868  (uint64_t)mnt << (FP64_MANT_BITS - FP32_BITS + 1));
2869  }
2870 
2871  set_fpscr0(fpscr, flags);
2872 
2873  return result;
2874 }
2875 
2876 template <>
2877 uint16_t
2878 fplibMulAdd(uint16_t addend, uint16_t op1, uint16_t op2, FPSCR &fpscr)
2879 {
2880  int flags = 0;
2881  uint16_t result = fp16_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2882  set_fpscr0(fpscr, flags);
2883  return result;
2884 }
2885 
2886 template <>
2887 uint32_t
2888 fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr)
2889 {
2890  int flags = 0;
2891  uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2892  set_fpscr0(fpscr, flags);
2893  return result;
2894 }
2895 
2896 template <>
2897 uint64_t
2898 fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr)
2899 {
2900  int flags = 0;
2901  uint64_t result = fp64_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2902  set_fpscr0(fpscr, flags);
2903  return result;
2904 }
2905 
2906 template <>
2907 uint16_t
2908 fplibDiv(uint16_t op1, uint16_t op2, FPSCR &fpscr)
2909 {
2910  int flags = 0;
2911  uint16_t result = fp16_div(op1, op2, modeConv(fpscr), &flags);
2912  set_fpscr0(fpscr, flags);
2913  return result;
2914 }
2915 
2916 template <>
2917 uint32_t
2918 fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2919 {
2920  int flags = 0;
2921  uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags);
2922  set_fpscr0(fpscr, flags);
2923  return result;
2924 }
2925 
2926 template <>
2927 uint64_t
2928 fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2929 {
2930  int flags = 0;
2931  uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags);
2932  set_fpscr0(fpscr, flags);
2933  return result;
2934 }
2935 
2936 template <>
2937 uint16_t
2938 fplibExpA(uint16_t op)
2939 {
2940  static uint16_t coeff[32] = {
2941  0x0000,
2942  0x0016,
2943  0x002d,
2944  0x0045,
2945  0x005d,
2946  0x0075,
2947  0x008e,
2948  0x00a8,
2949  0x00c2,
2950  0x00dc,
2951  0x00f8,
2952  0x0114,
2953  0x0130,
2954  0x014d,
2955  0x016b,
2956  0x0189,
2957  0x01a8,
2958  0x01c8,
2959  0x01e8,
2960  0x0209,
2961  0x022b,
2962  0x024e,
2963  0x0271,
2964  0x0295,
2965  0x02ba,
2966  0x02e0,
2967  0x0306,
2968  0x032e,
2969  0x0356,
2970  0x037f,
2971  0x03a9,
2972  0x03d4
2973  };
2974  return ((((op >> 5) & ((1 << FP16_EXP_BITS) - 1)) << FP16_MANT_BITS) |
2975  coeff[op & ((1 << 5) - 1)]);
2976 }
2977 
2978 template <>
2979 uint32_t
2980 fplibExpA(uint32_t op)
2981 {
2982  static uint32_t coeff[64] = {
2983  0x000000,
2984  0x0164d2,
2985  0x02cd87,
2986  0x043a29,
2987  0x05aac3,
2988  0x071f62,
2989  0x08980f,
2990  0x0a14d5,
2991  0x0b95c2,
2992  0x0d1adf,
2993  0x0ea43a,
2994  0x1031dc,
2995  0x11c3d3,
2996  0x135a2b,
2997  0x14f4f0,
2998  0x16942d,
2999  0x1837f0,
3000  0x19e046,
3001  0x1b8d3a,
3002  0x1d3eda,
3003  0x1ef532,
3004  0x20b051,
3005  0x227043,
3006  0x243516,
3007  0x25fed7,
3008  0x27cd94,
3009  0x29a15b,
3010  0x2b7a3a,
3011  0x2d583f,
3012  0x2f3b79,
3013  0x3123f6,
3014  0x3311c4,
3015  0x3504f3,
3016  0x36fd92,
3017  0x38fbaf,
3018  0x3aff5b,
3019  0x3d08a4,
3020  0x3f179a,
3021  0x412c4d,
3022  0x4346cd,
3023  0x45672a,
3024  0x478d75,
3025  0x49b9be,
3026  0x4bec15,
3027  0x4e248c,
3028  0x506334,
3029  0x52a81e,
3030  0x54f35b,
3031  0x5744fd,
3032  0x599d16,
3033  0x5bfbb8,
3034  0x5e60f5,
3035  0x60ccdf,
3036  0x633f89,
3037  0x65b907,
3038  0x68396a,
3039  0x6ac0c7,
3040  0x6d4f30,
3041  0x6fe4ba,
3042  0x728177,
3043  0x75257d,
3044  0x77d0df,
3045  0x7a83b3,
3046  0x7d3e0c
3047  };
3048  return ((((op >> 6) & ((1 << FP32_EXP_BITS) - 1)) << FP32_MANT_BITS) |
3049  coeff[op & ((1 << 6) - 1)]);
3050 }
3051 
3052 template <>
3053 uint64_t
3054 fplibExpA(uint64_t op)
3055 {
3056  static uint64_t coeff[64] = {
3057  0x0000000000000ULL,
3058  0x02c9a3e778061ULL,
3059  0x059b0d3158574ULL,
3060  0x0874518759bc8ULL,
3061  0x0b5586cf9890fULL,
3062  0x0e3ec32d3d1a2ULL,
3063  0x11301d0125b51ULL,
3064  0x1429aaea92de0ULL,
3065  0x172b83c7d517bULL,
3066  0x1a35beb6fcb75ULL,
3067  0x1d4873168b9aaULL,
3068  0x2063b88628cd6ULL,
3069  0x2387a6e756238ULL,
3070  0x26b4565e27cddULL,
3071  0x29e9df51fdee1ULL,
3072  0x2d285a6e4030bULL,
3073  0x306fe0a31b715ULL,
3074  0x33c08b26416ffULL,
3075  0x371a7373aa9cbULL,
3076  0x3a7db34e59ff7ULL,
3077  0x3dea64c123422ULL,
3078  0x4160a21f72e2aULL,
3079  0x44e086061892dULL,
3080  0x486a2b5c13cd0ULL,
3081  0x4bfdad5362a27ULL,
3082  0x4f9b2769d2ca7ULL,
3083  0x5342b569d4f82ULL,
3084  0x56f4736b527daULL,
3085  0x5ab07dd485429ULL,
3086  0x5e76f15ad2148ULL,
3087  0x6247eb03a5585ULL,
3088  0x6623882552225ULL,
3089  0x6a09e667f3bcdULL,
3090  0x6dfb23c651a2fULL,
3091  0x71f75e8ec5f74ULL,
3092  0x75feb564267c9ULL,
3093  0x7a11473eb0187ULL,
3094  0x7e2f336cf4e62ULL,
3095  0x82589994cce13ULL,
3096  0x868d99b4492edULL,
3097  0x8ace5422aa0dbULL,
3098  0x8f1ae99157736ULL,
3099  0x93737b0cdc5e5ULL,
3100  0x97d829fde4e50ULL,
3101  0x9c49182a3f090ULL,
3102  0xa0c667b5de565ULL,
3103  0xa5503b23e255dULL,
3104  0xa9e6b5579fdbfULL,
3105  0xae89f995ad3adULL,
3106  0xb33a2b84f15fbULL,
3107  0xb7f76f2fb5e47ULL,
3108  0xbcc1e904bc1d2ULL,
3109  0xc199bdd85529cULL,
3110  0xc67f12e57d14bULL,
3111  0xcb720dcef9069ULL,
3112  0xd072d4a07897cULL,
3113  0xd5818dcfba487ULL,
3114  0xda9e603db3285ULL,
3115  0xdfc97337b9b5fULL,
3116  0xe502ee78b3ff6ULL,
3117  0xea4afa2a490daULL,
3118  0xefa1bee615a27ULL,
3119  0xf50765b6e4540ULL,
3120  0xfa7c1819e90d8ULL
3121  };
3122  return ((((op >> 6) & ((1 << FP64_EXP_BITS) - 1)) << FP64_MANT_BITS) |
3123  coeff[op & ((1 << 6) - 1)]);
3124 }
3125 
3126 static uint16_t
3127 fp16_repack(int sgn, int exp, uint16_t mnt)
3128 {
3129  return fp16_pack(sgn, mnt >> FP16_MANT_BITS ? exp : 0, mnt);
3130 }
3131 
3132 static uint32_t
3133 fp32_repack(int sgn, int exp, uint32_t mnt)
3134 {
3135  return fp32_pack(sgn, mnt >> FP32_MANT_BITS ? exp : 0, mnt);
3136 }
3137 
3138 static uint64_t
3139 fp64_repack(int sgn, int exp, uint64_t mnt)
3140 {
3141  return fp64_pack(sgn, mnt >> FP64_MANT_BITS ? exp : 0, mnt);
3142 }
3143 
3144 static void
3145 fp16_minmaxnum(uint16_t *op1, uint16_t *op2, int sgn)
3146 {
3147  // Treat a single quiet-NaN as +Infinity/-Infinity
3148  if (!((uint16_t)~(*op1 << 1) >> FP16_MANT_BITS) &&
3149  (uint16_t)~(*op2 << 1) >> FP16_MANT_BITS)
3150  *op1 = fp16_infinity(sgn);
3151  if (!((uint16_t)~(*op2 << 1) >> FP16_MANT_BITS) &&
3152  (uint16_t)~(*op1 << 1) >> FP16_MANT_BITS)
3153  *op2 = fp16_infinity(sgn);
3154 }
3155 
3156 static void
3157 fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn)
3158 {
3159  // Treat a single quiet-NaN as +Infinity/-Infinity
3160  if (!((uint32_t)~(*op1 << 1) >> FP32_MANT_BITS) &&
3161  (uint32_t)~(*op2 << 1) >> FP32_MANT_BITS)
3162  *op1 = fp32_infinity(sgn);
3163  if (!((uint32_t)~(*op2 << 1) >> FP32_MANT_BITS) &&
3164  (uint32_t)~(*op1 << 1) >> FP32_MANT_BITS)
3165  *op2 = fp32_infinity(sgn);
3166 }
3167 
3168 static void
3169 fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn)
3170 {
3171  // Treat a single quiet-NaN as +Infinity/-Infinity
3172  if (!((uint64_t)~(*op1 << 1) >> FP64_MANT_BITS) &&
3173  (uint64_t)~(*op2 << 1) >> FP64_MANT_BITS)
3174  *op1 = fp64_infinity(sgn);
3175  if (!((uint64_t)~(*op2 << 1) >> FP64_MANT_BITS) &&
3176  (uint64_t)~(*op1 << 1) >> FP64_MANT_BITS)
3177  *op2 = fp64_infinity(sgn);
3178 }
3179 
3180 template <>
3181 uint16_t
3182 fplibMax(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3183 {
3184  int mode = modeConv(fpscr);
3185  int flags = 0;
3186  int sgn1, exp1, sgn2, exp2;
3187  uint16_t mnt1, mnt2, x, result;
3188 
3189  fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3190  fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3191 
3192  if ((x = fp16_process_NaNs(op1, op2, mode, &flags))) {
3193  result = x;
3194  } else {
3195  result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3196  fp16_repack(sgn1, exp1, mnt1) :
3197  fp16_repack(sgn2, exp2, mnt2));
3198  }
3199  set_fpscr0(fpscr, flags);
3200  return result;
3201 }
3202 
3203 template <>
3204 uint32_t
3205 fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3206 {
3207  int mode = modeConv(fpscr);
3208  int flags = 0;
3209  int sgn1, exp1, sgn2, exp2;
3210  uint32_t mnt1, mnt2, x, result;
3211 
3212  fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3213  fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3214 
3215  if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
3216  result = x;
3217  } else {
3218  result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3219  fp32_repack(sgn1, exp1, mnt1) :
3220  fp32_repack(sgn2, exp2, mnt2));
3221  }
3222  set_fpscr0(fpscr, flags);
3223  return result;
3224 }
3225 
3226 template <>
3227 uint64_t
3228 fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3229 {
3230  int mode = modeConv(fpscr);
3231  int flags = 0;
3232  int sgn1, exp1, sgn2, exp2;
3233  uint64_t mnt1, mnt2, x, result;
3234 
3235  fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3236  fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3237 
3238  if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
3239  result = x;
3240  } else {
3241  result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3242  fp64_repack(sgn1, exp1, mnt1) :
3243  fp64_repack(sgn2, exp2, mnt2));
3244  }
3245  set_fpscr0(fpscr, flags);
3246  return result;
3247 }
3248 
3249 template <>
3250 uint16_t
3251 fplibMaxNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3252 {
3253  fp16_minmaxnum(&op1, &op2, 1);
3254  return fplibMax<uint16_t>(op1, op2, fpscr);
3255 }
3256 
3257 template <>
3258 uint32_t
3259 fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3260 {
3261  fp32_minmaxnum(&op1, &op2, 1);
3262  return fplibMax<uint32_t>(op1, op2, fpscr);
3263 }
3264 
3265 template <>
3266 uint64_t
3267 fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3268 {
3269  fp64_minmaxnum(&op1, &op2, 1);
3270  return fplibMax<uint64_t>(op1, op2, fpscr);
3271 }
3272 
3273 template <>
3274 uint16_t
3275 fplibMin(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3276 {
3277  int mode = modeConv(fpscr);
3278  int flags = 0;
3279  int sgn1, exp1, sgn2, exp2;
3280  uint16_t mnt1, mnt2, x, result;
3281 
3282  fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3283  fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3284 
3285  if ((x = fp16_process_NaNs(op1, op2, mode, &flags))) {
3286  result = x;
3287  } else {
3288  result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3289  fp16_repack(sgn1, exp1, mnt1) :
3290  fp16_repack(sgn2, exp2, mnt2));
3291  }
3292  set_fpscr0(fpscr, flags);
3293  return result;
3294 }
3295 
3296 template <>
3297 uint32_t
3298 fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3299 {
3300  int mode = modeConv(fpscr);
3301  int flags = 0;
3302  int sgn1, exp1, sgn2, exp2;
3303  uint32_t mnt1, mnt2, x, result;
3304 
3305  fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3306  fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3307 
3308  if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
3309  result = x;
3310  } else {
3311  result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3312  fp32_repack(sgn1, exp1, mnt1) :
3313  fp32_repack(sgn2, exp2, mnt2));
3314  }
3315  set_fpscr0(fpscr, flags);
3316  return result;
3317 }
3318 
3319 template <>
3320 uint64_t
3321 fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3322 {
3323  int mode = modeConv(fpscr);
3324  int flags = 0;
3325  int sgn1, exp1, sgn2, exp2;
3326  uint64_t mnt1, mnt2, x, result;
3327 
3328  fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3329  fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3330 
3331  if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
3332  result = x;
3333  } else {
3334  result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3335  fp64_repack(sgn1, exp1, mnt1) :
3336  fp64_repack(sgn2, exp2, mnt2));
3337  }
3338  set_fpscr0(fpscr, flags);
3339  return result;
3340 }
3341 
3342 template <>
3343 uint16_t
3344 fplibMinNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3345 {
3346  fp16_minmaxnum(&op1, &op2, 0);
3347  return fplibMin<uint16_t>(op1, op2, fpscr);
3348 }
3349 
3350 template <>
3351 uint32_t
3352 fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3353 {
3354  fp32_minmaxnum(&op1, &op2, 0);
3355  return fplibMin<uint32_t>(op1, op2, fpscr);
3356 }
3357 
3358 template <>
3359 uint64_t
3360 fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3361 {
3362  fp64_minmaxnum(&op1, &op2, 0);
3363  return fplibMin<uint64_t>(op1, op2, fpscr);
3364 }
3365 
3366 template <>
3367 uint16_t
3368 fplibMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3369 {
3370  int flags = 0;
3371  uint16_t result = fp16_mul(op1, op2, modeConv(fpscr), &flags);
3372  set_fpscr0(fpscr, flags);
3373  return result;
3374 }
3375 
3376 template <>
3377 uint32_t
3378 fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3379 {
3380  int flags = 0;
3381  uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags);
3382  set_fpscr0(fpscr, flags);
3383  return result;
3384 }
3385 
3386 template <>
3387 uint64_t
3388 fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3389 {
3390  int flags = 0;
3391  uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags);
3392  set_fpscr0(fpscr, flags);
3393  return result;
3394 }
3395 
3396 template <>
3397 uint16_t
3398 fplibMulX(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3399 {
3400  int mode = modeConv(fpscr);
3401  int flags = 0;
3402  int sgn1, exp1, sgn2, exp2;
3403  uint16_t mnt1, mnt2, result;
3404 
3405  fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3406  fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3407 
3408  result = fp16_process_NaNs(op1, op2, mode, &flags);
3409  if (!result) {
3410  if ((exp1 == FP16_EXP_INF && !mnt2) ||
3411  (exp2 == FP16_EXP_INF && !mnt1)) {
3412  result = fp16_FPTwo(sgn1 ^ sgn2);
3413  } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
3414  result = fp16_infinity(sgn1 ^ sgn2);
3415  } else if (!mnt1 || !mnt2) {
3416  result = fp16_zero(sgn1 ^ sgn2);
3417  } else {
3418  result = fp16_mul(op1, op2, mode, &flags);
3419  }
3420  }
3421 
3422  set_fpscr0(fpscr, flags);
3423 
3424  return result;
3425 }
3426 
3427 template <>
3428 uint32_t
3429 fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3430 {
3431  int mode = modeConv(fpscr);
3432  int flags = 0;
3433  int sgn1, exp1, sgn2, exp2;
3434  uint32_t mnt1, mnt2, result;
3435 
3436  fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3437  fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3438 
3439  result = fp32_process_NaNs(op1, op2, mode, &flags);
3440  if (!result) {
3441  if ((exp1 == FP32_EXP_INF && !mnt2) ||
3442  (exp2 == FP32_EXP_INF && !mnt1)) {
3443  result = fp32_FPTwo(sgn1 ^ sgn2);
3444  } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
3445  result = fp32_infinity(sgn1 ^ sgn2);
3446  } else if (!mnt1 || !mnt2) {
3447  result = fp32_zero(sgn1 ^ sgn2);
3448  } else {
3449  result = fp32_mul(op1, op2, mode, &flags);
3450  }
3451  }
3452 
3453  set_fpscr0(fpscr, flags);
3454 
3455  return result;
3456 }
3457 
3458 template <>
3459 uint64_t
3460 fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3461 {
3462  int mode = modeConv(fpscr);
3463  int flags = 0;
3464  int sgn1, exp1, sgn2, exp2;
3465  uint64_t mnt1, mnt2, result;
3466 
3467  fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3468  fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3469 
3470  result = fp64_process_NaNs(op1, op2, mode, &flags);
3471  if (!result) {
3472  if ((exp1 == FP64_EXP_INF && !mnt2) ||
3473  (exp2 == FP64_EXP_INF && !mnt1)) {
3474  result = fp64_FPTwo(sgn1 ^ sgn2);
3475  } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
3476  result = fp64_infinity(sgn1 ^ sgn2);
3477  } else if (!mnt1 || !mnt2) {
3478  result = fp64_zero(sgn1 ^ sgn2);
3479  } else {
3480  result = fp64_mul(op1, op2, mode, &flags);
3481  }
3482  }
3483 
3484  set_fpscr0(fpscr, flags);
3485 
3486  return result;
3487 }
3488 
3489 template <>
3490 uint16_t
3491 fplibNeg(uint16_t op)
3492 {
3493  return op ^ 1ULL << (FP16_BITS - 1);
3494 }
3495 
3496 template <>
3497 uint32_t
3498 fplibNeg(uint32_t op)
3499 {
3500  return op ^ 1ULL << (FP32_BITS - 1);
3501 }
3502 
3503 template <>
3504 uint64_t
3505 fplibNeg(uint64_t op)
3506 {
3507  return op ^ 1ULL << (FP64_BITS - 1);
3508 }
3509 
3510 static const uint8_t recip_sqrt_estimate[256] = {
3511  255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228,
3512  226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203,
3513  201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181,
3514  180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163,
3515  162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146,
3516  145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131,
3517  131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118,
3518  118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106,
3519  105, 104, 103, 101, 100, 99, 97, 96, 95, 93, 92, 91, 90, 88, 87, 86,
3520  85, 84, 82, 81, 80, 79, 78, 77, 76, 75, 74, 72, 71, 70, 69, 68,
3521  67, 66, 65, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 55, 54, 53,
3522  52, 51, 51, 50, 49, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 40,
3523  39, 38, 38, 37, 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28,
3524  28, 27, 26, 26, 25, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 18,
3525  17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 10, 10, 9, 9,
3526  8, 8, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0
3527 };
3528 
3529 template <>
3530 uint16_t
3531 fplibRSqrtEstimate(uint16_t op, FPSCR &fpscr)
3532 {
3533  int mode = modeConv(fpscr);
3534  int flags = 0;
3535  int sgn, exp;
3536  uint16_t mnt, result;
3537 
3538  fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3539 
3540  if (fp16_is_NaN(exp, mnt)) {
3541  result = fp16_process_NaN(op, mode, &flags);
3542  } else if (!mnt) {
3543  result = fp16_infinity(sgn);
3544  flags |= FPLIB_DZC;
3545  } else if (sgn) {
3546  result = fp16_defaultNaN();
3547  flags |= FPLIB_IOC;
3548  } else if (exp == FP16_EXP_INF) {
3549  result = fp16_zero(0);
3550  } else {
3551  exp += FP16_EXP_BITS;
3552  mnt = fp16_normalise(mnt, &exp);
3553  mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3554  (mnt >> (FP16_BITS - 8) & 127)];
3555  result = fp16_pack(0, (3 * FP16_EXP_BIAS - exp - 1) >> 1,
3556  mnt << (FP16_MANT_BITS - 8));
3557  }
3558 
3559  set_fpscr0(fpscr, flags);
3560 
3561  return result;
3562 }
3563 
3564 template <>
3565 uint32_t
3566 fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr)
3567 {
3568  int mode = modeConv(fpscr);
3569  int flags = 0;
3570  int sgn, exp;
3571  uint32_t mnt, result;
3572 
3573  fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3574 
3575  if (fp32_is_NaN(exp, mnt)) {
3576  result = fp32_process_NaN(op, mode, &flags);
3577  } else if (!mnt) {
3578  result = fp32_infinity(sgn);
3579  flags |= FPLIB_DZC;
3580  } else if (sgn) {
3581  result = fp32_defaultNaN();
3582  flags |= FPLIB_IOC;
3583  } else if (exp == FP32_EXP_INF) {
3584  result = fp32_zero(0);
3585  } else {
3586  exp += FP32_EXP_BITS;
3587  mnt = fp32_normalise(mnt, &exp);
3588  mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3589  (mnt >> (FP32_BITS - 8) & 127)];
3590  result = fp32_pack(0, (3 * FP32_EXP_BIAS - exp - 1) >> 1,
3591  mnt << (FP32_MANT_BITS - 8));
3592  }
3593 
3594  set_fpscr0(fpscr, flags);
3595 
3596  return result;
3597 }
3598 
3599 template <>
3600 uint64_t
3601 fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr)
3602 {
3603  int mode = modeConv(fpscr);
3604  int flags = 0;
3605  int sgn, exp;
3606  uint64_t mnt, result;
3607 
3608  fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3609 
3610  if (fp64_is_NaN(exp, mnt)) {
3611  result = fp64_process_NaN(op, mode, &flags);
3612  } else if (!mnt) {
3613  result = fp64_infinity(sgn);
3614  flags |= FPLIB_DZC;
3615  } else if (sgn) {
3616  result = fp64_defaultNaN();
3617  flags |= FPLIB_IOC;
3618  } else if (exp == FP64_EXP_INF) {
3619  result = fp32_zero(0);
3620  } else {
3621  exp += FP64_EXP_BITS;
3622  mnt = fp64_normalise(mnt, &exp);
3623  mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3624  (mnt >> (FP64_BITS - 8) & 127)];
3625  result = fp64_pack(0, (3 * FP64_EXP_BIAS - exp - 1) >> 1,
3626  mnt << (FP64_MANT_BITS - 8));
3627  }
3628 
3629  set_fpscr0(fpscr, flags);
3630 
3631  return result;
3632 }
3633 
3634 template <>
3635 uint16_t
3636 fplibRSqrtStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3637 {
3638  int mode = modeConv(fpscr);
3639  int flags = 0;
3640  int sgn1, exp1, sgn2, exp2;
3641  uint16_t mnt1, mnt2, result;
3642 
3643  op1 = fplibNeg<uint16_t>(op1);
3644  fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3645  fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3646 
3647  result = fp16_process_NaNs(op1, op2, mode, &flags);
3648  if (!result) {
3649  if ((exp1 == FP16_EXP_INF && !mnt2) ||
3650  (exp2 == FP16_EXP_INF && !mnt1)) {
3651  result = fp16_FPOnePointFive(0);
3652  } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
3653  result = fp16_infinity(sgn1 ^ sgn2);
3654  } else {
3655  result = fp16_muladd(fp16_FPThree(0), op1, op2, -1, mode, &flags);
3656  }
3657  }
3658 
3659  set_fpscr0(fpscr, flags);
3660 
3661  return result;
3662 }
3663 
3664 template <>
3665 uint32_t
3666 fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3667 {
3668  int mode = modeConv(fpscr);
3669  int flags = 0;
3670  int sgn1, exp1, sgn2, exp2;
3671  uint32_t mnt1, mnt2, result;
3672 
3673  op1 = fplibNeg<uint32_t>(op1);
3674  fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3675  fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3676 
3677  result = fp32_process_NaNs(op1, op2, mode, &flags);
3678  if (!result) {
3679  if ((exp1 == FP32_EXP_INF && !mnt2) ||
3680  (exp2 == FP32_EXP_INF && !mnt1)) {
3681  result = fp32_FPOnePointFive(0);
3682  } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
3683  result = fp32_infinity(sgn1 ^ sgn2);
3684  } else {
3685  result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags);
3686  }
3687  }
3688 
3689  set_fpscr0(fpscr, flags);
3690 
3691  return result;
3692 }
3693 
3694 template <>
3695 uint64_t
3696 fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3697 {
3698  int mode = modeConv(fpscr);
3699  int flags = 0;
3700  int sgn1, exp1, sgn2, exp2;
3701  uint64_t mnt1, mnt2, result;
3702 
3703  op1 = fplibNeg<uint64_t>(op1);
3704  fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3705  fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3706 
3707  result = fp64_process_NaNs(op1, op2, mode, &flags);
3708  if (!result) {
3709  if ((exp1 == FP64_EXP_INF && !mnt2) ||
3710  (exp2 == FP64_EXP_INF && !mnt1)) {
3711  result = fp64_FPOnePointFive(0);
3712  } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
3713  result = fp64_infinity(sgn1 ^ sgn2);
3714  } else {
3715  result = fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags);
3716  }
3717  }
3718 
3719  set_fpscr0(fpscr, flags);
3720 
3721  return result;
3722 }
3723 
3724 template <>
3725 uint16_t
3726 fplibRecipEstimate(uint16_t op, FPSCR &fpscr)
3727 {
3728  int mode = modeConv(fpscr);
3729  int flags = 0;
3730  int sgn, exp;
3731  uint16_t mnt, result;
3732 
3733  fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3734 
3735  if (fp16_is_NaN(exp, mnt)) {
3736  result = fp16_process_NaN(op, mode, &flags);
3737  } else if (exp == FP16_EXP_INF) {
3738  result = fp16_zero(sgn);
3739  } else if (!mnt) {
3740  result = fp16_infinity(sgn);
3741  flags |= FPLIB_DZC;
3742  } else if (!((uint16_t)(op << 1) >> (FP16_MANT_BITS - 1))) {
3743  bool overflow_to_inf = false;
3744  switch (FPCRRounding(fpscr)) {
3745  case FPRounding_TIEEVEN:
3746  overflow_to_inf = true;
3747  break;
3748  case FPRounding_POSINF:
3749  overflow_to_inf = !sgn;
3750  break;
3751  case FPRounding_NEGINF:
3752  overflow_to_inf = sgn;
3753  break;
3754  case FPRounding_ZERO:
3755  overflow_to_inf = false;
3756  break;
3757  default:
3758  panic("Unrecognized FP rounding mode");
3759  }
3760  result = overflow_to_inf ? fp16_infinity(sgn) : fp16_max_normal(sgn);
3761  flags |= FPLIB_OFC | FPLIB_IXC;
3762  } else if (fpscr.fz16 && exp >= 2 * FP16_EXP_BIAS - 1) {
3763  result = fp16_zero(sgn);
3764  flags |= FPLIB_UFC;
3765  } else {
3766  exp += FP16_EXP_BITS;
3767  mnt = fp16_normalise(mnt, &exp);
3768  int result_exp = 2 * FP16_EXP_BIAS - 1 - exp;
3769  uint16_t fraction = (((uint32_t)1 << 19) /
3770  (mnt >> (FP16_BITS - 10) | 1) + 1) >> 1;
3771  fraction <<= FP16_MANT_BITS - 8;
3772  if (result_exp == 0) {
3773  fraction >>= 1;
3774  } else if (result_exp == -1) {
3775  fraction >>= 2;
3776  result_exp = 0;
3777  }
3778  result = fp16_pack(sgn, result_exp, fraction);
3779  }
3780 
3781  set_fpscr0(fpscr, flags);
3782 
3783  return result;
3784 }
3785 
3786 template <>
3787 uint32_t
3788 fplibRecipEstimate(uint32_t op, FPSCR &fpscr)
3789 {
3790  int mode = modeConv(fpscr);
3791  int flags = 0;
3792  int sgn, exp;
3793  uint32_t mnt, result;
3794 
3795  fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3796 
3797  if (fp32_is_NaN(exp, mnt)) {
3798  result = fp32_process_NaN(op, mode, &flags);
3799  } else if (exp == FP32_EXP_INF) {
3800  result = fp32_zero(sgn);
3801  } else if (!mnt) {
3802  result = fp32_infinity(sgn);
3803  flags |= FPLIB_DZC;
3804  } else if (!((uint32_t)(op << 1) >> (FP32_MANT_BITS - 1))) {
3805  bool overflow_to_inf = false;
3806  switch (FPCRRounding(fpscr)) {
3807  case FPRounding_TIEEVEN:
3808  overflow_to_inf = true;
3809  break;
3810  case FPRounding_POSINF:
3811  overflow_to_inf = !sgn;
3812  break;
3813  case FPRounding_NEGINF:
3814  overflow_to_inf = sgn;
3815  break;
3816  case FPRounding_ZERO:
3817  overflow_to_inf = false;
3818  break;
3819  default:
3820  panic("Unrecognized FP rounding mode");
3821  }
3822  result = overflow_to_inf ? fp32_infinity(sgn) : fp32_max_normal(sgn);
3823  flags |= FPLIB_OFC | FPLIB_IXC;
3824  } else if (fpscr.fz && exp >= 2 * FP32_EXP_BIAS - 1) {
3825  result = fp32_zero(sgn);
3826  flags |= FPLIB_UFC;
3827  } else {
3828  exp += FP32_EXP_BITS;
3829  mnt = fp32_normalise(mnt, &exp);
3830  int result_exp = 2 * FP32_EXP_BIAS - 1 - exp;
3831  uint32_t fraction = (((uint32_t)1 << 19) /
3832  (mnt >> (FP32_BITS - 10) | 1) + 1) >> 1;
3833  fraction <<= FP32_MANT_BITS - 8;
3834  if (result_exp == 0) {
3835  fraction >>= 1;
3836  } else if (result_exp == -1) {
3837  fraction >>= 2;
3838  result_exp = 0;
3839  }
3840  result = fp32_pack(sgn, result_exp, fraction);
3841  }
3842 
3843  set_fpscr0(fpscr, flags);
3844 
3845  return result;
3846 }
3847 
3848 template <>
3849 uint64_t
3850 fplibRecipEstimate(uint64_t op, FPSCR &fpscr)
3851 {
3852  int mode = modeConv(fpscr);
3853  int flags = 0;
3854  int sgn, exp;
3855  uint64_t mnt, result;
3856 
3857  fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
3858 
3859  if (fp64_is_NaN(exp, mnt)) {
3860  result = fp64_process_NaN(op, mode, &flags);
3861  } else if (exp == FP64_EXP_INF) {
3862  result = fp64_zero(sgn);
3863  } else if (!mnt) {
3864  result = fp64_infinity(sgn);
3865  flags |= FPLIB_DZC;
3866  } else if (!((uint64_t)(op << 1) >> (FP64_MANT_BITS - 1))) {
3867  bool overflow_to_inf = false;
3868  switch (FPCRRounding(fpscr)) {
3869  case FPRounding_TIEEVEN:
3870  overflow_to_inf = true;
3871  break;
3872  case FPRounding_POSINF:
3873  overflow_to_inf = !sgn;
3874  break;
3875  case FPRounding_NEGINF:
3876  overflow_to_inf = sgn;
3877  break;
3878  case FPRounding_ZERO:
3879  overflow_to_inf = false;
3880  break;
3881  default:
3882  panic("Unrecognized FP rounding mode");
3883  }
3884  result = overflow_to_inf ? fp64_infinity(sgn) : fp64_max_normal(sgn);
3885  flags |= FPLIB_OFC | FPLIB_IXC;
3886  } else if (fpscr.fz && exp >= 2 * FP64_EXP_BIAS - 1) {
3887  result = fp64_zero(sgn);
3888  flags |= FPLIB_UFC;
3889  } else {
3890  exp += FP64_EXP_BITS;
3891  mnt = fp64_normalise(mnt, &exp);
3892  int result_exp = 2 * FP64_EXP_BIAS - 1 - exp;
3893  uint64_t fraction = (((uint32_t)1 << 19) /
3894  (mnt >> (FP64_BITS - 10) | 1) + 1) >> 1;
3895  fraction <<= FP64_MANT_BITS - 8;
3896  if (result_exp == 0) {
3897  fraction >>= 1;
3898  } else if (result_exp == -1) {
3899  fraction >>= 2;
3900  result_exp = 0;
3901  }
3902  result = fp64_pack(sgn, result_exp, fraction);
3903  }
3904 
3905  set_fpscr0(fpscr, flags);
3906 
3907  return result;
3908 }
3909 
3910 template <>
3911 uint16_t
3912 fplibRecipStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr)
3913 {
3914  int mode = modeConv(fpscr);
3915  int flags = 0;
3916  int sgn1, exp1, sgn2, exp2;
3917  uint16_t mnt1, mnt2, result;
3918 
3919  op1 = fplibNeg<uint16_t>(op1);
3920  fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3921  fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3922 
3923  result = fp16_process_NaNs(op1, op2, mode, &flags);
3924  if (!result) {
3925  if ((exp1 == FP16_EXP_INF && !mnt2) ||
3926  (exp2 == FP16_EXP_INF && !mnt1)) {
3927  result = fp16_FPTwo(0);
3928  } else if (exp1 == FP16_EXP_INF || exp2 == FP16_EXP_INF) {
3929  result = fp16_infinity(sgn1 ^ sgn2);
3930  } else {
3931  result = fp16_muladd(fp16_FPTwo(0), op1, op2, 0, mode, &flags);
3932  }
3933  }
3934 
3935  set_fpscr0(fpscr, flags);
3936 
3937  return result;
3938 }
3939 
3940 template <>
3941 uint32_t
3942 fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
3943 {
3944  int mode = modeConv(fpscr);
3945  int flags = 0;
3946  int sgn1, exp1, sgn2, exp2;
3947  uint32_t mnt1, mnt2, result;
3948 
3949  op1 = fplibNeg<uint32_t>(op1);
3950  fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3951  fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3952 
3953  result = fp32_process_NaNs(op1, op2, mode, &flags);
3954  if (!result) {
3955  if ((exp1 == FP32_EXP_INF && !mnt2) ||
3956  (exp2 == FP32_EXP_INF && !mnt1)) {
3957  result = fp32_FPTwo(0);
3958  } else if (exp1 == FP32_EXP_INF || exp2 == FP32_EXP_INF) {
3959  result = fp32_infinity(sgn1 ^ sgn2);
3960  } else {
3961  result = fp32_muladd(fp32_FPTwo(0), op1, op2, 0, mode, &flags);
3962  }
3963  }
3964 
3965  set_fpscr0(fpscr, flags);
3966 
3967  return result;
3968 }
3969 
3970 template <>
3971 uint64_t
3972 fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
3973 {
3974  int mode = modeConv(fpscr);
3975  int flags = 0;
3976  int sgn1, exp1, sgn2, exp2;
3977  uint64_t mnt1, mnt2, result;
3978 
3979  op1 = fplibNeg<uint64_t>(op1);
3980  fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3981  fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3982 
3983  result = fp64_process_NaNs(op1, op2, mode, &flags);
3984  if (!result) {
3985  if ((exp1 == FP64_EXP_INF && !mnt2) ||
3986  (exp2 == FP64_EXP_INF && !mnt1)) {
3987  result = fp64_FPTwo(0);
3988  } else if (exp1 == FP64_EXP_INF || exp2 == FP64_EXP_INF) {
3989  result = fp64_infinity(sgn1 ^ sgn2);
3990  } else {
3991  result = fp64_muladd(fp64_FPTwo(0), op1, op2, 0, mode, &flags);
3992  }
3993  }
3994 
3995  set_fpscr0(fpscr, flags);
3996 
3997  return result;
3998 }
3999 
4000 template <>
4001 uint16_t
4002 fplibRecpX(uint16_t op, FPSCR &fpscr)
4003 {
4004  int mode = modeConv(fpscr);
4005  int flags = 0;
4006  int sgn, exp;
4007  uint16_t mnt, result;
4008 
4009  fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4010 
4011  if (fp16_is_NaN(exp, mnt)) {
4012  result = fp16_process_NaN(op, mode, &flags);
4013  }
4014  else {
4015  if (!mnt) { // Zero and denormals
4016  result = fp16_pack(sgn, FP16_EXP_INF - 1, 0);
4017  } else { // Infinities and normals
4018  result = fp16_pack(sgn, exp ^ FP16_EXP_INF, 0);
4019  }
4020  }
4021 
4022  set_fpscr0(fpscr, flags);
4023 
4024  return result;
4025 }
4026 
4027 template <>
4028 uint32_t
4029 fplibRecpX(uint32_t op, FPSCR &fpscr)
4030 {
4031  int mode = modeConv(fpscr);
4032  int flags = 0;
4033  int sgn, exp;
4034  uint32_t mnt, result;
4035 
4036  fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4037 
4038  if (fp32_is_NaN(exp, mnt)) {
4039  result = fp32_process_NaN(op, mode, &flags);
4040  }
4041  else {
4042  if (!mnt) { // Zero and denormals
4043  result = fp32_pack(sgn, FP32_EXP_INF - 1, 0);
4044  } else { // Infinities and normals
4045  result = fp32_pack(sgn, exp ^ FP32_EXP_INF, 0);
4046  }
4047  }
4048 
4049  set_fpscr0(fpscr, flags);
4050 
4051  return result;
4052 }
4053 
4054 template <>
4055 uint64_t
4056 fplibRecpX(uint64_t op, FPSCR &fpscr)
4057 {
4058  int mode = modeConv(fpscr);
4059  int flags = 0;
4060  int sgn, exp;
4061  uint64_t mnt, result;
4062 
4063  fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4064 
4065  if (fp64_is_NaN(exp, mnt)) {
4066  result = fp64_process_NaN(op, mode, &flags);
4067  }
4068  else {
4069  if (!mnt) { // Zero and denormals
4070  result = fp64_pack(sgn, FP64_EXP_INF - 1, 0);
4071  } else { // Infinities and normals
4072  result = fp64_pack(sgn, exp ^ FP64_EXP_INF, 0);
4073  }
4074  }
4075 
4076  set_fpscr0(fpscr, flags);
4077 
4078  return result;
4079 }
4080 
4081 template <>
4082 uint16_t
4083 fplibRoundInt(uint16_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
4084 {
4085  int expint = FP16_EXP_BIAS + FP16_MANT_BITS;
4086  int mode = modeConv(fpscr);
4087  int flags = 0;
4088  int sgn, exp;
4089  uint16_t mnt, result;
4090 
4091  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4092  fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4093 
4094  // Handle NaNs, infinities and zeroes:
4095  if (fp16_is_NaN(exp, mnt)) {
4096  result = fp16_process_NaN(op, mode, &flags);
4097  } else if (exp == FP16_EXP_INF) {
4098  result = fp16_infinity(sgn);
4099  } else if (!mnt) {
4100  result = fp16_zero(sgn);
4101  } else if (exp >= expint) {
4102  // There are no fractional bits
4103  result = op;
4104  } else {
4105  // Truncate towards zero:
4106  uint16_t x = expint - exp >= FP16_BITS ? 0 : mnt >> (expint - exp);
4107  int err = exp < expint - FP16_BITS ? 1 :
4108  ((mnt << 1 >> (expint - exp - 1) & 3) |
4109  ((uint16_t)(mnt << 2 << (FP16_BITS + exp - expint)) != 0));
4110  switch (rounding) {
4111  case FPRounding_TIEEVEN:
4112  x += (err == 3 || (err == 2 && (x & 1)));
4113  break;
4114  case FPRounding_POSINF:
4115  x += err && !sgn;
4116  break;
4117  case FPRounding_NEGINF:
4118  x += err && sgn;
4119  break;
4120  case FPRounding_ZERO:
4121  break;
4122  case FPRounding_TIEAWAY:
4123  x += err >> 1;
4124  break;
4125  default:
4126  panic("Unrecognized FP rounding mode");
4127  }
4128 
4129  if (x == 0) {
4130  result = fp16_zero(sgn);
4131  } else {
4132  exp = expint;
4133  mnt = fp16_normalise(x, &exp);
4134  result = fp16_pack(sgn, exp + FP16_EXP_BITS, mnt >> FP16_EXP_BITS);
4135  }
4136 
4137  if (err && exact)
4138  flags |= FPLIB_IXC;
4139  }
4140 
4141  set_fpscr0(fpscr, flags);
4142 
4143  return result;
4144 }
4145 
4146 template <>
4147 uint32_t
4148 fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
4149 {
4150  int expint = FP32_EXP_BIAS + FP32_MANT_BITS;
4151  int mode = modeConv(fpscr);
4152  int flags = 0;
4153  int sgn, exp;
4154  uint32_t mnt, result;
4155 
4156  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4157  fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4158 
4159  // Handle NaNs, infinities and zeroes:
4160  if (fp32_is_NaN(exp, mnt)) {
4161  result = fp32_process_NaN(op, mode, &flags);
4162  } else if (exp == FP32_EXP_INF) {
4163  result = fp32_infinity(sgn);
4164  } else if (!mnt) {
4165  result = fp32_zero(sgn);
4166  } else if (exp >= expint) {
4167  // There are no fractional bits
4168  result = op;
4169  } else {
4170  // Truncate towards zero:
4171  uint32_t x = expint - exp >= FP32_BITS ? 0 : mnt >> (expint - exp);
4172  int err = exp < expint - FP32_BITS ? 1 :
4173  ((mnt << 1 >> (expint - exp - 1) & 3) |
4174  ((uint32_t)(mnt << 2 << (FP32_BITS + exp - expint)) != 0));
4175  switch (rounding) {
4176  case FPRounding_TIEEVEN:
4177  x += (err == 3 || (err == 2 && (x & 1)));
4178  break;
4179  case FPRounding_POSINF:
4180  x += err && !sgn;
4181  break;
4182  case FPRounding_NEGINF:
4183  x += err && sgn;
4184  break;
4185  case FPRounding_ZERO:
4186  break;
4187  case FPRounding_TIEAWAY:
4188  x += err >> 1;
4189  break;
4190  default:
4191  panic("Unrecognized FP rounding mode");
4192  }
4193 
4194  if (x == 0) {
4195  result = fp32_zero(sgn);
4196  } else {
4197  exp = expint;
4198  mnt = fp32_normalise(x, &exp);
4199  result = fp32_pack(sgn, exp + FP32_EXP_BITS, mnt >> FP32_EXP_BITS);
4200  }
4201 
4202  if (err && exact)
4203  flags |= FPLIB_IXC;
4204  }
4205 
4206  set_fpscr0(fpscr, flags);
4207 
4208  return result;
4209 }
4210 
4211 template <>
4212 uint64_t
4213 fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
4214 {
4215  int expint = FP64_EXP_BIAS + FP64_MANT_BITS;
4216  int mode = modeConv(fpscr);
4217  int flags = 0;
4218  int sgn, exp;
4219  uint64_t mnt, result;
4220 
4221  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4222  fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
4223 
4224  // Handle NaNs, infinities and zeroes:
4225  if (fp64_is_NaN(exp, mnt)) {
4226  result = fp64_process_NaN(op, mode, &flags);
4227  } else if (exp == FP64_EXP_INF) {
4228  result = fp64_infinity(sgn);
4229  } else if (!mnt) {
4230  result = fp64_zero(sgn);
4231  } else if (exp >= expint) {
4232  // There are no fractional bits
4233  result = op;
4234  } else {
4235  // Truncate towards zero:
4236  uint64_t x = expint - exp >= FP64_BITS ? 0 : mnt >> (expint - exp);
4237  int err = exp < expint - FP64_BITS ? 1 :
4238  ((mnt << 1 >> (expint - exp - 1) & 3) |
4239  ((uint64_t)(mnt << 2 << (FP64_BITS + exp - expint)) != 0));
4240  switch (rounding) {
4241  case FPRounding_TIEEVEN:
4242  x += (err == 3 || (err == 2 && (x & 1)));
4243  break;
4244  case FPRounding_POSINF:
4245  x += err && !sgn;
4246  break;
4247  case FPRounding_NEGINF:
4248  x += err && sgn;
4249  break;
4250  case FPRounding_ZERO:
4251  break;
4252  case FPRounding_TIEAWAY:
4253  x += err >> 1;
4254  break;
4255  default:
4256  panic("Unrecognized FP rounding mode");
4257  }
4258 
4259  if (x == 0) {
4260  result = fp64_zero(sgn);
4261  } else {
4262  exp = expint;
4263  mnt = fp64_normalise(x, &exp);
4264  result = fp64_pack(sgn, exp + FP64_EXP_BITS, mnt >> FP64_EXP_BITS);
4265  }
4266 
4267  if (err && exact)
4268  flags |= FPLIB_IXC;
4269  }
4270 
4271  set_fpscr0(fpscr, flags);
4272 
4273  return result;
4274 }
4275 
4276 template <>
4277 uint16_t
4278 fplibScale(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4279 {
4280  int flags = 0;
4281  uint16_t result = fp16_scale(op1, (int16_t)op2, modeConv(fpscr), &flags);
4282  set_fpscr0(fpscr, flags);
4283  return result;
4284 }
4285 
4286 template <>
4287 uint32_t
4288 fplibScale(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4289 {
4290  int flags = 0;
4291  uint32_t result = fp32_scale(op1, (int32_t)op2, modeConv(fpscr), &flags);
4292  set_fpscr0(fpscr, flags);
4293  return result;
4294 }
4295 
4296 template <>
4297 uint64_t
4298 fplibScale(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4299 {
4300  int flags = 0;
4301  uint64_t result = fp64_scale(op1, (int64_t)op2, modeConv(fpscr), &flags);
4302  set_fpscr0(fpscr, flags);
4303  return result;
4304 }
4305 
4306 template <>
4307 uint16_t
4308 fplibSqrt(uint16_t op, FPSCR &fpscr)
4309 {
4310  int flags = 0;
4311  uint16_t result = fp16_sqrt(op, modeConv(fpscr), &flags);
4312  set_fpscr0(fpscr, flags);
4313  return result;
4314 }
4315 
4316 template <>
4317 uint32_t
4318 fplibSqrt(uint32_t op, FPSCR &fpscr)
4319 {
4320  int flags = 0;
4321  uint32_t result = fp32_sqrt(op, modeConv(fpscr), &flags);
4322  set_fpscr0(fpscr, flags);
4323  return result;
4324 }
4325 
4326 template <>
4327 uint64_t
4328 fplibSqrt(uint64_t op, FPSCR &fpscr)
4329 {
4330  int flags = 0;
4331  uint64_t result = fp64_sqrt(op, modeConv(fpscr), &flags);
4332  set_fpscr0(fpscr, flags);
4333  return result;
4334 }
4335 
4336 template <>
4337 uint16_t
4338 fplibSub(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4339 {
4340  int flags = 0;
4341  uint16_t result = fp16_add(op1, op2, 1, modeConv(fpscr), &flags);
4342  set_fpscr0(fpscr, flags);
4343  return result;
4344 }
4345 
4346 template <>
4347 uint32_t
4348 fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4349 {
4350  int flags = 0;
4351  uint32_t result = fp32_add(op1, op2, 1, modeConv(fpscr), &flags);
4352  set_fpscr0(fpscr, flags);
4353  return result;
4354 }
4355 
4356 template <>
4357 uint64_t
4358 fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4359 {
4360  int flags = 0;
4361  uint64_t result = fp64_add(op1, op2, 1, modeConv(fpscr), &flags);
4362  set_fpscr0(fpscr, flags);
4363  return result;
4364 }
4365 
4366 template <>
4367 uint16_t
4368 fplibTrigMulAdd(uint8_t coeff_index, uint16_t op1, uint16_t op2, FPSCR &fpscr)
4369 {
4370  static uint16_t coeff[2][8] = {
4371  {
4372  0x3c00,
4373  0xb155,
4374  0x2030,
4375  0x0000,
4376  0x0000,
4377  0x0000,
4378  0x0000,
4379  0x0000,
4380  },
4381  {
4382  0x3c00,
4383  0xb800,
4384  0x293a,
4385  0x0000,
4386  0x0000,
4387  0x0000,
4388  0x0000,
4389  0x0000
4390  }
4391  };
4392  int flags = 0;
4393  uint16_t result =
4394  fp16_muladd(coeff[op2 >> (FP16_BITS - 1)][coeff_index], op1,
4395  fplibAbs(op2), 0, modeConv(fpscr), &flags);
4396  set_fpscr0(fpscr, flags);
4397  return result;
4398 }
4399 
4400 template <>
4401 uint32_t
4402 fplibTrigMulAdd(uint8_t coeff_index, uint32_t op1, uint32_t op2, FPSCR &fpscr)
4403 {
4404  static uint32_t coeff[2][8] = {
4405  {
4406  0x3f800000,
4407  0xbe2aaaab,
4408  0x3c088886,
4409  0xb95008b9,
4410  0x36369d6d,
4411  0x00000000,
4412  0x00000000,
4413  0x00000000
4414  },
4415  {
4416  0x3f800000,
4417  0xbf000000,
4418  0x3d2aaaa6,
4419  0xbab60705,
4420  0x37cd37cc,
4421  0x00000000,
4422  0x00000000,
4423  0x00000000
4424  }
4425  };
4426  int flags = 0;
4427  uint32_t result =
4428  fp32_muladd(coeff[op2 >> (FP32_BITS - 1)][coeff_index], op1,
4429  fplibAbs(op2), 0, modeConv(fpscr), &flags);
4430  set_fpscr0(fpscr, flags);
4431  return result;
4432 }
4433 
4434 template <>
4435 uint64_t
4436 fplibTrigMulAdd(uint8_t coeff_index, uint64_t op1, uint64_t op2, FPSCR &fpscr)
4437 {
4438  static uint64_t coeff[2][8] = {
4439  {
4440  0x3ff0000000000000ULL,
4441  0xbfc5555555555543ULL,
4442  0x3f8111111110f30cULL,
4443  0xbf2a01a019b92fc6ULL,
4444  0x3ec71de351f3d22bULL,
4445  0xbe5ae5e2b60f7b91ULL,
4446  0x3de5d8408868552fULL,
4447  0x0000000000000000ULL
4448  },
4449  {
4450  0x3ff0000000000000ULL,
4451  0xbfe0000000000000ULL,
4452  0x3fa5555555555536ULL,
4453  0xbf56c16c16c13a0bULL,
4454  0x3efa01a019b1e8d8ULL,
4455  0xbe927e4f7282f468ULL,
4456  0x3e21ee96d2641b13ULL,
4457  0xbda8f76380fbb401ULL
4458  }
4459  };
4460  int flags = 0;
4461  uint64_t result =
4462  fp64_muladd(coeff[op2 >> (FP64_BITS - 1)][coeff_index], op1,
4463  fplibAbs(op2), 0, modeConv(fpscr), &flags);
4464  set_fpscr0(fpscr, flags);
4465  return result;
4466 }
4467 
4468 template <>
4469 uint16_t
4470 fplibTrigSMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4471 {
4472  int flags = 0;
4473  int sgn, exp;
4474  uint16_t mnt;
4475 
4476  int mode = modeConv(fpscr);
4477  uint16_t result = fp16_mul(op1, op1, mode, &flags);
4478  set_fpscr0(fpscr, flags);
4479 
4480  fp16_unpack(&sgn, &exp, &mnt, result, mode, &flags);
4481  if (!fp16_is_NaN(exp, mnt)) {
4482  result = (result & ~(1ULL << (FP16_BITS - 1))) |
4483  op2 << (FP16_BITS - 1);
4484  }
4485  return result;
4486 }
4487 
4488 template <>
4489 uint32_t
4490 fplibTrigSMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4491 {
4492  int flags = 0;
4493  int sgn, exp;
4494  uint32_t mnt;
4495 
4496  int mode = modeConv(fpscr);
4497  uint32_t result = fp32_mul(op1, op1, mode, &flags);
4498  set_fpscr0(fpscr, flags);
4499 
4500  fp32_unpack(&sgn, &exp, &mnt, result, mode, &flags);
4501  if (!fp32_is_NaN(exp, mnt)) {
4502  result = (result & ~(1ULL << (FP32_BITS - 1))) | op2 << (FP32_BITS - 1);
4503  }
4504  return result;
4505 }
4506 
4507 template <>
4508 uint64_t
4509 fplibTrigSMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4510 {
4511  int flags = 0;
4512  int sgn, exp;
4513  uint64_t mnt;
4514 
4515  int mode = modeConv(fpscr);
4516  uint64_t result = fp64_mul(op1, op1, mode, &flags);
4517  set_fpscr0(fpscr, flags);
4518 
4519  fp64_unpack(&sgn, &exp, &mnt, result, mode, &flags);
4520  if (!fp64_is_NaN(exp, mnt)) {
4521  result = (result & ~(1ULL << (FP64_BITS - 1))) | op2 << (FP64_BITS - 1);
4522  }
4523  return result;
4524 }
4525 
4526 template <>
4527 uint16_t
4528 fplibTrigSSel(uint16_t op1, uint16_t op2, FPSCR &fpscr)
4529 {
4530  static constexpr uint16_t fpOne =
4531  (uint16_t)FP16_EXP_BIAS << FP16_MANT_BITS; // 1.0
4532  if (op2 & 1)
4533  op1 = fpOne;
4534  return op1 ^ ((op2 >> 1) << (FP16_BITS - 1));
4535 }
4536 
4537 template <>
4538 uint32_t
4539 fplibTrigSSel(uint32_t op1, uint32_t op2, FPSCR &fpscr)
4540 {
4541  static constexpr uint32_t fpOne =
4542  (uint32_t)FP32_EXP_BIAS << FP32_MANT_BITS; // 1.0
4543  if (op2 & 1)
4544  op1 = fpOne;
4545  return op1 ^ ((op2 >> 1) << (FP32_BITS - 1));
4546 }
4547 
4548 template <>
4549 uint64_t
4550 fplibTrigSSel(uint64_t op1, uint64_t op2, FPSCR &fpscr)
4551 {
4552  static constexpr uint64_t fpOne =
4553  (uint64_t)FP64_EXP_BIAS << FP64_MANT_BITS; // 1.0
4554  if (op2 & 1)
4555  op1 = fpOne;
4556  return op1 ^ ((op2 >> 1) << (FP64_BITS - 1));
4557 }
4558 
4559 static uint64_t
4560 FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
4561  int *flags)
4562 {
4563  int expmax = FP64_EXP_BIAS + FP64_BITS - 1;
4564  uint64_t x;
4565  int err;
4566 
4567  if (exp > expmax) {
4568  *flags = FPLIB_IOC;
4569  return ((uint64_t)!u << (FP64_BITS - 1)) - !sgn;
4570  }
4571 
4572  x = lsr64(mnt << FP64_EXP_BITS, expmax - exp);
4573  err = (exp > expmax - 2 ? 0 :
4574  (lsr64(mnt << FP64_EXP_BITS, expmax - 2 - exp) & 3) |
4575  !!(mnt << FP64_EXP_BITS & (lsl64(1, expmax - 2 - exp) - 1)));
4576 
4577  switch (rounding) {
4578  case FPRounding_TIEEVEN:
4579  x += (err == 3 || (err == 2 && (x & 1)));
4580  break;
4581  case FPRounding_POSINF:
4582  x += err && !sgn;
4583  break;
4584  case FPRounding_NEGINF:
4585  x += err && sgn;
4586  break;
4587  case FPRounding_ZERO:
4588  break;
4589  case FPRounding_TIEAWAY:
4590  x += err >> 1;
4591  break;
4592  default:
4593  panic("Unrecognized FP rounding mode");
4594  }
4595 
4596  if (u ? sgn && x : x > (1ULL << (FP64_BITS - 1)) - !sgn) {
4597  *flags = FPLIB_IOC;
4598  return ((uint64_t)!u << (FP64_BITS - 1)) - !sgn;
4599  }
4600 
4601  if (err) {
4602  *flags = FPLIB_IXC;
4603  }
4604 
4605  return sgn ? -x : x;
4606 }
4607 
4608 static uint32_t
4609 FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
4610  int *flags)
4611 {
4612  uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
4613  if (u ? x >= 1ULL << FP32_BITS :
4614  !(x < 1ULL << (FP32_BITS - 1) ||
4615  (uint64_t)-x <= (uint64_t)1 << (FP32_BITS - 1))) {
4616  *flags = FPLIB_IOC;
4617  x = ((uint32_t)!u << (FP32_BITS - 1)) - !sgn;
4618  }
4619  return x;
4620 }
4621 
4622 static uint16_t
4623 FPToFixed_16(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
4624  int *flags)
4625 {
4626  uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
4627  if (u ? x >= 1ULL << FP16_BITS :
4628  !(x < 1ULL << (FP16_BITS - 1) ||
4629  (uint64_t)-x <= (uint64_t)1 << (FP16_BITS - 1))) {
4630  *flags = FPLIB_IOC;
4631  x = ((uint16_t)!u << (FP16_BITS - 1)) - !sgn;
4632  }
4633  return x;
4634 }
4635 
4636 template <>
4637 uint16_t
4638 fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
4639  FPSCR &fpscr)
4640 {
4641  int flags = 0;
4642  int sgn, exp;
4643  uint16_t mnt, result;
4644 
4645  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4646  fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4647 
4648  // If NaN, set cumulative flag or take exception:
4649  if (fp16_is_NaN(exp, mnt)) {
4650  flags = FPLIB_IOC;
4651  result = 0;
4652  } else {
4653  assert(fbits >= 0);
4654  // Infinity is treated as an ordinary normalised number that saturates.
4655  result =
4656  FPToFixed_16(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits,
4657  (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS),
4658  u, rounding, &flags);
4659  }
4660 
4661  set_fpscr0(fpscr, flags);
4662 
4663  return result;
4664 }
4665 
4666 template <>
4667 uint32_t
4668 fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
4669  FPSCR &fpscr)
4670 {
4671  int flags = 0;
4672  int sgn, exp;
4673  uint16_t mnt;
4674  uint32_t result;
4675 
4676  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4677  fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4678 
4679  // If NaN, set cumulative flag or take exception:
4680  if (fp16_is_NaN(exp, mnt)) {
4681  flags = FPLIB_IOC;
4682  result = 0;
4683  } else {
4684  assert(fbits >= 0);
4685  if (exp == FP16_EXP_INF)
4686  exp = 255; // infinity: make it big enough to saturate
4687  result =
4688  FPToFixed_32(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits,
4689  (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS),
4690  u, rounding, &flags);
4691  }
4692 
4693  set_fpscr0(fpscr, flags);
4694 
4695  return result;
4696 }
4697 
4698 template <>
4699 uint32_t
4700 fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4701 {
4702  int flags = 0;
4703  int sgn, exp;
4704  uint32_t mnt, result;
4705 
4706  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4707  fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4708 
4709  // If NaN, set cumulative flag or take exception:
4710  if (fp32_is_NaN(exp, mnt)) {
4711  flags = FPLIB_IOC;
4712  result = 0;
4713  } else {
4714  assert(fbits >= 0);
4715  // Infinity is treated as an ordinary normalised number that saturates.
4716  result =
4717  FPToFixed_32(sgn, exp + FP64_EXP_BIAS - FP32_EXP_BIAS + fbits,
4718  (uint64_t)mnt << (FP64_MANT_BITS - FP32_MANT_BITS),
4719  u, rounding, &flags);
4720  }
4721 
4722  set_fpscr0(fpscr, flags);
4723 
4724  return result;
4725 }
4726 
4727 template <>
4728 uint32_t
4729 fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4730 {
4731  int flags = 0;
4732  int sgn, exp;
4733  uint64_t mnt;
4734  uint32_t result;
4735 
4736  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4737  fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4738 
4739  // If NaN, set cumulative flag or take exception:
4740  if (fp64_is_NaN(exp, mnt)) {
4741  flags = FPLIB_IOC;
4742  result = 0;
4743  } else {
4744  assert(fbits >= 0);
4745  // Infinity is treated as an ordinary normalised number that saturates.
4746  result = FPToFixed_32(sgn, exp + fbits, mnt, u, rounding, &flags);
4747  }
4748 
4749  set_fpscr0(fpscr, flags);
4750 
4751  return result;
4752 }
4753 
4754 uint32_t
4755 fplibFPToFixedJS(uint64_t op, FPSCR &fpscr, bool is64, uint8_t& nz)
4756 {
4757  int flags = 0;
4758  uint32_t result;
4759  bool Z = true;
4760 
4761  uint32_t sgn = bits(op, 63);
4762  int32_t exp = bits(op, 62, 52);
4763  uint64_t mnt = bits(op, 51, 0);
4764 
4765  if (exp == 0) {
4766  if (mnt != 0) {
4767  if (fpscr.fz) {
4768  flags |= FPLIB_IDC;
4769  } else {
4770  flags |= FPLIB_IXC;
4771  Z = 0;
4772  }
4773  }
4774  result = 0;
4775  } else if (exp == 0x7ff) {
4776  flags |= FPLIB_IOC;
4777  result = 0;
4778  Z = 0;
4779  } else {
4780  mnt |= 1ULL << FP64_MANT_BITS;
4781  int mnt_shft = exp - FP64_EXP_BIAS - 52;
4782  bool err = true;
4783 
4784  if (abs(mnt_shft) >= FP64_BITS) {
4785  result = 0;
4786  Z = 0;
4787  } else if (mnt_shft >= 0) {
4788  result = lsl64(mnt, mnt_shft);
4789  } else if (mnt_shft < 0) {
4790  err = lsl64(mnt, mnt_shft+FP64_BITS) != 0;
4791  result = lsr64(mnt, abs(mnt_shft));
4792  }
4793  uint64_t max_result = (1UL << (FP32_BITS - 1)) -!sgn;
4794  if ((exp - FP64_EXP_BIAS) > 31 || result > max_result) {
4795  flags |= FPLIB_IOC;
4796  Z = false;
4797  } else if (err) {
4798  flags |= FPLIB_IXC;
4799  Z = false;
4800  }
4801  result = sgn ? -result : result;
4802  }
4803  if (sgn == 1 && result == 0)
4804  Z = false;
4805 
4806  if (is64) {
4807  nz = Z? 0x1: 0x0;
4808  } else {
4809  fpscr.n = 0;
4810  fpscr.z = (int)Z;
4811  fpscr.c = 0;
4812  fpscr.v = 0;
4813  }
4814 
4815  set_fpscr0(fpscr, flags);
4816  return result;
4817 }
4818 
4819 template <>
4820 uint64_t
4821 fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
4822  FPSCR &fpscr)
4823 {
4824  int flags = 0;
4825  int sgn, exp;
4826  uint16_t mnt;
4827  uint64_t result;
4828 
4829  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4830  fp16_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4831 
4832  // If NaN, set cumulative flag or take exception:
4833  if (fp16_is_NaN(exp, mnt)) {
4834  flags = FPLIB_IOC;
4835  result = 0;
4836  } else {
4837  assert(fbits >= 0);
4838  if (exp == FP16_EXP_INF)
4839  exp = 255; // infinity: make it big enough to saturate
4840  result =
4841  FPToFixed_64(sgn, exp + FP64_EXP_BIAS - FP16_EXP_BIAS + fbits,
4842  (uint64_t)mnt << (FP64_MANT_BITS - FP16_MANT_BITS),
4843  u, rounding, &flags);
4844  }
4845 
4846  set_fpscr0(fpscr, flags);
4847 
4848  return result;
4849 }
4850 
4851 template <>
4852 uint64_t
4853 fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4854 {
4855  int flags = 0;
4856  int sgn, exp;
4857  uint32_t mnt;
4858  uint64_t result;
4859 
4860  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4861  fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4862 
4863  // If NaN, set cumulative flag or take exception:
4864  if (fp32_is_NaN(exp, mnt)) {
4865  flags = FPLIB_IOC;
4866  result = 0;
4867  } else {
4868  assert(fbits >= 0);
4869  // Infinity is treated as an ordinary normalised number that saturates.
4870  result =
4871  FPToFixed_64(sgn, exp + FP64_EXP_BIAS - FP32_EXP_BIAS + fbits,
4872  (uint64_t)mnt << (FP64_MANT_BITS - FP32_MANT_BITS),
4873  u, rounding, &flags);
4874  }
4875 
4876  set_fpscr0(fpscr, flags);
4877 
4878  return result;
4879 }
4880 
4881 template <>
4882 uint64_t
4883 fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4884 {
4885  int flags = 0;
4886  int sgn, exp;
4887  uint64_t mnt, result;
4888 
4889  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
4890  fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
4891 
4892  // If NaN, set cumulative flag or take exception:
4893  if (fp64_is_NaN(exp, mnt)) {
4894  flags = FPLIB_IOC;
4895  result = 0;
4896  } else {
4897  assert(fbits >= 0);
4898  // Infinity is treated as an ordinary normalised number that saturates.
4899  result = FPToFixed_64(sgn, exp + fbits, mnt, u, rounding, &flags);
4900  }
4901 
4902  set_fpscr0(fpscr, flags);
4903 
4904  return result;
4905 }
4906 
4907 static uint16_t
4908 fp16_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
4909 {
4910  int x_sgn = !u && a >> (FP64_BITS - 1);
4911  int x_exp = FP16_EXP_BIAS + FP64_BITS - 1 - fbits;
4912  uint64_t x_mnt = x_sgn ? -a : a;
4913 
4914  // Handle zero:
4915  if (!x_mnt) {
4916  return fp16_zero(0);
4917  }
4918 
4919  // Normalise into FP16_BITS bits, collapsing error into bottom bit:
4920  x_mnt = fp64_normalise(x_mnt, &x_exp);
4921  x_mnt = (x_mnt >> (FP64_BITS - FP16_BITS - 1) |
4922  !!(x_mnt & ((1ULL << (FP64_BITS - FP16_BITS - 1)) - 1)));
4923 
4924  return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
4925 }
4926 
4927 static uint32_t
4928 fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
4929 {
4930  int x_sgn = !u && a >> (FP64_BITS - 1);
4931  int x_exp = FP32_EXP_BIAS + FP64_BITS - 1 - fbits;
4932  uint64_t x_mnt = x_sgn ? -a : a;
4933 
4934  // Handle zero:
4935  if (!x_mnt) {
4936  return fp32_zero(0);
4937  }
4938 
4939  // Normalise into FP32_BITS bits, collapsing error into bottom bit:
4940  x_mnt = fp64_normalise(x_mnt, &x_exp);
4941  x_mnt = (x_mnt >> (FP64_BITS - FP32_BITS - 1) |
4942  !!(x_mnt & ((1ULL << (FP64_BITS - FP32_BITS - 1)) - 1)));
4943 
4944  return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
4945 }
4946 
4947 static uint64_t
4948 fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
4949 {
4950  int x_sgn = !u && a >> (FP64_BITS - 1);
4951  int x_exp = FP64_EXP_BIAS + FP64_BITS - 1 - fbits;
4952  uint64_t x_mnt = x_sgn ? -a : a;
4953 
4954  // Handle zero:
4955  if (!x_mnt) {
4956  return fp64_zero(0);
4957  }
4958 
4959  x_mnt = fp64_normalise(x_mnt, &x_exp);
4960 
4961  return fp64_round(x_sgn, x_exp, x_mnt << 1, mode, flags);
4962 }
4963 
4964 template <>
4965 uint16_t
4966 fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding,
4967  FPSCR &fpscr)
4968 {
4969  int flags = 0;
4970  uint16_t res = fp16_cvtf(op, fbits, u,
4971  (int)rounding | ((uint32_t)fpscr >> 22 & 12),
4972  &flags);
4973  set_fpscr0(fpscr, flags);
4974  return res;
4975 }
4976 
4977 template <>
4978 uint32_t
4979 fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4980 {
4981  int flags = 0;
4982  uint32_t res = fp32_cvtf(op, fbits, u,
4983  (int)rounding | ((uint32_t)fpscr >> 22 & 12),
4984  &flags);
4985  set_fpscr0(fpscr, flags);
4986  return res;
4987 }
4988 
4989 template <>
4990 uint64_t
4991 fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
4992 {
4993  int flags = 0;
4994  uint64_t res = fp64_cvtf(op, fbits, u,
4995  (int)rounding | ((uint32_t)fpscr >> 22 & 12),
4996  &flags);
4997  set_fpscr0(fpscr, flags);
4998  return res;
4999 }
5000 
5001 template <>
5002 uint16_t
5004 {
5005  return fp16_infinity(sgn);
5006 }
5007 
5008 template <>
5009 uint32_t
5010 fplibInfinity(int sgn)
5011 {
5012  return fp32_infinity(sgn);
5013 }
5014 
5015 template <>
5016 uint64_t
5017 fplibInfinity(int sgn)
5018 {
5019  return fp64_infinity(sgn);
5020 }
5021 
5022 template <>
5023 uint16_t
5025 {
5026  return fp16_defaultNaN();
5027 }
5028 
5029 template <>
5030 uint32_t
5032 {
5033  return fp32_defaultNaN();
5034 }
5035 
5036 template <>
5037 uint64_t
5039 {
5040  return fp64_defaultNaN();
5041 }
5042 
5043 } // namespace ArmISA
5044 } // namespace gem5
gem5::ArmISA::fplibRSqrtStepFused
uint16_t fplibRSqrtStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr)
Definition: fplib.cc:3636
gem5::ArmISA::lsl64
static uint64_t lsl64(uint64_t x, uint32_t shift)
Definition: fplib.cc:122
gem5::ArmISA::fp64_infinity
static uint64_t fp64_infinity(int sgn)
Definition: fplib.cc:368
FPLIB_DN
#define FPLIB_DN
Definition: fplib.cc:58
FPLIB_FZ
#define FPLIB_FZ
Definition: fplib.cc:57
gem5::ArmISA::fp64_FPOnePointFive
static uint64_t fp64_FPOnePointFive(int sgn)
Definition: fplib.cc:2584
gem5::ArmISA::fp32_muladd
static uint32_t fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale, int mode, int *flags)
Definition: fplib.cc:1599
gem5::ArmISA::lsl128
static void lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
Definition: fplib.cc:134
gem5::ArmISA::FPToFixed_64
static uint64_t FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, int *flags)
Definition: fplib.cc:4560
gem5::ArmISA::fp16_minmaxnum
static void fp16_minmaxnum(uint16_t *op1, uint16_t *op2, int sgn)
Definition: fplib.cc:3145
FPLIB_RN
#define FPLIB_RN
Definition: fplib.cc:53
gem5::ArmISA::fp64_FPConvertNaN_32
static uint64_t fp64_FPConvertNaN_32(uint32_t op)
Definition: fplib.cc:2564
FP64_EXP_BIAS
#define FP64_EXP_BIAS
Definition: fplib.cc:79
gem5::ArmISA::fp16_round
static uint16_t fp16_round(int sgn, int exp, uint16_t mnt, int mode, int *flags)
Definition: fplib.cc:799
gem5::ArmISA::fp16_compare_un
static int fp16_compare_un(uint16_t a, uint16_t b, int mode, int *flags)
Definition: fplib.cc:1028
gem5::ArmISA::fp16_compare_ge
static int fp16_compare_ge(uint16_t a, uint16_t b, int mode, int *flags)
Definition: fplib.cc:978
gem5::ArmISA::fp32_FPConvertNaN_16
static uint32_t fp32_FPConvertNaN_16(uint16_t op)
Definition: fplib.cc:2540
gem5::ArmISA::fp32_compare_ge
static int fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags)
Definition: fplib.cc:1066
gem5::ArmISA::fp16_defaultNaN
static uint16_t fp16_defaultNaN()
Definition: fplib.cc:374
gem5::ArmISA::fp32_is_quiet_NaN
static int fp32_is_quiet_NaN(int exp, uint32_t mnt)
Definition: fplib.cc:505
gem5::ArmISA::fplibConvert
uint16_t fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
Definition: fplib.cc:2627
gem5::ArmISA::fp16_FPThree
static uint16_t fp16_FPThree(int sgn)
Definition: fplib.cc:2590
gem5::ArmISA::fp16_is_infinity
static int fp16_is_infinity(int exp, uint16_t mnt)
Definition: fplib.cc:517
FP64_EXP_BITS
#define FP64_EXP_BITS
Definition: fplib.cc:75
FP32_BITS
#define FP32_BITS
Definition: fplib.cc:70
FP32_EXP_BIAS
#define FP32_EXP_BIAS
Definition: fplib.cc:78
gem5::ArmISA::FPRounding
FPRounding
Definition: fplib.hh:60
gem5::X86ISA::scale
scale
Definition: types.hh:97
gem5::ArmISA::fplibCompare
int fplibCompare(uint16_t op1, uint16_t op2, bool signal_nans, FPSCR &fpscr)
Definition: fplib.cc:2423
gem5::ArmISA::fp16_process_NaNs3
static uint16_t fp16_process_NaNs3(uint16_t a, uint16_t b, uint16_t c, int mode, int *flags)
Definition: fplib.cc:634
gem5::ArmISA::fp64_FPThree
static uint64_t fp64_FPThree(int sgn)
Definition: fplib.cc:2602
gem5::ArmISA::fplibCompareGT
bool fplibCompareGT(uint16_t a, uint16_t b, FPSCR &fpscr)
Definition: fplib.cc:2272
gem5::ArmISA::fp32_repack
static uint32_t fp32_repack(int sgn, int exp, uint32_t mnt)
Definition: fplib.cc:3133
gem5::ArmISA::fp16_FPOnePointFive
static uint16_t fp16_FPOnePointFive(int sgn)
Definition: fplib.cc:2572
gem5::ArmISA::fp32_process_NaN
static uint32_t fp32_process_NaN(uint32_t a, int mode, int *flags)
Definition: fplib.cc:545
gem5::ArmISA::fp16_repack
static uint16_t fp16_repack(int sgn, int exp, uint16_t mnt)
Definition: fplib.cc:3127
gem5::ArmISA::fp16_max_normal
static uint16_t fp16_max_normal(int sgn)
Definition: fplib.cc:338
gem5::ArmISA::fplibFPToFixed
uint16_t fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
Definition: fplib.cc:4638
gem5::ArmISA::err
Bitfield< 6 > err
Definition: misc_types.hh:803
gem5::ArmISA::lsl16
static uint16_t lsl16(uint16_t x, uint32_t shift)
Definition: fplib.cc:98
gem5::ArmISA::fplibCompareGE
bool fplibCompareGE(uint16_t a, uint16_t b, FPSCR &fpscr)
Definition: fplib.cc:2262
gem5::ArmISA::fp32_process_NaNs3
static uint32_t fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags)
Definition: fplib.cc:663
gem5::ArmISA::fp64_max_normal
static uint64_t fp64_max_normal(int sgn)
Definition: fplib.cc:350
FPLIB_IOC
#define FPLIB_IOC
Definition: fplib.cc:67
gem5::ArmISA::t0
Bitfield< 0 > t0
Definition: misc_types.hh:234
gem5::ArmISA::fplibDefaultNaN
uint16_t fplibDefaultNaN()
Foating-point value for default NaN.
Definition: fplib.cc:5024
FPLIB_UFC
#define FPLIB_UFC
Definition: fplib.cc:64
gem5::ArmISA::fp16_add
static uint16_t fp16_add(uint16_t a, uint16_t b, int neg, int mode, int *flags)
Definition: fplib.cc:1223
gem5::ArmISA::fp64_zero
static uint64_t fp64_zero(int sgn)
Definition: fplib.cc:332
gem5::ArmISA::modeConv
static int modeConv(FPSCR fpscr)
Definition: fplib.cc:2217
gem5::ArmISA::fp32_scale
static uint32_t fp32_scale(uint32_t a, int32_t b, int mode, int *flags)
Definition: fplib.cc:1972
gem5::ArmISA::fp16_is_NaN
static int fp16_is_NaN(int exp, uint16_t mnt)
Definition: fplib.cc:463
gem5::ArmISA::fp16_zero
static uint16_t fp16_zero(int sgn)
Definition: fplib.cc:320
gem5::ArmISA::a
Bitfield< 8 > a
Definition: misc_types.hh:66
FP64_EXP_INF
#define FP64_EXP_INF
Definition: fplib.cc:83
gem5::ArmISA::fp16_sqrt
static uint16_t fp16_sqrt(uint16_t a, int mode, int *flags)
Definition: fplib.cc:2040
gem5::ArmISA::fp32_FPTwo
static uint32_t fp32_FPTwo(int sgn)
Definition: fplib.cc:2614
gem5::ArmISA::fp16_is_quiet_NaN
static int fp16_is_quiet_NaN(int exp, uint16_t mnt)
Definition: fplib.cc:499
FP16_EXP_BITS
#define FP16_EXP_BITS
Definition: fplib.cc:73
gem5::ArmISA::FPRounding_TIEEVEN
@ FPRounding_TIEEVEN
Definition: fplib.hh:62
gem5::ArmISA::fp64_process_NaNs
static uint64_t fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags)
Definition: fplib.cc:611
gem5::VegaISA::r
Bitfield< 5 > r
Definition: pagetable.hh:60
gem5::ArmISA::fplibScale
uint16_t fplibScale(uint16_t op1, uint16_t op2, FPSCR &fpscr)
Definition: fplib.cc:4278
gem5::ArmISA::fp32_add
static uint32_t fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags)
Definition: fplib.cc:1283
gem5::ArmISA::fp16_scale
static uint16_t fp16_scale(uint16_t a, int16_t b, int mode, int *flags)
Definition: fplib.cc:1938
gem5::ArmISA::fp16_div
static uint16_t fp16_div(uint16_t a, uint16_t b, int mode, int *flags)
Definition: fplib.cc:1776
gem5::ArmISA::fplibRSqrtEstimate
uint16_t fplibRSqrtEstimate(uint16_t op, FPSCR &fpscr)
Definition: fplib.cc:3531
FP64_MANT_BITS
#define FP64_MANT_BITS
Definition: fplib.cc:87
gem5::ArmISA::fp64_compare_eq
static int fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags)
Definition: fplib.cc:1135
gem5::ArmISA::fp16_is_signalling_NaN
static int fp16_is_signalling_NaN(int exp, uint16_t mnt)
Definition: fplib.cc:481
gem5::ArmISA::fp64_normalise
static uint64_t fp64_normalise(uint64_t mnt, int *exp)
Definition: fplib.cc:255
gem5::ArmISA::cmp128
static int cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
Definition: fplib.cc:213
FP32_MANT
#define FP32_MANT(x)
Definition: fplib.cc:94
gem5::ArmISA::t1
Bitfield< 1 > t1
Definition: misc_types.hh:233
gem5::ArmISA::fplibAbs
uint16_t fplibAbs(uint16_t op)
Definition: fplib.cc:2372
gem5::ArmISA::shift
Bitfield< 6, 5 > shift
Definition: types.hh:117
gem5::ArmISA::fp32_pack
static uint32_t fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt)
Definition: fplib.cc:308
FPLIB_FZ16
#define FPLIB_FZ16
Definition: fplib.cc:60
gem5::ArmISA::FPCRRounding
static FPRounding FPCRRounding(FPSCR &fpscr)
Definition: fplib.hh:71
gem5::ArmISA::mul62x62
static void mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
Definition: fplib.cc:170
gem5::ArmISA::fp64_scale
static uint64_t fp64_scale(uint64_t a, int64_t b, int mode, int *flags)
Definition: fplib.cc:2006
gem5::ArmISA::fp16_cvtf
static uint16_t fp16_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
Definition: fplib.cc:4908
FPLIB_AHP
#define FPLIB_AHP
Definition: fplib.cc:59
gem5::ArmISA::fplibFixedToFP
uint16_t fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
Floating-point convert from fixed-point.
Definition: fplib.cc:4966
gem5::ArmISA::fp32_sqrt
static uint32_t fp32_sqrt(uint32_t a, int mode, int *flags)
Definition: fplib.cc:2092
FPLIB_IDC
#define FPLIB_IDC
Definition: fplib.cc:62
gem5::ArmISA::fplibSqrt
uint16_t fplibSqrt(uint16_t op, FPSCR &fpscr)
Definition: fplib.cc:4308
gem5::ArmISA::b
Bitfield< 7 > b
Definition: misc_types.hh:382
gem5::ArmISA::nz
nz
Definition: misc_types.hh:52
gem5::ArmISA::fp32_cvtf
static uint32_t fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
Definition: fplib.cc:4928
gem5::ArmISA::fplibSub
uint16_t fplibSub(uint16_t op1, uint16_t op2, FPSCR &fpscr)
Definition: fplib.cc:4338
gem5::ArmISA::add128
static void add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
Definition: fplib.cc:197
FP64_EXP
#define FP64_EXP(x)
Definition: fplib.cc:91
gem5::MipsISA::a0
Bitfield< 20, 18 > a0
Definition: mt_constants.hh:79
sc_dt::neg
void neg(sc_fxval &c, const sc_fxnum &a)
Definition: sc_fxnum.hh:2270
FP16_MANT
#define FP16_MANT(x)
Definition: fplib.cc:93
gem5::ArmISA::rm
Bitfield< 3, 0 > rm
Definition: types.hh:118
gem5::VegaISA::x
Bitfield< 4 > x
Definition: pagetable.hh:61
gem5::ArmISA::fp32_infinity
static uint32_t fp32_infinity(int sgn)
Definition: fplib.cc:362
gem5::ArmISA::fp32_minmaxnum
static void fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn)
Definition: fplib.cc:3157
gem5::ArmISA::FPRounding_POSINF
@ FPRounding_POSINF
Definition: fplib.hh:63
gem5::ArmISA::fp32_zero
static uint32_t fp32_zero(int sgn)
Definition: fplib.cc:326
gem5::ArmISA::fplibCompareUN
bool fplibCompareUN(uint16_t a, uint16_t b, FPSCR &fpscr)
Definition: fplib.cc:2282
gem5::ArmISA::fp16_FPConvertNaN_32
static uint16_t fp16_FPConvertNaN_32(uint32_t op)
Definition: fplib.cc:2524
gem5::ArmISA::FPToFixed_32
static uint32_t FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, int *flags)
Definition: fplib.cc:4609
gem5::ArmISA::fplibMulX
uint16_t fplibMulX(uint16_t op1, uint16_t op2, FPSCR &fpscr)
Definition: fplib.cc:3398
gem5::ArmISA::fp32_defaultNaN
static uint32_t fp32_defaultNaN()
Definition: fplib.cc:380
gem5::ArmISA::fp16_compare_gt
static int fp16_compare_gt(uint16_t a, uint16_t b, int mode, int *flags)
Definition: fplib.cc:1003
gem5::ArmISA::FPRounding_NEGINF
@ FPRounding_NEGINF
Definition: fplib.hh:64
gem5::ArmISA::fp64_div
static uint64_t fp64_div(uint64_t a, uint64_t b, int mode, int *flags)
Definition: fplib.cc:1860
gem5::ArmISA::fp64_round_
static uint64_t fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags)
Definition: fplib.cc:882
FP32_EXP_INF
#define FP32_EXP_INF
Definition: fplib.cc:82
gem5::ArmISA::fp32_round_
static uint32_t fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags)
Definition: fplib.cc:805
gem5::ArmISA::a1
Bitfield< 22 > a1
Definition: misc_types.hh:501
gem5::QARMA::b1
Bitfield< 7, 4 > b1
Definition: qarma.hh:65
gem5::ArmISA::fplibFPToFixedJS
uint32_t fplibFPToFixedJS(uint64_t op, FPSCR &fpscr, bool is64, uint8_t &nz)
Floating-point JS convert to a signed integer, with rounding to zero.
Definition: fplib.cc:4755
gem5::ArmISA::mask
Bitfield< 3, 0 > mask
Definition: pcstate.hh:63
gem5::ArmISA::fplibRecpX
uint16_t fplibRecpX(uint16_t op, FPSCR &fpscr)
Definition: fplib.cc:4002
gem5::ArmISA::fp16_muladd
static uint16_t fp16_muladd(uint16_t a, uint16_t b, uint16_t c, int scale, int mode, int *flags)
Definition: fplib.cc:1514
gem5::ArmISA::fplibTrigSSel
uint16_t fplibTrigSSel(uint16_t op1, uint16_t op2, FPSCR &fpscr)
Definition: fplib.cc:4528
gem5::bits
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Definition: bitfield.hh:76
flags
uint8_t flags
Definition: helpers.cc:66
gem5::ArmISA::fp32_is_infinity
static int fp32_is_infinity(int exp, uint32_t mnt)
Definition: fplib.cc:523
gem5::ArmISA::set_fpscr0
static void set_fpscr0(FPSCR &fpscr, int flags)
Definition: fplib.cc:1915
gem5::ArmISA::fp64_add
static uint64_t fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags)
Definition: fplib.cc:1343
gem5::ArmISA::set_fpscr
static void set_fpscr(FPSCR &fpscr, int flags)
Definition: fplib.cc:2225
gem5::ArmISA::c
Bitfield< 29 > c
Definition: misc_types.hh:53
gem5::ArmISA::fplibRecipEstimate
uint16_t fplibRecipEstimate(uint16_t op, FPSCR &fpscr)
Definition: fplib.cc:3726
gem5::ArmISA::FPRounding_TIEAWAY
@ FPRounding_TIEAWAY
Definition: fplib.hh:66
gem5::ArmISA::fp32_FPOnePointFive
static uint32_t fp32_FPOnePointFive(int sgn)
Definition: fplib.cc:2578
gem5::ArmISA::lsl32
static uint32_t lsl32(uint32_t x, uint32_t shift)
Definition: fplib.cc:110
FP64_BITS
#define FP64_BITS
Definition: fplib.cc:71
gem5::ArmISA::fplibRoundInt
uint16_t fplibRoundInt(uint16_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
Definition: fplib.cc:4083
gem5::ArmISA::fp16_FPTwo
static uint16_t fp16_FPTwo(int sgn)
Definition: fplib.cc:2608
gem5::ArmISA::fplibTrigSMul
uint16_t fplibTrigSMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
Definition: fplib.cc:4470
gem5::ArmISA::fp64_defaultNaN
static uint64_t fp64_defaultNaN()
Definition: fplib.cc:386
gem5::ArmISA::fp64_mul
static uint64_t fp64_mul(uint64_t a, uint64_t b, int mode, int *flags)
Definition: fplib.cc:1477
gem5::ArmISA::fp32_div
static uint32_t fp32_div(uint32_t a, uint32_t b, int mode, int *flags)
Definition: fplib.cc:1818
gem5::ArmISA::sub128
static void sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
Definition: fplib.cc:205
gem5::ArmISA::fp16_unpack
static void fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode, int *flags)
Definition: fplib.cc:392
gem5::MipsISA::r0
Bitfield< 3 > r0
Definition: pra_constants.hh:139
gem5::ArmISA::fp16_compare_eq
static int fp16_compare_eq(uint16_t a, uint16_t b, int mode, int *flags)
Definition: fplib.cc:959
gem5::ArmISA::fp64_process_NaNs3
static uint64_t fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags)
Definition: fplib.cc:692
gem5::ArmISA::fp64_sqrt
static uint64_t fp64_sqrt(uint64_t a, int mode, int *flags)
Definition: fplib.cc:2147
FP32_EXP
#define FP32_EXP(x)
Definition: fplib.cc:90
gem5::ArmISA::fplibMax
uint16_t fplibMax(uint16_t op1, uint16_t op2, FPSCR &fpscr)
Definition: fplib.cc:3182
gem5::ArmISA::fplibMin
uint16_t fplibMin(uint16_t op1, uint16_t op2, FPSCR &fpscr)
Definition: fplib.cc:3275
gem5::ArmISA::fp64_repack
static uint64_t fp64_repack(int sgn, int exp, uint64_t mnt)
Definition: fplib.cc:3139
gem5::ArmISA::lsr32
static uint32_t lsr32(uint32_t x, uint32_t shift)
Definition: fplib.cc:116
gem5::ArmISA::u
Bitfield< 22 > u
Definition: misc_types.hh:353
gem5::ArmISA::fp16_process_NaN
static uint16_t fp16_process_NaN(uint16_t a, int mode, int *flags)
Definition: fplib.cc:535
fplib.hh
gem5::ArmISA::lsr128
static void lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
Definition: fplib.cc:152
gem5::QARMA::b0
Bitfield< 3, 0 > b0
Definition: qarma.hh:66
gem5::ArmISA::fplibMul
uint16_t fplibMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
Definition: fplib.cc:3368
gem5::ArmISA::FPRounding_ZERO
@ FPRounding_ZERO
Definition: fplib.hh:65
gem5::ArmISA::fp64_compare_ge
static int fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags)
Definition: fplib.cc:1154
gem5::ArmISA::fp32_unpack
static void fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode, int *flags)
Definition: fplib.cc:415
gem5::ArmISA::fp32_compare_gt
static int fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags)
Definition: fplib.cc:1091
gem5::ArmISA::fp16_process_NaNs
static uint16_t fp16_process_NaNs(uint16_t a, uint16_t b, int mode, int *flags)
Definition: fplib.cc:565
FP32_MANT_BITS
#define FP32_MANT_BITS
Definition: fplib.cc:86
gem5::ArmISA::fp64_compare_un
static int fp64_compare_un(uint64_t a, uint64_t b, int mode, int *flags)
Definition: fplib.cc:1204
gem5::ArmISA::recip_sqrt_estimate
static const uint8_t recip_sqrt_estimate[256]
Definition: fplib.cc:3510
gem5::ArmISA::fp64_is_quiet_NaN
static int fp64_is_quiet_NaN(int exp, uint64_t mnt)
Definition: fplib.cc:511
gem5::ArmISA::fplibMaxNum
uint16_t fplibMaxNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
Definition: fplib.cc:3251
gem5::ArmISA::fplibTrigMulAdd
uint16_t fplibTrigMulAdd(uint8_t coeff_index, uint16_t op1, uint16_t op2, FPSCR &fpscr)
Definition: fplib.cc:4368
FP64_MANT
#define FP64_MANT(x)
Definition: fplib.cc:95
FP16_EXP_INF
#define FP16_EXP_INF
Definition: fplib.cc:81
gem5::ArmISA::fplibExpA
uint16_t fplibExpA(uint16_t op)
Definition: fplib.cc:2938
gem5::ArmISA::fp32_process_NaNs
static uint32_t fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags)
Definition: fplib.cc:588
logging.hh
gem5::ArmISA::mul64x32
static void mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b)
Definition: fplib.cc:188
gem5::ArmISA::fp32_max_normal
static uint32_t fp32_max_normal(int sgn)
Definition: fplib.cc:344
gem5::ArmISA::fp64_is_infinity
static int fp64_is_infinity(int exp, uint64_t mnt)
Definition: fplib.cc:529
gem5::ArmISA::fp64_unpack
static void fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode, int *flags)
Definition: fplib.cc:438
FPLIB_DZC
#define FPLIB_DZC
Definition: fplib.cc:66
gem5::ArmISA::fp32_mul
static uint32_t fp32_mul(uint32_t a, uint32_t b, int mode, int *flags)
Definition: fplib.cc:1440
gem5::ArmISA::fp32_is_NaN
static int fp32_is_NaN(int exp, uint32_t mnt)
Definition: fplib.cc:469
gem5::ArmISA::FPToFixed_16
static uint16_t FPToFixed_16(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, int *flags)
Definition: fplib.cc:4623
gem5::ArmISA::fp64_pack
static uint64_t fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt)
Definition: fplib.cc:314
gem5::ArmISA::fp64_FPTwo
static uint64_t fp64_FPTwo(int sgn)
Definition: fplib.cc:2620
FP32_EXP_BITS
#define FP32_EXP_BITS
Definition: fplib.cc:74
gem5::ArmISA::fplibMinNum
uint16_t fplibMinNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
Definition: fplib.cc:3344
gem5::ArmISA::FPRounding_ODD
@ FPRounding_ODD
Definition: fplib.hh:67
gem5::ArmISA::fp32_round
static uint32_t fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags)
Definition: fplib.cc:876
FP16_EXP_BIAS
#define FP16_EXP_BIAS
Definition: fplib.cc:77
FP16_MANT_BITS
#define FP16_MANT_BITS
Definition: fplib.cc:85
gem5::ArmISA::fp16_round_
static uint16_t fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags)
Definition: fplib.cc:721
gem5::ArmISA::fplibAdd
uint16_t fplibAdd(uint16_t op1, uint16_t op2, FPSCR &fpscr)
Definition: fplib.cc:2393
gem5::ArmISA::fp16_pack
static uint16_t fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt)
Definition: fplib.cc:302
gem5::ArmISA::fp16_infinity
static uint16_t fp16_infinity(int sgn)
Definition: fplib.cc:356
gem5::ArmISA::fp64_process_NaN
static uint64_t fp64_process_NaN(uint64_t a, int mode, int *flags)
Definition: fplib.cc:555
gem5::ArmISA::lsr16
static uint16_t lsr16(uint16_t x, uint32_t shift)
Definition: fplib.cc:104
gem5::ArmISA::fp32_compare_un
static int fp32_compare_un(uint32_t a, uint32_t b, int mode, int *flags)
Definition: fplib.cc:1116
gem5::ArmISA::lsr64
static uint64_t lsr64(uint64_t x, uint32_t shift)
Definition: fplib.cc:128
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: gpu_translation_state.hh:37
gem5::ArmISA::fplibCompareEQ
bool fplibCompareEQ(uint16_t a, uint16_t b, FPSCR &fpscr)
Definition: fplib.cc:2252
gem5::ArmISA::fplibMulAdd
uint16_t fplibMulAdd(uint16_t addend, uint16_t op1, uint16_t op2, FPSCR &fpscr)
Definition: fplib.cc:2878
gem5::ArmISA::fp32_is_signalling_NaN
static int fp32_is_signalling_NaN(int exp, uint32_t mnt)
Definition: fplib.cc:487
gem5::ArmISA::fplibDiv
uint16_t fplibDiv(uint16_t op1, uint16_t op2, FPSCR &fpscr)
Definition: fplib.cc:2908
gem5::ArmISA::fp64_FPConvertNaN_16
static uint64_t fp64_FPConvertNaN_16(uint16_t op)
Definition: fplib.cc:2556
gem5::ArmISA::fplibRecipStepFused
uint16_t fplibRecipStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr)
Definition: fplib.cc:3912
gem5::ArmISA::fp32_FPThree
static uint32_t fp32_FPThree(int sgn)
Definition: fplib.cc:2596
FP16_EXP
#define FP16_EXP(x)
Definition: fplib.cc:89
FPLIB_RP
#define FPLIB_RP
Definition: fplib.cc:54
FP16_BITS
#define FP16_BITS
Definition: fplib.cc:69
gem5::ArmISA::fplibInfinity
uint16_t fplibInfinity(int sgn)
Floating-point value for +/- infinity.
Definition: fplib.cc:5003
gem5::ArmISA::fplibNeg
uint16_t fplibNeg(uint16_t op)
Definition: fplib.cc:3491
gem5::ArmISA::fp64_muladd
static uint64_t fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale, int mode, int *flags)
Definition: fplib.cc:1684
FPLIB_RM
#define FPLIB_RM
Definition: fplib.cc:55
gem5::ArmISA::fp32_normalise
static uint32_t fp32_normalise(uint32_t mnt, int *exp)
Definition: fplib.cc:237
gem5::ArmISA::fp32_compare_eq
static int fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags)
Definition: fplib.cc:1047
gem5::ArmISA::fp64_minmaxnum
static void fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn)
Definition: fplib.cc:3169
gem5::X86ISA::op
Bitfield< 4 > op
Definition: types.hh:83
gem5::ArmISA::fp16_mul
static uint16_t fp16_mul(uint16_t a, uint16_t b, int mode, int *flags)
Definition: fplib.cc:1403
gem5::ArmISA::fp64_is_signalling_NaN
static int fp64_is_signalling_NaN(int exp, uint64_t mnt)
Definition: fplib.cc:493
gem5::ArmISA::fp64_cvtf
static uint64_t fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
Definition: fplib.cc:4948
FPLIB_IXC
#define FPLIB_IXC
Definition: fplib.cc:63
gem5::ArmISA::fp16_FPConvertNaN_64
static uint16_t fp16_FPConvertNaN_64(uint64_t op)
Definition: fplib.cc:2532
gem5::ArmISA::fp64_round
static uint64_t fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags)
Definition: fplib.cc:953
gem5::ArmISA::fp32_FPConvertNaN_64
static uint32_t fp32_FPConvertNaN_64(uint64_t op)
Definition: fplib.cc:2548
panic
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:178
gem5::ArmISA::fp64_compare_gt
static int fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags)
Definition: fplib.cc:1179
gem5::ArmISA::mode
Bitfield< 4, 0 > mode
Definition: misc_types.hh:74
gem5::ArmISA::fp128_normalise
static void fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp)
Definition: fplib.cc:273
FPLIB_OFC
#define FPLIB_OFC
Definition: fplib.cc:65
gem5::ArmISA::fp64_is_NaN
static int fp64_is_NaN(int exp, uint64_t mnt)
Definition: fplib.cc:475
gem5::ArmISA::fp16_normalise
static uint16_t fp16_normalise(uint16_t mnt, int *exp)
Definition: fplib.cc:219
error
std::string error
Definition: remote_gdb.cc:210

Generated on Thu Jul 28 2022 13:32:23 for gem5 by doxygen 1.8.17