gem5  v21.1.0.2
vfp.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2010-2013, 2019 ARM Limited
3  * All rights reserved
4  *
5  * The license below extends only to copyright in the software and shall
6  * not be construed as granting a license to any other intellectual
7  * property including but not limited to intellectual property relating
8  * to a hardware implementation of the functionality of the software
9  * licensed hereunder. You may use the software subject to the license
10  * terms below provided that you ensure that this notice is replicated
11  * unmodified and in its entirety in all distributions of the software,
12  * modified or unmodified, in source code or in binary form.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions are
16  * met: redistributions of source code must retain the above copyright
17  * notice, this list of conditions and the following disclaimer;
18  * redistributions in binary form must reproduce the above copyright
19  * notice, this list of conditions and the following disclaimer in the
20  * documentation and/or other materials provided with the distribution;
21  * neither the name of the copyright holders nor the names of its
22  * contributors may be used to endorse or promote products derived from
23  * this software without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36  */
37 
38 #include "arch/arm/insts/vfp.hh"
39 
40 namespace gem5
41 {
42 
43 using namespace ArmISA;
44 
45 /*
46  * The asm statements below are to keep gcc from reordering code. Otherwise
47  * the rounding mode might be set after the operation it was intended for, the
48  * exception bits read before it, etc.
49  */
50 
51 std::string
53  Addr pc, const loader::SymbolTable *symtab) const
54 {
55  std::stringstream ss;
56  printMnemonic(ss, "", false);
57  printIntReg(ss, op1);
58  ccprintf(ss, ", ");
59  printIntReg(ss, op2);
60  ccprintf(ss, ", #%d", defCc);
61  ccprintf(ss, ", ");
62  printCondition(ss, condCode, true);
63  return ss.str();
64 }
65 
66 std::string
68  Addr pc, const loader::SymbolTable *symtab) const
69 {
70  std::stringstream ss;
71  printMnemonic(ss, "", false);
73  ccprintf(ss, ", ");
74  printIntReg(ss, op1);
75  ccprintf(ss, ", ");
76  printIntReg(ss, op2);
77  ccprintf(ss, ", ");
78  printCondition(ss, condCode, true);
79  return ss.str();
80 }
81 
82 std::string
84  Addr pc, const loader::SymbolTable *symtab) const
85 {
86  std::stringstream ss;
89  ss << ", ";
91  return ss.str();
92 }
93 
94 std::string
96  Addr pc, const loader::SymbolTable *symtab) const
97 {
98  std::stringstream ss;
101  ccprintf(ss, ", #%d", imm);
102  return ss.str();
103 }
104 
105 std::string
107  Addr pc, const loader::SymbolTable *symtab) const
108 {
109  std::stringstream ss;
110  printMnemonic(ss);
112  ss << ", ";
113  printFloatReg(ss, op1);
114  ccprintf(ss, ", #%d", imm);
115  return ss.str();
116 }
117 
118 std::string
120  Addr pc, const loader::SymbolTable *symtab) const
121 {
122  std::stringstream ss;
123  printMnemonic(ss);
125  ss << ", ";
126  printFloatReg(ss, op1);
127  ss << ", ";
128  printFloatReg(ss, op2);
129  return ss.str();
130 }
131 
132 std::string
134  Addr pc, const loader::SymbolTable *symtab)
135  const
136 {
137  std::stringstream ss;
138  printMnemonic(ss);
141  ss << ", ";
142  printFloatReg(ss, op1);
143  ss << ", ";
144  printFloatReg(ss, op2);
145  return ss.str();
146 }
147 
148 std::string
150  Addr pc, const loader::SymbolTable *symtab) const
151 {
152  std::stringstream ss;
153  printMnemonic(ss);
155  ss << ", ";
156  printFloatReg(ss, op1);
157  ss << ", ";
158  printFloatReg(ss, op2);
159  ss << ", ";
160  printFloatReg(ss, op3);
161  return ss.str();
162 }
163 
164 std::string
166  Addr pc, const loader::SymbolTable *symtab) const
167 {
168  std::stringstream ss;
169  printMnemonic(ss);
171  ss << ", ";
172  printFloatReg(ss, op1);
173  ss << ", ";
174  printFloatReg(ss, op2);
175  ccprintf(ss, ", #%d", imm);
176  return ss.str();
177 }
178 
179 namespace ArmISA
180 {
181 
184 {
185  int roundingMode = fegetround();
186  feclearexcept(FeAllExceptions);
187  switch (rMode) {
188  case VfpRoundNearest:
189  fesetround(FeRoundNearest);
190  break;
191  case VfpRoundUpward:
192  fesetround(FeRoundUpward);
193  break;
194  case VfpRoundDown:
195  fesetround(FeRoundDown);
196  break;
197  case VfpRoundZero:
198  fesetround(FeRoundZero);
199  break;
200  }
201  return roundingMode;
202 }
203 
204 void
205 finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush, FPSCR mask)
206 {
207  int exceptions = fetestexcept(FeAllExceptions);
208  bool underflow = false;
209  if ((exceptions & FeInvalid) && mask.ioc) {
210  fpscr.ioc = 1;
211  }
212  if ((exceptions & FeDivByZero) && mask.dzc) {
213  fpscr.dzc = 1;
214  }
215  if ((exceptions & FeOverflow) && mask.ofc) {
216  fpscr.ofc = 1;
217  }
218  if (exceptions & FeUnderflow) {
219  underflow = true;
220  if (mask.ufc)
221  fpscr.ufc = 1;
222  }
223  if ((exceptions & FeInexact) && !(underflow && flush) && mask.ixc) {
224  fpscr.ixc = 1;
225  }
226  fesetround(state);
227 }
228 
229 template <class fpType>
230 fpType
231 fixDest(bool flush, bool defaultNan, fpType val, fpType op1)
232 {
233  int fpClass = std::fpclassify(val);
234  fpType junk = 0.0;
235  if (fpClass == FP_NAN) {
236  const bool single = (sizeof(val) == sizeof(float));
237  const uint64_t qnan = single ? 0x7fc00000 : 0x7ff8000000000000ULL;
238  const bool nan = std::isnan(op1);
239  if (!nan || defaultNan) {
240  val = bitsToFp(qnan, junk);
241  } else if (nan) {
242  val = bitsToFp(fpToBits(op1) | qnan, junk);
243  }
244  } else if (fpClass == FP_SUBNORMAL && flush == 1) {
245  // Turn val into a zero with the correct sign;
246  uint64_t bitMask = 0x1ULL << (sizeof(fpType) * 8 - 1);
247  val = bitsToFp(fpToBits(val) & bitMask, junk);
248  feclearexcept(FeInexact);
249  feraiseexcept(FeUnderflow);
250  }
251  return val;
252 }
253 
254 template
255 float fixDest<float>(bool flush, bool defaultNan, float val, float op1);
256 template
257 double fixDest<double>(bool flush, bool defaultNan, double val, double op1);
258 
259 template <class fpType>
260 fpType
261 fixDest(bool flush, bool defaultNan, fpType val, fpType op1, fpType op2)
262 {
263  int fpClass = std::fpclassify(val);
264  fpType junk = 0.0;
265  if (fpClass == FP_NAN) {
266  const bool single = (sizeof(val) == sizeof(float));
267  const uint64_t qnan = single ? 0x7fc00000 : 0x7ff8000000000000ULL;
268  const bool nan1 = std::isnan(op1);
269  const bool nan2 = std::isnan(op2);
270  const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
271  const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
272  if ((!nan1 && !nan2) || defaultNan) {
273  val = bitsToFp(qnan, junk);
274  } else if (signal1) {
275  val = bitsToFp(fpToBits(op1) | qnan, junk);
276  } else if (signal2) {
277  val = bitsToFp(fpToBits(op2) | qnan, junk);
278  } else if (nan1) {
279  val = op1;
280  } else if (nan2) {
281  val = op2;
282  }
283  } else if (fpClass == FP_SUBNORMAL && flush) {
284  // Turn val into a zero with the correct sign;
285  uint64_t bitMask = 0x1ULL << (sizeof(fpType) * 8 - 1);
286  val = bitsToFp(fpToBits(val) & bitMask, junk);
287  feclearexcept(FeInexact);
288  feraiseexcept(FeUnderflow);
289  }
290  return val;
291 }
292 
293 template
294 float fixDest<float>(bool flush, bool defaultNan,
295  float val, float op1, float op2);
296 template
297 double fixDest<double>(bool flush, bool defaultNan,
298  double val, double op1, double op2);
299 
300 template <class fpType>
301 fpType
302 fixDivDest(bool flush, bool defaultNan, fpType val, fpType op1, fpType op2)
303 {
304  fpType mid = fixDest(flush, defaultNan, val, op1, op2);
305  const bool single = (sizeof(fpType) == sizeof(float));
306  const fpType junk = 0.0;
307  if ((single && (val == bitsToFp(0x00800000, junk) ||
308  val == bitsToFp(0x80800000, junk))) ||
309  (!single && (val == bitsToFp(0x0010000000000000ULL, junk) ||
310  val == bitsToFp(0x8010000000000000ULL, junk)))
311  ) {
312  __asm__ __volatile__("" : "=m" (op1) : "m" (op1));
313  fesetround(FeRoundZero);
314  fpType temp = 0.0;
315  __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
316  temp = op1 / op2;
317  if (flushToZero(temp)) {
318  feraiseexcept(FeUnderflow);
319  if (flush) {
320  feclearexcept(FeInexact);
321  mid = temp;
322  }
323  }
324  __asm__ __volatile__("" :: "m" (temp));
325  }
326  return mid;
327 }
328 
329 template
330 float fixDivDest<float>(bool flush, bool defaultNan,
331  float val, float op1, float op2);
332 template
333 double fixDivDest<double>(bool flush, bool defaultNan,
334  double val, double op1, double op2);
335 
336 float
337 fixFpDFpSDest(FPSCR fpscr, double val)
338 {
339  const float junk = 0.0;
340  float op1 = 0.0;
341  if (std::isnan(val)) {
342  uint64_t valBits = fpToBits(val);
343  uint32_t op1Bits = bits(valBits, 50, 29) |
344  (mask(9) << 22) |
345  (bits(valBits, 63) << 31);
346  op1 = bitsToFp(op1Bits, junk);
347  }
348  float mid = fixDest(fpscr.fz, fpscr.dn, (float)val, op1);
349  if (fpscr.fz && fetestexcept(FeUnderflow | FeInexact) ==
350  (FeUnderflow | FeInexact)) {
351  feclearexcept(FeInexact);
352  }
353  if (mid == bitsToFp(0x00800000, junk) ||
354  mid == bitsToFp(0x80800000, junk)) {
355  __asm__ __volatile__("" : "=m" (val) : "m" (val));
356  fesetround(FeRoundZero);
357  float temp = 0.0;
358  __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
359  temp = val;
360  if (flushToZero(temp)) {
361  feraiseexcept(FeUnderflow);
362  if (fpscr.fz) {
363  feclearexcept(FeInexact);
364  mid = temp;
365  }
366  }
367  __asm__ __volatile__("" :: "m" (temp));
368  }
369  return mid;
370 }
371 
372 double
373 fixFpSFpDDest(FPSCR fpscr, float val)
374 {
375  const double junk = 0.0;
376  double op1 = 0.0;
377  if (std::isnan(val)) {
378  uint32_t valBits = fpToBits(val);
379  uint64_t op1Bits = ((uint64_t)bits(valBits, 21, 0) << 29) |
380  (mask(12) << 51) |
381  ((uint64_t)bits(valBits, 31) << 63);
382  op1 = bitsToFp(op1Bits, junk);
383  }
384  double mid = fixDest(fpscr.fz, fpscr.dn, (double)val, op1);
385  if (mid == bitsToFp(0x0010000000000000ULL, junk) ||
386  mid == bitsToFp(0x8010000000000000ULL, junk)) {
387  __asm__ __volatile__("" : "=m" (val) : "m" (val));
388  fesetround(FeRoundZero);
389  double temp = 0.0;
390  __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
391  temp = val;
392  if (flushToZero(temp)) {
393  feraiseexcept(FeUnderflow);
394  if (fpscr.fz) {
395  feclearexcept(FeInexact);
396  mid = temp;
397  }
398  }
399  __asm__ __volatile__("" :: "m" (temp));
400  }
401  return mid;
402 }
403 
404 static inline uint16_t
405 vcvtFpFpH(FPSCR &fpscr, bool flush, bool defaultNan,
406  uint32_t rMode, bool ahp, uint64_t opBits, bool isDouble)
407 {
408  uint32_t mWidth;
409  uint32_t eWidth;
410  uint32_t eHalfRange;
411  uint32_t sBitPos;
412 
413  if (isDouble) {
414  mWidth = 52;
415  eWidth = 11;
416  } else {
417  mWidth = 23;
418  eWidth = 8;
419  }
420  sBitPos = eWidth + mWidth;
421  eHalfRange = (1 << (eWidth-1)) - 1;
422 
423  // Extract the operand.
424  bool neg = bits(opBits, sBitPos);
425  uint32_t exponent = bits(opBits, sBitPos-1, mWidth);
426  uint64_t oldMantissa = bits(opBits, mWidth-1, 0);
427  uint32_t mantissa = oldMantissa >> (mWidth - 10);
428  // Do the conversion.
429  uint64_t extra = oldMantissa & mask(mWidth - 10);
430  if (exponent == mask(eWidth)) {
431  if (oldMantissa != 0) {
432  // Nans.
433  if (bits(mantissa, 9) == 0) {
434  // Signalling nan.
435  fpscr.ioc = 1;
436  }
437  if (ahp) {
438  mantissa = 0;
439  exponent = 0;
440  fpscr.ioc = 1;
441  } else if (defaultNan) {
442  mantissa = (1 << 9);
443  exponent = 0x1f;
444  neg = false;
445  } else {
446  exponent = 0x1f;
447  mantissa |= (1 << 9);
448  }
449  } else {
450  // Infinities.
451  exponent = 0x1F;
452  if (ahp) {
453  fpscr.ioc = 1;
454  mantissa = 0x3ff;
455  } else {
456  mantissa = 0;
457  }
458  }
459  } else if (exponent == 0 && oldMantissa == 0) {
460  // Zero, don't need to do anything.
461  } else {
462  // Normalized or denormalized numbers.
463 
464  bool inexact = (extra != 0);
465 
466  if (exponent == 0) {
467  // Denormalized.
468  // If flush to zero is on, this shouldn't happen.
469  assert(!flush);
470 
471  // Check for underflow
472  if (inexact || fpscr.ufe)
473  fpscr.ufc = 1;
474 
475  // Handle rounding.
476  unsigned mode = rMode;
477  if ((mode == VfpRoundUpward && !neg && extra) ||
478  (mode == VfpRoundDown && neg && extra) ||
479  (mode == VfpRoundNearest &&
480  (extra > (1 << 9) ||
481  (extra == (1 << 9) && bits(mantissa, 0))))) {
482  mantissa++;
483  }
484 
485  // See if the number became normalized after rounding.
486  if (mantissa == (1 << 10)) {
487  mantissa = 0;
488  exponent = 1;
489  }
490  } else {
491  // Normalized.
492 
493  // We need to track the dropped bits differently since
494  // more can be dropped by denormalizing.
495  bool topOne = bits(extra, mWidth - 10 - 1);
496  bool restZeros = bits(extra, mWidth - 10 - 2, 0) == 0;
497 
498  if (exponent <= (eHalfRange - 15)) {
499  // The result is too small. Denormalize.
500  mantissa |= (1 << 10);
501  while (mantissa && exponent <= (eHalfRange - 15)) {
502  restZeros = restZeros && !topOne;
503  topOne = bits(mantissa, 0);
504  mantissa = mantissa >> 1;
505  exponent++;
506  }
507  if (topOne || !restZeros)
508  inexact = true;
509  exponent = 0;
510  } else {
511  // Change bias.
512  exponent -= (eHalfRange - 15);
513  }
514 
515  if (exponent == 0 && (inexact || fpscr.ufe)) {
516  // Underflow
517  fpscr.ufc = 1;
518  }
519 
520  // Handle rounding.
521  unsigned mode = rMode;
522  bool nonZero = topOne || !restZeros;
523  if ((mode == VfpRoundUpward && !neg && nonZero) ||
524  (mode == VfpRoundDown && neg && nonZero) ||
525  (mode == VfpRoundNearest && topOne &&
526  (!restZeros || bits(mantissa, 0)))) {
527  mantissa++;
528  }
529 
530  // See if we rounded up and need to bump the exponent.
531  if (mantissa == (1 << 10)) {
532  mantissa = 0;
533  exponent++;
534  }
535 
536  // Deal with overflow
537  if (ahp) {
538  if (exponent >= 0x20) {
539  exponent = 0x1f;
540  mantissa = 0x3ff;
541  fpscr.ioc = 1;
542  // Supress inexact exception.
543  inexact = false;
544  }
545  } else {
546  if (exponent >= 0x1f) {
547  if ((mode == VfpRoundNearest) ||
548  (mode == VfpRoundUpward && !neg) ||
549  (mode == VfpRoundDown && neg)) {
550  // Overflow to infinity.
551  exponent = 0x1f;
552  mantissa = 0;
553  } else {
554  // Overflow to max normal.
555  exponent = 0x1e;
556  mantissa = 0x3ff;
557  }
558  fpscr.ofc = 1;
559  inexact = true;
560  }
561  }
562  }
563 
564  if (inexact) {
565  fpscr.ixc = 1;
566  }
567  }
568  // Reassemble and install the result.
569  uint32_t result = bits(mantissa, 9, 0);
570  replaceBits(result, 14, 10, exponent);
571  if (neg)
572  result |= (1 << 15);
573  return result;
574 }
575 
576 uint16_t
577 vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan,
578  uint32_t rMode, bool ahp, float op)
579 {
580  uint64_t opBits = fpToBits(op);
581  return vcvtFpFpH(fpscr, flush, defaultNan, rMode, ahp, opBits, false);
582 }
583 
584 uint16_t
585 vcvtFpDFpH(FPSCR &fpscr, bool flush, bool defaultNan,
586  uint32_t rMode, bool ahp, double op)
587 {
588  uint64_t opBits = fpToBits(op);
589  return vcvtFpFpH(fpscr, flush, defaultNan, rMode, ahp, opBits, true);
590 }
591 
592 static inline uint64_t
593 vcvtFpHFp(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op, bool isDouble)
594 {
595  uint32_t mWidth;
596  uint32_t eWidth;
597  uint32_t eHalfRange;
598  uint32_t sBitPos;
599 
600  if (isDouble) {
601  mWidth = 52;
602  eWidth = 11;
603  } else {
604  mWidth = 23;
605  eWidth = 8;
606  }
607  sBitPos = eWidth + mWidth;
608  eHalfRange = (1 << (eWidth-1)) - 1;
609 
610  // Extract the bitfields.
611  bool neg = bits(op, 15);
612  uint32_t exponent = bits(op, 14, 10);
613  uint64_t mantissa = bits(op, 9, 0);
614  // Do the conversion.
615  if (exponent == 0) {
616  if (mantissa != 0) {
617  // Normalize the value.
618  exponent = exponent + (eHalfRange - 15) + 1;
619  while (mantissa < (1 << 10)) {
620  mantissa = mantissa << 1;
621  exponent--;
622  }
623  }
624  mantissa = mantissa << (mWidth - 10);
625  } else if (exponent == 0x1f && !ahp) {
626  // Infinities and nans.
627  exponent = mask(eWidth);
628  if (mantissa != 0) {
629  // Nans.
630  mantissa = mantissa << (mWidth - 10);
631  if (bits(mantissa, mWidth-1) == 0) {
632  // Signalling nan.
633  fpscr.ioc = 1;
634  mantissa |= (((uint64_t) 1) << (mWidth-1));
635  }
636  if (defaultNan) {
637  mantissa &= ~mask(mWidth-1);
638  neg = false;
639  }
640  }
641  } else {
642  exponent = exponent + (eHalfRange - 15);
643  mantissa = mantissa << (mWidth - 10);
644  }
645  // Reassemble the result.
646  uint64_t result = bits(mantissa, mWidth-1, 0);
647  replaceBits(result, sBitPos-1, mWidth, exponent);
648  if (neg) {
649  result |= (((uint64_t) 1) << sBitPos);
650  }
651  return result;
652 }
653 
654 double
655 vcvtFpHFpD(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op)
656 {
657  double junk = 0.0;
658  uint64_t result;
659 
660  result = vcvtFpHFp(fpscr, defaultNan, ahp, op, true);
661  return bitsToFp(result, junk);
662 }
663 
664 float
665 vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op)
666 {
667  float junk = 0.0;
668  uint64_t result;
669 
670  result = vcvtFpHFp(fpscr, defaultNan, ahp, op, false);
671  return bitsToFp(result, junk);
672 }
673 
674 float
675 vfpUFixedToFpS(bool flush, bool defaultNan,
676  uint64_t val, uint8_t width, uint8_t imm)
677 {
678  fesetround(FeRoundNearest);
679  if (width == 16)
680  val = (uint16_t)val;
681  else if (width == 32)
682  val = (uint32_t)val;
683  else if (width != 64)
684  panic("Unsupported width %d", width);
685  float scale = powf(2.0, imm);
686  __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
687  feclearexcept(FeAllExceptions);
688  __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
689  return fixDivDest(flush, defaultNan, val / scale, (float)val, scale);
690 }
691 
692 float
693 vfpSFixedToFpS(bool flush, bool defaultNan,
694  int64_t val, uint8_t width, uint8_t imm)
695 {
696  fesetround(FeRoundNearest);
697  if (width == 16)
698  val = szext<16>(val);
699  else if (width == 32)
700  val = szext<32>(val);
701  else if (width != 64)
702  panic("Unsupported width %d", width);
703 
704  float scale = powf(2.0, imm);
705  __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
706  feclearexcept(FeAllExceptions);
707  __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
708  return fixDivDest(flush, defaultNan, val / scale, (float)val, scale);
709 }
710 
711 
712 double
713 vfpUFixedToFpD(bool flush, bool defaultNan,
714  uint64_t val, uint8_t width, uint8_t imm)
715 {
716  fesetround(FeRoundNearest);
717  if (width == 16)
718  val = (uint16_t)val;
719  else if (width == 32)
720  val = (uint32_t)val;
721  else if (width != 64)
722  panic("Unsupported width %d", width);
723 
724  double scale = pow(2.0, imm);
725  __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
726  feclearexcept(FeAllExceptions);
727  __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
728  return fixDivDest(flush, defaultNan, val / scale, (double)val, scale);
729 }
730 
731 double
732 vfpSFixedToFpD(bool flush, bool defaultNan,
733  int64_t val, uint8_t width, uint8_t imm)
734 {
735  fesetround(FeRoundNearest);
736  if (width == 16)
737  val = szext<16>(val);
738  else if (width == 32)
739  val = szext<32>(val);
740  else if (width != 64)
741  panic("Unsupported width %d", width);
742 
743  double scale = pow(2.0, imm);
744  __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
745  feclearexcept(FeAllExceptions);
746  __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
747  return fixDivDest(flush, defaultNan, val / scale, (double)val, scale);
748 }
749 
750 // This function implements a magic formula taken from the architecture
751 // reference manual. It was originally called recip_sqrt_estimate.
752 static double
754 {
755  int64_t q0, q1, s;
756  double r;
757  if (a < 0.5) {
758  q0 = (int64_t)(a * 512.0);
759  r = 1.0 / sqrt(((double)q0 + 0.5) / 512.0);
760  } else {
761  q1 = (int64_t)(a * 256.0);
762  r = 1.0 / sqrt(((double)q1 + 0.5) / 256.0);
763  }
764  s = (int64_t)(256.0 * r + 0.5);
765  return (double)s / 256.0;
766 }
767 
768 // This function is only intended for use in Neon instructions because
769 // it ignores certain bits in the FPSCR.
770 float
771 fprSqrtEstimate(FPSCR &fpscr, float op)
772 {
773  const uint32_t qnan = 0x7fc00000;
774  float junk = 0.0;
775  int fpClass = std::fpclassify(op);
776  if (fpClass == FP_NAN) {
777  if ((fpToBits(op) & qnan) != qnan)
778  fpscr.ioc = 1;
779  return bitsToFp(qnan, junk);
780  } else if (fpClass == FP_ZERO) {
781  fpscr.dzc = 1;
782  // Return infinity with the same sign as the operand.
783  return bitsToFp((std::signbit(op) << 31) |
784  (0xFF << 23) | (0 << 0), junk);
785  } else if (std::signbit(op)) {
786  // Set invalid op bit.
787  fpscr.ioc = 1;
788  return bitsToFp(qnan, junk);
789  } else if (fpClass == FP_INFINITE) {
790  return 0.0;
791  } else {
792  uint64_t opBits = fpToBits(op);
793  double scaled;
794  if (bits(opBits, 23)) {
795  scaled = bitsToFp((0 << 0) | (bits(opBits, 22, 0) << 29) |
796  (0x3fdULL << 52) | (bits(opBits, 31) << 63),
797  (double)0.0);
798  } else {
799  scaled = bitsToFp((0 << 0) | (bits(opBits, 22, 0) << 29) |
800  (0x3feULL << 52) | (bits(opBits, 31) << 63),
801  (double)0.0);
802  }
803  uint64_t resultExp = (380 - bits(opBits, 30, 23)) / 2;
804 
805  uint64_t estimate = fpToBits(recipSqrtEstimate(scaled));
806 
807  return bitsToFp((bits(estimate, 63) << 31) |
808  (bits(resultExp, 7, 0) << 23) |
809  (bits(estimate, 51, 29) << 0), junk);
810  }
811 }
812 
813 uint32_t
815 {
816  if (bits(op, 31, 30) == 0) {
817  return -1;
818  } else {
819  double dpOp;
820  if (bits(op, 31)) {
821  dpOp = bitsToFp((0ULL << 63) |
822  (0x3feULL << 52) |
823  (bits((uint64_t)op, 30, 0) << 21) |
824  (0 << 0), (double)0.0);
825  } else {
826  dpOp = bitsToFp((0ULL << 63) |
827  (0x3fdULL << 52) |
828  (bits((uint64_t)op, 29, 0) << 22) |
829  (0 << 0), (double)0.0);
830  }
831  uint64_t estimate = fpToBits(recipSqrtEstimate(dpOp));
832  return (1 << 31) | bits(estimate, 51, 21);
833  }
834 }
835 
836 // This function implements a magic formula taken from the architecture
837 // reference manual. It was originally called recip_estimate.
838 
839 static double
841 {
842  int64_t q, s;
843  double r;
844  q = (int64_t)(a * 512.0);
845  r = 1.0 / (((double)q + 0.5) / 512.0);
846  s = (int64_t)(256.0 * r + 0.5);
847  return (double)s / 256.0;
848 }
849 
850 // This function is only intended for use in Neon instructions because
851 // it ignores certain bits in the FPSCR.
852 float
853 fpRecipEstimate(FPSCR &fpscr, float op)
854 {
855  const uint32_t qnan = 0x7fc00000;
856  float junk = 0.0;
857  int fpClass = std::fpclassify(op);
858  if (fpClass == FP_NAN) {
859  if ((fpToBits(op) & qnan) != qnan)
860  fpscr.ioc = 1;
861  return bitsToFp(qnan, junk);
862  } else if (fpClass == FP_INFINITE) {
863  return bitsToFp(std::signbit(op) << 31, junk);
864  } else if (fpClass == FP_ZERO) {
865  fpscr.dzc = 1;
866  // Return infinity with the same sign as the operand.
867  return bitsToFp((std::signbit(op) << 31) |
868  (0xFF << 23) | (0 << 0), junk);
869  } else if (fabs(op) >= pow(2.0, 126)) {
870  fpscr.ufc = 1;
871  return bitsToFp(std::signbit(op) << 31, junk);
872  } else {
873  uint64_t opBits = fpToBits(op);
874  double scaled;
875  scaled = bitsToFp((0 << 0) | (bits(opBits, 22, 0) << 29) |
876  (0x3feULL << 52) | (0ULL << 63),
877  (double)0.0);
878  uint64_t resultExp = 253 - bits(opBits, 30, 23);
879 
880  uint64_t estimate = fpToBits(recipEstimate(scaled));
881 
882  return bitsToFp((bits(opBits, 31) << 31) |
883  (bits(resultExp, 7, 0) << 23) |
884  (bits(estimate, 51, 29) << 0), junk);
885  }
886 }
887 
888 uint32_t
890 {
891  if (bits(op, 31) == 0) {
892  return -1;
893  } else {
894  double dpOp;
895  dpOp = bitsToFp((0ULL << 63) |
896  (0x3feULL << 52) |
897  (bits((uint64_t)op, 30, 0) << 21) |
898  (0 << 0), (double)0.0);
899  uint64_t estimate = fpToBits(recipEstimate(dpOp));
900  return (1 << 31) | bits(estimate, 51, 21);
901  }
902 }
903 
904 FPSCR
905 fpStandardFPSCRValue(const FPSCR &fpscr)
906 {
907  FPSCR new_fpscr(0);
908  new_fpscr.ahp = fpscr.ahp;
909  new_fpscr.dn = 1;
910  new_fpscr.fz = 1;
911  new_fpscr.fz16 = fpscr.fz16;
912  return new_fpscr;
913 };
914 
915 template <class fpType>
916 fpType
917 FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan,
918  fpType op1, fpType op2) const
919 {
920  done = true;
921  fpType junk = 0.0;
922  fpType dest = 0.0;
923  const bool single = (sizeof(fpType) == sizeof(float));
924  const uint64_t qnan =
925  single ? 0x7fc00000 : 0x7ff8000000000000ULL;
926  const bool nan1 = std::isnan(op1);
927  const bool nan2 = std::isnan(op2);
928  const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
929  const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
930  if (nan1 || nan2) {
931  if (defaultNan) {
932  dest = bitsToFp(qnan, junk);
933  } else if (signal1) {
934  dest = bitsToFp(fpToBits(op1) | qnan, junk);
935  } else if (signal2) {
936  dest = bitsToFp(fpToBits(op2) | qnan, junk);
937  } else if (nan1) {
938  dest = op1;
939  } else if (nan2) {
940  dest = op2;
941  }
942  if (signal1 || signal2) {
943  fpscr.ioc = 1;
944  }
945  } else {
946  done = false;
947  }
948  return dest;
949 }
950 
951 template
952 float FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan,
953  float op1, float op2) const;
954 template
955 double FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan,
956  double op1, double op2) const;
957 
958 // @TODO remove this function when we've finished switching all FMA code to use the new FPLIB
959 template <class fpType>
960 fpType
961 FpOp::ternaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType op3,
962  fpType (*func)(fpType, fpType, fpType),
963  bool flush, bool defaultNan, uint32_t rMode) const
964 {
965  const bool single = (sizeof(fpType) == sizeof(float));
966  fpType junk = 0.0;
967 
968  if (flush && (flushToZero(op1, op2) || flushToZero(op3)))
969  fpscr.idc = 1;
971  __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (op3), "=m" (state)
972  : "m" (op1), "m" (op2), "m" (op3), "m" (state));
973  fpType dest = func(op1, op2, op3);
974  __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));
975 
976  int fpClass = std::fpclassify(dest);
977  // Get NAN behavior right. This varies between x86 and ARM.
978  if (fpClass == FP_NAN) {
979  const uint64_t qnan =
980  single ? 0x7fc00000 : 0x7ff8000000000000ULL;
981  const bool nan1 = std::isnan(op1);
982  const bool nan2 = std::isnan(op2);
983  const bool nan3 = std::isnan(op3);
984  const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
985  const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
986  const bool signal3 = nan3 && ((fpToBits(op3) & qnan) != qnan);
987  if ((!nan1 && !nan2 && !nan3) || (defaultNan == 1)) {
988  dest = bitsToFp(qnan, junk);
989  } else if (signal1) {
990  dest = bitsToFp(fpToBits(op1) | qnan, junk);
991  } else if (signal2) {
992  dest = bitsToFp(fpToBits(op2) | qnan, junk);
993  } else if (signal3) {
994  dest = bitsToFp(fpToBits(op3) | qnan, junk);
995  } else if (nan1) {
996  dest = op1;
997  } else if (nan2) {
998  dest = op2;
999  } else if (nan3) {
1000  dest = op3;
1001  }
1002  } else if (flush && flushToZero(dest)) {
1003  feraiseexcept(FeUnderflow);
1004  } else if ((
1005  (single && (dest == bitsToFp(0x00800000, junk) ||
1006  dest == bitsToFp(0x80800000, junk))) ||
1007  (!single &&
1008  (dest == bitsToFp(0x0010000000000000ULL, junk) ||
1009  dest == bitsToFp(0x8010000000000000ULL, junk)))
1010  ) && rMode != VfpRoundZero) {
1011  /*
1012  * Correct for the fact that underflow is detected -before- rounding
1013  * in ARM and -after- rounding in x86.
1014  */
1015  fesetround(FeRoundZero);
1016  __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (op3)
1017  : "m" (op1), "m" (op2), "m" (op3));
1018  fpType temp = func(op1, op2, op2);
1019  __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
1020  if (flush && flushToZero(temp)) {
1021  dest = temp;
1022  }
1023  }
1024  finishVfp(fpscr, state, flush);
1025  return dest;
1026 }
1027 
1028 template
1029 float FpOp::ternaryOp(FPSCR &fpscr, float op1, float op2, float op3,
1030  float (*func)(float, float, float),
1031  bool flush, bool defaultNan, uint32_t rMode) const;
1032 template
1033 double FpOp::ternaryOp(FPSCR &fpscr, double op1, double op2, double op3,
1034  double (*func)(double, double, double),
1035  bool flush, bool defaultNan, uint32_t rMode) const;
1036 
1037 template <class fpType>
1038 fpType
1039 FpOp::binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
1040  fpType (*func)(fpType, fpType),
1041  bool flush, bool defaultNan, uint32_t rMode) const
1042 {
1043  const bool single = (sizeof(fpType) == sizeof(float));
1044  fpType junk = 0.0;
1045 
1046  if (flush && flushToZero(op1, op2))
1047  fpscr.idc = 1;
1048  VfpSavedState state = prepFpState(rMode);
1049  __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (state)
1050  : "m" (op1), "m" (op2), "m" (state));
1051  fpType dest = func(op1, op2);
1052  __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));
1053 
1054  // Get NAN behavior right. This varies between x86 and ARM.
1055  if (std::isnan(dest)) {
1056  const uint64_t qnan =
1057  single ? 0x7fc00000 : 0x7ff8000000000000ULL;
1058  const bool nan1 = std::isnan(op1);
1059  const bool nan2 = std::isnan(op2);
1060  const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
1061  const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
1062  if ((!nan1 && !nan2) || (defaultNan == 1)) {
1063  dest = bitsToFp(qnan, junk);
1064  } else if (signal1) {
1065  dest = bitsToFp(fpToBits(op1) | qnan, junk);
1066  } else if (signal2) {
1067  dest = bitsToFp(fpToBits(op2) | qnan, junk);
1068  } else if (nan1) {
1069  dest = op1;
1070  } else if (nan2) {
1071  dest = op2;
1072  }
1073  } else if (flush && flushToZero(dest)) {
1074  feraiseexcept(FeUnderflow);
1075  } else if ((
1076  (single && (dest == bitsToFp(0x00800000, junk) ||
1077  dest == bitsToFp(0x80800000, junk))) ||
1078  (!single &&
1079  (dest == bitsToFp(0x0010000000000000ULL, junk) ||
1080  dest == bitsToFp(0x8010000000000000ULL, junk)))
1081  ) && rMode != VfpRoundZero) {
1082  /*
1083  * Correct for the fact that underflow is detected -before- rounding
1084  * in ARM and -after- rounding in x86.
1085  */
1086  fesetround(FeRoundZero);
1087  __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2)
1088  : "m" (op1), "m" (op2));
1089  fpType temp = func(op1, op2);
1090  __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
1091  if (flush && flushToZero(temp)) {
1092  dest = temp;
1093  }
1094  }
1095  finishVfp(fpscr, state, flush);
1096  return dest;
1097 }
1098 
1099 template
1100 float FpOp::binaryOp(FPSCR &fpscr, float op1, float op2,
1101  float (*func)(float, float),
1102  bool flush, bool defaultNan, uint32_t rMode) const;
1103 template
1104 double FpOp::binaryOp(FPSCR &fpscr, double op1, double op2,
1105  double (*func)(double, double),
1106  bool flush, bool defaultNan, uint32_t rMode) const;
1107 
1108 template <class fpType>
1109 fpType
1110 FpOp::unaryOp(FPSCR &fpscr, fpType op1, fpType (*func)(fpType),
1111  bool flush, uint32_t rMode) const
1112 {
1113  const bool single = (sizeof(fpType) == sizeof(float));
1114  fpType junk = 0.0;
1115 
1116  if (flush && flushToZero(op1))
1117  fpscr.idc = 1;
1118  VfpSavedState state = prepFpState(rMode);
1119  __asm__ __volatile__ ("" : "=m" (op1), "=m" (state)
1120  : "m" (op1), "m" (state));
1121  fpType dest = func(op1);
1122  __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));
1123 
1124  // Get NAN behavior right. This varies between x86 and ARM.
1125  if (std::isnan(dest)) {
1126  const uint64_t qnan =
1127  single ? 0x7fc00000 : 0x7ff8000000000000ULL;
1128  const bool nan = std::isnan(op1);
1129  if (!nan || fpscr.dn == 1) {
1130  dest = bitsToFp(qnan, junk);
1131  } else if (nan) {
1132  dest = bitsToFp(fpToBits(op1) | qnan, junk);
1133  }
1134  } else if (flush && flushToZero(dest)) {
1135  feraiseexcept(FeUnderflow);
1136  } else if ((
1137  (single && (dest == bitsToFp(0x00800000, junk) ||
1138  dest == bitsToFp(0x80800000, junk))) ||
1139  (!single &&
1140  (dest == bitsToFp(0x0010000000000000ULL, junk) ||
1141  dest == bitsToFp(0x8010000000000000ULL, junk)))
1142  ) && rMode != VfpRoundZero) {
1143  /*
1144  * Correct for the fact that underflow is detected -before- rounding
1145  * in ARM and -after- rounding in x86.
1146  */
1147  fesetround(FeRoundZero);
1148  __asm__ __volatile__ ("" : "=m" (op1) : "m" (op1));
1149  fpType temp = func(op1);
1150  __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
1151  if (flush && flushToZero(temp)) {
1152  dest = temp;
1153  }
1154  }
1155  finishVfp(fpscr, state, flush);
1156  return dest;
1157 }
1158 
1159 template
1160 float FpOp::unaryOp(FPSCR &fpscr, float op1, float (*func)(float),
1161  bool flush, uint32_t rMode) const;
1162 template
1163 double FpOp::unaryOp(FPSCR &fpscr, double op1, double (*func)(double),
1164  bool flush, uint32_t rMode) const;
1165 
1166 IntRegIndex
1167 VfpMacroOp::addStride(IntRegIndex idx, unsigned stride)
1168 {
1169  if (wide) {
1170  stride *= 2;
1171  }
1172  unsigned offset = idx % 8;
1173  idx = (IntRegIndex)(idx - offset);
1174  offset += stride;
1175  idx = (IntRegIndex)(idx + (offset % 8));
1176  return idx;
1177 }
1178 
1179 void
1180 VfpMacroOp::nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2)
1181 {
1182  unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
1183  assert(!inScalarBank(dest));
1184  dest = addStride(dest, stride);
1185  op1 = addStride(op1, stride);
1186  if (!inScalarBank(op2)) {
1187  op2 = addStride(op2, stride);
1188  }
1189 }
1190 
1191 void
1192 VfpMacroOp::nextIdxs(IntRegIndex &dest, IntRegIndex &op1)
1193 {
1194  unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
1195  assert(!inScalarBank(dest));
1196  dest = addStride(dest, stride);
1197  if (!inScalarBank(op1)) {
1198  op1 = addStride(op1, stride);
1199  }
1200 }
1201 
1202 void
1203 VfpMacroOp::nextIdxs(IntRegIndex &dest)
1204 {
1205  unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
1206  assert(!inScalarBank(dest));
1207  dest = addStride(dest, stride);
1208 }
1209 
1210 } // namespace ArmISA
1211 } // namespace gem5
gem5::ArmISA::unsignedRSqrtEstimate
uint32_t unsignedRSqrtEstimate(uint32_t op)
Definition: vfp.cc:814
gem5::ArmISA::VfpRoundNearest
@ VfpRoundNearest
Definition: vfp.hh:106
gem5::ArmISA::FpOp::binaryOp
fpType binaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType(*func)(fpType, fpType), bool flush, bool defaultNan, uint32_t rMode) const
Definition: vfp.cc:1039
gem5::ArmISA::FpRegRegRegRegOp::op1
IntRegIndex op1
Definition: vfp.hh:1019
gem5::ArmISA::FpRegRegRegRegOp::dest
IntRegIndex dest
Definition: vfp.hh:1018
gem5::ArmISA::bitsToFp
static float bitsToFp(uint64_t, float)
Definition: vfp.hh:182
gem5::ArmISA::vfpUFixedToFpS
float vfpUFixedToFpS(bool flush, bool defaultNan, uint64_t val, uint8_t width, uint8_t imm)
Definition: vfp.cc:675
gem5::ArmISA::fixFpDFpSDest
float fixFpDFpSDest(FPSCR fpscr, double val)
Definition: vfp.cc:337
gem5::ArmISA::fprSqrtEstimate
float fprSqrtEstimate(FPSCR &fpscr, float op)
Definition: vfp.cc:771
gem5::ArmISA::FpCondSelOp::dest
IntRegIndex dest
Definition: vfp.hh:905
gem5::ArmISA::FpCondCompRegOp::generateDisassembly
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition: vfp.cc:52
gem5::ArmISA::VfpRoundDown
@ VfpRoundDown
Definition: vfp.hh:108
gem5::X86ISA::scale
scale
Definition: types.hh:97
gem5::ArmISA::FeRoundDown
@ FeRoundDown
Definition: vfp.hh:98
gem5::ArmISA::FeOverflow
@ FeOverflow
Definition: vfp.hh:91
gem5::ArmISA::FpRegRegRegImmOp::generateDisassembly
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition: vfp.cc:165
gem5::ArmISA::FpCondSelOp::op2
IntRegIndex op2
Definition: vfp.hh:905
gem5::replaceBits
constexpr void replaceBits(T &val, unsigned first, unsigned last, B bit_val)
A convenience function to replace bits first to last of val with bit_val in place.
Definition: bitfield.hh:197
vfp.hh
gem5::ArmISA::ArmStaticInst::printMnemonic
void printMnemonic(std::ostream &os, const std::string &suffix="", bool withPred=true, bool withCond64=false, ConditionCode cond64=COND_UC) const
Definition: static_inst.cc:377
gem5::ArmISA::FeUnderflow
@ FeUnderflow
Definition: vfp.hh:92
gem5::ArmISA::FpRegRegRegCondOp::cond
ConditionCode cond
Definition: vfp.hh:999
gem5::ArmISA::FpCondCompRegOp::op2
IntRegIndex op2
Definition: vfp.hh:887
gem5::ArmISA::fixDest< float >
template float fixDest< float >(bool flush, bool defaultNan, float val, float op1)
gem5::ArmISA::vcvtFpFpH
static uint16_t vcvtFpFpH(FPSCR &fpscr, bool flush, bool defaultNan, uint32_t rMode, bool ahp, uint64_t opBits, bool isDouble)
Definition: vfp.cc:405
gem5::X86ISA::val
Bitfield< 63 > val
Definition: misc.hh:775
gem5::ArmISA::a
Bitfield< 8 > a
Definition: misc_types.hh:65
gem5::ArmISA::FpCondCompRegOp::op1
IntRegIndex op1
Definition: vfp.hh:887
gem5::ArmISA::fixDest< double >
template double fixDest< double >(bool flush, bool defaultNan, double val, double op1)
gem5::ArmISA::FpRegRegRegCondOp::generateDisassembly
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition: vfp.cc:133
gem5::ArmISA::FeDivByZero
@ FeDivByZero
Definition: vfp.hh:88
gem5::loader::SymbolTable
Definition: symtab.hh:65
gem5::ArmISA::FpCondCompRegOp::defCc
uint8_t defCc
Definition: vfp.hh:889
gem5::ArmISA::ahp
Bitfield< 26 > ahp
Definition: misc_types.hh:450
gem5::ArmISA::FpRegRegRegOp::dest
IntRegIndex dest
Definition: vfp.hh:977
gem5::ArmISA::FpRegRegRegOp::generateDisassembly
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition: vfp.cc:119
gem5::ArmISA::FeAllExceptions
@ FeAllExceptions
Definition: vfp.hh:93
gem5::ArmISA::vfpSFixedToFpS
float vfpSFixedToFpS(bool flush, bool defaultNan, int64_t val, uint8_t width, uint8_t imm)
Definition: vfp.cc:693
gem5::ArmISA::FpRegRegImmOp::op1
IntRegIndex op1
Definition: vfp.hh:959
gem5::ArmISA::FpRegRegOp::generateDisassembly
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition: vfp.cc:83
gem5::ccprintf
void ccprintf(cp::Print &print)
Definition: cprintf.hh:130
gem5::ArmISA::FpRegImmOp::imm
uint64_t imm
Definition: vfp.hh:941
gem5::ArmISA::FpOp::processNans
fpType processNans(FPSCR &fpscr, bool &done, bool defaultNan, fpType op1, fpType op2) const
Definition: vfp.cc:917
gem5::ArmISA::FeRoundUpward
@ FeRoundUpward
Definition: vfp.hh:101
gem5::ArmISA::finishVfp
void finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush, FPSCR mask)
Definition: vfp.cc:205
gem5::ArmISA::FpCondSelOp::generateDisassembly
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition: vfp.cc:67
gem5::ArmISA::FpRegRegImmOp::imm
uint64_t imm
Definition: vfp.hh:960
gem5::ArmISA::ArmStaticInst::printCondition
void printCondition(std::ostream &os, unsigned code, bool noImplicit=false) const
Definition: static_inst.cc:417
gem5::ArmISA::FpCondSelOp::op1
IntRegIndex op1
Definition: vfp.hh:905
gem5::ArmISA::FeInexact
@ FeInexact
Definition: vfp.hh:89
gem5::ArmISA::vfpUFixedToFpD
double vfpUFixedToFpD(bool flush, bool defaultNan, uint64_t val, uint8_t width, uint8_t imm)
Definition: vfp.cc:713
gem5::ArmISA::width
Bitfield< 4 > width
Definition: misc_types.hh:71
sc_dt::neg
void neg(sc_fxval &c, const sc_fxnum &a)
Definition: sc_fxnum.hh:2270
gem5::ArmISA::vcvtFpSFpH
uint16_t vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, uint32_t rMode, bool ahp, float op)
Definition: vfp.cc:577
gem5::ArmISA::FpRegRegRegCondOp::op1
IntRegIndex op1
Definition: vfp.hh:997
gem5::ArmISA::FpRegRegRegOp::op1
IntRegIndex op1
Definition: vfp.hh:978
gem5::ArmISA::vcvtFpHFpS
float vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op)
Definition: vfp.cc:665
gem5::ArmISA::s
Bitfield< 4 > s
Definition: misc_types.hh:561
gem5::ArmISA::FpRegRegRegRegOp::op3
IntRegIndex op3
Definition: vfp.hh:1021
gem5::ArmISA::FpCondSelOp::condCode
ConditionCode condCode
Definition: vfp.hh:906
gem5::ArmISA::VfpMacroOp::addStride
IntRegIndex addStride(IntRegIndex idx, unsigned stride)
Definition: vfp.cc:1167
gem5::ArmISA::offset
Bitfield< 23, 0 > offset
Definition: types.hh:144
gem5::ArmISA::VfpRoundUpward
@ VfpRoundUpward
Definition: vfp.hh:107
gem5::ArmISA::mask
Bitfield< 3, 0 > mask
Definition: pcstate.hh:63
gem5::bits
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Definition: bitfield.hh:76
gem5::ArmISA::recipSqrtEstimate
static double recipSqrtEstimate(double a)
Definition: vfp.cc:753
gem5::ArmISA::vfpSFixedToFpD
double vfpSFixedToFpD(bool flush, bool defaultNan, int64_t val, uint8_t width, uint8_t imm)
Definition: vfp.cc:732
gem5::ArmISA::VfpMacroOp::nextIdxs
void nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2)
Definition: vfp.cc:1180
gem5::ArmISA::VfpSavedState
int VfpSavedState
Definition: vfp.hh:215
gem5::ArmISA::FpRegRegRegImmOp::imm
uint64_t imm
Definition: vfp.hh:1042
gem5::ArmISA::FpRegRegRegRegOp::op2
IntRegIndex op2
Definition: vfp.hh:1020
gem5::ArmISA::FeInvalid
@ FeInvalid
Definition: vfp.hh:90
gem5::ArmISA::vcvtFpHFpD
double vcvtFpHFpD(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op)
Definition: vfp.cc:655
gem5::ArmISA::fpToBits
static uint32_t fpToBits(float)
Definition: vfp.hh:158
gem5::Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:147
gem5::ArmISA::FpRegRegRegImmOp::op2
IntRegIndex op2
Definition: vfp.hh:1041
gem5::ArmISA::fixDivDest< double >
template double fixDivDest< double >(bool flush, bool defaultNan, double val, double op1, double op2)
gem5::ArmISA::FpRegRegRegCondOp::op2
IntRegIndex op2
Definition: vfp.hh:998
gem5::ArmISA::FpRegRegRegCondOp::dest
IntRegIndex dest
Definition: vfp.hh:996
gem5::ArmISA::fixDivDest
fpType fixDivDest(bool flush, bool defaultNan, fpType val, fpType op1, fpType op2)
Definition: vfp.cc:302
gem5::ArmISA::FpRegRegRegOp::op2
IntRegIndex op2
Definition: vfp.hh:979
gem5::ArmISA::FpRegRegRegImmOp::dest
IntRegIndex dest
Definition: vfp.hh:1039
gem5::ArmISA::fpRecipEstimate
float fpRecipEstimate(FPSCR &fpscr, float op)
Definition: vfp.cc:853
gem5::ArmISA::FpRegRegImmOp::generateDisassembly
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition: vfp.cc:106
gem5::ArmISA::vcvtFpHFp
static uint64_t vcvtFpHFp(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op, bool isDouble)
Definition: vfp.cc:593
gem5::ArmISA::prepFpState
VfpSavedState prepFpState(uint32_t rMode)
Definition: vfp.cc:183
gem5::ArmISA::q
Bitfield< 27 > q
Definition: misc_types.hh:55
gem5::ArmISA::imm
Bitfield< 7, 0 > imm
Definition: types.hh:132
gem5::ArmISA::flushToZero
flushToZero
Definition: misc_types.hh:476
gem5::ArmISA::recipEstimate
static double recipEstimate(double a)
Definition: vfp.cc:840
gem5::ArmISA::ArmStaticInst::printFloatReg
void printFloatReg(std::ostream &os, RegIndex reg_idx) const
Definition: static_inst.cc:345
gem5::ArmISA::FeRoundNearest
@ FeRoundNearest
Definition: vfp.hh:99
gem5::ArmISA::FpRegRegOp::dest
IntRegIndex dest
Definition: vfp.hh:922
gem5::ArmISA::FpRegImmOp::generateDisassembly
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition: vfp.cc:95
gem5::ArmISA::ss
Bitfield< 21 > ss
Definition: misc_types.hh:59
gem5::ArmISA::VfpRoundZero
@ VfpRoundZero
Definition: vfp.hh:109
gem5::ArmISA::FpOp::unaryOp
fpType unaryOp(FPSCR &fpscr, fpType op1, fpType(*func)(fpType), bool flush, uint32_t rMode) const
Definition: vfp.cc:1110
gem5::ArmISA::FeRoundZero
@ FeRoundZero
Definition: vfp.hh:100
gem5::MipsISA::pc
Bitfield< 4 > pc
Definition: pra_constants.hh:243
gem5::ArmISA::fpStandardFPSCRValue
FPSCR fpStandardFPSCRValue(const FPSCR &fpscr)
Definition: vfp.cc:905
gem5::ArmISA::FpCondCompRegOp::condCode
ConditionCode condCode
Definition: vfp.hh:888
gem5::ArmISA::fixDivDest< float >
template float fixDivDest< float >(bool flush, bool defaultNan, float val, float op1, float op2)
gem5::ArmISA::FpRegRegRegImmOp::op1
IntRegIndex op1
Definition: vfp.hh:1040
gem5::ArmISA::FpOp::ternaryOp
fpType ternaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType op3, fpType(*func)(fpType, fpType, fpType), bool flush, bool defaultNan, uint32_t rMode) const
Definition: vfp.cc:961
gem5::ArmISA::unsignedRecipEstimate
uint32_t unsignedRecipEstimate(uint32_t op)
Definition: vfp.cc:889
gem5::MipsISA::r
r
Definition: pra_constants.hh:98
gem5::ArmISA::FpRegRegOp::op1
IntRegIndex op1
Definition: vfp.hh:923
gem5::ArmISA::FpRegImmOp::dest
IntRegIndex dest
Definition: vfp.hh:940
gem5::ArmISA::fixDest
fpType fixDest(bool flush, bool defaultNan, fpType val, fpType op1)
Definition: vfp.cc:231
gem5::ArmISA::ArmStaticInst::printIntReg
void printIntReg(std::ostream &os, RegIndex reg_idx, uint8_t opWidth=0) const
Print a register name for disassembly given the unique dependence tag number (FP or int).
Definition: static_inst.cc:299
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: decoder.cc:40
gem5::ArmISA::stride
Bitfield< 21, 20 > stride
Definition: misc_types.hh:446
gem5::ArmISA::FpRegRegImmOp::dest
IntRegIndex dest
Definition: vfp.hh:958
gem5::ArmISA::FpRegRegRegRegOp::generateDisassembly
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition: vfp.cc:149
gem5::X86ISA::op
Bitfield< 4 > op
Definition: types.hh:83
gem5::ArmISA::vcvtFpDFpH
uint16_t vcvtFpDFpH(FPSCR &fpscr, bool flush, bool defaultNan, uint32_t rMode, bool ahp, double op)
Definition: vfp.cc:585
gem5::ArmISA::rMode
Bitfield< 23, 22 > rMode
Definition: misc_types.hh:447
panic
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:177
gem5::ArmISA::mode
Bitfield< 4, 0 > mode
Definition: misc_types.hh:73
gem5::ArmISA::fixFpSFpDDest
double fixFpSFpDDest(FPSCR fpscr, float val)
Definition: vfp.cc:373

Generated on Tue Sep 21 2021 12:24:36 for gem5 by doxygen 1.8.17