/* Half-float conversion routines.

   Copyright (C) 2008-2023 Free Software Foundation, Inc.
   Contributed by CodeSourcery.

   This file is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 3, or (at your option) any
   later version.

   This file is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
      24  
      25  struct format
      26  {
      27    /* Number of bits.  */
      28    unsigned long long size;
      29    /* Exponent bias.  */
      30    unsigned long long bias;
      31    /* Exponent width in bits.  */
      32    unsigned long long exponent;
      33    /* Significand precision in explicitly stored bits.  */
      34    unsigned long long significand;
      35  };
      36  
      37  static const struct format
      38  binary32 =
      39  {
      40    32,   /* size.  */
      41    127,  /* bias.  */
      42    8,    /* exponent.  */
      43    23    /* significand.  */
      44  };
      45  
      46  static const struct format
      47  binary64 =
      48  {
      49    64,    /* size.  */
      50    1023,  /* bias.  */
      51    11,    /* exponent.  */
      52    52     /* significand.  */
      53  };
      54  
/* Function prototypes.

   These are the externally visible conversion routines defined in this
   file.  Floating-point values are passed and returned as raw bit
   patterns held in integer types: unsigned short for half precision,
   unsigned int for 32-bit operands and unsigned long long for 64-bit
   operands.  The "_ieee" routines select the IEEE half-precision
   encoding; the "_alternative" routines select the alternative
   half-precision encoding.  "f2h" and "d2h" narrow to half precision,
   "h2f" widens from it.  */
unsigned short __gnu_f2h_ieee (unsigned int a);
unsigned int __gnu_h2f_ieee (unsigned short a);
unsigned short __gnu_f2h_alternative (unsigned int x);
unsigned int __gnu_h2f_alternative (unsigned short a);
unsigned short __gnu_d2h_ieee (unsigned long long a);
unsigned short __gnu_d2h_alternative (unsigned long long x);
      62  
      63  static inline unsigned short
      64  __gnu_float2h_internal (const struct format* fmt,
      65  			unsigned long long a, int ieee)
      66  {
      67    unsigned long long point = 1ULL << fmt->significand;
      68    unsigned short sign = (a >> (fmt->size - 16)) & 0x8000;
      69    int aexp;
      70    unsigned long long mantissa;
      71    unsigned long long mask;
      72    unsigned long long increment;
      73  
      74    /* Get the exponent and mantissa encodings.  */
      75    mantissa = a & (point - 1);
      76  
      77    mask = (1 << fmt->exponent) - 1;
      78    aexp = (a >> fmt->significand) & mask;
      79  
      80    /* Infinity, NaN and alternative format special case.  */
      81    if (((unsigned int) aexp) == mask)
      82      {
      83        if (!ieee)
      84  	return sign;
      85        if (mantissa == 0)
      86  	return sign | 0x7c00;	/* Infinity.  */
      87        /* Remaining cases are NaNs.  Convert SNaN to QNaN.  */
      88        return sign | 0x7e00 | (mantissa >> (fmt->significand - 10));
      89      }
      90  
      91    /* Zero.  */
      92    if (aexp == 0 && mantissa == 0)
      93      return sign;
      94  
      95    /* Construct the exponent and mantissa.  */
      96    aexp -= fmt->bias;
      97  
      98    /* Decimal point is immediately after the significand.  */
      99    mantissa |= point;
     100  
     101    if (aexp < -14)
     102      {
     103        mask = point | (point - 1);
     104        /* Minimum exponent for half-precision is 2^-24.  */
     105        if (aexp >= -25)
     106  	mask >>= 25 + aexp;
     107      }
     108    else
     109      mask = (point - 1) >> 10;
     110  
     111    /* Round.  */
     112    if (mantissa & mask)
     113      {
     114        increment = (mask + 1) >> 1;
     115        if ((mantissa & mask) == increment)
     116  	increment = mantissa & (increment << 1);
     117        mantissa += increment;
     118        if (mantissa >= (point << 1))
     119  	{
     120  	  mantissa >>= 1;
     121  	  aexp++;
     122  	}
     123      }
     124  
     125    if (ieee)
     126      {
     127        if (aexp > 15)
     128  	return sign | 0x7c00;
     129      }
     130    else
     131      {
     132        if (aexp > 16)
     133  	return sign | 0x7fff;
     134      }
     135  
     136    if (aexp < -24)
     137      return sign;
     138  
     139    if (aexp < -14)
     140      {
     141        mantissa >>= -14 - aexp;
     142        aexp = -14;
     143      }
     144  
     145    /* Encode the final 16-bit floating-point value.
     146  
     147       This is formed of the sign bit, the bias-adjusted exponent, and the
     148       calculated mantissa, with the following caveats:
     149  
     150       1.  The mantissa calculated after rounding could have a leading 1.
     151  	 To compensate for this, subtract one from the exponent bias (15)
     152  	 before adding it to the calculated exponent.
     153       2.  When we were calculating rounding, we left the mantissa with the
     154  	 number of bits of the source operand, it needs reduced to ten
     155  	 bits (+1 for the afforementioned leading 1) by shifting right by
     156  	 the number of bits in the source mantissa - 10.
     157       3.  To ensure the leading 1 in the mantissa is applied to the exponent
     158  	 we need to add the mantissa rather than apply an arithmetic "or"
     159  	 to it.  */
     160  
     161    return sign | (((aexp + 14) << 10) + (mantissa >> (fmt->significand - 10)));
     162  }
     163  
     164  static inline unsigned short
     165  __gnu_f2h_internal (unsigned int a, int ieee)
     166  {
     167    return __gnu_float2h_internal (&binary32, (unsigned long long) a, ieee);
     168  }
     169  
     170  static inline unsigned short
     171  __gnu_d2h_internal (unsigned long long a, int ieee)
     172  {
     173    return __gnu_float2h_internal (&binary64, a, ieee);
     174  }
     175  
     176  static inline unsigned int
     177  __gnu_h2f_internal(unsigned short a, int ieee)
     178  {
     179    unsigned int sign = (unsigned int)(a & 0x8000) << 16;
     180    int aexp = (a >> 10) & 0x1f;
     181    unsigned int mantissa = a & 0x3ff;
     182  
     183    if (aexp == 0x1f && ieee)
     184      return sign | 0x7f800000 | (mantissa << 13);
     185  
     186    if (aexp == 0)
     187      {
     188        int shift;
     189  
     190        if (mantissa == 0)
     191  	return sign;
     192  
     193        shift = __builtin_clz(mantissa) - 21;
     194        mantissa <<= shift;
     195        aexp = -shift;
     196      }
     197  
     198    return sign | (((aexp + 0x70) << 23) + (mantissa << 13));
     199  }
     200  
     201  unsigned short
     202  __gnu_f2h_ieee(unsigned int a)
     203  {
     204    return __gnu_f2h_internal(a, 1);
     205  }
     206  
     207  unsigned int
     208  __gnu_h2f_ieee(unsigned short a)
     209  {
     210    return __gnu_h2f_internal(a, 1);
     211  }
     212  
     213  unsigned short
     214  __gnu_f2h_alternative(unsigned int x)
     215  {
     216    return __gnu_f2h_internal(x, 0);
     217  }
     218  
     219  unsigned int
     220  __gnu_h2f_alternative(unsigned short a)
     221  {
     222    return __gnu_h2f_internal(a, 0);
     223  }
     224  
     225  unsigned short
     226  __gnu_d2h_ieee (unsigned long long a)
     227  {
     228    return __gnu_d2h_internal (a, 1);
     229  }
     230  
     231  unsigned short
     232  __gnu_d2h_alternative (unsigned long long x)
     233  {
     234    return __gnu_d2h_internal (x, 0);
     235  }