1  /* Software floating-point emulation.
       2     Basic two-word fraction declaration and manipulation.
       3     Copyright (C) 1997-2022 Free Software Foundation, Inc.
       4     This file is part of the GNU C Library.
       5  
       6     The GNU C Library is free software; you can redistribute it and/or
       7     modify it under the terms of the GNU Lesser General Public
       8     License as published by the Free Software Foundation; either
       9     version 2.1 of the License, or (at your option) any later version.
      10  
      11     In addition to the permissions in the GNU Lesser General Public
      12     License, the Free Software Foundation gives you unlimited
      13     permission to link the compiled version of this file into
      14     combinations with other programs, and to distribute those
      15     combinations without any restriction coming from the use of this
      16     file.  (The Lesser General Public License restrictions do apply in
      17     other respects; for example, they cover modification of the file,
      18     and distribution when not linked into a combine executable.)
      19  
      20     The GNU C Library is distributed in the hope that it will be useful,
      21     but WITHOUT ANY WARRANTY; without even the implied warranty of
      22     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      23     Lesser General Public License for more details.
      24  
      25     You should have received a copy of the GNU Lesser General Public
      26     License along with the GNU C Library; if not, see
      27     <https://www.gnu.org/licenses/>.  */
      28  
      29  #ifndef SOFT_FP_OP_2_H
      30  #define SOFT_FP_OP_2_H	1
      31  
      32  #define _FP_FRAC_DECL_2(X)				\
      33    _FP_W_TYPE X##_f0 _FP_ZERO_INIT, X##_f1 _FP_ZERO_INIT
      34  #define _FP_FRAC_COPY_2(D, S)	(D##_f0 = S##_f0, D##_f1 = S##_f1)
      35  #define _FP_FRAC_SET_2(X, I)	__FP_FRAC_SET_2 (X, I)
      36  #define _FP_FRAC_HIGH_2(X)	(X##_f1)
      37  #define _FP_FRAC_LOW_2(X)	(X##_f0)
      38  #define _FP_FRAC_WORD_2(X, w)	(X##_f##w)
      39  
      40  #define _FP_FRAC_SLL_2(X, N)						\
      41    (void) (((N) < _FP_W_TYPE_SIZE)					\
      42  	  ? ({								\
      43  	      if (__builtin_constant_p (N) && (N) == 1)			\
      44  		{							\
      45  		  X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE) (X##_f0)) < 0); \
      46  		  X##_f0 += X##_f0;					\
      47  		}							\
      48  	      else							\
      49  		{							\
      50  		  X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
      51  		  X##_f0 <<= (N);					\
      52  		}							\
      53  	      0;							\
      54  	    })								\
      55  	  : ({								\
      56  	      X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE);		\
      57  	      X##_f0 = 0;						\
      58  	    }))
      59  
      60  
      61  #define _FP_FRAC_SRL_2(X, N)						\
      62    (void) (((N) < _FP_W_TYPE_SIZE)					\
      63  	  ? ({								\
      64  	      X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
      65  	      X##_f1 >>= (N);						\
      66  	    })								\
      67  	  : ({								\
      68  	      X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE);		\
      69  	      X##_f1 = 0;						\
      70  	    }))
      71  
      72  /* Right shift with sticky-lsb.  */
      73  #define _FP_FRAC_SRST_2(X, S, N, sz)					\
      74    (void) (((N) < _FP_W_TYPE_SIZE)					\
      75  	  ? ({								\
      76  	      S = (__builtin_constant_p (N) && (N) == 1			\
      77  		   ? X##_f0 & 1						\
      78  		   : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0);		\
      79  	      X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)); \
      80  	      X##_f1 >>= (N);						\
      81  	    })								\
      82  	  : ({								\
      83  	      S = ((((N) == _FP_W_TYPE_SIZE				\
      84  		     ? 0						\
      85  		     : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))		\
      86  		    | X##_f0) != 0);					\
      87  	      X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE));		\
      88  	      X##_f1 = 0;						\
      89  	    }))
      90  
      91  #define _FP_FRAC_SRS_2(X, N, sz)					\
      92    (void) (((N) < _FP_W_TYPE_SIZE)					\
      93  	  ? ({								\
      94  	      X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) \
      95  			| (__builtin_constant_p (N) && (N) == 1		\
      96  			   ? X##_f0 & 1					\
      97  			   : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \
      98  	      X##_f1 >>= (N);						\
      99  	    })								\
     100  	  : ({								\
     101  	      X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE)		\
     102  			| ((((N) == _FP_W_TYPE_SIZE			\
     103  			     ? 0					\
     104  			     : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))	\
     105  			    | X##_f0) != 0));				\
     106  	      X##_f1 = 0;						\
     107  	    }))
     108  
     109  #define _FP_FRAC_ADDI_2(X, I)	\
     110    __FP_FRAC_ADDI_2 (X##_f1, X##_f0, I)
     111  
     112  #define _FP_FRAC_ADD_2(R, X, Y)	\
     113    __FP_FRAC_ADD_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
     114  
     115  #define _FP_FRAC_SUB_2(R, X, Y)	\
     116    __FP_FRAC_SUB_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
     117  
     118  #define _FP_FRAC_DEC_2(X, Y)	\
     119    __FP_FRAC_DEC_2 (X##_f1, X##_f0, Y##_f1, Y##_f0)
     120  
     121  #define _FP_FRAC_CLZ_2(R, X)			\
     122    do						\
     123      {						\
     124        if (X##_f1)				\
     125  	__FP_CLZ ((R), X##_f1);			\
     126        else					\
     127  	{					\
     128  	  __FP_CLZ ((R), X##_f0);		\
     129  	  (R) += _FP_W_TYPE_SIZE;		\
     130  	}					\
     131      }						\
     132    while (0)
     133  
     134  /* Predicates.  */
     135  #define _FP_FRAC_NEGP_2(X)	((_FP_WS_TYPE) X##_f1 < 0)
     136  #define _FP_FRAC_ZEROP_2(X)	((X##_f1 | X##_f0) == 0)
     137  #define _FP_FRAC_OVERP_2(fs, X)	(_FP_FRAC_HIGH_##fs (X) & _FP_OVERFLOW_##fs)
     138  #define _FP_FRAC_CLEAR_OVERP_2(fs, X)	(_FP_FRAC_HIGH_##fs (X) &= ~_FP_OVERFLOW_##fs)
     139  #define _FP_FRAC_HIGHBIT_DW_2(fs, X)	\
     140    (_FP_FRAC_HIGH_DW_##fs (X) & _FP_HIGHBIT_DW_##fs)
     141  #define _FP_FRAC_EQ_2(X, Y)	(X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
     142  #define _FP_FRAC_GT_2(X, Y)	\
     143    (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
     144  #define _FP_FRAC_GE_2(X, Y)	\
     145    (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
     146  
     147  #define _FP_ZEROFRAC_2		0, 0
     148  #define _FP_MINFRAC_2		0, 1
     149  #define _FP_MAXFRAC_2		(~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0)
     150  
     151  /* Internals.  */
     152  
     153  #define __FP_FRAC_SET_2(X, I1, I0)	(X##_f0 = I0, X##_f1 = I1)
     154  
     155  #define __FP_CLZ_2(R, xh, xl)			\
     156    do						\
     157      {						\
     158        if (xh)					\
     159  	__FP_CLZ ((R), xh);			\
     160        else					\
     161  	{					\
     162  	  __FP_CLZ ((R), xl);			\
     163  	  (R) += _FP_W_TYPE_SIZE;		\
     164  	}					\
     165      }						\
     166    while (0)
     167  
/* Two-word add/subtract primitives.  The "#if 0" branch holds plain-C
   definitions that are currently compiled out; the "#else" branch,
   which is the one in effect, maps every primitive onto add_ssaaaa or
   sub_ddmmss.  */
#if 0

/* Add the single word I to (xh, xl), carrying into xh.  */
# ifndef __FP_FRAC_ADDI_2
#  define __FP_FRAC_ADDI_2(xh, xl, i)	\
  (xh += ((xl += i) < i))
# endif
/* (rh, rl) = (xh, xl) + (yh, yl).  */
# ifndef __FP_FRAC_ADD_2
#  define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl)	\
  (rh = xh + yh + ((rl = xl + yl) < xl))
# endif
/* (rh, rl) = (xh, xl) - (yh, yl).  */
# ifndef __FP_FRAC_SUB_2
#  define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl)	\
  (rh = xh - yh - ((rl = xl - yl) > xl))
# endif
/* (xh, xl) -= (yh, yl); the old xl is saved so that the borrow test
   still sees it after xl has been updated.  */
# ifndef __FP_FRAC_DEC_2
#  define __FP_FRAC_DEC_2(xh, xl, yh, yl)		\
  do							\
    {							\
      UWtype __FP_FRAC_DEC_2_t = xl;			\
      xh -= yh + ((xl -= yl) > __FP_FRAC_DEC_2_t);	\
    }							\
  while (0)
# endif

#else

/* Any earlier definition is discarded before each redefinition.  */
# undef __FP_FRAC_ADDI_2
# define __FP_FRAC_ADDI_2(xh, xl, i)	add_ssaaaa (xh, xl, xh, xl, 0, i)
# undef __FP_FRAC_ADD_2
# define __FP_FRAC_ADD_2		add_ssaaaa
# undef __FP_FRAC_SUB_2
# define __FP_FRAC_SUB_2		sub_ddmmss
# undef __FP_FRAC_DEC_2
# define __FP_FRAC_DEC_2(xh, xl, yh, yl)	\
  sub_ddmmss (xh, xl, xh, xl, yh, yl)

#endif
     205  
     206  /* Unpack the raw bits of a native fp value.  Do not classify or
     207     normalize the data.  */
     208  
     209  #define _FP_UNPACK_RAW_2(fs, X, val)			\
     210    do							\
     211      {							\
     212        union _FP_UNION_##fs _FP_UNPACK_RAW_2_flo;	\
     213        _FP_UNPACK_RAW_2_flo.flt = (val);			\
     214  							\
     215        X##_f0 = _FP_UNPACK_RAW_2_flo.bits.frac0;		\
     216        X##_f1 = _FP_UNPACK_RAW_2_flo.bits.frac1;		\
     217        X##_e  = _FP_UNPACK_RAW_2_flo.bits.exp;		\
     218        X##_s  = _FP_UNPACK_RAW_2_flo.bits.sign;		\
     219      }							\
     220    while (0)
     221  
     222  #define _FP_UNPACK_RAW_2_P(fs, X, val)			\
     223    do							\
     224      {							\
     225        union _FP_UNION_##fs *_FP_UNPACK_RAW_2_P_flo	\
     226  	= (union _FP_UNION_##fs *) (val);		\
     227  							\
     228        X##_f0 = _FP_UNPACK_RAW_2_P_flo->bits.frac0;	\
     229        X##_f1 = _FP_UNPACK_RAW_2_P_flo->bits.frac1;	\
     230        X##_e  = _FP_UNPACK_RAW_2_P_flo->bits.exp;	\
     231        X##_s  = _FP_UNPACK_RAW_2_P_flo->bits.sign;	\
     232      }							\
     233    while (0)
     234  
     235  
     236  /* Repack the raw bits of a native fp value.  */
     237  
     238  #define _FP_PACK_RAW_2(fs, val, X)		\
     239    do						\
     240      {						\
     241        union _FP_UNION_##fs _FP_PACK_RAW_2_flo;	\
     242  						\
     243        _FP_PACK_RAW_2_flo.bits.frac0 = X##_f0;	\
     244        _FP_PACK_RAW_2_flo.bits.frac1 = X##_f1;	\
     245        _FP_PACK_RAW_2_flo.bits.exp   = X##_e;	\
     246        _FP_PACK_RAW_2_flo.bits.sign  = X##_s;	\
     247  						\
     248        (val) = _FP_PACK_RAW_2_flo.flt;		\
     249      }						\
     250    while (0)
     251  
     252  #define _FP_PACK_RAW_2_P(fs, val, X)			\
     253    do							\
     254      {							\
     255        union _FP_UNION_##fs *_FP_PACK_RAW_2_P_flo	\
     256  	= (union _FP_UNION_##fs *) (val);		\
     257  							\
     258        _FP_PACK_RAW_2_P_flo->bits.frac0 = X##_f0;	\
     259        _FP_PACK_RAW_2_P_flo->bits.frac1 = X##_f1;	\
     260        _FP_PACK_RAW_2_P_flo->bits.exp   = X##_e;		\
     261        _FP_PACK_RAW_2_P_flo->bits.sign  = X##_s;		\
     262      }							\
     263    while (0)
     264  
     265  
     266  /* Multiplication algorithms: */
     267  
     268  /* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
     269  
     270  #define _FP_MUL_MEAT_DW_2_wide(wfracbits, R, X, Y, doit)		\
     271    do									\
     272      {									\
     273        _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_b);			\
     274        _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_c);			\
     275  									\
     276        doit (_FP_FRAC_WORD_4 (R, 1), _FP_FRAC_WORD_4 (R, 0),		\
     277  	    X##_f0, Y##_f0);						\
     278        doit (_FP_MUL_MEAT_DW_2_wide_b_f1, _FP_MUL_MEAT_DW_2_wide_b_f0,	\
     279  	    X##_f0, Y##_f1);						\
     280        doit (_FP_MUL_MEAT_DW_2_wide_c_f1, _FP_MUL_MEAT_DW_2_wide_c_f0,	\
     281  	    X##_f1, Y##_f0);						\
     282        doit (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),		\
     283  	    X##_f1, Y##_f1);						\
     284  									\
     285        __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
     286  		       _FP_FRAC_WORD_4 (R, 1), 0,			\
     287  		       _FP_MUL_MEAT_DW_2_wide_b_f1,			\
     288  		       _FP_MUL_MEAT_DW_2_wide_b_f0,			\
     289  		       _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
     290  		       _FP_FRAC_WORD_4 (R, 1));				\
     291        __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
     292  		       _FP_FRAC_WORD_4 (R, 1), 0,			\
     293  		       _FP_MUL_MEAT_DW_2_wide_c_f1,			\
     294  		       _FP_MUL_MEAT_DW_2_wide_c_f0,			\
     295  		       _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
     296  		       _FP_FRAC_WORD_4 (R, 1));				\
     297      }									\
     298    while (0)
     299  
     300  #define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit)			\
     301    do									\
     302      {									\
     303        _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_z);				\
     304  									\
     305        _FP_MUL_MEAT_DW_2_wide ((wfracbits), _FP_MUL_MEAT_2_wide_z,	\
     306  			      X, Y, doit);				\
     307  									\
     308        /* Normalize since we know where the msb of the multiplicands	\
     309  	 were (bit B), we know that the msb of the of the product is	\
     310  	 at either 2B or 2B-1.  */					\
     311        _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_z, (wfracbits)-1,		\
     312  		      2*(wfracbits));					\
     313        R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 0);		\
     314        R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 1);		\
     315      }									\
     316    while (0)
     317  
/* Given a 1W * 1W => 2W primitive, do the extended multiplication.
   Do only 3 multiplications instead of four. This one is for machines
   where multiplication is much more expensive than subtraction.

   The three products formed with DOIT are X_f0 * Y_f0,
   (X_f0 + X_f1) * (Y_f0 + Y_f1) and X_f1 * Y_f1.  The carries out of
   the two word sums are kept in _c1 and _c2 and folded back in
   separately.  The result is left in the four-word fraction R;
   WFRACBITS is not used here.  */

#define _FP_MUL_MEAT_DW_2_wide_3mul(wfracbits, R, X, Y, doit)		\
  do									\
    {									\
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_b);			\
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_c);			\
      _FP_W_TYPE _FP_MUL_MEAT_DW_2_wide_3mul_d;				\
      int _FP_MUL_MEAT_DW_2_wide_3mul_c1;				\
      int _FP_MUL_MEAT_DW_2_wide_3mul_c2;				\
									\
      /* b_f0 = X_f0 + X_f1 with carry c1; b_f1 = Y_f0 + Y_f1 with	\
	 carry c2.  */							\
      _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 = X##_f0 + X##_f1;		\
      _FP_MUL_MEAT_DW_2_wide_3mul_c1					\
	= _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 < X##_f0;			\
      _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 = Y##_f0 + Y##_f1;		\
      _FP_MUL_MEAT_DW_2_wide_3mul_c2					\
	= _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 < Y##_f0;			\
      /* (d, R[0]) = X_f0 * Y_f0; (R[2], R[1]) = product of the two	\
	 truncated sums; c = X_f1 * Y_f1.  */				\
      doit (_FP_MUL_MEAT_DW_2_wide_3mul_d, _FP_FRAC_WORD_4 (R, 0),	\
	    X##_f0, Y##_f0);						\
      doit (_FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1),		\
	    _FP_MUL_MEAT_DW_2_wide_3mul_b_f0,				\
	    _FP_MUL_MEAT_DW_2_wide_3mul_b_f1);				\
      doit (_FP_MUL_MEAT_DW_2_wide_3mul_c_f1,				\
	    _FP_MUL_MEAT_DW_2_wide_3mul_c_f0, X##_f1, Y##_f1);		\
									\
      /* Keep each truncated sum only if the other sum carried.  */	\
      _FP_MUL_MEAT_DW_2_wide_3mul_b_f0					\
	&= -_FP_MUL_MEAT_DW_2_wide_3mul_c2;				\
      _FP_MUL_MEAT_DW_2_wide_3mul_b_f1					\
	&= -_FP_MUL_MEAT_DW_2_wide_3mul_c1;				\
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
		       _FP_FRAC_WORD_4 (R, 1),				\
		       (_FP_MUL_MEAT_DW_2_wide_3mul_c1			\
			& _FP_MUL_MEAT_DW_2_wide_3mul_c2), 0,		\
		       _FP_MUL_MEAT_DW_2_wide_3mul_d,			\
		       0, _FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1)); \
      __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
			_FP_MUL_MEAT_DW_2_wide_3mul_b_f0);		\
      __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
			_FP_MUL_MEAT_DW_2_wide_3mul_b_f1);		\
      /* Subtract the low*low and high*high products from the middle	\
	 term, then add the high*high product at words 3:2.  */		\
      __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
		       _FP_FRAC_WORD_4 (R, 1),				\
		       0, _FP_MUL_MEAT_DW_2_wide_3mul_d,		\
		       _FP_FRAC_WORD_4 (R, 0));				\
      __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
		       _FP_FRAC_WORD_4 (R, 1), 0,			\
		       _FP_MUL_MEAT_DW_2_wide_3mul_c_f1,		\
		       _FP_MUL_MEAT_DW_2_wide_3mul_c_f0);		\
      __FP_FRAC_ADD_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
		       _FP_MUL_MEAT_DW_2_wide_3mul_c_f1,		\
		       _FP_MUL_MEAT_DW_2_wide_3mul_c_f0,		\
		       _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2));	\
    }									\
  while (0)
     373  
     374  #define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit)		\
     375    do									\
     376      {									\
     377        _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_3mul_z);			\
     378  									\
     379        _FP_MUL_MEAT_DW_2_wide_3mul ((wfracbits),				\
     380  				   _FP_MUL_MEAT_2_wide_3mul_z,		\
     381  				   X, Y, doit);				\
     382  									\
     383        /* Normalize since we know where the msb of the multiplicands	\
     384  	 were (bit B), we know that the msb of the of the product is	\
     385  	 at either 2B or 2B-1.  */					\
     386        _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_3mul_z,			\
     387  		      (wfracbits)-1, 2*(wfracbits));			\
     388        R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 0);		\
     389        R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 1);		\
     390      }									\
     391    while (0)
     392  
     393  #define _FP_MUL_MEAT_DW_2_gmp(wfracbits, R, X, Y)	\
     394    do							\
     395      {							\
     396        _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_x[2];		\
     397        _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_y[2];		\
     398        _FP_MUL_MEAT_DW_2_gmp_x[0] = X##_f0;		\
     399        _FP_MUL_MEAT_DW_2_gmp_x[1] = X##_f1;		\
     400        _FP_MUL_MEAT_DW_2_gmp_y[0] = Y##_f0;		\
     401        _FP_MUL_MEAT_DW_2_gmp_y[1] = Y##_f1;		\
     402  							\
     403        mpn_mul_n (R##_f, _FP_MUL_MEAT_DW_2_gmp_x,	\
     404  		 _FP_MUL_MEAT_DW_2_gmp_y, 2);		\
     405      }							\
     406    while (0)
     407  
     408  #define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y)				\
     409    do									\
     410      {									\
     411        _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_gmp_z);				\
     412  									\
     413        _FP_MUL_MEAT_DW_2_gmp ((wfracbits), _FP_MUL_MEAT_2_gmp_z, X, Y);	\
     414  									\
     415        /* Normalize since we know where the msb of the multiplicands	\
     416  	 were (bit B), we know that the msb of the of the product is	\
     417  	 at either 2B or 2B-1.  */					\
     418        _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_gmp_z, (wfracbits)-1,		\
     419  		      2*(wfracbits));					\
     420        R##_f0 = _FP_MUL_MEAT_2_gmp_z_f[0];				\
     421        R##_f1 = _FP_MUL_MEAT_2_gmp_z_f[1];				\
     422      }									\
     423    while (0)
     424  
     425  /* Do at most 120x120=240 bits multiplication using double floating
     426     point multiplication.  This is useful if floating point
     427     multiplication has much bigger throughput than integer multiply.
     428     It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
     429     between 106 and 120 only.
     430     Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
     431     SETFETZ is a macro which will disable all FPU exceptions and set rounding
     432     towards zero,  RESETFE should optionally reset it back.  */
     433  
     434  #define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
     435    do									\
     436      {									\
     437        static const double _const[] =					\
     438  	{								\
     439  	  /* 2^-24 */ 5.9604644775390625e-08,				\
     440  	  /* 2^-48 */ 3.5527136788005009e-15,				\
     441  	  /* 2^-72 */ 2.1175823681357508e-22,				\
     442  	  /* 2^-96 */ 1.2621774483536189e-29,				\
     443  	  /* 2^28 */ 2.68435456e+08,					\
     444  	  /* 2^4 */ 1.600000e+01,					\
     445  	  /* 2^-20 */ 9.5367431640625e-07,				\
     446  	  /* 2^-44 */ 5.6843418860808015e-14,				\
     447  	  /* 2^-68 */ 3.3881317890172014e-21,				\
     448  	  /* 2^-92 */ 2.0194839173657902e-28,				\
     449  	  /* 2^-116 */ 1.2037062152420224e-35				\
     450  	};								\
     451        double _a240, _b240, _c240, _d240, _e240, _f240,			\
     452  	_g240, _h240, _i240, _j240, _k240;				\
     453        union { double d; UDItype i; } _l240, _m240, _n240, _o240,	\
     454  				       _p240, _q240, _r240, _s240;	\
     455        UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0;		\
     456  									\
     457        _FP_STATIC_ASSERT ((wfracbits) >= 106 && (wfracbits) <= 120,	\
     458  			 "wfracbits out of range");			\
     459  									\
     460        setfetz;								\
     461  									\
     462        _e240 = (double) (long) (X##_f0 & 0xffffff);			\
     463        _j240 = (double) (long) (Y##_f0 & 0xffffff);			\
     464        _d240 = (double) (long) ((X##_f0 >> 24) & 0xffffff);		\
     465        _i240 = (double) (long) ((Y##_f0 >> 24) & 0xffffff);		\
     466        _c240 = (double) (long) (((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \
     467        _h240 = (double) (long) (((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \
     468        _b240 = (double) (long) ((X##_f1 >> 8) & 0xffffff);		\
     469        _g240 = (double) (long) ((Y##_f1 >> 8) & 0xffffff);		\
     470        _a240 = (double) (long) (X##_f1 >> 32);				\
     471        _f240 = (double) (long) (Y##_f1 >> 32);				\
     472        _e240 *= _const[3];						\
     473        _j240 *= _const[3];						\
     474        _d240 *= _const[2];						\
     475        _i240 *= _const[2];						\
     476        _c240 *= _const[1];						\
     477        _h240 *= _const[1];						\
     478        _b240 *= _const[0];						\
     479        _g240 *= _const[0];						\
     480        _s240.d =							      _e240*_j240; \
     481        _r240.d =						_d240*_j240 + _e240*_i240; \
     482        _q240.d =				  _c240*_j240 + _d240*_i240 + _e240*_h240; \
     483        _p240.d =		    _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240; \
     484        _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240; \
     485        _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240;	\
     486        _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240;		\
     487        _l240.d = _a240*_g240 + _b240*_f240;				\
     488        _k240 =   _a240*_f240;						\
     489        _r240.d += _s240.d;						\
     490        _q240.d += _r240.d;						\
     491        _p240.d += _q240.d;						\
     492        _o240.d += _p240.d;						\
     493        _n240.d += _o240.d;						\
     494        _m240.d += _n240.d;						\
     495        _l240.d += _m240.d;						\
     496        _k240 += _l240.d;							\
     497        _s240.d -= ((_const[10]+_s240.d)-_const[10]);			\
     498        _r240.d -= ((_const[9]+_r240.d)-_const[9]);			\
     499        _q240.d -= ((_const[8]+_q240.d)-_const[8]);			\
     500        _p240.d -= ((_const[7]+_p240.d)-_const[7]);			\
     501        _o240.d += _const[7];						\
     502        _n240.d += _const[6];						\
     503        _m240.d += _const[5];						\
     504        _l240.d += _const[4];						\
     505        if (_s240.d != 0.0)						\
     506  	_y240 = 1;							\
     507        if (_r240.d != 0.0)						\
     508  	_y240 = 1;							\
     509        if (_q240.d != 0.0)						\
     510  	_y240 = 1;							\
     511        if (_p240.d != 0.0)						\
     512  	_y240 = 1;							\
     513        _t240 = (DItype) _k240;						\
     514        _u240 = _l240.i;							\
     515        _v240 = _m240.i;							\
     516        _w240 = _n240.i;							\
     517        _x240 = _o240.i;							\
     518        R##_f1 = ((_t240 << (128 - (wfracbits - 1)))			\
     519  		| ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104)));	\
     520        R##_f0 = (((_u240 & 0xffffff) << (168 - (wfracbits - 1)))		\
     521  		| ((_v240 & 0xffffff) << (144 - (wfracbits - 1)))	\
     522  		| ((_w240 & 0xffffff) << (120 - (wfracbits - 1)))	\
     523  		| ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96))	\
     524  		| _y240);						\
     525        resetfe;								\
     526      }									\
     527    while (0)
     528  
/* Division algorithms: */

/* Divide X by Y, producing the two quotient words R_f1 and R_f0, one
   word at a time with udiv_qrnnd and umul_ppmm.  When X >= Y the
   numerator is first shifted right by one bit; otherwise R_e is
   decremented.  Y is shifted left in place by _FP_WFRACXBITS_<fs>.
   When the final remainder is nonzero, _FP_WORK_STICKY is ORed into
   R_f0.  */

#define _FP_DIV_MEAT_2_udiv(fs, R, X, Y)				\
  do									\
    {									\
      /* Three-word numerator n, two-word running remainder r and	\
	 two-word partial product m.  */				\
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f2;				\
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f1;				\
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f0;				\
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f1;				\
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f0;				\
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f1;				\
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f0;				\
      if (_FP_FRAC_GE_2 (X, Y))						\
	{								\
	  _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1 >> 1;			\
	  _FP_DIV_MEAT_2_udiv_n_f1					\
	    = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1;		\
	  _FP_DIV_MEAT_2_udiv_n_f0					\
	    = X##_f0 << (_FP_W_TYPE_SIZE - 1);				\
	}								\
      else								\
	{								\
	  R##_e--;							\
	  _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1;				\
	  _FP_DIV_MEAT_2_udiv_n_f1 = X##_f0;				\
	  _FP_DIV_MEAT_2_udiv_n_f0 = 0;					\
	}								\
									\
      /* Normalize, i.e. make the most significant bit of the		\
	 denominator set.  */						\
      _FP_FRAC_SLL_2 (Y, _FP_WFRACXBITS_##fs);				\
									\
      /* High quotient word: estimate from the high divisor word,	\
	 then correct the estimate downwards at most twice.  */		\
      udiv_qrnnd (R##_f1, _FP_DIV_MEAT_2_udiv_r_f1,			\
		  _FP_DIV_MEAT_2_udiv_n_f2, _FP_DIV_MEAT_2_udiv_n_f1,	\
		  Y##_f1);						\
      umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, _FP_DIV_MEAT_2_udiv_m_f0,	\
		 R##_f1, Y##_f0);					\
      _FP_DIV_MEAT_2_udiv_r_f0 = _FP_DIV_MEAT_2_udiv_n_f0;		\
      if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, _FP_DIV_MEAT_2_udiv_r))	\
	{								\
	  R##_f1--;							\
	  _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y,			\
			  _FP_DIV_MEAT_2_udiv_r);			\
	  if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y)			\
	      && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m,			\
				_FP_DIV_MEAT_2_udiv_r))			\
	    {								\
	      R##_f1--;							\
	      _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y,			\
			      _FP_DIV_MEAT_2_udiv_r);			\
	    }								\
	}								\
      _FP_FRAC_DEC_2 (_FP_DIV_MEAT_2_udiv_r, _FP_DIV_MEAT_2_udiv_m);	\
									\
      if (_FP_DIV_MEAT_2_udiv_r_f1 == Y##_f1)				\
	{								\
	  /* This is a special case, not an optimization		\
	     (_FP_DIV_MEAT_2_udiv_r/Y##_f1 would not fit into UWtype).	\
	     As _FP_DIV_MEAT_2_udiv_r is guaranteed to be < Y,		\
	     R##_f0 can be either (UWtype)-1 or (UWtype)-2.  But as we	\
	     know what kind of bits it is (sticky, guard, round),	\
	     we don't care.  We also don't care what the remainder is,	\
	     because the guard bit will be set anyway.  -jj */		\
	  R##_f0 = -1;							\
	}								\
      else								\
	{								\
	  /* Low quotient word, with the same estimate-and-correct	\
	     sequence as for the high word.  */				\
	  udiv_qrnnd (R##_f0, _FP_DIV_MEAT_2_udiv_r_f1,			\
		      _FP_DIV_MEAT_2_udiv_r_f1,				\
		      _FP_DIV_MEAT_2_udiv_r_f0, Y##_f1);		\
	  umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1,				\
		     _FP_DIV_MEAT_2_udiv_m_f0, R##_f0, Y##_f0);		\
	  _FP_DIV_MEAT_2_udiv_r_f0 = 0;					\
	  if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m,			\
			     _FP_DIV_MEAT_2_udiv_r))			\
	    {								\
	      R##_f0--;							\
	      _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y,			\
			      _FP_DIV_MEAT_2_udiv_r);			\
	      if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y)		\
		  && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m,		\
				    _FP_DIV_MEAT_2_udiv_r))		\
		{							\
		  R##_f0--;						\
		  _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y,		\
				  _FP_DIV_MEAT_2_udiv_r);		\
		}							\
	    }								\
	  /* A nonzero remainder (r != m) makes the result inexact.  */	\
	  if (!_FP_FRAC_EQ_2 (_FP_DIV_MEAT_2_udiv_r,			\
			      _FP_DIV_MEAT_2_udiv_m))			\
	    R##_f0 |= _FP_WORK_STICKY;					\
	}								\
    }									\
  while (0)
     623  
     624  
     625  /* Square root algorithms:
     626     We have just one right now, maybe Newton approximation
     627     should be added for those machines where division is fast.  */
     628  
     629  #define _FP_SQRT_MEAT_2(R, S, T, X, q)				\
     630    do								\
     631      {								\
     632        while (q)							\
     633  	{							\
     634  	  T##_f1 = S##_f1 + (q);				\
     635  	  if (T##_f1 <= X##_f1)					\
     636  	    {							\
     637  	      S##_f1 = T##_f1 + (q);				\
     638  	      X##_f1 -= T##_f1;					\
     639  	      R##_f1 += (q);					\
     640  	    }							\
     641  	  _FP_FRAC_SLL_2 (X, 1);				\
     642  	  (q) >>= 1;						\
     643  	}							\
     644        (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1);		\
     645        while ((q) != _FP_WORK_ROUND)				\
     646  	{							\
     647  	  T##_f0 = S##_f0 + (q);				\
     648  	  T##_f1 = S##_f1;					\
     649  	  if (T##_f1 < X##_f1					\
     650  	      || (T##_f1 == X##_f1 && T##_f0 <= X##_f0))	\
     651  	    {							\
     652  	      S##_f0 = T##_f0 + (q);				\
     653  	      S##_f1 += (T##_f0 > S##_f0);			\
     654  	      _FP_FRAC_DEC_2 (X, T);				\
     655  	      R##_f0 += (q);					\
     656  	    }							\
     657  	  _FP_FRAC_SLL_2 (X, 1);				\
     658  	  (q) >>= 1;						\
     659  	}							\
     660        if (X##_f0 | X##_f1)					\
     661  	{							\
     662  	  if (S##_f1 < X##_f1					\
     663  	      || (S##_f1 == X##_f1 && S##_f0 < X##_f0))		\
     664  	    R##_f0 |= _FP_WORK_ROUND;				\
     665  	  R##_f0 |= _FP_WORK_STICKY;				\
     666  	}							\
     667      }								\
     668    while (0)
     669  
     670  
     671  /* Assembly/disassembly for converting to/from integral types.
     672     No shifting or overflow handled here.  */
     673  
     674  #define _FP_FRAC_ASSEMBLE_2(r, X, rsize)	\
     675    (void) (((rsize) <= _FP_W_TYPE_SIZE)		\
     676  	  ? ({ (r) = X##_f0; })			\
     677  	  : ({					\
     678  	      (r) = X##_f1;			\
     679  	      (r) <<= _FP_W_TYPE_SIZE;		\
     680  	      (r) += X##_f0;			\
     681  	    }))
     682  
     683  #define _FP_FRAC_DISASSEMBLE_2(X, r, rsize)	\
     684    do						\
     685      {						\
     686        X##_f0 = (r);				\
     687        X##_f1 = ((rsize) <= _FP_W_TYPE_SIZE	\
     688  		? 0				\
     689  		: (r) >> _FP_W_TYPE_SIZE);	\
     690      }						\
     691    while (0)
     692  
     693  /* Convert FP values between word sizes.  */
     694  
     695  #define _FP_FRAC_COPY_1_2(D, S)		(D##_f = S##_f0)
     696  
     697  #define _FP_FRAC_COPY_2_1(D, S)		((D##_f0 = S##_f), (D##_f1 = 0))
     698  
     699  #define _FP_FRAC_COPY_2_2(D, S)		_FP_FRAC_COPY_2 (D, S)
     700  
     701  #endif /* !SOFT_FP_OP_2_H */