(root)/
gcc-13.2.0/
libgcc/
config/
libbid/
bid_inline_add.h
       1  /* Copyright (C) 2007-2023 Free Software Foundation, Inc.
       2  
       3  This file is part of GCC.
       4  
       5  GCC is free software; you can redistribute it and/or modify it under
       6  the terms of the GNU General Public License as published by the Free
       7  Software Foundation; either version 3, or (at your option) any later
       8  version.
       9  
      10  GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      11  WARRANTY; without even the implied warranty of MERCHANTABILITY or
      12  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      13  for more details.
      14  
      15  Under Section 7 of GPL version 3, you are granted additional
      16  permissions described in the GCC Runtime Library Exception, version
      17  3.1, as published by the Free Software Foundation.
      18  
      19  You should have received a copy of the GNU General Public License and
      20  a copy of the GCC Runtime Library Exception along with this program;
      21  see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
      22  <http://www.gnu.org/licenses/>.  */
      23  
      24  /*****************************************************************************
      25   *
      26   *    Helper add functions (for fma)
      27   *
      28   *    __BID_INLINE__ UINT64 get_add64(
      29   *        UINT64 sign_x, int exponent_x, UINT64 coefficient_x, 
      30   *        UINT64 sign_y, int exponent_y, UINT64 coefficient_y, 
      31   *  					 int rounding_mode)
      32   *
      33   *   __BID_INLINE__ UINT64 get_add128(
      34   *                       UINT64 sign_x, int exponent_x, UINT64 coefficient_x, 
      35   *                       UINT64 sign_y, int final_exponent_y, UINT128 CY, 
      36   *                       int extra_digits, int rounding_mode)
      37   *
      38   *****************************************************************************
      39   *
      40   *  Algorithm description:
      41   *
      42   *  get_add64:  same as BID64 add, but arguments are unpacked and there 
      43   *                                 are no special case checks
      44   *
      45   *  get_add128: add 64-bit coefficient to 128-bit product (which contains 
      46   *                                        16+extra_digits decimal digits), 
      47   *                         return BID64 result
      48   *              - the exponents are compared and the two coefficients are 
      49   *                properly aligned for addition/subtraction
      50   *              - multiple paths are needed
      51   *              - final result exponent is calculated and the lower term is
      52   *                      rounded first if necessary, to avoid manipulating 
      53   *                      coefficients longer than 128 bits 
      54   *
      55   ****************************************************************************/
      56  
      57  #ifndef _INLINE_BID_ADD_H_
      58  #define _INLINE_BID_ADD_H_
      59  
      60  #include "bid_internal.h"
      61  
      62  #define MAX_FORMAT_DIGITS     16
      63  #define DECIMAL_EXPONENT_BIAS 398
      64  #define MASK_BINARY_EXPONENT  0x7ff0000000000000ull
      65  #define BINARY_EXPONENT_BIAS  0x3ff
      66  #define UPPER_EXPON_LIMIT     51
      67  
      68  ///////////////////////////////////////////////////////////////////////
      69  //
      70  // get_add64() is essentially the same as bid_add(), except that 
      71  //             the arguments are unpacked
      72  //
      73  //////////////////////////////////////////////////////////////////////
      74  __BID_INLINE__ UINT64
      75  get_add64 (UINT64 sign_x, int exponent_x, UINT64 coefficient_x,
      76  	   UINT64 sign_y, int exponent_y, UINT64 coefficient_y,
      77  	   int rounding_mode, unsigned *fpsc) {
      78    UINT128 CA, CT, CT_new;
      79    UINT64 sign_a, sign_b, coefficient_a, coefficient_b, sign_s, sign_ab,
      80      rem_a;
      81    UINT64 saved_ca, saved_cb, C0_64, C64, remainder_h, T1, carry, tmp,
      82      C64_new;
      83    int_double tempx;
      84    int exponent_a, exponent_b, diff_dec_expon;
      85    int bin_expon_ca, extra_digits, amount, scale_k, scale_ca;
      86    unsigned rmode, status;
      87  
      88    // sort arguments by exponent
      89    if (exponent_x <= exponent_y) {
      90      sign_a = sign_y;
      91      exponent_a = exponent_y;
      92      coefficient_a = coefficient_y;
      93      sign_b = sign_x;
      94      exponent_b = exponent_x;
      95      coefficient_b = coefficient_x;
      96    } else {
      97      sign_a = sign_x;
      98      exponent_a = exponent_x;
      99      coefficient_a = coefficient_x;
     100      sign_b = sign_y;
     101      exponent_b = exponent_y;
     102      coefficient_b = coefficient_y;
     103    }
     104  
     105    // exponent difference
     106    diff_dec_expon = exponent_a - exponent_b;
     107  
     108    /* get binary coefficients of x and y */
     109  
     110    //--- get number of bits in the coefficients of x and y ---
     111  
     112    tempx.d = (double) coefficient_a;
     113    bin_expon_ca = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
     114  
     115    if (!coefficient_a) {
     116      return get_BID64 (sign_b, exponent_b, coefficient_b, rounding_mode,
     117  		      fpsc);
     118    }
     119    if (diff_dec_expon > MAX_FORMAT_DIGITS) {
     120      // normalize a to a 16-digit coefficient
     121  
     122      scale_ca = estimate_decimal_digits[bin_expon_ca];
     123      if (coefficient_a >= power10_table_128[scale_ca].w[0])
     124        scale_ca++;
     125  
     126      scale_k = 16 - scale_ca;
     127  
     128      coefficient_a *= power10_table_128[scale_k].w[0];
     129  
     130      diff_dec_expon -= scale_k;
     131      exponent_a -= scale_k;
     132  
     133      /* get binary coefficients of x and y */
     134  
     135      //--- get number of bits in the coefficients of x and y ---
     136      tempx.d = (double) coefficient_a;
     137      bin_expon_ca = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
     138  
     139      if (diff_dec_expon > MAX_FORMAT_DIGITS) {
     140  #ifdef SET_STATUS_FLAGS
     141        if (coefficient_b) {
     142  	__set_status_flags (fpsc, INEXACT_EXCEPTION);
     143        }
     144  #endif
     145  
     146  #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
     147  #ifndef IEEE_ROUND_NEAREST
     148        if (((rounding_mode) & 3) && coefficient_b)	// not ROUNDING_TO_NEAREST
     149        {
     150  	switch (rounding_mode) {
     151  	case ROUNDING_DOWN:
     152  	  if (sign_b) {
     153  	    coefficient_a -= ((((SINT64) sign_a) >> 63) | 1);
     154  	    if (coefficient_a < 1000000000000000ull) {
     155  	      exponent_a--;
     156  	      coefficient_a = 9999999999999999ull;
     157  	    } else if (coefficient_a >= 10000000000000000ull) {
     158  	      exponent_a++;
     159  	      coefficient_a = 1000000000000000ull;
     160  	    }
     161  	  }
     162  	  break;
     163  	case ROUNDING_UP:
     164  	  if (!sign_b) {
     165  	    coefficient_a += ((((SINT64) sign_a) >> 63) | 1);
     166  	    if (coefficient_a < 1000000000000000ull) {
     167  	      exponent_a--;
     168  	      coefficient_a = 9999999999999999ull;
     169  	    } else if (coefficient_a >= 10000000000000000ull) {
     170  	      exponent_a++;
     171  	      coefficient_a = 1000000000000000ull;
     172  	    }
     173  	  }
     174  	  break;
     175  	default:	// RZ
     176  	  if (sign_a != sign_b) {
     177  	    coefficient_a--;
     178  	    if (coefficient_a < 1000000000000000ull) {
     179  	      exponent_a--;
     180  	      coefficient_a = 9999999999999999ull;
     181  	    }
     182  	  }
     183  	  break;
     184  	}
     185        } else
     186  #endif
     187  #endif
     188  	// check special case here
     189  	if ((coefficient_a == 1000000000000000ull)
     190  	    && (diff_dec_expon == MAX_FORMAT_DIGITS + 1)
     191  	    && (sign_a ^ sign_b)
     192  	    && (coefficient_b > 5000000000000000ull)) {
     193  	coefficient_a = 9999999999999999ull;
     194  	exponent_a--;
     195        }
     196  
     197        return get_BID64 (sign_a, exponent_a, coefficient_a,
     198  			rounding_mode, fpsc);
     199      }
     200    }
     201    // test whether coefficient_a*10^(exponent_a-exponent_b)  may exceed 2^62
     202    if (bin_expon_ca + estimate_bin_expon[diff_dec_expon] < 60) {
     203      // coefficient_a*10^(exponent_a-exponent_b)<2^63
     204  
     205      // multiply by 10^(exponent_a-exponent_b)
     206      coefficient_a *= power10_table_128[diff_dec_expon].w[0];
     207  
     208      // sign mask
     209      sign_b = ((SINT64) sign_b) >> 63;
     210      // apply sign to coeff. of b
     211      coefficient_b = (coefficient_b + sign_b) ^ sign_b;
     212  
     213      // apply sign to coefficient a
     214      sign_a = ((SINT64) sign_a) >> 63;
     215      coefficient_a = (coefficient_a + sign_a) ^ sign_a;
     216  
     217      coefficient_a += coefficient_b;
     218      // get sign
     219      sign_s = ((SINT64) coefficient_a) >> 63;
     220      coefficient_a = (coefficient_a + sign_s) ^ sign_s;
     221      sign_s &= 0x8000000000000000ull;
     222  
     223      // coefficient_a < 10^16 ?
     224      if (coefficient_a < power10_table_128[MAX_FORMAT_DIGITS].w[0]) {
     225  #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
     226  #ifndef IEEE_ROUND_NEAREST
     227        if (rounding_mode == ROUNDING_DOWN && (!coefficient_a)
     228  	  && sign_a != sign_b)
     229  	sign_s = 0x8000000000000000ull;
     230  #endif
     231  #endif
     232        return get_BID64 (sign_s, exponent_b, coefficient_a,
     233  			rounding_mode, fpsc);
     234      }
     235      // otherwise rounding is necessary
     236  
     237      // already know coefficient_a<10^19
     238      // coefficient_a < 10^17 ?
     239      if (coefficient_a < power10_table_128[17].w[0])
     240        extra_digits = 1;
     241      else if (coefficient_a < power10_table_128[18].w[0])
     242        extra_digits = 2;
     243      else
     244        extra_digits = 3;
     245  
     246  #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
     247  #ifndef IEEE_ROUND_NEAREST
     248      rmode = rounding_mode;
     249      if (sign_s && (unsigned) (rmode - 1) < 2)
     250        rmode = 3 - rmode;
     251  #else
     252      rmode = 0;
     253  #endif
     254  #else
     255      rmode = 0;
     256  #endif
     257      coefficient_a += round_const_table[rmode][extra_digits];
     258  
     259      // get P*(2^M[extra_digits])/10^extra_digits
     260      __mul_64x64_to_128 (CT, coefficient_a,
     261  			reciprocals10_64[extra_digits]);
     262  
     263      // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
     264      amount = short_recip_scale[extra_digits];
     265      C64 = CT.w[1] >> amount;
     266  
     267    } else {
     268      // coefficient_a*10^(exponent_a-exponent_b) is large
     269      sign_s = sign_a;
     270  
     271  #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
     272  #ifndef IEEE_ROUND_NEAREST
     273      rmode = rounding_mode;
     274      if (sign_s && (unsigned) (rmode - 1) < 2)
     275        rmode = 3 - rmode;
     276  #else
     277      rmode = 0;
     278  #endif
     279  #else
     280      rmode = 0;
     281  #endif
     282  
     283      // check whether we can take faster path
     284      scale_ca = estimate_decimal_digits[bin_expon_ca];
     285  
     286      sign_ab = sign_a ^ sign_b;
     287      sign_ab = ((SINT64) sign_ab) >> 63;
     288  
     289      // T1 = 10^(16-diff_dec_expon)
     290      T1 = power10_table_128[16 - diff_dec_expon].w[0];
     291  
     292      // get number of digits in coefficient_a
     293      //P_ca = power10_table_128[scale_ca].w[0];
     294      //P_ca_m1 = power10_table_128[scale_ca-1].w[0];
     295      if (coefficient_a >= power10_table_128[scale_ca].w[0]) {
     296        scale_ca++;
     297        //P_ca_m1 = P_ca;
     298        //P_ca = power10_table_128[scale_ca].w[0];
     299      }
     300  
     301      scale_k = 16 - scale_ca;
     302  
     303      // apply sign
     304      //Ts = (T1 + sign_ab) ^ sign_ab;
     305  
     306      // test range of ca
     307      //X = coefficient_a + Ts - P_ca_m1;
     308  
     309      // addition
     310      saved_ca = coefficient_a - T1;
     311      coefficient_a =
     312        (SINT64) saved_ca *(SINT64) power10_table_128[scale_k].w[0];
     313      extra_digits = diff_dec_expon - scale_k;
     314  
     315      // apply sign
     316      saved_cb = (coefficient_b + sign_ab) ^ sign_ab;
     317      // add 10^16 and rounding constant
     318      coefficient_b =
     319        saved_cb + 10000000000000000ull +
     320        round_const_table[rmode][extra_digits];
     321  
     322      // get P*(2^M[extra_digits])/10^extra_digits
     323      __mul_64x64_to_128 (CT, coefficient_b,
     324  			reciprocals10_64[extra_digits]);
     325  
     326      // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
     327      amount = short_recip_scale[extra_digits];
     328      C0_64 = CT.w[1] >> amount;
     329  
     330      // result coefficient 
     331      C64 = C0_64 + coefficient_a;
     332      // filter out difficult (corner) cases
     333      // the following test is equivalent to 
     334      // ( (initial_coefficient_a + Ts) < P_ca && 
     335      //     (initial_coefficient_a + Ts) > P_ca_m1 ), 
     336      // which ensures the number of digits in coefficient_a does not change 
     337      // after adding (the appropriately scaled and rounded) coefficient_b
     338      if ((UINT64) (C64 - 1000000000000000ull - 1) >
     339  	9000000000000000ull - 2) {
     340        if (C64 >= 10000000000000000ull) {
     341  	// result has more than 16 digits
     342  	if (!scale_k) {
     343  	  // must divide coeff_a by 10
     344  	  saved_ca = saved_ca + T1;
     345  	  __mul_64x64_to_128 (CA, saved_ca, 0x3333333333333334ull);
     346  	  //reciprocals10_64[1]);
     347  	  coefficient_a = CA.w[1] >> 1;
     348  	  rem_a =
     349  	    saved_ca - (coefficient_a << 3) - (coefficient_a << 1);
     350  	  coefficient_a = coefficient_a - T1;
     351  
     352  	  saved_cb +=
     353  	    /*90000000000000000 */ +rem_a *
     354  	    power10_table_128[diff_dec_expon].w[0];
     355  	} else
     356  	  coefficient_a =
     357  	    (SINT64) (saved_ca - T1 -
     358  		      (T1 << 3)) * (SINT64) power10_table_128[scale_k -
     359  							      1].w[0];
     360  
     361  	extra_digits++;
     362  	coefficient_b =
     363  	  saved_cb + 100000000000000000ull +
     364  	  round_const_table[rmode][extra_digits];
     365  
     366  	// get P*(2^M[extra_digits])/10^extra_digits
     367  	__mul_64x64_to_128 (CT, coefficient_b,
     368  			    reciprocals10_64[extra_digits]);
     369  
     370  	// now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
     371  	amount = short_recip_scale[extra_digits];
     372  	C0_64 = CT.w[1] >> amount;
     373  
     374  	// result coefficient 
     375  	C64 = C0_64 + coefficient_a;
     376        } else if (C64 <= 1000000000000000ull) {
     377  	// less than 16 digits in result
     378  	coefficient_a =
     379  	  (SINT64) saved_ca *(SINT64) power10_table_128[scale_k +
     380  							1].w[0];
     381  	//extra_digits --;
     382  	exponent_b--;
     383  	coefficient_b =
     384  	  (saved_cb << 3) + (saved_cb << 1) + 100000000000000000ull +
     385  	  round_const_table[rmode][extra_digits];
     386  
     387  	// get P*(2^M[extra_digits])/10^extra_digits
     388  	__mul_64x64_to_128 (CT_new, coefficient_b,
     389  			    reciprocals10_64[extra_digits]);
     390  
     391  	// now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
     392  	amount = short_recip_scale[extra_digits];
     393  	C0_64 = CT_new.w[1] >> amount;
     394  
     395  	// result coefficient 
     396  	C64_new = C0_64 + coefficient_a;
     397  	if (C64_new < 10000000000000000ull) {
     398  	  C64 = C64_new;
     399  #ifdef SET_STATUS_FLAGS
     400  	  CT = CT_new;
     401  #endif
     402  	} else
     403  	  exponent_b++;
     404        }
     405  
     406      }
     407  
     408    }
     409  
     410  #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
     411  #ifndef IEEE_ROUND_NEAREST
     412    if (rmode == 0)	//ROUNDING_TO_NEAREST
     413  #endif
     414      if (C64 & 1) {
     415        // check whether fractional part of initial_P/10^extra_digits 
     416        // is exactly .5
     417        // this is the same as fractional part of 
     418        //      (initial_P + 0.5*10^extra_digits)/10^extra_digits is exactly zero
     419  
     420        // get remainder
     421        remainder_h = CT.w[1] << (64 - amount);
     422  
     423        // test whether fractional part is 0
     424        if (!remainder_h && (CT.w[0] < reciprocals10_64[extra_digits])) {
     425  	C64--;
     426        }
     427      }
     428  #endif
     429  
     430  #ifdef SET_STATUS_FLAGS
     431    status = INEXACT_EXCEPTION;
     432  
     433    // get remainder
     434    remainder_h = CT.w[1] << (64 - amount);
     435  
     436    switch (rmode) {
     437    case ROUNDING_TO_NEAREST:
     438    case ROUNDING_TIES_AWAY:
     439      // test whether fractional part is 0
     440      if ((remainder_h == 0x8000000000000000ull)
     441  	&& (CT.w[0] < reciprocals10_64[extra_digits]))
     442        status = EXACT_STATUS;
     443      break;
     444    case ROUNDING_DOWN:
     445    case ROUNDING_TO_ZERO:
     446      if (!remainder_h && (CT.w[0] < reciprocals10_64[extra_digits]))
     447        status = EXACT_STATUS;
     448      break;
     449    default:
     450      // round up
     451      __add_carry_out (tmp, carry, CT.w[0],
     452  		     reciprocals10_64[extra_digits]);
     453      if ((remainder_h >> (64 - amount)) + carry >=
     454  	(((UINT64) 1) << amount))
     455        status = EXACT_STATUS;
     456      break;
     457    }
     458    __set_status_flags (fpsc, status);
     459  
     460  #endif
     461  
     462    return get_BID64 (sign_s, exponent_b + extra_digits, C64,
     463  		    rounding_mode, fpsc);
     464  }
     465  
     466  
     467  ///////////////////////////////////////////////////////////////////
     468  // round 128-bit coefficient and return result in BID64 format
     469  // do not worry about midpoint cases
     470  //////////////////////////////////////////////////////////////////
     471  static UINT64
     472  __bid_simple_round64_sticky (UINT64 sign, int exponent, UINT128 P,
     473  			     int extra_digits, int rounding_mode,
     474  			     unsigned *fpsc) {
     475    UINT128 Q_high, Q_low, C128;
     476    UINT64 C64;
     477    int amount, rmode;
     478  
     479    rmode = rounding_mode;
     480  #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
     481  #ifndef IEEE_ROUND_NEAREST
     482    if (sign && (unsigned) (rmode - 1) < 2)
     483      rmode = 3 - rmode;
     484  #endif
     485  #endif
     486    __add_128_64 (P, P, round_const_table[rmode][extra_digits]);
     487  
     488    // get P*(2^M[extra_digits])/10^extra_digits
     489    __mul_128x128_full (Q_high, Q_low, P,
     490  		      reciprocals10_128[extra_digits]);
     491  
     492    // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
     493    amount = recip_scale[extra_digits];
     494    __shr_128 (C128, Q_high, amount);
     495  
     496    C64 = __low_64 (C128);
     497  
     498  #ifdef SET_STATUS_FLAGS
     499  
     500    __set_status_flags (fpsc, INEXACT_EXCEPTION);
     501  
     502  #endif
     503  
     504    return get_BID64 (sign, exponent, C64, rounding_mode, fpsc);
     505  }
     506  
     507  ///////////////////////////////////////////////////////////////////
     508  // round 128-bit coefficient and return result in BID64 format
     509  ///////////////////////////////////////////////////////////////////
     510  static UINT64
     511  __bid_full_round64 (UINT64 sign, int exponent, UINT128 P,
     512  		    int extra_digits, int rounding_mode,
     513  		    unsigned *fpsc) {
     514    UINT128 Q_high, Q_low, C128, Stemp, PU;
     515    UINT64 remainder_h, C64, carry, CY;
     516    int amount, amount2, rmode, status = 0;
     517  
     518    if (exponent < 0) {
     519      if (exponent >= -16 && (extra_digits + exponent < 0)) {
     520        extra_digits = -exponent;
     521  #ifdef SET_STATUS_FLAGS
     522        if (extra_digits > 0) {
     523  	rmode = rounding_mode;
     524  	if (sign && (unsigned) (rmode - 1) < 2)
     525  	  rmode = 3 - rmode;
     526  	__add_128_128 (PU, P,
     527  		       round_const_table_128[rmode][extra_digits]);
     528  	if (__unsigned_compare_gt_128
     529  	    (power10_table_128[extra_digits + 15], PU))
     530  	  status = UNDERFLOW_EXCEPTION;
     531        }
     532  #endif
     533      }
     534    }
     535  
     536    if (extra_digits > 0) {
     537      exponent += extra_digits;
     538      rmode = rounding_mode;
     539      if (sign && (unsigned) (rmode - 1) < 2)
     540        rmode = 3 - rmode;
     541      __add_128_128 (P, P, round_const_table_128[rmode][extra_digits]);
     542  
     543      // get P*(2^M[extra_digits])/10^extra_digits
     544      __mul_128x128_full (Q_high, Q_low, P,
     545  			reciprocals10_128[extra_digits]);
     546  
     547      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
     548      amount = recip_scale[extra_digits];
     549      __shr_128_long (C128, Q_high, amount);
     550  
     551      C64 = __low_64 (C128);
     552  
     553  #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
     554  #ifndef IEEE_ROUND_NEAREST
     555      if (rmode == 0)	//ROUNDING_TO_NEAREST
     556  #endif
     557        if (C64 & 1) {
     558  	// check whether fractional part of initial_P/10^extra_digits 
     559  	// is exactly .5
     560  
     561  	// get remainder
     562  	amount2 = 64 - amount;
     563  	remainder_h = 0;
     564  	remainder_h--;
     565  	remainder_h >>= amount2;
     566  	remainder_h = remainder_h & Q_high.w[0];
     567  
     568  	if (!remainder_h
     569  	    && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
     570  		|| (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
     571  		    && Q_low.w[0] <
     572  		    reciprocals10_128[extra_digits].w[0]))) {
     573  	  C64--;
     574  	}
     575        }
     576  #endif
     577  
     578  #ifdef SET_STATUS_FLAGS
     579      status |= INEXACT_EXCEPTION;
     580  
     581      // get remainder
     582      remainder_h = Q_high.w[0] << (64 - amount);
     583  
     584      switch (rmode) {
     585      case ROUNDING_TO_NEAREST:
     586      case ROUNDING_TIES_AWAY:
     587        // test whether fractional part is 0
     588        if (remainder_h == 0x8000000000000000ull
     589  	  && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
     590  	      || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
     591  		  && Q_low.w[0] <
     592  		  reciprocals10_128[extra_digits].w[0])))
     593  	status = EXACT_STATUS;
     594        break;
     595      case ROUNDING_DOWN:
     596      case ROUNDING_TO_ZERO:
     597        if (!remainder_h
     598  	  && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
     599  	      || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
     600  		  && Q_low.w[0] <
     601  		  reciprocals10_128[extra_digits].w[0])))
     602  	status = EXACT_STATUS;
     603        break;
     604      default:
     605        // round up
     606        __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
     607  		       reciprocals10_128[extra_digits].w[0]);
     608        __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
     609  			  reciprocals10_128[extra_digits].w[1], CY);
     610        if ((remainder_h >> (64 - amount)) + carry >=
     611  	  (((UINT64) 1) << amount))
     612  	status = EXACT_STATUS;
     613      }
     614  
     615      __set_status_flags (fpsc, status);
     616  
     617  #endif
     618    } else {
     619      C64 = P.w[0];
     620      if (!C64) {
     621        sign = 0;
     622        if (rounding_mode == ROUNDING_DOWN)
     623  	sign = 0x8000000000000000ull;
     624      }
     625    }
     626    return get_BID64 (sign, exponent, C64, rounding_mode, fpsc);
     627  }
     628  
     629  /////////////////////////////////////////////////////////////////////////////////
     630  // round 192-bit coefficient (P, remainder_P) and return result in BID64 format
     631  // the lowest 64 bits (remainder_P) are used for midpoint checking only
     632  ////////////////////////////////////////////////////////////////////////////////
     633  static UINT64
     634  __bid_full_round64_remainder (UINT64 sign, int exponent, UINT128 P,
     635  			      int extra_digits, UINT64 remainder_P,
     636  			      int rounding_mode, unsigned *fpsc,
     637  			      unsigned uf_status) {
     638    UINT128 Q_high, Q_low, C128, Stemp;
     639    UINT64 remainder_h, C64, carry, CY;
     640    int amount, amount2, rmode, status = uf_status;
     641  
     642    rmode = rounding_mode;
     643    if (sign && (unsigned) (rmode - 1) < 2)
     644      rmode = 3 - rmode;
     645    if (rmode == ROUNDING_UP && remainder_P) {
     646      P.w[0]++;
     647      if (!P.w[0])
     648        P.w[1]++;
     649    }
     650  
     651    if (extra_digits) {
     652      __add_128_64 (P, P, round_const_table[rmode][extra_digits]);
     653  
     654      // get P*(2^M[extra_digits])/10^extra_digits
     655      __mul_128x128_full (Q_high, Q_low, P,
     656  			reciprocals10_128[extra_digits]);
     657  
     658      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
     659      amount = recip_scale[extra_digits];
     660      __shr_128 (C128, Q_high, amount);
     661  
     662      C64 = __low_64 (C128);
     663  
     664  #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
     665  #ifndef IEEE_ROUND_NEAREST
     666      if (rmode == 0)	//ROUNDING_TO_NEAREST
     667  #endif
     668        if (!remainder_P && (C64 & 1)) {
     669  	// check whether fractional part of initial_P/10^extra_digits 
     670  	// is exactly .5
     671  
     672  	// get remainder
     673  	amount2 = 64 - amount;
     674  	remainder_h = 0;
     675  	remainder_h--;
     676  	remainder_h >>= amount2;
     677  	remainder_h = remainder_h & Q_high.w[0];
     678  
     679  	if (!remainder_h
     680  	    && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
     681  		|| (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
     682  		    && Q_low.w[0] <
     683  		    reciprocals10_128[extra_digits].w[0]))) {
     684  	  C64--;
     685  	}
     686        }
     687  #endif
     688  
     689  #ifdef SET_STATUS_FLAGS
     690      status |= INEXACT_EXCEPTION;
     691  
     692      if (!remainder_P) {
     693        // get remainder
     694        remainder_h = Q_high.w[0] << (64 - amount);
     695  
     696        switch (rmode) {
     697        case ROUNDING_TO_NEAREST:
     698        case ROUNDING_TIES_AWAY:
     699  	// test whether fractional part is 0
     700  	if (remainder_h == 0x8000000000000000ull
     701  	    && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
     702  		|| (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
     703  		    && Q_low.w[0] <
     704  		    reciprocals10_128[extra_digits].w[0])))
     705  	  status = EXACT_STATUS;
     706  	break;
     707        case ROUNDING_DOWN:
     708        case ROUNDING_TO_ZERO:
     709  	if (!remainder_h
     710  	    && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
     711  		|| (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
     712  		    && Q_low.w[0] <
     713  		    reciprocals10_128[extra_digits].w[0])))
     714  	  status = EXACT_STATUS;
     715  	break;
     716        default:
     717  	// round up
     718  	__add_carry_out (Stemp.w[0], CY, Q_low.w[0],
     719  			 reciprocals10_128[extra_digits].w[0]);
     720  	__add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
     721  			    reciprocals10_128[extra_digits].w[1], CY);
     722  	if ((remainder_h >> (64 - amount)) + carry >=
     723  	    (((UINT64) 1) << amount))
     724  	  status = EXACT_STATUS;
     725        }
     726      }
     727      __set_status_flags (fpsc, status);
     728  
     729  #endif
     730    } else {
     731      C64 = P.w[0];
     732  #ifdef SET_STATUS_FLAGS
     733      if (remainder_P) {
     734        __set_status_flags (fpsc, uf_status | INEXACT_EXCEPTION);
     735      }
     736  #endif
     737    }
     738  
     739    return get_BID64 (sign, exponent + extra_digits, C64, rounding_mode,
     740  		    fpsc);
     741  }
     742  
     743  
     744  ///////////////////////////////////////////////////////////////////
     745  // get P/10^extra_digits
     746  // result fits in 64 bits
     747  ///////////////////////////////////////////////////////////////////
     748  __BID_INLINE__ UINT64
     749  __truncate (UINT128 P, int extra_digits)
     750  // extra_digits <= 16
     751  {
     752    UINT128 Q_high, Q_low, C128;
     753    UINT64 C64;
     754    int amount;
     755  
     756    // get P*(2^M[extra_digits])/10^extra_digits
     757    __mul_128x128_full (Q_high, Q_low, P,
     758  		      reciprocals10_128[extra_digits]);
     759  
     760    // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
     761    amount = recip_scale[extra_digits];
     762    __shr_128 (C128, Q_high, amount);
     763  
     764    C64 = __low_64 (C128);
     765  
     766    return C64;
     767  }
     768  
     769  
     770  ///////////////////////////////////////////////////////////////////
     771  // return number of decimal digits in 128-bit value X
     772  ///////////////////////////////////////////////////////////////////
     773  __BID_INLINE__ int
     774  __get_dec_digits64 (UINT128 X) {
     775    int_double tempx;
     776    int digits_x, bin_expon_cx;
     777  
     778    if (!X.w[1]) {
     779      //--- get number of bits in the coefficients of x and y ---
     780      tempx.d = (double) X.w[0];
     781      bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
     782      // get number of decimal digits in the coeff_x
     783      digits_x = estimate_decimal_digits[bin_expon_cx];
     784      if (X.w[0] >= power10_table_128[digits_x].w[0])
     785        digits_x++;
     786      return digits_x;
     787    }
     788    tempx.d = (double) X.w[1];
     789    bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
     790    // get number of decimal digits in the coeff_x
     791    digits_x = estimate_decimal_digits[bin_expon_cx + 64];
     792    if (__unsigned_compare_ge_128 (X, power10_table_128[digits_x]))
     793      digits_x++;
     794  
     795    return digits_x;
     796  }
     797  
     798  
     799  ////////////////////////////////////////////////////////////////////////////////
     800  //
     801  // add 64-bit coefficient to 128-bit coefficient, return result in BID64 format
     802  //
     803  ////////////////////////////////////////////////////////////////////////////////
     804  __BID_INLINE__ UINT64
     805  get_add128 (UINT64 sign_x, int exponent_x, UINT64 coefficient_x,
     806  	    UINT64 sign_y, int final_exponent_y, UINT128 CY,
     807  	    int extra_digits, int rounding_mode, unsigned *fpsc) {
     808    UINT128 CY_L, CX, FS, F, CT, ST, T2;
     809    UINT64 CYh, CY0L, T, S, coefficient_y, remainder_y;
     810    SINT64 D = 0;
     811    int_double tempx;
     812    int diff_dec_expon, extra_digits2, exponent_y, status;
     813    int extra_dx, diff_dec2, bin_expon_cx, digits_x, rmode;
     814  
     815    // CY has more than 16 decimal digits
     816  
     817    exponent_y = final_exponent_y - extra_digits;
     818  
     819  #ifdef IEEE_ROUND_NEAREST_TIES_AWAY
     820    rounding_mode = 0;
     821  #endif
     822  #ifdef IEEE_ROUND_NEAREST
     823    rounding_mode = 0;
     824  #endif
     825  
     826    if (exponent_x > exponent_y) {
     827      // normalize x
     828      //--- get number of bits in the coefficients of x and y ---
     829      tempx.d = (double) coefficient_x;
     830      bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
     831      // get number of decimal digits in the coeff_x
     832      digits_x = estimate_decimal_digits[bin_expon_cx];
     833      if (coefficient_x >= power10_table_128[digits_x].w[0])
     834        digits_x++;
     835  
     836      extra_dx = 16 - digits_x;
     837      coefficient_x *= power10_table_128[extra_dx].w[0];
     838      if ((sign_x ^ sign_y) && (coefficient_x == 1000000000000000ull)) {
     839        extra_dx++;
     840        coefficient_x = 10000000000000000ull;
     841      }
     842      exponent_x -= extra_dx;
     843  
     844      if (exponent_x > exponent_y) {
     845  
     846        // exponent_x > exponent_y
     847        diff_dec_expon = exponent_x - exponent_y;
     848  
     849        if (exponent_x <= final_exponent_y + 1) {
     850  	__mul_64x64_to_128 (CX, coefficient_x,
     851  			    power10_table_128[diff_dec_expon].w[0]);
     852  
     853  	if (sign_x == sign_y) {
     854  	  __add_128_128 (CT, CY, CX);
     855  	  if ((exponent_x >
     856  	       final_exponent_y) /*&& (final_exponent_y>0) */ )
     857  	    extra_digits++;
     858  	  if (__unsigned_compare_ge_128
     859  	      (CT, power10_table_128[16 + extra_digits]))
     860  	    extra_digits++;
     861  	} else {
     862  	  __sub_128_128 (CT, CY, CX);
     863  	  if (((SINT64) CT.w[1]) < 0) {
     864  	    CT.w[0] = 0 - CT.w[0];
     865  	    CT.w[1] = 0 - CT.w[1];
     866  	    if (CT.w[0])
     867  	      CT.w[1]--;
     868  	    sign_y = sign_x;
     869  	  } else if (!(CT.w[1] | CT.w[0])) {
     870  	    sign_y =
     871  	      (rounding_mode !=
     872  	       ROUNDING_DOWN) ? 0 : 0x8000000000000000ull;
     873  	  }
     874  	  if ((exponent_x + 1 >=
     875  	       final_exponent_y) /*&& (final_exponent_y>=0) */ ) {
     876  	    extra_digits = __get_dec_digits64 (CT) - 16;
     877  	    if (extra_digits <= 0) {
     878  	      if (!CT.w[0] && rounding_mode == ROUNDING_DOWN)
     879  		sign_y = 0x8000000000000000ull;
     880  	      return get_BID64 (sign_y, exponent_y, CT.w[0],
     881  				rounding_mode, fpsc);
     882  	    }
     883  	  } else
     884  	    if (__unsigned_compare_gt_128
     885  		(power10_table_128[15 + extra_digits], CT))
     886  	    extra_digits--;
     887  	}
     888  
     889  	return __bid_full_round64 (sign_y, exponent_y, CT, extra_digits,
     890  				   rounding_mode, fpsc);
     891        }
     892        // diff_dec2+extra_digits is the number of digits to eliminate from 
     893        //                           argument CY
     894        diff_dec2 = exponent_x - final_exponent_y;
     895  
     896        if (diff_dec2 >= 17) {
     897  #ifndef IEEE_ROUND_NEAREST
     898  #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
     899  	if ((rounding_mode) & 3) {
     900  	  switch (rounding_mode) {
     901  	  case ROUNDING_UP:
     902  	    if (!sign_y) {
     903  	      D = ((SINT64) (sign_x ^ sign_y)) >> 63;
     904  	      D = D + D + 1;
     905  	      coefficient_x += D;
     906  	    }
     907  	    break;
     908  	  case ROUNDING_DOWN:
     909  	    if (sign_y) {
     910  	      D = ((SINT64) (sign_x ^ sign_y)) >> 63;
     911  	      D = D + D + 1;
     912  	      coefficient_x += D;
     913  	    }
     914  	    break;
     915  	  case ROUNDING_TO_ZERO:
     916  	    if (sign_y != sign_x) {
     917  	      D = 0 - 1;
     918  	      coefficient_x += D;
     919  	    }
     920  	    break;
     921  	  }
     922  	  if (coefficient_x < 1000000000000000ull) {
     923  	    coefficient_x -= D;
     924  	    coefficient_x =
     925  	      D + (coefficient_x << 1) + (coefficient_x << 3);
     926  	    exponent_x--;
     927  	  }
     928  	}
     929  #endif
     930  #endif
     931  #ifdef SET_STATUS_FLAGS
     932  	if (CY.w[1] | CY.w[0])
     933  	  __set_status_flags (fpsc, INEXACT_EXCEPTION);
     934  #endif
     935  	return get_BID64 (sign_x, exponent_x, coefficient_x,
     936  			  rounding_mode, fpsc);
     937        }
     938        // here exponent_x <= 16+final_exponent_y
     939  
     940        // truncate CY to 16 dec. digits
     941        CYh = __truncate (CY, extra_digits);
     942  
     943        // get remainder
     944        T = power10_table_128[extra_digits].w[0];
     945        __mul_64x64_to_64 (CY0L, CYh, T);
     946  
     947        remainder_y = CY.w[0] - CY0L;
     948  
     949        // align coeff_x, CYh
     950        __mul_64x64_to_128 (CX, coefficient_x,
     951  			  power10_table_128[diff_dec2].w[0]);
     952  
     953        if (sign_x == sign_y) {
     954  	__add_128_64 (CT, CX, CYh);
     955  	if (__unsigned_compare_ge_128
     956  	    (CT, power10_table_128[16 + diff_dec2]))
     957  	  diff_dec2++;
     958        } else {
     959  	if (remainder_y)
     960  	  CYh++;
     961  	__sub_128_64 (CT, CX, CYh);
     962  	if (__unsigned_compare_gt_128
     963  	    (power10_table_128[15 + diff_dec2], CT))
     964  	  diff_dec2--;
     965        }
     966  
     967        return __bid_full_round64_remainder (sign_x, final_exponent_y, CT,
     968  					   diff_dec2, remainder_y,
     969  					   rounding_mode, fpsc, 0);
     970      }
     971    }
     972    // Here (exponent_x <= exponent_y)
     973    {
     974      diff_dec_expon = exponent_y - exponent_x;
     975  
     976      if (diff_dec_expon > MAX_FORMAT_DIGITS) {
     977        rmode = rounding_mode;
     978  
     979        if ((sign_x ^ sign_y)) {
     980  	if (!CY.w[0])
     981  	  CY.w[1]--;
     982  	CY.w[0]--;
     983  	if (__unsigned_compare_gt_128
     984  	    (power10_table_128[15 + extra_digits], CY)) {
     985  	  if (rmode & 3) {
     986  	    extra_digits--;
     987  	    final_exponent_y--;
     988  	  } else {
     989  	    CY.w[0] = 1000000000000000ull;
     990  	    CY.w[1] = 0;
     991  	    extra_digits = 0;
     992  	  }
     993  	}
     994        }
     995        __scale128_10 (CY, CY);
     996        extra_digits++;
     997        CY.w[0] |= 1;
     998  
     999        return __bid_simple_round64_sticky (sign_y, final_exponent_y, CY,
    1000  					  extra_digits, rmode, fpsc);
    1001      }
    1002      // apply sign to coeff_x
    1003      sign_x ^= sign_y;
    1004      sign_x = ((SINT64) sign_x) >> 63;
    1005      CX.w[0] = (coefficient_x + sign_x) ^ sign_x;
    1006      CX.w[1] = sign_x;
    1007  
    1008      // check whether CY (rounded to 16 digits) and CX have 
    1009      //                     any digits in the same position
    1010      diff_dec2 = final_exponent_y - exponent_x;
    1011  
    1012      if (diff_dec2 <= 17) {
    1013        // align CY to 10^ex
    1014        S = power10_table_128[diff_dec_expon].w[0];
    1015        __mul_64x128_short (CY_L, S, CY);
    1016  
    1017        __add_128_128 (ST, CY_L, CX);
    1018        extra_digits2 = __get_dec_digits64 (ST) - 16;
    1019        return __bid_full_round64 (sign_y, exponent_x, ST, extra_digits2,
    1020  				 rounding_mode, fpsc);
    1021      }
    1022      // truncate CY to 16 dec. digits
    1023      CYh = __truncate (CY, extra_digits);
    1024  
    1025      // get remainder
    1026      T = power10_table_128[extra_digits].w[0];
    1027      __mul_64x64_to_64 (CY0L, CYh, T);
    1028  
    1029      coefficient_y = CY.w[0] - CY0L;
    1030      // add rounding constant
    1031      rmode = rounding_mode;
    1032      if (sign_y && (unsigned) (rmode - 1) < 2)
    1033        rmode = 3 - rmode;
    1034  #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
    1035  #ifndef IEEE_ROUND_NEAREST
    1036      if (!(rmode & 3))	//ROUNDING_TO_NEAREST
    1037  #endif
    1038  #endif
    1039      {
    1040        coefficient_y += round_const_table[rmode][extra_digits];
    1041      }
    1042      // align coefficient_y,  coefficient_x
    1043      S = power10_table_128[diff_dec_expon].w[0];
    1044      __mul_64x64_to_128 (F, coefficient_y, S);
    1045  
    1046      // fraction
    1047      __add_128_128 (FS, F, CX);
    1048  
    1049  #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
    1050  #ifndef IEEE_ROUND_NEAREST
    1051      if (rmode == 0)	//ROUNDING_TO_NEAREST
    1052  #endif
    1053      {
    1054        // rounding code, here RN_EVEN
    1055        // 10^(extra_digits+diff_dec_expon)
    1056        T2 = power10_table_128[diff_dec_expon + extra_digits];
    1057        if (__unsigned_compare_gt_128 (FS, T2)
    1058  	  || ((CYh & 1) && __test_equal_128 (FS, T2))) {
    1059  	CYh++;
    1060  	__sub_128_128 (FS, FS, T2);
    1061        }
    1062      }
    1063  #endif
    1064  #ifndef IEEE_ROUND_NEAREST
    1065  #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
    1066      if (rmode == 4)	//ROUNDING_TO_NEAREST
    1067  #endif
    1068      {
    1069        // rounding code, here RN_AWAY
    1070        // 10^(extra_digits+diff_dec_expon)
    1071        T2 = power10_table_128[diff_dec_expon + extra_digits];
    1072        if (__unsigned_compare_ge_128 (FS, T2)) {
    1073  	CYh++;
    1074  	__sub_128_128 (FS, FS, T2);
    1075        }
    1076      }
    1077  #endif
    1078  #ifndef IEEE_ROUND_NEAREST
    1079  #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
    1080      switch (rmode) {
    1081      case ROUNDING_DOWN:
    1082      case ROUNDING_TO_ZERO:
    1083        if ((SINT64) FS.w[1] < 0) {
    1084  	CYh--;
    1085  	if (CYh < 1000000000000000ull) {
    1086  	  CYh = 9999999999999999ull;
    1087  	  final_exponent_y--;
    1088  	}
    1089        } else {
    1090  	T2 = power10_table_128[diff_dec_expon + extra_digits];
    1091  	if (__unsigned_compare_ge_128 (FS, T2)) {
    1092  	  CYh++;
    1093  	  __sub_128_128 (FS, FS, T2);
    1094  	}
    1095        }
    1096        break;
    1097      case ROUNDING_UP:
    1098        if ((SINT64) FS.w[1] < 0)
    1099  	break;
    1100        T2 = power10_table_128[diff_dec_expon + extra_digits];
    1101        if (__unsigned_compare_gt_128 (FS, T2)) {
    1102  	CYh += 2;
    1103  	__sub_128_128 (FS, FS, T2);
    1104        } else if ((FS.w[1] == T2.w[1]) && (FS.w[0] == T2.w[0])) {
    1105  	CYh++;
    1106  	FS.w[1] = FS.w[0] = 0;
    1107        } else if (FS.w[1] | FS.w[0])
    1108  	CYh++;
    1109        break;
    1110      }
    1111  #endif
    1112  #endif
    1113  
    1114  #ifdef SET_STATUS_FLAGS
    1115      status = INEXACT_EXCEPTION;
    1116  #ifndef IEEE_ROUND_NEAREST
    1117  #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
    1118      if (!(rmode & 3))
    1119  #endif
    1120  #endif
    1121      {
    1122        // RN modes
    1123        if ((FS.w[1] ==
    1124  	   round_const_table_128[0][diff_dec_expon + extra_digits].w[1])
    1125  	  && (FS.w[0] ==
    1126  	      round_const_table_128[0][diff_dec_expon +
    1127  				       extra_digits].w[0]))
    1128  	status = EXACT_STATUS;
    1129      }
    1130  #ifndef IEEE_ROUND_NEAREST
    1131  #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
    1132      else if (!FS.w[1] && !FS.w[0])
    1133        status = EXACT_STATUS;
    1134  #endif
    1135  #endif
    1136  
    1137      __set_status_flags (fpsc, status);
    1138  #endif
    1139  
    1140      return get_BID64 (sign_y, final_exponent_y, CYh, rounding_mode,
    1141  		      fpsc);
    1142    }
    1143  
    1144  }
    1145  
    1146  //////////////////////////////////////////////////////////////////////////
    1147  //
    1148  //  If coefficient_z is less than 16 digits long, normalize to 16 digits
    1149  //
    1150  /////////////////////////////////////////////////////////////////////////
    1151  static UINT64
    1152  BID_normalize (UINT64 sign_z, int exponent_z,
    1153  	       UINT64 coefficient_z, UINT64 round_dir, int round_flag,
    1154  	       int rounding_mode, unsigned *fpsc) {
    1155    SINT64 D;
    1156    int_double tempx;
    1157    int digits_z, bin_expon, scale, rmode;
    1158  
    1159  #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
    1160  #ifndef IEEE_ROUND_NEAREST
    1161    rmode = rounding_mode;
    1162    if (sign_z && (unsigned) (rmode - 1) < 2)
    1163      rmode = 3 - rmode;
    1164  #else
    1165    if (coefficient_z >= power10_table_128[15].w[0])
    1166      return z;
    1167  #endif
    1168  #endif
    1169  
    1170    //--- get number of bits in the coefficients of x and y ---
    1171    tempx.d = (double) coefficient_z;
    1172    bin_expon = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
    1173    // get number of decimal digits in the coeff_x
    1174    digits_z = estimate_decimal_digits[bin_expon];
    1175    if (coefficient_z >= power10_table_128[digits_z].w[0])
    1176      digits_z++;
    1177  
    1178    scale = 16 - digits_z;
    1179    exponent_z -= scale;
    1180    if (exponent_z < 0) {
    1181      scale += exponent_z;
    1182      exponent_z = 0;
    1183    }
    1184    coefficient_z *= power10_table_128[scale].w[0];
    1185  
    1186  #ifdef SET_STATUS_FLAGS
    1187    if (round_flag) {
    1188      __set_status_flags (fpsc, INEXACT_EXCEPTION);
    1189      if (coefficient_z < 1000000000000000ull)
    1190        __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
    1191      else if ((coefficient_z == 1000000000000000ull) && !exponent_z
    1192  	     && ((SINT64) (round_dir ^ sign_z) < 0) && round_flag
    1193  	     && (rmode == ROUNDING_DOWN || rmode == ROUNDING_TO_ZERO))
    1194        __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
    1195    }
    1196  #endif
    1197  
    1198  #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
    1199  #ifndef IEEE_ROUND_NEAREST
    1200    if (round_flag && (rmode & 3)) {
    1201      D = round_dir ^ sign_z;
    1202  
    1203      if (rmode == ROUNDING_UP) {
    1204        if (D >= 0)
    1205  	coefficient_z++;
    1206      } else {
    1207        if (D < 0)
    1208  	coefficient_z--;
    1209        if (coefficient_z < 1000000000000000ull && exponent_z) {
    1210  	coefficient_z = 9999999999999999ull;
    1211  	exponent_z--;
    1212        }
    1213      }
    1214    }
    1215  #endif
    1216  #endif
    1217  
    1218    return get_BID64 (sign_z, exponent_z, coefficient_z, rounding_mode,
    1219  		    fpsc);
    1220  }
    1221  
    1222  
    1223  //////////////////////////////////////////////////////////////////////////
    1224  //
    1225  //    0*10^ey + cz*10^ez,   ey<ez  
    1226  //
    1227  //////////////////////////////////////////////////////////////////////////
    1228  
    1229  __BID_INLINE__ UINT64
    1230  add_zero64 (int exponent_y, UINT64 sign_z, int exponent_z,
    1231  	    UINT64 coefficient_z, unsigned *prounding_mode,
    1232  	    unsigned *fpsc) {
    1233    int_double tempx;
    1234    int bin_expon, scale_k, scale_cz;
    1235    int diff_expon;
    1236  
    1237    diff_expon = exponent_z - exponent_y;
    1238  
    1239    tempx.d = (double) coefficient_z;
    1240    bin_expon = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
    1241    scale_cz = estimate_decimal_digits[bin_expon];
    1242    if (coefficient_z >= power10_table_128[scale_cz].w[0])
    1243      scale_cz++;
    1244  
    1245    scale_k = 16 - scale_cz;
    1246    if (diff_expon < scale_k)
    1247      scale_k = diff_expon;
    1248    coefficient_z *= power10_table_128[scale_k].w[0];
    1249  
    1250    return get_BID64 (sign_z, exponent_z - scale_k, coefficient_z,
    1251  		    *prounding_mode, fpsc);
    1252  }
    1253  #endif