1  /*
       2   * Included file to common source float/double checking
       3   * The following macros should be defined:
       4   *	TYPE	   -- floating point type
       5   *	NAME	   -- convert a name to include the type
       6   *	UNS_TYPE   -- type to hold TYPE as an unsigned number
       7   *	EXP_SIZE   -- size in bits of the exponent
       8   *	MAN_SIZE   -- size in bits of the mantissa
       9   *	UNS_ABS	   -- absolute value for UNS_TYPE
      10   *	FABS	   -- absolute value function for TYPE
      11   *	FMAX	   -- maximum function for TYPE
      12   *	FMIN	   -- minimum function for TYPE
      13   *	SQRT	   -- square root function for TYPE
      14   *	RMIN	   -- minimum random number to generate
      15   *	RMAX	   -- maximum random number to generate
      16   *	ASMDIV	   -- assembler instruction to do divide
      17   *	ASMSQRT	   -- assembler instruction to do square root
      18   *	BDIV	   -- # of bits of inaccuracy to allow for division
      19   *	BRSQRT	   -- # of bits of inaccuracy to allow for 1/sqrt
      20   *	INIT_DIV   -- Initial values to test 1/x against
      21   *	INIT_RSQRT -- Initial values to test 1/sqrt(x) against
      22   */
      23  
      24  typedef union
      25  {
      26    UNS_TYPE i;
      27    TYPE x;
      28  } NAME (union);
      29  
      30  /*
      31   * Input/output arrays.
      32   */
      33  
      34  static NAME (union) NAME (div_input)  [] __attribute__((__aligned__(32))) = INIT_DIV;
      35  static NAME (union) NAME (rsqrt_input)[] __attribute__((__aligned__(32))) = INIT_RSQRT;
      36  
      37  #define DIV_SIZE   (sizeof (NAME (div_input))   / sizeof (TYPE))
      38  #define RSQRT_SIZE (sizeof (NAME (rsqrt_input)) / sizeof (TYPE))
      39  
      40  static TYPE NAME (div_expected)[DIV_SIZE] __attribute__((__aligned__(32)));
      41  static TYPE NAME (div_output)  [DIV_SIZE] __attribute__((__aligned__(32)));
      42  
      43  static TYPE NAME (rsqrt_expected)[RSQRT_SIZE] __attribute__((__aligned__(32)));
      44  static TYPE NAME (rsqrt_output)  [RSQRT_SIZE] __attribute__((__aligned__(32)));
      45  
      46  
      47  /*
      48   * Crack a floating point number into sign bit, exponent, and mantissa.
      49   */
      50  
      51  static void
      52  NAME (crack) (TYPE number, unsigned int *p_sign, unsigned *p_exponent, UNS_TYPE *p_mantissa)
      53  {
      54    NAME (union) u;
      55    UNS_TYPE bits;
      56  
      57    u.x = number;
      58    bits = u.i;
      59  
      60    *p_sign = (unsigned int)((bits >> (EXP_SIZE + MAN_SIZE)) & 0x1);
      61    *p_exponent = (unsigned int)((bits >> MAN_SIZE) & ((((UNS_TYPE)1) << EXP_SIZE) - 1));
      62    *p_mantissa = bits & ((((UNS_TYPE)1) << MAN_SIZE) - 1);
      63    return;
      64  }
      65  
      66  
      67  /*
      68   * Prevent optimizer from eliminating + 0.0 to remove -0.0.
      69   */
      70  
      71  volatile TYPE NAME (math_diff_0) = ((TYPE) 0.0);
      72  
      73  /*
      74   * Return negative if two numbers are significanly different or return the
      75   * number of bits that are different in the mantissa.
      76   */
      77  
      78  static int
      79  NAME (math_diff) (TYPE a, TYPE b, int bits)
      80  {
      81    TYPE zero = NAME (math_diff_0);
      82    unsigned int sign_a, sign_b;
      83    unsigned int exponent_a, exponent_b;
      84    UNS_TYPE mantissa_a, mantissa_b, diff;
      85    int i;
      86  
      87    /* eliminate signed zero.  */
      88    a += zero;
      89    b += zero;
      90  
      91    /* special case Nan.  */
      92    if (__builtin_isnan (a))
      93      return (__builtin_isnan (b) ? 0 : -1);
      94  
      95    if (a == b)
      96      return 0;
      97  
      98    /* special case infinity.  */
      99    if (__builtin_isinf (a))
     100      return (__builtin_isinf (b) ? 0 : -1);
     101  
     102    /* punt on denormal numbers.  */
     103    if (!__builtin_isnormal (a) || !__builtin_isnormal (b))
     104      return -1;
     105  
     106    NAME (crack) (a, &sign_a, &exponent_a, &mantissa_a);
     107    NAME (crack) (b, &sign_b, &exponent_b, &mantissa_b);
     108  
     109    /* If the sign is different, there is no hope.  */
     110    if (sign_a != sign_b)
     111      return -1;
     112  
     113    /* If the exponent is off by 1, see if the values straddle the power of two,
     114       and adjust things to do the mantassa check if we can.  */
     115    if ((exponent_a == (exponent_b+1)) || (exponent_a == (exponent_b-1)))
     116      {
     117        TYPE big = FMAX (a, b);
     118        TYPE small = FMIN (a, b);
     119        TYPE diff = FABS (a - b);
     120        unsigned int sign_big, sign_small, sign_test;
     121        unsigned int exponent_big, exponent_small, exponent_test;
     122        UNS_TYPE mantissa_big, mantissa_small, mantissa_test;
     123  
     124        NAME (crack) (big, &sign_big, &exponent_big, &mantissa_big);
     125        NAME (crack) (small, &sign_small, &exponent_small, &mantissa_small);
     126  
     127        NAME (crack) (small - diff, &sign_test, &exponent_test, &mantissa_test);
     128        if ((sign_test == sign_small) && (exponent_test == exponent_small))
     129  	{
     130  	  mantissa_a = mantissa_small;
     131  	  mantissa_b = mantissa_test;
     132  	}
     133  
     134        else
     135  	{
     136  	  NAME (crack) (big + diff, &sign_test, &exponent_test, &mantissa_test);
     137  	  if ((sign_test == sign_big) && (exponent_test == exponent_big))
     138  	    {
     139  	      mantissa_a = mantissa_big;
     140  	      mantissa_b = mantissa_test;
     141  	    }
     142  
     143  	  else
     144  	    return -1;
     145  	}
     146      }
     147  
     148    else if (exponent_a != exponent_b)
     149      return -1;
     150  
     151    diff = UNS_ABS (mantissa_a - mantissa_b);
     152    for (i = MAN_SIZE; i > 0; i--)
     153      {
     154        if ((diff & ((UNS_TYPE)1) << (i-1)) != 0)
     155  	return i;
     156      }
     157  
     158    return -1;
     159  }
     160  
     161  
     162  /*
     163   * Turn off inlining to make code inspection easier.
     164   */
     165  
     166  static void NAME (asm_div) (void) __attribute__((__noinline__));
     167  static void NAME (vector_div) (void) __attribute__((__noinline__));
     168  static void NAME (scalar_div) (void) __attribute__((__noinline__));
     169  static void NAME (asm_rsqrt) (void) __attribute__((__noinline__));
     170  static void NAME (vector_rsqrt) (void) __attribute__((__noinline__));
     171  static void NAME (scalar_rsqrt) (void) __attribute__((__noinline__));
     172  static void NAME (check_div) (const char *) __attribute__((__noinline__));
     173  static void NAME (check_rsqrt) (const char *) __attribute__((__noinline__));
     174  static void NAME (run) (void) __attribute__((__noinline__));
     175  
     176  
     177  /*
     178   * Division function that might be vectorized.
     179   */
     180  
     181  static void
     182  NAME (vector_div) (void)
     183  {
     184    size_t i;
     185  
     186    for (i = 0; i < DIV_SIZE; i++)
     187      NAME (div_output)[i] = ((TYPE) 1.0) / NAME (div_input)[i].x;
     188  }
     189  
     190  /*
     191   * Division function that is not vectorized.
     192   */
     193  
     194  static void
     195  NAME (scalar_div) (void)
     196  {
     197    size_t i;
     198  
     199    for (i = 0; i < DIV_SIZE; i++)
     200      {
     201        TYPE x = ((TYPE) 1.0) / NAME (div_input)[i].x;
     202        TYPE y;
     203        __asm__ ("" : "=d" (y) : "0" (x));
     204        NAME (div_output)[i] = y;
     205      }
     206  }
     207  
     208  /*
     209   * Generate the division instruction via asm.
     210   */
     211  
     212  static void
     213  NAME (asm_div) (void)
     214  {
     215    size_t i;
     216  
     217    for (i = 0; i < DIV_SIZE; i++)
     218      {
     219        TYPE x;
     220        __asm__ (ASMDIV " %0,%1,%2"
     221  	       : "=d" (x)
     222  	       : "d" ((TYPE) 1.0), "d" (NAME (div_input)[i].x));
     223        NAME (div_expected)[i] = x;
     224      }
     225  }
     226  
     227  /*
     228   * Reciprocal square root function that might be vectorized.
     229   */
     230  
     231  static void
     232  NAME (vector_rsqrt) (void)
     233  {
     234    size_t i;
     235  
     236    for (i = 0; i < RSQRT_SIZE; i++)
     237      NAME (rsqrt_output)[i] = ((TYPE) 1.0) / SQRT (NAME (rsqrt_input)[i].x);
     238  }
     239  
     240  /*
     241   * Reciprocal square root function that is not vectorized.
     242   */
     243  
     244  static void
     245  NAME (scalar_rsqrt) (void)
     246  {
     247    size_t i;
     248  
     249    for (i = 0; i < RSQRT_SIZE; i++)
     250      {
     251        TYPE x = ((TYPE) 1.0) / SQRT (NAME (rsqrt_input)[i].x);
     252        TYPE y;
     253        __asm__ ("" : "=d" (y) : "0" (x));
     254        NAME (rsqrt_output)[i] = y;
     255      }
     256  }
     257  
     258  /*
     259   * Generate the 1/sqrt instructions via asm.
     260   */
     261  
     262  static void
     263  NAME (asm_rsqrt) (void)
     264  {
     265    size_t i;
     266  
     267    for (i = 0; i < RSQRT_SIZE; i++)
     268      {
     269        TYPE x;
     270        TYPE y;
     271        __asm__ (ASMSQRT " %0,%1" : "=d" (x) : "d" (NAME (rsqrt_input)[i].x));
     272        __asm__ (ASMDIV " %0,%1,%2" : "=d" (y) : "d" ((TYPE) 1.0), "d" (x));
     273        NAME (rsqrt_expected)[i] = y;
     274      }
     275  }
     276  
     277  
     278  /*
     279   * Functions to abort or report errors.
     280   */
     281  
     282  static int NAME (error_count) = 0;
     283  
     284  #ifdef VERBOSE
     285  static int NAME (max_bits_div)   = 0;
     286  static int NAME (max_bits_rsqrt) = 0;
     287  #endif
     288  
     289  
     290  /*
     291   * Compare the expected value with the value we got.
     292   */
     293  
     294  static void
     295  NAME (check_div) (const char *test)
     296  {
     297    size_t i;
     298    int b;
     299  
     300    for (i = 0; i < DIV_SIZE; i++)
     301      {
     302        TYPE exp = NAME (div_expected)[i];
     303        TYPE out = NAME (div_output)[i];
     304        b = NAME (math_diff) (exp, out, BDIV);
     305  
     306  #ifdef VERBOSE
     307        if (b != 0)
     308  	{
     309  	  NAME (union) u_in = NAME (div_input)[i];
     310  	  NAME (union) u_exp;
     311  	  NAME (union) u_out;
     312  	  char explanation[64];
     313  	  const char *p_exp;
     314  
     315  	  if (b < 0)
     316  	    p_exp = "failed";
     317  	  else
     318  	    {
     319  	      p_exp = explanation;
     320  	      sprintf (explanation, "%d bit error%s", b, (b > BDIV) ? ", failed" : "");
     321  	    }
     322  
     323  	  u_exp.x = exp;
     324  	  u_out.x = out;
     325  	  printf ("%s %s %s for 1.0 / %g [0x%llx], expected %g [0x%llx], got %g [0x%llx]\n",
     326  		  TNAME (TYPE), test, p_exp,
     327  		  (double) u_in.x, (unsigned long long) u_in.i,
     328  		  (double) exp,    (unsigned long long) u_exp.i,
     329  		  (double) out,    (unsigned long long) u_out.i);
     330  	}
     331  #endif
     332  
     333        if (b < 0 || b > BDIV)
     334  	NAME (error_count)++;
     335  
     336  #ifdef VERBOSE
     337        if (b > NAME (max_bits_div))
     338  	NAME (max_bits_div) = b;
     339  #endif
     340      }
     341  }
     342  
     343  static void
     344  NAME (check_rsqrt) (const char *test)
     345  {
     346    size_t i;
     347    int b;
     348  
     349    for (i = 0; i < RSQRT_SIZE; i++)
     350      {
     351        TYPE exp = NAME (rsqrt_expected)[i];
     352        TYPE out = NAME (rsqrt_output)[i];
     353        b = NAME (math_diff) (exp, out, BRSQRT);
     354  
     355  #ifdef VERBOSE
     356        if (b != 0)
     357  	{
     358  	  NAME (union) u_in = NAME (rsqrt_input)[i];
     359  	  NAME (union) u_exp;
     360  	  NAME (union) u_out;
     361  	  char explanation[64];
     362  	  const char *p_exp;
     363  
     364  	  if (b < 0)
     365  	    p_exp = "failed";
     366  	  else
     367  	    {
     368  	      p_exp = explanation;
     369  	      sprintf (explanation, "%d bit error%s", b, (b > BDIV) ? ", failed" : "");
     370  	    }
     371  
     372  	  u_exp.x = exp;
     373  	  u_out.x = out;
     374  	  printf ("%s %s %s for 1 / sqrt (%g) [0x%llx], expected %g [0x%llx], got %g [0x%llx]\n",
     375  		  TNAME (TYPE), test, p_exp,
     376  		  (double) u_in.x, (unsigned long long) u_in.i,
     377  		  (double) exp,    (unsigned long long) u_exp.i,
     378  		  (double) out,    (unsigned long long) u_out.i);
     379  	}
     380  #endif
     381  
     382        if (b < 0 || b > BRSQRT)
     383  	NAME (error_count)++;
     384  
     385  #ifdef VERBOSE
     386        if (b > NAME (max_bits_rsqrt))
     387  	NAME (max_bits_rsqrt) = b;
     388  #endif
     389      }
     390  }
     391  
     392  
     393  /*
     394   * Now do everything.
     395   */
     396  
     397  static void
     398  NAME (run) (void)
     399  {
     400  #ifdef VERBOSE
     401    printf ("start run_%s, divide size = %ld, rsqrt size = %ld, %d bit%s for a/b, %d bit%s for 1/sqrt(a)\n",
     402  	  TNAME (TYPE),
     403  	  (long)DIV_SIZE,
     404  	  (long)RSQRT_SIZE,
     405  	  BDIV, (BDIV == 1) ? "" : "s",
     406  	  BRSQRT, (BRSQRT == 1) ? "" : "s");
     407  #endif
     408  
     409    NAME (asm_div) ();
     410  
     411    NAME (scalar_div) ();
     412    NAME (check_div) ("scalar");
     413  
     414    NAME (vector_div) ();
     415    NAME (check_div) ("vector");
     416  
     417    NAME (asm_rsqrt) ();
     418  
     419    NAME (scalar_rsqrt) ();
     420    NAME (check_rsqrt) ("scalar");
     421  
     422    NAME (vector_rsqrt) ();
     423    NAME (check_rsqrt) ("vector");
     424  
     425  #ifdef VERBOSE
     426    printf ("end run_%s, errors = %d, max div bits = %d, max rsqrt bits = %d\n",
     427  	  TNAME (TYPE),
     428  	  NAME (error_count),
     429  	  NAME (max_bits_div),
     430  	  NAME (max_bits_rsqrt));
     431  #endif
     432  }