1  /* Check that the SIMD versions of math routines give the same (or
       2     sufficiently close) results as their scalar equivalents.  */
       3  
       4  /* { dg-do run } */
       5  /* { dg-options "-O2 -ftree-vectorize -fno-math-errno" } */
       6  /* { dg-additional-options -foffload-options=amdgcn-amdhsa=-mstack-size=3000000 { target offload_target_amdgcn } } */
       7  /* { dg-additional-options -foffload-options=-lm } */
       8  
          /* Build-time switches for this test:
             PRINT_RESULT - when defined, PRINTF expands to printf; it is
                            explicitly undefined here, so PRINTF resolves to a
                            stub that prints nothing.
             VERBOSE      - when nonzero, every comparison is passed to PRINTF,
                            not only the failing ones.
             EARLY_EXIT   - when nonzero, the check_* functions call exit (1)
                            once a failure has been recorded.  */
       9  #undef PRINT_RESULT
      10  #define VERBOSE 0
      11  #define EARLY_EXIT 1
      12  
      13  #include <math.h>
      14  #include <stdlib.h>
      15  
      16  #ifdef PRINT_RESULT
      17    #include <stdio.h>
      18    #define PRINTF printf
      19  #else
      20    static void null_printf (const char *f, ...) { }
      21  
      22    #define PRINTF null_printf
      23  #endif
      24  
          /* Number of sample points per tested function; also the length of
             the input and result arrays.  */
      25  #define N 512
          /* Absolute-difference thresholds; the checker macros select one by
             pasting the element type name (EPSILON_##TFLOAT).  */
      26  #define EPSILON_float 1e-5
      27  #define EPSILON_double 1e-10
      28  
          /* Nonzero while a test that is expected to fail is running:
             TEST_FUN_XFAIL sets it to 1 around the test, and TEST_FUN's
             checker then leaves `failed' untouched.  */
      29  static int xfail = 0;
          /* Set once an unexpected failure has been seen; main returns it.  */
      30  static int failed = 0;
      31  
      32  int deviation_float (float x, float y)
      33  {
      34    union {
      35      float f;
      36      unsigned u;
      37    } u, v;
      38  
      39    u.f = x;
      40    v.f = y;
      41  
      42    unsigned mask = 0x80000000U;
      43    int i;
      44  
      45    for (i = 32; i > 0; i--)
      46      if ((u.u ^ v.u) & mask)
      47        break;
      48      else
      49        mask >>= 1;
      50  
      51    return i;
      52  }
      53  
      54  int deviation_double (double x, double y)
      55  {
      56    union {
      57      double d;
      58      unsigned long long u;
      59    } u, v;
      60  
      61    u.d = x;
      62    v.d = y;
      63  
      64    unsigned long long mask = 0x8000000000000000ULL;
      65    int i;
      66  
      67    for (i = 64; i > 0; i--)
      68      if ((u.u ^ v.u) & mask)
      69        break;
      70      else
      71        mask >>= 1;
      72  
      73    return i;
      74  }
      75  
          /* Run TEST_FUN with the file-scope xfail flag set to 1 for the
             duration of the test and reset to 0 afterwards.  The expansion is a
             plain sequence of statements (not wrapped in a block), so it has to
             be used where a statement sequence is valid.  */
      76  #define TEST_FUN_XFAIL(TFLOAT, LOW, HIGH, FUN) \
      77    xfail = 1; \
      78    TEST_FUN (TFLOAT, LOW, HIGH, FUN); \
      79    xfail = 0;
      80  
          /* Define and immediately run a test of the one-argument function FUN
             on element type TFLOAT.

             test_FUN fills a[] with N evenly spaced inputs starting at LOW with
             step (HIGH - LOW) / N (HIGH itself is not sampled), evaluates FUN
             over them in an "omp target parallel for simd" region and hands the
             results to check_FUN.

             check_FUN carries optimize attributes requesting no-tree-vectorize
             and no-unsafe-math-optimizations.  It recomputes every value and
             flags an element when its isnan or isinf result differs from that
             of the reference, or when the absolute difference exceeds
             EPSILON_TFLOAT and deviation_TFLOAT returns more than 10.  A flagged
             element sets the file-scope `failed' flag unless xfail is set; with
             EARLY_EXIT the process exits with status 1 once `failed' is set.

             The expansion consists of two function definitions followed by a
             call, so it has to appear inside a function body (as in main) and
             relies on nested functions, a GNU C extension.  */
      81  #define TEST_FUN(TFLOAT, LOW, HIGH, FUN) \
      82  __attribute__((optimize("no-tree-vectorize"))) \
      83  __attribute__((optimize("no-unsafe-math-optimizations"))) \
      84  void check_##FUN (TFLOAT res[N], TFLOAT a[N]) \
      85  { \
      86    for (int i = 0; i < N; i++) { \
      87      TFLOAT expected = FUN (a[i]); \
      88      TFLOAT diff = __builtin_fabs (expected - res[i]); \
      89      int deviation = deviation_##TFLOAT (expected, res[i]); \
      90      int fail = isnan (res[i]) != isnan (expected) \
      91  	       || isinf (res[i]) != isinf (expected) \
      92  	       || (diff > EPSILON_##TFLOAT && deviation > 10); \
      93      if (VERBOSE || fail) \
      94        PRINTF (#FUN "(%f) = %f, expected = %f, diff = %f, deviation = %d %s\n", \
      95  	      a[i], res[i], expected, diff, deviation, fail ? "(!)" : ""); \
      96      failed |= (fail && !xfail); \
      97      if (EARLY_EXIT && failed) \
      98        exit (1); \
      99    } \
     100  } \
     101  void test_##FUN (void) \
     102  { \
     103    TFLOAT res[N], a[N]; \
     104    for (int i = 0; i < N; i++) \
     105      a[i] = LOW + ((HIGH - LOW) / N) * i; \
     106    _Pragma ("omp target parallel for simd map(to:a) map(from:res)") \
     107      for (int i = 0; i < N; i++) \
     108        res[i] = FUN (a[i]); \
     109    check_##FUN (res, a); \
     110  }\
     111  test_##FUN ();
     112  
     113  #define TEST_FUN2(TFLOAT, LOW1, HIGH1, LOW2, HIGH2, FUN) \
     114  __attribute__((optimize("no-tree-vectorize"))) \
     115  __attribute__((optimize("no-unsafe-math-optimizations"))) \
     116  void check_##FUN (TFLOAT res[N], TFLOAT a[N], TFLOAT b[N]) \
     117  { \
     118    int failed = 0; \
     119    for (int i = 0; i < N; i++) { \
     120      TFLOAT expected = FUN (a[i], b[i]); \
     121      TFLOAT diff = __builtin_fabs (expected - res[i]); \
     122      int deviation = deviation_##TFLOAT (expected, res[i]); \
     123      int fail = isnan (res[i]) != isnan (expected) \
     124  	       || isinf (res[i]) != isinf (expected) \
     125  	       || (diff > EPSILON_##TFLOAT && deviation > 10); \
     126      failed |= fail; \
     127      if (VERBOSE || fail) \
     128        PRINTF (#FUN "(%f,%f) = %f, expected = %f, diff = %f, deviation = %d %s\n", \
     129  	      a[i], b[i], res[i], expected, diff, deviation, fail ? "(!)" : ""); \
     130      if (EARLY_EXIT && fail) \
     131        exit (1); \
     132    } \
     133  } \
     134  void test_##FUN (void) \
     135  { \
     136    TFLOAT res[N], a[N], b[N]; \
     137    for (int i = 0; i < N; i++) { \
     138      a[i] = LOW1 + ((HIGH1 - LOW1) / N) * i; \
     139      b[i] = LOW2 + ((HIGH2 - LOW2) / N) * i; \
     140    } \
     141    _Pragma ("omp target parallel for simd map(to:a) map(from:res)") \
     142      for (int i = 0; i < N; i++) \
     143        res[i] = FUN (a[i], b[i]); \
     144    check_##FUN (res, a, b); \
     145  }\
     146  test_##FUN ();
     147  
          /* Expand and run every test in order.  Each TEST_FUN / TEST_FUN2 use
             both defines the test for one function and calls it, passing the
             element type, the input range(s) and the function under test.
             Returns the file-scope `failed' flag as the exit status.

             NOTE(review): several ranges extend beyond the function's
             mathematical domain (e.g. acos from -1.1, log from -1.0), so
             NaN-producing inputs are compared as well; presumably
             intentional.  */
     148  int main (void)
     149  {
          /* Single-precision functions.  */
     150    TEST_FUN (float, -1.1, 1.1, acosf);
     151    TEST_FUN (float, -10, 10, acoshf);
     152    TEST_FUN (float, -1.1, 1.1, asinf);
     153    TEST_FUN (float, -10, 10, asinhf);
     154    TEST_FUN (float, -1.1, 1.1, atanf);
     155    TEST_FUN2 (float, -2.0, 2.0, 2.0, -2.0, atan2f);
     156    TEST_FUN (float, -2.0, 2.0, atanhf);
     157    TEST_FUN2 (float, -10.0, 10.0, 5.0, -15.0, copysignf);
     158    TEST_FUN (float, -3.14159265359, 3.14159265359, cosf);
     159    TEST_FUN (float, -3.14159265359, 3.14159265359, coshf);
     160    TEST_FUN (float, -10.0, 10.0, erff);
     161    TEST_FUN (float, -10.0, 10.0, expf);
     162    TEST_FUN (float, -10.0, 10.0, exp2f);
     163    TEST_FUN2 (float, -10.0, 10.0, 100.0, -25.0, fmodf);
     164    TEST_FUN (float, -10.0, 10.0, gammaf);
     165    TEST_FUN2 (float, -10.0, 10.0, 15.0, -5.0,hypotf);
     166    TEST_FUN (float, -10.0, 10.0, lgammaf);
     167    TEST_FUN (float, -1.0, 50.0, logf);
     168    TEST_FUN (float, -1.0, 500.0, log10f);
     169    TEST_FUN (float, -1.0, 64.0, log2f);
     170    TEST_FUN2 (float, -100.0, 100.0, 100.0, -100.0, powf);
     171    TEST_FUN2 (float, -50.0, 100.0, -2.0, 40.0, remainderf);
     172    TEST_FUN (float, -50.0, 50.0, rintf);
     173    TEST_FUN2 (float, -50.0, 50.0, -10.0, 32.0, __builtin_scalbf);
     174    TEST_FUN (float, -10.0, 10.0, __builtin_significandf);
     175    TEST_FUN (float, -3.14159265359, 3.14159265359, sinf);
     176    TEST_FUN (float, -3.14159265359, 3.14159265359, sinhf);
     177    TEST_FUN (float, -0.1, 10000.0, sqrtf);
     178    TEST_FUN (float, -5.0, 5.0, tanf);
     179    TEST_FUN (float, -3.14159265359, 3.14159265359, tanhf);
     180    /* Newlib's version of tgammaf is known to have poor accuracy.  */
     181    TEST_FUN_XFAIL (float, -10.0, 10.0, tgammaf);
     182  
          /* Double-precision functions, with the same ranges as above.  */
     183    TEST_FUN (double, -1.1, 1.1, acos);
     184    TEST_FUN (double, -10, 10, acosh);
     185    TEST_FUN (double, -1.1, 1.1, asin);
     186    TEST_FUN (double, -10, 10, asinh);
     187    TEST_FUN (double, -1.1, 1.1, atan);
     188    TEST_FUN2 (double, -2.0, 2.0, 2.0, -2.0, atan2);
     189    TEST_FUN (double, -2.0, 2.0, atanh);
     190    TEST_FUN2 (double, -10.0, 10.0, 5.0, -15.0, copysign);
     191    TEST_FUN (double, -3.14159265359, 3.14159265359, cos);
     192    TEST_FUN (double, -3.14159265359, 3.14159265359, cosh);
     193    TEST_FUN (double, -10.0, 10.0, erf);
     194    TEST_FUN (double, -10.0, 10.0, exp);
     195    TEST_FUN (double, -10.0, 10.0, exp2);
     196    TEST_FUN2 (double, -10.0, 10.0, 100.0, -25.0, fmod);
     197    TEST_FUN (double, -10.0, 10.0, gamma);
     198    TEST_FUN2 (double, -10.0, 10.0, 15.0, -5.0, hypot);
     199    TEST_FUN (double, -10.0, 10.0, lgamma);
     200    TEST_FUN (double, -1.0, 50.0, log);
     201    TEST_FUN (double, -1.0, 500.0, log10);
     202    TEST_FUN (double, -1.0, 64.0, log2);
     203    TEST_FUN2 (double, -100.0, 100.0, 100.0, -100.0, pow);
     204    TEST_FUN2 (double, -50.0, 100.0, -2.0, 40.0, remainder);
     205    TEST_FUN (double, -50.0, 50.0, rint);
     206    TEST_FUN2 (double, -50.0, 50.0, -10.0, 32.0, __builtin_scalb);
     207    TEST_FUN (double, -10.0, 10.0, __builtin_significand);
     208    TEST_FUN (double, -3.14159265359, 3.14159265359, sin);
     209    TEST_FUN (double, -3.14159265359, 3.14159265359, sinh);
     210    TEST_FUN (double, -0.1, 10000.0, sqrt);
     211    TEST_FUN (double, -5.0, 5.0, tan);
     212    TEST_FUN (double, -3.14159265359, 3.14159265359, tanh);
     213    /* Newlib's version of tgamma is known to have poor accuracy.  */
     214    TEST_FUN_XFAIL (double, -10.0, 10.0, tgamma);
     215  
          /* Nonzero only if some check recorded an unexpected failure.  */
     216    return failed;
     217  }