(root)/
gcc-13.2.0/
libgomp/
testsuite/
libgomp.oacc-c-c++-common/
reduction-cplx-flt.c
       1  
       2  #if !defined(__hppa__) || !defined(__hpux__)
       3  #include <complex.h>
       4  #endif
       5  
       6  /* Single float has 23 bits of fraction. */
       7  #define FRAC (1.0f / (1 << 20))
       8  typedef float _Complex Type;
       9  
      10  int close_enough (Type a, Type b)
      11  {
      12    Type diff = a - b;
      13    float mag2_a = __real__(a) * __real__ (a) + __imag__ (a) * __imag__ (a);
      14    float mag2_diff = (__real__(diff) * __real__ (diff)
      15  		     + __imag__ (diff) * __imag__ (diff));
      16  
      17    return mag2_diff / mag2_a < (FRAC * FRAC);
      18  }
      19  
      20  #define N 100
      21  
      22  static int __attribute__ ((noinline))
      23  vector (Type ary[N], Type sum, Type prod)
      24  {
      25    Type tsum = 0, tprod = 1;
      26  
      27  #pragma acc parallel vector_length(32) copyin(ary[0:N])
      28    {
      29  #pragma acc loop vector reduction(+:tsum) reduction (*:tprod)
      30      for (int ix = 0; ix < N; ix++)
      31        {
      32  	tsum += ary[ix];
      33  	tprod *= ary[ix];
      34        }
      35    }
      36  
      37    if (!close_enough (sum, tsum))
      38      return 1;
      39  
      40    if (!close_enough (prod, tprod))
      41      return 1;
      42  
      43    return 0;
      44  }
      45  
      46  static int __attribute__ ((noinline))
      47  worker (Type ary[N], Type sum, Type prod)
      48  {
      49    Type tsum = 0, tprod = 1;
      50  
      51  #pragma acc parallel num_workers(32) copyin(ary[0:N])
      52    {
      53  #pragma acc loop worker reduction(+:tsum) reduction (*:tprod)
      54      for (int ix = 0; ix < N; ix++)
      55        {
      56  	tsum += ary[ix];
      57  	tprod *= ary[ix];
      58        }
      59    }
      60  
      61    if (!close_enough (sum, tsum))
      62      return 1;
      63  
      64    if (!close_enough (prod, tprod))
      65      return 1;
      66  
      67    return 0;
      68  }
      69  
      70  static int __attribute__ ((noinline))
      71  gang (Type ary[N], Type sum, Type prod)
      72  {
      73    Type tsum = 0, tprod = 1;
      74  
      75  #pragma acc parallel num_gangs (32) copyin(ary[0:N])
      76    {
      77  #pragma acc loop gang reduction(+:tsum) reduction (*:tprod)
      78      for (int ix = 0; ix < N; ix++)
      79        {
      80  	tsum += ary[ix];
      81  	tprod *= ary[ix];
      82        }
      83    }
      84  
      85    if (!close_enough (sum, tsum))
      86      return 1;
      87  
      88    if (!close_enough (prod, tprod))
      89      return 1;
      90  
      91    return 0;
      92  }
      93  
      94  int main (void)
      95  {
      96    Type ary[N], sum = 0, prod = 1;
      97  
      98    for (int ix = 0; ix < N;  ix++)
      99      {
     100        float frac = ix * (1.0f / 1024) + 1.0f;
     101        
     102        ary[ix] = frac + frac * 2.0j - 1.0j;
     103        sum += ary[ix];
     104        prod *= ary[ix];
     105      }
     106  
     107    if (vector (ary, sum, prod))
     108      return 1;
     109    
     110    if (worker (ary, sum, prod))
     111      return 1;
     112  
     113    if (gang (ary, sum, prod))
     114      return 1;
     115  
     116    return 0;
     117  }