(root)/
gcc-13.2.0/
libgomp/
testsuite/
libgomp.oacc-c-c++-common/
reduction-flt.c
       1  
       2  /* Single float has 23 bits of fraction. */
       3  #define FRAC (1.0f / (1 << 20))
       4  typedef float Type;
       5  
       6  int close_enough (Type a, Type b)
       7  {
       8    Type diff = a - b;
       9    if (diff < 0)
      10      diff = -diff;
      11  
      12    return diff / a < FRAC;
      13  }
      14  
      15  #define N 100
      16  
      17  static int __attribute__ ((noinline))
      18  vector (Type ary[N], Type sum, Type prod)
      19  {
      20    Type tsum = 0, tprod = 1;
      21  
      22  #pragma acc parallel vector_length(32) copyin(ary[0:N])
      23    {
      24  #pragma acc loop vector reduction(+:tsum) reduction (*:tprod)
      25      for (int ix = 0; ix < N; ix++)
      26        {
      27  	tsum += ary[ix];
      28  	tprod *= ary[ix];
      29        }
      30    }
      31  
      32    if (!close_enough (sum, tsum))
      33      return 1;
      34  
      35    if (!close_enough (prod, tprod))
      36      return 1;
      37  
      38    return 0;
      39  }
      40  
      41  static int __attribute__ ((noinline))
      42  worker (Type ary[N], Type sum, Type prod)
      43  {
      44    Type tsum = 0, tprod = 1;
      45  
      46  #pragma acc parallel num_workers(32) copyin(ary[0:N])
      47    {
      48  #pragma acc loop worker reduction(+:tsum) reduction (*:tprod)
      49      for (int ix = 0; ix < N; ix++)
      50        {
      51  	tsum += ary[ix];
      52  	tprod *= ary[ix];
      53        }
      54    }
      55  
      56    if (!close_enough (sum, tsum))
      57      return 1;
      58  
      59    if (!close_enough (prod, tprod))
      60      return 1;
      61  
      62    return 0;
      63  }
      64  
      65  static int __attribute__ ((noinline))
      66  gang (Type ary[N], Type sum, Type prod)
      67  {
      68    Type tsum = 0, tprod = 1;
      69  
      70  #pragma acc parallel num_gangs (32) copyin(ary[0:N])
      71    {
      72  #pragma acc loop gang reduction(+:tsum) reduction (*:tprod)
      73      for (int ix = 0; ix < N; ix++)
      74        {
      75  	tsum += ary[ix];
      76  	tprod *= ary[ix];
      77        }
      78    }
      79  
      80    if (!close_enough (sum, tsum))
      81      return 1;
      82  
      83    if (!close_enough (prod, tprod))
      84      return 1;
      85  
      86    return 0;
      87  }
      88  
      89  int main (void)
      90  {
      91    Type ary[N], sum = 0, prod = 1;
      92  
      93    for (int ix = 0; ix < N;  ix++)
      94      {
      95        float frac = ix * (1.0f / 1024) + 1.0f;
      96        
      97        ary[ix] = frac;
      98        sum += ary[ix];
      99        prod *= ary[ix];
     100      }
     101  
     102    if (vector (ary, sum, prod))
     103      return 1;
     104    
     105    if (worker (ary, sum, prod))
     106      return 1;
     107  
     108    if (gang (ary, sum, prod))
     109      return 1;
     110  
     111    return 0;
     112  }