(root)/
gcc-13.2.0/
libgomp/
testsuite/
libgomp.oacc-c-c++-common/
par-loop-comb-reduction-4.c
       1  #include <assert.h>
       2  
       3  #if ACC_DEVICE_TYPE_nvidia
       4  /* Use fewer workers in this configuration, to avoid 'libgomp: The Nvidia accelerator has insufficient resources'.  */
       5  #define NUM_WORKERS 24 /* Value passed to the num_workers clause in main.  */
       6  #else /* !ACC_DEVICE_TYPE_nvidia */
       7  #define NUM_WORKERS 32 /* Value passed to the num_workers clause in main.  */
       8  #endif /* ACC_DEVICE_TYPE_nvidia */
       9  
      10  /* Test of reduction on both parallel and loop directives (workers and vectors
      11     together in gang-partitioned mode, float type, multiple reductions).  */
      12  
      13  int
      14  main (int argc, char *argv[]) /* argc and argv are not used.  */
      15  { /* Sum arr and find its maximum inside an OpenACC parallel region, redo both with plain loops, and assert the results match.  */
      16    int i, j;
      17    float arr[32768]; /* Input data: 32 slices of 1024 elements each.  */
      18    float res = 0, mres = 0, hres = 0, hmres = 0; /* res/mres: sum/max from the parallel region; hres/hmres: sum/max from the plain loops.  */
      19    /* Fill the input with values cycling through 0..511 (32768 / 64 == 512).  */
      20    for (i = 0; i < 32768; i++)
      21      arr[i] = i % (32768 / 64);
      22    /* Parallel region: '+' reduction into res and 'max' reduction into mres, both listed in the copy clause.  */
      23    #pragma acc parallel \
      24      num_gangs(32) num_workers(NUM_WORKERS) vector_length(32) \
      25      reduction(+:res) reduction(max:mres) copy(res, mres)
      26    { /* The gang loop below has no reduction clause of its own; the dg-warning on that line expects a diagnostic about it.  */
      27      #pragma acc loop gang /* { dg-warning "nested loop in reduction needs reduction clause for 'm\?res'" "TODO" } */
      28      for (j = 0; j < 32; j++)
      29        { /* Each j covers the 1024-element slice arr[j * 1024 .. j * 1024 + 1023].  */
      30  	#pragma acc loop worker vector reduction(+:res) reduction(max:mres)
      31  	for (i = 0; i < 1024; i++)
      32  	  { /* First pass: visit the slice in ascending index order.  */
      33  	    res += arr[j * 1024 + i];
      34  	    if (arr[j * 1024 + i] > mres)
      35  	      mres = arr[j * 1024 + i];
      36  	  }
      37  	/* Second pass over the same slice in descending index order, so each element is added to res twice in total.  */
      38  	#pragma acc loop worker vector reduction(+:res) reduction(max:mres)
      39  	for (i = 0; i < 1024; i++)
      40  	  {
      41  	    res += arr[j * 1024 + (1023 - i)];
      42  	    if (arr[j * 1024 + (1023 - i)] > mres)
      43  	      mres = arr[j * 1024 + (1023 - i)];
      44  	  }
      45        }
      46    }
      47    /* Reference computation with plain loops: each element is likewise added twice (index i and its mirror 1023 - i).  */
      48    for (j = 0; j < 32; j++)
      49      for (i = 0; i < 1024; i++)
      50        {
      51          hres += arr[j * 1024 + i];
      52  	hres += arr[j * 1024 + (1023 - i)];
      53  	if (arr[j * 1024 + i] > hmres)
      54  	  hmres = arr[j * 1024 + i];
      55  	if (arr[j * 1024 + (1023 - i)] > hmres)
      56  	  hmres = arr[j * 1024 + (1023 - i)];
      57        }
      58    /* 16777216 == 2^24.  NOTE(review): presumably this bound keeps the sum where float holds integers exactly, so the '==' check below is order-independent -- confirm.  */
      59    assert (hres <= 16777216);
      60    assert (res == hres);
      61    /* Same bound and equality check for the maximum.  */
      62    assert (hmres <= 16777216);
      63    assert (mres == hmres);
      64  
      65    return 0;
      66  }