(root)/
gcc-13.2.0/
libgomp/
testsuite/
libgomp.oacc-c-c++-common/
broadcast-many.c
       1  /* To avoid 'error: shared-memory region overflow':
       2     { dg-additional-options "-foffload-options=amdgcn-amdhsa=-mgang-private-size=64" { target openacc_radeon_accel_selected } }
       3  */
       4  
       5  #include <assert.h>
       6  #include <stdio.h>
       7  
       8  #if ACC_DEVICE_TYPE_nvidia
          /* NUM_WORKERS is the argument given to num_workers() on the parallel
             construct in main.  */
       9  /* To avoid 'libgomp: The Nvidia accelerator has insufficient resources'.  */
      10  #define NUM_WORKERS 28
      11  #else
          /* Every other device type asks for 32 workers, the same number as the
             iterations of each worker loop in main.  */
      12  #define NUM_WORKERS 32
      13  #endif
      14  
          /* LOCAL(n) declares one double named n, initialized from a variable called
             'input' that must be in scope where the macro is expanded.  The
             expansion already ends in a semicolon, so the 'LOCALS2(x);' statements
             in main each leave one extra empty statement behind.  */
      15  #define LOCAL(n) double n = input;
          /* LOCALS(n) declares eight such doubles: n1, n2, ..., n8.  */
      16  #define LOCALS(n) LOCAL(n##1) LOCAL(n##2) LOCAL(n##3) LOCAL(n##4) \
      17  		  LOCAL(n##5) LOCAL(n##6) LOCAL(n##7) LOCAL(n##8)
          /* LOCALS2(n) applies LOCALS to the prefixes na, nb, ..., nh, giving
             8 * 8 = 64 doubles named n<a-h><1-8> (e.g. LOCALS2(h) yields ha1
             through hh8).  */
      18  #define LOCALS2(n) LOCALS(n##a) LOCALS(n##b) LOCALS(n##c) LOCALS(n##d) \
      19  		   LOCALS(n##e) LOCALS(n##f) LOCALS(n##g) LOCALS(n##h)
      20  
          /* USE(n) expands to n unchanged; it is the per-variable building block of
             the expressions below.  */
      21  #define USE(n) n
          /* USES(n,OP) joins n1 ... n8 with the binary operator token OP.  The blank
             in 'USE (n##4)' and 'USE (n##8)' is harmless: whitespace may separate a
             function-like macro name from its argument list at the point of use.  */
      22  #define USES(n,OP) USE(n##1) OP USE(n##2) OP USE(n##3) OP USE (n##4) OP \
      23  		   USE(n##5) OP USE(n##6) OP USE(n##7) OP USE (n##8)
          /* USES2(n,OP) joins the eight USES groups na ... nh with OP, producing a
             single expression over exactly the 64 names that LOCALS2(n) declares.
             The expansion is not parenthesized; main only uses it as a complete
             initializer, where that does not matter.  */
      24  #define USES2(n,OP) USES(n##a,OP) OP USES(n##b,OP) OP USES(n##c,OP) OP \
      25  		    USES(n##d,OP) OP USES(n##e,OP) OP USES(n##f,OP) OP \
      26  		    USES(n##g,OP) OP USES(n##h,OP)
      27  
      28  int main (void)
      29  {
            /* Runs four worker-partitioned reduction loops inside one parallel
               region.  Blocks of 64 doubles are declared with LOCALS2 in the region
               body, outside any worker loop, each initialized from 'input' (1).
               Every loop iteration sums one such block (64.0), converts the sum to
               int and adds it to w.  With 32 iterations per loop, w grows by
               64 * 32 = 2048 per loop: 2048, 4096, 6144, 8192.  Those are the
               values the expected-output comments after each printf list, and the
               last one is what the final 'ret' check requires.
               NOTE(review): judging by the file name (broadcast-many.c), the intent
               is presumably to exercise making many values defined outside a worker
               loop available inside it -- confirm against the test's history.  */
      30    int ret;
      31    int input = 1;
      32  
            /* One gang with NUM_WORKERS workers.  'ret' is the only variable with an
               explicit data clause (copyout); 'input' is read inside the region
               without one.  */
      33    #pragma acc parallel num_gangs(1) num_workers(NUM_WORKERS) copyout(ret)
      34    {
      35      int w = 0;
              /* Declares ha1 ... hh8 (64 doubles, all equal to input) ahead of the
                 first worker loop that reads them.  */
      36      LOCALS2(h);
      37  
              /* w is a '+' reduction variable of the worker loop; each of the 32
                 iterations contributes the sum of the 64 h-locals.  */
      38      #pragma acc loop worker reduction(+:w)
      39      for (int i = 0; i < 32; i++)
      40        {
      41  	int u = USES2(h,+);
      42  	w += u;
      43        }
      44  
      45      printf ("w=%d\n", w);
      46      /* { dg-output "w=2048(\n|\r\n|\r)" } */
      47  
              /* Second block of 64 doubles, ia1 ... ih8.  These names are distinct
                 from the loop index 'i', which each for statement declares itself.  */
      48      LOCALS2(i);
      49  
      50      #pragma acc loop worker reduction(+:w)
      51      for (int i = 0; i < 32; i++)
      52        {
      53  	int u = USES2(i,+);
      54  	w += u;
      55        }
      56  
      57      printf ("w=%d\n", w);
      58      /* { dg-output "w=4096(\n|\r\n|\r)" } */
      59  
              /* Two blocks (j and k, 128 doubles) are declared together here.  The
                 next loop reads only the j block; the k block is not read until the
                 loop after that.  */
      60      LOCALS2(j);
      61      LOCALS2(k);
      62  
      63      #pragma acc loop worker reduction(+:w)
      64      for (int i = 0; i < 32; i++)
      65        {
      66  	int u = USES2(j,+);
      67  	w += u;
      68        }
      69  
      70      printf ("w=%d\n", w);
      71      /* { dg-output "w=6144(\n|\r\n|\r)" } */
      72  
      73      #pragma acc loop worker reduction(+:w)
      74      for (int i = 0; i < 32; i++)
      75        {
      76  	int u = USES2(k,+);
      77  	w += u;
      78        }
      79  
              /* 64 locals per block * 32 iterations * 4 loops = 8192.  The result of
                 the comparison leaves the region through the copyout(ret) clause.  */
      80      ret = (w == 64 * 32 * 4);
      81      printf ("w=%d\n", w);
      82      /* { dg-output "w=8192(\n|\r\n|\r)" } */
      83    }
      84  
            /* Checked after the parallel region, in addition to the expected-output
               patterns above.  */
      85    assert (ret);
      86  
      87    return 0;
      88  }