(root)/
gcc-13.2.0/
libgomp/
testsuite/
libgomp.oacc-c-c++-common/
loop-auto-1.c
       1  /* AMD GCN does not use 32-lane vectors.
       2     { dg-skip-if "unsuitable dimensions" { openacc_radeon_accel_selected } { "*" } { "" } } */
       3  
       4  /* { dg-additional-options "-fopenacc-dim=32" } */
       5  
       6  /* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting
       7     aspects of that functionality.  */
       8  
       9  #include <stdio.h>
      10  #include <openacc.h>
      11  #include <gomp-constants.h>
      12  
      13  int check (const int *ary, int size, int gp, int wp, int vp)
      14  {
      15    int exit = 0;
      16    int ix;
      17    int gangs[32], workers[32], vectors[32];
      18  
      19    for (ix = 0; ix < 32; ix++)
      20      gangs[ix] = workers[ix] = vectors[ix] = 0;
      21    
      22    for (ix = 0; ix < size; ix++)
      23      {
      24        vectors[ary[ix] & 0xff]++;
      25        workers[(ary[ix] >> 8) & 0xff]++;
      26        gangs[(ary[ix] >> 16) & 0xff]++;
      27      }
      28  
      29    for (ix = 0; ix < 32; ix++)
      30      {
      31        if (gp)
      32  	{
      33  	  int expect = gangs[0];
      34  	  if (gangs[ix] != expect)
      35  	    {
      36  	      exit = 1;
      37  	      printf ("gang %d not used %d times\n", ix, expect);
      38  	    }
      39  	}
      40        else if (ix && gangs[ix])
      41  	{
      42  	  exit = 1;
      43  	  printf ("gang %d unexpectedly used\n", ix);
      44  	}
      45  
      46        if (wp)
      47  	{
      48  	  int expect = workers[0];
      49  	  if (workers[ix] != expect)
      50  	    {
      51  	      exit = 1;
      52  	      printf ("worker %d not used %d times\n", ix, expect);
      53  	    }
      54  	}
      55        else if (ix && workers[ix])
      56  	{
      57  	  exit = 1;
      58  	  printf ("worker %d unexpectedly used\n", ix);
      59  	}
      60  
      61        if (vp)
      62  	{
      63  	  int expect = vectors[0];
      64  	  if (vectors[ix] != expect)
      65  	    {
      66  	      exit = 1;
      67  	      printf ("vector %d not used %d times\n", ix, expect);
      68  	    }
      69  	}
      70        else if (ix && vectors[ix])
      71  	{
      72  	  exit = 1;
      73  	  printf ("vector %d unexpectedly used\n", ix);
      74  	}
      75        
      76      }
      77    return exit;
      78  }
      79  
      80  #pragma acc routine seq
      81  static int __attribute__((noinline)) place ()
      82  {
      83    int r = 0;
      84  
      85    int g = 0, w = 0, v = 0;
      86    g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
      87    w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
      88    v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
      89    r = (g << 16) | (w << 8) | v;
      90  
      91    return r;
      92  }
      93  
      94  static void clear (int *ary, int size)
      95  {
      96    int ix;
      97  
      98    for (ix = 0; ix < size; ix++)
      99      ary[ix] = -1;
     100  }
     101  
/* "loop auto" nested directly inside an explicit, single-iteration
   "loop gang", in a parallel region requesting 32 workers and a vector
   length of 32.  ARY[0:SIZE] is reset with clear, then the inner loop
   stores place () into each ary[ix].

   Returns the result of check called with gp = 0, wp = 1, vp = 1: only
   gang index 0 may appear, while all 32 worker and all 32 vector indices
   must appear equally often.  */
int vector_1 (int *ary, int size)
{
  clear (ary, size);
  
#pragma acc parallel num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size)
  {
#pragma acc loop gang
    for (int jx = 0; jx < 1; jx++)
#pragma acc loop auto
      for (int ix = 0; ix < size; ix++)
	ary[ix] = place ();
  }

  return check (ary, size, 0, 1, 1);
}
     117  
/* "loop auto" nested inside an explicit "loop worker", in a parallel
   region requesting 32 workers and a vector length of 32.  The outer loop
   walks SIZE / 64 chunks and the inner loop the 64 elements of a chunk,
   storing place () into each; elements past (SIZE / 64) * 64 are only
   reset by clear.

   Returns the result of check called with gp = 0, wp = 1, vp = 1: only
   gang index 0 may appear, while all 32 worker and all 32 vector indices
   must appear equally often.  */
int vector_2 (int *ary, int size)
{
  clear (ary, size);
  
#pragma acc parallel num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size)
  {
#pragma acc loop worker
    for (int jx = 0; jx < size  / 64; jx++)
#pragma acc loop auto
      for (int ix = 0; ix < 64; ix++)
	ary[ix + jx * 64] = place ();
  }

  return check (ary, size, 0, 1, 1);
}
     133  
/* "loop auto" sandwiched between an explicit, single-iteration "loop gang"
   outside and an explicit "loop vector" inside, in a parallel region
   requesting 32 workers and a vector length of 32.  The auto loop walks
   SIZE / 64 chunks and the vector loop the 64 elements of a chunk, storing
   place () into each; elements past (SIZE / 64) * 64 are only reset by
   clear.

   Returns the result of check called with gp = 0, wp = 1, vp = 1: only
   gang index 0 may appear, while all 32 worker and all 32 vector indices
   must appear equally often.  */
int worker_1 (int *ary, int size)
{
  clear (ary, size);
  
#pragma acc parallel num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size)
  {
#pragma acc loop gang
    for (int kx = 0; kx < 1; kx++)
#pragma acc loop auto
      for (int jx = 0; jx <  size  / 64; jx++)
#pragma acc loop vector
	for (int ix = 0; ix < 64; ix++)
	  ary[ix + jx * 64] = place ();
  }

  return check (ary, size, 0,  1, 1);
}
     151  
/* Outermost "loop auto" enclosing an explicit "loop worker", in a parallel
   region requesting 32 gangs, 32 workers and a vector length of 32.  The
   auto loop walks SIZE / 64 chunks and the worker loop the 64 elements of
   a chunk, storing place () into each; elements past (SIZE / 64) * 64 are
   only reset by clear.

   The dg-warning directive below is line-relative (".-1") and must stay
   on the line immediately following the "#pragma acc parallel" line.

   Returns the result of check called with gp = 1, wp = 1, vp = 0: all 32
   gang and all 32 worker indices must appear equally often, while only
   vector index 0 may appear.  */
int gang_1 (int *ary, int size)
{
  clear (ary, size);
  
#pragma acc parallel num_gangs (32) num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size)
  /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-1 } */
  {
#pragma acc loop auto
    for (int jx = 0; jx <  size  / 64; jx++)
#pragma acc loop worker
      for (int ix = 0; ix < 64; ix++)
	ary[ix + jx * 64] = place ();
  }

  return check (ary, size, 1, 1, 0);
}
     168  
/* Three nested "loop auto" loops with no explicit partitioning clause, in
   a parallel region requesting 32 gangs, 32 workers and a vector length
   of 32.  The loops iterate SIZE / (32 * 32), 32 and 32 times respectively
   and store place () into ary[ix + jx * 32 + kx * 32 * 32]; elements past
   (SIZE / 1024) * 1024 are only reset by clear.

   Returns the result of check called with gp = 1, wp = 1, vp = 1: all 32
   gang, worker and vector indices must each appear equally often.  */
int gang_2 (int *ary, int size)
{
  clear (ary, size);
  
#pragma acc parallel num_gangs (32) num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size)
  {
#pragma acc loop auto
    for (int kx = 0; kx < size / (32 * 32); kx++)
#pragma acc loop auto
      for (int jx = 0; jx <  32; jx++)
#pragma acc loop auto
	for (int ix = 0; ix < 32; ix++)
	  ary[ix + jx * 32 + kx * 32 * 32] = place ();
  }

  return check (ary, size, 1, 1, 1);
}
     186  
/* Two nested "loop auto" loops with no explicit partitioning clause, in a
   parallel region requesting 32 workers and a vector length of 32 (no
   num_gangs clause).  The outer loop walks SIZE / 64 chunks and the inner
   loop the 64 elements of a chunk, storing place () into each; elements
   past (SIZE / 64) * 64 are only reset by clear.

   Returns the result of check called with gp = 1, wp = 1, vp = 1: all 32
   gang, worker and vector indices must each appear equally often.  */
int gang_3 (int *ary, int size)
{
  clear (ary, size);
  
#pragma acc parallel num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size)
  {
#pragma acc loop auto
    for (int jx = 0; jx <  size  / 64; jx++)
#pragma acc loop auto
      for (int ix = 0; ix < 64; ix++)
	ary[ix + jx * 64] = place ();
  }

  return check (ary, size, 1, 1, 1);
}
     202  
/* A single "loop auto" over all SIZE elements, in a parallel region that
   only requests a vector length of 32 (no num_gangs or num_workers
   clause).  ARY[0:SIZE] is reset with clear, then the loop stores
   place () into each ary[jx].

   Returns the result of check called with gp = 1, wp = 0, vp = 1: all 32
   gang and all 32 vector indices must appear equally often, while only
   worker index 0 may appear.  */
int gang_4 (int *ary, int size)
{
  clear (ary, size);
  
#pragma acc parallel vector_length(32) copy(ary[0:size]) firstprivate (size)
  {
#pragma acc loop auto
    for (int jx = 0; jx <  size; jx++)
      ary[jx] = place ();
  }

  return check (ary, size, 1, 0, 1);
}
     216  
     217  #define N (32*32*32*2)
     218  int main ()
     219  {
     220    int ondev = 0;
     221  
     222  #pragma acc parallel copy(ondev)
     223    {
     224      ondev = acc_on_device (acc_device_not_host);
     225    }
     226    if (!ondev)
     227      return 0;
     228    
     229    int ary[N];
     230  
     231    if (vector_1 (ary,  N))
     232      return 1;
     233    if (vector_2 (ary,  N))
     234      return 1;
     235  
     236    if (worker_1 (ary,  N))
     237      return 1;
     238    
     239    if (gang_1 (ary,  N))
     240      return 1;
     241    if (gang_2 (ary,  N))
     242      return 1;
     243    if (gang_3 (ary,  N))
     244      return 1;
     245    if (gang_4 (ary,  N))
     246      return 1;
     247  
     248    return 0;
     249  }