(root)/
gcc-13.2.0/
libgomp/
testsuite/
libgomp.oacc-c-c++-common/
loop-dim-default.c
       1  /* { dg-additional-options "-fopenacc-dim=16:16" } */
       2  
       3  #include <openacc.h>
       4  #include <string.h>
       5  #include <stdio.h>
       6  #include <gomp-constants.h>
       7  
       8  #pragma acc routine
       9  static int __attribute__ ((noinline)) coord ()
      10  {
      11    int res = 0;
      12  
      13    if (acc_on_device (acc_device_not_host))
      14      {
      15        int g, w, v;
      16  
      17        g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
      18        w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
      19        v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
      20        res = (1 << 24) | (g << 16) | (w << 8) | v;
      21      }
      22    return res;
      23  }
      24  
      25  
      26  int check (const int *ary, int size, int gp, int wp, int vp)
      27  {
      28    int exit = 0;
      29    int ix;
      30    int *gangs = (int *)__builtin_alloca (gp * sizeof (int));
      31    int *workers = (int *)__builtin_alloca (wp * sizeof (int));
      32    int *vectors = (int *)__builtin_alloca (vp * sizeof (int));
      33    int offloaded = 0;
      34    
      35    memset (gangs, 0, gp * sizeof (int));
      36    memset (workers, 0, wp * sizeof (int));
      37    memset (vectors, 0, vp * sizeof (int));
      38    
      39    for (ix = 0; ix < size; ix++)
      40      {
      41        int g = (ary[ix] >> 16) & 0xff;
      42        int w = (ary[ix] >> 8) & 0xff;
      43        int v = (ary[ix] >> 0) & 0xff;
      44  
      45        if (g >= gp || w >= wp || v >= vp)
      46  	{
      47  	  printf ("unexpected cpu %#x used\n", ary[ix]);
      48  	  exit = 1;
      49  	}
      50        else
      51  	{
      52  	  vectors[v]++;
      53  	  workers[w]++;
      54  	  gangs[g]++;
      55  	}
      56        offloaded += ary[ix] >> 24;
      57      }
      58  
      59    if (!offloaded)
      60      return 0;
      61  
      62    if (offloaded != size)
      63      {
      64        printf ("offloaded %d times,  expected %d\n", offloaded, size);
      65        return 1;
      66      }
      67  
      68    for (ix = 0; ix < gp; ix++)
      69      if (gangs[ix] != gangs[0])
      70        {
      71  	printf ("gang %d not used %d times\n", ix, gangs[0]);
      72  	exit = 1;
      73        }
      74    
      75    for (ix = 0; ix < wp; ix++)
      76      if (workers[ix] != workers[0])
      77        {
      78  	printf ("worker %d not used %d times\n", ix, workers[0]);
      79  	exit = 1;
      80        }
      81    
      82    for (ix = 0; ix < vp; ix++)
      83      if (vectors[ix] != vectors[0])
      84        {
      85  	printf ("vector %d not used %d times\n", ix, vectors[0]);
      86  	exit = 1;
      87        }
      88    
      89    return exit;
      90  }
      91  
      92  #define N (32 *32*32)
      93  
      94  int test_1 (int gp, int wp, int vp)
      95  {
      96    int ary[N];
      97    int exit = 0;
      98    
      99  #pragma acc parallel copyout (ary)
     100    {
     101  #pragma acc loop gang (static:1)
     102      for (int ix = 0; ix < N; ix++)
     103        ary[ix] = coord ();
     104    }
     105  
     106    exit |= check (ary, N, gp, 1, 1);
     107  
     108  #pragma  acc parallel copyout (ary)
     109    {
     110  #pragma acc loop worker
     111      for (int ix = 0; ix < N; ix++)
     112        ary[ix] = coord ();
     113    }
     114  
     115    exit |= check (ary, N, 1, wp, 1);
     116  
     117  #pragma  acc parallel copyout (ary)
     118    {
     119  #pragma acc loop vector
     120      for (int ix = 0; ix < N; ix++)
     121        ary[ix] = coord ();
     122    }
     123  
     124    exit |= check (ary, N, 1, 1, vp);
     125  
     126    return exit;
     127  }
     128  
     129  int main ()
     130  {
     131  #ifdef ACC_DEVICE_TYPE_radeon
     132    /* AMD GCN uses the autovectorizer for the vector dimension: the use
     133       of a function call in vector-partitioned code in this test is not
     134       currently supported.  */
     135    return test_1 (16, 16, 1);
     136  #else
     137    return test_1 (16, 16, 32);
     138  #endif
     139  }