(root)/
gcc-13.2.0/
libgomp/
testsuite/
libgomp.oacc-c-c++-common/
tile-1.c
       1  /* AMD GCN does not use 32-lane vectors, so the expected use counts mismatch.
       2     { dg-skip-if "unsuitable dimensions" { openacc_radeon_accel_selected } { "*" } { "" } } */
       3  
       4  /* { dg-additional-options "-fopenacc-dim=32" } */
       5  
       6  #include <stdio.h>
       7  #include <openacc.h>
       8  #include <gomp-constants.h>
       9  
      10  static int check (const int *ary, int size, int gp, int wp, int vp)
      11  {
      12    int exit = 0;
      13    int ix;
      14    int gangs[32], workers[32], vectors[32];
      15  
      16    for (ix = 0; ix < 32; ix++)
      17      gangs[ix] = workers[ix] = vectors[ix] = 0;
      18    
      19    for (ix = 0; ix < size; ix++)
      20      {
      21        vectors[ary[ix] & 0xff]++;
      22        workers[(ary[ix] >> 8) & 0xff]++;
      23        gangs[(ary[ix] >> 16) & 0xff]++;
      24      }
      25  
      26    for (ix = 0; ix < 32; ix++)
      27      {
      28        if (gp)
      29  	{
      30  	  int expect = gangs[0];
      31  	  if (gangs[ix] != expect)
      32  	    {
      33  	      exit = 1;
      34  	      printf ("gang %d not used %d times\n", ix, expect);
      35  	    }
      36  	}
      37        else if (ix && gangs[ix])
      38  	{
      39  	  exit = 1;
      40  	  printf ("gang %d unexpectedly used\n", ix);
      41  	}
      42  
      43        if (wp)
      44  	{
      45  	  int expect = workers[0];
      46  	  if (workers[ix] != expect)
      47  	    {
      48  	      exit = 1;
      49  	      printf ("worker %d not used %d times\n", ix, expect);
      50  	    }
      51  	}
      52        else if (ix && workers[ix])
      53  	{
      54  	  exit = 1;
      55  	  printf ("worker %d unexpectedly used\n", ix);
      56  	}
      57  
      58        if (vp)
      59  	{
      60  	  int expect = vectors[0];
      61  	  if (vectors[ix] != expect)
      62  	    {
      63  	      exit = 1;
      64  	      printf ("vector %d not used %d times\n", ix, expect);
      65  	    }
      66  	}
      67        else if (ix && vectors[ix])
      68  	{
      69  	  exit = 1;
      70  	  printf ("vector %d unexpectedly used\n", ix);
      71  	}
      72        
      73      }
      74    return exit;
      75  }
      76  
      77  #pragma acc routine seq
      78  static int __attribute__((noinline)) place ()
      79  {
      80    int r = 0;
      81  
      82    if (acc_on_device (acc_device_not_host))
      83      {
      84        int g, w, v;
      85  
      86        g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
      87        w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
      88        v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
      89        r = (g << 16) | (w << 8) | v;
      90      }
      91    return r;
      92  }
      93  
      94  static void clear (int *ary, int size)
      95  {
      96    int ix;
      97  
      98    for (ix = 0; ix < size; ix++)
      99      ary[ix] = -1;
     100  }
     101  
     102  int gang_vector_1 (int *ary, int size)
     103  {
     104    clear (ary, size);
     105  #pragma acc parallel vector_length(32) num_gangs (32) copy (ary[0:size]) firstprivate (size)
     106    {
     107  #pragma acc loop tile(128) gang vector
     108      for (int jx = 0; jx < size; jx++)
     109        ary[jx] = place ();
     110    }
     111  
     112    return check (ary, size, 1, 0, 1);
     113  }
     114  
     115  int gang_vector_2a (int *ary, int size)
     116  {
     117    if (size % 256)
     118      return 1;
     119    
     120    clear (ary, size);
     121  #pragma acc parallel vector_length(32) num_gangs (32) copy (ary[0:size]) firstprivate (size)
     122    {
     123  #pragma acc loop tile(64, 64) gang vector
     124      for (int jx = 0; jx < size / 256; jx++)
     125        for (int ix = 0; ix < 256; ix++)
     126  	ary[jx * 256 + ix] = place ();
     127    }
     128  
     129    return check (ary, size, 1, 0, 1);
     130  }
     131  
     132  int gang_vector_2b (int *ary, int size)
     133  {
     134    if (size % 256)
     135      return 1;
     136    
     137    clear (ary, size);
     138  #pragma acc parallel vector_length(32) num_gangs (32) copy (ary[0:size]) firstprivate (size)
     139    {
     140  #pragma acc loop tile(64, 64) gang vector
     141      for (int jx = 0; jx < size; jx += 256)
     142        for (int ix = 0; ix < 256; ix++)
     143  	ary[jx + ix] = place ();
     144    }
     145  
     146    return check (ary, size, 1, 0, 1);
     147  }
     148  
     149  int worker_vector_2a (int *ary, int size)
     150  {
     151    if (size % 256)
     152      return 1;
     153  
     154    clear (ary, size);
     155  #pragma acc parallel vector_length(32) num_workers (32) copy (ary[0:size]) firstprivate (size)
     156    {
     157  #pragma acc loop tile(64, 64) worker vector
     158      for (int jx = 0; jx < size / 256; jx++)
     159        for (int ix = 0; ix < 256; ix++)
     160  	ary[jx * 256 + ix] = place ();
     161    }
     162  
     163    return check (ary, size, 0, 1, 1);
     164  }
     165  
     166  int worker_vector_2b (int *ary, int size)
     167  {
     168    if (size % 256)
     169      return 1;
     170  
     171    clear (ary, size);
     172  #pragma acc parallel vector_length(32) num_workers (32) copy (ary[0:size]) firstprivate (size)
     173    {
     174  #pragma acc loop tile(64, 64) worker vector
     175      for (int jx = 0; jx < size; jx += 256)
     176        for (int ix = 0; ix < 256; ix++)
     177  	ary[jx + ix] = place ();
     178    }
     179  
     180    return check (ary, size, 0, 1, 1);
     181  }
     182  
     183  int gang_worker_vector_2a (int *ary, int size)
     184  {
     185    if (size % 256)
     186      return 1;
     187    clear (ary, size);
     188  #pragma acc parallel vector_length(32) num_workers (32) num_gangs(32) copy (ary[0:size]) firstprivate (size)
     189    {
     190  #pragma acc loop tile(32, 32)
     191      for (int jx = 0; jx < size / 256; jx++)
     192        for (int ix = 0; ix < 256; ix++)
     193  	ary[jx * 256 + ix] = place ();
     194    }
     195  
     196    return check (ary, size, 1, 1, 1);
     197  }
     198  
     199  int gang_worker_vector_2b (int *ary, int size)
     200  {
     201    if (size % 256)
     202      return 1;
     203    clear (ary, size);
     204  #pragma acc parallel vector_length(32) num_workers (32) num_gangs(32) copy (ary[0:size]) firstprivate (size)
     205    {
     206  #pragma acc loop tile(32, 32)
     207      for (int jx = 0; jx < size; jx += 256)
     208        for (int ix = 0; ix < 256; ix++)
     209  	ary[jx + ix] = place ();
     210    }
     211  
     212    return check (ary, size, 1, 1, 1);
     213  }
     214  
     215  int gang_worker_vector_star_2a (int *ary, int size)
     216  {
     217    if (size % 256)
     218      return 1;
     219  
     220    clear (ary, size);
     221  #pragma acc parallel vector_length(32) num_workers (32) num_gangs(32) copy (ary[0:size]) firstprivate (size)
     222    {
     223  #pragma acc loop tile(*, *)
     224      for (int jx = 0; jx < size / 256; jx++)
     225        for (int ix = 0; ix < 256; ix++)
     226  	ary[jx * 256 + ix] = place ();
     227    }
     228  
     229    return check (ary, size, 1, 1, 1);
     230  }
     231  
     232  int gang_worker_vector_star_2b (int *ary, int size)
     233  {
     234    if (size % 256)
     235      return 1;
     236  
     237    clear (ary, size);
     238  #pragma acc parallel vector_length(32) num_workers (32) num_gangs(32) copy (ary[0:size]) firstprivate (size)
     239    {
     240  #pragma acc loop tile(*, *)
     241      for (int jx = 0; jx < size; jx +=256)
     242        for (int ix = 0; ix < 256; ix++)
     243  	ary[jx + ix] = place ();
     244    }
     245  
     246    return check (ary, size, 1, 1, 1);
     247  }
     248  
     249  #define N (32*32*32*8)
     250  int main ()
     251  {
     252    int ondev = 0;
     253  
     254  #pragma acc parallel copy(ondev)
     255    {
     256      ondev = acc_on_device (acc_device_not_host);
     257    }
     258    if (!ondev)
     259      return 0;
     260    
     261    int ary[N];
     262    if (gang_vector_1 (ary, N))
     263      return 1;
     264    if (gang_vector_2a (ary, N))
     265      return 1;
     266    if (worker_vector_2a (ary, N))
     267      return 1;
     268    if (gang_worker_vector_2a (ary, N))
     269      return 1;
     270    if (gang_worker_vector_star_2a (ary, N))
     271      return 1;
     272    if (gang_vector_2b (ary, N))
     273      return 1;
     274    if (worker_vector_2b (ary, N))
     275      return 1;
     276    if (gang_worker_vector_2b (ary, N))
     277      return 1;
     278    if (gang_worker_vector_star_2b (ary, N))
     279      return 1;
     280    return 0;
     281  }