(root)/
gcc-13.2.0/
libgomp/
testsuite/
libgomp.oacc-c-c++-common/
parallel-dims.c
       1  /* OpenACC parallelism dimensions clauses: num_gangs, num_workers,
       2     vector_length.  */
       3  
       4  /* { dg-additional-options "-DEXPENSIVE" { target run_expensive_tests } } */
       5  
       6  /* { dg-additional-options "--param=openacc-kernels=decompose" } */
       7  
       8  /* { dg-additional-options "-fopt-info-all-omp" }
       9     { dg-additional-options "-foffload=-fopt-info-all-omp" } */
      10  
      11  /* { dg-additional-options "--param=openacc-privatization=noisy" }
      12     { dg-additional-options "-foffload=--param=openacc-privatization=noisy" }
      13     Prune a few: uninteresting, and potentially varying depending on GCC configuration (data types):
      14     { dg-prune-output {note: variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} } */
      15  
      16  /* It's only with Tcl 8.5 (released in 2007) that "the variable 'varName'
      17     passed to 'incr' may be unset, and in that case, it will be set to [...]",
      18     so to maintain compatibility with earlier Tcl releases, we manually
      19     initialize counter variables:
      20     { dg-line l_dummy[variable c_compute 0 c_loop_i 0 c_loop_j 0 c_loop_k 0] }
      21     { dg-message dummy {} { target iN-VAl-Id } l_dummy } to avoid
      22     "WARNING: dg-line var l_dummy defined, but not used".  */
      23  
      24  /* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting
      25     aspects of that functionality.  */
      26  
      27  /* See also '../libgomp.oacc-fortran/parallel-dims.f90'.  */
      28  
      29  #include <limits.h>
      30  #include <openacc.h>
      31  #include <gomp-constants.h>
      32  
/* Return the current gang ID, i.e. the GOMP_DIM_GANG parallelism-level
   ID reported by GCC's '__builtin_goacc_parlevel_id'.  Declared as an
   OpenACC 'seq' routine and forced always-inline into its call sites.  */
#pragma acc routine seq
inline __attribute__ ((always_inline))
static int acc_gang ()
{
  return __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
}
      39  
/* Return the current worker ID, i.e. the GOMP_DIM_WORKER parallelism-level
   ID reported by GCC's '__builtin_goacc_parlevel_id'.  Declared as an
   OpenACC 'seq' routine and forced always-inline into its call sites.  */
#pragma acc routine seq
inline __attribute__ ((always_inline))
static int acc_worker ()
{
  return __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
}
      46  
/* Return the current vector lane ID, i.e. the GOMP_DIM_VECTOR
   parallelism-level ID reported by GCC's '__builtin_goacc_parlevel_id'.
   Declared as an OpenACC 'seq' routine and forced always-inline into its
   call sites.  */
#pragma acc routine seq
inline __attribute__ ((always_inline))
static int acc_vector ()
{
  return __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
}
      53  
      54  #ifdef EXPENSIVE
      55  #define N 100
      56  #else
      57  #define N 50
      58  #endif
      59  
      60  int main ()
      61  {
      62    acc_init (acc_device_default);
      63  
      64    /* OpenACC parallel construct.  */
      65  
      66    /* Non-positive value.  */
      67  
      68    /* GR, WS, VS.  */
      69    {
      70  #define GANGS 0
      71      /* { dg-warning {'num_gangs' value must be positive} {} { target c } .-1 } */
      72      int gangs_actual = GANGS;
      73      int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
      74      gangs_min = workers_min = vectors_min = INT_MAX;
      75      gangs_max = workers_max = vectors_max = INT_MIN;
      76  #pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
      77    copy (gangs_actual) \
      78    reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) \
      79    num_gangs (GANGS)
      80      /* { dg-note {in expansion of macro 'GANGS'} {} { target c } .-1 } */
      81      /* { dg-warning {'num_gangs' value must be positive} {} { target c++ } .-2 } */
      82      /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
      83      {
      84        /* We're actually executing with num_gangs (1).  */
      85        gangs_actual = 1;
      86        for (int i = N * gangs_actual; i > -N * gangs_actual; --i)
      87  	{
      88  	  gangs_min = gangs_max = acc_gang ();
      89  	  workers_min = workers_max = acc_worker ();
      90  	  vectors_min = vectors_max = acc_vector ();
      91  	}
      92      }
      93      if (gangs_actual != 1)
      94        __builtin_abort ();
      95      if (gangs_min != 0 || gangs_max != gangs_actual - 1
      96  	|| workers_min != 0 || workers_max != 0
      97  	|| vectors_min != 0 || vectors_max != 0)
      98        __builtin_abort ();
      99  #undef GANGS
     100    }
     101  
     102    /* GP, WS, VS.  */
     103    {
     104  #define GANGS 0
     105      /* { dg-warning {'num_gangs' value must be positive} {} { target c } .-1 } */
     106      int gangs_actual = GANGS;
     107      int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
     108      gangs_min = workers_min = vectors_min = INT_MAX;
     109      gangs_max = workers_max = vectors_max = INT_MIN;
     110  #pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
     111    copy (gangs_actual) \
     112    num_gangs (GANGS)
     113      /* { dg-note {in expansion of macro 'GANGS'} {} { target c } .-1 } */
     114      /* { dg-warning {'num_gangs' value must be positive} {} { target c++ } .-2 } */
     115      /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
     116      /* { dg-warning {region contains gang partitioned code but is not gang partitioned} {} { target *-*-* } l_compute$c_compute } */
     117      {
     118        /* We're actually executing with num_gangs (1).  */
     119        gangs_actual = 1;
     120  #pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
     121    gang \
     122    reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
     123        /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
     124        /* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
     125        for (int i = N * gangs_actual; i > -N * gangs_actual; --i)
     126  	{
     127  	  gangs_min = gangs_max = acc_gang ();
     128  	  workers_min = workers_max = acc_worker ();
     129  	  vectors_min = vectors_max = acc_vector ();
     130  	}
     131      }
     132      if (gangs_actual != 1)
     133        __builtin_abort ();
     134      if (gangs_min != 0 || gangs_max != gangs_actual - 1
     135  	|| workers_min != 0 || workers_max != 0
     136  	|| vectors_min != 0 || vectors_max != 0)
     137        __builtin_abort ();
     138  #undef GANGS
     139    }
     140  
     141    /* GR, WP, VS.  */
     142    {
     143  #define WORKERS 0
     144      /* { dg-warning {'num_workers' value must be positive} {} { target c } .-1 } */
     145      int workers_actual = WORKERS;
     146      int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
     147      gangs_min = workers_min = vectors_min = INT_MAX;
     148      gangs_max = workers_max = vectors_max = INT_MIN;
     149  #pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
     150    copy (workers_actual) \
     151    num_workers (WORKERS)
     152      /* { dg-note {in expansion of macro 'WORKERS'} {} { target c } .-1 } */
     153      /* { dg-warning {'num_workers' value must be positive} {} { target c++ } .-2 } */
     154      /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
     155      /* { dg-warning {region contains worker partitioned code but is not worker partitioned} {} { target *-*-* } l_compute$c_compute } */
     156      {
     157        /* We're actually executing with num_workers (1).  */
     158        workers_actual = 1;
     159  #pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
     160    worker \
     161    reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
     162        /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
     163        /* { dg-optimized {assigned OpenACC worker loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
     164        for (int i = N * workers_actual; i > -N * workers_actual; --i)
     165  	{
     166  	  gangs_min = gangs_max = acc_gang ();
     167  	  workers_min = workers_max = acc_worker ();
     168  	  vectors_min = vectors_max = acc_vector ();
     169  	}
     170      }
     171      if (workers_actual != 1)
     172        __builtin_abort ();
     173      if (gangs_min != 0 || gangs_max != 0
     174  	|| workers_min != 0 || workers_max != workers_actual - 1
     175  	|| vectors_min != 0 || vectors_max != 0)
     176        __builtin_abort ();
     177  #undef WORKERS
     178    }
     179  
     180    /* GR, WS, VP.  */
     181    {
     182  #define VECTORS 0
     183      /* { dg-warning {'vector_length' value must be positive} {} { target c } .-1 } */
     184      int vectors_actual = VECTORS;
     185      int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
     186      gangs_min = workers_min = vectors_min = INT_MAX;
     187      gangs_max = workers_max = vectors_max = INT_MIN;
     188  #pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
     189    copy (vectors_actual) \
     190    vector_length (VECTORS)
     191      /* { dg-note {in expansion of macro 'VECTORS'} {} { target c } .-1 } */
     192      /* { dg-warning {'vector_length' value must be positive} {} { target c++ } .-2 } */
     193      /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
     194      /* { dg-warning {region contains vector partitioned code but is not vector partitioned} {} { target *-*-* } l_compute$c_compute } */
     195      /* { dg-warning {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */
     196      {
     197        /* We're actually executing with vector_length (1), just the GCC nvptx
     198  	 back end enforces vector_length (32).  */
     199        if (acc_on_device (acc_device_nvidia))
     200  	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
     201  	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
     202  	vectors_actual = 32;
     203        else
     204  	vectors_actual = 1;
     205  #pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
     206    vector \
     207    reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
     208        /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
     209        /* { dg-optimized {assigned OpenACC vector loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
     210        for (int i = N * vectors_actual; i > -N * vectors_actual; --i)
     211  	{
     212  	  gangs_min = gangs_max = acc_gang ();
     213  	  workers_min = workers_max = acc_worker ();
     214  	  vectors_min = vectors_max = acc_vector ();
     215  	}
     216      }
     217      if (acc_get_device_type () == acc_device_nvidia)
     218        {
     219  	if (vectors_actual != 32)
     220  	  __builtin_abort ();
     221        }
     222      else
     223        if (vectors_actual != 1)
     224  	__builtin_abort ();
     225      if (gangs_min != 0 || gangs_max != 0
     226  	|| workers_min != 0 || workers_max != 0
     227  	|| vectors_min != 0 || vectors_max != vectors_actual - 1)
     228        __builtin_abort ();
     229  #undef VECTORS
     230    }
     231  
     232  
     233    /* High value.  */
     234    
     235    /* GR, WS, VS.  */
     236    {
     237      /* There is no actual limit for the number of gangs, so we try with a
     238         rather high value.  */
     239      int gangs = 12345;
     240      int gangs_actual = gangs;
     241      int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
     242      gangs_min = workers_min = vectors_min = INT_MAX;
     243      gangs_max = workers_max = vectors_max = INT_MIN;
     244  #pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
     245    copy (gangs_actual) \
     246    reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) \
     247    num_gangs (gangs)
     248      /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
     249      /* { dg-bogus {warning: region is gang partitioned but does not contain gang partitioned code} {TODO 'reduction'} { xfail *-*-* } l_compute$c_compute } */
     250      {
     251        if (acc_on_device (acc_device_host))
     252  	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
     253  	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
     254  	{
     255  	  /* We're actually executing with num_gangs (1).  */
     256  	  gangs_actual = 1;
     257  	}
     258        /* As we're executing GR not GP, don't multiply with a "gangs_actual"
     259  	 factor.  */
     260        for (int i = N /* * gangs_actual */; i > -N /* * gangs_actual */; --i)
     261  	{
     262  	  gangs_min = gangs_max = acc_gang ();
     263  	  workers_min = workers_max = acc_worker ();
     264  	  vectors_min = vectors_max = acc_vector ();
     265  	}
     266      }
     267      if (gangs_actual < 1)
     268        __builtin_abort ();
     269      if (gangs_min != 0 || gangs_max != gangs_actual - 1
     270  	|| workers_min != 0 || workers_max != 0
     271  	|| vectors_min != 0 || vectors_max != 0)
     272        __builtin_abort ();
     273    }
     274  
     275    /* GP, WS, VS.  */
     276    {
     277      /* There is no actual limit for the number of gangs, so we try with a
     278         rather high value.  */
     279      int gangs = 12345;
     280      int gangs_actual = gangs;
     281      int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
     282      gangs_min = workers_min = vectors_min = INT_MAX;
     283      gangs_max = workers_max = vectors_max = INT_MIN;
     284  #pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
     285    copy (gangs_actual) \
     286    num_gangs (gangs)
     287      /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
     288      {
     289        if (acc_on_device (acc_device_host))
     290  	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
     291  	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
     292  	{
     293  	  /* We're actually executing with num_gangs (1).  */
     294  	  gangs_actual = 1;
     295  	}
     296  #pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
     297    gang \
     298    reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
     299        /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
     300        /* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
     301        for (int i = N * gangs_actual; i > -N * gangs_actual; --i)
     302  	{
     303  	  gangs_min = gangs_max = acc_gang ();
     304  	  workers_min = workers_max = acc_worker ();
     305  	  vectors_min = vectors_max = acc_vector ();
     306  	}
     307      }
     308      if (gangs_actual < 1)
     309        __builtin_abort ();
     310      if (gangs_min != 0 || gangs_max != gangs_actual - 1
     311  	|| workers_min != 0 || workers_max != 0
     312  	|| vectors_min != 0 || vectors_max != 0)
     313        __builtin_abort ();
     314    }
     315  
     316    /* GR, WP, VS.  */
     317    {
    /* We try with an outrageously large value.  */
     319  #define WORKERS 2 << 20
     320      int workers_actual = WORKERS;
     321      int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
     322      gangs_min = workers_min = vectors_min = INT_MAX;
     323      gangs_max = workers_max = vectors_max = INT_MIN;
     324  #pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
     325    copy (workers_actual) \
     326    num_workers (WORKERS)
     327      /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
     328      /* { dg-warning {using 'num_workers \(32\)', ignoring 2097152} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */
     329      {
     330        if (acc_on_device (acc_device_host))
     331  	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
     332  	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
     333  	{
     334  	  /* We're actually executing with num_workers (1).  */
     335  	  workers_actual = 1;
     336  	}
     337        else if (acc_on_device (acc_device_nvidia))
     338  	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
     339  	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
     340  	{
     341  	  /* The GCC nvptx back end enforces num_workers (32).  */
     342  	  workers_actual = 32;
     343  	}
     344        else if (acc_on_device (acc_device_radeon))
     345  	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
     346  	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
     347  	{
     348  	  /* The GCC GCN back end is limited to num_workers (16).  */
     349  	  workers_actual = 16;
     350  	}
     351        else
     352  	__builtin_abort ();
     353  #pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
     354    worker \
     355    reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
     356        /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
     357        /* { dg-optimized {assigned OpenACC worker loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
     358        for (int i = N * workers_actual; i > -N * workers_actual; --i)
     359  	{
     360  	  gangs_min = gangs_max = acc_gang ();
     361  	  workers_min = workers_max = acc_worker ();
     362  	  vectors_min = vectors_max = acc_vector ();
     363  	}
     364      }
     365      if (workers_actual < 1)
     366        __builtin_abort ();
     367      if (gangs_min != 0 || gangs_max != 0
     368  	|| workers_min != 0 || workers_max != workers_actual - 1
     369  	|| vectors_min != 0 || vectors_max != 0)
     370        __builtin_abort ();
     371  #undef WORKERS
     372    }
     373  
     374    /* GR, WP, VS.  */
     375    {
    /* We try with an outrageously large value.  */
     377      int workers = 2 << 20;
     378      /* For nvptx offloading, this one will not result in "using num_workers
     379         (32), ignoring runtime setting", and will in fact try to launch with
     380         "num_workers (workers)", which will run into "libgomp: cuLaunchKernel
     381         error: invalid argument".  So, limit ourselves here.  */
     382      if (acc_get_device_type () == acc_device_nvidia)
     383        workers = 32;
     384      int workers_actual = workers;
     385      int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
     386      gangs_min = workers_min = vectors_min = INT_MAX;
     387      gangs_max = workers_max = vectors_max = INT_MIN;
     388  #pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
     389    copy (workers_actual) \
     390    num_workers (workers)
     391      /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
     392      {
     393        if (acc_on_device (acc_device_host))
     394  	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
     395  	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
     396  	{
     397  	  /* We're actually executing with num_workers (1).  */
     398  	  workers_actual = 1;
     399  	}
     400        else if (acc_on_device (acc_device_nvidia))
     401  	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
     402  	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
     403  	{
     404  	  /* We're actually executing with num_workers (32).  */
     405  	  /* workers_actual = 32; */
     406  	}
     407        else if (acc_on_device (acc_device_radeon))
     408  	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
     409  	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
     410  	{
     411  	  /* The GCC GCN back end is limited to num_workers (16).  */
     412  	  workers_actual = 16;
     413  	}
     414        else
     415  	__builtin_abort ();
     416  #pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
     417    worker \
     418    reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
     419        /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
     420        /* { dg-optimized {assigned OpenACC worker loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
     421        for (int i = N * workers_actual; i > -N * workers_actual; --i)
     422  	{
     423  	  gangs_min = gangs_max = acc_gang ();
     424  	  workers_min = workers_max = acc_worker ();
     425  	  vectors_min = vectors_max = acc_vector ();
     426  	}
     427      }
     428      if (workers_actual < 1)
     429        __builtin_abort ();
     430      if (gangs_min != 0 || gangs_max != 0
     431  	|| workers_min != 0 || workers_max != workers_actual - 1
     432  	|| vectors_min != 0 || vectors_max != 0)
     433        __builtin_abort ();
     434    }
     435  
     436    /* GR, WS, VP.  */
     437    {
    /* We try with an outrageously large value.  */
     439  #define VECTORS 2 << 20
     440      int vectors_actual = VECTORS;
     441      int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
     442      gangs_min = workers_min = vectors_min = INT_MAX;
     443      gangs_max = workers_max = vectors_max = INT_MIN;
     444  #pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
     445    copy (vectors_actual) \
     446    vector_length (VECTORS)
     447      /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
     448      /* { dg-warning {using 'vector_length \(1024\)', ignoring 2097152} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */
     449      {
     450        if (acc_on_device (acc_device_host))
     451  	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
     452  	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
     453  	{
     454  	  /* We're actually executing with vector_length (1).  */
     455  	  vectors_actual = 1;
     456  	}
     457        else if (acc_on_device (acc_device_nvidia))
     458  	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
     459  	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
     460  	{
     461  	  /* The GCC nvptx back end reduces to vector_length (1024).  */
     462  	  vectors_actual = 1024;
     463  	}
     464        else if (acc_on_device (acc_device_radeon))
     465  	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
     466  	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
     467  	{
	  /* The GCC GCN back end enforces vector_length (1): autovectorize.  */
     469  	  vectors_actual = 1;
     470  	}
     471        else
     472  	__builtin_abort ();
     473  #pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
     474    vector \
     475    reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
     476        /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
     477        /* { dg-optimized {assigned OpenACC vector loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
     478        for (int i = N * vectors_actual; i > -N * vectors_actual; --i)
     479  	{
     480  	  gangs_min = gangs_max = acc_gang ();
     481  	  workers_min = workers_max = acc_worker ();
     482  	  vectors_min = vectors_max = acc_vector ();
     483  	}
     484      }
     485      if (vectors_actual < 1)
     486        __builtin_abort ();
     487      if (gangs_min != 0 || gangs_max != 0
     488  	|| workers_min != 0 || workers_max != 0
     489  	|| vectors_min != 0 || vectors_max != vectors_actual - 1)
     490        __builtin_abort ();
     491  #undef VECTORS
     492    }
     493  
     494    /* GR, WS, VP.  */
     495    {
    /* We try with an outrageously large value.  */
     497      int vectors = 2 << 20;
     498      int vectors_actual = vectors;
     499      int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
     500      gangs_min = workers_min = vectors_min = INT_MAX;
     501      gangs_max = workers_max = vectors_max = INT_MIN;
     502  #pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
     503    copy (vectors_actual) \
     504    vector_length (vectors)
     505      /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
     506      /* { dg-warning {using 'vector_length \(32\)', ignoring runtime setting} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */
     507      {
     508        if (acc_on_device (acc_device_host))
     509  	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
     510  	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
     511  	{
     512  	  /* We're actually executing with vector_length (1).  */
     513  	  vectors_actual = 1;
     514  	}
     515        else if (acc_on_device (acc_device_nvidia))
     516  	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
     517  	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
     518  	{
     519  	  /* The GCC nvptx back end enforces vector_length (32).  */
     520  	  vectors_actual = 32;
     521  	}
     522        else if (acc_on_device (acc_device_radeon))
     523  	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
     524  	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
     525  	{
     526  	  /* Because of the way vectors are implemented for GCN, a vector loop
     527  	     containing a seq routine call will not vectorize calls to that
     528  	     routine.  Hence, we'll only get one "vector".  */
     529  	  vectors_actual = 1;
     530  	}
     531        else
     532  	__builtin_abort ();
     533  #pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
     534    vector \
     535    reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
     536        /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
     537        /* { dg-optimized {assigned OpenACC vector loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
     538        for (int i = N * vectors_actual; i > -N * vectors_actual; --i)
     539  	{
     540  	  gangs_min = gangs_max = acc_gang ();
     541  	  workers_min = workers_max = acc_worker ();
     542  	  vectors_min = vectors_max = acc_vector ();
     543  	}
     544      }
     545      if (vectors_actual < 1)
     546        __builtin_abort ();
     547      if (gangs_min != 0 || gangs_max != 0
     548  	|| workers_min != 0 || workers_max != 0
     549  	|| vectors_min != 0 || vectors_max != vectors_actual - 1)
     550        __builtin_abort ();
     551    }
     552  
     553  
  /* Composition of GP, WP, VP.  */
  {
    int gangs = 12345;
    /* With nvptx offloading, multi-level reductions apparently are very slow
       in the following case.  So, limit ourselves here.  */
    if (acc_get_device_type () == acc_device_nvidia)
      gangs = 3;
    /* Similar appears to be true for GCN.  */
    if (acc_get_device_type () == acc_device_radeon)
      gangs = 3;
    int gangs_actual = gangs;
#define WORKERS 3
    int workers_actual = WORKERS;
#define VECTORS 11
    int vectors_actual = VECTORS;
    int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
    /* Sentinel-initialize the observed-ID trackers: the min/max reductions on
       the loops below narrow these to the range of gang/worker/vector IDs
       actually observed inside the region.  */
    gangs_min = workers_min = vectors_min = INT_MAX;
    gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
  copy (gangs_actual, workers_actual, vectors_actual) \
  num_gangs (gangs) \
  num_workers (WORKERS) \
  vector_length (VECTORS)
    /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
    /* { dg-warning {using 'vector_length \(32\)', ignoring 11} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */
    {
      if (acc_on_device (acc_device_host))
	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
	{
	  /* We're actually executing with num_gangs (1), num_workers (1),
	     vector_length (1).  */
	  gangs_actual = 1;
	  workers_actual = 1;
	  vectors_actual = 1;
	}
      else if (acc_on_device (acc_device_nvidia))
	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
	{
	  /* The GCC nvptx back end enforces vector_length (32).  */
	  vectors_actual = 32;
	}
      else if (acc_on_device (acc_device_radeon))
	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
	{
	  /* See above comments about GCN vectors_actual.  */
	  vectors_actual = 1;
	}
      else
	__builtin_abort ();
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
  gang \
  reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
      /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
      /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
      /* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
      for (int i = N * gangs_actual; i > -N * gangs_actual; --i)
#pragma acc loop /* { dg-line l_loop_j[incr c_loop_j] } */ \
  worker \
  reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
	/* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_j$c_loop_j } */
	/* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_j$c_loop_j } */
	/* { dg-optimized {assigned OpenACC worker loop parallelism} {} { target *-*-* } l_loop_j$c_loop_j } */
	for (int j = N * workers_actual; j > -N * workers_actual; --j)
#pragma acc loop /* { dg-line l_loop_k[incr c_loop_k] } */ \
  vector \
  reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
	  /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_k$c_loop_k } */
	  /* { dg-optimized {assigned OpenACC vector loop parallelism} {} { target *-*-* } l_loop_k$c_loop_k } */
	  for (int k = N * vectors_actual; k > -N * vectors_actual; --k)
	    {
	      gangs_min = gangs_max = acc_gang ();
	      workers_min = workers_max = acc_worker ();
	      vectors_min = vectors_max = acc_vector ();
	    }
    }
    /* Full gang, worker, and vector partitioning: on each axis, IDs
       0 .. actual - 1 must all have been observed.  */
    if (gangs_min != 0 || gangs_max != gangs_actual - 1
	|| workers_min != 0 || workers_max != workers_actual - 1
	|| vectors_min != 0 || vectors_max != vectors_actual - 1)
      __builtin_abort ();
#undef VECTORS
#undef WORKERS
  }
     639  
     640  
  /* OpenACC kernels construct.  */

  /* We can't test parallelized OpenACC kernels constructs in this way: use of
     the acc_gang, acc_worker, acc_vector functions will make the construct
     unparallelizable.  */


  /* Unparallelized OpenACC kernels constructs must get launched as 1 x 1 x 1
     kernels.  */
  {
    int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
    /* Sentinel init: the min/max reductions on the loop below narrow these to
       the range of IDs actually observed.  */
    gangs_min = workers_min = vectors_min = INT_MAX;
    gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc kernels /* { dg-line l_compute[incr c_compute] } */
    /* { dg-note {OpenACC 'kernels' decomposition: variable 'vectors_max' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
       { dg-note {variable 'vectors_max' made addressable} {} { target *-*-* } l_compute$c_compute } */
    /* { dg-note {OpenACC 'kernels' decomposition: variable 'vectors_min' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
       { dg-note {variable 'vectors_min' made addressable} {} { target *-*-* } l_compute$c_compute } */
    /* { dg-note {OpenACC 'kernels' decomposition: variable 'workers_max' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
       { dg-note {variable 'workers_max' made addressable} {} { target *-*-* } l_compute$c_compute } */
    /* { dg-note {OpenACC 'kernels' decomposition: variable 'workers_min' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
       { dg-note {variable 'workers_min' made addressable} {} { target *-*-* } l_compute$c_compute } */
    /* { dg-note {OpenACC 'kernels' decomposition: variable 'gangs_max' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
       { dg-note {variable 'gangs_max' made addressable} {} { target *-*-* } l_compute$c_compute } */
    /* { dg-note {OpenACC 'kernels' decomposition: variable 'gangs_min' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
       { dg-note {variable 'gangs_min' made addressable} {} { target *-*-* } l_compute$c_compute } */
    {
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
  reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
      /* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i$c_loop_i } */
      /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
      /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
      /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
      for (int i = N; i > -N; --i)
	{
	  /* This is to make the loop unparallelizable.  */
	  asm volatile ("" : : : "memory");

	  gangs_min = gangs_max = acc_gang ();
	  workers_min = workers_max = acc_worker ();
	  vectors_min = vectors_max = acc_vector ();
	}
    }
    /* Unparallelized kernels must have run as 1 x 1 x 1, so every observed
       ID must be 0.  */
    if (gangs_min != 0 || gangs_max != 1 - 1
	|| workers_min != 0 || workers_max != 1 - 1
	|| vectors_min != 0 || vectors_max != 1 - 1)
      __builtin_abort ();
  }
     689  
     690  
  /* Unparallelized OpenACC kernels constructs must get launched as 1 x 1 x 1
     kernels even when there are explicit num_gangs, num_workers, or
     vector_length clauses.  */
  {
    int gangs = 5;
#define WORKERS 5
#define VECTORS 13
    int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
    /* Sentinel init: the min/max reductions on the loop below narrow these to
       the range of IDs actually observed.  */
    gangs_min = workers_min = vectors_min = INT_MAX;
    gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc kernels /* { dg-line l_compute[incr c_compute] } */ \
  num_gangs (gangs) \
  num_workers (WORKERS) \
  vector_length (VECTORS)
    /* { dg-note {OpenACC 'kernels' decomposition: variable 'vectors_max' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
       { dg-note {variable 'vectors_max' made addressable} {} { target *-*-* } l_compute$c_compute } */
    /* { dg-note {OpenACC 'kernels' decomposition: variable 'vectors_min' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
       { dg-note {variable 'vectors_min' made addressable} {} { target *-*-* } l_compute$c_compute } */
    /* { dg-note {OpenACC 'kernels' decomposition: variable 'workers_max' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
       { dg-note {variable 'workers_max' made addressable} {} { target *-*-* } l_compute$c_compute } */
    /* { dg-note {OpenACC 'kernels' decomposition: variable 'workers_min' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
       { dg-note {variable 'workers_min' made addressable} {} { target *-*-* } l_compute$c_compute } */
    /* { dg-note {OpenACC 'kernels' decomposition: variable 'gangs_max' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
       { dg-note {variable 'gangs_max' made addressable} {} { target *-*-* } l_compute$c_compute } */
    /* { dg-note {OpenACC 'kernels' decomposition: variable 'gangs_min' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
       { dg-note {variable 'gangs_min' made addressable} {} { target *-*-* } l_compute$c_compute } */
    {
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
  reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
      /* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i$c_loop_i } */
      /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
      /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
      /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
      for (int i = N; i > -N; --i)
	{
	  /* This is to make the loop unparallelizable.  */
	  asm volatile ("" : : : "memory");

	  gangs_min = gangs_max = acc_gang ();
	  workers_min = workers_max = acc_worker ();
	  vectors_min = vectors_max = acc_vector ();
	}
    }
    /* Despite the explicit num_gangs/num_workers/vector_length clauses, the
       unparallelized region must still have been launched as 1 x 1 x 1.  */
    if (gangs_min != 0 || gangs_max != 1 - 1
	|| workers_min != 0 || workers_max != 1 - 1
	|| vectors_min != 0 || vectors_max != 1 - 1)
      __builtin_abort ();
#undef VECTORS
#undef WORKERS
  }
     741  
     742  
  /* OpenACC serial construct.  */

  /* GR, WS, VS.  */
  {
    int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
    /* Sentinel init: the min/max reductions on the 'serial' construct narrow
       these to the range of IDs actually observed.  */
    gangs_min = workers_min = vectors_min = INT_MAX;
    gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc serial /* { dg-line l_compute[incr c_compute] } */ \
  reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
    /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
    /* { dg-warning {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */
    {
      for (int i = N; i > -N; i--)
	{
	  gangs_min = gangs_max = acc_gang ();
	  workers_min = workers_max = acc_worker ();
	  vectors_min = vectors_max = acc_vector ();
	}
    }
    /* 'serial' executes with 1 gang x 1 worker x 1 vector lane, so every
       observed ID must be 0.  */
    if (gangs_min != 0 || gangs_max != 1 - 1
	|| workers_min != 0 || workers_max != 1 - 1
	|| vectors_min != 0 || vectors_max != 1 - 1)
      __builtin_abort ();
  }
     767  
  /* Composition of GP, WP, VP.  */
  {
    int vectors_actual = 1;  /* Implicit 'vector_length (1)' clause.  */
    int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
    /* Sentinel init: the loop reductions inside the region compute the
       observed min/max IDs, returned via the 'copy' clause below.  */
    gangs_min = workers_min = vectors_min = INT_MAX;
    gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc serial /* { dg-line l_compute[incr c_compute] } */ \
  copy (vectors_actual) \
  copy (gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max)
    /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
    /* { dg-bogus {warning: region contains gang partitioned code but is not gang partitioned} {TODO 'serial'} { xfail *-*-* } l_compute$c_compute }
       { dg-bogus {warning: region contains worker partitioned code but is not worker partitioned} {TODO 'serial'} { xfail *-*-* } l_compute$c_compute }
       { dg-bogus {warning: region contains vector partitioned code but is not vector partitioned} {TODO 'serial'} { xfail *-*-* } l_compute$c_compute } */
    /* { dg-warning {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */
    {
      if (acc_on_device (acc_device_nvidia))
	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
	{
	  /* The GCC nvptx back end enforces vector_length (32).  */
	  /* It's unclear if that's actually permissible here;
	     <https://github.com/OpenACC/openacc-spec/issues/238> "OpenACC
	     'serial' construct might not actually be serial".  */
	  vectors_actual = 32;
	}
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
  gang \
  reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
      /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
      /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
      /* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
      for (int i = N; i > -N; i--)
#pragma acc loop /* { dg-line l_loop_j[incr c_loop_j] } */ \
  worker \
  reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
	/* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_j$c_loop_j } */
	/* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_j$c_loop_j } */
	/* { dg-optimized {assigned OpenACC worker loop parallelism} {} { target *-*-* } l_loop_j$c_loop_j } */
	for (int j = N; j > -N; j--)
#pragma acc loop /* { dg-line l_loop_k[incr c_loop_k] } */ \
  vector \
  reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
	  /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_k$c_loop_k } */
	  /* { dg-optimized {assigned OpenACC vector loop parallelism} {} { target *-*-* } l_loop_k$c_loop_k } */
	  for (int k = N * vectors_actual; k > -N * vectors_actual; k--)
	    {
	      gangs_min = gangs_max = acc_gang ();
	      workers_min = workers_max = acc_worker ();
	      vectors_min = vectors_max = acc_vector ();
	    }
    }
    if (acc_get_device_type () == acc_device_nvidia)
      {
	if (vectors_actual != 32)
	  __builtin_abort ();
      }
    else
      if (vectors_actual != 1)
	__builtin_abort ();
    /* Gang and worker stay serial (only ID 0 observed); the vector axis may
       have been widened to vectors_actual lanes (see above).  */
    if (gangs_min != 0 || gangs_max != 1 - 1
	|| workers_min != 0 || workers_max != 1 - 1
	|| vectors_min != 0 || vectors_max != vectors_actual - 1)
      __builtin_abort ();
  }
     832  
     833  
     834    return 0;
     835  }