1  /* Test OpenACC 'kernels' construct decomposition.  */
       2  
       3  /* { dg-additional-options "-fopt-info-omp-all" } */
       4  
       5  /* { dg-additional-options "-fdump-tree-gimple" } */
       6  
       7  /* { dg-additional-options "--param=openacc-kernels=decompose" }
       8     { dg-additional-options "-fdump-tree-omp_oacc_kernels_decompose" } */
       9  
      10  /* { dg-additional-options "--param=openacc-privatization=noisy" }
      11     Prune a few: uninteresting, and potentially varying depending on GCC configuration (data types):
      12     { dg-prune-output {note: variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} } */
      13  
      14  /* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting
      15     aspects of that functionality.  */
      16  
      17  /* See also '../../gfortran.dg/goacc/kernels-decompose-1.f95'.  */
      18  
      19  /* It's only with Tcl 8.5 (released in 2007) that "the variable 'varName'
      20     passed to 'incr' may be unset, and in that case, it will be set to [...]",
      21     so to maintain compatibility with earlier Tcl releases, we manually
      22     initialize counter variables:
      23     { dg-line l_dummy[variable c_compute 0 c_loop_i 0] }
      24     { dg-message "dummy" "" { target iN-VAl-Id } l_dummy } to avoid
      25     "WARNING: dg-line var l_dummy defined, but not used".  */
      26  
      27  #define N 1024 /* Element count of 'a'; also the upper bound of every loop in 'main'.  */
      28  
      29  unsigned int a[N]; /* Summed by the loops in 'main'; listed in 'copyin' on the 'kernels' construct.  */
      30  
      31  int
      32  main (void)
      33  {
            /* Exercise decomposition of one OpenACC 'kernels' region that holds
               four loops over 'a' (plain 'loop', 'loop independent', a 'loop'
               inside an 'if', and 'loop auto') with two straight-line updates
               after the first loop.  The expected compiler diagnostics are
               given by the DejaGnu directives next to each construct.  The
               value of 'sum' is not checked; 'main' returns 0 unconditionally.  */
      34    int i;
      35    unsigned int sum = 1;
      36  
      37  #pragma acc kernels copyin(a[0:N]) copy(sum) /* { dg-line l_compute[incr c_compute] } */
      38    /* { dg-note {variable 'sum\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
      39    /* { dg-bogus {optimized: assigned OpenACC seq loop parallelism} TODO { xfail *-*-* } l_compute$c_compute }
      40       TODO Is this maybe the report that belongs to the XFAILed report further down?  */
      41    {
              /* Loop without an 'independent' or 'auto' clause.  */
      42      #pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */
      43      /* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i$c_loop_i } */
      44      /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
      45      /* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } */
      46      for (i = 0; i < N; ++i)
      47        sum += a[i];
      48  
              /* Straight-line statements between the first and second loop.  */
      49      /* { dg-note {beginning 'gang-single' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */
      50      sum++;
      51      a[0]++;
      52  
              /* Loop explicitly marked 'independent'.  */
      53      #pragma acc loop independent /* { dg-line l_loop_i[incr c_loop_i] } */
      54      /* { dg-note {parallelized loop nest in OpenACC 'kernels' region} {} { target *-*-* } l_loop_i$c_loop_i } */
      55      /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
      56      /* { dg-optimized "assigned OpenACC gang vector loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } */
      57      for (i = 0; i < N; ++i)
      58        sum += a[i];
      59  
              /* Loop nested inside a conditional.  */
      60      /* { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */
      61      if (sum > 10)
      62        { 
      63          #pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */
      64  	/* { dg-missed "unparallelized loop nest in OpenACC 'kernels' region: it's executed conditionally" "" { target *-*-* } l_loop_i$c_loop_i } */
      65  	/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
      66  	/*TODO { dg-optimized "assigned OpenACC seq loop parallelism" "TODO" { xfail *-*-* } l_loop_i$c_loop_i } */
      67          for (i = 0; i < N; ++i)
      68            sum += a[i];
      69        }
      70  
              /* Loop with the 'auto' clause.  */
      71      #pragma acc loop auto /* { dg-line l_loop_i[incr c_loop_i] } */
      72      /* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i$c_loop_i } */
      73      /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
      74      /* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } */
      75      for (i = 0; i < N; ++i)
      76        sum += a[i];
      77    }
      78  
      79    return 0;
      80  }
      81  
      82  /* { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_kernels map\(tofrom:sum \[len: [0-9]+\]\) map\(to:a\[0\] \[len: [0-9]+\]\) map\(firstprivate:a \[pointer assign, bias: 0\]\)$} 1 "gimple" } }
      83  
      84     { dg-final { scan-tree-dump-times {(?n)#pragma acc loop private\(i\)$} 2 "gimple" } }
      85     { dg-final { scan-tree-dump-times {(?n)#pragma acc loop independent private\(i\)$} 1 "gimple" } }
      86     { dg-final { scan-tree-dump-times {(?n)#pragma acc loop auto private\(i\)$} 1 "gimple" } }
      87     { dg-final { scan-tree-dump-times {(?n)#pragma acc loop} 4 "gimple" } } */
      88  
      89  /* Check that the OpenACC 'kernels' got decomposed into 'data' and an enclosed
      90     sequence of compute constructs.
      91     { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_data_kernels map\(tofrom:sum \[len: [0-9]+\]\) map\(to:a\[0\] \[len: [0-9]+\]\)$} 1 "omp_oacc_kernels_decompose" } }
      92     As noted above, we get three "old-style" kernel regions, one gang-single region, and one parallelized loop region.
      93     { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_kernels async\(-1\) map\(force_present:sum \[len: [0-9]+\]\) map\(force_present:a\[0\] \[len: [0-9]+\]\) map\(firstprivate:a \[pointer assign, bias: 0\]\)$} 3 "omp_oacc_kernels_decompose" } }
      94     { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_parallel_kernels_parallelized async\(-1\) map\(force_present:sum \[len: [0-9]+\]\) map\(force_present:a\[0\] \[len: [0-9]+\]\) map\(firstprivate:a \[pointer assign, bias: 0\]\)$} 1 "omp_oacc_kernels_decompose" } }
      95     { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_parallel_kernels_gang_single async\(-1\) num_gangs\(1\) map\(force_present:sum \[len: [0-9]+\]\) map\(force_present:a\[0\] \[len: [0-9]+\]\) map\(firstprivate:a \[pointer assign, bias: 0\]\)$} 1 "omp_oacc_kernels_decompose" } }
      96  
      97     'data' plus five compute constructs.
      98     { dg-final { scan-tree-dump-times {(?n)#pragma omp target } 6 "omp_oacc_kernels_decompose" } }
      99  
     100     { dg-final { scan-tree-dump-times {(?n)#pragma acc loop private\(i\)$} 2 "omp_oacc_kernels_decompose" } }
     101     { dg-final { scan-tree-dump-times {(?n)#pragma acc loop independent private\(i\)$} 1 "omp_oacc_kernels_decompose" } }
     102     { dg-final { scan-tree-dump-times {(?n)#pragma acc loop auto private\(i\)$} 1 "omp_oacc_kernels_decompose" } }
     103     { dg-final { scan-tree-dump-times {(?n)#pragma acc loop} 4 "omp_oacc_kernels_decompose" } }
     104  
     105     Each of the parallel regions is async, and there is a final call to
     106     __builtin_GOACC_wait.
     107     { dg-final { scan-tree-dump-times "__builtin_GOACC_wait" 1 "omp_oacc_kernels_decompose" } } */