(root)/
gcc-13.2.0/
libgomp/
testsuite/
libgomp.oacc-c-c++-common/
pr85381-4.c
       1  /* { dg-do run { target openacc_nvidia_accel_selected } }
       2     { dg-skip-if "" { *-*-* } { "*" } { "-O2" } } */
       3  /* { dg-additional-options "-foffload=-fdump-rtl-mach" } */
       4  
       /* Upper bound (trip count) used by both worker loops in main.  */
       5  #define n 1024
       6  
       /* Test entry point: a single OpenACC parallel region holding two
          consecutive worker loops, each with an empty body and n iterations.
          Nothing is computed here and the function always returns 0; the
          property of interest is the barrier count checked by the dump scan
          at the end of this file.  */
       7  int
       8  main (void)
       9  {
      10    #pragma acc parallel
      11    {
              /* First worker loop; the body is an empty statement.  */
      12      #pragma acc loop worker
      13      for (int i = 0; i < n; i++)
      14        ;
      15  
              /* Second worker loop, directly after the first in the same
                 parallel region.  Per the note at the end of this file, the
                 hand-over of %ntid.y from one loop to the next accounts for
                 two of the expected barriers.  */
      16      #pragma acc loop worker
      17      for (int i = 0; i < n; i++)
      18        ;
      19    }
      20  
      21    return 0;
      22  }
      23  
      24  /* Currently, %ntid.y is broadcast from one loop to the next, so there are 2
      25     bar.syncs for that (the other two are there for the same reason as in
      26     pr85381-2.c).  TODO: Recompute %ntid.y instead of broadcasting it.  */
      27  /* { dg-final { scan-offload-rtl-dump-times "nvptx_barsync" 4 "mach" } } */