1 /* { dg-do run { target openacc_nvidia_accel_selected } }
2 { dg-skip-if "" { *-*-* } { "*" } { "-O2" } } */
3 /* { dg-additional-options "-foffload=-fdump-rtl-mach" } */
4
/* Iteration count used as the upper bound of both worker loops in main.  */
5 #define n 1024
6
/* Test entry point: one OpenACC parallel region holding two consecutive
   worker-partitioned loops.  Nothing is computed; the test's check is the
   barrier-count scan of the offload "mach" RTL dump at the end of this file.
   Always returns 0.  */
7 int
8 main (void)
9 {
/* Both loops below live in the same parallel region.  */
10 #pragma acc parallel
11 {
/* First worker loop: n iterations with an empty statement as its body.  */
12 #pragma acc loop worker
13 for (int i = 0; i < n; i++)
14 ;
15
/* Second worker loop, identical to the first and directly following it.  */
16 #pragma acc loop worker
17 for (int i = 0; i < n; i++)
18 ;
19 }
20
21 return 0;
22 }
23
24 /* At the moment, %ntid.y is broadcast from one loop to the next, so there are
25 2 bar.syncs for that (the other two are there for the same reason as in
26 pr85381-2.c). TODO: Recompute %ntid.y instead of broadcasting it. */
27 /* { dg-final { scan-offload-rtl-dump-times "nvptx_barsync" 4 "mach" } } */