1 #include <stdlib.h>
2
3 #define N (1024 * 512)
4 #define COUNTERTYPE unsigned int
5
6 int
7 main (void)
8 {
9 unsigned int *__restrict a;
10 unsigned int *__restrict b;
11 unsigned int *__restrict c;
12
13 a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
14 b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
15 c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
16
17 /* Parallelism dimensions: compiler/runtime decides. */
18 #pragma acc kernels copyout (a[0:N])
19 {
20 for (COUNTERTYPE i = 0; i < N; i++)
21 a[i] = i * 2;
22 }
23
24 /* Parallelism dimensions: variable. */
25 #pragma acc kernels copyout (b[0:N]) \
26 num_gangs (3 + a[3]) num_workers (5 + a[5]) vector_length (7 + a[7])
27 /* { dg-prune-output "using .vector_length \\(32\\)., ignoring runtime setting" } */
28 {
29 for (COUNTERTYPE i = 0; i < N; i++)
30 b[i] = i * 4;
31 }
32
33 /* Parallelism dimensions: literal. */
34 #pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) \
35 num_gangs (3) num_workers (5) vector_length (7)
36 /* { dg-prune-output "using .vector_length \\(32\\)., ignoring 7" } */
37 {
38 for (COUNTERTYPE ii = 0; ii < N; ii++)
39 c[ii] = a[ii] + b[ii];
40 }
41
42 for (COUNTERTYPE i = 0; i < N; i++)
43 {
44 if (a[i] != i * 2)
45 abort ();
46 if (b[i] != i * 4)
47 abort ();
48 if (c[i] != a[i] + b[i])
49 abort ();
50 }
51
52 free (a);
53 free (b);
54 free (c);
55
56 return 0;
57 }