1 #include <assert.h>
2
/* Worker count used by the parallel region in main.  Nvidia targets get
   the smaller value to avoid
   'libgomp: The Nvidia accelerator has insufficient resources'.  */
#if !ACC_DEVICE_TYPE_nvidia
#define NUM_WORKERS 32
#else
#define NUM_WORKERS 24
#endif
9
10 /* Test of reduction on both parallel and loop directives (workers and vectors
11 together in gang-partitioned mode, float type, multiple reductions). */
12
/* Run a '+' and a 'max' reduction over ARR inside an OpenACC parallel
   region, recompute both values with plain host loops, and assert that
   the two sets of results match.  Returns 0 when every assertion holds.  */
int
main (int argc, char *argv[])
{
  int i;
  int j;
  float arr[32768];
  float res = 0;	/* Sum produced by the parallel region.  */
  float mres = 0;	/* Maximum produced by the parallel region.  */
  float hres = 0;	/* Reference sum computed on the host.  */
  float hmres = 0;	/* Reference maximum computed on the host.  */

  /* Fill the input with small integer values in the range 0 .. 511.  */
  for (i = 0; i < 32768; i++)
    arr[i] = i % (32768 / 64);

  /* The gang loop itself carries no reduction clause; the DejaGnu
     directive on that line expects the resulting compiler warning
     (marked "TODO").  Each gang iteration J walks its 1024-element
     slice of ARR twice: once forwards, once backwards.  */
  #pragma acc parallel \
    num_gangs(32) num_workers(NUM_WORKERS) vector_length(32) \
    reduction(+:res) reduction(max:mres) copy(res, mres)
  {
    #pragma acc loop gang /* { dg-warning "nested loop in reduction needs reduction clause for 'm\?res'" "TODO" } */
    for (j = 0; j < 32; j++)
      {
	#pragma acc loop worker vector reduction(+:res) reduction(max:mres)
	for (i = 0; i < 1024; i++)
	  {
	    res += arr[j * 1024 + i];
	    if (arr[j * 1024 + i] > mres)
	      mres = arr[j * 1024 + i];
	  }

	#pragma acc loop worker vector reduction(+:res) reduction(max:mres)
	for (i = 0; i < 1024; i++)
	  {
	    res += arr[j * 1024 + (1023 - i)];
	    if (arr[j * 1024 + (1023 - i)] > mres)
	      mres = arr[j * 1024 + (1023 - i)];
	  }
      }
  }

  /* Host reference: visit the same elements, forwards and backwards,
     so every element contributes twice to the sum.  */
  for (j = 0; j < 32; j++)
    {
      const float *row = &arr[j * 1024];

      for (i = 0; i < 1024; i++)
	{
	  const float fwd = row[i];
	  const float rev = row[1023 - i];

	  hres += fwd;
	  hres += rev;
	  if (fwd > hmres)
	    hmres = fwd;
	  if (rev > hmres)
	    hmres = rev;
	}
    }

  /* 16777216 is 2^24; presumably this guards that the values stay in
     the range where float holds integers exactly, so the equality
     checks below are meaningful.  */
  assert (hres <= 16777216);
  assert (res == hres);

  assert (hmres <= 16777216);
  assert (mres == hmres);

  return 0;
}