1 /* Test mapping of async values to specific underlying queues. */
2
3 #undef NDEBUG
4 #include <assert.h>
5 #include <openacc.h>
6
7 /* This is implemented in terms of the "acc_get_cuda_stream" interface. */
8
9 struct
10 {
11 int async;
12 void *cuda_stream;
13 } queues[] = { { acc_async_sync, NULL },
14 { acc_async_noval, NULL },
15 { 0, NULL },
16 { 1, NULL },
17 { 2, NULL },
18 { 36, NULL },
19 { 1982, NULL } };
20 const size_t queues_n = sizeof queues / sizeof queues[0];
21
22 int main(void)
23 {
24 /* Explicitly initialize: it's not clear whether the following OpenACC
25 runtime library calls implicitly initialize;
26 <https://github.com/OpenACC/openacc-spec/issues/102>. */
27 acc_device_t d;
28 #if defined ACC_DEVICE_TYPE_nvidia
29 d = acc_device_nvidia;
30 #elif defined ACC_DEVICE_TYPE_radeon
31 d = acc_device_radeon;
32 #elif defined ACC_DEVICE_TYPE_host
33 d = acc_device_host;
34 #else
35 # error Not ported to this ACC_DEVICE_TYPE
36 #endif
37 acc_init (d);
38
39 for (size_t i = 0; i < queues_n; ++i)
40 {
41 /* Before actually being used, there are all NULL. */
42 queues[i].cuda_stream = acc_get_cuda_stream (queues[i].async);
43 assert (queues[i].cuda_stream == NULL);
44 }
45
46 /* No-ops still don't initialize them. */
47 {
48 size_t i = 0;
49 /* Find the first non-special async-argument. */
50 while (queues[i].async < 0)
51 ++i;
52 assert (i < queues_n);
53
54 #pragma acc wait(queues[i].async) // no-op
55
56 ++i;
57 assert (i < queues_n);
58 #pragma acc parallel wait(queues[i].async) // no-op
59 ;
60
61 ++i;
62 assert (i < queues_n);
63 acc_wait(queues[i].async); // no-op
64
65 i += 2;
66 assert (i < queues_n);
67 acc_wait_async(queues[i - 1].async, queues[i].async); // no-op, and async queue "i" does not get set up
68
69 for (size_t i = 0; i < queues_n; ++i)
70 {
71 queues[i].cuda_stream = acc_get_cuda_stream (queues[i].async);
72 assert (queues[i].cuda_stream == NULL);
73 }
74 }
75
76 for (size_t i = 0; i < queues_n; ++i)
77 {
78 /* Use the queue to initialize it. */
79 #pragma acc parallel async(queues[i].async)
80 ;
81 #pragma acc wait
82
83 /* Verify CUDA stream used. */
84 queues[i].cuda_stream = acc_get_cuda_stream (queues[i].async);
85 #if defined ACC_DEVICE_TYPE_nvidia
86 /* "acc_async_sync" maps to the NULL CUDA default stream. */
87 if (queues[i].async == acc_async_sync)
88 assert (queues[i].cuda_stream == NULL);
89 else
90 assert (queues[i].cuda_stream != NULL);
91 #elif defined ACC_DEVICE_TYPE_radeon
92 /* For "acc_device_radeon" there are no CUDA streams. */
93 assert (queues[i].cuda_stream == NULL);
94 #elif defined ACC_DEVICE_TYPE_host
95 /* For "acc_device_host" there are no CUDA streams. */
96 assert (queues[i].cuda_stream == NULL);
97 #else
98 # error Not ported to this ACC_DEVICE_TYPE
99 #endif
100 }
101
102 /* Verify same results. */
103 for (size_t i = 0; i < queues_n; ++i)
104 {
105 void *cuda_stream;
106
107 cuda_stream = acc_get_cuda_stream (queues[i].async);
108 assert (cuda_stream == queues[i].cuda_stream);
109
110 #pragma acc parallel async(queues[i].async)
111 ;
112 #pragma acc wait
113
114 cuda_stream = acc_get_cuda_stream (queues[i].async);
115 assert (cuda_stream == queues[i].cuda_stream);
116 }
117
118 /* Verify individual underlying queues are all different. */
119 for (size_t i = 0; i < queues_n; ++i)
120 {
121 if (queues[i].cuda_stream == NULL)
122 continue;
123 for (size_t j = i + 1; j < queues_n; ++j)
124 {
125 if (queues[j].cuda_stream == NULL)
126 continue;
127 assert (queues[j].cuda_stream != queues[i].cuda_stream);
128 }
129 }
130
131 return 0;
132 }