1 /* { dg-do run { target openacc_nvidia_accel_selected } } */
2 /* { dg-additional-options "-lcuda" } */
3 /* { dg-require-effective-target openacc_cuda } */
4
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <unistd.h>
8 #include <openacc.h>
9 #include <cuda.h>
10 #include "timer.h"
11
12 int
13 main (int argc, char **argv)
14 {
15 CUdevice dev;
16 CUfunction delay;
17 CUmodule module;
18 CUresult r;
19 int N;
20 int i;
21 CUstream stream;
22 unsigned long *a, *d_a, dticks;
23 int nbytes;
24 float atime, dtime, hitime, lotime;
25 void *kargs[2];
26 int clkrate;
27 int devnum, nprocs;
28
29 devnum = 2;
30
31 acc_init (acc_device_nvidia);
32
33 devnum = acc_get_device_num (acc_device_nvidia);
34
35 r = cuDeviceGet (&dev, devnum);
36 if (r != CUDA_SUCCESS)
37 {
38 fprintf (stderr, "cuDeviceGet failed: %d\n", r);
39 abort ();
40 }
41
42 r =
43 cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
44 dev);
45 if (r != CUDA_SUCCESS)
46 {
47 fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
48 abort ();
49 }
50
51 r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
52 if (r != CUDA_SUCCESS)
53 {
54 fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
55 abort ();
56 }
57
58 r = cuModuleLoad (&module, "subr.ptx");
59 if (r != CUDA_SUCCESS)
60 {
61 fprintf (stderr, "cuModuleLoad failed: %d\n", r);
62 abort ();
63 }
64
65 r = cuModuleGetFunction (&delay, module, "delay");
66 if (r != CUDA_SUCCESS)
67 {
68 fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
69 abort ();
70 }
71
72 nbytes = nprocs * sizeof (unsigned long);
73
74 dtime = 200.0;
75
76 dticks = (unsigned long) (dtime * clkrate);
77
78 N = nprocs;
79
80 a = (unsigned long *) malloc (nbytes);
81 d_a = (unsigned long *) acc_malloc (nbytes);
82
83 acc_map_data (a, d_a, nbytes);
84
85 r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
86 if (r != CUDA_SUCCESS)
87 {
88 fprintf (stderr, "cuStreamCreate failed: %d\n", r);
89 abort ();
90 }
91
92 if (!acc_set_cuda_stream (1, stream))
93 abort ();
94
95 stream = (CUstream) acc_get_cuda_stream (0);
96 if (stream != NULL)
97 abort ();
98
99 r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
100 if (r != CUDA_SUCCESS)
101 {
102 fprintf (stderr, "cuStreamCreate failed: %d\n", r);
103 abort ();
104 }
105
106 if (!acc_set_cuda_stream (0, stream))
107 abort ();
108
109 init_timers (1);
110
111 kargs[0] = (void *) &d_a;
112 kargs[1] = (void *) &dticks;
113
114 start_timer (0);
115
116 for (i = 0; i < N; i++)
117 {
118 r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
119 if (r != CUDA_SUCCESS)
120 {
121 fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
122 abort ();
123 }
124 }
125
126 if (acc_async_test (0) != 0)
127 abort ();
128
129 /* Test unseen async-argument. */
130 if (acc_async_test (1) != 1)
131 abort ();
132
133 acc_wait_async (0, 1);
134
135 if (acc_async_test (0) != 0)
136 abort ();
137
138 if (acc_async_test (1) != 0)
139 abort ();
140
141 /* Test unseen async-argument. */
142 {
143 if (acc_async_test (2) != 1)
144 abort ();
145
146 acc_wait_async (2, 1);
147
148 if (acc_async_test (0) != 0)
149 abort ();
150
151 if (acc_async_test (1) != 0)
152 abort ();
153
154 if (acc_async_test (2) != 1)
155 abort ();
156 }
157
158 acc_wait (1);
159
160 atime = stop_timer (0);
161
162 if (acc_async_test (0) != 1)
163 abort ();
164
165 if (acc_async_test (1) != 1)
166 abort ();
167
168 hitime = dtime * N;
169 hitime += hitime * 0.02;
170
171 lotime = dtime * N;
172 lotime -= lotime * 0.02;
173
174 if (atime > hitime || atime < lotime)
175 {
176 fprintf (stderr, "actual time < delay time\n");
177 abort ();
178 }
179
180 acc_unmap_data (a);
181
182 fini_timers ();
183
184 free (a);
185 acc_free (d_a);
186
187 acc_shutdown (acc_device_nvidia);
188
189 exit (0);
190 }
191
192 /* { dg-output "" } */