1 /* { dg-do run { target openacc_nvidia_accel_selected } } */
2 /* { dg-additional-options "-lcuda" } */
3 /* { dg-require-effective-target openacc_cuda } */
4
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <openacc.h>
8 #include <cuda.h>
9 #include "timer.h"
10
11 int
12 main (int argc, char **argv)
13 {
14 CUdevice dev;
15 CUfunction delay;
16 CUmodule module;
17 CUresult r;
18 CUstream stream;
19 unsigned long *a, *d_a, dticks;
20 int nbytes;
21 float atime, dtime;
22 void *kargs[2];
23 int clkrate;
24 int devnum, nprocs;
25
26 acc_init (acc_device_nvidia);
27
28 devnum = acc_get_device_num (acc_device_nvidia);
29
30 r = cuDeviceGet (&dev, devnum);
31 if (r != CUDA_SUCCESS)
32 {
33 fprintf (stderr, "cuDeviceGet failed: %d\n", r);
34 abort ();
35 }
36
37 r =
38 cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
39 dev);
40 if (r != CUDA_SUCCESS)
41 {
42 fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
43 abort ();
44 }
45
46 r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
47 if (r != CUDA_SUCCESS)
48 {
49 fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
50 abort ();
51 }
52
53 r = cuModuleLoad (&module, "subr.ptx");
54 if (r != CUDA_SUCCESS)
55 {
56 fprintf (stderr, "cuModuleLoad failed: %d\n", r);
57 abort ();
58 }
59
60 r = cuModuleGetFunction (&delay, module, "delay");
61 if (r != CUDA_SUCCESS)
62 {
63 fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
64 abort ();
65 }
66
67 nbytes = nprocs * sizeof (unsigned long);
68
69 dtime = 200.0;
70
71 dticks = (unsigned long) (dtime * clkrate);
72
73 a = (unsigned long *) malloc (nbytes);
74 d_a = (unsigned long *) acc_malloc (nbytes);
75
76 acc_map_data (a, d_a, nbytes);
77
78 kargs[0] = (void *) &d_a;
79 kargs[1] = (void *) &dticks;
80
81 stream = (CUstream) acc_get_cuda_stream (0);
82 if (stream != NULL)
83 abort ();
84
85 r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
86 if (r != CUDA_SUCCESS)
87 {
88 fprintf (stderr, "cuStreamCreate failed: %d\n", r);
89 abort ();
90 }
91
92 if (!acc_set_cuda_stream (0, stream))
93 abort ();
94
95 init_timers (1);
96
97 start_timer (0);
98
99 r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
100 if (r != CUDA_SUCCESS)
101 {
102 fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
103 abort ();
104 }
105
106 acc_wait (0);
107 /* Test unseen async-argument. */
108 acc_wait (1);
109
110 atime = stop_timer (0);
111
112 if (atime < dtime)
113 {
114 fprintf (stderr, "actual time < delay time\n");
115 abort ();
116 }
117
118 start_timer (0);
119
120 acc_wait (0);
121 /* Test unseen async-argument. */
122 acc_wait (1);
123
124 atime = stop_timer (0);
125
126 if (0.010 < atime)
127 {
128 fprintf (stderr, "actual time too long\n");
129 abort ();
130 }
131
132 acc_unmap_data (a);
133
134 fini_timers ();
135
136 free (a);
137 acc_free (d_a);
138
139 acc_shutdown (acc_device_nvidia);
140
141 exit (0);
142 }
143
144 /* { dg-output "" } */