1 /* { dg-do run { target openacc_nvidia_accel_selected } } */
2 /* { dg-additional-options "-lcuda" } */
3 /* { dg-require-effective-target openacc_cuda } */
4
5 #include <stdio.h>
6 #include <unistd.h>
7 #include <stdlib.h>
8 #include <openacc.h>
9 #include <cuda.h>
10 #include "timer.h"
11
12 int
13 main (int argc, char **argv)
14 {
15 CUdevice dev;
16 CUfunction delay;
17 CUmodule module;
18 CUresult r;
19 int N;
20 int i;
21 CUstream stream;
22 unsigned long *a, *d_a, dticks;
23 int nbytes;
24 float atime, dtime, hitime, lotime;
25 void *kargs[2];
26 int clkrate;
27 int devnum, nprocs;
28
29 acc_init (acc_device_nvidia);
30
31 devnum = acc_get_device_num (acc_device_nvidia);
32
33 r = cuDeviceGet (&dev, devnum);
34 if (r != CUDA_SUCCESS)
35 {
36 fprintf (stderr, "cuDeviceGet failed: %d\n", r);
37 abort ();
38 }
39
40 r =
41 cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
42 dev);
43 if (r != CUDA_SUCCESS)
44 {
45 fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
46 abort ();
47 }
48
49 r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
50 if (r != CUDA_SUCCESS)
51 {
52 fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
53 abort ();
54 }
55
56 r = cuModuleLoad (&module, "subr.ptx");
57 if (r != CUDA_SUCCESS)
58 {
59 fprintf (stderr, "cuModuleLoad failed: %d\n", r);
60 abort ();
61 }
62
63 r = cuModuleGetFunction (&delay, module, "delay");
64 if (r != CUDA_SUCCESS)
65 {
66 fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
67 abort ();
68 }
69
70 nbytes = nprocs * sizeof (unsigned long);
71
72 dtime = 200.0;
73
74 dticks = (unsigned long) (dtime * clkrate);
75
76 N = nprocs;
77
78 a = (unsigned long *) malloc (nbytes);
79 d_a = (unsigned long *) acc_malloc (nbytes);
80
81 acc_map_data (a, d_a, nbytes);
82
83 stream = (CUstream) acc_get_cuda_stream (0);
84 if (stream != NULL)
85 abort ();
86
87 r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
88 if (r != CUDA_SUCCESS)
89 {
90 fprintf (stderr, "cuStreamCreate failed: %d\n", r);
91 abort ();
92 }
93
94 if (!acc_set_cuda_stream (0, stream))
95 abort ();
96
97 init_timers (1);
98
99 kargs[0] = (void *) &d_a;
100 kargs[1] = (void *) &dticks;
101
102 start_timer (0);
103
104 for (i = 0; i < N; i++)
105 {
106 r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
107 if (r != CUDA_SUCCESS)
108 {
109 fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
110 abort ();
111 }
112
113 acc_wait (0);
114 }
115
116 atime = stop_timer (0);
117
118 hitime = dtime * N;
119 hitime += hitime * 0.02;
120
121 lotime = dtime * N;
122 lotime -= lotime * 0.02;
123
124 if (atime > hitime || atime < lotime)
125 {
126 fprintf (stderr, "actual time < delay time\n");
127 abort ();
128 }
129
130 acc_unmap_data (a);
131
132 fini_timers ();
133
134 free (a);
135 acc_free (d_a);
136
137 acc_shutdown (acc_device_nvidia);
138
139 exit (0);
140 }
141
142 /* { dg-output "" } */