1 /* { dg-do run { target openacc_nvidia_accel_selected } } */
2 /* { dg-additional-options "-lcuda" } */
3 /* { dg-require-effective-target openacc_cuda } */
4
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <unistd.h>
8 #include <openacc.h>
9 #include <cuda.h>
10 #include "timer.h"
11
12 int
13 main (int argc, char **argv)
14 {
15 CUdevice dev;
16 CUfunction delay;
17 CUmodule module;
18 CUresult r;
19 int N;
20 int i;
21 CUstream *streams;
22 unsigned long *a, *d_a, dticks;
23 int nbytes;
24 float atime, dtime, hitime, lotime;
25 void *kargs[2];
26 int clkrate;
27 int devnum, nprocs;
28
29 acc_init (acc_device_nvidia);
30
31 devnum = acc_get_device_num (acc_device_nvidia);
32
33 r = cuDeviceGet (&dev, devnum);
34 if (r != CUDA_SUCCESS)
35 {
36 fprintf (stderr, "cuDeviceGet failed: %d\n", r);
37 abort ();
38 }
39
40 r =
41 cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
42 dev);
43 if (r != CUDA_SUCCESS)
44 {
45 fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
46 abort ();
47 }
48
49 r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
50 if (r != CUDA_SUCCESS)
51 {
52 fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
53 abort ();
54 }
55
56 r = cuModuleLoad (&module, "subr.ptx");
57 if (r != CUDA_SUCCESS)
58 {
59 fprintf (stderr, "cuModuleLoad failed: %d\n", r);
60 abort ();
61 }
62
63 r = cuModuleGetFunction (&delay, module, "delay");
64 if (r != CUDA_SUCCESS)
65 {
66 fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
67 abort ();
68 }
69
70 nbytes = nprocs * sizeof (unsigned long);
71
72 dtime = 200.0;
73
74 dticks = (unsigned long) (dtime * clkrate);
75
76 N = nprocs;
77
78 a = (unsigned long *) malloc (nbytes);
79 d_a = (unsigned long *) acc_malloc (nbytes);
80
81 acc_map_data (a, d_a, nbytes);
82
83 streams = (CUstream *) malloc (N * sizeof (void *));
84
85 for (i = 0; i < N; i++)
86 {
87 streams[i] = (CUstream) acc_get_cuda_stream (i);
88 if (streams[i] != NULL)
89 abort ();
90
91 r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
92 if (r != CUDA_SUCCESS)
93 {
94 fprintf (stderr, "cuStreamCreate failed: %d\n", r);
95 abort ();
96 }
97
98 if (!acc_set_cuda_stream (i, streams[i]))
99 abort ();
100 }
101
102 init_timers (1);
103
104 kargs[0] = (void *) &d_a;
105 kargs[1] = (void *) &dticks;
106
107 start_timer (0);
108
109 for (i = 0; i < N; i++)
110 {
111 r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
112 if (r != CUDA_SUCCESS)
113 {
114 fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
115 abort ();
116 }
117
118 acc_wait (i);
119 }
120
121 atime = stop_timer (0);
122
123 hitime = dtime * N;
124 hitime += hitime * 0.02;
125
126 lotime = dtime * N;
127 lotime -= lotime * 0.02;
128
129 if (atime > hitime || atime < lotime)
130 {
131 fprintf (stderr, "actual time < delay time\n");
132 abort ();
133 }
134
135 acc_unmap_data (a);
136
137 fini_timers ();
138
139 free (streams);
140 free (a);
141 acc_free (d_a);
142
143 acc_shutdown (acc_device_nvidia);
144
145 exit (0);
146 }
147
148 /* { dg-output "" } */