1 /* { dg-do run { target openacc_nvidia_accel_selected } } */
2 /* { dg-additional-options "-lm -lcuda -lcublas -lcudart -Wall -Wextra" } */
3 /* { dg-require-effective-target openacc_cublas } */
4 /* { dg-require-effective-target openacc_cudart } */
5
6 #include <stdlib.h>
7 #include <math.h>
8 #include <openacc.h>
9 #include <cuda.h>
10 #include <cuda_runtime_api.h>
11 #include <cublas_v2.h>
12
/* Single-precision "a times x plus y": for each of the first N elements,
   replace Y[k] with Y[k] + A * X[k].  X is only read; Y is updated in
   place.  Nothing happens when N is zero or negative.  The routine
   directive makes the function callable from OpenACC compute regions as
   well as from the host.  */
#pragma acc routine
void
saxpy (int n, float a, float *x, float *y)
{
  int k = 0;

  while (k < n)
    {
      y[k] = y[k] + a * x[k];
      k++;
    }
}
22
/* Compare the first N elements of A and B and call abort () as soon as a
   pair differs by more than 1e-5 in absolute value.

   The check is written as "not within tolerance" rather than "greater
   than tolerance" so that a NaN in either array (for which every ordered
   comparison is false) is reported as a failure instead of silently
   passing.  Returns normally only when all N pairs match; N <= 0 checks
   nothing.  */
void
validate_results (int n, float *a, float *b)
{
  int i;

  for (i = 0; i < n; i++)
    if (!(fabs (a[i] - b[i]) <= .00001))
      abort ();
}
32
33 int
34 main()
35 {
36 #define N 8
37 int i;
38 float x_ref[N], y_ref[N];
39 float x[N], y[N];
40 cublasHandle_t h;
41 float a = 2.0;
42
43 for (i = 0; i < N; i++)
44 {
45 x[i] = x_ref[i] = 4.0 + i;
46 y[i] = y_ref[i] = 3.0;
47 }
48
49 saxpy (N, a, x_ref, y_ref);
50
51 cublasCreate (&h);
52
53 #pragma acc data copyin (x[0:N]) copy (y[0:N])
54 {
55 #pragma acc host_data use_device (x, y)
56 {
57 cublasSaxpy (h, N, &a, x, 1, y, 1);
58 }
59 }
60
61 validate_results (N, y, y_ref);
62
63 #pragma acc data create (x[0:N]) copyout (y[0:N])
64 {
65 #pragma acc kernels
66 for (i = 0; i < N; i++)
67 y[i] = 3.0;
68
69 #pragma acc host_data use_device (x, y)
70 {
71 cublasSaxpy (h, N, &a, x, 1, y, 1);
72 }
73 }
74
75 cublasDestroy (h);
76
77 validate_results (N, y, y_ref);
78
79 for (i = 0; i < N; i++)
80 y[i] = 3.0;
81
82 /* There's no need to use host_data here. */
83 #pragma acc data copyin (x[0:N]) copyin (a) copy (y[0:N])
84 {
85 #pragma acc parallel present (x[0:N]) pcopy (y[0:N]) present (a)
86 saxpy (N, a, x, y);
87 }
88
89 validate_results (N, y, y_ref);
90
91 /* Exercise host_data with data transferred with acc enter data. */
92
93 for (i = 0; i < N; i++)
94 y[i] = 3.0;
95
96 #pragma acc enter data copyin (x, a, y)
97 #pragma acc parallel present (x[0:N]) pcopy (y[0:N]) present (a)
98 {
99 saxpy (N, a, x, y);
100 }
101 #pragma acc exit data delete (x, a) copyout (y)
102
103 validate_results (N, y, y_ref);
104
105 return 0;
106 }