(root)/
gcc-13.2.0/
libgomp/
testsuite/
libgomp.oacc-c-c++-common/
host_data-1.c
       1  /* { dg-do run { target openacc_nvidia_accel_selected } } */
       2  /* { dg-additional-options "-lm -lcuda -lcublas -lcudart -Wall -Wextra" } */
       3  /* { dg-require-effective-target openacc_cublas } */
       4  /* { dg-require-effective-target openacc_cudart } */
       5  
       6  #include <stdlib.h>
       7  #include <math.h>
       8  #include <openacc.h>
       9  #include <cuda.h>
      10  #include <cuda_runtime_api.h>
      11  #include <cublas_v2.h>
      12  
      13  #pragma acc routine
      14  void
      15  saxpy (int n, float a, float *x, float *y)
      16  {
      17    int i;
      18  
      19    for (i = 0; i < n; i++)
      20      y[i] = y[i] + a * x[i];
      21  }
      22  
      23  void
      24  validate_results (int n, float *a, float *b)
      25  {
      26    int i;
      27  
      28    for (i = 0; i < n; i++)
      29      if (fabs (a[i] - b[i]) > .00001)
      30        abort ();
      31  }
      32  
      33  int
      34  main()
      35  {
      36  #define N 8
      37    int i;
      38    float x_ref[N], y_ref[N];
      39    float x[N], y[N];
      40    cublasHandle_t h;
      41    float a = 2.0;
      42  
      43    for (i = 0; i < N; i++)
      44      {
      45        x[i] = x_ref[i] = 4.0 + i;
      46        y[i] = y_ref[i] = 3.0;
      47      }
      48  
      49    saxpy (N, a, x_ref, y_ref);
      50  
      51    cublasCreate (&h);
      52  
      53  #pragma acc data copyin (x[0:N]) copy (y[0:N])
      54    {
      55  #pragma acc host_data use_device (x, y)
      56      {
      57        cublasSaxpy (h, N, &a, x, 1, y, 1);
      58      }
      59    }
      60  
      61    validate_results (N, y, y_ref);
      62  
      63  #pragma acc data create (x[0:N]) copyout (y[0:N])
      64    {
      65  #pragma acc kernels
      66      for (i = 0; i < N; i++)
      67        y[i] = 3.0;
      68  
      69  #pragma acc host_data use_device (x, y)
      70      {
      71        cublasSaxpy (h, N, &a, x, 1, y, 1);
      72      }
      73    }
      74  
      75    cublasDestroy (h);
      76  
      77    validate_results (N, y, y_ref);
      78  
      79    for (i = 0; i < N; i++)
      80      y[i] = 3.0;
      81  
      82    /* There's no need to use host_data here.  */
      83  #pragma acc data copyin (x[0:N]) copyin (a) copy (y[0:N])
      84    {
      85  #pragma acc parallel present (x[0:N]) pcopy (y[0:N]) present (a)
      86      saxpy (N, a, x, y);
      87    }
      88  
      89    validate_results (N, y, y_ref);
      90  
      91    /* Exercise host_data with data transferred with acc enter data.  */
      92  
      93    for (i = 0; i < N; i++)
      94      y[i] = 3.0;
      95  
      96  #pragma acc enter data copyin (x, a, y)
      97  #pragma acc parallel present (x[0:N]) pcopy (y[0:N]) present (a)
      98    {
      99      saxpy (N, a, x, y);
     100    }
     101  #pragma acc exit data delete (x, a) copyout (y)
     102  
     103    validate_results (N, y, y_ref);
     104  
     105    return 0;
     106  }