(root)/
gcc-13.2.0/
libgomp/
testsuite/
libgomp.oacc-c-c++-common/
context-2.c
       1  /* { dg-do run { target openacc_nvidia_accel_selected } } */
       2  /* { dg-additional-options "-lcuda -lcublas -lcudart" } */
       3  /* { dg-require-effective-target openacc_cublas } */
       4  /* { dg-require-effective-target openacc_cudart } */
       5  
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cublas_v2.h>
#include <openacc.h>
      12  
      13  void
      14  saxpy (int n, float a, float *x, float *y)
      15  {
      16      int i;
      17  
      18      for (i = 0; i < n; i++)
      19      {
      20          y[i] = a * x[i] + y[i];
      21      }
      22  }
      23  
      24  void
      25  context_check (CUcontext ctx1)
      26  {
      27      CUcontext ctx2, ctx3;
      28      CUresult r;
      29  
      30      r = cuCtxGetCurrent (&ctx2);
      31      if (r != CUDA_SUCCESS)
      32      {
      33          fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
      34          exit (EXIT_FAILURE);
      35      }
      36  
      37      if (ctx1 != ctx2)
      38      {
      39          fprintf (stderr, "new context established\n");
      40          exit (EXIT_FAILURE);
      41      }
      42  
      43      ctx3 = (CUcontext) acc_get_current_cuda_context ();
      44  
      45      if (ctx1 != ctx3)
      46      {
      47          fprintf (stderr, "acc_get_current_cuda_context returned wrong value\n");
      48          exit (EXIT_FAILURE);
      49      }
      50  
      51      return;
      52  }
      53  
      54  int
      55  main (int argc, char **argv)
      56  {
      57      cublasStatus_t s;
      58      cudaError_t e;
      59      cublasHandle_t h;
      60      CUcontext pctx, ctx;
      61      CUresult r;
      62      int dev;
      63      int i;
      64      const int N = 256;
      65      float *h_X, *h_Y1, *h_Y2;
      66      float *d_X,*d_Y;
      67      float alpha = 2.0f;
      68      float error_norm;
      69      float ref_norm;
      70  
      71      /* Test 2 - cuBLAS creates, OpenACC shares.  */
      72  
      73      s = cublasCreate (&h);
      74      if (s != CUBLAS_STATUS_SUCCESS)
      75      {
      76          fprintf (stderr, "cublasCreate failed: %d\n", s);
      77          exit (EXIT_FAILURE);
      78      }
      79  
      80      r = cuCtxGetCurrent (&pctx);
      81      if (r != CUDA_SUCCESS)
      82      {
      83          fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
      84          exit (EXIT_FAILURE);
      85      }
      86  
      87      e = cudaGetDevice (&dev);
      88      if (e != cudaSuccess)
      89      {
      90          fprintf (stderr, "cudaGetDevice failed: %d\n", e);
      91          exit (EXIT_FAILURE);
      92      }
      93  
      94      acc_set_device_num (dev, acc_device_nvidia);
      95  
      96      h_X = (float *) malloc (N * sizeof (float));
      97      if (h_X == 0)
      98      {
      99          fprintf (stderr, "malloc failed: for h_X\n");
     100          exit (EXIT_FAILURE);
     101      }
     102  
     103      h_Y1 = (float *) malloc (N * sizeof (float));
     104      if (h_Y1 == 0)
     105      {
     106          fprintf (stderr, "malloc failed: for h_Y1\n");
     107          exit (EXIT_FAILURE);
     108      }
     109  
     110      h_Y2 = (float *) malloc (N * sizeof (float));
     111      if (h_Y2 == 0)
     112      {
     113          fprintf (stderr, "malloc failed: for h_Y2\n");
     114          exit (EXIT_FAILURE);
     115      }
     116  
     117      for (i = 0; i < N; i++)
     118      {
     119          h_X[i] = rand () / (float) RAND_MAX;
     120          h_Y2[i] = h_Y1[i] = rand () / (float) RAND_MAX;
     121      }
     122  
     123      d_X = (float *) acc_copyin (&h_X[0], N * sizeof (float));
     124      if (d_X == NULL)
     125      {
     126          fprintf (stderr, "copyin error h_X\n");
     127          exit (EXIT_FAILURE);
     128      }
     129  
     130      context_check (pctx);
     131  
     132      d_Y = (float *) acc_copyin (&h_Y1[0], N * sizeof (float));
     133      if (d_Y == NULL)
     134      {
     135          fprintf (stderr, "copyin error h_Y1\n");
     136          exit (EXIT_FAILURE);
     137      }
     138  
     139      context_check (pctx);
     140  
     141      s = cublasSaxpy (h, N, &alpha, d_X, 1, d_Y, 1);
     142      if (s != CUBLAS_STATUS_SUCCESS)
     143      {
     144          fprintf (stderr, "cublasSaxpy failed: %d\n", s);
     145          exit (EXIT_FAILURE);
     146      }
     147  
     148      context_check (pctx);
     149  
     150      acc_memcpy_from_device (&h_Y1[0], d_Y, N * sizeof (float));
     151  
     152      context_check (pctx);
     153  
     154  #pragma acc parallel present (h_X[0:N]), copy (h_Y2[0:N]) copyin (alpha)
     155      {
     156          int i;
     157  
     158          for (i = 0; i < N; i++)
     159          {
     160              h_Y2[i] = alpha * h_X[i] + h_Y2[i];
     161          }
     162      }
     163  
     164      context_check (pctx);
     165  
     166      error_norm = 0;
     167      ref_norm = 0;
     168  
     169      for (i = 0; i < N; ++i)
     170      {
     171          float diff;
     172  
     173          diff = h_Y1[i] - h_Y2[i];
     174          error_norm += diff * diff;
     175          ref_norm += h_Y2[i] * h_Y2[i];
     176      }
     177  
     178      error_norm = (float) sqrt ((double) error_norm);
     179      ref_norm = (float) sqrt ((double) ref_norm);
     180  
     181      if ((fabs (ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f))
     182      {
     183          fprintf (stderr, "math error\n");
     184          exit (EXIT_FAILURE);
     185      }
     186  
     187      acc_delete (&h_X[0], N * sizeof (float));
     188      acc_delete (&h_Y1[0], N * sizeof (float));
     189  
     190      free (h_X);
     191      free (h_Y1);
     192      free (h_Y2);
     193  
     194      context_check (pctx);
     195  
     196      s = cublasDestroy (h);
     197      if (s != CUBLAS_STATUS_SUCCESS)
     198      {
     199          fprintf (stderr, "cublasDestroy failed: %d\n", s);
     200          exit (EXIT_FAILURE);
     201      }
     202  
     203      acc_shutdown (acc_device_nvidia);
     204  
     205      r = cuCtxGetCurrent (&ctx);
     206      if (r != CUDA_SUCCESS)
     207      {
     208          fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
     209          exit (EXIT_FAILURE);
     210      }
     211  
     212      if (!ctx)
     213      {
     214          fprintf (stderr, "Expected context\n");
     215          exit (EXIT_FAILURE);
     216      }
     217  
     218      if (pctx != ctx)
     219      {
     220          fprintf (stderr, "Unexpected new context\n");
     221          exit (EXIT_FAILURE);
     222      }
     223  
     224      return EXIT_SUCCESS;
     225  }