(root)/
gcc-13.2.0/
libgomp/
testsuite/
libgomp.oacc-c-c++-common/
context-1.c
       1  /* { dg-do run { target openacc_nvidia_accel_selected } } */
       2  /* { dg-additional-options "-lcuda -lcublas -lcudart" } */
       3  /* { dg-require-effective-target openacc_cublas } */
       4  /* { dg-require-effective-target openacc_cudart } */
       5  
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cublas_v2.h>
#include <openacc.h>
      12  
      13  void
      14  saxpy (int n, float a, float *x, float *y)
      15  {
      16      int i;
      17  
      18      for (i = 0; i < n; i++)
      19      {
      20          y[i] = a * x[i] + y[i];
      21      }
      22  }
      23  
      24  void
      25  context_check (CUcontext ctx1)
      26  {
      27      CUcontext ctx2, ctx3;
      28      CUresult r;
      29  
      30      r = cuCtxGetCurrent (&ctx2);
      31      if (r != CUDA_SUCCESS)
      32      {
      33          fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
      34          exit (EXIT_FAILURE);
      35      }
      36  
      37      if (ctx1 != ctx2)
      38      {
      39          fprintf (stderr, "new context established\n");
      40          exit (EXIT_FAILURE);
      41      }
      42  
      43      ctx3 = (CUcontext) acc_get_current_cuda_context ();
      44  
      45      if (ctx1 != ctx3)
      46      {
      47          fprintf (stderr, "acc_get_current_cuda_context returned wrong value\n");
      48          exit (EXIT_FAILURE);
      49      }
      50  
      51      return;
      52  }
      53  
      54  int
      55  main (int argc, char **argv)
      56  {
      57      cublasStatus_t s;
      58      cudaError_t e;
      59      cublasHandle_t h;
      60      CUcontext pctx, ctx;
      61      CUresult r;
      62      int dev;
      63      int i;
      64      const int N = 256;
      65      float *h_X, *h_Y1, *h_Y2;
      66      float *d_X,*d_Y;
      67      float alpha = 2.0f;
      68      float error_norm;
      69      float ref_norm;
      70  
      71      /* Test 1 - cuBLAS creates, OpenACC shares.  */
      72  
      73      s = cublasCreate (&h);
      74      if (s != CUBLAS_STATUS_SUCCESS)
      75      {
      76          fprintf (stderr, "cublasCreate failed: %d\n", s);
      77          exit (EXIT_FAILURE);
      78      }
      79  
      80      r = cuCtxGetCurrent (&pctx);
      81      if (r != CUDA_SUCCESS)
      82      {
      83          fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
      84          exit (EXIT_FAILURE);
      85      }
      86  
      87      e = cudaGetDevice (&dev);
      88      if (e != cudaSuccess)
      89      {
      90          fprintf (stderr, "cudaGetDevice failed: %d\n", e);
      91          exit (EXIT_FAILURE);
      92      }
      93  
      94      acc_set_device_num (dev, acc_device_nvidia);
      95  
      96      h_X = (float *) malloc (N * sizeof (float));
      97      if (!h_X)
      98      {
      99          fprintf (stderr, "malloc failed: for h_X\n");
     100          exit (EXIT_FAILURE);
     101      }
     102  
     103      h_Y1 = (float *) malloc (N * sizeof (float));
     104      if (!h_Y1)
     105      {
     106          fprintf (stderr, "malloc failed: for h_Y1\n");
     107          exit (EXIT_FAILURE);
     108      }
     109  
     110      h_Y2 = (float *) malloc (N * sizeof (float));
     111      if (!h_Y2)
     112      {
     113          fprintf (stderr, "malloc failed: for h_Y2\n");
     114          exit (EXIT_FAILURE);
     115      }
     116  
     117      for (i = 0; i < N; i++)
     118      {
     119          h_X[i] = rand () / (float) RAND_MAX;
     120          h_Y2[i] = h_Y1[i] = rand () / (float) RAND_MAX;
     121      }
     122  
     123      d_X = (float *) acc_copyin (&h_X[0], N * sizeof (float));
     124      if (d_X == NULL)
     125      {
     126          fprintf (stderr, "copyin error h_X\n");
     127          exit (EXIT_FAILURE);
     128      }
     129  
     130      context_check (pctx);
     131  
     132      d_Y = (float *) acc_copyin (&h_Y1[0], N * sizeof (float));
     133      if (d_Y == NULL)
     134      {
     135          fprintf (stderr, "copyin error h_Y1\n");
     136          exit (EXIT_FAILURE);
     137      }
     138  
     139      context_check (pctx);
     140  
     141      s = cublasSaxpy (h, N, &alpha, d_X, 1, d_Y, 1);
     142      if (s != CUBLAS_STATUS_SUCCESS)
     143      {
     144          fprintf (stderr, "cublasSaxpy failed: %d\n", s);
     145          exit (EXIT_FAILURE);
     146      }
     147  
     148      context_check (pctx);
     149  
     150      acc_memcpy_from_device (&h_Y1[0], d_Y, N * sizeof (float));
     151  
     152      context_check (pctx);
     153  
     154      saxpy (N, alpha, h_X, h_Y2);
     155  
     156      error_norm = 0;
     157      ref_norm = 0;
     158  
     159      for (i = 0; i < N; ++i)
     160      {
     161          float diff;
     162  
     163          diff = h_Y1[i] - h_Y2[i];
     164          error_norm += diff * diff;
     165          ref_norm += h_Y2[i] * h_Y2[i];
     166      }
     167  
     168      error_norm = (float) sqrt ((double) error_norm);
     169      ref_norm = (float) sqrt ((double) ref_norm);
     170  
     171      if ((fabs (ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f))
     172      {
     173          fprintf (stderr, "math error\n");
     174          exit (EXIT_FAILURE);
     175      }
     176  
     177      acc_delete (&h_X[0], N * sizeof (float));
     178      acc_delete (&h_Y1[0], N * sizeof (float));
     179  
     180      free (h_X);
     181      free (h_Y1);
     182      free (h_Y2);
     183  
     184      context_check (pctx);
     185  
     186      s = cublasDestroy (h);
     187      if (s != CUBLAS_STATUS_SUCCESS)
     188      {
     189          fprintf (stderr, "cublasDestroy failed: %d\n", s);
     190          exit (EXIT_FAILURE);
     191      }
     192  
     193      acc_shutdown (acc_device_nvidia);
     194  
     195      r = cuCtxGetCurrent (&ctx);
     196      if (r != CUDA_SUCCESS)
     197      {
     198          fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
     199          exit (EXIT_FAILURE);
     200      }
     201  
     202      if (!ctx)
     203      {
     204          fprintf (stderr, "Expected context\n");
     205          exit (EXIT_FAILURE);
     206      }
     207  
     208      if (pctx != ctx)
     209      {
     210          fprintf (stderr, "Unexpected new context\n");
     211          exit (EXIT_FAILURE);
     212      }
     213  
     214      return EXIT_SUCCESS;
     215  }