(root)/
gcc-13.2.0/
libgomp/
testsuite/
libgomp.oacc-c-c++-common/
context-3.c
       1  /* { dg-do run { target openacc_nvidia_accel_selected } } */
       2  /* { dg-additional-options "-lcuda -lcublas -lcudart" } */
       3  /* { dg-require-effective-target openacc_cublas } */
       4  /* { dg-require-effective-target openacc_cudart } */
       5  
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cublas_v2.h>
#include <openacc.h>
      12  
      13  void
      14  saxpy (int n, float a, float *x, float *y)
      15  {
      16      int i;
      17  
      18      for (i = 0; i < n; i++)
      19      {
      20          y[i] = a * x[i] + y[i];
      21      }
      22  }
      23  
      24  void
      25  context_check (CUcontext ctx1)
      26  {
      27      CUcontext ctx2, ctx3;
      28      CUresult r;
      29  
      30      r = cuCtxGetCurrent (&ctx2);
      31      if (r != CUDA_SUCCESS)
      32      {
      33          fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
      34          exit (EXIT_FAILURE);
      35      }
      36  
      37      if (ctx1 != ctx2)
      38      {
      39          fprintf (stderr, "new context established\n");
      40          exit (EXIT_FAILURE);
      41      }
      42  
      43      ctx3 = (CUcontext) acc_get_current_cuda_context ();
      44  
      45      if (ctx1 != ctx3)
      46      {
      47          fprintf (stderr, "acc_get_current_cuda_context returned wrong value\n");
      48          exit (EXIT_FAILURE);
      49      }
      50  
      51      return;
      52  }
      53  
      54  int
      55  main (int argc, char **argv)
      56  {
      57      cublasStatus_t s;
      58      cublasHandle_t h;
      59      CUcontext pctx;
      60      CUresult r;
      61      int i;
      62      const int N = 256;
      63      float *h_X, *h_Y1, *h_Y2;
      64      float *d_X,*d_Y;
      65      float alpha = 2.0f;
      66      float error_norm;
      67      float ref_norm;
      68  
      69      /* Test 3 - OpenACC creates, cuBLAS shares.  */
      70  
      71      acc_set_device_num (0, acc_device_nvidia);
      72  
      73      r = cuCtxGetCurrent (&pctx);
      74      if (r != CUDA_SUCCESS)
      75      {
      76          fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
      77          exit (EXIT_FAILURE);
      78      }
      79  
      80      h_X = (float *) malloc (N * sizeof (float));
      81      if (h_X == 0)
      82      {
      83          fprintf (stderr, "malloc failed: for h_X\n");
      84          exit (EXIT_FAILURE);
      85      }
      86  
      87      h_Y1 = (float *) malloc (N * sizeof (float));
      88      if (h_Y1 == 0)
      89      {
      90          fprintf (stderr, "malloc failed: for h_Y1\n");
      91          exit (EXIT_FAILURE);
      92      }
      93  
      94      h_Y2 = (float *) malloc (N * sizeof (float));
      95      if (h_Y2 == 0)
      96      {
      97          fprintf (stderr, "malloc failed: for h_Y2\n");
      98          exit (EXIT_FAILURE);
      99      }
     100  
     101      for (i = 0; i < N; i++)
     102      {
     103          h_X[i] = rand () / (float) RAND_MAX;
     104          h_Y2[i] = h_Y1[i] = rand () / (float) RAND_MAX;
     105      }
     106  
     107      d_X = (float *) acc_copyin (&h_X[0], N * sizeof (float));
     108      if (d_X == NULL)
     109      {
     110          fprintf (stderr, "copyin error h_X\n");
     111          exit (EXIT_FAILURE);
     112      }
     113  
     114      d_Y = (float *) acc_copyin (&h_Y1[0], N * sizeof (float));
     115      if (d_Y == NULL)
     116      {
     117          fprintf (stderr, "copyin error h_Y1\n");
     118          exit (EXIT_FAILURE);
     119      }
     120  
     121      context_check (pctx);
     122  
     123      s = cublasCreate (&h);
     124      if (s != CUBLAS_STATUS_SUCCESS)
     125      {
     126          fprintf (stderr, "cublasCreate failed: %d\n", s);
     127          exit (EXIT_FAILURE);
     128      }
     129  
     130      context_check (pctx);
     131  
     132      s = cublasSaxpy (h, N, &alpha, d_X, 1, d_Y, 1);
     133      if (s != CUBLAS_STATUS_SUCCESS)
     134      {
     135          fprintf (stderr, "cublasSaxpy failed: %d\n", s);
     136          exit (EXIT_FAILURE);
     137      }
     138  
     139      context_check (pctx);
     140  
     141      acc_memcpy_from_device (&h_Y1[0], d_Y, N * sizeof (float));
     142  
     143      context_check (pctx);
     144  
     145      saxpy (N, alpha, h_X, h_Y2);
     146  
     147      error_norm = 0;
     148      ref_norm = 0;
     149  
     150      for (i = 0; i < N; ++i)
     151      {
     152          float diff;
     153  
     154          diff = h_Y1[i] - h_Y2[i];
     155          error_norm += diff * diff;
     156          ref_norm += h_Y2[i] * h_Y2[i];
     157      }
     158  
     159      error_norm = (float) sqrt ((double) error_norm);
     160      ref_norm = (float) sqrt ((double) ref_norm);
     161  
     162      if ((fabs (ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f))
     163      {
     164          fprintf (stderr, "math error\n");
     165          exit (EXIT_FAILURE);
     166      }
     167  
     168      acc_delete (&h_X[0], N * sizeof (float));
     169      acc_delete (&h_Y1[0], N * sizeof (float));
     170  
     171      free (h_X);
     172      free (h_Y1);
     173      free (h_Y2);
     174  
     175      context_check (pctx);
     176  
     177      s = cublasDestroy (h);
     178      if (s != CUBLAS_STATUS_SUCCESS)
     179      {
     180          fprintf (stderr, "cublasDestroy failed: %d\n", s);
     181          exit (EXIT_FAILURE);
     182      }
     183  
     184      context_check (pctx);
     185  
     186      acc_shutdown (acc_device_nvidia);
     187  
     188      r = cuCtxGetCurrent (&pctx);
     189      if (r != CUDA_SUCCESS)
     190      {
     191          fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
     192          exit (EXIT_FAILURE);
     193      }
     194  
     195      if (pctx)
     196      {
     197          fprintf (stderr, "Unexpected context\n");
     198          exit (EXIT_FAILURE);
     199      }
     200  
     201      return EXIT_SUCCESS;
     202  }