(root)/
gcc-13.2.0/
libgomp/
testsuite/
libgomp.oacc-c-c++-common/
context-4.c
       1  /* { dg-do run { target openacc_nvidia_accel_selected } } */
       2  /* { dg-additional-options "-lcuda -lcublas -lcudart" } */
       3  /* { dg-require-effective-target openacc_cublas } */
       4  /* { dg-require-effective-target openacc_cudart } */
       5  
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cublas_v2.h>
#include <openacc.h>
      12  
      13  void
      14  saxpy (int n, float a, float *x, float *y)
      15  {
      16      int i;
      17  
      18      for (i = 0; i < n; i++)
      19      {
      20          y[i] = a * x[i] + y[i];
      21      }
      22  }
      23  
      24  void
      25  context_check (CUcontext ctx1)
      26  {
      27      CUcontext ctx2, ctx3;
      28      CUresult r;
      29  
      30      r = cuCtxGetCurrent (&ctx2);
      31      if (r != CUDA_SUCCESS)
      32      {
      33          fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
      34          exit (EXIT_FAILURE);
      35      }
      36  
      37      if (ctx1 != ctx2)
      38      {
      39          fprintf (stderr, "new context established\n");
      40          exit (EXIT_FAILURE);
      41      }
      42  
      43      ctx3 = (CUcontext) acc_get_current_cuda_context ();
      44  
      45      if (ctx1 != ctx3)
      46      {
      47          fprintf (stderr, "acc_get_current_cuda_context returned wrong value\n");
      48          exit (EXIT_FAILURE);
      49      }
      50  
      51      return;
      52  }
      53  
      54  int
      55  main (int argc, char **argv)
      56  {
      57      cublasStatus_t s;
      58      cublasHandle_t h;
      59      CUcontext pctx;
      60      CUresult r;
      61      int i;
      62      const int N = 256;
      63      float *h_X, *h_Y1, *h_Y2;
      64      float *d_X,*d_Y;
      65      float alpha = 2.0f;
      66      float error_norm;
      67      float ref_norm;
      68  
      69      /* Test 4 - OpenACC creates, cuBLAS shares.  */
      70  
      71      acc_set_device_num (0, acc_device_nvidia);
      72  
      73      r = cuCtxGetCurrent (&pctx);
      74      if (r != CUDA_SUCCESS)
      75      {
      76          fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
      77          exit (EXIT_FAILURE);
      78      }
      79  
      80      h_X = (float *) malloc (N * sizeof (float));
      81      if (h_X == 0)
      82      {
      83          fprintf (stderr, "malloc failed: for h_X\n");
      84          exit (EXIT_FAILURE);
      85      }
      86  
      87      h_Y1 = (float *) malloc (N * sizeof (float));
      88      if (h_Y1 == 0)
      89      {
      90          fprintf (stderr, "malloc failed: for h_Y1\n");
      91          exit (EXIT_FAILURE);
      92      }
      93  
      94      h_Y2 = (float *) malloc (N * sizeof (float));
      95      if (h_Y2 == 0)
      96      {
      97          fprintf (stderr, "malloc failed: for h_Y2\n");
      98          exit (EXIT_FAILURE);
      99      }
     100  
     101      for (i = 0; i < N; i++)
     102      {
     103          h_X[i] = rand () / (float) RAND_MAX;
     104          h_Y2[i] = h_Y1[i] = rand () / (float) RAND_MAX;
     105      }
     106  
     107  #pragma acc parallel copyin (h_X[0:N]), copy (h_Y2[0:N]) copy (alpha)
     108      {
     109          int i;
     110  
     111          for (i = 0; i < N; i++)
     112          {
     113              h_Y2[i] = alpha * h_X[i] + h_Y2[i];
     114          }
     115      }
     116  
     117      r = cuCtxGetCurrent (&pctx);
     118      if (r != CUDA_SUCCESS)
     119      {
     120          fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
     121          exit (EXIT_FAILURE);
     122      }
     123  
     124      d_X = (float *) acc_copyin (&h_X[0], N * sizeof (float));
     125      if (d_X == NULL)
     126      {
     127          fprintf (stderr, "copyin error h_Y1\n");
     128          exit (EXIT_FAILURE);
     129      }
     130  
     131      d_Y = (float *) acc_copyin (&h_Y1[0], N * sizeof (float));
     132      if (d_Y == NULL)
     133      {
     134          fprintf (stderr, "copyin error h_Y1\n");
     135          exit (EXIT_FAILURE);
     136      }
     137  
     138      s = cublasCreate (&h);
     139      if (s != CUBLAS_STATUS_SUCCESS)
     140      {
     141          fprintf (stderr, "cublasCreate failed: %d\n", s);
     142          exit (EXIT_FAILURE);
     143      }
     144  
     145      context_check (pctx);
     146  
     147      s = cublasSaxpy (h, N, &alpha, d_X, 1, d_Y, 1);
     148      if (s != CUBLAS_STATUS_SUCCESS)
     149      {
     150          fprintf (stderr, "cublasSaxpy failed: %d\n", s);
     151          exit (EXIT_FAILURE);
     152      }
     153  
     154      context_check (pctx);
     155  
     156      acc_memcpy_from_device (&h_Y1[0], d_Y, N * sizeof (float));
     157  
     158      context_check (pctx);
     159  
     160      error_norm = 0;
     161      ref_norm = 0;
     162  
     163      for (i = 0; i < N; ++i)
     164      {
     165          float diff;
     166  
     167          diff = h_Y1[i] - h_Y2[i];
     168          error_norm += diff * diff;
     169          ref_norm += h_Y2[i] * h_Y2[i];
     170      }
     171  
     172      error_norm = (float) sqrt ((double) error_norm);
     173      ref_norm = (float) sqrt ((double) ref_norm);
     174  
     175      if ((fabs (ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f))
     176      {
     177          fprintf (stderr, "math error\n");
     178          exit (EXIT_FAILURE);
     179      }
     180  
     181      acc_delete (&h_X[0], N * sizeof (float));
     182      acc_delete (&h_Y1[0], N * sizeof (float));
     183  
     184      free (h_X);
     185      free (h_Y1);
     186      free (h_Y2);
     187  
     188      context_check (pctx);
     189  
     190      s = cublasDestroy (h);
     191      if (s != CUBLAS_STATUS_SUCCESS)
     192      {
     193          fprintf (stderr, "cublasDestroy failed: %d\n", s);
     194          exit (EXIT_FAILURE);
     195      }
     196  
     197      context_check (pctx);
     198  
     199      acc_shutdown (acc_device_nvidia);
     200  
     201      r = cuCtxGetCurrent (&pctx);
     202      if (r != CUDA_SUCCESS)
     203      {
     204          fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
     205          exit (EXIT_FAILURE);
     206      }
     207  
     208      if (pctx)
     209      {
     210          fprintf (stderr, "Unexpected context\n");
     211          exit (EXIT_FAILURE);
     212      }
     213  
     214      return EXIT_SUCCESS;
     215  }