(root)/
gcc-13.2.0/
libgomp/
testsuite/
libgomp.oacc-c-c++-common/
lib-82.c
       1  /* { dg-do run { target openacc_nvidia_accel_selected } } */
       2  /* { dg-additional-options "-lcuda" } */
       3  /* { dg-require-effective-target openacc_cuda } */
       4  
       5  #include <stdio.h>
       6  #include <stdlib.h>
       7  #include <unistd.h>
       8  #include <openacc.h>
       9  #include <cuda.h>
      10  
      11  int
      12  main (int argc, char **argv)
      13  {
      14    CUdevice dev;
      15    CUfunction delay2;
      16    CUmodule module;
      17    CUresult r;
      18    int N;
      19    int i;
      20    CUstream *streams;
      21    unsigned long **a, **d_a, *tid, ticks;
      22    int nbytes;
      23    void *kargs[3];
      24    int clkrate;
      25    int devnum, nprocs;
      26  
      27    acc_init (acc_device_nvidia);
      28  
      29    devnum = acc_get_device_num (acc_device_nvidia);
      30  
      31    r = cuDeviceGet (&dev, devnum);
      32    if (r != CUDA_SUCCESS)
      33      {
      34        fprintf (stderr, "cuDeviceGet failed: %d\n", r);
      35        abort ();
      36      }
      37  
      38    r =
      39      cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
      40  			  dev);
      41    if (r != CUDA_SUCCESS)
      42      {
      43        fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
      44        abort ();
      45      }
      46  
      47    r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
      48    if (r != CUDA_SUCCESS)
      49      {
      50        fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
      51        abort ();
      52      }
      53  
      54    r = cuModuleLoad (&module, "subr.ptx");
      55    if (r != CUDA_SUCCESS)
      56      {
      57        fprintf (stderr, "cuModuleLoad failed: %d\n", r);
      58        abort ();
      59      }
      60  
      61    r = cuModuleGetFunction (&delay2, module, "delay2");
      62    if (r != CUDA_SUCCESS)
      63      {
      64        fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
      65        abort ();
      66      }
      67  
      68    nbytes = sizeof (int);
      69  
      70    ticks = (unsigned long) (200.0 * clkrate);
      71  
      72    N = nprocs;
      73  
      74    streams = (CUstream *) malloc (N * sizeof (void *));
      75  
      76    a = (unsigned long **) malloc (N * sizeof (unsigned long *));
      77    d_a = (unsigned long **) malloc (N * sizeof (unsigned long *));
      78    tid = (unsigned long *) malloc (N * sizeof (unsigned long));
      79  
      80    for (i = 0; i < N; i++)
      81      {
      82        a[i] = (unsigned long *) malloc (sizeof (unsigned long));
      83        *a[i] = N;
      84        d_a[i] = (unsigned long *) acc_malloc (nbytes);
      85        tid[i] = i;
      86  
      87        acc_map_data (a[i], d_a[i], nbytes);
      88  
      89        streams[i] = (CUstream) acc_get_cuda_stream (i);
      90        if (streams[i] != NULL)
      91          abort ();
      92  
      93        r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
      94        if (r != CUDA_SUCCESS)
      95          {
      96            fprintf (stderr, "cuStreamCreate failed: %d\n", r);
      97            abort ();
      98          }
      99  
     100         if (!acc_set_cuda_stream (i, streams[i]))
     101          abort ();
     102      }
     103  
     104    for (i = 0; i < N; i++)
     105      {
     106        kargs[0] = (void *) &d_a[i];
     107        kargs[1] = (void *) &ticks;
     108        kargs[2] = (void *) &tid[i];
     109  
     110        r = cuLaunchKernel (delay2, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
     111        if (r != CUDA_SUCCESS)
     112  	{
     113  	  fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
     114  	  abort ();
     115  	}
     116  
     117        ticks = (unsigned long) (50.0 * clkrate);
     118      }
     119  
     120    acc_wait_all_async (0);
     121  
     122    for (i = 0; i < N; i++)
     123      {
     124        acc_memcpy_from_device (a[i], d_a[i], nbytes);
     125        if (*a[i] != i)
     126  	abort ();
     127  
     128        acc_unmap_data (a[i]);
     129  
     130        acc_free (d_a[i]);
     131      }
     132  
     133    free (streams);
     134  
     135    for (i = 0; i < N; i++)
     136      {
     137        free (a[i]);
     138      }
     139  
     140    free (a);
     141    free (d_a);
     142    free (tid);
     143  
     144    acc_shutdown (acc_device_nvidia);
     145  
     146    exit (0);
     147  }
     148  
     149  /* { dg-output "" } */