1  /* { dg-do run { target openacc_nvidia_accel_selected } } */
       2  /* { dg-additional-options "-lcuda" } */
       3  /* { dg-require-effective-target openacc_cuda } */
       4  
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <openacc.h>
#include <cuda.h>
       9  
      10  int
      11  main (int argc, char **argv)
      12  {
      13    CUdevice dev;
      14    CUfunction delay;
      15    CUmodule module;
      16    CUresult r;
      17    CUstream stream;
      18    unsigned long *a, *d_a, dticks;
      19    int nbytes;
      20    float dtime;
      21    void *kargs[2];
      22    int clkrate;
      23    int devnum, nprocs;
      24  
      25    acc_init (acc_device_nvidia);
      26  
      27    devnum = acc_get_device_num (acc_device_nvidia);
      28  
      29    r = cuDeviceGet (&dev, devnum);
      30    if (r != CUDA_SUCCESS)
      31      {
      32        fprintf (stderr, "cuDeviceGet failed: %d\n", r);
      33        abort ();
      34      }
      35  
      36    r =
      37      cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
      38  			  dev);
      39    if (r != CUDA_SUCCESS)
      40      {
      41        fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
      42        abort ();
      43      }
      44  
      45    r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
      46    if (r != CUDA_SUCCESS)
      47      {
      48        fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
      49        abort ();
      50      }
      51  
      52    r = cuModuleLoad (&module, "subr.ptx");
      53    if (r != CUDA_SUCCESS)
      54      {
      55        fprintf (stderr, "cuModuleLoad failed: %d\n", r);
      56        abort ();
      57      }
      58  
      59    r = cuModuleGetFunction (&delay, module, "delay");
      60    if (r != CUDA_SUCCESS)
      61      {
      62        fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
      63        abort ();
      64      }
      65  
      66    nbytes = nprocs * sizeof (unsigned long);
      67  
      68    dtime = 200.0;
      69  
      70    dticks = (unsigned long) (dtime * clkrate);
      71  
      72    a = (unsigned long *) malloc (nbytes);
      73    d_a = (unsigned long *) acc_malloc (nbytes);
      74  
      75    acc_map_data (a, d_a, nbytes);
      76  
      77    kargs[0] = (void *) &d_a;
      78    kargs[1] = (void *) &dticks;
      79  
      80    stream = (CUstream) acc_get_cuda_stream (0);
      81    if (stream != NULL)
      82      abort ();
      83  
      84    r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
      85    if (r != CUDA_SUCCESS)
      86      {
      87        fprintf (stderr, "cuStreamCreate failed: %d\n", r);
      88        abort ();
      89      }
      90  
      91    if (!acc_set_cuda_stream (0, stream))
      92      abort ();
      93  
      94    r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
      95    if (r != CUDA_SUCCESS)
      96      {
      97        fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
      98        abort ();
      99      }
     100  
     101    if (acc_async_test (0) != 0)
     102      {
     103        fprintf (stderr, "asynchronous operation not running\n");
     104        abort ();
     105      }
     106  
     107    /* Test unseen async-argument.  */
     108    if (acc_async_test (1) != 1)
     109      {
     110        fprintf (stderr, "acc_async_test failed on unseen async-argument\n");
     111        abort ();
     112      }
     113  
     114    sleep (1);
     115  
     116    if (acc_async_test (0) != 1)
     117      {
     118        fprintf (stderr, "found asynchronous operation still running\n");
     119        abort ();
     120      }
     121  
     122    acc_unmap_data (a);
     123  
     124    free (a);
     125    acc_free (d_a);
     126  
     127    acc_shutdown (acc_device_nvidia);
     128  
     129    exit (0);
     130  }
     131  
     132  /* { dg-output "" } */