1  /* { dg-do run { target openacc_nvidia_accel_selected } } */
       2  /* { dg-additional-options "-lcuda" } */
       3  /* { dg-require-effective-target openacc_cuda } */
       4  
       5  #include <stdio.h>
       6  #include <unistd.h>
       7  #include <stdlib.h>
       8  #include <openacc.h>
       9  #include <cuda.h>
      10  
      11  int
      12  main (int argc, char **argv)
      13  {
      14    CUdevice dev;
      15    CUfunction delay;
      16    CUmodule module;
      17    CUresult r;
      18    CUstream stream;
      19    unsigned long *a, *d_a, dticks;
      20    int nbytes;
      21    float dtime;
      22    void *kargs[2];
      23    int clkrate;
      24    int devnum, nprocs;
      25  
      26    acc_init (acc_device_nvidia);
      27  
      28    devnum = acc_get_device_num (acc_device_nvidia);
      29  
      30    r = cuDeviceGet (&dev, devnum);
      31    if (r != CUDA_SUCCESS)
      32      {
      33        fprintf (stderr, "cuDeviceGet failed: %d\n", r);
      34        abort ();
      35      }
      36  
      37    r =
      38      cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
      39  			  dev);
      40    if (r != CUDA_SUCCESS)
      41      {
      42        fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
      43        abort ();
      44      }
      45  
      46    r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
      47    if (r != CUDA_SUCCESS)
      48      {
      49        fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
      50        abort ();
      51      }
      52  
      53    r = cuModuleLoad (&module, "subr.ptx");
      54    if (r != CUDA_SUCCESS)
      55      {
      56        fprintf (stderr, "cuModuleLoad failed: %d\n", r);
      57        abort ();
      58      }
      59  
      60    r = cuModuleGetFunction (&delay, module, "delay");
      61    if (r != CUDA_SUCCESS)
      62      {
      63        fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
      64        abort ();
      65      }
      66  
      67    nbytes = nprocs * sizeof (unsigned long);
      68  
      69    dtime = 200.0;
      70  
      71    dticks = (unsigned long) (dtime * clkrate);
      72  
      73    a = (unsigned long *) malloc (nbytes);
      74    d_a = (unsigned long *) acc_malloc (nbytes);
      75  
      76    acc_map_data (a, d_a, nbytes);
      77  
      78    kargs[0] = (void *) &d_a;
      79    kargs[1] = (void *) &dticks;
      80  
      81    r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
      82    if (r != CUDA_SUCCESS)
      83      {
      84        fprintf (stderr, "cuStreamCreate failed: %d\n", r);
      85        abort ();
      86      }
      87  
      88    if (!acc_set_cuda_stream (0, stream))
      89      abort ();
      90      
      91    r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
      92    if (r != CUDA_SUCCESS)
      93      {
      94        fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
      95        abort ();
      96      }
      97  
      98    if (acc_async_test_all () != 0)
      99      {
     100        fprintf (stderr, "asynchronous operation not running\n");
     101        abort ();
     102      }
     103  
     104    sleep ((int) (dtime / 1000.f) + 1);
     105  
     106    if (acc_async_test_all () != 1)
     107      {
     108        fprintf (stderr, "found asynchronous operation still running\n");
     109        abort ();
     110      }
     111  
     112    acc_unmap_data (a);
     113  
     114    free (a);
     115    acc_free (d_a);
     116  
     117    acc_shutdown (acc_device_nvidia);
     118  
     119    exit (0);
     120  }
     121  
     122  /* { dg-output "" } */