1  /* { dg-do run { target openacc_nvidia_accel_selected } } */
       2  /* { dg-additional-options "-lcuda" } */
       3  /* { dg-require-effective-target openacc_cuda } */
       4  
       5  #include <stdio.h>
       6  #include <stdlib.h>
       7  #include <unistd.h>
       8  #include <openacc.h>
       9  #include <cuda.h>
      10  #include "timer.h"
      11  
      12  int
      13  main (int argc, char **argv)
      14  {
      15    CUdevice dev;
      16    CUfunction delay;
      17    CUmodule module;
      18    CUresult r;
      19    CUstream stream;
      20    unsigned long *a, *d_a, dticks;
      21    int nbytes;
      22    float atime, dtime;
      23    void *kargs[2];
      24    int clkrate;
      25    int devnum, nprocs;
      26  
      27    acc_init (acc_device_nvidia);
      28  
      29    devnum = acc_get_device_num (acc_device_nvidia);
      30  
      31    r = cuDeviceGet (&dev, devnum);
      32    if (r != CUDA_SUCCESS)
      33      {
      34        fprintf (stderr, "cuDeviceGet failed: %d\n", r);
      35        abort ();
      36      }
      37  
      38    r =
      39      cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
      40  			  dev);
      41    if (r != CUDA_SUCCESS)
      42      {
      43        fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
      44        abort ();
      45      }
      46  
      47    r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
      48    if (r != CUDA_SUCCESS)
      49      {
      50        fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
      51        abort ();
      52      }
      53  
      54    r = cuModuleLoad (&module, "subr.ptx");
      55    if (r != CUDA_SUCCESS)
      56      {
      57        fprintf (stderr, "cuModuleLoad failed: %d\n", r);
      58        abort ();
      59      }
      60  
      61    r = cuModuleGetFunction (&delay, module, "delay");
      62    if (r != CUDA_SUCCESS)
      63      {
      64        fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
      65        abort ();
      66      }
      67  
      68    nbytes = nprocs * sizeof (unsigned long);
      69  
      70    dtime = 200.0;
      71  
      72    dticks = (unsigned long) (dtime * clkrate);
      73  
      74    a = (unsigned long *) malloc (nbytes);
      75    d_a = (unsigned long *) acc_malloc (nbytes);
      76  
      77    acc_map_data (a, d_a, nbytes);
      78  
      79    kargs[0] = (void *) &d_a;
      80    kargs[1] = (void *) &dticks;
      81  
      82    stream = (CUstream) acc_get_cuda_stream (0);
      83    if (stream != NULL)
      84      abort ();
      85  
      86    r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
      87    if (r != CUDA_SUCCESS)
      88      {
      89        fprintf (stderr, "cuStreamCreate failed: %d\n", r);
      90        abort ();
      91      }
      92  
      93    if (!acc_set_cuda_stream (0, stream))
      94      abort ();
      95  
      96    init_timers (1);
      97  
      98    start_timer (0);
      99  
     100    r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
     101    if (r != CUDA_SUCCESS)
     102      {
     103        fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
     104        abort ();
     105      }
     106  
     107    acc_wait_all ();
     108  
     109    atime = stop_timer (0);
     110  
     111    if (atime < dtime)
     112      {
     113        fprintf (stderr, "actual time < delay time\n");
     114        abort ();
     115      }
     116  
     117    start_timer (0);
     118  
     119    acc_wait_all ();
     120  
     121    atime = stop_timer (0);
     122  
     123    if (0.010 < atime)
     124      {
     125        fprintf (stderr, "actual time too long\n");
     126        abort ();
     127      }
     128  
     129    acc_unmap_data (a);
     130  
     131    fini_timers ();
     132  
     133    free (a);
     134    acc_free (d_a);
     135  
     136    acc_shutdown (acc_device_nvidia);
     137  
     138    exit (0);
     139  }
     140  
     141  /* { dg-output "" } */