1  /* { dg-do run { target openacc_nvidia_accel_selected } } */
       2  /* { dg-additional-options "-lcuda" } */
       3  /* { dg-require-effective-target openacc_cuda } */
       4  
       5  #include <stdio.h>
       6  #include <unistd.h>
       7  #include <stdlib.h>
       8  #include <openacc.h>
       9  #include <cuda.h>
      10  #include "timer.h"
      11  
      12  int
      13  main (int argc, char **argv)
      14  {
      15    CUdevice dev;
      16    CUfunction delay;
      17    CUmodule module;
      18    CUresult r;
      19    int N;
      20    int i;
      21    CUstream stream;
      22    unsigned long *a, *d_a, dticks;
      23    int nbytes;
      24    float atime, dtime, hitime, lotime;
      25    void *kargs[2];
      26    int clkrate;
      27    int devnum, nprocs;
      28  
      29    acc_init (acc_device_nvidia);
      30  
      31    devnum = acc_get_device_num (acc_device_nvidia);
      32  
      33    r = cuDeviceGet (&dev, devnum);
      34    if (r != CUDA_SUCCESS)
      35      {
      36        fprintf (stderr, "cuDeviceGet failed: %d\n", r);
      37        abort ();
      38      }
      39  
      40    r =
      41      cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
      42  			  dev);
      43    if (r != CUDA_SUCCESS)
      44      {
      45        fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
      46        abort ();
      47      }
      48  
      49    r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
      50    if (r != CUDA_SUCCESS)
      51      {
      52        fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
      53        abort ();
      54      }
      55  
      56    r = cuModuleLoad (&module, "subr.ptx");
      57    if (r != CUDA_SUCCESS)
      58      {
      59        fprintf (stderr, "cuModuleLoad failed: %d\n", r);
      60        abort ();
      61      }
      62  
      63    r = cuModuleGetFunction (&delay, module, "delay");
      64    if (r != CUDA_SUCCESS)
      65      {
      66        fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
      67        abort ();
      68      }
      69  
      70    nbytes = nprocs * sizeof (unsigned long);
      71  
      72    dtime = 200.0;
      73  
      74    dticks = (unsigned long) (dtime * clkrate);
      75  
      76    N = nprocs;
      77  
      78    a = (unsigned long *) malloc (nbytes);
      79    d_a = (unsigned long *) acc_malloc (nbytes);
      80  
      81    acc_map_data (a, d_a, nbytes);
      82  
      83    stream = (CUstream) acc_get_cuda_stream (0);
      84    if (stream != NULL)
      85      abort ();
      86  
      87    r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
      88    if (r != CUDA_SUCCESS)
      89      {
      90        fprintf (stderr, "cuStreamCreate failed: %d\n", r);
      91        abort ();
      92      }
      93  
      94    if (!acc_set_cuda_stream (0, stream))
      95      abort ();
      96  
      97    init_timers (1);
      98  
      99    kargs[0] = (void *) &d_a;
     100    kargs[1] = (void *) &dticks;
     101  
     102    start_timer (0);
     103  
     104    for (i = 0; i < N; i++)
     105      {
     106        r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
     107        if (r != CUDA_SUCCESS)
     108  	{
     109  	  fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
     110  	  abort ();
     111  	}
     112  
     113        acc_wait (0);
     114      }
     115  
     116    atime = stop_timer (0);
     117  
     118    hitime = dtime * N;
     119    hitime += hitime * 0.02;
     120  
     121    lotime = dtime * N;
     122    lotime -= lotime * 0.02;
     123  
     124    if (atime > hitime || atime < lotime)
     125      {
     126        fprintf (stderr, "actual time < delay time\n");
     127        abort ();
     128      }
     129  
     130    acc_unmap_data (a);
     131  
     132    fini_timers ();
     133  
     134    free (a);
     135    acc_free (d_a);
     136  
     137    acc_shutdown (acc_device_nvidia);
     138  
     139    exit (0);
     140  }
     141  
     142  /* { dg-output "" } */