1  /* { dg-do run { target openacc_nvidia_accel_selected } } */
       2  /* { dg-additional-options "-lcuda" } */
       3  /* { dg-require-effective-target openacc_cuda } */
       4  
       5  #include <stdio.h>
       6  #include <stdlib.h>
       7  #include <openacc.h>
       8  #include <cuda.h>
       9  #include "timer.h"
      10  
      11  int
      12  main (int argc, char **argv)
      13  {
      14    CUdevice dev;
      15    CUfunction delay;
      16    CUmodule module;
      17    CUresult r;
      18    CUstream stream;
      19    unsigned long *a, *d_a, dticks;
      20    int nbytes;
      21    float atime, dtime;
      22    void *kargs[2];
      23    int clkrate;
      24    int devnum, nprocs;
      25  
      26    acc_init (acc_device_nvidia);
      27  
      28    devnum = acc_get_device_num (acc_device_nvidia);
      29  
      30    r = cuDeviceGet (&dev, devnum);
      31    if (r != CUDA_SUCCESS)
      32      {
      33        fprintf (stderr, "cuDeviceGet failed: %d\n", r);
      34        abort ();
      35      }
      36  
      37    r =
      38      cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
      39  			  dev);
      40    if (r != CUDA_SUCCESS)
      41      {
      42        fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
      43        abort ();
      44      }
      45  
      46    r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
      47    if (r != CUDA_SUCCESS)
      48      {
      49        fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
      50        abort ();
      51      }
      52  
      53    r = cuModuleLoad (&module, "subr.ptx");
      54    if (r != CUDA_SUCCESS)
      55      {
      56        fprintf (stderr, "cuModuleLoad failed: %d\n", r);
      57        abort ();
      58      }
      59  
      60    r = cuModuleGetFunction (&delay, module, "delay");
      61    if (r != CUDA_SUCCESS)
      62      {
      63        fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
      64        abort ();
      65      }
      66  
      67    nbytes = nprocs * sizeof (unsigned long);
      68  
      69    dtime = 200.0;
      70  
      71    dticks = (unsigned long) (dtime * clkrate);
      72  
      73    a = (unsigned long *) malloc (nbytes);
      74    d_a = (unsigned long *) acc_malloc (nbytes);
      75  
      76    acc_map_data (a, d_a, nbytes);
      77  
      78    kargs[0] = (void *) &d_a;
      79    kargs[1] = (void *) &dticks;
      80  
      81    stream = (CUstream) acc_get_cuda_stream (0);
      82    if (stream != NULL)
      83      abort ();
      84  
      85    r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
      86    if (r != CUDA_SUCCESS)
      87      {
      88        fprintf (stderr, "cuStreamCreate failed: %d\n", r);
      89        abort ();
      90      }
      91  
      92    if (!acc_set_cuda_stream (0, stream))
      93      abort ();
      94  
      95    init_timers (1);
      96  
      97    start_timer (0);
      98  
      99    r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
     100    if (r != CUDA_SUCCESS)
     101      {
     102        fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
     103        abort ();
     104      }
     105  
     106    acc_wait (0);
     107    /* Test unseen async-argument.  */
     108    acc_wait (1);
     109  
     110    atime = stop_timer (0);
     111  
     112    if (atime < dtime)
     113      {
     114        fprintf (stderr, "actual time < delay time\n");
     115        abort ();
     116      }
     117  
     118    start_timer (0);
     119  
     120    acc_wait (0);
     121    /* Test unseen async-argument.  */
     122    acc_wait (1);
     123  
     124    atime = stop_timer (0);
     125  
     126    if (0.010 < atime)
     127      {
     128        fprintf (stderr, "actual time too long\n");
     129        abort ();
     130      }
     131  
     132    acc_unmap_data (a);
     133  
     134    fini_timers ();
     135  
     136    free (a);
     137    acc_free (d_a);
     138  
     139    acc_shutdown (acc_device_nvidia);
     140  
     141    exit (0);
     142  }
     143  
     144  /* { dg-output "" } */