1  /* { dg-do run { target openacc_nvidia_accel_selected } } */
       2  /* { dg-additional-options "-lcuda" } */
       3  /* { dg-require-effective-target openacc_cuda } */
       4  
       5  #include <stdio.h>
       6  #include <stdlib.h>
       7  #include <unistd.h>
       8  #include <openacc.h>
       9  #include <cuda.h>
      10  #include "timer.h"
      11  
      12  int
      13  main (int argc, char **argv)
      14  {
      15    CUdevice dev;
      16    CUfunction delay;
      17    CUmodule module;
      18    CUresult r;
      19    int N;
      20    int i;
      21    CUstream stream;
      22    unsigned long *a, *d_a, dticks;
      23    int nbytes;
      24    float atime, dtime, hitime, lotime;
      25    void *kargs[2];
      26    int clkrate;
      27    int devnum, nprocs;
      28  
      29    devnum = 2;
      30  
      31    acc_init (acc_device_nvidia);
      32  
      33    devnum = acc_get_device_num (acc_device_nvidia);
      34  
      35    r = cuDeviceGet (&dev, devnum);
      36    if (r != CUDA_SUCCESS)
      37      {
      38        fprintf (stderr, "cuDeviceGet failed: %d\n", r);
      39        abort ();
      40      }
      41  
      42    r =
      43      cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
      44  			  dev);
      45    if (r != CUDA_SUCCESS)
      46      {
      47        fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
      48        abort ();
      49      }
      50  
      51    r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
      52    if (r != CUDA_SUCCESS)
      53      {
      54        fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
      55        abort ();
      56      }
      57  
      58    r = cuModuleLoad (&module, "subr.ptx");
      59    if (r != CUDA_SUCCESS)
      60      {
      61        fprintf (stderr, "cuModuleLoad failed: %d\n", r);
      62        abort ();
      63      }
      64  
      65    r = cuModuleGetFunction (&delay, module, "delay");
      66    if (r != CUDA_SUCCESS)
      67      {
      68        fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
      69        abort ();
      70      }
      71  
      72    nbytes = nprocs * sizeof (unsigned long);
      73  
      74    dtime = 200.0;
      75  
      76    dticks = (unsigned long) (dtime * clkrate);
      77  
      78    N = nprocs;
      79  
      80    a = (unsigned long *) malloc (nbytes);
      81    d_a = (unsigned long *) acc_malloc (nbytes);
      82  
      83    acc_map_data (a, d_a, nbytes);
      84  
      85    r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
      86    if (r != CUDA_SUCCESS)
      87      {
      88        fprintf (stderr, "cuStreamCreate failed: %d\n", r);
      89        abort ();
      90      }
      91  
      92    if (!acc_set_cuda_stream (1, stream))
      93      abort ();
      94  
      95    stream = (CUstream) acc_get_cuda_stream (0);
      96    if (stream != NULL)
      97      abort ();
      98  
      99    r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
     100    if (r != CUDA_SUCCESS)
     101      {
     102        fprintf (stderr, "cuStreamCreate failed: %d\n", r);
     103        abort ();
     104      }
     105  
     106    if (!acc_set_cuda_stream (0, stream))
     107      abort ();
     108  
     109    init_timers (1);
     110  
     111    kargs[0] = (void *) &d_a;
     112    kargs[1] = (void *) &dticks;
     113  
     114    start_timer (0);
     115  
     116    for (i = 0; i < N; i++)
     117      {
     118        r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
     119        if (r != CUDA_SUCCESS)
     120  	{
     121  	  fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
     122  	  abort ();
     123  	}
     124      }
     125  
     126    if (acc_async_test (0) != 0)
     127      abort ();
     128  
     129    /* Test unseen async-argument.  */
     130    if (acc_async_test (1) != 1)
     131      abort ();
     132  
     133    acc_wait_async (0, 1);
     134  
     135    if (acc_async_test (0) != 0)
     136      abort ();
     137  
     138    if (acc_async_test (1) != 0)
     139      abort ();
     140  
     141    /* Test unseen async-argument.  */
     142    {
     143      if (acc_async_test (2) != 1)
     144        abort ();
     145  
     146      acc_wait_async (2, 1);
     147  
     148      if (acc_async_test (0) != 0)
     149        abort ();
     150  
     151      if (acc_async_test (1) != 0)
     152        abort ();
     153  
     154      if (acc_async_test (2) != 1)
     155        abort ();
     156    }
     157  
     158    acc_wait (1);
     159  
     160    atime = stop_timer (0);
     161  
     162    if (acc_async_test (0) != 1)
     163      abort ();
     164  
     165    if (acc_async_test (1) != 1)
     166      abort ();
     167  
     168    hitime = dtime * N;
     169    hitime += hitime * 0.02;
     170  
     171    lotime = dtime * N;
     172    lotime -= lotime * 0.02;
     173  
     174    if (atime > hitime || atime < lotime)
     175      {
     176        fprintf (stderr, "actual time < delay time\n");
     177        abort ();
     178      }
     179  
     180    acc_unmap_data (a);
     181  
     182    fini_timers ();
     183  
     184    free (a);
     185    acc_free (d_a);
     186  
     187    acc_shutdown (acc_device_nvidia);
     188  
     189    exit (0);
     190  }
     191  
     192  /* { dg-output "" } */