(root)/
gcc-13.2.0/
libgomp/
testsuite/
libgomp.oacc-c-c++-common/
lib-81.c
       1  /* { dg-do run { target openacc_nvidia_accel_selected } } */
       2  /* { dg-additional-options "-lcuda" } */
       3  /* { dg-require-effective-target openacc_cuda } */
       4  
       5  #include <stdio.h>
       6  #include <stdlib.h>
       7  #include <unistd.h>
       8  #include <openacc.h>
       9  #include <cuda.h>
      10  #include "timer.h"
      11  
      12  int
      13  main (int argc, char **argv)
      14  {
      15    CUdevice dev;
      16    CUfunction delay;
      17    CUmodule module;
      18    CUresult r;
      19    int N;
      20    int i;
      21    CUstream *streams, stream;
      22    unsigned long *a, *d_a, dticks;
      23    int nbytes;
      24    float atime, dtime;
      25    void *kargs[2];
      26    int clkrate;
      27    int devnum, nprocs;
      28  
      29    acc_init (acc_device_nvidia);
      30  
      31    devnum = acc_get_device_num (acc_device_nvidia);
      32  
      33    r = cuDeviceGet (&dev, devnum);
      34    if (r != CUDA_SUCCESS)
      35      {
      36        fprintf (stderr, "cuDeviceGet failed: %d\n", r);
      37        abort ();
      38      }
      39  
      40    r =
      41      cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
      42  			  dev);
      43    if (r != CUDA_SUCCESS)
      44      {
      45        fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
      46        abort ();
      47      }
      48  
      49    r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
      50    if (r != CUDA_SUCCESS)
      51      {
      52        fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
      53        abort ();
      54      }
      55  
      56    r = cuModuleLoad (&module, "subr.ptx");
      57    if (r != CUDA_SUCCESS)
      58      {
      59        fprintf (stderr, "cuModuleLoad failed: %d\n", r);
      60        abort ();
      61      }
      62  
      63    r = cuModuleGetFunction (&delay, module, "delay");
      64    if (r != CUDA_SUCCESS)
      65      {
      66        fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
      67        abort ();
      68      }
      69  
      70    nbytes = nprocs * sizeof (unsigned long);
      71  
      72    dtime = 500.0;
      73  
      74    dticks = (unsigned long) (dtime * clkrate);
      75  
      76    N = nprocs;
      77  
      78    a = (unsigned long *) malloc (nbytes);
      79    d_a = (unsigned long *) acc_malloc (nbytes);
      80  
      81    acc_map_data (a, d_a, nbytes);
      82  
      83    streams = (CUstream *) malloc (N * sizeof (void *));
      84  
      85    for (i = 0; i < N; i++)
      86      {
      87        streams[i] = (CUstream) acc_get_cuda_stream (i);
      88        if (streams[i] != NULL)
      89  	abort ();
      90  
      91        r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
      92        if (r != CUDA_SUCCESS)
      93  	{
      94  	  fprintf (stderr, "cuStreamCreate failed: %d\n", r);
      95  	  abort ();
      96  	}
      97  
      98          if (!acc_set_cuda_stream (i, streams[i]))
      99  	  abort ();
     100      }
     101  
     102    init_timers (1);
     103  
     104    kargs[0] = (void *) &d_a;
     105    kargs[1] = (void *) &dticks;
     106  
     107    stream = (CUstream) acc_get_cuda_stream (N);
     108    if (stream != NULL)
     109      abort ();
     110  
     111    r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
     112    if (r != CUDA_SUCCESS)
     113      {
     114        fprintf (stderr, "cuStreamCreate failed: %d\n", r);
     115        abort ();
     116      }
     117  
     118    if (!acc_set_cuda_stream (N, stream))
     119      abort ();
     120  
     121    start_timer (0);
     122  
     123    for (i = 0; i < N; i++)
     124      {
     125        r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
     126        if (r != CUDA_SUCCESS)
     127  	{
     128  	  fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
     129  	  abort ();
     130  	}
     131      }
     132  
     133    acc_wait_all_async (N);
     134  
     135    for (i = 0; i <= N; i++)
     136      {
     137        if (acc_async_test (i) != 0)
     138  	abort ();
     139      }
     140  
     141    acc_wait (N);
     142  
     143    for (i = 0; i <= N; i++)
     144      {
     145        if (acc_async_test (i) != 1)
     146  	abort ();
     147      }
     148  
     149    atime = stop_timer (0);
     150  
     151    if (atime < dtime)
     152      {
     153        fprintf (stderr, "actual time < delay time\n");
     154        abort ();
     155      }
     156  
     157    start_timer (0);
     158  
     159    stream = (CUstream) acc_get_cuda_stream (N + 1);
     160    if (stream != NULL)
     161      abort ();
     162  
     163    r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
     164    if (r != CUDA_SUCCESS)
     165      {
     166        fprintf (stderr, "cuStreamCreate failed: %d\n", r);
     167        abort ();
     168      }
     169  
     170    if (!acc_set_cuda_stream (N + 1, stream))
     171      abort ();
     172  
     173    acc_wait_all_async (N + 1);
     174  
     175    acc_wait (N + 1);
     176  
     177    atime = stop_timer (0);
     178  
     179    if (0.10 < atime)
     180      {
     181        fprintf (stderr, "actual time too long\n");
     182        abort ();
     183      }
     184  
     185    start_timer (0);
     186  
     187    acc_wait_all_async (N);
     188  
     189    acc_wait (N);
     190  
     191    atime = stop_timer (0);
     192  
     193    if (0.10 < atime)
     194      {
     195        fprintf (stderr, "actual time too long\n");
     196        abort ();
     197      }
     198  
     199    acc_unmap_data (a);
     200  
     201    fini_timers ();
     202  
     203    free (streams);
     204    free (a);
     205    acc_free (d_a);
     206  
     207    acc_shutdown (acc_device_nvidia);
     208  
     209    exit (0);
     210  }
     211  
     212  /* { dg-output "" } */