(root)/
gcc-13.2.0/
libgomp/
testsuite/
libgomp.oacc-c-c++-common/
data-2-lib.c
       1  /* Test asynchronous, unstructured data regions, runtime library variant.  */
       2  /* See also data-2.c.  */
       3  
       4  #include <stdlib.h>
       5  #undef NDEBUG
       6  #include <assert.h>
       7  #include <openacc.h>
       8  /* Run four async phases (map/update, compute, read back) and assert each.  */
       9  int
      10  main (int argc, char **argv)
      11  {
      12    int N = 12345;
      13    float *a, *b, *c, *d, *e;
      14    void *d_a, *d_b, *d_c, *d_d;
      15    int i;
      16    int nbytes;
      17    /* Every host buffer holds N floats, so one byte count serves them all.  */
      18    nbytes = N * sizeof (float);
      19  
      20    a = (float *) malloc (nbytes);
      21    b = (float *) malloc (nbytes);
      22    c = (float *) malloc (nbytes);
      23    d = (float *) malloc (nbytes);
      24    e = (float *) malloc (nbytes);
      25    /* Phase 1 input: the kernel copies a into b, so both should read 3.0.  */
      26    for (i = 0; i < N; i++)
      27      {
      28        a[i] = 3.0;
      29        b[i] = 0.0;
      30      }
      31    /* Phase 1: map a, b and N on the device, all with acc_async_noval.  */
      32    acc_copyin_async (a, nbytes, acc_async_noval);
      33    acc_copyin_async (b, nbytes, acc_async_noval);
      34    acc_copyin_async (&N, sizeof (int), acc_async_noval);
      35    /* The bare async clause carries no value, matching acc_async_noval.  */
      36  #pragma acc parallel present (a[0:N], b[0:N], N) async
      37  #pragma acc loop
      38    for (i = 0; i < N; i++)
      39      b[i] = a[i];
      40    /* Read a and b back via their device addresses, again asynchronously.  */
      41    d_a = acc_deviceptr (a);
      42    acc_memcpy_from_device_async (a, d_a, nbytes, acc_async_noval);
      43    d_b = acc_deviceptr (b);
      44    acc_memcpy_from_device_async (b, d_b, nbytes, acc_async_noval);
      45    /* Block until the acc_async_noval work is done before checking results.  */
      46    acc_wait (acc_async_noval);
      47  
      48    for (i = 0; i < N; i++)
      49      {
      50        assert (a[i] == 3.0);
      51        assert (b[i] == 3.0);
      52      }
      53    /* Phase 2 input: same copy kernel, now expecting 2.0 in both arrays.  */
      54    for (i = 0; i < N; i++)
      55      {
      56        a[i] = 2.0;
      57        b[i] = 0.0;
      58      }
      59    /* Phase 2: a and b were never unmapped, so only update the device copies.  */
      60    acc_update_device_async (a, nbytes, 1);
      61    acc_update_device_async (b, nbytes, 1);
      62    /* Everything in this phase uses async value 1.  */
      63  #pragma acc parallel present (a[0:N], b[0:N], N) async (1)
      64  #pragma acc loop
      65    for (i = 0; i < N; i++)
      66      b[i] = a[i];
      67    /* Reuses d_a and d_b obtained in phase 1.  */
      68    acc_memcpy_from_device_async (a, d_a, nbytes, 1);
      69    acc_memcpy_from_device_async (b, d_b, nbytes, 1);
      70  
      71    acc_wait (1);
      72    /* Test unseen async-argument: value 10 has not been used up to here.  */
      73    acc_wait (10);
      74  
      75    for (i = 0; i < N; i++)
      76      {
      77        assert (a[i] == 2.0);
      78        assert (b[i] == 2.0);
      79      }
      80    /* Phase 3 input: a is the operand; b, c and d are cleared for the kernels.  */
      81    for (i = 0; i < N; i++)
      82      {
      83        a[i] = 3.0;
      84        b[i] = 0.0;
      85        c[i] = 0.0;
      86        d[i] = 0.0;
      87      }
      88    /* Phase 3: update a and b; c and d are copied in for the first time.  */
      89    acc_update_device_async (a, nbytes, 0);
      90    acc_update_device_async (b, nbytes, 1);
      91    acc_copyin_async (c, nbytes, 2);
      92    acc_copyin_async (d, nbytes, 3);
      93    /* Kernels run on values 1-3; wait (0) names the value that updated a.  */
      94  #pragma acc parallel present (a[0:N], b[0:N], N) wait (0) async (1)
      95  #pragma acc loop
      96    for (i = 0; i < N; i++)
      97      b[i] = (a[i] * a[i] * a[i]) / a[i];
      98  
      99  #pragma acc parallel present (a[0:N], c[0:N], N) wait (0) async (2)
     100  #pragma acc loop
     101    for (i = 0; i < N; i++)
     102      c[i] = (a[i] + a[i] + a[i] + a[i]) / a[i];
     103  
     104  #pragma acc parallel present (a[0:N], d[0:N], N) wait (0) async (3)
     105  #pragma acc loop
     106    for (i = 0; i < N; i++)
     107      d[i] = ((a[i] * a[i] + a[i]) / a[i]) - a[i];
     108    /* Read-back: b, c and d use their kernel's async value, a uses 0.  */
     109    acc_memcpy_from_device_async (a, d_a, nbytes, 0);
     110    acc_memcpy_from_device_async (b, d_b, nbytes, 1);
     111    d_c = acc_deviceptr (c);
     112    acc_memcpy_from_device_async (c, d_c, nbytes, 2);
     113    d_d = acc_deviceptr (d);
     114    acc_memcpy_from_device_async (d, d_d, nbytes, 3);
     115    /* Make value 0 wait on all async work, then block on value 0 only.  */
     116    acc_wait_all_async (0);
     117    acc_wait (0);
     118    /* With a == 3: b = a*a*a/a = 9, c = 4a/a = 4, d = (a*a+a)/a - a = 1.  */
     119    for (i = 0; i < N; i++)
     120      {
     121        assert (a[i] == 3.0);
     122        assert (b[i] == 9.0);
     123        assert (c[i] == 4.0);
     124        assert (d[i] == 1.0);
     125      }
     126    /* Phase 4 input: a is the operand again and e is cleared as well.  */
     127    for (i = 0; i < N; i++)
     128      {
     129        a[i] = 2.0;
     130        b[i] = 0.0;
     131        c[i] = 0.0;
     132        d[i] = 0.0;
     133        e[i] = 0.0;
     134      }
     135    /* Phase 4: one async value per array; 10-13 update a-d, 14 copies in e.  */
     136    acc_update_device_async (a, nbytes, 10);
     137    acc_update_device_async (b, nbytes, 11);
     138    acc_update_device_async (c, nbytes, 12);
     139    acc_update_device_async (d, nbytes, 13);
     140    acc_copyin_async (e, nbytes, 14);
     141    /* These kernels omit "#pragma acc loop" and declare ii in the for itself.  */
     142  #pragma acc parallel present (a[0:N], b[0:N], N) wait (10) async (11)
     143    for (int ii = 0; ii < N; ii++)
     144      b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
     145  
     146  #pragma acc parallel present (a[0:N], c[0:N], N) wait (10) async (12)
     147    for (int ii = 0; ii < N; ii++)
     148      c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
     149  
     150  #pragma acc parallel present (a[0:N], d[0:N], N) wait (10) async (13)
     151    for (int ii = 0; ii < N; ii++)
     152      d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
     153    /* The sum into e waits on 11, 12 and 13, the values producing b, c and d.  */
     154  #pragma acc parallel present (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N], N)  wait (11) wait (12) wait (13) async (14)
     155    for (int ii = 0; ii < N; ii++)
     156      e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
     157    /* Copy each array out on its own async value; N is deleted on value 15.  */
     158    acc_copyout_async (a, nbytes, 10);
     159    acc_copyout_async (b, nbytes, 11);
     160    acc_copyout_async (c, nbytes, 12);
     161    acc_copyout_async (d, nbytes, 13);
     162    acc_copyout_async (e, nbytes, 14);
     163    acc_delete_async (&N, sizeof (int), 15);
     164    acc_wait_all ();
     165    /* With a == 2: b = 4, c = 4, d = 1 and e = 2 + 4 + 4 + 1 = 11.  */
     166    for (i = 0; i < N; i++)
     167      {
     168        assert (a[i] == 2.0);
     169        assert (b[i] == 4.0);
     170        assert (c[i] == 4.0);
     171        assert (d[i] == 1.0);
     172        assert (e[i] == 11.0);
     173      }
     174  
     175    return 0;
     176  }