1  /* Test mapping of async values to specific underlying queues.  */
       2  
       3  #undef NDEBUG
       4  #include <assert.h>
       5  #include <openacc.h>
       6  
       7  /* This is implemented in terms of the "acc_get_cuda_stream" interface.  */
       8  
       9  struct
      10  {
      11    int async;
      12    void *cuda_stream;
      13  } queues[] = { { acc_async_sync, NULL },
      14  	       { acc_async_noval, NULL },
      15  	       { 0, NULL },
      16  	       { 1, NULL },
      17  	       { 2, NULL },
      18  	       { 36, NULL },
      19  	       { 1982, NULL } };
      20  const size_t queues_n = sizeof queues / sizeof queues[0];
      21  
      22  int main(void)
      23  {
      24    /* Explicitly initialize: it's not clear whether the following OpenACC
      25       runtime library calls implicitly initialize;
      26       <https://github.com/OpenACC/openacc-spec/issues/102>.  */
      27    acc_device_t d;
      28  #if defined ACC_DEVICE_TYPE_nvidia
      29    d = acc_device_nvidia;
      30  #elif defined ACC_DEVICE_TYPE_radeon
      31    d = acc_device_radeon;
      32  #elif defined ACC_DEVICE_TYPE_host
      33    d = acc_device_host;
      34  #else
      35  # error Not ported to this ACC_DEVICE_TYPE
      36  #endif
      37    acc_init (d);
      38  
      39    for (size_t i = 0; i < queues_n; ++i)
      40      {
      41        /* Before actually being used, there are all NULL.  */
      42        queues[i].cuda_stream = acc_get_cuda_stream (queues[i].async);
      43        assert (queues[i].cuda_stream == NULL);
      44      }
      45  
      46    /* No-ops still don't initialize them.  */
      47    {
      48      size_t i = 0;
      49      /* Find the first non-special async-argument.  */
      50      while (queues[i].async < 0)
      51        ++i;
      52      assert (i < queues_n);
      53  
      54  #pragma acc wait(queues[i].async) // no-op
      55  
      56      ++i;
      57      assert (i < queues_n);
      58  #pragma acc parallel wait(queues[i].async) // no-op
      59      ;
      60  
      61      ++i;
      62      assert (i < queues_n);
      63      acc_wait(queues[i].async); // no-op
      64  
      65      i += 2;
      66      assert (i < queues_n);
      67      acc_wait_async(queues[i - 1].async, queues[i].async); // no-op, and async queue "i" does not get set up
      68  
      69      for (size_t i = 0; i < queues_n; ++i)
      70        {
      71  	queues[i].cuda_stream = acc_get_cuda_stream (queues[i].async);
      72  	assert (queues[i].cuda_stream == NULL);
      73        }
      74    }
      75  
      76    for (size_t i = 0; i < queues_n; ++i)
      77      {
      78        /* Use the queue to initialize it.  */
      79  #pragma acc parallel async(queues[i].async)
      80        ;
      81  #pragma acc wait
      82  
      83        /* Verify CUDA stream used.  */
      84        queues[i].cuda_stream = acc_get_cuda_stream (queues[i].async);
      85  #if defined ACC_DEVICE_TYPE_nvidia
      86        /* "acc_async_sync" maps to the NULL CUDA default stream.  */
      87        if (queues[i].async == acc_async_sync)
      88  	assert (queues[i].cuda_stream == NULL);
      89        else
      90  	assert (queues[i].cuda_stream != NULL);
      91  #elif defined ACC_DEVICE_TYPE_radeon
      92        /* For "acc_device_radeon" there are no CUDA streams.  */
      93        assert (queues[i].cuda_stream == NULL);
      94  #elif defined ACC_DEVICE_TYPE_host
      95        /* For "acc_device_host" there are no CUDA streams.  */
      96        assert (queues[i].cuda_stream == NULL);
      97  #else
      98  # error Not ported to this ACC_DEVICE_TYPE
      99  #endif
     100      }
     101  
     102    /* Verify same results.  */
     103    for (size_t i = 0; i < queues_n; ++i)
     104      {
     105        void *cuda_stream;
     106  
     107        cuda_stream = acc_get_cuda_stream (queues[i].async);
     108        assert (cuda_stream == queues[i].cuda_stream);
     109  
     110  #pragma acc parallel async(queues[i].async)
     111        ;
     112  #pragma acc wait
     113  
     114        cuda_stream = acc_get_cuda_stream (queues[i].async);
     115        assert (cuda_stream == queues[i].cuda_stream);
     116      }
     117  
     118    /* Verify individual underlying queues are all different.  */
     119    for (size_t i = 0; i < queues_n; ++i)
     120      {
     121        if (queues[i].cuda_stream == NULL)
     122  	continue;
     123        for (size_t j = i + 1; j < queues_n; ++j)
     124  	{
     125  	  if (queues[j].cuda_stream == NULL)
     126  	    continue;
     127  	  assert (queues[j].cuda_stream != queues[i].cuda_stream);
     128  	}
     129      }
     130  
     131    return 0;
     132  }