(root)/
gcc-13.2.0/
libgomp/
oacc-async.c
       1  /* OpenACC Runtime Library Definitions.
       2  
       3     Copyright (C) 2013-2023 Free Software Foundation, Inc.
       4  
       5     Contributed by Mentor Embedded.
       6  
       7     This file is part of the GNU Offloading and Multi Processing Library
       8     (libgomp).
       9  
      10     Libgomp is free software; you can redistribute it and/or modify it
      11     under the terms of the GNU General Public License as published by
      12     the Free Software Foundation; either version 3, or (at your option)
      13     any later version.
      14  
      15     Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
      16     WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
      17     FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
      18     more details.
      19  
      20     Under Section 7 of GPL version 3, you are granted additional
      21     permissions described in the GCC Runtime Library Exception, version
      22     3.1, as published by the Free Software Foundation.
      23  
      24     You should have received a copy of the GNU General Public License and
      25     a copy of the GCC Runtime Library Exception along with this program;
      26     see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
      27     <http://www.gnu.org/licenses/>.  */
      28  
      29  #include <assert.h>
      30  #include <string.h>
      31  #include "openacc.h"
      32  #include "libgomp.h"
      33  #include "oacc-int.h"
      34  
      35  static struct goacc_thread *
      36  get_goacc_thread (void)
      37  {
      38    struct goacc_thread *thr = goacc_thread ();
      39  
      40    if (!thr || !thr->dev)
      41      gomp_fatal ("no device active");
      42  
      43    return thr;
      44  }
      45  
      46  static int
      47  validate_async_val (int async)
      48  {
      49    if (!async_valid_p (async))
      50      gomp_fatal ("invalid async-argument: %d", async);
      51  
      52    if (async == acc_async_sync)
      53      return -1;
      54  
      55    if (async == acc_async_noval)
      56      return 0;
      57  
      58    if (async >= 0)
      59      /* TODO: we reserve 0 for acc_async_noval before we can clarify the
      60         semantics of "default_async".  */
      61      return 1 + async;
      62    else
      63      __builtin_unreachable ();
      64  }
      65  
      66  /* Return the asyncqueue to be used for OpenACC async-argument ASYNC.  This
      67     might return NULL if no asyncqueue is to be used.  Otherwise, if CREATE,
      68     create the asyncqueue if it doesn't exist yet.
      69  
      70     Unless CREATE, this will not generate any OpenACC Profiling Interface
      71     events.  */
      72  
      73  attribute_hidden struct goacc_asyncqueue *
      74  lookup_goacc_asyncqueue (struct goacc_thread *thr, bool create, int async)
      75  {
      76    async = validate_async_val (async);
      77    if (async < 0)
      78      return NULL;
      79  
      80    struct goacc_asyncqueue *ret_aq = NULL;
      81    struct gomp_device_descr *dev = thr->dev;
      82  
      83    gomp_mutex_lock (&dev->openacc.async.lock);
      84  
      85    if (!create
      86        && (async >= dev->openacc.async.nasyncqueue
      87  	  || !dev->openacc.async.asyncqueue[async]))
      88      goto end;
      89  
      90    if (async >= dev->openacc.async.nasyncqueue)
      91      {
      92        int diff = async + 1 - dev->openacc.async.nasyncqueue;
      93        dev->openacc.async.asyncqueue
      94  	= gomp_realloc (dev->openacc.async.asyncqueue,
      95  			sizeof (goacc_aq) * (async + 1));
      96        memset (dev->openacc.async.asyncqueue + dev->openacc.async.nasyncqueue,
      97  	      0, sizeof (goacc_aq) * diff);
      98        dev->openacc.async.nasyncqueue = async + 1;
      99      }
     100  
     101    if (!dev->openacc.async.asyncqueue[async])
     102      {
     103        dev->openacc.async.asyncqueue[async]
     104  	= dev->openacc.async.construct_func (dev->target_id);
     105  
     106        if (!dev->openacc.async.asyncqueue[async])
     107  	{
     108  	  gomp_mutex_unlock (&dev->openacc.async.lock);
     109  	  gomp_fatal ("async %d creation failed", async);
     110  	}
     111        
     112        /* Link new async queue into active list.  */
     113        goacc_aq_list n = gomp_malloc (sizeof (struct goacc_asyncqueue_list));
     114        n->aq = dev->openacc.async.asyncqueue[async];
     115        n->next = dev->openacc.async.active;
     116        dev->openacc.async.active = n;
     117      }
     118  
     119    ret_aq = dev->openacc.async.asyncqueue[async];
     120  
     121   end:
     122    gomp_mutex_unlock (&dev->openacc.async.lock);
     123    return ret_aq;
     124  }
     125  
     126  /* Return the asyncqueue to be used for OpenACC async-argument ASYNC.  This
     127     might return NULL if no asyncqueue is to be used.  Otherwise, create the
     128     asyncqueue if it doesn't exist yet.  */
     129  
     130  attribute_hidden struct goacc_asyncqueue *
     131  get_goacc_asyncqueue (int async)
     132  {
     133    struct goacc_thread *thr = get_goacc_thread ();
     134    return lookup_goacc_asyncqueue (thr, true, async);
     135  }
     136  
     137  int
     138  acc_async_test (int async)
     139  {
     140    struct goacc_thread *thr = goacc_thread ();
     141  
     142    if (!thr || !thr->dev)
     143      gomp_fatal ("no device active");
     144  
     145    goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async);
     146    if (!aq)
     147      return 1;
     148  
     149    acc_prof_info prof_info;
     150    acc_api_info api_info;
     151    bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
     152    if (profiling_p)
     153      {
     154        prof_info.async = async;
     155        prof_info.async_queue = prof_info.async;
     156      }
     157  
     158    int res = thr->dev->openacc.async.test_func (aq);
     159  
     160    if (profiling_p)
     161      {
     162        thr->prof_info = NULL;
     163        thr->api_info = NULL;
     164      }
     165  
     166    return res;
     167  }
     168  
     169  int
     170  acc_async_test_all (void)
     171  {
     172    struct goacc_thread *thr = get_goacc_thread ();
     173  
     174    acc_prof_info prof_info;
     175    acc_api_info api_info;
     176    bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
     177  
     178    int ret = 1;
     179    gomp_mutex_lock (&thr->dev->openacc.async.lock);
     180    for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
     181      if (!thr->dev->openacc.async.test_func (l->aq))
     182        {
     183  	ret = 0;
     184  	break;
     185        }
     186    gomp_mutex_unlock (&thr->dev->openacc.async.lock);
     187  
     188    if (profiling_p)
     189      {
     190        thr->prof_info = NULL;
     191        thr->api_info = NULL;
     192      }
     193  
     194    return ret;
     195  }
     196  
     197  void
     198  acc_wait (int async)
     199  {
     200    struct goacc_thread *thr = get_goacc_thread ();
     201  
     202    goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async);
     203    if (!aq)
     204      return;
     205  
     206    acc_prof_info prof_info;
     207    acc_api_info api_info;
     208    bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
     209    if (profiling_p)
     210      {
     211        prof_info.async = async;
     212        prof_info.async_queue = prof_info.async;
     213      }
     214  
     215    if (!thr->dev->openacc.async.synchronize_func (aq))
     216      gomp_fatal ("wait on %d failed", async);
     217  
     218    if (profiling_p)
     219      {
     220        thr->prof_info = NULL;
     221        thr->api_info = NULL;
     222      }
     223  }
     224  
/* acc_async_wait is an OpenACC 1.0 compatibility name for acc_wait.  If
   HAVE_ATTRIBUTE_ALIAS is defined, it is provided through the strong_alias
   macro; otherwise it is an ordinary function that passes ASYNC on to
   acc_wait.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_wait, acc_async_wait)
#else
void
acc_async_wait (int async)
{
  acc_wait (async);
}
#endif
     235  
     236  void
     237  acc_wait_async (int async1, int async2)
     238  {
     239    struct goacc_thread *thr = get_goacc_thread ();
     240  
     241    goacc_aq aq1 = lookup_goacc_asyncqueue (thr, false, async1);
     242    /* TODO: Is this also correct for acc_async_sync, assuming that in this case,
     243       we'll always be synchronous anyways?  */
     244    if (!aq1)
     245      return;
     246  
     247    acc_prof_info prof_info;
     248    acc_api_info api_info;
     249    bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
     250    if (profiling_p)
     251      {
     252        prof_info.async = async2;
     253        prof_info.async_queue = prof_info.async;
     254      }
     255  
     256    goacc_aq aq2 = lookup_goacc_asyncqueue (thr, true, async2);
     257    /* An async queue is always synchronized with itself.  */
     258    if (aq1 == aq2)
     259      goto out_prof;
     260  
     261    if (aq2)
     262      {
     263        if (!thr->dev->openacc.async.serialize_func (aq1, aq2))
     264  	gomp_fatal ("ordering of async ids %d and %d failed", async1, async2);
     265      }
     266    else
     267      {
     268        /* TODO: Local thread synchronization.
     269  	 Necessary for the "async2 == acc_async_sync" case, or can just skip?  */
     270        if (!thr->dev->openacc.async.synchronize_func (aq1))
     271  	gomp_fatal ("wait on %d failed", async1);
     272      }
     273  
     274   out_prof:
     275    if (profiling_p)
     276      {
     277        thr->prof_info = NULL;
     278        thr->api_info = NULL;
     279      }
     280  }
     281  
     282  void
     283  acc_wait_all (void)
     284  {
     285    struct goacc_thread *thr = goacc_thread ();
     286  
     287    acc_prof_info prof_info;
     288    acc_api_info api_info;
     289    bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
     290  
     291    bool ret = true;
     292    gomp_mutex_lock (&thr->dev->openacc.async.lock);
     293    for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
     294      ret &= thr->dev->openacc.async.synchronize_func (l->aq);
     295    gomp_mutex_unlock (&thr->dev->openacc.async.lock);
     296  
     297    if (profiling_p)
     298      {
     299        thr->prof_info = NULL;
     300        thr->api_info = NULL;
     301      }
     302  
     303    if (!ret)
     304      gomp_fatal ("wait all failed");
     305  }
     306  
/* acc_async_wait_all is an OpenACC 1.0 compatibility name for acc_wait_all.
   If HAVE_ATTRIBUTE_ALIAS is defined, it is provided through the strong_alias
   macro; otherwise it is an ordinary function that calls acc_wait_all.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_wait_all, acc_async_wait_all)
#else
void
acc_async_wait_all (void)
{
  acc_wait_all ();
}
#endif
     317  
     318  void
     319  acc_wait_all_async (int async)
     320  {
     321    struct goacc_thread *thr = get_goacc_thread ();
     322  
     323    acc_prof_info prof_info;
     324    acc_api_info api_info;
     325    bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
     326    if (profiling_p)
     327      {
     328        prof_info.async = async;
     329        prof_info.async_queue = prof_info.async;
     330      }
     331  
     332    goacc_aq waiting_queue = lookup_goacc_asyncqueue (thr, true, async);
     333  
     334    bool ret = true;
     335    gomp_mutex_lock (&thr->dev->openacc.async.lock);
     336    for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
     337      {
     338        if (waiting_queue)
     339  	ret &= thr->dev->openacc.async.serialize_func (l->aq, waiting_queue);
     340        else
     341  	/* TODO: Local thread synchronization.
     342  	   Necessary for the "async2 == acc_async_sync" case, or can just skip?  */
     343  	ret &= thr->dev->openacc.async.synchronize_func (l->aq);
     344      }
     345    gomp_mutex_unlock (&thr->dev->openacc.async.lock);
     346  
     347    if (profiling_p)
     348      {
     349        thr->prof_info = NULL;
     350        thr->api_info = NULL;
     351      }
     352  
     353    if (!ret)
     354      gomp_fatal ("wait all async(%d) failed", async);
     355  }
     356  
     357  void
     358  GOACC_wait (int async, int num_waits, ...)
     359  {
     360    goacc_lazy_initialize ();
     361  
     362    struct goacc_thread *thr = goacc_thread ();
     363  
     364    /* No nesting.  */
     365    assert (thr->prof_info == NULL);
     366    assert (thr->api_info == NULL);
     367    acc_prof_info prof_info;
     368    acc_api_info api_info;
     369    bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
     370    if (profiling_p)
     371      {
     372        prof_info.async = async;
     373        prof_info.async_queue = prof_info.async;
     374      }
     375  
     376    if (num_waits)
     377      {
     378        va_list ap;
     379  
     380        va_start (ap, num_waits);
     381        goacc_wait (async, num_waits, &ap);
     382        va_end (ap);
     383      }
     384    else if (async == acc_async_sync)
     385      acc_wait_all ();
     386    else
     387      acc_wait_all_async (async);
     388  
     389    if (profiling_p)
     390      {
     391        thr->prof_info = NULL;
     392        thr->api_info = NULL;
     393      }
     394  }
     395  
     396  attribute_hidden void
     397  goacc_wait (int async, int num_waits, va_list *ap)
     398  {
     399    while (num_waits--)
     400      {
     401        int qid = va_arg (*ap, int);
     402  
     403        /* Waiting on ACC_ASYNC_NOVAL maps to 'wait all'.  */
     404        if (qid == acc_async_noval)
     405  	{
     406  	  if (async == acc_async_sync)
     407  	    acc_wait_all ();
     408  	  else
     409  	    acc_wait_all_async (async);
     410  	  break;
     411  	}
     412  
     413        if (async == acc_async_sync)
     414  	acc_wait (qid);
     415        else if (qid == async)
     416  	/* If we're waiting on the same asynchronous queue as we're
     417  	   launching on, the queue itself will order work as
     418  	   required, so there's no need to wait explicitly.  */
     419  	;
     420        else
     421  	acc_wait_async (qid, async);
     422      }
     423  }
     424  
     425  attribute_hidden void
     426  goacc_async_free (struct gomp_device_descr *devicep,
     427  		  struct goacc_asyncqueue *aq, void *ptr)
     428  {
     429    if (!aq)
     430      free (ptr);
     431    else
     432      devicep->openacc.async.queue_callback_func (aq, free, ptr);
     433  }
     434  
     435  /* This function initializes the asyncqueues for the device specified by
     436     DEVICEP.  TODO DEVICEP must be locked on entry, and remains locked on
     437     return.  */
     438  
     439  attribute_hidden void
     440  goacc_init_asyncqueues (struct gomp_device_descr *devicep)
     441  {
     442    devicep->openacc.async.nasyncqueue = 0;
     443    devicep->openacc.async.asyncqueue = NULL;
     444    devicep->openacc.async.active = NULL;
     445    gomp_mutex_init (&devicep->openacc.async.lock);
     446  }
     447  
     448  /* This function finalizes the asyncqueues for the device specified by DEVICEP.
     449     TODO DEVICEP must be locked on entry, and remains locked on return.  */
     450  
     451  attribute_hidden bool
     452  goacc_fini_asyncqueues (struct gomp_device_descr *devicep)
     453  {
     454    bool ret = true;
     455    gomp_mutex_lock (&devicep->openacc.async.lock);
     456    if (devicep->openacc.async.nasyncqueue > 0)
     457      {
     458        goacc_aq_list next;
     459        for (goacc_aq_list l = devicep->openacc.async.active; l; l = next)
     460  	{
     461  	  ret &= devicep->openacc.async.destruct_func (l->aq);
     462  	  next = l->next;
     463  	  free (l);
     464  	}
     465        free (devicep->openacc.async.asyncqueue);
     466        devicep->openacc.async.nasyncqueue = 0;
     467        devicep->openacc.async.asyncqueue = NULL;
     468        devicep->openacc.async.active = NULL;
     469      }
     470    gomp_mutex_unlock (&devicep->openacc.async.lock);
     471    gomp_mutex_destroy (&devicep->openacc.async.lock);
     472    return ret;
     473  }