(root)/
gcc-13.2.0/
libgomp/
oacc-profiling.c
       1  /* OpenACC Profiling Interface
       2  
       3     Copyright (C) 2019-2023 Free Software Foundation, Inc.
       4  
       5     Contributed by Mentor, a Siemens Business.
       6  
       7     This file is part of the GNU Offloading and Multi Processing Library
       8     (libgomp).
       9  
      10     Libgomp is free software; you can redistribute it and/or modify it
      11     under the terms of the GNU General Public License as published by
      12     the Free Software Foundation; either version 3, or (at your option)
      13     any later version.
      14  
      15     Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
      16     WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
      17     FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
      18     more details.
      19  
      20     Under Section 7 of GPL version 3, you are granted additional
      21     permissions described in the GCC Runtime Library Exception, version
      22     3.1, as published by the Free Software Foundation.
      23  
      24     You should have received a copy of the GNU General Public License and
      25     a copy of the GCC Runtime Library Exception along with this program;
      26     see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
      27     <http://www.gnu.org/licenses/>.  */
      28  
      29  #define _GNU_SOURCE
      30  #include "libgomp.h"
      31  #include "oacc-int.h"
      32  #include "secure_getenv.h"
      33  #include "acc_prof.h"
      34  #include <assert.h>
      35  #ifdef HAVE_STRING_H
      36  # include <string.h>
      37  #endif
      38  #ifdef PLUGIN_SUPPORT
      39  # include <dlfcn.h>
      40  #endif
      41  
      42  #define STATIC_ASSERT(expr) _Static_assert (expr, "!(" #expr ")")
      43  
      44  /* Statically assert that the layout of the common fields in the
      45     'acc_event_info' variants matches.  */
      46  /* 'event_type' */
      47  STATIC_ASSERT (offsetof (acc_event_info, event_type)
      48  	       == offsetof (acc_event_info, data_event.event_type));
      49  STATIC_ASSERT (offsetof (acc_event_info, data_event.event_type)
      50  	       == offsetof (acc_event_info, launch_event.event_type));
      51  STATIC_ASSERT (offsetof (acc_event_info, data_event.event_type)
      52  	       == offsetof (acc_event_info, other_event.event_type));
      53  /* 'valid_bytes' */
      54  STATIC_ASSERT (offsetof (acc_event_info, data_event.valid_bytes)
      55  	       == offsetof (acc_event_info, launch_event.valid_bytes));
      56  STATIC_ASSERT (offsetof (acc_event_info, data_event.valid_bytes)
      57  	       == offsetof (acc_event_info, other_event.valid_bytes));
      58  /* 'parent_construct' */
      59  STATIC_ASSERT (offsetof (acc_event_info, data_event.parent_construct)
      60  	       == offsetof (acc_event_info, launch_event.parent_construct));
      61  STATIC_ASSERT (offsetof (acc_event_info, data_event.parent_construct)
      62  	       == offsetof (acc_event_info, other_event.parent_construct));
      63  /* 'implicit' */
      64  STATIC_ASSERT (offsetof (acc_event_info, data_event.implicit)
      65  	       == offsetof (acc_event_info, launch_event.implicit));
      66  STATIC_ASSERT (offsetof (acc_event_info, data_event.implicit)
      67  	       == offsetof (acc_event_info, other_event.implicit));
      68  /* 'tool_info' */
      69  STATIC_ASSERT (offsetof (acc_event_info, data_event.tool_info)
      70  	       == offsetof (acc_event_info, launch_event.tool_info));
      71  STATIC_ASSERT (offsetof (acc_event_info, data_event.tool_info)
      72  	       == offsetof (acc_event_info, other_event.tool_info));
      73  
/* One registered profiling callback.  Entries for the same event type are
   chained into a singly-linked list via 'next'.  */
struct goacc_prof_callback_entry
{
  /* The callback function to invoke.  */
  acc_prof_callback cb;
  /* Reference count: starts at 1, is incremented when the same callback is
     registered again for the same event, and decremented when it is
     unregistered; the entry is freed once it drops to zero.  */
  int ref;
  /* Per-callback toggle; disabled entries are skipped during dispatch.  */
  bool enabled;
  /* Next entry for the same event type, or NULL at the end of the list.  */
  struct goacc_prof_callback_entry *next;
};
      81  
/* Use a separate flag to minimize run-time performance impact for the (very
   common) case that profiling is not enabled.

   The flag is set by 'acc_prof_register'.  Once enabled, we're not going to
   disable this anymore, anywhere.  We probably could, by adding appropriate
   logic to 'acc_prof_register', 'acc_prof_unregister'.  */
bool goacc_prof_enabled = false;

/* Global state for registered callbacks.

   'goacc_prof_callbacks_enabled' is indexed by event type and tells whether
   callbacks are dispatched for that event type at all;
   'goacc_prof_callbacks_enabled[acc_ev_none]' acts as a global toggle.

   'goacc_prof_callback_entries' is indexed by event type, too, and holds the
   head of the list of callbacks registered for that event type.  */
static bool goacc_prof_callbacks_enabled[acc_ev_last];
static struct goacc_prof_callback_entry *goacc_prof_callback_entries[acc_ev_last];
/* Lock used to protect access to 'goacc_prof_callbacks_enabled', and
   'goacc_prof_callback_entries'.  */
static gomp_mutex_t goacc_prof_lock;
      97  
      98  void
      99  goacc_profiling_initialize (void)
     100  {
     101    gomp_mutex_init (&goacc_prof_lock);
     102  
     103    /* Initially, all callbacks for all events are enabled.  */
     104    for (int i = 0; i < acc_ev_last; ++i)
     105      goacc_prof_callbacks_enabled[i] = true;
     106  
     107  
     108  #ifdef PLUGIN_SUPPORT
     109    char *acc_proflibs = secure_getenv ("ACC_PROFLIB");
     110    while (acc_proflibs != NULL && acc_proflibs[0] != '\0')
     111      {
     112        char *acc_proflibs_sep = strchr (acc_proflibs, ';');
     113        char *acc_proflib;
     114        if (acc_proflibs_sep == acc_proflibs)
     115  	{
     116  	  /* Stray ';' separator: make sure we don't 'dlopen' the main
     117  	     program.  */
     118  	  acc_proflib = NULL;
     119  	}
     120        else
     121  	{
     122  	  if (acc_proflibs_sep != NULL)
     123  	    {
     124  	      /* Single out the first library.  */
     125  	      acc_proflib = gomp_malloc (acc_proflibs_sep - acc_proflibs + 1);
     126  	      memcpy (acc_proflib, acc_proflibs,
     127  		      acc_proflibs_sep - acc_proflibs);
     128  	      acc_proflib[acc_proflibs_sep - acc_proflibs] = '\0';
     129  	    }
     130  	  else
     131  	    {
     132  	      /* No ';' separator, so only one library.  */
     133  	      acc_proflib = acc_proflibs;
     134  	    }
     135  
     136  	  gomp_debug (0, "%s: dlopen (\"%s\")\n", __FUNCTION__, acc_proflib);
     137  	  void *dl_handle = dlopen (acc_proflib, RTLD_LAZY);
     138  	  if (dl_handle != NULL)
     139  	    {
     140  	      typeof (&acc_register_library) a_r_l
     141  		= dlsym (dl_handle, "acc_register_library");
     142  	      if (a_r_l == NULL)
     143  		goto dl_fail;
     144  	      gomp_debug (0, "  %s: calling %s:acc_register_library\n",
     145  			  __FUNCTION__, acc_proflib);
     146  	      a_r_l (acc_prof_register, acc_prof_unregister,
     147  		     acc_prof_lookup);
     148  	    }
     149  	  else
     150  	    {
     151  	    dl_fail:
     152  	      gomp_error ("while loading ACC_PROFLIB \"%s\": %s",
     153  			  acc_proflib, dlerror ());
     154  	      if (dl_handle != NULL)
     155  		{
     156  		  int err = dlclose (dl_handle);
     157  		  dl_handle = NULL;
     158  		  if (err != 0)
     159  		    goto dl_fail;
     160  		}
     161  	    }
     162  	}
     163  
     164        if (acc_proflib != acc_proflibs)
     165  	{
     166  	  free (acc_proflib);
     167  
     168  	  acc_proflibs = acc_proflibs_sep + 1;
     169  	}
     170        else
     171  	acc_proflibs = NULL;
     172      }
     173  #endif /* PLUGIN_SUPPORT */
     174  }
     175  
     176  void
     177  acc_prof_register (acc_event_t ev, acc_prof_callback cb, acc_register_t reg)
     178  {
     179    gomp_debug (0, "%s: ev=%d, cb=%p, reg=%d\n",
     180  	      __FUNCTION__, (int) ev, (void *) cb, (int) reg);
     181  
     182  
     183    /* For any events to be dispatched, the user first has to register a
     184       callback, which makes this here a good place for enabling the whole
     185       machinery.  */
     186    if (!GOACC_PROF_ENABLED)
     187      __atomic_store_n (&goacc_prof_enabled, true, MEMMODEL_RELEASE);
     188  
     189  
     190    enum
     191    {
     192      EVENT_KIND_BOGUS,
     193      EVENT_KIND_NORMAL,
     194      /* As end events invoke callbacks in the reverse order, we register these
     195         in the reverse order here.  */
     196      EVENT_KIND_END,
     197    } event_kind = EVENT_KIND_BOGUS;
     198    switch (ev)
     199      {
     200      case acc_ev_none:
     201      case acc_ev_device_init_start:
     202      case acc_ev_device_shutdown_start:
     203      case acc_ev_runtime_shutdown:
     204      case acc_ev_create:
     205      case acc_ev_delete:
     206      case acc_ev_alloc:
     207      case acc_ev_free:
     208      case acc_ev_enter_data_start:
     209      case acc_ev_exit_data_start:
     210      case acc_ev_update_start:
     211      case acc_ev_compute_construct_start:
     212      case acc_ev_enqueue_launch_start:
     213      case acc_ev_enqueue_upload_start:
     214      case acc_ev_enqueue_download_start:
     215      case acc_ev_wait_start:
     216        event_kind = EVENT_KIND_NORMAL;
     217        break;
     218      case acc_ev_device_init_end:
     219      case acc_ev_device_shutdown_end:
     220      case acc_ev_enter_data_end:
     221      case acc_ev_exit_data_end:
     222      case acc_ev_update_end:
     223      case acc_ev_compute_construct_end:
     224      case acc_ev_enqueue_launch_end:
     225      case acc_ev_enqueue_upload_end:
     226      case acc_ev_enqueue_download_end:
     227      case acc_ev_wait_end:
     228        event_kind = EVENT_KIND_END;
     229        break;
     230      case acc_ev_last:
     231        break;
     232      }
     233    if (event_kind == EVENT_KIND_BOGUS)
     234      {
     235        /* Silently ignore.  */
     236        gomp_debug (0, "  ignoring request for bogus 'acc_event_t'\n");
     237        return;
     238      }
     239  
     240    bool bogus = true;
     241    switch (reg)
     242      {
     243      case acc_reg:
     244      case acc_toggle:
     245      case acc_toggle_per_thread:
     246        bogus = false;
     247        break;
     248      }
     249    if (bogus)
     250      {
     251        /* Silently ignore.  */
     252        gomp_debug (0, "  ignoring request with bogus 'acc_register_t'\n");
     253        return;
     254      }
     255  
     256    /* Special cases.  */
     257    if (reg == acc_toggle)
     258      {
     259        if (cb == NULL)
     260  	{
     261  	  gomp_debug (0, "  globally enabling callbacks\n");
     262  	  gomp_mutex_lock (&goacc_prof_lock);
     263  	  /* For 'acc_ev_none', this acts as a global toggle.  */
     264  	  goacc_prof_callbacks_enabled[ev] = true;
     265  	  gomp_mutex_unlock (&goacc_prof_lock);
     266  	  return;
     267  	}
     268        else if (ev == acc_ev_none && cb != NULL)
     269  	{
     270  	  gomp_debug (0, "  ignoring request\n");
     271  	  return;
     272  	}
     273      }
     274    else if (reg == acc_toggle_per_thread)
     275      {
     276        if (ev == acc_ev_none && cb == NULL)
     277  	{
     278  	  gomp_debug (0, "  thread: enabling callbacks\n");
     279  	  goacc_lazy_initialize ();
     280  	  struct goacc_thread *thr = goacc_thread ();
     281  	  thr->prof_callbacks_enabled = true;
     282  	  return;
     283  	}
     284        /* Silently ignore.  */
     285        gomp_debug (0, "  ignoring bogus request\n");
     286        return;
     287      }
     288  
     289    gomp_mutex_lock (&goacc_prof_lock);
     290  
     291    struct goacc_prof_callback_entry *it, *it_p;
     292    it = goacc_prof_callback_entries[ev];
     293    it_p = NULL;
     294    while (it)
     295      {
     296        if (it->cb == cb)
     297  	break;
     298        it_p = it;
     299        it = it->next;
     300      }
     301  
     302    switch (reg)
     303      {
     304      case acc_reg:
     305        /* If we already have this callback registered, just increment its
     306  	 reference count.  */
     307        if (it != NULL)
     308  	{
     309  	  it->ref++;
     310  	  gomp_debug (0, "  already registered;"
     311  		      " incrementing reference count to: %d\n", it->ref);
     312  	}
     313        else
     314  	{
     315  	  struct goacc_prof_callback_entry *e
     316  	    = gomp_malloc (sizeof (struct goacc_prof_callback_entry));
     317  	  e->cb = cb;
     318  	  e->ref = 1;
     319  	  e->enabled = true;
     320  	  bool prepend = (event_kind == EVENT_KIND_END);
     321  	  /* If we don't have any callback registered yet, also use the
     322  	     'prepend' code path.  */
     323  	  if (it_p == NULL)
     324  	    prepend = true;
     325  	  if (prepend)
     326  	    {
     327  	      gomp_debug (0, "  prepending\n");
     328  	      e->next = goacc_prof_callback_entries[ev];
     329  	      goacc_prof_callback_entries[ev] = e;
     330  	    }
     331  	  else
     332  	    {
     333  	      gomp_debug (0, "  appending\n");
     334  	      e->next = NULL;
     335  	      it_p->next = e;
     336  	    }
     337  	}
     338        break;
     339  
     340      case acc_toggle:
     341        if (it == NULL)
     342  	{
     343  	  gomp_debug (0, "  ignoring request: is not registered\n");
     344  	  break;
     345  	}
     346        else
     347  	{
     348  	  gomp_debug (0, "  enabling\n");
     349  	  it->enabled = true;
     350  	}
     351        break;
     352  
     353      case acc_toggle_per_thread:
     354        __builtin_unreachable ();
     355      }
     356  
     357    gomp_mutex_unlock (&goacc_prof_lock);
     358  }
     359  
     360  void
     361  acc_prof_unregister (acc_event_t ev, acc_prof_callback cb, acc_register_t reg)
     362  {
     363    gomp_debug (0, "%s: ev=%d, cb=%p, reg=%d\n",
     364  	      __FUNCTION__, (int) ev, (void *) cb, (int) reg);
     365  
     366    /* If profiling is not enabled, there cannot be anything to unregister.  */
     367    if (!GOACC_PROF_ENABLED)
     368      return;
     369  
     370    if (ev < acc_ev_none
     371        || ev >= acc_ev_last)
     372      {
     373        /* Silently ignore.  */
     374        gomp_debug (0, "  ignoring request for bogus 'acc_event_t'\n");
     375        return;
     376      }
     377  
     378    bool bogus = true;
     379    switch (reg)
     380      {
     381      case acc_reg:
     382      case acc_toggle:
     383      case acc_toggle_per_thread:
     384        bogus = false;
     385        break;
     386      }
     387    if (bogus)
     388      {
     389        /* Silently ignore.  */
     390        gomp_debug (0, "  ignoring request with bogus 'acc_register_t'\n");
     391        return;
     392      }
     393  
     394    /* Special cases.  */
     395    if (reg == acc_toggle)
     396      {
     397        if (cb == NULL)
     398  	{
     399  	  gomp_debug (0, "  globally disabling callbacks\n");
     400  	  gomp_mutex_lock (&goacc_prof_lock);
     401  	  /* For 'acc_ev_none', this acts as a global toggle.  */
     402  	  goacc_prof_callbacks_enabled[ev] = false;
     403  	  gomp_mutex_unlock (&goacc_prof_lock);
     404  	  return;
     405  	}
     406        else if (ev == acc_ev_none && cb != NULL)
     407  	{
     408  	  gomp_debug (0, "  ignoring request\n");
     409  	  return;
     410  	}
     411      }
     412    else if (reg == acc_toggle_per_thread)
     413      {
     414        if (ev == acc_ev_none && cb == NULL)
     415  	{
     416  	  gomp_debug (0, "  thread: disabling callbacks\n");
     417  	  goacc_lazy_initialize ();
     418  	  struct goacc_thread *thr = goacc_thread ();
     419  	  thr->prof_callbacks_enabled = false;
     420  	  return;
     421  	}
     422        /* Silently ignore.  */
     423        gomp_debug (0, "  ignoring bogus request\n");
     424        return;
     425      }
     426  
     427    gomp_mutex_lock (&goacc_prof_lock);
     428  
     429    struct goacc_prof_callback_entry *it, *it_p;
     430    it = goacc_prof_callback_entries[ev];
     431    it_p = NULL;
     432    while (it)
     433      {
     434        if (it->cb == cb)
     435  	break;
     436        it_p = it;
     437        it = it->next;
     438      }
     439  
     440    switch (reg)
     441      {
     442      case acc_reg:
     443        if (it == NULL)
     444  	{
     445  	  /* Silently ignore.  */
     446  	  gomp_debug (0, "  ignoring bogus request: is not registered\n");
     447  	  break;
     448  	}
     449        it->ref--;
     450        gomp_debug (0, "  decrementing reference count to: %d\n", it->ref);
     451        if (it->ref == 0)
     452  	{
     453  	  if (it_p == NULL)
     454  	    goacc_prof_callback_entries[ev] = it->next;
     455  	  else
     456  	    it_p->next = it->next;
     457  	  free (it);
     458  	}
     459        break;
     460  
     461      case acc_toggle:
     462        if (it == NULL)
     463  	{
     464  	  gomp_debug (0, "  ignoring request: is not registered\n");
     465  	  break;
     466  	}
     467        else
     468  	{
     469  	  gomp_debug (0, "  disabling\n");
     470  	  it->enabled = false;
     471  	}
     472        break;
     473  
     474      case acc_toggle_per_thread:
     475        __builtin_unreachable ();
     476      }
     477  
     478    gomp_mutex_unlock (&goacc_prof_lock);
     479  }
     480  
     481  acc_query_fn
     482  acc_prof_lookup (const char *name)
     483  {
     484    gomp_debug (0, "%s (%s)\n",
     485  	      __FUNCTION__, name ?: "NULL");
     486  
     487    return NULL;
     488  }
     489  
     490  void
     491  acc_register_library (acc_prof_reg reg, acc_prof_reg unreg,
     492  		      acc_prof_lookup_func lookup)
     493  {
     494    gomp_fatal ("TODO");
     495  }
     496  
     497  /* Prepare to dispatch events?  */
     498  
     499  bool
     500  _goacc_profiling_dispatch_p (bool check_not_nested_p)
     501  {
     502    gomp_debug (0, "%s\n", __FUNCTION__);
     503  
     504    bool ret;
     505  
     506    struct goacc_thread *thr = goacc_thread ();
     507    if (__builtin_expect (thr == NULL, false))
     508      {
     509        /* If we don't have any per-thread state yet, that means that per-thread
     510  	 callback dispatch has not been explicitly disabled (which only a call
     511  	 to 'acc_prof_unregister' with 'acc_toggle_per_thread' would do, and
     512  	 that would have allocated per-thread state via
     513  	 'goacc_lazy_initialize'); initially, all callbacks for all events are
     514  	 enabled.  */
     515        gomp_debug (0, "  %s: don't have any per-thread state yet\n", __FUNCTION__);
     516      }
     517    else
     518      {
     519        if (check_not_nested_p)
     520  	{
     521  	  /* No nesting.  */
     522  	  assert (thr->prof_info == NULL);
     523  	  assert (thr->api_info == NULL);
     524  	}
     525  
     526        if (__builtin_expect (!thr->prof_callbacks_enabled, true))
     527  	{
     528  	  gomp_debug (0, "  %s: disabled for this thread\n", __FUNCTION__);
     529  	  ret = false;
     530  	  goto out;
     531  	}
     532      }
     533  
     534    gomp_mutex_lock (&goacc_prof_lock);
     535  
     536    /* 'goacc_prof_callbacks_enabled[acc_ev_none]' acts as a global toggle.  */
     537    if (__builtin_expect (!goacc_prof_callbacks_enabled[acc_ev_none], true))
     538      {
     539        gomp_debug (0, "  %s: disabled globally\n", __FUNCTION__);
     540        ret = false;
     541        goto out_unlock;
     542      }
     543    else
     544      ret = true;
     545  
     546   out_unlock:
     547    gomp_mutex_unlock (&goacc_prof_lock);
     548  
     549   out:
     550    return ret;
     551  }
     552  
     553  /* Set up to dispatch events?  */
     554  
     555  bool
     556  _goacc_profiling_setup_p (struct goacc_thread *thr,
     557  			  acc_prof_info *prof_info, acc_api_info *api_info)
     558  {
     559    gomp_debug (0, "%s (%p)\n", __FUNCTION__, thr);
     560  
     561    /* If we don't have any per-thread state yet, we can't register 'prof_info'
     562       and 'api_info'.  */
     563    if (__builtin_expect (thr == NULL, false))
     564      {
     565        gomp_debug (0, "Can't dispatch OpenACC Profiling Interface events for"
     566  		  " the current call, construct, or directive\n");
     567        return false;
     568      }
     569  
     570    if (thr->prof_info != NULL)
     571      {
     572        /* Profiling has already been set up for an outer construct.  In this
     573  	 case, we continue to use the existing information, and thus return
     574  	 'false' here.
     575  
     576  	 This can happen, for example, for an 'enter data' directive, which
     577  	 sets up profiling, then calls into 'acc_copyin', which should not
     578  	 again set up profiling, should not overwrite the existing
     579  	 information.  */
     580        return false;
     581      }
     582  
     583    thr->prof_info = prof_info;
     584    thr->api_info = api_info;
     585  
     586    /* Fill in some defaults.  */
     587  
     588    prof_info->event_type = -1; /* Must be set later.  */
     589    prof_info->valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
     590    prof_info->version = _ACC_PROF_INFO_VERSION;
     591    if (thr->dev)
     592      {
     593        prof_info->device_type = acc_device_type (thr->dev->type);
     594        prof_info->device_number = thr->dev->target_id;
     595      }
     596    else
     597      {
     598        prof_info->device_type = -1;
     599        prof_info->device_number = -1;
     600      }
     601    prof_info->thread_id = -1;
     602    prof_info->async = acc_async_sync;
     603    prof_info->async_queue = prof_info->async;
     604    prof_info->src_file = NULL;
     605    prof_info->func_name = NULL;
     606    prof_info->line_no = -1;
     607    prof_info->end_line_no = -1;
     608    prof_info->func_line_no = -1;
     609    prof_info->func_end_line_no = -1;
     610  
     611    api_info->device_api = acc_device_api_none;
     612    api_info->valid_bytes = _ACC_API_INFO_VALID_BYTES;
     613    api_info->device_type = prof_info->device_type;
     614    api_info->vendor = -1;
     615    api_info->device_handle = NULL;
     616    api_info->context_handle = NULL;
     617    api_info->async_handle = NULL;
     618  
     619    return true;
     620  }
     621  
     622  /* Dispatch events.
     623  
     624     This must only be called if 'GOACC_PROFILING_DISPATCH_P' or
     625     'GOACC_PROFILING_SETUP_P' returned a true result.  */
     626  
     627  void
     628  goacc_profiling_dispatch (acc_prof_info *prof_info, acc_event_info *event_info,
     629  			  acc_api_info *apt_info)
     630  {
     631    acc_event_t event_type = event_info->event_type;
     632    gomp_debug (0, "%s: event_type=%d\n", __FUNCTION__, (int) event_type);
     633    assert (event_type > acc_ev_none
     634  	  && event_type < acc_ev_last);
     635  
     636    gomp_mutex_lock (&goacc_prof_lock);
     637  
     638    if (!goacc_prof_callbacks_enabled[event_type])
     639      {
     640        gomp_debug (0, "  disabled for this event type\n");
     641  
     642        goto out_unlock;
     643      }
     644  
     645    for (struct goacc_prof_callback_entry *e
     646  	 = goacc_prof_callback_entries[event_type];
     647         e != NULL;
     648         e = e->next)
     649      {
     650        if (!e->enabled)
     651  	{
     652  	  gomp_debug (0, "  disabled for callback %p\n", e->cb);
     653  	  continue;
     654  	}
     655  
     656        gomp_debug (0, "  calling callback %p\n", e->cb);
     657        e->cb (prof_info, event_info, apt_info);
     658      }
     659  
     660   out_unlock:
     661    gomp_mutex_unlock (&goacc_prof_lock);
     662  }