(root)/
gcc-13.2.0/
libgomp/
work.c
/* Copyright (C) 2005-2023 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains routines to manage the work-share queue for a team
   of threads.  */

#include "libgomp.h"
#include <stddef.h>
#include <stdlib.h>
#include <string.h>


/* Allocate a new work share structure, preferably from the current
   team's free gomp_work_share cache.  */

static struct gomp_work_share *
alloc_work_share (struct gomp_team *team)
{
  struct gomp_work_share *ws;
  unsigned int i;

  /* This is called in a critical section.  */
  if (team->work_share_list_alloc != NULL)
    {
      ws = team->work_share_list_alloc;
      team->work_share_list_alloc = ws->next_free;
      return ws;
    }

#ifdef HAVE_SYNC_BUILTINS
  ws = team->work_share_list_free;
  /* We need an atomic read of work_share_list_free,
     as free_work_share can be called concurrently.  */
  __asm ("" : "+r" (ws));

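  /* Detach entries only from behind the list head: the head pointer is
     never written here, so this cannot race with the compare-and-swap
     pushes done in free_work_share.  The head element stays on the free
     list; the first detached entry is returned and any remaining ones go
     onto the allocation list, which is protected by the caller's
     critical section.  */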
  if (ws && ws->next_free)
    {
      struct gomp_work_share *next = ws->next_free;
      ws->next_free = NULL;
      team->work_share_list_alloc = next->next_free;
      return next;
    }
#else
  gomp_mutex_lock (&team->work_share_list_free_lock);
  ws = team->work_share_list_free;
  if (ws)
    {
      team->work_share_list_alloc = ws->next_free;
      team->work_share_list_free = NULL;
      gomp_mutex_unlock (&team->work_share_list_free_lock);
      return ws;
    }
  gomp_mutex_unlock (&team->work_share_list_free_lock);
#endif

  team->work_share_chunk *= 2;
  /* Allocating gomp_work_share structures aligned is just an
     optimization; don't do it when using the fallback method.  */
#ifdef GOMP_USE_ALIGNED_WORK_SHARES
  ws = gomp_aligned_alloc (__alignof (struct gomp_work_share),
			   team->work_share_chunk
			   * sizeof (struct gomp_work_share));
#else
  ws = gomp_malloc (team->work_share_chunk * sizeof (struct gomp_work_share));
#endif
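  /* Link the freshly doubled chunk onto the next_alloc chain hanging off
     team->work_shares[0], so the whole block can be released when the
     team is torn down.  ws[0] is returned to the caller below, while
     ws[1] through ws[work_share_chunk - 1] are threaded onto the
     allocation free list.  */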
  ws->next_alloc = team->work_shares[0].next_alloc;
  team->work_shares[0].next_alloc = ws;
  team->work_share_list_alloc = &ws[1];
  for (i = 1; i < team->work_share_chunk - 1; i++)
    ws[i].next_free = &ws[i + 1];
  ws[i].next_free = NULL;
  return ws;
}

/* Initialize an already allocated struct gomp_work_share.
   This shouldn't touch the next_alloc field.  */

void
gomp_init_work_share (struct gomp_work_share *ws, size_t ordered,
		      unsigned nthreads)
{
  gomp_mutex_init (&ws->lock);
  if (__builtin_expect (ordered, 0))
    {
#define INLINE_ORDERED_TEAM_IDS_SIZE \
  (sizeof (struct gomp_work_share) \
   - offsetof (struct gomp_work_share, inline_ordered_team_ids))

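      /* ORDERED is 1 for a plain ordered clause, in which case only the
         per-thread team id array is needed.  Any larger value is taken as
         1 plus the number of extra bytes the caller wants laid out after
         that array, whose size is then padded for long long alignment so
         the trailing data stays naturally aligned.  Either way the data
         lives in the inline buffer when it fits and is heap-allocated
         otherwise.  */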
      if (__builtin_expect (ordered != 1, 0))
	{
	  size_t o = nthreads * sizeof (*ws->ordered_team_ids);
	  o += __alignof__ (long long) - 1;
	  if ((offsetof (struct gomp_work_share, inline_ordered_team_ids)
	       & (__alignof__ (long long) - 1)) == 0
	      && __alignof__ (struct gomp_work_share)
		 >= __alignof__ (long long))
	    o &= ~(__alignof__ (long long) - 1);
	  ordered += o - 1;
	}
      else
	ordered = nthreads * sizeof (*ws->ordered_team_ids);
      if (ordered > INLINE_ORDERED_TEAM_IDS_SIZE)
	ws->ordered_team_ids = team_malloc (ordered);
      else
	ws->ordered_team_ids = ws->inline_ordered_team_ids;
      memset (ws->ordered_team_ids, '\0', ordered);
      ws->ordered_num_used = 0;
      ws->ordered_owner = -1;
      ws->ordered_cur = 0;
    }
  else
    ws->ordered_team_ids = ws->inline_ordered_team_ids;
  gomp_ptrlock_init (&ws->next_ws, NULL);
  ws->threads_completed = 0;
}

/* Do any needed destruction of gomp_work_share fields before it
   is put back into the free gomp_work_share cache or freed.  */

void
gomp_fini_work_share (struct gomp_work_share *ws)
{
  gomp_mutex_destroy (&ws->lock);
  if (ws->ordered_team_ids != ws->inline_ordered_team_ids)
    team_free (ws->ordered_team_ids);
  gomp_ptrlock_destroy (&ws->next_ws);
}

/* Free a work share struct; if not orphaned, put it into the current
   team's free gomp_work_share cache.  */

static inline void
free_work_share (struct gomp_team *team, struct gomp_work_share *ws)
{
  gomp_fini_work_share (ws);
  if (__builtin_expect (team == NULL, 0))
    free (ws);
  else
    {
      struct gomp_work_share *next_ws;
#ifdef HAVE_SYNC_BUILTINS
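      /* Push WS onto the head of the team's free list with a CAS loop.
         alloc_work_share never rewrites the head pointer (it only
         detaches entries behind it), so a successful compare-and-swap
         against the head is enough to publish WS.  */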
      do
	{
	  next_ws = team->work_share_list_free;
	  ws->next_free = next_ws;
	}
      while (!__sync_bool_compare_and_swap (&team->work_share_list_free,
					    next_ws, ws));
#else
      gomp_mutex_lock (&team->work_share_list_free_lock);
      next_ws = team->work_share_list_free;
      ws->next_free = next_ws;
      team->work_share_list_free = ws;
      gomp_mutex_unlock (&team->work_share_list_free_lock);
#endif
    }
}

/* The current thread is ready to begin the next work sharing construct.
   In all cases, thr->ts.work_share is updated to point to the new
   structure.  Return true if this was the first thread to reach this
   point; that thread is expected to finish initializing the work share
   and publish it via gomp_work_share_init_done.  */

bool
gomp_work_share_start (size_t ordered)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws;

  /* Work sharing constructs can be orphaned.  */
  if (team == NULL)
    {
#ifdef GOMP_USE_ALIGNED_WORK_SHARES
      ws = gomp_aligned_alloc (__alignof (struct gomp_work_share),
			       sizeof (*ws));
#else
      ws = gomp_malloc (sizeof (*ws));
#endif
      gomp_init_work_share (ws, ordered, 1);
      thr->ts.work_share = ws;
      return true;
    }

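  /* The next work share is found through the next_ws ptrlock of the one
     this thread just finished.  The first thread to get here sees NULL,
     allocates a fresh work share and later publishes it via
     gomp_work_share_init_done; the other threads wait in gomp_ptrlock_get
     until that pointer has been set.  */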
  ws = thr->ts.work_share;
  thr->ts.last_work_share = ws;
  ws = gomp_ptrlock_get (&ws->next_ws);
  if (ws == NULL)
    {
      /* This thread encountered a new ws first.  */
      struct gomp_work_share *ws = alloc_work_share (team);
      gomp_init_work_share (ws, ordered, team->nthreads);
      thr->ts.work_share = ws;
      return true;
    }
  else
    {
      thr->ts.work_share = ws;
      return false;
    }
}

/* The current thread is done with its current work sharing construct.
   This version does imply a barrier at the end of the work-share.  */

void
gomp_work_share_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  gomp_barrier_state_t bstate;

  /* Work sharing constructs can be orphaned.  */
  if (team == NULL)
    {
      free_work_share (NULL, thr->ts.work_share);
      thr->ts.work_share = NULL;
      return;
    }

  bstate = gomp_barrier_wait_start (&team->barrier);

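  /* Only the last thread to enter the barrier recycles the previous work
     share: once every thread has arrived here, none of them can still be
     referencing it.  The current work share is recorded in
     work_shares_to_free so the team teardown code can still finalize it
     if the region is cut short by cancellation.  */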
  if (gomp_barrier_last_thread (bstate))
    {
      if (__builtin_expect (thr->ts.last_work_share != NULL, 1))
	{
	  team->work_shares_to_free = thr->ts.work_share;
	  free_work_share (team, thr->ts.last_work_share);
	}
    }

  gomp_team_barrier_wait_end (&team->barrier, bstate);
  thr->ts.last_work_share = NULL;
}

/* The current thread is done with its current work sharing construct.
   This version implies a cancellable barrier at the end of the work-share.  */

bool
gomp_work_share_end_cancel (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  gomp_barrier_state_t bstate;

  /* Cancellable work sharing constructs cannot be orphaned.  */
  bstate = gomp_barrier_wait_cancel_start (&team->barrier);

  if (gomp_barrier_last_thread (bstate))
    {
      if (__builtin_expect (thr->ts.last_work_share != NULL, 1))
	{
	  team->work_shares_to_free = thr->ts.work_share;
	  free_work_share (team, thr->ts.last_work_share);
	}
    }
  thr->ts.last_work_share = NULL;

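  /* A true result means the barrier was cancelled; the caller propagates
     this back to the compiler-generated code so the cancelled region can
     be left early.  */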
  return gomp_team_barrier_wait_cancel_end (&team->barrier, bstate);
}

/* The current thread is done with its current work sharing construct.
   This version does NOT imply a barrier at the end of the work-share.  */

void
gomp_work_share_end_nowait (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws = thr->ts.work_share;
  unsigned completed;

  /* Work sharing constructs can be orphaned.  */
  if (team == NULL)
    {
      free_work_share (NULL, ws);
      thr->ts.work_share = NULL;
      return;
    }

  if (__builtin_expect (thr->ts.last_work_share == NULL, 0))
    return;

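  /* Without a barrier, completion of WS is tracked with a counter.  The
     thread that brings threads_completed up to team->nthreads knows that
     every thread has started (and now finished) the current work share,
     so nothing can still reference the previous one and it is freed
     below.  */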
#ifdef HAVE_SYNC_BUILTINS
  completed = __sync_add_and_fetch (&ws->threads_completed, 1);
#else
  gomp_mutex_lock (&ws->lock);
  completed = ++ws->threads_completed;
  gomp_mutex_unlock (&ws->lock);
#endif

  if (completed == team->nthreads)
    {
      team->work_shares_to_free = thr->ts.work_share;
      free_work_share (team, thr->ts.last_work_share);
    }
  thr->ts.last_work_share = NULL;
}