/* Copyright (C) 2015-2023 Free Software Foundation, Inc.
   Contributed by Mentor Embedded.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This is an AMD GCN specific implementation of a barrier synchronization
   mechanism for libgomp.  This type is private to the library.  This
   implementation uses atomic instructions and the s_barrier instruction.  It
   uses MEMMODEL_RELAXED here because barriers are within workgroups and
   therefore don't need to flush caches.  */
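
/* Outline of the protocol used below: gomp_barrier_wait_start (see bar.h)
   returns a state with BAR_WAS_LAST set for the final thread to arrive;
   that thread restores BAR->awaited to BAR->total for the next use and
   advances BAR->generation, and then every thread in the workgroup meets
   at the hardware s_barrier.  */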

#include <limits.h>
#include "libgomp.h"

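/* Complete a plain (non-team) barrier.  The last thread to arrive resets
   the arrival counter and bumps the generation; afterwards all threads
   meet at the hardware s_barrier, which is sufficient synchronization
   within a single workgroup.  */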
void
gomp_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
{
  if (__builtin_expect (state & BAR_WAS_LAST, 0))
    {
      /* Next time we'll be awaiting TOTAL threads again.  */
      bar->awaited = bar->total;
      __atomic_store_n (&bar->generation, bar->generation + BAR_INCR,
			MEMMODEL_RELAXED);
    }
  if (bar->total > 1)
    asm ("s_barrier" ::: "memory");
}

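/* Arrive at and wait on a plain barrier.  */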
void
gomp_barrier_wait (gomp_barrier_t *bar)
{
  gomp_barrier_wait_end (bar, gomp_barrier_wait_start (bar));
}

/* Like gomp_barrier_wait, except that if the encountering thread
   is not the last one to hit the barrier, it returns immediately.
   The intended usage is that a thread which intends to gomp_barrier_destroy
   this barrier calls gomp_barrier_wait, while all other threads
   call gomp_barrier_wait_last.  When gomp_barrier_wait returns,
   the barrier can be safely destroyed.  */
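
/* For example, the shutdown sequence described above looks roughly like
   this (illustrative only; THREAD_WILL_DESTROY_THE_BARRIER stands for
   whatever condition selects the destroying thread):

     if (thread_will_destroy_the_barrier)
       {
	 gomp_barrier_wait (bar);
	 gomp_barrier_destroy (bar);
       }
     else
       gomp_barrier_wait_last (bar);  */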

void
gomp_barrier_wait_last (gomp_barrier_t *bar)
{
  /* Deferring to gomp_barrier_wait does not use the optimization opportunity
     allowed by the interface contract for all-but-last participants.  The
     original implementation in config/linux/bar.c handles this better.  */
  gomp_barrier_wait (bar);
}

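/* Wake up COUNT threads waiting at the team barrier.  On this target the
   waiters are spinning on the hardware s_barrier rather than sleeping, so
   rejoining the s_barrier is all that is needed and COUNT is ignored.  */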
void
gomp_team_barrier_wake (gomp_barrier_t *bar, int count)
{
  if (bar->total > 1)
    asm ("s_barrier" ::: "memory");
}

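/* Complete a team barrier.  If the last thread to arrive finds queued
   tasks it calls gomp_barrier_handle_tasks and then waits like the other
   threads; otherwise it publishes the next generation and rejoins the
   hardware barrier.  Waiting threads loop on s_barrier, helping with any
   pending tasks, until they observe the new generation.  */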
void
gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
{
  unsigned int generation, gen;

  if (__builtin_expect (state & BAR_WAS_LAST, 0))
    {
      /* Next time we'll be awaiting TOTAL threads again.  */
      struct gomp_thread *thr = gomp_thread ();
      struct gomp_team *team = thr->ts.team;

      bar->awaited = bar->total;
      team->work_share_cancelled = 0;
      if (__builtin_expect (team->task_count, 0))
	{
	  gomp_barrier_handle_tasks (state);
	  state &= ~BAR_WAS_LAST;
	}
      else
	{
	  state &= ~BAR_CANCELLED;
	  state += BAR_INCR - BAR_WAS_LAST;
	  __atomic_store_n (&bar->generation, state, MEMMODEL_RELAXED);
	  if (bar->total > 1)
	    asm ("s_barrier" ::: "memory");
	  return;
	}
    }

  generation = state;
  state &= ~BAR_CANCELLED;
  int retry = 100;
  do
    {
      if (retry-- == 0)
	{
	  /* It really shouldn't happen that barriers get out of sync, but
	     if they do then this will loop until they realign, so we need
	     to avoid an infinite loop where the thread just isn't there.  */
	  const char msg[] = ("Barrier sync failed (another thread died?);"
			      " aborting.");
	  write (2, msg, sizeof (msg)-1);
	  abort ();
	}

      asm ("s_barrier" ::: "memory");
      gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
      if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
	{
	  gomp_barrier_handle_tasks (state);
	  gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
	}
      generation |= gen & BAR_WAITING_FOR_TASK;
    }
  while (gen != state + BAR_INCR);
}

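/* Arrive at and wait on the team barrier.  */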
void
gomp_team_barrier_wait (gomp_barrier_t *bar)
{
  gomp_team_barrier_wait_end (bar, gomp_barrier_wait_start (bar));
}

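/* Like gomp_team_barrier_wait, but for the final barrier of a team; the
   last thread also resets BAR->awaited_final for the barrier's next use.  */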
void
gomp_team_barrier_wait_final (gomp_barrier_t *bar)
{
  gomp_barrier_state_t state = gomp_barrier_wait_final_start (bar);
  if (__builtin_expect (state & BAR_WAS_LAST, 0))
    bar->awaited_final = bar->total;
  gomp_team_barrier_wait_end (bar, state);
}

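/* Cancellable variant of gomp_team_barrier_wait_end.  Returns true if the
   barrier was cancelled before all threads arrived, false once the barrier
   completes normally.  */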
bool
gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
				   gomp_barrier_state_t state)
{
  unsigned int generation, gen;

  if (__builtin_expect (state & BAR_WAS_LAST, 0))
    {
      /* Next time we'll be awaiting TOTAL threads again.  */
      /* BAR_CANCELLED should never be set in state here, because
	 cancellation means that at least one of the threads has been
	 cancelled, thus on a cancellable barrier we should never see
	 all threads arrive.  */
      struct gomp_thread *thr = gomp_thread ();
      struct gomp_team *team = thr->ts.team;

      bar->awaited = bar->total;
      team->work_share_cancelled = 0;
      if (__builtin_expect (team->task_count, 0))
	{
	  gomp_barrier_handle_tasks (state);
	  state &= ~BAR_WAS_LAST;
	}
      else
	{
	  state += BAR_INCR - BAR_WAS_LAST;
	  __atomic_store_n (&bar->generation, state, MEMMODEL_RELAXED);
	  if (bar->total > 1)
	    asm ("s_barrier" ::: "memory");
	  return false;
	}
    }

  if (__builtin_expect (state & BAR_CANCELLED, 0))
    return true;

  generation = state;
  int retry = 100;
  do
    {
      if (retry-- == 0)
	{
	  /* It really shouldn't happen that barriers get out of sync, but
	     if they do then this will loop until they realign, so we need
	     to avoid an infinite loop where the thread just isn't there.  */
	  const char msg[] = ("Barrier sync failed (another thread died?);"
			      " aborting.");
	  write (2, msg, sizeof (msg)-1);
	  abort ();
	}

      if (bar->total > 1)
	asm ("s_barrier" ::: "memory");
      gen = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED);
      if (__builtin_expect (gen & BAR_CANCELLED, 0))
	return true;
      if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
	{
	  gomp_barrier_handle_tasks (state);
	  gen = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED);
	}
      generation |= gen & BAR_WAITING_FOR_TASK;
    }
  while (gen != state + BAR_INCR);

  return false;
}

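/* Arrive at and wait on a cancellable team barrier; returns true if the
   barrier was cancelled.  */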
bool
gomp_team_barrier_wait_cancel (gomp_barrier_t *bar)
{
  return gomp_team_barrier_wait_cancel_end (bar, gomp_barrier_wait_start (bar));
}

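/* Mark the team barrier as cancelled and wake up any threads waiting on
   it.  */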
void
gomp_team_barrier_cancel (struct gomp_team *team)
{
  gomp_mutex_lock (&team->task_lock);
  if (team->barrier.generation & BAR_CANCELLED)
    {
      gomp_mutex_unlock (&team->task_lock);
      return;
    }
  team->barrier.generation |= BAR_CANCELLED;
  gomp_mutex_unlock (&team->task_lock);
  gomp_team_barrier_wake (&team->barrier, INT_MAX);
}