1  /* Copyright (C) 2017-2023 Free Software Foundation, Inc.
       2     Contributed by Mentor Embedded.
       3  
       4     This file is part of the GNU Offloading and Multi Processing Library
       5     (libgomp).
       6  
       7     Libgomp is free software; you can redistribute it and/or modify it
       8     under the terms of the GNU General Public License as published by
       9     the Free Software Foundation; either version 3, or (at your option)
      10     any later version.
      11  
      12     Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
      13     WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
      14     FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
      15     more details.
      16  
      17     Under Section 7 of GPL version 3, you are granted additional
      18     permissions described in the GCC Runtime Library Exception, version
      19     3.1, as published by the Free Software Foundation.
      20  
      21     You should have received a copy of the GNU General Public License and
      22     a copy of the GCC Runtime Library Exception along with this program;
      23     see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
      24     <http://www.gnu.org/licenses/>.  */
      25  
      26  /* This file handles maintenance of threads on AMD GCN.  */
      27  
      28  #include "libgomp.h"
      29  #include <stdlib.h>
      30  #include <string.h>
      31  
      32  static void gomp_thread_start (struct gomp_thread_pool *);
      33  
      34  /* This externally visible function handles target region entry.  It
      35     sets up a per-team thread pool and transfers control by returning to
      36     the kernel in the master thread or gomp_thread_start in other threads.
      37  
      38     The name of this function is part of the interface with the compiler: for
      39     each OpenMP kernel the compiler configures the stack, then calls here.
      40  
      41     Likewise, gomp_gcn_exit_kernel is called during the kernel epilogue.  */
      42  
void
gomp_gcn_enter_kernel (void)
{
  /* Position in launch dimension 1 identifies this thread within its team;
     thread 0 acts as the team master.  */
  int threadid = __builtin_gcn_dim_pos (1);

  if (threadid == 0)
    {
      /* Master thread: set up all per-team state before releasing the
	 other threads at the s_barrier below.  */
      int numthreads = __builtin_gcn_dim_size (1);
      int teamid = __builtin_gcn_dim_pos(0);

      /* Set up the global state.
	 Every team will do this, but that should be harmless.  */
      /* NOTE(review): nthreads_var is hard-coded to 16 while
	 thread_limit_var tracks the actual launch size; presumably 16 is
	 the intended default team size -- confirm against the GCN plugin's
	 launch geometry.  */
      gomp_global_icv.nthreads_var = 16;
      gomp_global_icv.thread_limit_var = numthreads;
      /* Starting additional threads is not supported.  */
      gomp_global_icv.dyn_var = true;

      /* Initialize the team arena for optimized memory allocation.
         The arena has been allocated on the host side, and the address
         passed in via the kernargs.  Each team takes a small slice of it.  */
      struct kernargs_abi *kernargs =
	(struct kernargs_abi*) __builtin_gcn_kernarg_ptr ();
      void *team_arena = ((void*)kernargs->arena_ptr
			  + kernargs->arena_size_per_team * teamid);
      /* The three arena bookkeeping words live at fixed LDS addresses
	 (TEAM_ARENA_*), so every thread in the team sees the same values.  */
      void * __lds *arena_start = (void * __lds *)TEAM_ARENA_START;
      void * __lds *arena_free = (void * __lds *)TEAM_ARENA_FREE;
      void * __lds *arena_end = (void * __lds *)TEAM_ARENA_END;
      *arena_start = team_arena;
      *arena_free = team_arena;
      *arena_end = team_arena + kernargs->arena_size_per_team;

      /* Allocate and initialize the team-local-storage data.
	 One zero-initialized gomp_thread per hardware thread.  */
      struct gomp_thread *thrs = team_malloc_cleared (sizeof (*thrs)
						      * numthreads);
      set_gcn_thrs (thrs);

      /* Allocate and initialize a pool of threads in the team.
         The threads are already running, of course, we just need to manage
         the communication between them.  */
      struct gomp_thread_pool *pool = team_malloc (sizeof (*pool));
      pool->threads = team_malloc (sizeof (void *) * numthreads);
      for (int tid = 0; tid < numthreads; tid++)
	pool->threads[tid] = &thrs[tid];
      pool->threads_size = numthreads;
      pool->threads_used = numthreads;
      pool->threads_busy = 1;
      pool->last_team = NULL;
      gomp_simple_barrier_init (&pool->threads_dock, numthreads);
      /* Publish the pool in thread 0's slot; the worker branch below reads
	 it via gcn_thrs ()[0].thread_pool after the barrier.  */
      thrs->thread_pool = pool;

      /* Pairs with the s_barrier in the else branch: no worker may touch
	 the pool until the master has finished initializing it.  */
      asm ("s_barrier" ::: "memory");
      return;  /* Return to kernel.  */
    }
  else
    {
      /* Worker threads wait here until the master has built the pool.  */
      asm ("s_barrier" ::: "memory");
      gomp_thread_start (gcn_thrs ()[0].thread_pool);
      /* gomp_thread_start does not return.  */
    }
}
     103  
     104  void
     105  gomp_gcn_exit_kernel (void)
     106  {
     107    gomp_free_thread (gcn_thrs ());
     108    team_free (gcn_thrs ());
     109  }
     110  
     111  /* This function contains the idle loop in which a thread waits
     112     to be called up to become part of a team.  */
     113  
     114  static void
     115  gomp_thread_start (struct gomp_thread_pool *pool)
     116  {
     117    struct gomp_thread *thr = gomp_thread ();
     118  
     119    gomp_sem_init (&thr->release, 0);
     120    thr->thread_pool = pool;
     121  
     122    /* The loop exits only when "fn" is assigned "gomp_free_pool_helper",
     123       which contains "s_endpgm", or an infinite no-op loop is
     124       suspected (this happens when the thread master crashes).  */
     125    int nul_limit = 99;
     126    do
     127      {
     128        gomp_simple_barrier_wait (&pool->threads_dock);
     129        if (!thr->fn)
     130  	{
     131  	  if (nul_limit-- > 0)
     132  	    continue;
     133  	  else
     134  	    {
     135  	      const char msg[] = ("team master not responding;"
     136  				  " slave thread aborting");
     137  	      write (2, msg, sizeof (msg)-1);
     138  	      abort();
     139  	    }
     140  	}
     141        thr->fn (thr->data);
     142        thr->fn = NULL;
     143  
     144        struct gomp_task *task = thr->task;
     145        gomp_team_barrier_wait_final (&thr->ts.team->barrier);
     146        gomp_finish_task (task);
     147      }
     148    while (1);
     149  }
     150  
     151  /* Launch a team.  */
     152  
     153  void
     154  gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
     155  		 unsigned flags, struct gomp_team *team,
     156  		 struct gomp_taskgroup *taskgroup)
     157  {
     158    struct gomp_thread *thr, *nthr;
     159    struct gomp_task *task;
     160    struct gomp_task_icv *icv;
     161    struct gomp_thread_pool *pool;
     162    unsigned long nthreads_var;
     163  
     164    thr = gomp_thread ();
     165    pool = thr->thread_pool;
     166    task = thr->task;
     167    icv = task ? &task->icv : &gomp_global_icv;
     168  
     169    /* Always save the previous state, even if this isn't a nested team.
     170       In particular, we should save any work share state from an outer
     171       orphaned work share construct.  */
     172    team->prev_ts = thr->ts;
     173  
     174    thr->ts.team = team;
     175    thr->ts.team_id = 0;
     176    ++thr->ts.level;
     177    if (nthreads > 1)
     178      ++thr->ts.active_level;
     179    thr->ts.work_share = &team->work_shares[0];
     180    thr->ts.last_work_share = NULL;
     181    thr->ts.single_count = 0;
     182    thr->ts.static_trip = 0;
     183    thr->task = &team->implicit_task[0];
     184    nthreads_var = icv->nthreads_var;
     185    gomp_init_task (thr->task, task, icv);
     186    team->implicit_task[0].icv.nthreads_var = nthreads_var;
     187    team->implicit_task[0].taskgroup = taskgroup;
     188  
     189    if (nthreads == 1)
     190      return;
     191  
     192    /* Release existing idle threads.  */
     193    for (unsigned i = 1; i < nthreads; ++i)
     194      {
     195        nthr = pool->threads[i];
     196        nthr->ts.team = team;
     197        nthr->ts.work_share = &team->work_shares[0];
     198        nthr->ts.last_work_share = NULL;
     199        nthr->ts.team_id = i;
     200        nthr->ts.level = team->prev_ts.level + 1;
     201        nthr->ts.active_level = thr->ts.active_level;
     202        nthr->ts.single_count = 0;
     203        nthr->ts.static_trip = 0;
     204        nthr->task = &team->implicit_task[i];
     205        gomp_init_task (nthr->task, task, icv);
     206        team->implicit_task[i].icv.nthreads_var = nthreads_var;
     207        team->implicit_task[i].taskgroup = taskgroup;
     208        nthr->fn = fn;
     209        nthr->data = data;
     210        team->ordered_release[i] = &nthr->release;
     211      }
     212  
     213    gomp_simple_barrier_wait (&pool->threads_dock);
     214  }
     215  
     216  #include "../../team.c"