(root)/
gcc-13.2.0/
libgomp/
taskloop.c
       1  /* Copyright (C) 2015-2023 Free Software Foundation, Inc.
       2     Contributed by Jakub Jelinek <jakub@redhat.com>.
       3  
       4     This file is part of the GNU Offloading and Multi Processing Library
       5     (libgomp).
       6  
       7     Libgomp is free software; you can redistribute it and/or modify it
       8     under the terms of the GNU General Public License as published by
       9     the Free Software Foundation; either version 3, or (at your option)
      10     any later version.
      11  
      12     Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
      13     WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
      14     FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
      15     more details.
      16  
      17     Under Section 7 of GPL version 3, you are granted additional
      18     permissions described in the GCC Runtime Library Exception, version
      19     3.1, as published by the Free Software Foundation.
      20  
      21     You should have received a copy of the GNU General Public License and
      22     a copy of the GCC Runtime Library Exception along with this program;
      23     see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
      24     <http://www.gnu.org/licenses/>.  */
      25  
      26  /* This file handles the taskloop construct.  It is included twice, once
      27     for the long and once for unsigned long long variant.  */
      28  
/* Called when encountering a taskloop construct: divide the iteration
   space from START to END with step STEP into chunks and run one task
   per chunk.  NUM_TASKS is either the requested number of tasks or,
   when GOMP_TASK_FLAG_GRAINSIZE is set in FLAGS, the requested
   grainsize (iterations per task).  */
      32  
void
GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
	       long arg_size, long arg_align, unsigned flags,
	       unsigned long num_tasks, int priority,
	       TYPE start, TYPE end, TYPE step)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
     tied to one thread all the time.  This means UNTIED tasks must be
     tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
     might be running on different thread than FN.  */
  if (cpyfn)
    flags &= ~GOMP_TASK_FLAG_IF;
  flags &= ~GOMP_TASK_FLAG_UNTIED;
#endif

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team && gomp_team_barrier_cancelled (&team->barrier))
    {
      /* Also reached via goto from the iteration-count computation below
	 when the loop has zero iterations.  */
    early_return:
      if ((flags & (GOMP_TASK_FLAG_NOGROUP | GOMP_TASK_FLAG_REDUCTION))
	  == GOMP_TASK_FLAG_REDUCTION)
	{
	  /* DATA starts with the two loop bounds followed by a pointer to
	     the task-reduction descriptor array.  */
	  struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; };
	  uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr;
	  /* Tell callers GOMP_taskgroup_reduction_register has not been
	     called.  */
	  ptr[2] = 0;
	}
      return;
    }

  /* Compute N, the total number of loop iterations, rounding the division
     up.  Bail out through early_return when the loop is empty.  */
#ifdef TYPE_is_long
  /* Signed variant: the sign of STEP gives the direction; bias S by -1/+1
     so the division rounds away from zero (i.e. up in iteration count).  */
  TYPE s = step;
  if (step > 0)
    {
      if (start >= end)
	goto early_return;
      s--;
    }
  else
    {
      if (start <= end)
	goto early_return;
      s++;
    }
  UTYPE n = (end - start + s) / step;
#else
  /* Unsigned variant: STEP's sign is not usable, so the direction is
     communicated via GOMP_TASK_FLAG_UP instead.  */
  UTYPE n;
  if (flags & GOMP_TASK_FLAG_UP)
    {
      if (start >= end)
	goto early_return;
      n = (end - start + step - 1) / step;
    }
  else
    {
      if (start <= end)
	goto early_return;
      n = (start - end - step - 1) / -step;
    }
#endif

  /* Chunking protocol: the loop is split into NUM_TASKS tasks.  Each
     task's range is [START, START + TASK_STEP); after the end of task
     number NFIRST has been computed, the per-task step switches to
     NFIRST_TASK_STEP for all remaining tasks.  This lets the first
     NFIRST + 1 tasks take one chunk size and the rest a different one,
     distributing any remainder iterations.  */
  TYPE task_step = step;
  TYPE nfirst_task_step = step;
  unsigned long nfirst = n;
  if (flags & GOMP_TASK_FLAG_GRAINSIZE)
    {
      /* NUM_TASKS actually carries the requested grainsize.  */
      unsigned long grainsize = num_tasks;
#ifdef TYPE_is_long
      num_tasks = n / grainsize;
#else
      /* N / GRAINSIZE might not fit in unsigned long on the long long
	 variant; saturate to ~0UL when it is truncated.  */
      UTYPE ndiv = n / grainsize;
      num_tasks = ndiv;
      if (num_tasks != ndiv)
	num_tasks = ~0UL;
#endif
      if ((flags & GOMP_TASK_FLAG_STRICT)
	  && num_tasks != ~0ULL)
	{
	  /* strict modifier: every task gets exactly GRAINSIZE iterations
	     except possibly one final task covering the remainder MOD.  */
	  UTYPE mod = n % grainsize;
	  task_step = (TYPE) grainsize * step;
	  if (mod)
	    {
	      num_tasks++;
	      nfirst_task_step = (TYPE) mod * step;
	      if (num_tasks == 1)
		task_step = nfirst_task_step;
	      else
		nfirst = num_tasks - 2;
	    }
	}
      else if (num_tasks <= 1)
	{
	  /* Fewer than 2*GRAINSIZE iterations: run everything as one
	     task.  */
	  num_tasks = 1;
	  task_step = end - start;
	}
      else if (num_tasks >= grainsize
#ifndef TYPE_is_long
	       && num_tasks != ~0UL
#endif
	      )
	{
	  /* Many tasks: give each task GRAINSIZE iterations, and spread
	     the remainder by giving the first N - MUL tasks one extra
	     iteration each.  */
	  UTYPE mul = num_tasks * grainsize;
	  task_step = (TYPE) grainsize * step;
	  if (mul != n)
	    {
	      nfirst_task_step = task_step;
	      task_step += step;
	      nfirst = n - mul - 1;
	    }
	}
      else
	{
	  /* Divide N as evenly as possible among NUM_TASKS tasks; the
	     first N % NUM_TASKS tasks get one extra iteration.  */
	  UTYPE div = n / num_tasks;
	  UTYPE mod = n % num_tasks;
	  task_step = (TYPE) div * step;
	  if (mod)
	    {
	      nfirst_task_step = task_step;
	      task_step += step;
	      nfirst = mod - 1;
	    }
	}
    }
  else
    {
      /* num_tasks clause (or no clause: default to one task per thread).
	 Never create more tasks than iterations; otherwise divide evenly,
	 with the first N % NUM_TASKS tasks taking one extra iteration.  */
      if (num_tasks == 0)
	num_tasks = team ? team->nthreads : 1;
      if (num_tasks >= n)
	num_tasks = n;
      else
	{
	  UTYPE div = n / num_tasks;
	  UTYPE mod = n % num_tasks;
	  task_step = (TYPE) div * step;
	  if (mod)
	    {
	      nfirst_task_step = task_step;
	      task_step += step;
	      nfirst = mod - 1;
	    }
	}
    }

  if (flags & GOMP_TASK_FLAG_NOGROUP)
    {
      /* nogroup: no implicit taskgroup, but still honour cancellation of
	 an enclosing taskgroup (directly or via a cancelled workshare
	 predecessor).  */
      if (__builtin_expect (gomp_cancel_var, 0)
	  && thr->task
	  && thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }
  else
    {
      /* Wrap the generated tasks in an implicit taskgroup, and register
	 any task reductions with it.  */
      ialias_call (GOMP_taskgroup_start) ();
      if (flags & GOMP_TASK_FLAG_REDUCTION)
	{
	  struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; };
	  uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr;
	  ialias_call (GOMP_taskgroup_reduction_register) (ptr);
	}
    }

  /* Clamp the requested priority to the max-task-priority ICV.  */
  if (priority > gomp_max_task_priority_var)
    priority = gomp_max_task_priority_var;

  /* Run the tasks inline (undeferred) when if(0) was given, there is no
     team, we are inside a final task, or the team already has far more
     queued tasks than threads.  */
  if ((flags & GOMP_TASK_FLAG_IF) == 0 || team == NULL
      || (thr->task && thr->task->final_task)
      || team->task_count + num_tasks > 64 * team->nthreads)
    {
      unsigned long i;
      if (__builtin_expect (cpyfn != NULL, 0))
	{
	  /* With a copy constructor, populate all NUM_TASKS argument
	     blocks via CPYFN first, then run FN on each; THR->TASK is
	     pointed at the corresponding task context around each call so
	     CPYFN/FN observe it.  */
	  struct gomp_task task[num_tasks];
	  struct gomp_task *parent = thr->task;
	  /* Round ARG_SIZE up to ARG_ALIGN and carve aligned per-task
	     argument blocks out of one stack buffer.  */
	  arg_size = (arg_size + arg_align - 1) & ~(arg_align - 1);
	  char buf[num_tasks * arg_size + arg_align - 1];
	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
				& ~(uintptr_t) (arg_align - 1));
	  char *orig_arg = arg;
	  for (i = 0; i < num_tasks; i++)
	    {
	      gomp_init_task (&task[i], parent, gomp_icv (false));
	      task[i].priority = priority;
	      task[i].kind = GOMP_TASK_UNDEFERRED;
	      task[i].final_task = (thr->task && thr->task->final_task)
				   || (flags & GOMP_TASK_FLAG_FINAL);
	      if (thr->task)
		{
		  task[i].in_tied_task = thr->task->in_tied_task;
		  task[i].taskgroup = thr->task->taskgroup;
		}
	      thr->task = &task[i];
	      cpyfn (arg, data);
	      arg += arg_size;
	    }
	  arg = orig_arg;
	  for (i = 0; i < num_tasks; i++)
	    {
	      thr->task = &task[i];
	      /* Store this chunk's [begin, end) into the first two TYPE
		 slots of the argument block, then advance START.  */
	      ((TYPE *)arg)[0] = start;
	      start += task_step;
	      ((TYPE *)arg)[1] = start;
	      /* Switch to the remainder chunk size after task NFIRST.  */
	      if (i == nfirst)
		task_step = nfirst_task_step;
	      fn (arg);
	      arg += arg_size;
	      /* Detach any children FN created from the stack-allocated
		 TASK[I] before it goes away.  */
	      if (!priority_queue_empty_p (&task[i].children_queue,
					   MEMMODEL_RELAXED))
		{
		  gomp_mutex_lock (&team->task_lock);
		  gomp_clear_parent (&task[i].children_queue);
		  gomp_mutex_unlock (&team->task_lock);
		}
	      gomp_end_task ();
	    }
	}
      else
	/* No copy constructor: reuse DATA in place for each chunk.  */
	for (i = 0; i < num_tasks; i++)
	  {
	    struct gomp_task task;

	    gomp_init_task (&task, thr->task, gomp_icv (false));
	    task.priority = priority;
	    task.kind = GOMP_TASK_UNDEFERRED;
	    task.final_task = (thr->task && thr->task->final_task)
			      || (flags & GOMP_TASK_FLAG_FINAL);
	    if (thr->task)
	      {
		task.in_tied_task = thr->task->in_tied_task;
		task.taskgroup = thr->task->taskgroup;
	      }
	    thr->task = &task;
	    /* First two TYPE slots of DATA carry this chunk's bounds.  */
	    ((TYPE *)data)[0] = start;
	    start += task_step;
	    ((TYPE *)data)[1] = start;
	    if (i == nfirst)
	      task_step = nfirst_task_step;
	    fn (data);
	    /* Detach children from the stack-allocated TASK.  */
	    if (!priority_queue_empty_p (&task.children_queue,
					 MEMMODEL_RELAXED))
	      {
		gomp_mutex_lock (&team->task_lock);
		gomp_clear_parent (&task.children_queue);
		gomp_mutex_unlock (&team->task_lock);
	      }
	    gomp_end_task ();
	  }
    }
  else
    {
      /* Deferred path: heap-allocate all NUM_TASKS tasks with their
	 argument blocks, then enqueue them under the team lock.  */
      struct gomp_task *tasks[num_tasks];
      struct gomp_task *parent = thr->task;
      struct gomp_taskgroup *taskgroup = parent->taskgroup;
      char *arg;
      int do_wake;
      unsigned long i;

      for (i = 0; i < num_tasks; i++)
	{
	  /* One allocation per task: the gomp_task struct followed by an
	     ARG_ALIGN-aligned argument block.  */
	  struct gomp_task *task
	    = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1);
	  tasks[i] = task;
	  arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1)
			  & ~(uintptr_t) (arg_align - 1));
	  gomp_init_task (task, parent, gomp_icv (false));
	  task->priority = priority;
	  task->kind = GOMP_TASK_UNDEFERRED;
	  task->in_tied_task = parent->in_tied_task;
	  task->taskgroup = taskgroup;
	  /* Temporarily make TASK current so CPYFN runs in its context.  */
	  thr->task = task;
	  if (cpyfn)
	    {
	      cpyfn (arg, data);
	      task->copy_ctors_done = true;
	    }
	  else
	    memcpy (arg, data, arg_size);
	  /* Store this chunk's [begin, end) bounds and advance START;
	     switch chunk size after task NFIRST as described above.  */
	  ((TYPE *)arg)[0] = start;
	  start += task_step;
	  ((TYPE *)arg)[1] = start;
	  if (i == nfirst)
	    task_step = nfirst_task_step;
	  thr->task = parent;
	  task->kind = GOMP_TASK_WAITING;
	  task->fn = fn;
	  task->fn_data = arg;
	  /* GOMP_TASK_FLAG_FINAL is bit 1; shift it down to 0/1.  */
	  task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
	}
      gomp_mutex_lock (&team->task_lock);
      /* If parallel or taskgroup has been cancelled, don't start new
	 tasks.  Re-checked here under the lock; skipped when CPYFN ran,
	 since copy constructors have already been executed.  */
      if (__builtin_expect (gomp_cancel_var, 0)
	  && cpyfn == NULL)
	{
	  if (gomp_team_barrier_cancelled (&team->barrier))
	    {
	    do_cancel:
	      /* Cancelled: free every task unqueued and bail out.  */
	      gomp_mutex_unlock (&team->task_lock);
	      for (i = 0; i < num_tasks; i++)
		{
		  gomp_finish_task (tasks[i]);
		  free (tasks[i]);
		}
	      if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
		ialias_call (GOMP_taskgroup_end) ();
	      return;
	    }
	  if (taskgroup)
	    {
	      if (taskgroup->cancelled)
		goto do_cancel;
	      if (taskgroup->workshare
		  && taskgroup->prev
		  && taskgroup->prev->cancelled)
		goto do_cancel;
	    }
	}
      if (taskgroup)
	taskgroup->num_children += num_tasks;
      /* Insert every task into the parent's children queue, the
	 taskgroup queue (if any) and the team-wide run queue.  */
      for (i = 0; i < num_tasks; i++)
	{
	  struct gomp_task *task = tasks[i];
	  priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
				 task, priority,
				 PRIORITY_INSERT_BEGIN,
				 /*last_parent_depends_on=*/false,
				 task->parent_depends_on);
	  if (taskgroup)
	    priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				   task, priority, PRIORITY_INSERT_BEGIN,
				   /*last_parent_depends_on=*/false,
				   task->parent_depends_on);
	  priority_queue_insert (PQ_TEAM, &team->task_queue, task, priority,
				 PRIORITY_INSERT_END,
				 /*last_parent_depends_on=*/false,
				 task->parent_depends_on);
	  ++team->task_count;
	  ++team->task_queued_count;
	}
      gomp_team_barrier_set_task_pending (&team->barrier);
      /* Wake up to NUM_TASKS idle threads to pick up the new work.  */
      if (team->task_running_count + !parent->in_tied_task
	  < team->nthreads)
	{
	  do_wake = team->nthreads - team->task_running_count
		    - !parent->in_tied_task;
	  if ((unsigned long) do_wake > num_tasks)
	    do_wake = num_tasks;
	}
      else
	do_wake = 0;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	gomp_team_barrier_wake (&team->barrier, do_wake);
    }
  /* Close the implicit taskgroup, waiting for all generated tasks.  */
  if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
    ialias_call (GOMP_taskgroup_end) ();
}