1  // -*- C++ -*-
       2  
       3  // Copyright (C) 2007-2023 Free Software Foundation, Inc.
       4  //
       5  // This file is part of the GNU ISO C++ Library.  This library is free
       6  // software; you can redistribute it and/or modify it under the terms
       7  // of the GNU General Public License as published by the Free Software
       8  // Foundation; either version 3, or (at your option) any later
       9  // version.
      10  
      11  // This library is distributed in the hope that it will be useful, but
      12  // WITHOUT ANY WARRANTY; without even the implied warranty of
      13  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      14  // General Public License for more details.
      15  
      16  // Under Section 7 of GPL version 3, you are granted additional
      17  // permissions described in the GCC Runtime Library Exception, version
      18  // 3.1, as published by the Free Software Foundation.
      19  
      20  // You should have received a copy of the GNU General Public License and
      21  // a copy of the GCC Runtime Library Exception along with this program;
      22  // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
      23  // <http://www.gnu.org/licenses/>.
      24  
      25  /** @file parallel/partial_sum.h
      26   *  @brief Parallel implementation of std::partial_sum(), i.e. prefix
      27  *  sums.
      28   *  This file is a GNU parallel extension to the Standard C++ Library.
      29   */
      30  
      31  // Written by Johannes Singler.
      32  
      33  #ifndef _GLIBCXX_PARALLEL_PARTIAL_SUM_H
      34  #define _GLIBCXX_PARALLEL_PARTIAL_SUM_H 1
      35  
      36  #include <omp.h>
      37  #include <new>
      38  #include <bits/stl_algobase.h>
      39  #include <parallel/parallel.h>
      40  #include <parallel/numericfwd.h>
      41  
      42  namespace __gnu_parallel
      43  {
      44    // Problem: there is no 0-element given.
      45  
      46    /** @brief Base case prefix sum routine.
      47     *  @param __begin Begin iterator of input sequence.
      48     *  @param __end End iterator of input sequence.
      49     *  @param __result Begin iterator of output sequence.
      50     *  @param __bin_op Associative binary function.
      51     *  @param __value Start value. Must be passed since the neutral
      52     *  element is unknown in general.
      53     *  @return End iterator of output sequence. */
      54    template<typename _IIter,
      55  	   typename _OutputIterator,
      56  	   typename _BinaryOperation>
      57      _OutputIterator
      58      __parallel_partial_sum_basecase(_IIter __begin, _IIter __end,
      59  				    _OutputIterator __result,
      60  				    _BinaryOperation __bin_op,
      61        typename std::iterator_traits <_IIter>::value_type __value)
      62      {
      63        if (__begin == __end)
      64  	return __result;
      65  
      66        while (__begin != __end)
      67  	{
      68  	  __value = __bin_op(__value, *__begin);
      69  	  *__result = __value;
      70  	  ++__result;
      71  	  ++__begin;
      72  	}
      73        return __result;
      74      }
      75  
      76    /** @brief Parallel partial sum implementation, two-phase approach,
      77        no recursion.
      78        *  @param __begin Begin iterator of input sequence.
      79        *  @param __end End iterator of input sequence.
      80        *  @param __result Begin iterator of output sequence.
      81        *  @param __bin_op Associative binary function.
      82        *  @param __n Length of sequence.
      83        *  @return End iterator of output sequence.
      84        */
      85    template<typename _IIter,
      86  	   typename _OutputIterator,
      87  	   typename _BinaryOperation>
      88      _OutputIterator
      89      __parallel_partial_sum_linear(_IIter __begin, _IIter __end,
      90  				  _OutputIterator __result,
      91  				  _BinaryOperation __bin_op,
      92        typename std::iterator_traits<_IIter>::difference_type __n)
      93      {
      94        typedef std::iterator_traits<_IIter> _TraitsType;
      95        typedef typename _TraitsType::value_type _ValueType;
      96        typedef typename _TraitsType::difference_type _DifferenceType;
      97  
      98        if (__begin == __end)
      99  	return __result;
     100  
     101        _ThreadIndex __num_threads =
     102          std::min<_DifferenceType>(__get_max_threads(), __n - 1);
     103  
     104        if (__num_threads < 2)
     105  	{
     106  	  *__result = *__begin;
     107  	  return __parallel_partial_sum_basecase(__begin + 1, __end,
     108  						 __result + 1, __bin_op,
     109  						 *__begin);
     110  	}
     111  
     112        _DifferenceType* __borders;
     113        _ValueType* __sums;
     114  
     115        const _Settings& __s = _Settings::get();
     116  
     117  #     pragma omp parallel num_threads(__num_threads)
     118        {
     119  #       pragma omp single
     120  	{
     121  	  __num_threads = omp_get_num_threads();
     122  	    
     123  	  __borders = new _DifferenceType[__num_threads + 2];
     124  
     125  	  if (__s.partial_sum_dilation == 1.0f)
     126  	    __equally_split(__n, __num_threads + 1, __borders);
     127  	  else
     128  	    {
     129  	      _DifferenceType __first_part_length =
     130  		  std::max<_DifferenceType>(1,
     131  		    __n / (1.0f + __s.partial_sum_dilation * __num_threads));
     132  	      _DifferenceType __chunk_length =
     133  		  (__n - __first_part_length) / __num_threads;
     134  	      _DifferenceType __borderstart =
     135  		  __n - __num_threads * __chunk_length;
     136  	      __borders[0] = 0;
     137  	      for (_ThreadIndex __i = 1; __i < (__num_threads + 1); ++__i)
     138  		{
     139  		  __borders[__i] = __borderstart;
     140  		  __borderstart += __chunk_length;
     141  		}
     142  	      __borders[__num_threads + 1] = __n;
     143  	    }
     144  
     145  	  __sums = static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
     146                                                             * __num_threads));
     147  	  _OutputIterator __target_end;
     148  	} //single
     149  
     150          _ThreadIndex __iam = omp_get_thread_num();
     151          if (__iam == 0)
     152            {
     153              *__result = *__begin;
     154              __parallel_partial_sum_basecase(__begin + 1,
     155  					    __begin + __borders[1],
     156  					    __result + 1,
     157  					    __bin_op, *__begin);
     158              ::new(&(__sums[__iam])) _ValueType(*(__result + __borders[1] - 1));
     159            }
     160          else
     161            {
     162              ::new(&(__sums[__iam]))
     163                _ValueType(__gnu_parallel::accumulate(
     164                                           __begin + __borders[__iam] + 1,
     165                                           __begin + __borders[__iam + 1],
     166                                           *(__begin + __borders[__iam]),
     167                                           __bin_op,
     168                                           __gnu_parallel::sequential_tag()));
     169            }
     170  
     171  #       pragma omp barrier
     172  
     173  #       pragma omp single
     174  	__parallel_partial_sum_basecase(__sums + 1, __sums + __num_threads,
     175  					__sums + 1, __bin_op, __sums[0]);
     176  
     177  #       pragma omp barrier
     178  
     179  	// Still same team.
     180          __parallel_partial_sum_basecase(__begin + __borders[__iam + 1],
     181  					__begin + __borders[__iam + 2],
     182  					__result + __borders[__iam + 1],
     183  					__bin_op, __sums[__iam]);
     184        } //parallel
     185  
     186        for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
     187  	__sums[__i].~_ValueType();
     188        ::operator delete(__sums);
     189  
     190        delete[] __borders;
     191  
     192        return __result + __n;
     193      }
     194  
     195    /** @brief Parallel partial sum front-__end.
     196     *  @param __begin Begin iterator of input sequence.
     197     *  @param __end End iterator of input sequence.
     198     *  @param __result Begin iterator of output sequence.
     199     *  @param __bin_op Associative binary function.
     200     *  @return End iterator of output sequence. */
     201    template<typename _IIter,
     202  	   typename _OutputIterator,
     203  	   typename _BinaryOperation>
     204      _OutputIterator
     205      __parallel_partial_sum(_IIter __begin, _IIter __end,
     206  			   _OutputIterator __result, _BinaryOperation __bin_op)
     207      {
     208        _GLIBCXX_CALL(__begin - __end)
     209  
     210        typedef std::iterator_traits<_IIter> _TraitsType;
     211        typedef typename _TraitsType::value_type _ValueType;
     212        typedef typename _TraitsType::difference_type _DifferenceType;
     213  
     214        _DifferenceType __n = __end - __begin;
     215  
     216        switch (_Settings::get().partial_sum_algorithm)
     217  	{
     218  	case LINEAR:
     219  	  // Need an initial offset.
     220  	  return __parallel_partial_sum_linear(__begin, __end, __result,
     221  					       __bin_op, __n);
     222  	default:
     223  	  // Partial_sum algorithm not implemented.
     224  	  _GLIBCXX_PARALLEL_ASSERT(0);
     225  	  return __result + __n;
     226  	}
     227      }
     228  }
     229  
     230  #endif /* _GLIBCXX_PARALLEL_PARTIAL_SUM_H */