1  // -*- C++ -*-
       2  
       3  // Copyright (C) 2007-2023 Free Software Foundation, Inc.
       4  //
       5  // This file is part of the GNU ISO C++ Library.  This library is free
       6  // software; you can redistribute it and/or modify it under the terms
       7  // of the GNU General Public License as published by the Free Software
       8  // Foundation; either version 3, or (at your option) any later
       9  // version.
      10  
      11  // This library is distributed in the hope that it will be useful, but
      12  // WITHOUT ANY WARRANTY; without even the implied warranty of
      13  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      14  // General Public License for more details.
      15  
      16  // Under Section 7 of GPL version 3, you are granted additional
      17  // permissions described in the GCC Runtime Library Exception, version
      18  // 3.1, as published by the Free Software Foundation.
      19  
      20  // You should have received a copy of the GNU General Public License and
      21  // a copy of the GCC Runtime Library Exception along with this program;
      22  // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
      23  // <http://www.gnu.org/licenses/>.
      24  
      25  /** @file parallel/multiway_mergesort.h
      26   *  @brief Parallel multiway merge sort.
      27   *  This file is a GNU parallel extension to the Standard C++ Library.
      28   */
      29  
      30  // Written by Johannes Singler.
      31  
      32  #ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H
      33  #define _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 1
      34  
      35  #include <vector>
      36  
      37  #include <parallel/basic_iterator.h>
      38  #include <bits/stl_algo.h>
      39  #include <parallel/parallel.h>
      40  #include <parallel/multiway_merge.h>
      41  
      42  namespace __gnu_parallel
      43  {
      44    /** @brief Subsequence description. */
      45    template<typename _DifferenceTp>
      46      struct _Piece
      47      {
      48        typedef _DifferenceTp _DifferenceType;
      49  
      50        /** @brief Begin of subsequence. */
      51        _DifferenceType _M_begin;
      52  
      53        /** @brief End of subsequence. */
      54        _DifferenceType _M_end;
      55      };
      56  
      57    /** @brief Data accessed by all threads.
      58     *
      59     *  PMWMS = parallel multiway mergesort */
      60    template<typename _RAIter>
      61      struct _PMWMSSortingData
      62      {
      63        typedef std::iterator_traits<_RAIter> _TraitsType;
      64        typedef typename _TraitsType::value_type _ValueType;
      65        typedef typename _TraitsType::difference_type _DifferenceType;
      66  
      67        /** @brief Number of threads involved. */
      68        _ThreadIndex _M_num_threads;
      69  
      70        /** @brief Input __begin. */
      71        _RAIter _M_source;
      72  
      73        /** @brief Start indices, per thread. */
      74        _DifferenceType* _M_starts;
      75  
      76        /** @brief Storage in which to sort. */
      77        _ValueType** _M_temporary;
      78  
      79        /** @brief Samples. */
      80        _ValueType* _M_samples;
      81  
      82        /** @brief Offsets to add to the found positions. */
      83        _DifferenceType* _M_offsets;
      84  
      85        /** @brief Pieces of data to merge @c [thread][__sequence] */
      86        std::vector<_Piece<_DifferenceType> >* _M_pieces;
      87    };
      88  
      89    /**
      90     *  @brief Select _M_samples from a sequence.
      91     *  @param __sd Pointer to algorithm data. _Result will be placed in
      92     *  @c __sd->_M_samples.
      93     *  @param __num_samples Number of _M_samples to select.
      94     */
      95    template<typename _RAIter, typename _DifferenceTp>
      96      void
      97      __determine_samples(_PMWMSSortingData<_RAIter>* __sd,
      98  			_DifferenceTp __num_samples)
      99      {
     100        typedef std::iterator_traits<_RAIter> _TraitsType;
     101        typedef typename _TraitsType::value_type _ValueType;
     102        typedef _DifferenceTp _DifferenceType;
     103  
     104        _ThreadIndex __iam = omp_get_thread_num();
     105  
     106        _DifferenceType* __es = new _DifferenceType[__num_samples + 2];
     107  
     108        __equally_split(__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam], 
     109  		      __num_samples + 1, __es);
     110  
     111        for (_DifferenceType __i = 0; __i < __num_samples; ++__i)
     112  	::new(&(__sd->_M_samples[__iam * __num_samples + __i]))
     113  	    _ValueType(__sd->_M_source[__sd->_M_starts[__iam]
     114  				       + __es[__i + 1]]);
     115  
     116        delete[] __es;
     117      }
     118  
     119    /** @brief Split consistently. */
     120    template<bool __exact, typename _RAIter,
     121  	   typename _Compare, typename _SortingPlacesIterator>
     122      struct _SplitConsistently
     123      { };
     124  
     125    /** @brief Split by exact splitting. */
     126    template<typename _RAIter, typename _Compare,
     127  	   typename _SortingPlacesIterator>
     128      struct _SplitConsistently<true, _RAIter, _Compare, _SortingPlacesIterator>
     129      {
     130        void
     131        operator()(const _ThreadIndex __iam,
     132  		 _PMWMSSortingData<_RAIter>* __sd,
     133  		 _Compare& __comp,
     134  		 const typename
     135  		 std::iterator_traits<_RAIter>::difference_type
     136  		 __num_samples) const
     137        {
     138  #       pragma omp barrier
     139  
     140  	std::vector<std::pair<_SortingPlacesIterator,
     141  	                      _SortingPlacesIterator> >
     142  	  __seqs(__sd->_M_num_threads);
     143  	for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
     144  	  __seqs[__s] = std::make_pair(__sd->_M_temporary[__s],
     145  				       __sd->_M_temporary[__s]
     146  				       + (__sd->_M_starts[__s + 1]
     147  					  - __sd->_M_starts[__s]));
     148  
     149  	std::vector<_SortingPlacesIterator> __offsets(__sd->_M_num_threads);
     150  
     151  	// if not last thread
     152  	if (__iam < __sd->_M_num_threads - 1)
     153  	  multiseq_partition(__seqs.begin(), __seqs.end(),
     154  			     __sd->_M_starts[__iam + 1], __offsets.begin(),
     155  			     __comp);
     156  
     157  	for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)
     158  	  {
     159  	    // for each sequence
     160  	    if (__iam < (__sd->_M_num_threads - 1))
     161  	      __sd->_M_pieces[__iam][__seq]._M_end
     162  		= __offsets[__seq] - __seqs[__seq].first;
     163  	    else
     164  	      // very end of this sequence
     165  	      __sd->_M_pieces[__iam][__seq]._M_end =
     166  		__sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq];
     167  	  }
     168  
     169  #       pragma omp barrier
     170  
     171  	for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)
     172  	  {
     173  	    // For each sequence.
     174  	    if (__iam > 0)
     175  	      __sd->_M_pieces[__iam][__seq]._M_begin =
     176  		__sd->_M_pieces[__iam - 1][__seq]._M_end;
     177  	    else
     178  	      // Absolute beginning.
     179  	      __sd->_M_pieces[__iam][__seq]._M_begin = 0;
     180  	  }
     181        }
     182    };
     183  
     184    /** @brief Split by sampling. */ 
     185    template<typename _RAIter, typename _Compare,
     186  	   typename _SortingPlacesIterator>
     187      struct _SplitConsistently<false, _RAIter, _Compare, _SortingPlacesIterator>
     188      {
     189        void
     190        operator()(const _ThreadIndex __iam,
     191  		 _PMWMSSortingData<_RAIter>* __sd,
     192  		 _Compare& __comp,
     193  		 const typename
     194  		 std::iterator_traits<_RAIter>::difference_type
     195  		 __num_samples) const
     196        {
     197  	typedef std::iterator_traits<_RAIter> _TraitsType;
     198  	typedef typename _TraitsType::value_type _ValueType;
     199  	typedef typename _TraitsType::difference_type _DifferenceType;
     200  
     201  	__determine_samples(__sd, __num_samples);
     202  
     203  #       pragma omp barrier
     204  
     205  #       pragma omp single
     206  	__gnu_sequential::sort(__sd->_M_samples,
     207  			       __sd->_M_samples
     208  			       + (__num_samples * __sd->_M_num_threads),
     209  			       __comp);
     210  
     211  #       pragma omp barrier
     212  
     213  	for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)
     214  	  {
     215  	    // For each sequence.
     216  	    if (__num_samples * __iam > 0)
     217  	      __sd->_M_pieces[__iam][__s]._M_begin =
     218                  std::lower_bound(__sd->_M_temporary[__s],
     219  				 __sd->_M_temporary[__s]
     220  				 + (__sd->_M_starts[__s + 1]
     221  				    - __sd->_M_starts[__s]),
     222  				 __sd->_M_samples[__num_samples * __iam],
     223  				 __comp)
     224                  - __sd->_M_temporary[__s];
     225  	    else
     226  	      // Absolute beginning.
     227  	      __sd->_M_pieces[__iam][__s]._M_begin = 0;
     228  
     229  	    if ((__num_samples * (__iam + 1)) <
     230  		(__num_samples * __sd->_M_num_threads))
     231  	      __sd->_M_pieces[__iam][__s]._M_end =
     232                  std::lower_bound(__sd->_M_temporary[__s],
     233  				 __sd->_M_temporary[__s]
     234  				 + (__sd->_M_starts[__s + 1]
     235  				    - __sd->_M_starts[__s]),
     236  				 __sd->_M_samples[__num_samples * (__iam + 1)],
     237  				 __comp)
     238                  - __sd->_M_temporary[__s];
     239  	    else
     240  	      // Absolute end.
     241  	      __sd->_M_pieces[__iam][__s]._M_end = (__sd->_M_starts[__s + 1]
     242  						    - __sd->_M_starts[__s]);
     243  	  }
     244        }
     245    };
     246    
     247    template<bool __stable, typename _RAIter, typename _Compare>
     248      struct __possibly_stable_sort
     249      { };
     250  
     251    template<typename _RAIter, typename _Compare>
     252      struct __possibly_stable_sort<true, _RAIter, _Compare>
     253      {
     254        void operator()(const _RAIter& __begin,
     255  		      const _RAIter& __end, _Compare& __comp) const
     256        { __gnu_sequential::stable_sort(__begin, __end, __comp); }
     257      };
     258  
     259    template<typename _RAIter, typename _Compare>
     260      struct __possibly_stable_sort<false, _RAIter, _Compare>
     261      {
     262        void operator()(const _RAIter __begin,
     263  		      const _RAIter __end, _Compare& __comp) const
     264        { __gnu_sequential::sort(__begin, __end, __comp); }
     265      };
     266  
     267    template<bool __stable, typename _Seq_RAIter,
     268  	   typename _RAIter, typename _Compare,
     269  	   typename _DiffType>
     270      struct __possibly_stable_multiway_merge
     271      { };
     272  
     273    template<typename _Seq_RAIter, typename _RAIter,
     274  	   typename _Compare, typename _DiffType>
     275      struct __possibly_stable_multiway_merge<true, _Seq_RAIter,
     276  					    _RAIter, _Compare, _DiffType>
     277      {
     278        void operator()(const _Seq_RAIter& __seqs_begin,
     279  		      const _Seq_RAIter& __seqs_end,
     280  		      const _RAIter& __target,
     281  		      _Compare& __comp,
     282  		      _DiffType __length_am) const
     283        { stable_multiway_merge(__seqs_begin, __seqs_end, __target,
     284  			      __length_am, __comp, sequential_tag()); }
     285      };
     286  
     287    template<typename _Seq_RAIter, typename _RAIter,
     288  	   typename _Compare, typename _DiffType>
     289      struct __possibly_stable_multiway_merge<false, _Seq_RAIter,
     290  					    _RAIter, _Compare, _DiffType>
     291      {
     292        void operator()(const _Seq_RAIter& __seqs_begin,
     293                        const _Seq_RAIter& __seqs_end,
     294                        const _RAIter& __target,
     295                        _Compare& __comp,
     296                        _DiffType __length_am) const
     297        { multiway_merge(__seqs_begin, __seqs_end, __target, __length_am,
     298  		       __comp, sequential_tag()); }
     299      };
     300  
     301    /** @brief PMWMS code executed by each thread.
     302     *  @param __sd Pointer to algorithm data.
     303     *  @param __comp Comparator.
     304     */
     305    template<bool __stable, bool __exact, typename _RAIter,
     306  	   typename _Compare>
     307      void
     308      parallel_sort_mwms_pu(_PMWMSSortingData<_RAIter>* __sd,
     309  			  _Compare& __comp)
     310      {
     311        typedef std::iterator_traits<_RAIter> _TraitsType;
     312        typedef typename _TraitsType::value_type _ValueType;
     313        typedef typename _TraitsType::difference_type _DifferenceType;
     314  
     315        _ThreadIndex __iam = omp_get_thread_num();
     316  
     317        // Length of this thread's chunk, before merging.
     318        _DifferenceType __length_local =
     319  	__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam];
     320  
     321        // Sort in temporary storage, leave space for sentinel.
     322  
     323        typedef _ValueType* _SortingPlacesIterator;
     324  
     325        __sd->_M_temporary[__iam] =
     326          static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
     327  						* (__length_local + 1)));
     328  
     329        // Copy there.
     330        std::uninitialized_copy(__sd->_M_source + __sd->_M_starts[__iam],
     331  			      __sd->_M_source + __sd->_M_starts[__iam]
     332  			      + __length_local,
     333  			      __sd->_M_temporary[__iam]);
     334  
     335        __possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>()
     336          (__sd->_M_temporary[__iam],
     337  	 __sd->_M_temporary[__iam] + __length_local,
     338           __comp);
     339  
     340        // Invariant: locally sorted subsequence in sd->_M_temporary[__iam],
     341        // __sd->_M_temporary[__iam] + __length_local.
     342  
     343        // No barrier here: Synchronization is done by the splitting routine.
     344  
     345        _DifferenceType __num_samples =
     346          _Settings::get().sort_mwms_oversampling * __sd->_M_num_threads - 1;
     347        _SplitConsistently<__exact, _RAIter, _Compare, _SortingPlacesIterator>()
     348          (__iam, __sd, __comp, __num_samples);
     349  
     350        // Offset from __target __begin, __length after merging.
     351        _DifferenceType __offset = 0, __length_am = 0;
     352        for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
     353  	{
     354  	  __length_am += (__sd->_M_pieces[__iam][__s]._M_end
     355  			  - __sd->_M_pieces[__iam][__s]._M_begin);
     356  	  __offset += __sd->_M_pieces[__iam][__s]._M_begin;
     357  	}
     358  
     359        typedef std::vector<
     360          std::pair<_SortingPlacesIterator, _SortingPlacesIterator> >
     361          _SeqVector;
     362        _SeqVector __seqs(__sd->_M_num_threads);
     363  
     364        for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)
     365  	{
     366  	  __seqs[__s] =
     367  	    std::make_pair(__sd->_M_temporary[__s]
     368  			   + __sd->_M_pieces[__iam][__s]._M_begin,
     369  			   __sd->_M_temporary[__s]
     370  			   + __sd->_M_pieces[__iam][__s]._M_end);
     371  	}
     372  
     373        __possibly_stable_multiway_merge<
     374          __stable, typename _SeqVector::iterator,
     375  	_RAIter, _Compare, _DifferenceType>()(__seqs.begin(), __seqs.end(),
     376  				     __sd->_M_source + __offset, __comp,
     377  				     __length_am);
     378  
     379  #     pragma omp barrier
     380  
     381        for (_DifferenceType __i = 0; __i < __length_local; ++__i)
     382  	__sd->_M_temporary[__iam][__i].~_ValueType();
     383        ::operator delete(__sd->_M_temporary[__iam]);
     384      }
     385  
     386    /** @brief PMWMS main call.
     387     *  @param __begin Begin iterator of sequence.
     388     *  @param __end End iterator of sequence.
     389     *  @param __comp Comparator.
     390     *  @param __num_threads Number of threads to use.
     391     */
     392    template<bool __stable, bool __exact, typename _RAIter,
     393             typename _Compare>
     394      void
     395      parallel_sort_mwms(_RAIter __begin, _RAIter __end,
     396  		       _Compare __comp,
     397  		       _ThreadIndex __num_threads)
     398      {
     399        _GLIBCXX_CALL(__end - __begin)
     400  
     401        typedef std::iterator_traits<_RAIter> _TraitsType;
     402        typedef typename _TraitsType::value_type _ValueType;
     403        typedef typename _TraitsType::difference_type _DifferenceType;
     404  
     405        _DifferenceType __n = __end - __begin;
     406  
     407        if (__n <= 1)
     408  	return;
     409  
     410        // at least one element per thread
     411        if (__num_threads > __n)
     412  	__num_threads = static_cast<_ThreadIndex>(__n);
     413  
     414        // shared variables
     415        _PMWMSSortingData<_RAIter> __sd;
     416        _DifferenceType* __starts;
     417        _DifferenceType __size;
     418  
     419  #     pragma omp parallel num_threads(__num_threads)
     420        {
     421          __num_threads = omp_get_num_threads(); //no more threads than requested
     422  
     423  #       pragma omp single
     424  	{
     425  	  __sd._M_num_threads = __num_threads;
     426  	  __sd._M_source = __begin;
     427  	  
     428  	  __sd._M_temporary = new _ValueType*[__num_threads];
     429  
     430  	  if (!__exact)
     431  	    {
     432  	      __size =
     433  		(_Settings::get().sort_mwms_oversampling * __num_threads - 1)
     434  		* __num_threads;
     435  	      __sd._M_samples = static_cast<_ValueType*>
     436  		(::operator new(__size * sizeof(_ValueType)));
     437  	    }
     438  	  else
     439  	    __sd._M_samples = 0;
     440  
     441  	  __sd._M_offsets = new _DifferenceType[__num_threads - 1];
     442  	  __sd._M_pieces
     443  	    = new std::vector<_Piece<_DifferenceType> >[__num_threads];
     444  	  for (_ThreadIndex __s = 0; __s < __num_threads; ++__s)
     445  	    __sd._M_pieces[__s].resize(__num_threads);
     446  	  __starts = __sd._M_starts = new _DifferenceType[__num_threads + 1];
     447  
     448  	  _DifferenceType __chunk_length = __n / __num_threads;
     449  	  _DifferenceType __split = __n % __num_threads;
     450  	  _DifferenceType __pos = 0;
     451  	  for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
     452  	    {
     453  	      __starts[__i] = __pos;
     454  	      __pos += ((__i < __split)
     455  			? (__chunk_length + 1) : __chunk_length);
     456  	    }
     457  	  __starts[__num_threads] = __pos;
     458  	} //single
     459  
     460          // Now sort in parallel.
     461          parallel_sort_mwms_pu<__stable, __exact>(&__sd, __comp);
     462        } //parallel
     463  
     464        delete[] __starts;
     465        delete[] __sd._M_temporary;
     466  
     467        if (!__exact)
     468  	{
     469  	  for (_DifferenceType __i = 0; __i < __size; ++__i)
     470  	    __sd._M_samples[__i].~_ValueType();
     471  	  ::operator delete(__sd._M_samples);
     472  	}
     473  
     474        delete[] __sd._M_offsets;
     475        delete[] __sd._M_pieces;
     476      }
     477  
     478  } //namespace __gnu_parallel
     479  
     480  #endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H */