1  // Optimizations for random number extensions, x86 version -*- C++ -*-
       2  
       3  // Copyright (C) 2012-2023 Free Software Foundation, Inc.
       4  //
       5  // This file is part of the GNU ISO C++ Library.  This library is free
       6  // software; you can redistribute it and/or modify it under the
       7  // terms of the GNU General Public License as published by the
       8  // Free Software Foundation; either version 3, or (at your option)
       9  // any later version.
      10  
      11  // This library is distributed in the hope that it will be useful,
      12  // but WITHOUT ANY WARRANTY; without even the implied warranty of
      13  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14  // GNU General Public License for more details.
      15  
      16  // Under Section 7 of GPL version 3, you are granted additional
      17  // permissions described in the GCC Runtime Library Exception, version
      18  // 3.1, as published by the Free Software Foundation.
      19  
      20  // You should have received a copy of the GNU General Public License and
      21  // a copy of the GCC Runtime Library Exception along with this program;
      22  // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
      23  // <http://www.gnu.org/licenses/>.
      24  
/** @file ext/opt_random.h
      26   *  This is an internal header file, included by other library headers.
      27   *  Do not attempt to use it directly. @headername{ext/random}
      28   */
      29  
      30  #ifndef _EXT_OPT_RANDOM_H
      31  #define _EXT_OPT_RANDOM_H 1
      32  
      33  #pragma GCC system_header
      34  
      35  #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
      36  
      37  #ifdef __SSE2__
      38  
      39  namespace __gnu_cxx _GLIBCXX_VISIBILITY(default)
      40  {
      41  _GLIBCXX_BEGIN_NAMESPACE_VERSION
      42  
      43    namespace {
      44  
      45      template<size_t __sl1, size_t __sl2, size_t __sr1, size_t __sr2,
      46  	     uint32_t __msk1, uint32_t __msk2, uint32_t __msk3, uint32_t __msk4>
      47        inline __m128i __sse2_recursion(__m128i __a, __m128i __b,
      48  				      __m128i __c, __m128i __d)
      49        {
      50  	__m128i __y = _mm_srli_epi32(__b, __sr1);
      51  	__m128i __z = _mm_srli_si128(__c, __sr2);
      52  	__m128i __v = _mm_slli_epi32(__d, __sl1);
      53  	__z = _mm_xor_si128(__z, __a);
      54  	__z = _mm_xor_si128(__z, __v);
      55  	__m128i __x = _mm_slli_si128(__a, __sl2);
      56  	__y = _mm_and_si128(__y, _mm_set_epi32(__msk4, __msk3, __msk2, __msk1));
      57  	__z = _mm_xor_si128(__z, __x);
      58  	return _mm_xor_si128(__z, __y);
      59        }
      60  
      61    }
      62  
      63  
      64  #define _GLIBCXX_OPT_HAVE_RANDOM_SFMT_GEN_READ	1
      65    template<typename _UIntType, size_t __m,
      66  	   size_t __pos1, size_t __sl1, size_t __sl2,
      67  	   size_t __sr1, size_t __sr2,
      68  	   uint32_t __msk1, uint32_t __msk2,
      69  	   uint32_t __msk3, uint32_t __msk4,
      70  	   uint32_t __parity1, uint32_t __parity2,
      71  	   uint32_t __parity3, uint32_t __parity4>
      72      void simd_fast_mersenne_twister_engine<_UIntType, __m,
      73  					   __pos1, __sl1, __sl2, __sr1, __sr2,
      74  					   __msk1, __msk2, __msk3, __msk4,
      75  					   __parity1, __parity2, __parity3,
      76  					   __parity4>::
      77      _M_gen_rand(void)
      78      {
      79        __m128i __r1 = _mm_load_si128(&_M_state[_M_nstate - 2]);
      80        __m128i __r2 = _mm_load_si128(&_M_state[_M_nstate - 1]);
      81  
      82        size_t __i;
      83        for (__i = 0; __i < _M_nstate - __pos1; ++__i)
      84  	{
      85  	  __m128i __r = __sse2_recursion<__sl1, __sl2, __sr1, __sr2,
      86  					 __msk1, __msk2, __msk3, __msk4>
      87  	    (_M_state[__i], _M_state[__i + __pos1], __r1, __r2);
      88  	  _mm_store_si128(&_M_state[__i], __r);
      89  	  __r1 = __r2;
      90  	  __r2 = __r;
      91  	}
      92        for (; __i < _M_nstate; ++__i)
      93  	{
      94  	  __m128i __r = __sse2_recursion<__sl1, __sl2, __sr1, __sr2,
      95  					 __msk1, __msk2, __msk3, __msk4>
      96  	    (_M_state[__i], _M_state[__i + __pos1 - _M_nstate], __r1, __r2);
      97  	  _mm_store_si128(&_M_state[__i], __r);
      98  	  __r1 = __r2;
      99  	  __r2 = __r;
     100  	}
     101  
     102        _M_pos = 0;
     103      }
     104  
     105  
     106  #define _GLIBCXX_OPT_HAVE_RANDOM_SFMT_OPERATOREQUAL	1
     107    template<typename _UIntType, size_t __m,
     108  	   size_t __pos1, size_t __sl1, size_t __sl2,
     109  	   size_t __sr1, size_t __sr2,
     110  	   uint32_t __msk1, uint32_t __msk2,
     111  	   uint32_t __msk3, uint32_t __msk4,
     112  	   uint32_t __parity1, uint32_t __parity2,
     113  	   uint32_t __parity3, uint32_t __parity4>
     114      bool
     115      operator==(const __gnu_cxx::simd_fast_mersenne_twister_engine<_UIntType,
     116  	       __m, __pos1, __sl1, __sl2, __sr1, __sr2,
     117  	       __msk1, __msk2, __msk3, __msk4,
     118  	       __parity1, __parity2, __parity3, __parity4>& __lhs,
     119  	       const __gnu_cxx::simd_fast_mersenne_twister_engine<_UIntType,
     120  	       __m, __pos1, __sl1, __sl2, __sr1, __sr2,
     121  	       __msk1, __msk2, __msk3, __msk4,
     122  	       __parity1, __parity2, __parity3, __parity4>& __rhs)
     123      {
     124        __m128i __res = _mm_cmpeq_epi8(__lhs._M_state[0], __rhs._M_state[0]);
     125        for (size_t __i = 1; __i < __lhs._M_nstate; ++__i)
     126  	__res = _mm_and_si128(__res, _mm_cmpeq_epi8(__lhs._M_state[__i],
     127  						    __rhs._M_state[__i]));
     128        return (_mm_movemask_epi8(__res) == 0xffff
     129  	      && __lhs._M_pos == __rhs._M_pos);
     130      }
     131  
     132  
     133  _GLIBCXX_END_NAMESPACE_VERSION
     134  } // namespace
     135  
     136  #endif // __SSE2__
     137  
     138  #endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
     139  
     140  #endif // _EXT_OPT_RANDOM_H