1  // Optimizations for random number extensions, aarch64 version -*- C++ -*-
       2  
       3  // Copyright (C) 2017-2023 Free Software Foundation, Inc.
       4  //
       5  // This file is part of the GNU ISO C++ Library.  This library is free
       6  // software; you can redistribute it and/or modify it under the
       7  // terms of the GNU General Public License as published by the
       8  // Free Software Foundation; either version 3, or (at your option)
       9  // any later version.
      10  
      11  // This library is distributed in the hope that it will be useful,
      12  // but WITHOUT ANY WARRANTY; without even the implied warranty of
      13  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14  // GNU General Public License for more details.
      15  
      16  // Under Section 7 of GPL version 3, you are granted additional
      17  // permissions described in the GCC Runtime Library Exception, version
      18  // 3.1, as published by the Free Software Foundation.
      19  
      20  // You should have received a copy of the GNU General Public License and
      21  // a copy of the GCC Runtime Library Exception along with this program;
      22  // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
      23  // <http://www.gnu.org/licenses/>.
      24  
/** @file ext/opt_random.h
 *  This is an internal header file, included by other library headers.
 *  Do not attempt to use it directly. @headername{ext/random}
 */
      29  
      30  #ifndef _EXT_OPT_RANDOM_H
      31  #define _EXT_OPT_RANDOM_H 1
      32  
      33  #pragma GCC system_header
      34  
      35  #ifdef __ARM_NEON
      36  
// __VEXT(_A,_B,_C) emulates the AArch64 EXT (vector extract) operation
// with __builtin_shuffle: it selects 16 consecutive bytes starting at
// byte offset _C from the 32-byte concatenation of the two operands.
// The operand order and index arithmetic differ between the big- and
// little-endian variants so that both yield the same logical result.
#ifdef __ARM_BIG_ENDIAN
# define __VEXT(_A,_B,_C) __builtin_shuffle (_A, _B, (__Uint8x16_t) \
    {16-_C, 17-_C, 18-_C, 19-_C, 20-_C, 21-_C, 22-_C, 23-_C, \
     24-_C, 25-_C, 26-_C, 27-_C, 28-_C, 29-_C, 30-_C, 31-_C})
#else
# define __VEXT(_A,_B,_C) __builtin_shuffle (_B, _A, (__Uint8x16_t) \
    {_C, _C+1, _C+2, _C+3, _C+4, _C+5, _C+6, _C+7, \
     _C+8, _C+9, _C+10, _C+11, _C+12, _C+13, _C+14, _C+15})
#endif
      46  
      47  #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
      48  namespace __gnu_cxx _GLIBCXX_VISIBILITY (default)
      49  {
      50  _GLIBCXX_BEGIN_NAMESPACE_VERSION
      51  
      52    namespace {
      53      // Logical Shift right 128-bits by c * 8 bits
      54  
      55      __extension__ extern __inline __Uint32x4_t
      56      __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
      57      __aarch64_lsr_128 (__Uint8x16_t __a, __const int __c)
      58      {
      59        const __Uint8x16_t __zero = {0, 0, 0, 0, 0, 0, 0, 0,
      60  				   0, 0, 0, 0, 0, 0, 0, 0};
      61  
      62        return (__Uint32x4_t) __VEXT (__zero, __a, __c);
      63      }
      64  
      65      // Logical Shift left 128-bits by c * 8 bits
      66  
      67      __extension__ extern __inline __Uint32x4_t
      68      __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
      69      __aarch64_lsl_128 (__Uint8x16_t __a, __const int __c)
      70      {
      71        const __Uint8x16_t __zero = {0, 0, 0, 0, 0, 0, 0, 0,
      72  				   0, 0, 0, 0, 0, 0, 0, 0};
      73  
      74        return (__Uint32x4_t) __VEXT (__a, __zero, 16 - __c);
      75      }
      76  
      77      template<size_t __sl1, size_t __sl2, size_t __sr1, size_t __sr2>
      78        inline __Uint32x4_t __aarch64_recursion (__Uint32x4_t __a,
      79  					       __Uint32x4_t __b,
      80  					       __Uint32x4_t __c,
      81  					       __Uint32x4_t __d,
      82  					       __Uint32x4_t __e)
      83      {
      84        __Uint32x4_t __y = (__b >> __sr1);
      85        __Uint32x4_t __z = __aarch64_lsr_128 ((__Uint8x16_t) __c, __sr2);
      86  
      87        __Uint32x4_t __v = __d << __sl1;
      88  
      89        __z = __z ^ __a;
      90        __z = __z ^ __v;
      91  
      92        __Uint32x4_t __x = __aarch64_lsl_128 ((__Uint8x16_t) __a, __sl2);
      93  
      94        __y = __y & __e;
      95        __z = __z ^ __x;
      96        return __z ^ __y;
      97      }
      98  }
      99  
     100  #define _GLIBCXX_OPT_HAVE_RANDOM_SFMT_GEN_READ	1
     101    template<typename _UIntType, size_t __m,
     102  	   size_t __pos1, size_t __sl1, size_t __sl2,
     103  	   size_t __sr1, size_t __sr2,
     104  	   uint32_t __msk1, uint32_t __msk2,
     105  	   uint32_t __msk3, uint32_t __msk4,
     106  	   uint32_t __parity1, uint32_t __parity2,
     107  	   uint32_t __parity3, uint32_t __parity4>
     108      void simd_fast_mersenne_twister_engine<_UIntType, __m,
     109  					   __pos1, __sl1, __sl2, __sr1, __sr2,
     110  					   __msk1, __msk2, __msk3, __msk4,
     111  					   __parity1, __parity2, __parity3,
     112  					   __parity4>::
     113      _M_gen_rand (void)
     114      {
     115        __Uint32x4_t __r1 = _M_state[_M_nstate - 2];
     116        __Uint32x4_t __r2 = _M_state[_M_nstate - 1];
     117  
     118        __Uint32x4_t __aData = {__msk1, __msk2, __msk3, __msk4};
     119  
     120        size_t __i;
     121        for (__i = 0; __i < _M_nstate - __pos1; ++__i)
     122  	{
     123  	  __Uint32x4_t __r = __aarch64_recursion<__sl1, __sl2, __sr1, __sr2>
     124  	    (_M_state[__i], _M_state[__i + __pos1], __r1, __r2, __aData);
     125  
     126  	  _M_state[__i] = __r;
     127  
     128  	  __r1 = __r2;
     129  	  __r2 = __r;
     130  	}
     131        for (; __i < _M_nstate; ++__i)
     132  	{
     133  	  __Uint32x4_t __r = __aarch64_recursion<__sl1, __sl2, __sr1, __sr2>
     134  	    (_M_state[__i], _M_state[__i + __pos1 - _M_nstate], __r1, __r2,
     135  	     __aData);
     136  
     137  	  _M_state[__i] = __r;
     138  
     139  	  __r1 = __r2;
     140  	  __r2 = __r;
     141  	}
     142  
     143        _M_pos = 0;
     144      }
     145  
     146  
     147  #define _GLIBCXX_OPT_HAVE_RANDOM_SFMT_OPERATOREQUAL	1
     148    template<typename _UIntType, size_t __m,
     149  	   size_t __pos1, size_t __sl1, size_t __sl2,
     150  	   size_t __sr1, size_t __sr2,
     151  	   uint32_t __msk1, uint32_t __msk2,
     152  	   uint32_t __msk3, uint32_t __msk4,
     153  	   uint32_t __parity1, uint32_t __parity2,
     154  	   uint32_t __parity3, uint32_t __parity4>
     155      bool
     156      operator==(const __gnu_cxx::simd_fast_mersenne_twister_engine<_UIntType,
     157  	       __m, __pos1, __sl1, __sl2, __sr1, __sr2,
     158  	       __msk1, __msk2, __msk3, __msk4,
     159  	       __parity1, __parity2, __parity3, __parity4>& __lhs,
     160  	       const __gnu_cxx::simd_fast_mersenne_twister_engine<_UIntType,
     161  	       __m, __pos1, __sl1, __sl2, __sr1, __sr2,
     162  	       __msk1, __msk2, __msk3, __msk4,
     163  	       __parity1, __parity2, __parity3, __parity4>& __rhs)
     164      {
     165        if (__lhs._M_pos != __rhs._M_pos)
     166  	return false;
     167  
     168        __Uint32x4_t __res = __lhs._M_state[0] ^ __rhs._M_state[0];
     169  
     170        for (size_t __i = 1; __i < __lhs._M_nstate; ++__i)
     171  	__res |= __lhs._M_state[__i] ^ __rhs._M_state[__i];
     172  
     173        return (__int128) __res == 0;
     174      }
     175  
     176  _GLIBCXX_END_NAMESPACE_VERSION
     177    } // namespace
     178  
     179  #endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
     180  #endif // __ARM_NEON
     181  
     182  #endif // _EXT_OPT_RANDOM_H