1  // Simd PowerPC specific implementations -*- C++ -*-
       2  
       3  // Copyright (C) 2020-2023 Free Software Foundation, Inc.
       4  //
       5  // This file is part of the GNU ISO C++ Library.  This library is free
       6  // software; you can redistribute it and/or modify it under the
       7  // terms of the GNU General Public License as published by the
       8  // Free Software Foundation; either version 3, or (at your option)
       9  // any later version.
      10  
      11  // This library is distributed in the hope that it will be useful,
      12  // but WITHOUT ANY WARRANTY; without even the implied warranty of
      13  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14  // GNU General Public License for more details.
      15  
      16  // Under Section 7 of GPL version 3, you are granted additional
      17  // permissions described in the GCC Runtime Library Exception, version
      18  // 3.1, as published by the Free Software Foundation.
      19  
      20  // You should have received a copy of the GNU General Public License and
      21  // a copy of the GCC Runtime Library Exception along with this program;
      22  // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
      23  // <http://www.gnu.org/licenses/>.
      24  
      25  #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_PPC_H_
      26  #define _GLIBCXX_EXPERIMENTAL_SIMD_PPC_H_
      27  
      28  #if __cplusplus >= 201703L
      29  
      30  #ifndef __ALTIVEC__
      31  #error "simd_ppc.h may only be included when AltiVec/VMX is available"
      32  #endif
      33  #include <altivec.h>
      34  
      35  _GLIBCXX_SIMD_BEGIN_NAMESPACE
      36  
      37  // _SimdImplPpc {{{
      38  template <typename _Abi, typename>
      39    struct _SimdImplPpc : _SimdImplBuiltin<_Abi>
      40    {
      41      using _Base = _SimdImplBuiltin<_Abi>;
      42  
      43      // Byte and halfword shift instructions on PPC only consider the low 3 or 4
      44      // bits of the RHS. Consequently, shifting by sizeof(_Tp)*CHAR_BIT (or more)
      45      // is UB without extra measures. To match scalar behavior, byte and halfword
      46      // shifts need an extra fixup step.
      47  
      48      // _S_bit_shift_left {{{
      49      template <typename _Tp, size_t _Np>
      50        _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
      51        _S_bit_shift_left(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
      52        {
      53  	__x = _Base::_S_bit_shift_left(__x, __y);
      54  	if constexpr (sizeof(_Tp) < sizeof(int))
      55  	  __x._M_data
      56  	    = (__y._M_data < sizeof(_Tp) * __CHAR_BIT__) & __x._M_data;
      57  	return __x;
      58        }
      59  
      60      template <typename _Tp, size_t _Np>
      61        _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
      62        _S_bit_shift_left(_SimdWrapper<_Tp, _Np> __x, int __y)
      63        {
      64  	__x = _Base::_S_bit_shift_left(__x, __y);
      65  	if constexpr (sizeof(_Tp) < sizeof(int))
      66  	  {
      67  	    if (__y >= int(sizeof(_Tp) * __CHAR_BIT__))
      68  	      return {};
      69  	  }
      70  	return __x;
      71        }
      72  
      73      // }}}
      74      // _S_bit_shift_right {{{
      75      template <typename _Tp, size_t _Np>
      76        _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
      77        _S_bit_shift_right(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
      78        {
      79  	if constexpr (sizeof(_Tp) < sizeof(int))
      80  	  {
      81  	    constexpr int __nbits = sizeof(_Tp) * __CHAR_BIT__;
      82  	    if constexpr (is_unsigned_v<_Tp>)
      83  	      return (__y._M_data < __nbits)
      84  		     & _Base::_S_bit_shift_right(__x, __y)._M_data;
      85  	    else
      86  	      {
      87  		_Base::_S_masked_assign(_SimdWrapper<_Tp, _Np>(__y._M_data
      88  							       >= __nbits),
      89  					__y, __nbits - 1);
      90  		return _Base::_S_bit_shift_right(__x, __y);
      91  	      }
      92  	  }
      93  	else
      94  	  return _Base::_S_bit_shift_right(__x, __y);
      95        }
      96  
      97      template <typename _Tp, size_t _Np>
      98        _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
      99        _S_bit_shift_right(_SimdWrapper<_Tp, _Np> __x, int __y)
     100        {
     101  	if constexpr (sizeof(_Tp) < sizeof(int))
     102  	  {
     103  	    constexpr int __nbits = sizeof(_Tp) * __CHAR_BIT__;
     104  	    if (__y >= __nbits)
     105  	      {
     106  		if constexpr (is_unsigned_v<_Tp>)
     107  		  return {};
     108  		else
     109  		  return _Base::_S_bit_shift_right(__x, __nbits - 1);
     110  	      }
     111  	  }
     112  	return _Base::_S_bit_shift_right(__x, __y);
     113        }
     114  
     115      // }}}
     116    };
     117  
     118  // }}}
     119  // _MaskImplPpc {{{
     120  template <typename _Abi, typename>
     121    struct _MaskImplPpc : _MaskImplBuiltin<_Abi>
     122    {
     123      using _Base = _MaskImplBuiltin<_Abi>;
     124  
     125      // _S_popcount {{{
     126      template <typename _Tp>
     127        _GLIBCXX_SIMD_INTRINSIC static int
     128        _S_popcount(simd_mask<_Tp, _Abi> __k)
     129        {
     130  	const auto __kv = __as_vector(__k);
     131  	if constexpr (__have_power10vec)
     132  	  {
     133  	    using _Intrin = __intrinsic_type16_t<make_unsigned_t<__int_for_sizeof_t<_Tp>>>;
     134  	    return vec_cntm(reinterpret_cast<_Intrin>(__kv), 1);
     135  	  }
     136  	else if constexpr (sizeof(_Tp) >= sizeof(int))
     137  	  {
     138  	    using _Intrin = __intrinsic_type16_t<int>;
     139  	    const int __sum = -vec_sums(__intrin_bitcast<_Intrin>(__kv), _Intrin())[3];
     140  	    return __sum / (sizeof(_Tp) / sizeof(int));
     141  	  }
     142  	else
     143  	  {
     144  	    const auto __summed_to_int = vec_sum4s(__to_intrin(__kv), __intrinsic_type16_t<int>());
     145  	    return -vec_sums(__summed_to_int, __intrinsic_type16_t<int>())[3];
     146  	  }
     147        }
     148  
     149      // }}}
     150    };
     151  
     152  // }}}
     153  
     154  _GLIBCXX_SIMD_END_NAMESPACE
     155  #endif // __cplusplus >= 201703L
     156  #endif // _GLIBCXX_EXPERIMENTAL_SIMD_PPC_H_
     157  
     158  // vim: foldmethod=marker foldmarker={{{,}}} sw=2 noet ts=8 sts=2 tw=100