(root)/
gcc-13.2.0/
libstdc++-v3/
include/
experimental/
bits/
simd_detail.h
       1  // Internal macros for the simd implementation -*- C++ -*-
       2  
       3  // Copyright (C) 2020-2023 Free Software Foundation, Inc.
       4  //
       5  // This file is part of the GNU ISO C++ Library.  This library is free
       6  // software; you can redistribute it and/or modify it under the
       7  // terms of the GNU General Public License as published by the
       8  // Free Software Foundation; either version 3, or (at your option)
       9  // any later version.
      10  
      11  // This library is distributed in the hope that it will be useful,
      12  // but WITHOUT ANY WARRANTY; without even the implied warranty of
      13  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14  // GNU General Public License for more details.
      15  
      16  // Under Section 7 of GPL version 3, you are granted additional
      17  // permissions described in the GCC Runtime Library Exception, version
      18  // 3.1, as published by the Free Software Foundation.
      19  
      20  // You should have received a copy of the GNU General Public License and
      21  // a copy of the GCC Runtime Library Exception along with this program;
      22  // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
      23  // <http://www.gnu.org/licenses/>.
      24  
      25  #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
      26  #define _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
      27  
      28  #if __cplusplus >= 201703L
      29  
      30  #include <cstddef>
      31  #include <cstdint>
      32  
      33  /// @cond undocumented
      34  
          // Opens, in this order: namespace std (with default visibility), whatever
          // _GLIBCXX_BEGIN_NAMESPACE_VERSION expands to (defined elsewhere), namespace
          // experimental, and the inline namespace parallelism_v2. The expansion
          // leaves its braces open; _GLIBCXX_SIMD_END_NAMESPACE closes them.
      35  #define _GLIBCXX_SIMD_BEGIN_NAMESPACE                                          \
      36    namespace std _GLIBCXX_VISIBILITY(default)                                   \
      37    {                                                                            \
      38      _GLIBCXX_BEGIN_NAMESPACE_VERSION                                           \
      39        namespace experimental {                                                 \
      40        inline namespace parallelism_v2 {
          // Counterpart of _GLIBCXX_SIMD_BEGIN_NAMESPACE: closes parallelism_v2 and
          // experimental, expands _GLIBCXX_END_NAMESPACE_VERSION, then closes std.
      41  #define _GLIBCXX_SIMD_END_NAMESPACE                                            \
      42    }                                                                            \
      43    }                                                                            \
      44    _GLIBCXX_END_NAMESPACE_VERSION                                               \
      45    }
      46  
      47  // ISA extension detection. The following defines all the _GLIBCXX_SIMD_HAVE_XXX
      48  // macros. Each of them is always defined, to either 1 or 0. ARM{{{
          // HAVE_NEON: 1 when the compiler predefines __ARM_NEON.
      49  #if defined __ARM_NEON
      50  #define _GLIBCXX_SIMD_HAVE_NEON 1
      51  #else
      52  #define _GLIBCXX_SIMD_HAVE_NEON 0
      53  #endif
          // HAVE_NEON_A32: 1 when __ARM_NEON is defined and additionally either
          // __ARM_ARCH >= 8 or __aarch64__ is defined.
      54  #if defined __ARM_NEON && (__ARM_ARCH >= 8 || defined __aarch64__)
      55  #define _GLIBCXX_SIMD_HAVE_NEON_A32 1
      56  #else
      57  #define _GLIBCXX_SIMD_HAVE_NEON_A32 0
      58  #endif
          // HAVE_NEON_A64: 1 only when both __ARM_NEON and __aarch64__ are defined.
      59  #if defined __ARM_NEON && defined __aarch64__
      60  #define _GLIBCXX_SIMD_HAVE_NEON_A64 1
      61  #else
      62  #define _GLIBCXX_SIMD_HAVE_NEON_A64 0
      63  #endif
      64  //}}}
      65  // x86{{{
          // One _GLIBCXX_SIMD_HAVE_<EXT> macro per x86 extension. Each is defined to 1
          // when the matching compiler predefine (__MMX__, __SSE__, ...) is present
          // and to 0 otherwise, so the macros can be used directly in #if expressions.
      66  #ifdef __MMX__
      67  #define _GLIBCXX_SIMD_HAVE_MMX 1
      68  #else
      69  #define _GLIBCXX_SIMD_HAVE_MMX 0
      70  #endif
          // SSE and SSE2 are also reported as available whenever __x86_64__ is
          // defined, independent of __SSE__ / __SSE2__.
      71  #if defined __SSE__ || defined __x86_64__
      72  #define _GLIBCXX_SIMD_HAVE_SSE 1
      73  #else
      74  #define _GLIBCXX_SIMD_HAVE_SSE 0
      75  #endif
      76  #if defined __SSE2__ || defined __x86_64__
      77  #define _GLIBCXX_SIMD_HAVE_SSE2 1
      78  #else
      79  #define _GLIBCXX_SIMD_HAVE_SSE2 0
      80  #endif
      81  #ifdef __SSE3__
      82  #define _GLIBCXX_SIMD_HAVE_SSE3 1
      83  #else
      84  #define _GLIBCXX_SIMD_HAVE_SSE3 0
      85  #endif
      86  #ifdef __SSSE3__
      87  #define _GLIBCXX_SIMD_HAVE_SSSE3 1
      88  #else
      89  #define _GLIBCXX_SIMD_HAVE_SSSE3 0
      90  #endif
      91  #ifdef __SSE4_1__
      92  #define _GLIBCXX_SIMD_HAVE_SSE4_1 1
      93  #else
      94  #define _GLIBCXX_SIMD_HAVE_SSE4_1 0
      95  #endif
      96  #ifdef __SSE4_2__
      97  #define _GLIBCXX_SIMD_HAVE_SSE4_2 1
      98  #else
      99  #define _GLIBCXX_SIMD_HAVE_SSE4_2 0
     100  #endif
     101  #ifdef __XOP__
     102  #define _GLIBCXX_SIMD_HAVE_XOP 1
     103  #else
     104  #define _GLIBCXX_SIMD_HAVE_XOP 0
     105  #endif
     106  #ifdef __AVX__
     107  #define _GLIBCXX_SIMD_HAVE_AVX 1
     108  #else
     109  #define _GLIBCXX_SIMD_HAVE_AVX 0
     110  #endif
     111  #ifdef __AVX2__
     112  #define _GLIBCXX_SIMD_HAVE_AVX2 1
     113  #else
     114  #define _GLIBCXX_SIMD_HAVE_AVX2 0
     115  #endif
          // Note the naming difference: the library macro is ..._HAVE_BMI1, but the
          // predefine it tests is __BMI__ (no "1").
     116  #ifdef __BMI__
     117  #define _GLIBCXX_SIMD_HAVE_BMI1 1
     118  #else
     119  #define _GLIBCXX_SIMD_HAVE_BMI1 0
     120  #endif
     121  #ifdef __BMI2__
     122  #define _GLIBCXX_SIMD_HAVE_BMI2 1
     123  #else
     124  #define _GLIBCXX_SIMD_HAVE_BMI2 0
     125  #endif
     126  #ifdef __LZCNT__
     127  #define _GLIBCXX_SIMD_HAVE_LZCNT 1
     128  #else
     129  #define _GLIBCXX_SIMD_HAVE_LZCNT 0
     130  #endif
     131  #ifdef __SSE4A__
     132  #define _GLIBCXX_SIMD_HAVE_SSE4A 1
     133  #else
     134  #define _GLIBCXX_SIMD_HAVE_SSE4A 0
     135  #endif
     136  #ifdef __FMA__
     137  #define _GLIBCXX_SIMD_HAVE_FMA 1
     138  #else
     139  #define _GLIBCXX_SIMD_HAVE_FMA 0
     140  #endif
     141  #ifdef __FMA4__
     142  #define _GLIBCXX_SIMD_HAVE_FMA4 1
     143  #else
     144  #define _GLIBCXX_SIMD_HAVE_FMA4 0
     145  #endif
     146  #ifdef __F16C__
     147  #define _GLIBCXX_SIMD_HAVE_F16C 1
     148  #else
     149  #define _GLIBCXX_SIMD_HAVE_F16C 0
     150  #endif
     151  #ifdef __POPCNT__
     152  #define _GLIBCXX_SIMD_HAVE_POPCNT 1
     153  #else
     154  #define _GLIBCXX_SIMD_HAVE_POPCNT 0
     155  #endif
          // AVX-512 subsets: each subset is detected independently of the others.
     156  #ifdef __AVX512F__
     157  #define _GLIBCXX_SIMD_HAVE_AVX512F 1
     158  #else
     159  #define _GLIBCXX_SIMD_HAVE_AVX512F 0
     160  #endif
     161  #ifdef __AVX512DQ__
     162  #define _GLIBCXX_SIMD_HAVE_AVX512DQ 1
     163  #else
     164  #define _GLIBCXX_SIMD_HAVE_AVX512DQ 0
     165  #endif
     166  #ifdef __AVX512VL__
     167  #define _GLIBCXX_SIMD_HAVE_AVX512VL 1
     168  #else
     169  #define _GLIBCXX_SIMD_HAVE_AVX512VL 0
     170  #endif
     171  #ifdef __AVX512BW__
     172  #define _GLIBCXX_SIMD_HAVE_AVX512BW 1
     173  #else
     174  #define _GLIBCXX_SIMD_HAVE_AVX512BW 0
     175  #endif
     176  #ifdef __AVX512BITALG__
     177  #define _GLIBCXX_SIMD_HAVE_AVX512BITALG 1
     178  #else
     179  #define _GLIBCXX_SIMD_HAVE_AVX512BITALG 0
     180  #endif
     181  #ifdef __AVX512VBMI2__
     182  #define _GLIBCXX_SIMD_HAVE_AVX512VBMI2 1
     183  #else
     184  #define _GLIBCXX_SIMD_HAVE_AVX512VBMI2 0
     185  #endif
     186  #ifdef __AVX512VBMI__
     187  #define _GLIBCXX_SIMD_HAVE_AVX512VBMI 1
     188  #else
     189  #define _GLIBCXX_SIMD_HAVE_AVX512VBMI 0
     190  #endif
     191  #ifdef __AVX512IFMA__
     192  #define _GLIBCXX_SIMD_HAVE_AVX512IFMA 1
     193  #else
     194  #define _GLIBCXX_SIMD_HAVE_AVX512IFMA 0
     195  #endif
     196  #ifdef __AVX512CD__
     197  #define _GLIBCXX_SIMD_HAVE_AVX512CD 1
     198  #else
     199  #define _GLIBCXX_SIMD_HAVE_AVX512CD 0
     200  #endif
     201  #ifdef __AVX512VNNI__
     202  #define _GLIBCXX_SIMD_HAVE_AVX512VNNI 1
     203  #else
     204  #define _GLIBCXX_SIMD_HAVE_AVX512VNNI 0
     205  #endif
     206  #ifdef __AVX512VPOPCNTDQ__
     207  #define _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ 1
     208  #else
     209  #define _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ 0
     210  #endif
     211  #ifdef __AVX512VP2INTERSECT__
     212  #define _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT 1
     213  #else
     214  #define _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT 0
     215  #endif
     216  
          // SSE ABI availability, derived from the detection macros:
          //   _GLIBCXX_SIMD_HAVE_SSE_ABI      mirrors _GLIBCXX_SIMD_HAVE_SSE
          //   _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI mirrors _GLIBCXX_SIMD_HAVE_SSE2
          // NOTE(review): "FULL" presumably means the ABI covers every element type;
          // confirm against the users of this macro.
     217  #if _GLIBCXX_SIMD_HAVE_SSE
     218  #define _GLIBCXX_SIMD_HAVE_SSE_ABI 1
     219  #else
     220  #define _GLIBCXX_SIMD_HAVE_SSE_ABI 0
     221  #endif
     222  #if _GLIBCXX_SIMD_HAVE_SSE2
     223  #define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 1
     224  #else
     225  #define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 0
     226  #endif
     227  
          // AVX ABI availability, derived from the detection macros:
          //   _GLIBCXX_SIMD_HAVE_AVX_ABI      mirrors _GLIBCXX_SIMD_HAVE_AVX
          //   _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI mirrors _GLIBCXX_SIMD_HAVE_AVX2
     228  #if _GLIBCXX_SIMD_HAVE_AVX
     229  #define _GLIBCXX_SIMD_HAVE_AVX_ABI 1
     230  #else
     231  #define _GLIBCXX_SIMD_HAVE_AVX_ABI 0
     232  #endif
     233  #if _GLIBCXX_SIMD_HAVE_AVX2
     234  #define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 1
     235  #else
     236  #define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 0
     237  #endif
     238  
          // AVX-512 ABI availability, derived from the detection macros:
          //   _GLIBCXX_SIMD_HAVE_AVX512_ABI      mirrors _GLIBCXX_SIMD_HAVE_AVX512F
          //   _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI mirrors _GLIBCXX_SIMD_HAVE_AVX512BW
          // The second test looks only at AVX512BW; it does not re-check AVX512F.
     239  #if _GLIBCXX_SIMD_HAVE_AVX512F
     240  #define _GLIBCXX_SIMD_HAVE_AVX512_ABI 1
     241  #else
     242  #define _GLIBCXX_SIMD_HAVE_AVX512_ABI 0
     243  #endif
     244  #if _GLIBCXX_SIMD_HAVE_AVX512BW
     245  #define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 1
     246  #else
     247  #define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 0
     248  #endif
     249  
          // Consistency check. _GLIBCXX_SIMD_HAVE_SSE2 is set to 1 whenever __x86_64__
          // is defined, so with the detection logic as written this #error cannot
          // fire; it guards against that logic being changed.
     250  #if defined __x86_64__ && !_GLIBCXX_SIMD_HAVE_SSE2
     251  #error "Use of SSE2 is required on AMD64"
     252  #endif
     253  //}}}
     254  
          // Attribute helper macros.
          // Under clang, _GLIBCXX_SIMD_NORMAL_MATH and _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
          // expand to nothing. Otherwise NORMAL_MATH applies the GNU optimize attribute
          // with "finite-math-only,no-signed-zeros", and ALWAYS_INLINE_LAMBDA is the
          // __attribute__ spelling of always_inline.
     255  #ifdef __clang__
     256  #define _GLIBCXX_SIMD_NORMAL_MATH
     257  #define _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
     258  #else
     259  #define _GLIBCXX_SIMD_NORMAL_MATH                                              \
     260    [[__gnu__::__optimize__("finite-math-only,no-signed-zeros")]]
     261  #define _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA __attribute__((__always_inline__))
     262  #endif
          // NEVER_INLINE:  GNU noinline attribute only (no `inline` keyword).
          // INTRINSIC:     GNU always_inline + artificial attributes, plus `inline`.
          // ALWAYS_INLINE: GNU always_inline attribute, plus `inline`.
     263  #define _GLIBCXX_SIMD_NEVER_INLINE [[__gnu__::__noinline__]]
     264  #define _GLIBCXX_SIMD_INTRINSIC                                                \
     265    [[__gnu__::__always_inline__, __gnu__::__artificial__]] inline
     266  #define _GLIBCXX_SIMD_ALWAYS_INLINE [[__gnu__::__always_inline__]] inline
          // Branch hints: forward __x to __builtin_expect with an expected value of 0
          // (unlikely) or 1 (likely). __x is passed through as written, without extra
          // parentheses or a conversion to bool.
     267  #define _GLIBCXX_SIMD_IS_UNLIKELY(__x) __builtin_expect(__x, 0)
     268  #define _GLIBCXX_SIMD_IS_LIKELY(__x) __builtin_expect(__x, 1)
     269  
          // When __STRICT_ANSI__ is nonzero or the compiler is clang:
          //   _GLIBCXX_SIMD_CONSTEXPR         expands to nothing
          //   _GLIBCXX_SIMD_USE_CONSTEXPR_API expands to `const`
          // Otherwise both expand to `constexpr`.
     270  #if __STRICT_ANSI__ || defined __clang__
     271  #define _GLIBCXX_SIMD_CONSTEXPR
     272  #define _GLIBCXX_SIMD_USE_CONSTEXPR_API const
     273  #else
     274  #define _GLIBCXX_SIMD_CONSTEXPR constexpr
     275  #define _GLIBCXX_SIMD_USE_CONSTEXPR_API constexpr
     276  #endif
     277  
          // _GLIBCXX_SIMD_USE_CONSTEXPR is `const` under clang and `constexpr`
          // otherwise. Unlike _GLIBCXX_SIMD_USE_CONSTEXPR_API, it does not depend on
          // __STRICT_ANSI__.
     278  #if defined __clang__
     279  #define _GLIBCXX_SIMD_USE_CONSTEXPR const
     280  #else
     281  #define _GLIBCXX_SIMD_USE_CONSTEXPR constexpr
     282  #endif
     283  
          // X-macro operator lists: each invokes the function-like macro __macro once
          // per operator token, in the order shown.
          //   LIST_BINARY:      |  &  ^
          //   LIST_SHIFTS:      << >>
          //   LIST_ARITHMETICS: +  -  *  /  %
     284  #define _GLIBCXX_SIMD_LIST_BINARY(__macro) __macro(|) __macro(&) __macro(^)
     285  #define _GLIBCXX_SIMD_LIST_SHIFTS(__macro) __macro(<<) __macro(>>)
     286  #define _GLIBCXX_SIMD_LIST_ARITHMETICS(__macro)                                \
     287    __macro(+) __macro(-) __macro(*) __macro(/) __macro(%)
     288  
          // Same as the corresponding _GLIBCXX_SIMD_LIST_* macro, followed by
          // `static_assert(true)`. The static_assert is emitted without a trailing
          // semicolon, so the invocation site has to supply one.
     289  #define _GLIBCXX_SIMD_ALL_BINARY(__macro)                                      \
     290    _GLIBCXX_SIMD_LIST_BINARY(__macro) static_assert(true)
     291  #define _GLIBCXX_SIMD_ALL_SHIFTS(__macro)                                      \
     292    _GLIBCXX_SIMD_LIST_SHIFTS(__macro) static_assert(true)
     293  #define _GLIBCXX_SIMD_ALL_ARITHMETICS(__macro)                                 \
     294    _GLIBCXX_SIMD_LIST_ARITHMETICS(__macro) static_assert(true)
     295  
          // Opt-out switch: if _GLIBCXX_SIMD_NO_ALWAYS_INLINE is defined, the forced
          // inlining macros are redefined without their attributes. ALWAYS_INLINE and
          // INTRINSIC become plain `inline`; ALWAYS_INLINE_LAMBDA becomes empty.
          // _GLIBCXX_SIMD_NEVER_INLINE is not touched here.
     296  #ifdef _GLIBCXX_SIMD_NO_ALWAYS_INLINE
     297  #undef _GLIBCXX_SIMD_ALWAYS_INLINE
     298  #define _GLIBCXX_SIMD_ALWAYS_INLINE inline
     299  #undef _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
     300  #define _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
     301  #undef _GLIBCXX_SIMD_INTRINSIC
     302  #define _GLIBCXX_SIMD_INTRINSIC inline
     303  #endif
     304  
          // _GLIBCXX_SIMD_X86INTRIN is 1 when either SSE or MMX was detected, else 0.
          // NOTE(review): presumably this gates the use of x86 intrinsics elsewhere in
          // the simd implementation; confirm against the users of the macro.
     305  #if _GLIBCXX_SIMD_HAVE_SSE || _GLIBCXX_SIMD_HAVE_MMX
     306  #define _GLIBCXX_SIMD_X86INTRIN 1
     307  #else
     308  #define _GLIBCXX_SIMD_X86INTRIN 0
     309  #endif
     310  
     311  // workaround macros {{{
          // Unlike the _GLIBCXX_SIMD_HAVE_* macros, the conditional workaround macros
          // below are either defined to 1 or left undefined; none is ever defined to 0.
     312  // use aliasing loads to help GCC understand the data accesses better
     313  // This also seems to hide a miscompilation on swap(x[i], x[i + 1]) with
     314  // fixed_size_simd<float, 16> x.
     315  #define _GLIBCXX_SIMD_USE_ALIASING_LOADS 1
     316  
     317  // vector conversions on x86 not optimized:
          // (defined only when _GLIBCXX_SIMD_X86INTRIN is nonzero)
     318  #if _GLIBCXX_SIMD_X86INTRIN
     319  #define _GLIBCXX_SIMD_WORKAROUND_PR85048 1
     320  #endif
     321  
     322  // integer division not optimized
          // (defined for every compiler except clang)
     323  #ifndef __clang__
     324  #define _GLIBCXX_SIMD_WORKAROUND_PR90993 1
     325  #endif
     326  
     327  // very bad codegen for extraction and concatenation of 128/256 "subregisters"
     328  // with sizeof(element type) < 8: https://godbolt.org/g/mqUsgM
          // (defined only when _GLIBCXX_SIMD_X86INTRIN is nonzero)
     329  #if _GLIBCXX_SIMD_X86INTRIN
     330  #define _GLIBCXX_SIMD_WORKAROUND_XXX_1 1
     331  #endif
     332  
     333  // bad codegen for 8 Byte memcpy to __vector_type_t<char, 16>
          // (defined unconditionally)
     334  #define _GLIBCXX_SIMD_WORKAROUND_PR90424 1
     335  
     336  // bad codegen for zero-extend using simple concat(__x, 0)
          // (defined only when _GLIBCXX_SIMD_X86INTRIN is nonzero)
     337  #if _GLIBCXX_SIMD_X86INTRIN
     338  #define _GLIBCXX_SIMD_WORKAROUND_XXX_3 1
     339  #endif
     340  
          // Switches for fixes to issues reported against the Parallelism TS. Both are
          // defined to 1 unconditionally.
     341  // https://github.com/cplusplus/parallelism-ts/issues/65 (incorrect return type
     342  // of static_simd_cast)
     343  #define _GLIBCXX_SIMD_FIX_P2TS_ISSUE65 1
     344  
     345  // https://github.com/cplusplus/parallelism-ts/issues/66 (incorrect SFINAE
     346  // constraint on (static)_simd_cast)
     347  #define _GLIBCXX_SIMD_FIX_P2TS_ISSUE66 1
     348  // }}}
     349  
     350  /// @endcond
     351  
     352  #endif // __cplusplus >= 201703L
     353  #endif // _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
     354  
     355  // vim: foldmethod=marker