/* Copyright (C) 2013-2023 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512vbmi2vlintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef _AVX512VBMI2VLINTRIN_H_INCLUDED
#define _AVX512VBMI2VLINTRIN_H_INCLUDED

#if !defined(__AVX512VL__) || !defined(__AVX512VBMI2__)
#pragma GCC push_options
#pragma GCC target("avx512vbmi2,avx512vl")
#define __DISABLE_AVX512VBMI2VL__
#endif /* __AVX512VBMI2VL__ */

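/* Compression (VPCOMPRESSB/VPCOMPRESSW): elements of the source whose
   mask bit is set are packed contiguously into the low elements of
   the result; the remaining elements are taken from the merge operand
   (mask forms) or zeroed (maskz forms).  The compressstoreu forms
   write only the selected elements to unaligned memory.  */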
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_compress_epi8 (__m128i __A, __mmask16 __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi)__C,
						(__v16qi)__A, (__mmask16)__B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_compress_epi8 (__mmask16 __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __B,
			(__v16qi) _mm_setzero_si128 (), (__mmask16) __A);
}

extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_compressstoreu_epi16 (void * __A, __mmask16 __B, __m256i __C)
{
  __builtin_ia32_compressstoreuhi256_mask ((__v16hi *) __A, (__v16hi) __C,
							(__mmask16) __B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_compress_epi16 (__m128i __A, __mmask8 __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi)__C, (__v8hi)__A,
								(__mmask8)__B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_compress_epi16 (__mmask8 __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __B,
				(__v8hi) _mm_setzero_si128 (), (__mmask8) __A);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_compress_epi16 (__m256i __A, __mmask16 __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi)__C,
						(__v16hi)__A, (__mmask16)__B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_compress_epi16 (__mmask16 __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __B,
			(__v16hi) _mm256_setzero_si256 (), (__mmask16) __A);
}

extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_compressstoreu_epi8 (void * __A, __mmask16 __B, __m128i __C)
{
  __builtin_ia32_compressstoreuqi128_mask ((__v16qi *) __A, (__v16qi) __C,
							(__mmask16) __B);
}

extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_compressstoreu_epi16 (void * __A, __mmask8 __B, __m128i __C)
{
  __builtin_ia32_compressstoreuhi128_mask ((__v8hi *) __A, (__v8hi) __C,
							(__mmask8) __B);
}

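/* Expansion (VPEXPANDB/VPEXPANDW), the inverse of compression:
   consecutive low elements of the source (or of the memory operand in
   the expandloadu forms) are placed into the result elements whose
   mask bit is set.  */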
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_expand_epi8 (__m128i __A, __mmask16 __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __C,
						    (__v16qi) __A,
						    (__mmask16) __B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_expand_epi8 (__mmask16 __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_expandqi128_maskz ((__v16qi) __B,
			(__v16qi) _mm_setzero_si128 (), (__mmask16) __A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_expandloadu_epi8 (__m128i __A, __mmask16 __B, const void * __C)
{
  return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *) __C,
					(__v16qi) __A, (__mmask16) __B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_expandloadu_epi8 (__mmask16 __A, const void * __B)
{
  return (__m128i) __builtin_ia32_expandloadqi128_maskz ((const __v16qi *) __B,
			(__v16qi) _mm_setzero_si128 (), (__mmask16) __A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_expand_epi16 (__m128i __A, __mmask8 __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __C,
						    (__v8hi) __A,
						    (__mmask8) __B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_expand_epi16 (__mmask8 __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_expandhi128_maskz ((__v8hi) __B,
				(__v8hi) _mm_setzero_si128 (), (__mmask8) __A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_expandloadu_epi16 (__m128i __A, __mmask8 __B, const void * __C)
{
  return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *) __C,
						(__v8hi) __A, (__mmask8) __B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_expandloadu_epi16 (__mmask8 __A, const void * __B)
{
  return (__m128i) __builtin_ia32_expandloadhi128_maskz ((const __v8hi *) __B,
				(__v8hi) _mm_setzero_si128 (), (__mmask8) __A);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_expand_epi16 (__m256i __A, __mmask16 __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __C,
						    (__v16hi) __A,
						    (__mmask16) __B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_expand_epi16 (__mmask16 __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_expandhi256_maskz ((__v16hi) __B,
			(__v16hi) _mm256_setzero_si256 (), (__mmask16) __A);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_expandloadu_epi16 (__m256i __A, __mmask16 __B, const void * __C)
{
  return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *) __C,
					(__v16hi) __A, (__mmask16) __B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_expandloadu_epi16 (__mmask16 __A, const void * __B)
{
  return (__m256i) __builtin_ia32_expandloadhi256_maskz ((const __v16hi *) __B,
			(__v16hi) _mm256_setzero_si256 (), (__mmask16) __A);
}

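/* Concatenated (funnel) shift right/left by an immediate
   (VPSHRDW/D/Q, VPSHLDW/D/Q).  The count operand must be a
   compile-time constant, so the inline functions are usable only when
   optimizing; without optimization the macro forms below hand the
   literal count straight to the builtin.  */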
#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shrdi_epi16 (__m256i __A, __m256i __B, int __C)
{
  return (__m256i) __builtin_ia32_vpshrd_v16hi ((__v16hi)__A, (__v16hi) __B,
									__C);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shrdi_epi16 (__m256i __A, __mmask16 __B, __m256i __C, __m256i __D,
								int __E)
{
  return (__m256i)__builtin_ia32_vpshrd_v16hi_mask ((__v16hi)__C,
			(__v16hi) __D, __E, (__v16hi) __A, (__mmask16)__B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shrdi_epi16 (__mmask16 __A, __m256i __B, __m256i __C, int __D)
{
  return (__m256i)__builtin_ia32_vpshrd_v16hi_mask ((__v16hi)__B,
	(__v16hi) __C, __D, (__v16hi) _mm256_setzero_si256 (), (__mmask16)__A);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shrdi_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D,
								int __E)
{
  return (__m256i)__builtin_ia32_vpshrd_v8si_mask ((__v8si)__C, (__v8si) __D,
					__E, (__v8si) __A, (__mmask8)__B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shrdi_epi32 (__mmask8 __A, __m256i __B, __m256i __C, int __D)
{
  return (__m256i)__builtin_ia32_vpshrd_v8si_mask ((__v8si)__B, (__v8si) __C,
			__D, (__v8si) _mm256_setzero_si256 (), (__mmask8)__A);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shrdi_epi32 (__m256i __A, __m256i __B, int __C)
{
  return (__m256i) __builtin_ia32_vpshrd_v8si ((__v8si)__A, (__v8si) __B, __C);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shrdi_epi64 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D,
								int __E)
{
  return (__m256i)__builtin_ia32_vpshrd_v4di_mask ((__v4di)__C, (__v4di) __D,
					__E, (__v4di) __A, (__mmask8)__B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shrdi_epi64 (__mmask8 __A, __m256i __B, __m256i __C, int __D)
{
  return (__m256i)__builtin_ia32_vpshrd_v4di_mask ((__v4di)__B, (__v4di) __C,
			__D, (__v4di) _mm256_setzero_si256 (), (__mmask8)__A);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shrdi_epi64 (__m256i __A, __m256i __B, int __C)
{
  return (__m256i) __builtin_ia32_vpshrd_v4di ((__v4di)__A, (__v4di) __B, __C);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shrdi_epi16 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
								int __E)
{
  return (__m128i)__builtin_ia32_vpshrd_v8hi_mask ((__v8hi)__C, (__v8hi) __D,
					__E, (__v8hi) __A, (__mmask8)__B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shrdi_epi16 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
{
  return (__m128i)__builtin_ia32_vpshrd_v8hi_mask ((__v8hi)__B, (__v8hi) __C,
			__D, (__v8hi) _mm_setzero_si128 (), (__mmask8)__A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shrdi_epi16 (__m128i __A, __m128i __B, int __C)
{
  return (__m128i) __builtin_ia32_vpshrd_v8hi ((__v8hi)__A, (__v8hi) __B, __C);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shrdi_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
								int __E)
{
  return (__m128i)__builtin_ia32_vpshrd_v4si_mask ((__v4si)__C, (__v4si) __D,
					__E, (__v4si) __A, (__mmask8)__B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shrdi_epi32 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
{
  return (__m128i)__builtin_ia32_vpshrd_v4si_mask ((__v4si)__B, (__v4si) __C,
			__D, (__v4si) _mm_setzero_si128 (), (__mmask8)__A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shrdi_epi32 (__m128i __A, __m128i __B, int __C)
{
  return (__m128i) __builtin_ia32_vpshrd_v4si ((__v4si)__A, (__v4si) __B, __C);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shrdi_epi64 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
								int __E)
{
  return (__m128i)__builtin_ia32_vpshrd_v2di_mask ((__v2di)__C, (__v2di) __D,
					__E, (__v2di) __A, (__mmask8)__B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shrdi_epi64 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
{
  return (__m128i)__builtin_ia32_vpshrd_v2di_mask ((__v2di)__B, (__v2di) __C,
			__D, (__v2di) _mm_setzero_si128 (), (__mmask8)__A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shrdi_epi64 (__m128i __A, __m128i __B, int __C)
{
  return (__m128i) __builtin_ia32_vpshrd_v2di ((__v2di)__A, (__v2di) __B, __C);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shldi_epi16 (__m256i __A, __m256i __B, int __C)
{
  return (__m256i) __builtin_ia32_vpshld_v16hi ((__v16hi)__A, (__v16hi) __B,
									__C);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shldi_epi16 (__m256i __A, __mmask16 __B, __m256i __C, __m256i __D,
								int __E)
{
  return (__m256i)__builtin_ia32_vpshld_v16hi_mask ((__v16hi)__C,
			(__v16hi) __D, __E, (__v16hi) __A, (__mmask16)__B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shldi_epi16 (__mmask16 __A, __m256i __B, __m256i __C, int __D)
{
  return (__m256i)__builtin_ia32_vpshld_v16hi_mask ((__v16hi)__B,
	(__v16hi) __C, __D, (__v16hi) _mm256_setzero_si256 (), (__mmask16)__A);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shldi_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D,
								int __E)
{
  return (__m256i)__builtin_ia32_vpshld_v8si_mask ((__v8si)__C, (__v8si) __D,
					__E, (__v8si) __A, (__mmask8)__B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shldi_epi32 (__mmask8 __A, __m256i __B, __m256i __C, int __D)
{
  return (__m256i)__builtin_ia32_vpshld_v8si_mask ((__v8si)__B, (__v8si) __C,
			__D, (__v8si) _mm256_setzero_si256 (), (__mmask8)__A);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shldi_epi32 (__m256i __A, __m256i __B, int __C)
{
  return (__m256i) __builtin_ia32_vpshld_v8si ((__v8si)__A, (__v8si) __B, __C);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shldi_epi64 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D,
								int __E)
{
  return (__m256i)__builtin_ia32_vpshld_v4di_mask ((__v4di)__C, (__v4di) __D,
					__E, (__v4di) __A, (__mmask8)__B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shldi_epi64 (__mmask8 __A, __m256i __B, __m256i __C, int __D)
{
  return (__m256i)__builtin_ia32_vpshld_v4di_mask ((__v4di)__B, (__v4di) __C,
			__D, (__v4di) _mm256_setzero_si256 (), (__mmask8)__A);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shldi_epi64 (__m256i __A, __m256i __B, int __C)
{
  return (__m256i) __builtin_ia32_vpshld_v4di ((__v4di)__A, (__v4di) __B, __C);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shldi_epi16 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
								int __E)
{
  return (__m128i)__builtin_ia32_vpshld_v8hi_mask ((__v8hi)__C, (__v8hi) __D,
					__E, (__v8hi) __A, (__mmask8)__B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shldi_epi16 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
{
  return (__m128i)__builtin_ia32_vpshld_v8hi_mask ((__v8hi)__B, (__v8hi) __C,
			__D, (__v8hi) _mm_setzero_si128 (), (__mmask8)__A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shldi_epi16 (__m128i __A, __m128i __B, int __C)
{
  return (__m128i) __builtin_ia32_vpshld_v8hi ((__v8hi)__A, (__v8hi) __B, __C);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shldi_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
								int __E)
{
  return (__m128i)__builtin_ia32_vpshld_v4si_mask ((__v4si)__C, (__v4si) __D,
					__E, (__v4si) __A, (__mmask8)__B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shldi_epi32 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
{
  return (__m128i)__builtin_ia32_vpshld_v4si_mask ((__v4si)__B, (__v4si) __C,
			__D, (__v4si) _mm_setzero_si128 (), (__mmask8)__A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shldi_epi32 (__m128i __A, __m128i __B, int __C)
{
  return (__m128i) __builtin_ia32_vpshld_v4si ((__v4si)__A, (__v4si) __B, __C);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shldi_epi64 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
								int __E)
{
  return (__m128i)__builtin_ia32_vpshld_v2di_mask ((__v2di)__C, (__v2di) __D,
					__E, (__v2di) __A, (__mmask8)__B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shldi_epi64 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
{
  return (__m128i)__builtin_ia32_vpshld_v2di_mask ((__v2di)__B, (__v2di) __C,
			__D, (__v2di) _mm_setzero_si128 (), (__mmask8)__A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shldi_epi64 (__m128i __A, __m128i __B, int __C)
{
  return (__m128i) __builtin_ia32_vpshld_v2di ((__v2di)__A, (__v2di) __B, __C);
}
#else
#define _mm256_shrdi_epi16(A, B, C) \
  ((__m256i) __builtin_ia32_vpshrd_v16hi ((__v16hi)(__m256i)(A), \
					  (__v16hi)(__m256i)(B),(int)(C)))
#define _mm256_mask_shrdi_epi16(A, B, C, D, E) \
  ((__m256i) __builtin_ia32_vpshrd_v16hi_mask ((__v16hi)(__m256i)(C), \
					       (__v16hi)(__m256i)(D), \
					       (int)(E),		\
					       (__v16hi)(__m256i)(A), \
					       (__mmask16)(B)))
#define _mm256_maskz_shrdi_epi16(A, B, C, D) \
  ((__m256i) \
   __builtin_ia32_vpshrd_v16hi_mask ((__v16hi)(__m256i)(B),		\
				     (__v16hi)(__m256i)(C),(int)(D),	\
				     (__v16hi)(__m256i)_mm256_setzero_si256 (), \
				     (__mmask16)(A)))
#define _mm256_shrdi_epi32(A, B, C) \
  ((__m256i) __builtin_ia32_vpshrd_v8si ((__v8si)(__m256i)(A), \
					 (__v8si)(__m256i)(B),(int)(C)))
#define _mm256_mask_shrdi_epi32(A, B, C, D, E) \
  ((__m256i) __builtin_ia32_vpshrd_v8si_mask ((__v8si)(__m256i)(C), \
					      (__v8si)(__m256i)(D), \
					      (int)(E), \
					      (__v8si)(__m256i)(A), \
					      (__mmask8)(B)))
#define _mm256_maskz_shrdi_epi32(A, B, C, D) \
  ((__m256i) \
   __builtin_ia32_vpshrd_v8si_mask ((__v8si)(__m256i)(B),		\
				    (__v8si)(__m256i)(C),(int)(D),	\
				    (__v8si)(__m256i)_mm256_setzero_si256 (), \
				    (__mmask8)(A)))
#define _mm256_shrdi_epi64(A, B, C) \
  ((__m256i) __builtin_ia32_vpshrd_v4di ((__v4di)(__m256i)(A), \
					 (__v4di)(__m256i)(B),(int)(C)))
#define _mm256_mask_shrdi_epi64(A, B, C, D, E) \
  ((__m256i) __builtin_ia32_vpshrd_v4di_mask ((__v4di)(__m256i)(C), \
					      (__v4di)(__m256i)(D), (int)(E), \
					      (__v4di)(__m256i)(A), \
					      (__mmask8)(B)))
#define _mm256_maskz_shrdi_epi64(A, B, C, D) \
  ((__m256i) \
   __builtin_ia32_vpshrd_v4di_mask ((__v4di)(__m256i)(B),		\
				    (__v4di)(__m256i)(C),(int)(D),	\
				    (__v4di)(__m256i)_mm256_setzero_si256 (), \
				    (__mmask8)(A)))
#define _mm_shrdi_epi16(A, B, C) \
  ((__m128i) __builtin_ia32_vpshrd_v8hi ((__v8hi)(__m128i)(A), \
					 (__v8hi)(__m128i)(B),(int)(C)))
#define _mm_mask_shrdi_epi16(A, B, C, D, E) \
  ((__m128i) __builtin_ia32_vpshrd_v8hi_mask ((__v8hi)(__m128i)(C), \
					      (__v8hi)(__m128i)(D), (int)(E), \
					      (__v8hi)(__m128i)(A), \
					      (__mmask8)(B)))
#define _mm_maskz_shrdi_epi16(A, B, C, D) \
  ((__m128i) \
   __builtin_ia32_vpshrd_v8hi_mask ((__v8hi)(__m128i)(B),		\
				    (__v8hi)(__m128i)(C),(int)(D),	\
				    (__v8hi)(__m128i)_mm_setzero_si128 (), \
				    (__mmask8)(A)))
#define _mm_shrdi_epi32(A, B, C) \
  ((__m128i) __builtin_ia32_vpshrd_v4si ((__v4si)(__m128i)(A), \
					 (__v4si)(__m128i)(B),(int)(C)))
#define _mm_mask_shrdi_epi32(A, B, C, D, E) \
  ((__m128i) __builtin_ia32_vpshrd_v4si_mask ((__v4si)(__m128i)(C),	\
					      (__v4si)(__m128i)(D), (int)(E), \
					      (__v4si)(__m128i)(A), \
					      (__mmask8)(B)))
#define _mm_maskz_shrdi_epi32(A, B, C, D) \
  ((__m128i) \
   __builtin_ia32_vpshrd_v4si_mask ((__v4si)(__m128i)(B),		\
				    (__v4si)(__m128i)(C),(int)(D),	\
				    (__v4si)(__m128i)_mm_setzero_si128 (), \
				    (__mmask8)(A)))
#define _mm_shrdi_epi64(A, B, C) \
  ((__m128i) __builtin_ia32_vpshrd_v2di ((__v2di)(__m128i)(A), \
					 (__v2di)(__m128i)(B),(int)(C)))
#define _mm_mask_shrdi_epi64(A, B, C, D, E) \
  ((__m128i) __builtin_ia32_vpshrd_v2di_mask ((__v2di)(__m128i)(C), \
					      (__v2di)(__m128i)(D), (int)(E), \
					      (__v2di)(__m128i)(A), \
					      (__mmask8)(B)))
#define _mm_maskz_shrdi_epi64(A, B, C, D) \
  ((__m128i) \
   __builtin_ia32_vpshrd_v2di_mask ((__v2di)(__m128i)(B),		\
				    (__v2di)(__m128i)(C),(int)(D),	\
				    (__v2di)(__m128i)_mm_setzero_si128 (), \
				    (__mmask8)(A)))
#define _mm256_shldi_epi16(A, B, C) \
  ((__m256i) __builtin_ia32_vpshld_v16hi ((__v16hi)(__m256i)(A), \
					  (__v16hi)(__m256i)(B),(int)(C)))
#define _mm256_mask_shldi_epi16(A, B, C, D, E) \
  ((__m256i) __builtin_ia32_vpshld_v16hi_mask ((__v16hi)(__m256i)(C), \
					       (__v16hi)(__m256i)(D), \
					       (int)(E),		\
					       (__v16hi)(__m256i)(A), \
					       (__mmask16)(B)))
#define _mm256_maskz_shldi_epi16(A, B, C, D) \
  ((__m256i) \
   __builtin_ia32_vpshld_v16hi_mask ((__v16hi)(__m256i)(B),		\
				     (__v16hi)(__m256i)(C),(int)(D),	\
				     (__v16hi)(__m256i)_mm256_setzero_si256 (), \
				     (__mmask16)(A)))
#define _mm256_shldi_epi32(A, B, C) \
  ((__m256i) __builtin_ia32_vpshld_v8si ((__v8si)(__m256i)(A), \
					 (__v8si)(__m256i)(B),(int)(C)))
#define _mm256_mask_shldi_epi32(A, B, C, D, E) \
  ((__m256i) __builtin_ia32_vpshld_v8si_mask ((__v8si)(__m256i)(C), \
					      (__v8si)(__m256i)(D), (int)(E), \
					      (__v8si)(__m256i)(A), \
					      (__mmask8)(B)))
#define _mm256_maskz_shldi_epi32(A, B, C, D) \
  ((__m256i) \
   __builtin_ia32_vpshld_v8si_mask ((__v8si)(__m256i)(B),		\
				    (__v8si)(__m256i)(C),(int)(D),	\
				    (__v8si)(__m256i)_mm256_setzero_si256 (), \
				    (__mmask8)(A)))
#define _mm256_shldi_epi64(A, B, C) \
  ((__m256i) __builtin_ia32_vpshld_v4di ((__v4di)(__m256i)(A), \
					 (__v4di)(__m256i)(B),(int)(C)))
#define _mm256_mask_shldi_epi64(A, B, C, D, E) \
  ((__m256i) __builtin_ia32_vpshld_v4di_mask ((__v4di)(__m256i)(C), \
					      (__v4di)(__m256i)(D), (int)(E), \
					      (__v4di)(__m256i)(A), \
					      (__mmask8)(B)))
#define _mm256_maskz_shldi_epi64(A, B, C, D) \
  ((__m256i) \
   __builtin_ia32_vpshld_v4di_mask ((__v4di)(__m256i)(B),		\
				    (__v4di)(__m256i)(C),(int)(D),	\
				    (__v4di)(__m256i)_mm256_setzero_si256 (), \
				    (__mmask8)(A)))
#define _mm_shldi_epi16(A, B, C) \
  ((__m128i) __builtin_ia32_vpshld_v8hi ((__v8hi)(__m128i)(A), \
					 (__v8hi)(__m128i)(B),(int)(C)))
#define _mm_mask_shldi_epi16(A, B, C, D, E) \
  ((__m128i) __builtin_ia32_vpshld_v8hi_mask ((__v8hi)(__m128i)(C), \
					      (__v8hi)(__m128i)(D), (int)(E), \
					      (__v8hi)(__m128i)(A), \
					      (__mmask8)(B)))
#define _mm_maskz_shldi_epi16(A, B, C, D) \
  ((__m128i) \
   __builtin_ia32_vpshld_v8hi_mask ((__v8hi)(__m128i)(B),		\
				    (__v8hi)(__m128i)(C),(int)(D),	\
				    (__v8hi)(__m128i)_mm_setzero_si128 (), \
				    (__mmask8)(A)))
#define _mm_shldi_epi32(A, B, C) \
  ((__m128i) __builtin_ia32_vpshld_v4si ((__v4si)(__m128i)(A), \
					 (__v4si)(__m128i)(B),(int)(C)))
#define _mm_mask_shldi_epi32(A, B, C, D, E) \
  ((__m128i) __builtin_ia32_vpshld_v4si_mask ((__v4si)(__m128i)(C), \
					      (__v4si)(__m128i)(D), (int)(E), \
					      (__v4si)(__m128i)(A), \
					      (__mmask8)(B)))
#define _mm_maskz_shldi_epi32(A, B, C, D) \
  ((__m128i) \
   __builtin_ia32_vpshld_v4si_mask ((__v4si)(__m128i)(B),		\
				    (__v4si)(__m128i)(C),(int)(D),	\
				    (__v4si)(__m128i)_mm_setzero_si128 (), \
				    (__mmask8)(A)))
#define _mm_shldi_epi64(A, B, C) \
  ((__m128i) __builtin_ia32_vpshld_v2di ((__v2di)(__m128i)(A), \
					 (__v2di)(__m128i)(B),(int)(C)))
#define _mm_mask_shldi_epi64(A, B, C, D, E) \
  ((__m128i) __builtin_ia32_vpshld_v2di_mask ((__v2di)(__m128i)(C), \
					      (__v2di)(__m128i)(D), (int)(E), \
					      (__v2di)(__m128i)(A), \
					      (__mmask8)(B)))
#define _mm_maskz_shldi_epi64(A, B, C, D) \
  ((__m128i) \
   __builtin_ia32_vpshld_v2di_mask ((__v2di)(__m128i)(B),		\
				    (__v2di)(__m128i)(C),(int)(D),	\
				    (__v2di)(__m128i)_mm_setzero_si128 (), \
				    (__mmask8)(A)))
#endif

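/* Concatenated shift right/left by a per-element variable count
   (VPSHRDVW/D/Q, VPSHLDVW/D/Q): each result element is taken from the
   double-width concatenation of the corresponding elements of the
   first two operands, shifted by the count supplied in the third.  */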
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shrdv_epi16 (__m256i __A, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vpshrdv_v16hi ((__v16hi)__A, (__v16hi) __B,
								(__v16hi) __C);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shrdv_epi16 (__m256i __A, __mmask16 __B, __m256i __C, __m256i __D)
{
  return (__m256i)__builtin_ia32_vpshrdv_v16hi_mask ((__v16hi)__A,
				(__v16hi) __C, (__v16hi) __D, (__mmask16)__B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shrdv_epi16 (__mmask16 __A, __m256i __B, __m256i __C, __m256i __D)
{
  return (__m256i)__builtin_ia32_vpshrdv_v16hi_maskz ((__v16hi)__B,
				(__v16hi) __C, (__v16hi) __D, (__mmask16)__A);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shrdv_epi32 (__m256i __A, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vpshrdv_v8si ((__v8si)__A, (__v8si) __B,
								(__v8si) __C);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shrdv_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
{
  return (__m256i)__builtin_ia32_vpshrdv_v8si_mask ((__v8si)__A, (__v8si) __C,
						(__v8si) __D, (__mmask8)__B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shrdv_epi32 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
{
  return (__m256i)__builtin_ia32_vpshrdv_v8si_maskz ((__v8si)__B, (__v8si) __C,
						 (__v8si) __D, (__mmask8)__A);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shrdv_epi64 (__m256i __A, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vpshrdv_v4di ((__v4di)__A, (__v4di) __B,
								(__v4di) __C);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shrdv_epi64 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
{
  return (__m256i)__builtin_ia32_vpshrdv_v4di_mask ((__v4di)__A, (__v4di) __C,
						(__v4di) __D, (__mmask8)__B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shrdv_epi64 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
{
  return (__m256i)__builtin_ia32_vpshrdv_v4di_maskz ((__v4di)__B, (__v4di) __C,
						 (__v4di) __D, (__mmask8)__A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shrdv_epi16 (__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpshrdv_v8hi ((__v8hi)__A, (__v8hi) __B,
								(__v8hi) __C);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shrdv_epi16 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
{
  return (__m128i)__builtin_ia32_vpshrdv_v8hi_mask ((__v8hi)__A, (__v8hi) __C,
						(__v8hi) __D, (__mmask8)__B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shrdv_epi16 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
{
  return (__m128i)__builtin_ia32_vpshrdv_v8hi_maskz ((__v8hi)__B, (__v8hi) __C,
						 (__v8hi) __D, (__mmask8)__A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shrdv_epi32 (__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpshrdv_v4si ((__v4si)__A, (__v4si) __B,
								(__v4si) __C);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shrdv_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
{
  return (__m128i)__builtin_ia32_vpshrdv_v4si_mask ((__v4si)__A, (__v4si) __C,
						(__v4si) __D, (__mmask8)__B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shrdv_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
{
  return (__m128i)__builtin_ia32_vpshrdv_v4si_maskz ((__v4si)__B, (__v4si) __C,
						 (__v4si) __D, (__mmask8)__A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shrdv_epi64 (__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpshrdv_v2di ((__v2di)__A, (__v2di) __B,
								(__v2di) __C);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shrdv_epi64 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
{
  return (__m128i)__builtin_ia32_vpshrdv_v2di_mask ((__v2di)__A, (__v2di) __C,
						(__v2di) __D, (__mmask8)__B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shrdv_epi64 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
{
  return (__m128i)__builtin_ia32_vpshrdv_v2di_maskz ((__v2di)__B, (__v2di) __C,
						 (__v2di) __D, (__mmask8)__A);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shldv_epi16 (__m256i __A, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vpshldv_v16hi ((__v16hi)__A, (__v16hi) __B,
								(__v16hi) __C);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shldv_epi16 (__m256i __A, __mmask16 __B, __m256i __C, __m256i __D)
{
  return (__m256i)__builtin_ia32_vpshldv_v16hi_mask ((__v16hi)__A,
				(__v16hi) __C, (__v16hi) __D, (__mmask16)__B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shldv_epi16 (__mmask16 __A, __m256i __B, __m256i __C, __m256i __D)
{
  return (__m256i)__builtin_ia32_vpshldv_v16hi_maskz ((__v16hi)__B,
				(__v16hi) __C, (__v16hi) __D, (__mmask16)__A);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shldv_epi32 (__m256i __A, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vpshldv_v8si ((__v8si)__A, (__v8si) __B,
								(__v8si) __C);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shldv_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
{
  return (__m256i)__builtin_ia32_vpshldv_v8si_mask ((__v8si)__A, (__v8si) __C,
						(__v8si) __D, (__mmask8)__B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shldv_epi32 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
{
  return (__m256i)__builtin_ia32_vpshldv_v8si_maskz ((__v8si)__B, (__v8si) __C,
						(__v8si) __D, (__mmask8)__A);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shldv_epi64 (__m256i __A, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vpshldv_v4di ((__v4di)__A, (__v4di) __B,
								(__v4di) __C);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shldv_epi64 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
{
  return (__m256i)__builtin_ia32_vpshldv_v4di_mask ((__v4di)__A, (__v4di) __C,
						(__v4di) __D, (__mmask8)__B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shldv_epi64 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
{
  return (__m256i)__builtin_ia32_vpshldv_v4di_maskz ((__v4di)__B, (__v4di) __C,
						 (__v4di) __D, (__mmask8)__A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shldv_epi16 (__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpshldv_v8hi ((__v8hi)__A, (__v8hi) __B,
								(__v8hi) __C);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shldv_epi16 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
{
  return (__m128i)__builtin_ia32_vpshldv_v8hi_mask ((__v8hi)__A, (__v8hi) __C,
						(__v8hi) __D, (__mmask8)__B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shldv_epi16 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
{
  return (__m128i)__builtin_ia32_vpshldv_v8hi_maskz ((__v8hi)__B, (__v8hi) __C,
						 (__v8hi) __D, (__mmask8)__A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shldv_epi32 (__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpshldv_v4si ((__v4si)__A, (__v4si) __B,
								(__v4si) __C);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shldv_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
{
  return (__m128i)__builtin_ia32_vpshldv_v4si_mask ((__v4si)__A, (__v4si) __C,
						(__v4si) __D, (__mmask8)__B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shldv_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
{
  return (__m128i)__builtin_ia32_vpshldv_v4si_maskz ((__v4si)__B, (__v4si) __C,
						 (__v4si) __D, (__mmask8)__A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shldv_epi64 (__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpshldv_v2di ((__v2di)__A, (__v2di) __B,
								(__v2di) __C);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shldv_epi64 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
{
  return (__m128i)__builtin_ia32_vpshldv_v2di_mask ((__v2di)__A, (__v2di) __C,
						(__v2di) __D, (__mmask8)__B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shldv_epi64 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
{
  return (__m128i)__builtin_ia32_vpshldv_v2di_maskz ((__v2di)__B, (__v2di) __C,
						(__v2di) __D, (__mmask8)__A);
}
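
/* Usage sketch (an illustration added here, not part of the GCC
   sources): shifting the self-concatenation of a lane yields a
   rotate, so with -mavx512vbmi2 -mavx512vl a per-lane rotate-left
   by 7 of 32-bit elements can be written as

     static inline __m128i
     rotl7_epi32 (__m128i __x)
     {
       return _mm_shldi_epi32 (__x, __x, 7);
     }

   since _mm_shldi_epi32 keeps the upper 32 bits of (__x:__x) << 7.  */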

#ifdef __DISABLE_AVX512VBMI2VL__
#undef __DISABLE_AVX512VBMI2VL__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512VBMI2VL__ */

#if !defined(__AVX512VL__) || !defined(__AVX512VBMI2__) || \
    !defined(__AVX512BW__)
#pragma GCC push_options
#pragma GCC target("avx512vbmi2,avx512vl,avx512bw")
#define __DISABLE_AVX512VBMI2VLBW__
#endif /* __AVX512VBMI2VLBW__ */

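/* The 256-bit byte compress/expand intrinsics below take a 32-bit
   mask, and __mmask32 additionally requires AVX512BW, hence the
   separate target block.  */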
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_compress_epi8 (__m256i __A, __mmask32 __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi)__C,
						(__v32qi)__A, (__mmask32)__B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_compress_epi8 (__mmask32 __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __B,
			(__v32qi) _mm256_setzero_si256 (), (__mmask32) __A);
}

extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_compressstoreu_epi8 (void * __A, __mmask32 __B, __m256i __C)
{
  __builtin_ia32_compressstoreuqi256_mask ((__v32qi *) __A, (__v32qi) __C,
							(__mmask32) __B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_expand_epi8 (__m256i __A, __mmask32 __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __C,
						    (__v32qi) __A,
						    (__mmask32) __B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_expand_epi8 (__mmask32 __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_expandqi256_maskz ((__v32qi) __B,
			(__v32qi) _mm256_setzero_si256 (), (__mmask32) __A);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_expandloadu_epi8 (__m256i __A, __mmask32 __B, const void * __C)
{
  return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *) __C,
					(__v32qi) __A, (__mmask32) __B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_expandloadu_epi8 (__mmask32 __A, const void * __B)
{
  return (__m256i) __builtin_ia32_expandloadqi256_maskz ((const __v32qi *) __B,
			(__v32qi) _mm256_setzero_si256 (), (__mmask32) __A);
}

#ifdef __DISABLE_AVX512VBMI2VLBW__
#undef __DISABLE_AVX512VBMI2VLBW__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512VBMI2VLBW__ */

#endif /* _AVX512VBMI2VLINTRIN_H_INCLUDED */