1  /* Copyright (C) 2019-2023 Free Software Foundation, Inc.
       2  
       3     This file is part of GCC.
       4  
       5     GCC is free software; you can redistribute it and/or modify
       6     it under the terms of the GNU General Public License as published by
       7     the Free Software Foundation; either version 3, or (at your option)
       8     any later version.
       9  
      10     GCC is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13     GNU General Public License for more details.
      14  
      15     Under Section 7 of GPL version 3, you are granted additional
      16     permissions described in the GCC Runtime Library Exception, version
      17     3.1, as published by the Free Software Foundation.
      18  
      19     You should have received a copy of the GNU General Public License and
      20     a copy of the GCC Runtime Library Exception along with this program;
      21     see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
      22     <http://www.gnu.org/licenses/>.  */
      23  
      24  #ifndef _IMMINTRIN_H_INCLUDED
      25  #error "Never use <avx512fp16vlintrin.h> directly; include <immintrin.h> instead."
      26  #endif
      27  
      28  #ifndef __AVX512FP16VLINTRIN_H_INCLUDED
      29  #define __AVX512FP16VLINTRIN_H_INCLUDED
      30  
      31  #if !defined(__AVX512VL__) || !defined(__AVX512FP16__)
      32  #pragma GCC push_options
      33  #pragma GCC target("avx512fp16,avx512vl")
      34  #define __DISABLE_AVX512FP16VL__
      35  #endif /* __AVX512FP16VL__ */
      36  
/* Bit casts between half-precision vectors and the other 128/256-bit
   vector types.  These only reinterpret the bits; no instructions are
   generated and no value conversion takes place.  */

/* Reinterpret __m128h as __m128 (4 x float).  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_castph_ps (__m128h __a)
{
  return (__m128) __a;
}

/* Reinterpret __m256h as __m256 (8 x float).  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castph_ps (__m256h __a)
{
  return (__m256) __a;
}

/* Reinterpret __m128h as __m128d (2 x double).  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_castph_pd (__m128h __a)
{
  return (__m128d) __a;
}

/* Reinterpret __m256h as __m256d (4 x double).  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castph_pd (__m256h __a)
{
  return (__m256d) __a;
}

/* Reinterpret __m128h as __m128i (integer vector).  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_castph_si128 (__m128h __a)
{
  return (__m128i) __a;
}

/* Reinterpret __m256h as __m256i (integer vector).  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castph_si256 (__m256h __a)
{
  return (__m256i) __a;
}

/* Reinterpret __m128 as __m128h (8 x _Float16).  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_castps_ph (__m128 __a)
{
  return (__m128h) __a;
}

/* Reinterpret __m256 as __m256h (16 x _Float16).  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castps_ph (__m256 __a)
{
  return (__m256h) __a;
}

/* Reinterpret __m128d as __m128h.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_castpd_ph (__m128d __a)
{
  return (__m128h) __a;
}

/* Reinterpret __m256d as __m256h.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castpd_ph (__m256d __a)
{
  return (__m256h) __a;
}

/* Reinterpret __m128i as __m128h.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_castsi128_ph (__m128i __a)
{
  return (__m128h) __a;
}

/* Reinterpret __m256i as __m256h.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castsi256_ph (__m256i __a)
{
  return (__m256h) __a;
}
     120  
/* Return the low 128 bits (the first eight _Float16 elements) of __A.
   The union is used for the reinterpretation so the access does not
   violate strict aliasing.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castph256_ph128 (__m256h __A)
{
  union
  {
    __m128h __a[2];
    __m256h __v;
  } __u = { .__v = __A };
  return __u.__a[0];
}
     132  
/* Widen __A to 256 bits.  Only the low half of the union is written,
   so the upper 128 bits of the result are deliberately left
   undefined -- this is the documented contract of the cast
   intrinsics (use _mm256_zextph128_ph256 for zeroed upper bits).  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castph128_ph256 (__m128h __A)
{
  union
  {
    __m128h __a[2];
    __m256h __v;
  } __u;
  __u.__a[0] = __A;
  return __u.__v;
}
     145  
/* Zero-extend __A to 256 bits: __A in the low 128 bits, upper 128
   bits cleared, by inserting __A into lane 0 of a zero vector.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_zextph128_ph256 (__m128h __A)
{
  return (__m256h) _mm256_insertf128_ps (_mm256_setzero_ps (),
					 (__m128) __A, 0);
}
     153  
     154  extern __inline __m256h
     155  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     156  _mm256_conj_pch (__m256h __A)
     157  {
     158    return (__m256h) _mm256_xor_epi32 ((__m256i) __A, _mm256_set1_epi32 (1<<31));
     159  }
     160  
/* Complex conjugate of __A with merge masking: 32-bit lanes (one
   complex FP16 value each) where the corresponding bit of __U is
   zero are copied from __W.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_conj_pch (__m256h __W, __mmask8 __U, __m256h __A)
{
  return (__m256h) __builtin_ia32_movaps256_mask ((__v8sf)
						   _mm256_conj_pch (__A),
						  (__v8sf) __W,
						  (__mmask8) __U);
}

/* Complex conjugate of __A with zero masking: lanes where the
   corresponding bit of __U is zero are cleared.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_conj_pch (__mmask8 __U, __m256h __A)
{
  return (__m256h) __builtin_ia32_movaps256_mask ((__v8sf)
						   _mm256_conj_pch (__A),
						  (__v8sf)
						   _mm256_setzero_ps (),
						  (__mmask8) __U);
}
     181  
     182  extern __inline __m128h
     183  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     184  _mm_conj_pch (__m128h __A)
     185  {
     186    return (__m128h) _mm_xor_epi32 ((__m128i) __A, _mm_set1_epi32 (1<<31));
     187  }
     188  
/* Complex conjugate of __A with merge masking: 32-bit lanes where
   the corresponding bit of __U is zero are copied from __W.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_conj_pch (__m128h __W, __mmask8 __U, __m128h __A)
{
  return (__m128h) __builtin_ia32_movaps128_mask ((__v4sf) _mm_conj_pch (__A),
						  (__v4sf) __W,
						  (__mmask8) __U);
}

/* Complex conjugate of __A with zero masking: lanes where the
   corresponding bit of __U is zero are cleared.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_conj_pch (__mmask8 __U, __m128h __A)
{
  return (__m128h) __builtin_ia32_movaps128_mask ((__v4sf) _mm_conj_pch (__A),
						  (__v4sf) _mm_setzero_ps (),
						  (__mmask8) __U);
}
     206  
/* Intrinsics v[add,sub,mul,div]ph.  */
/* Conventions for the masked arithmetic intrinsics below:
   _mask (__A, __B, __C, __D) computes __C op __D, taking elements
   from __A where mask __B has a zero bit (merge masking);
   _maskz (__A, __B, __C) computes __B op __C, zeroing elements
   where mask __A has a zero bit.  */

/* Add packed _Float16 elements.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_ph (__m128h __A, __m128h __B)
{
  return (__m128h) ((__v8hf) __A + (__v8hf) __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_add_ph (__m256h __A, __m256h __B)
{
  return (__m256h) ((__v16hf) __A + (__v16hf) __B);
}

/* Add with merge masking.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_addph128_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_add_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
{
  return __builtin_ia32_addph256_mask (__C, __D, __A, __B);
}

/* Add with zero masking.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_ph (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_addph128_mask (__B, __C, _mm_setzero_ph (),
				       __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_add_ph (__mmask16 __A, __m256h __B, __m256h __C)
{
  return __builtin_ia32_addph256_mask (__B, __C,
				       _mm256_setzero_ph (), __A);
}
     251  
/* Subtract packed _Float16 elements; masking conventions as for the
   add intrinsics above.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_ph (__m128h __A, __m128h __B)
{
  return (__m128h) ((__v8hf) __A - (__v8hf) __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sub_ph (__m256h __A, __m256h __B)
{
  return (__m256h) ((__v16hf) __A - (__v16hf) __B);
}

/* Subtract with merge masking.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_subph128_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_sub_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
{
  return __builtin_ia32_subph256_mask (__C, __D, __A, __B);
}

/* Subtract with zero masking.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_ph (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_subph128_mask (__B, __C, _mm_setzero_ph (),
				       __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_sub_ph (__mmask16 __A, __m256h __B, __m256h __C)
{
  return __builtin_ia32_subph256_mask (__B, __C,
				       _mm256_setzero_ph (), __A);
}
     295  
/* Multiply packed _Float16 elements; masking conventions as for the
   add intrinsics above.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_ph (__m128h __A, __m128h __B)
{
  return (__m128h) ((__v8hf) __A * (__v8hf) __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mul_ph (__m256h __A, __m256h __B)
{
  return (__m256h) ((__v16hf) __A * (__v16hf) __B);
}

/* Multiply with merge masking.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mul_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_mulph128_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_mul_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
{
  return __builtin_ia32_mulph256_mask (__C, __D, __A, __B);
}

/* Multiply with zero masking.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mul_ph (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_mulph128_mask (__B, __C, _mm_setzero_ph (),
				       __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_mul_ph (__mmask16 __A, __m256h __B, __m256h __C)
{
  return __builtin_ia32_mulph256_mask (__B, __C,
				       _mm256_setzero_ph (), __A);
}
     339  
/* Divide packed _Float16 elements; masking conventions as for the
   add intrinsics above.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_ph (__m128h __A, __m128h __B)
{
  return (__m128h) ((__v8hf) __A / (__v8hf) __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_div_ph (__m256h __A, __m256h __B)
{
  return (__m256h) ((__v16hf) __A / (__v16hf) __B);
}

/* Divide with merge masking.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_div_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_divph128_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_div_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
{
  return __builtin_ia32_divph256_mask (__C, __D, __A, __B);
}

/* Divide with zero masking.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_div_ph (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_divph128_mask (__B, __C, _mm_setzero_ph (),
				       __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_div_ph (__mmask16 __A, __m256h __B, __m256h __C)
{
  return __builtin_ia32_divph256_mask (__B, __C,
				       _mm256_setzero_ph (), __A);
}
     383  
/* Intrinsics v[max,min]ph.  */
/* Element-wise maximum of packed _Float16 values.  The unmasked forms
   pass a zero vector and an all-ones mask to the masked builtin, so
   every result element is written.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_ph (__m128h __A, __m128h __B)
{
  return __builtin_ia32_maxph128_mask (__A, __B,
				       _mm_setzero_ph (),
				       (__mmask8) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_max_ph (__m256h __A, __m256h __B)
{
  return __builtin_ia32_maxph256_mask (__A, __B,
				       _mm256_setzero_ph (),
				       (__mmask16) -1);
}

/* Maximum with merge masking: elements where __B is zero come from __A.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_max_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_maxph128_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_max_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
{
  return __builtin_ia32_maxph256_mask (__C, __D, __A, __B);
}

/* Maximum with zero masking: elements where __A is zero are cleared.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_max_ph (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_maxph128_mask (__B, __C, _mm_setzero_ph (),
				       __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_max_ph (__mmask16 __A, __m256h __B, __m256h __C)
{
  return __builtin_ia32_maxph256_mask (__B, __C,
				       _mm256_setzero_ph (), __A);
}
     432  
/* Element-wise minimum of packed _Float16 values; masking conventions
   as for the maximum intrinsics above.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_ph (__m128h __A, __m128h __B)
{
  return __builtin_ia32_minph128_mask (__A, __B,
				       _mm_setzero_ph (),
				       (__mmask8) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_ph (__m256h __A, __m256h __B)
{
  return __builtin_ia32_minph256_mask (__A, __B,
				       _mm256_setzero_ph (),
				       (__mmask16) -1);
}

/* Minimum with merge masking.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_min_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_minph128_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_min_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
{
  return __builtin_ia32_minph256_mask (__C, __D, __A, __B);
}

/* Minimum with zero masking.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_min_ph (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_minph128_mask (__B, __C, _mm_setzero_ph (),
				       __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_min_ph (__mmask16 __A, __m256h __B, __m256h __C)
{
  return __builtin_ia32_minph256_mask (__B, __C,
				       _mm256_setzero_ph (), __A);
}
     480  
/* Absolute value of packed _Float16 elements: AND with 0x7FFF7FFF
   clears the sign bit (bit 15) of both 16-bit halves of every 32-bit
   lane.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_ph (__m128h __A)
{
  return (__m128h) _mm_and_si128 ( _mm_set1_epi32 (0x7FFF7FFF),
				   (__m128i) __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_abs_ph (__m256h __A)
{
  return (__m256h) _mm256_and_si256 ( _mm256_set1_epi32 (0x7FFF7FFF),
				      (__m256i) __A);
}
     496  
     497  /* vcmpph */
     498  #ifdef __OPTIMIZE
     499  extern __inline __mmask8
     500  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     501  _mm_cmp_ph_mask (__m128h __A, __m128h __B, const int __C)
     502  {
     503    return (__mmask8) __builtin_ia32_cmpph128_mask (__A, __B, __C,
     504  						  (__mmask8) -1);
     505  }
     506  
     507  extern __inline __mmask8
     508  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     509  _mm_mask_cmp_ph_mask (__mmask8 __A, __m128h __B, __m128h __C,
     510  		      const int __D)
     511  {
     512    return (__mmask8) __builtin_ia32_cmpph128_mask (__B, __C, __D, __A);
     513  }
     514  
     515  extern __inline __mmask16
     516  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     517  _mm_cmp_ph_mask (__m256h __A, __m256h __B, const int __C)
     518  {
     519    return (__mmask16) __builtin_ia32_cmpph256_mask (__A, __B, __C,
     520  						   (__mmask16) -1);
     521  }
     522  
     523  extern __inline __mmask16
     524  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     525  _mm_mask_cmp_ph_mask (__mmask16 __A, __m256h __B, __m256h __C,
     526  		      const int __D)
     527  {
     528    return (__mmask16) __builtin_ia32_cmpph256_mask (__B, __C, __D,
     529  						   __A);
     530  }
     531  
     532  #else
     533  #define _mm_cmp_ph_mask(A, B, C)			\
     534    (__builtin_ia32_cmpph128_mask ((A), (B), (C), (-1)))
     535  
     536  #define _mm_mask_cmp_ph_mask(A, B, C, D)		\
     537    (__builtin_ia32_cmpph128_mask ((B), (C), (D), (A)))
     538  
     539  #define _mm256_cmp_ph_mask(A, B, C)			\
     540    (__builtin_ia32_cmpph256_mask ((A), (B), (C), (-1)))
     541  
     542  #define _mm256_mask_cmp_ph_mask(A, B, C, D)		\
     543    (__builtin_ia32_cmpph256_mask ((B), (C), (D), (A)))
     544  
     545  #endif /* __OPTIMIZE__ */
     546  
/* Intrinsics vsqrtph.  */
/* Square root of packed _Float16 elements.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_ph (__m128h __A)
{
  return __builtin_ia32_sqrtph128_mask (__A, _mm_setzero_ph (),
					(__mmask8) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sqrt_ph (__m256h __A)
{
  return __builtin_ia32_sqrtph256_mask (__A, _mm256_setzero_ph (),
					(__mmask16) -1);
}

/* Square root with merge masking: elements where __B is zero come
   from __A.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sqrt_ph (__m128h __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_sqrtph128_mask (__C, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_sqrt_ph (__m256h __A, __mmask16 __B, __m256h __C)
{
  return __builtin_ia32_sqrtph256_mask (__C, __A, __B);
}

/* Square root with zero masking.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sqrt_ph (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_sqrtph128_mask (__B, _mm_setzero_ph (),
					__A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_sqrt_ph (__mmask16 __A, __m256h __B)
{
  return __builtin_ia32_sqrtph256_mask (__B, _mm256_setzero_ph (),
					__A);
}
     593  
/* Intrinsics vrsqrtph.  */
/* Approximate reciprocal square root of packed _Float16 elements
   (VRSQRTPH); masking conventions as for vsqrtph above.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt_ph (__m128h __A)
{
  return __builtin_ia32_rsqrtph128_mask (__A, _mm_setzero_ph (),
					 (__mmask8) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_rsqrt_ph (__m256h __A)
{
  return __builtin_ia32_rsqrtph256_mask (__A, _mm256_setzero_ph (),
					 (__mmask16) -1);
}

/* Merge-masking variants.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rsqrt_ph (__m128h __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_rsqrtph128_mask (__C, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_rsqrt_ph (__m256h __A, __mmask16 __B, __m256h __C)
{
  return __builtin_ia32_rsqrtph256_mask (__C, __A, __B);
}

/* Zero-masking variants.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rsqrt_ph (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_rsqrtph128_mask (__B, _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_rsqrt_ph (__mmask16 __A, __m256h __B)
{
  return __builtin_ia32_rsqrtph256_mask (__B, _mm256_setzero_ph (),
					 __A);
}
     639  
/* Intrinsics vrcpph.  */
/* Approximate reciprocal of packed _Float16 elements (VRCPPH);
   masking conventions as for vsqrtph above.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp_ph (__m128h __A)
{
  return __builtin_ia32_rcpph128_mask (__A, _mm_setzero_ph (),
				       (__mmask8) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_rcp_ph (__m256h __A)
{
  return __builtin_ia32_rcpph256_mask (__A, _mm256_setzero_ph (),
				       (__mmask16) -1);
}

/* Merge-masking variants.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rcp_ph (__m128h __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_rcpph128_mask (__C, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_rcp_ph (__m256h __A, __mmask16 __B, __m256h __C)
{
  return __builtin_ia32_rcpph256_mask (__C, __A, __B);
}

/* Zero-masking variants.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rcp_ph (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_rcpph128_mask (__B, _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_rcp_ph (__mmask16 __A, __m256h __B)
{
  return __builtin_ia32_rcpph256_mask (__B, _mm256_setzero_ph (),
				       __A);
}
     685  
/* Intrinsics vscalefph.  */
/* Scale packed _Float16 elements of the first operand by powers of
   two taken from the second operand (VSCALEFPH); masking conventions
   as for vsqrtph above.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_scalef_ph (__m128h __A, __m128h __B)
{
  return __builtin_ia32_scalefph128_mask (__A, __B,
					  _mm_setzero_ph (),
					  (__mmask8) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_scalef_ph (__m256h __A, __m256h __B)
{
  return __builtin_ia32_scalefph256_mask (__A, __B,
					  _mm256_setzero_ph (),
					  (__mmask16) -1);
}

/* Merge-masking variants.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_scalef_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_scalefph128_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_scalef_ph (__m256h __A, __mmask16 __B, __m256h __C,
		       __m256h __D)
{
  return __builtin_ia32_scalefph256_mask (__C, __D, __A, __B);
}

/* Zero-masking variants.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_scalef_ph (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_scalefph128_mask (__B, __C,
					  _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_scalef_ph (__mmask16 __A, __m256h __B, __m256h __C)
{
  return __builtin_ia32_scalefph256_mask (__B, __C,
					  _mm256_setzero_ph (),
					  __A);
}
     736  
/* Intrinsics vreduceph.  */
/* Extract the reduced argument of each packed _Float16 element as
   specified by imm8 (VREDUCEPH).  The immediate must be a compile-time
   constant, hence the inline-function forms are only usable when
   optimizing; macros are provided otherwise.  */
#ifdef __OPTIMIZE__
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_ph (__m128h __A, int __B)
{
  return __builtin_ia32_reduceph128_mask (__A, __B,
					  _mm_setzero_ph (),
					  (__mmask8) -1);
}

/* Merge masking: elements where __B is zero come from __A.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_reduce_ph (__m128h __A, __mmask8 __B, __m128h __C, int __D)
{
  return __builtin_ia32_reduceph128_mask (__C, __D, __A, __B);
}

/* Zero masking: elements where __A is zero are cleared.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_reduce_ph (__mmask8 __A, __m128h __B, int __C)
{
  return __builtin_ia32_reduceph128_mask (__B, __C,
					  _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_reduce_ph (__m256h __A, int __B)
{
  return __builtin_ia32_reduceph256_mask (__A, __B,
					  _mm256_setzero_ph (),
					  (__mmask16) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_reduce_ph (__m256h __A, __mmask16 __B, __m256h __C, int __D)
{
  return __builtin_ia32_reduceph256_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_reduce_ph (__mmask16 __A, __m256h __B, int __C)
{
  return __builtin_ia32_reduceph256_mask (__B, __C,
					  _mm256_setzero_ph (),
					  __A);
}

#else
/* Macro forms for when not optimizing; argument order matches the
   inline functions above.  */
#define _mm_reduce_ph(A, B)				\
  (__builtin_ia32_reduceph128_mask ((A), (B),		\
				    _mm_setzero_ph (),	\
				    ((__mmask8)-1)))

#define _mm_mask_reduce_ph(A,  B,  C, D)			\
  (__builtin_ia32_reduceph128_mask ((C), (D), (A), (B)))

#define _mm_maskz_reduce_ph(A,  B, C)					\
  (__builtin_ia32_reduceph128_mask ((B), (C), _mm_setzero_ph (), (A)))

#define _mm256_reduce_ph(A, B)					\
  (__builtin_ia32_reduceph256_mask ((A), (B),			\
				    _mm256_setzero_ph (),	\
				    ((__mmask16)-1)))

#define _mm256_mask_reduce_ph(A, B, C, D)			\
  (__builtin_ia32_reduceph256_mask ((C), (D), (A), (B)))

#define _mm256_maskz_reduce_ph(A, B, C)					\
  (__builtin_ia32_reduceph256_mask ((B), (C), _mm256_setzero_ph (), (A)))

#endif /* __OPTIMIZE__ */
     812  
     813  /* Intrinsics vrndscaleph.  */
     814  #ifdef __OPTIMIZE__
     815    extern __inline __m128h
     816    __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     817    _mm_roundscale_ph (__m128h __A, int __B)
     818    {
     819      return __builtin_ia32_rndscaleph128_mask (__A, __B,
     820  					      _mm_setzero_ph (),
     821  					      (__mmask8) -1);
     822    }
     823  
/* Rounding-scale with merge masking: elements where __B is zero come
   from __A.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_roundscale_ph (__m128h __A, __mmask8 __B, __m128h __C, int __D)
{
  return __builtin_ia32_rndscaleph128_mask (__C, __D, __A, __B);
}

/* Rounding-scale with zero masking: elements where __A is zero are
   cleared.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_roundscale_ph (__mmask8 __A, __m128h __B, int __C)
{
  return __builtin_ia32_rndscaleph128_mask (__B, __C,
					    _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_roundscale_ph (__m256h __A, int __B)
{
  return __builtin_ia32_rndscaleph256_mask (__A, __B,
					    _mm256_setzero_ph (),
					    (__mmask16) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_roundscale_ph (__m256h __A, __mmask16 __B, __m256h __C,
			   int __D)
{
  return __builtin_ia32_rndscaleph256_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_roundscale_ph (__mmask16 __A, __m256h __B, int __C)
{
  return __builtin_ia32_rndscaleph256_mask (__B, __C,
					    _mm256_setzero_ph (),
					    __A);
}
     864  
     865  #else
     866  #define _mm_roundscale_ph(A, B)						\
     867    (__builtin_ia32_rndscaleph128_mask ((A), (B), _mm_setzero_ph (),	\
     868  				      ((__mmask8)-1)))
     869  
     870  #define _mm_mask_roundscale_ph(A, B, C, D)			\
     871    (__builtin_ia32_rndscaleph128_mask ((C), (D), (A), (B)))
     872  
     873  #define _mm_maskz_roundscale_ph(A, B, C)				\
     874    (__builtin_ia32_rndscaleph128_mask ((B), (C), _mm_setzero_ph (), (A)))
     875  
     876  #define _mm256_roundscale_ph(A, B)				\
     877    (__builtin_ia32_rndscaleph256_mask ((A), (B),			\
     878  				      _mm256_setzero_ph(),	\
     879  				      ((__mmask16)-1)))
     880  
     881  #define _mm256_mask_roundscale_ph(A, B, C, D)			\
     882    (__builtin_ia32_rndscaleph256_mask ((C), (D), (A), (B)))
     883  
     884  #define _mm256_maskz_roundscale_ph(A, B, C)				\
     885    (__builtin_ia32_rndscaleph256_mask ((B), (C),				\
     886  				      _mm256_setzero_ph (), (A)))
     887  
     888  #endif /* __OPTIMIZE__ */
     889  
     890  /* Intrinsics vfpclassph.  */
     891  #ifdef __OPTIMIZE__
     892  extern __inline __mmask8
     893  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     894    _mm_mask_fpclass_ph_mask (__mmask8 __U, __m128h __A, const int __imm)
     895  {
     896    return (__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) __A,
     897  						      __imm, __U);
     898  }
     899  
/* vfpclassph with all elements active: returns a mask of the FP16
   elements of __A that match the categories selected by __imm.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fpclass_ph_mask (__m128h __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) __A,
						      __imm,
						      (__mmask8) -1);
}

/* Masked 256-bit vfpclassph: the resulting mask is ANDed with __U.  */
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fpclass_ph_mask (__mmask16 __U, __m256h __A, const int __imm)
{
  return (__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) __A,
						       __imm, __U);
}

/* 256-bit vfpclassph with all elements active.  */
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fpclass_ph_mask (__m256h __A, const int __imm)
{
  return (__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) __A,
						       __imm,
						       (__mmask16) -1);
}
     925  
     926  #else
/* Macro forms used when __OPTIMIZE__ is not defined: the class
   selector must appear as a literal immediate.  */
#define _mm_fpclass_ph_mask(X, C)                                       \
  ((__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) (__m128h) (X),	\
						(int) (C),(__mmask8)-1))

#define _mm_mask_fpclass_ph_mask(u, X, C)                               \
  ((__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) (__m128h) (X),	\
						(int) (C),(__mmask8)(u)))

#define _mm256_fpclass_ph_mask(X, C)                                    \
  ((__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) (__m256h) (X), \
						 (int) (C),(__mmask16)-1))

#define _mm256_mask_fpclass_ph_mask(u, X, C)				\
  ((__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) (__m256h) (X), \
						 (int) (C),(__mmask16)(u)))
     942  #endif /* __OPTIMIZE__ */
     943  
     944  /* Intrinsics vgetexpph, vgetexpsh.  */
/* 256-bit vgetexpph with all elements active.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_getexp_ph (__m256h __A)
{
  return (__m256h) __builtin_ia32_getexpph256_mask ((__v16hf) __A,
						    (__v16hf)
						    _mm256_setzero_ph (),
						    (__mmask16) -1);
}

/* Merge-masking 256-bit vgetexpph: result elements with a clear bit
   in __U are copied from __W.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_getexp_ph (__m256h __W, __mmask16 __U, __m256h __A)
{
  return (__m256h) __builtin_ia32_getexpph256_mask ((__v16hf) __A,
						    (__v16hf) __W,
						    (__mmask16) __U);
}

/* Zero-masking 256-bit vgetexpph.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_getexp_ph (__mmask16 __U, __m256h __A)
{
  return (__m256h) __builtin_ia32_getexpph256_mask ((__v16hf) __A,
						    (__v16hf)
						    _mm256_setzero_ph (),
						    (__mmask16) __U);
}

/* 128-bit vgetexpph with all elements active.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_getexp_ph (__m128h __A)
{
  return (__m128h) __builtin_ia32_getexpph128_mask ((__v8hf) __A,
						    (__v8hf)
						    _mm_setzero_ph (),
						    (__mmask8) -1);
}

/* Merge-masking 128-bit vgetexpph.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_getexp_ph (__m128h __W, __mmask8 __U, __m128h __A)
{
  return (__m128h) __builtin_ia32_getexpph128_mask ((__v8hf) __A,
						    (__v8hf) __W,
						    (__mmask8) __U);
}

/* Zero-masking 128-bit vgetexpph.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_getexp_ph (__mmask8 __U, __m128h __A)
{
  return (__m128h) __builtin_ia32_getexpph128_mask ((__v8hf) __A,
						    (__v8hf)
						    _mm_setzero_ph (),
						    (__mmask8) __U);
}
    1002  
    1003  
    1004  /* Intrinsics vgetmantph, vgetmantsh.  */
    1005  #ifdef __OPTIMIZE__
/* 256-bit vgetmantph with all elements active.  The builtin takes a
   single immediate: the sign-control enum __C occupies bits 3:2 and
   the normalization-interval enum __B bits 1:0, hence
   (__C << 2) | __B.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_getmant_ph (__m256h __A, _MM_MANTISSA_NORM_ENUM __B,
		   _MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m256h) __builtin_ia32_getmantph256_mask ((__v16hf) __A,
						     (__C << 2) | __B,
						     (__v16hf)
						     _mm256_setzero_ph (),
						     (__mmask16) -1);
}

/* Merge-masking 256-bit vgetmantph: result elements with a clear bit
   in __U are copied from __W.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_getmant_ph (__m256h __W, __mmask16 __U, __m256h __A,
			_MM_MANTISSA_NORM_ENUM __B,
			_MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m256h) __builtin_ia32_getmantph256_mask ((__v16hf) __A,
						     (__C << 2) | __B,
						     (__v16hf) __W,
						     (__mmask16) __U);
}

/* Zero-masking 256-bit vgetmantph.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_getmant_ph (__mmask16 __U, __m256h __A,
			 _MM_MANTISSA_NORM_ENUM __B,
			 _MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m256h) __builtin_ia32_getmantph256_mask ((__v16hf) __A,
						     (__C << 2) | __B,
						     (__v16hf)
						     _mm256_setzero_ph (),
						     (__mmask16) __U);
}

/* 128-bit vgetmantph with all elements active.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_getmant_ph (__m128h __A, _MM_MANTISSA_NORM_ENUM __B,
		_MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m128h) __builtin_ia32_getmantph128_mask ((__v8hf) __A,
						     (__C << 2) | __B,
						     (__v8hf)
						     _mm_setzero_ph (),
						     (__mmask8) -1);
}

/* Merge-masking 128-bit vgetmantph.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_getmant_ph (__m128h __W, __mmask8 __U, __m128h __A,
		     _MM_MANTISSA_NORM_ENUM __B,
		     _MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m128h) __builtin_ia32_getmantph128_mask ((__v8hf) __A,
						     (__C << 2) | __B,
						     (__v8hf) __W,
						     (__mmask8) __U);
}

/* Zero-masking 128-bit vgetmantph.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_getmant_ph (__mmask8 __U, __m128h __A,
		      _MM_MANTISSA_NORM_ENUM __B,
		      _MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m128h) __builtin_ia32_getmantph128_mask ((__v8hf) __A,
						     (__C << 2) | __B,
						     (__v8hf)
						     _mm_setzero_ph (),
						     (__mmask8) __U);
}
    1079  
    1080  #else
/* Macro forms used when __OPTIMIZE__ is not defined; the combined
   immediate ((C)<<2)|(B) must be a literal (sign control in bits 3:2,
   normalization interval in bits 1:0).  */
#define _mm256_getmant_ph(X, B, C)					\
  ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X),	\
					       (int)(((C)<<2) | (B)),	\
					       (__v16hf)(__m256h)_mm256_setzero_ph (), \
					       (__mmask16)-1))

#define _mm256_mask_getmant_ph(W, U, X, B, C)				\
  ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X),	\
					       (int)(((C)<<2) | (B)),	\
					       (__v16hf)(__m256h)(W),	\
					       (__mmask16)(U)))

#define _mm256_maskz_getmant_ph(U, X, B, C)				\
  ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X),	\
					       (int)(((C)<<2) | (B)),	\
					       (__v16hf)(__m256h)_mm256_setzero_ph (), \
					       (__mmask16)(U)))

#define _mm_getmant_ph(X, B, C)						\
  ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X),	\
					       (int)(((C)<<2) | (B)),	\
					       (__v8hf)(__m128h)_mm_setzero_ph (), \
					       (__mmask8)-1))

#define _mm_mask_getmant_ph(W, U, X, B, C)				\
  ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X),	\
					       (int)(((C)<<2) | (B)),	\
					       (__v8hf)(__m128h)(W),	\
					       (__mmask8)(U)))

#define _mm_maskz_getmant_ph(U, X, B, C)				\
  ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X),	\
					       (int)(((C)<<2) | (B)),	\
					       (__v8hf)(__m128h)_mm_setzero_ph (), \
					       (__mmask8)(U)))
    1116  
    1117  #endif /* __OPTIMIZE__ */
    1118  
    1119  /* Intrinsics vcvtph2dq.  */
/* vcvtph2dq, 128-bit, all elements active: FP16 -> signed 32-bit.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtph_epi32 (__m128h __A)
{
  return (__m128i)
    __builtin_ia32_vcvtph2dq128_mask (__A,
				      (__v4si)
				      _mm_setzero_si128 (),
				      (__mmask8) -1);
}

/* Merge-masking form: result elements with a clear bit in __B are
   copied from __A.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtph_epi32 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return (__m128i)
    __builtin_ia32_vcvtph2dq128_mask (__C, ( __v4si) __A, __B);
}

/* Zero-masking form: result elements with a clear bit in __A are
   zeroed.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtph_epi32 (__mmask8 __A, __m128h __B)
{
  return (__m128i)
    __builtin_ia32_vcvtph2dq128_mask (__B,
				      (__v4si) _mm_setzero_si128 (),
				      __A);
}

/* vcvtph2dq, 256-bit, all elements active.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtph_epi32 (__m128h __A)
{
  return (__m256i)
    __builtin_ia32_vcvtph2dq256_mask (__A,
				      (__v8si)
				      _mm256_setzero_si256 (),
				      (__mmask8) -1);
}

/* Merge-masking 256-bit form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtph_epi32 (__m256i __A, __mmask8 __B, __m128h __C)
{
  return (__m256i)
    __builtin_ia32_vcvtph2dq256_mask (__C, ( __v8si) __A, __B);
}

/* Zero-masking 256-bit form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtph_epi32 (__mmask8 __A, __m128h __B)
{
  return (__m256i)
    __builtin_ia32_vcvtph2dq256_mask (__B,
				      (__v8si)
				      _mm256_setzero_si256 (),
				      __A);
}
    1178  
    1179  /* Intrinsics vcvtph2udq.  */
/* vcvtph2udq, 128-bit, all elements active: FP16 -> unsigned
   32-bit.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtph_epu32 (__m128h __A)
{
  return (__m128i)
    __builtin_ia32_vcvtph2udq128_mask (__A,
				       (__v4si)
				       _mm_setzero_si128 (),
				       (__mmask8) -1);
}

/* Merge-masking form: result elements with a clear bit in __B are
   copied from __A.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtph_epu32 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return (__m128i)
    __builtin_ia32_vcvtph2udq128_mask (__C, ( __v4si) __A, __B);
}

/* Zero-masking form.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtph_epu32 (__mmask8 __A, __m128h __B)
{
  return (__m128i)
    __builtin_ia32_vcvtph2udq128_mask (__B,
				       (__v4si)
				       _mm_setzero_si128 (),
				       __A);
}

/* vcvtph2udq, 256-bit, all elements active.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtph_epu32 (__m128h __A)
{
  return (__m256i)
    __builtin_ia32_vcvtph2udq256_mask (__A,
				       (__v8si)
				       _mm256_setzero_si256 (),
				       (__mmask8) -1);
}

/* Merge-masking 256-bit form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtph_epu32 (__m256i __A, __mmask8 __B, __m128h __C)
{
  return (__m256i)
    __builtin_ia32_vcvtph2udq256_mask (__C, ( __v8si) __A, __B);
}

/* Zero-masking 256-bit form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtph_epu32 (__mmask8 __A, __m128h __B)
{
  return (__m256i)
    __builtin_ia32_vcvtph2udq256_mask (__B,
				       (__v8si) _mm256_setzero_si256 (),
				       __A);
}
    1238  
    1239  /* Intrinsics vcvttph2dq.  */
/* vcvttph2dq, 128-bit, all elements active: truncating FP16 ->
   signed 32-bit conversion.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttph_epi32 (__m128h __A)
{
  return (__m128i)
    __builtin_ia32_vcvttph2dq128_mask (__A,
				       (__v4si) _mm_setzero_si128 (),
				       (__mmask8) -1);
}

/* Merge-masking form: result elements with a clear bit in __B are
   copied from __A.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvttph_epi32 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return (__m128i)__builtin_ia32_vcvttph2dq128_mask (__C,
						     ( __v4si) __A,
						     __B);
}

/* Zero-masking form.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvttph_epi32 (__mmask8 __A, __m128h __B)
{
  return (__m128i)
    __builtin_ia32_vcvttph2dq128_mask (__B,
				       (__v4si) _mm_setzero_si128 (),
				       __A);
}

/* vcvttph2dq, 256-bit, all elements active.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttph_epi32 (__m128h __A)
{
  return (__m256i)
    __builtin_ia32_vcvttph2dq256_mask (__A,
				       (__v8si)
				       _mm256_setzero_si256 (),
				       (__mmask8) -1);
}

/* Merge-masking 256-bit form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvttph_epi32 (__m256i __A, __mmask8 __B, __m128h __C)
{
  return (__m256i)
    __builtin_ia32_vcvttph2dq256_mask (__C,
				       ( __v8si) __A,
				       __B);
}

/* Zero-masking 256-bit form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvttph_epi32 (__mmask8 __A, __m128h __B)
{
  return (__m256i)
    __builtin_ia32_vcvttph2dq256_mask (__B,
				       (__v8si)
				       _mm256_setzero_si256 (),
				       __A);
}
    1300  
    1301  /* Intrinsics vcvttph2udq.  */
/* vcvttph2udq, 128-bit, all elements active: truncating FP16 ->
   unsigned 32-bit conversion.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttph_epu32 (__m128h __A)
{
  return (__m128i)
    __builtin_ia32_vcvttph2udq128_mask (__A,
					(__v4si)
					_mm_setzero_si128 (),
					(__mmask8) -1);
}

/* Merge-masking form: result elements with a clear bit in __B are
   copied from __A.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvttph_epu32 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return (__m128i)
    __builtin_ia32_vcvttph2udq128_mask (__C,
					( __v4si) __A,
					__B);
}

/* Zero-masking form.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvttph_epu32 (__mmask8 __A, __m128h __B)
{
  return (__m128i)
    __builtin_ia32_vcvttph2udq128_mask (__B,
					(__v4si)
					_mm_setzero_si128 (),
					__A);
}

/* vcvttph2udq, 256-bit, all elements active.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttph_epu32 (__m128h __A)
{
  return (__m256i)
    __builtin_ia32_vcvttph2udq256_mask (__A,
					(__v8si)
					_mm256_setzero_si256 (), (__mmask8) -1);
}

/* Merge-masking 256-bit form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvttph_epu32 (__m256i __A, __mmask8 __B, __m128h __C)
{
  return (__m256i)
    __builtin_ia32_vcvttph2udq256_mask (__C,
					( __v8si) __A,
					__B);
}

/* Zero-masking 256-bit form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvttph_epu32 (__mmask8 __A, __m128h __B)
{
  return (__m256i)
    __builtin_ia32_vcvttph2udq256_mask (__B,
					(__v8si)
					_mm256_setzero_si256 (),
					__A);
}
    1364  
    1365  /* Intrinsics vcvtdq2ph.  */
/* vcvtdq2ph, 128-bit, all elements active: signed 32-bit -> FP16.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi32_ph (__m128i __A)
{
  return __builtin_ia32_vcvtdq2ph128_mask ((__v4si) __A,
					   _mm_setzero_ph (),
					   (__mmask8) -1);
}

/* Merge-masking form: result elements with a clear bit in __B are
   copied from __A.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi32_ph (__m128h __A, __mmask8 __B, __m128i __C)
{
  return __builtin_ia32_vcvtdq2ph128_mask ((__v4si) __C, __A, __B);
}

/* Zero-masking form.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepi32_ph (__mmask8 __A, __m128i __B)
{
  return __builtin_ia32_vcvtdq2ph128_mask ((__v4si) __B,
					   _mm_setzero_ph (),
					   __A);
}

/* vcvtdq2ph, 256-bit source: the FP16 results fit in a __m128h.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi32_ph (__m256i __A)
{
  return __builtin_ia32_vcvtdq2ph256_mask ((__v8si) __A,
					   _mm_setzero_ph (),
					   (__mmask8) -1);
}

/* Merge-masking 256-bit-source form.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi32_ph (__m128h __A, __mmask8 __B, __m256i __C)
{
  return __builtin_ia32_vcvtdq2ph256_mask ((__v8si) __C, __A, __B);
}

/* Zero-masking 256-bit-source form.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepi32_ph (__mmask8 __A, __m256i __B)
{
  return __builtin_ia32_vcvtdq2ph256_mask ((__v8si) __B,
					   _mm_setzero_ph (),
					   __A);
}
    1415  
    1416  /* Intrinsics vcvtudq2ph.  */
/* vcvtudq2ph, 128-bit, all elements active: unsigned 32-bit ->
   FP16.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu32_ph (__m128i __A)
{
  return __builtin_ia32_vcvtudq2ph128_mask ((__v4si) __A,
					    _mm_setzero_ph (),
					    (__mmask8) -1);
}

/* Merge-masking form: result elements with a clear bit in __B are
   copied from __A.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepu32_ph (__m128h __A, __mmask8 __B, __m128i __C)
{
  return __builtin_ia32_vcvtudq2ph128_mask ((__v4si) __C,
					    __A,
					    __B);
}

/* Zero-masking form.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepu32_ph (__mmask8 __A, __m128i __B)
{
  return __builtin_ia32_vcvtudq2ph128_mask ((__v4si) __B,
					    _mm_setzero_ph (),
					    __A);
}

/* vcvtudq2ph, 256-bit source: the FP16 results fit in a __m128h.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu32_ph (__m256i __A)
{
  return __builtin_ia32_vcvtudq2ph256_mask ((__v8si) __A,
					    _mm_setzero_ph (),
					    (__mmask8) -1);
}

/* Merge-masking 256-bit-source form.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepu32_ph (__m128h __A, __mmask8 __B, __m256i __C)
{
  return __builtin_ia32_vcvtudq2ph256_mask ((__v8si) __C, __A, __B);
}

/* Zero-masking 256-bit-source form.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepu32_ph (__mmask8 __A, __m256i __B)
{
  return __builtin_ia32_vcvtudq2ph256_mask ((__v8si) __B,
					    _mm_setzero_ph (),
					    __A);
}
    1468  
    1469  /* Intrinsics vcvtph2qq.  */
/* vcvtph2qq, 128-bit, all elements active: FP16 -> signed 64-bit.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtph_epi64 (__m128h __A)
{
  return
    __builtin_ia32_vcvtph2qq128_mask (__A,
				      _mm_setzero_si128 (),
				      (__mmask8) -1);
}

/* Merge-masking form: result elements with a clear bit in __B are
   copied from __A.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtph_epi64 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvtph2qq128_mask (__C, __A, __B);
}

/* Zero-masking form.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtph_epi64 (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvtph2qq128_mask (__B,
					   _mm_setzero_si128 (),
					   __A);
}

/* vcvtph2qq, 256-bit, all elements active.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtph_epi64 (__m128h __A)
{
  return __builtin_ia32_vcvtph2qq256_mask (__A,
					   _mm256_setzero_si256 (),
					   (__mmask8) -1);
}

/* Merge-masking 256-bit form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtph_epi64 (__m256i __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvtph2qq256_mask (__C, __A, __B);
}

/* Zero-masking 256-bit form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtph_epi64 (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvtph2qq256_mask (__B,
					   _mm256_setzero_si256 (),
					   __A);
}
    1520  
    1521  /* Intrinsics vcvtph2uqq.  */
/* vcvtph2uqq, 128-bit, all elements active: FP16 -> unsigned
   64-bit.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtph_epu64 (__m128h __A)
{
  return __builtin_ia32_vcvtph2uqq128_mask (__A,
					    _mm_setzero_si128 (),
					    (__mmask8) -1);
}

/* Merge-masking form: result elements with a clear bit in __B are
   copied from __A.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtph_epu64 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvtph2uqq128_mask (__C, __A, __B);
}

/* Zero-masking form.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtph_epu64 (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvtph2uqq128_mask (__B,
					    _mm_setzero_si128 (),
					    __A);
}

/* vcvtph2uqq, 256-bit, all elements active.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtph_epu64 (__m128h __A)
{
  return __builtin_ia32_vcvtph2uqq256_mask (__A,
					    _mm256_setzero_si256 (),
					    (__mmask8) -1);
}

/* Merge-masking 256-bit form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtph_epu64 (__m256i __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvtph2uqq256_mask (__C, __A, __B);
}

/* Zero-masking 256-bit form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtph_epu64 (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvtph2uqq256_mask (__B,
					    _mm256_setzero_si256 (),
					    __A);
}
    1571  
    1572  /* Intrinsics vcvttph2qq.  */
/* vcvttph2qq, 128-bit, all elements active: truncating FP16 ->
   signed 64-bit conversion.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttph_epi64 (__m128h __A)
{
  return __builtin_ia32_vcvttph2qq128_mask (__A,
					    _mm_setzero_si128 (),
					    (__mmask8) -1);
}

/* Merge-masking form: result elements with a clear bit in __B are
   copied from __A.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvttph_epi64 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvttph2qq128_mask (__C,
					    __A,
					    __B);
}

/* Zero-masking form.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvttph_epi64 (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvttph2qq128_mask (__B,
					    _mm_setzero_si128 (),
					    __A);
}

/* vcvttph2qq, 256-bit, all elements active.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttph_epi64 (__m128h __A)
{
  return __builtin_ia32_vcvttph2qq256_mask (__A,
					    _mm256_setzero_si256 (),
					    (__mmask8) -1);
}

/* Merge-masking 256-bit form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvttph_epi64 (__m256i __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvttph2qq256_mask (__C,
					    __A,
					    __B);
}

/* Zero-masking 256-bit form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvttph_epi64 (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvttph2qq256_mask (__B,
					    _mm256_setzero_si256 (),
					    __A);
}
    1626  
    1627  /* Intrinsics vcvttph2uqq.  */
/* vcvttph2uqq, 128-bit, all elements active: truncating FP16 ->
   unsigned 64-bit conversion.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttph_epu64 (__m128h __A)
{
  return __builtin_ia32_vcvttph2uqq128_mask (__A,
					     _mm_setzero_si128 (),
					     (__mmask8) -1);
}

/* Merge-masking form: result elements with a clear bit in __B are
   copied from __A.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvttph_epu64 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvttph2uqq128_mask (__C,
					     __A,
					     __B);
}

/* Zero-masking form.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvttph_epu64 (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvttph2uqq128_mask (__B,
					     _mm_setzero_si128 (),
					     __A);
}

/* vcvttph2uqq, 256-bit, all elements active.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttph_epu64 (__m128h __A)
{
  return __builtin_ia32_vcvttph2uqq256_mask (__A,
					     _mm256_setzero_si256 (),
					     (__mmask8) -1);
}

/* Merge-masking 256-bit form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvttph_epu64 (__m256i __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvttph2uqq256_mask (__C,
					     __A,
					     __B);
}

/* Zero-masking 256-bit form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvttph_epu64 (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvttph2uqq256_mask (__B,
					     _mm256_setzero_si256 (),
					     __A);
}
    1681  
    1682  /* Intrinsics vcvtqq2ph.  */
    1683  extern __inline __m128h
    1684  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    1685  _mm_cvtepi64_ph (__m128i __A)
    1686  {
    1687    return __builtin_ia32_vcvtqq2ph128_mask ((__v2di) __A,
    1688  					   _mm_setzero_ph (),
    1689  					   (__mmask8) -1);
    1690  }
    1691  
    1692  extern __inline __m128h
    1693  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    1694  _mm_mask_cvtepi64_ph (__m128h __A, __mmask8 __B, __m128i __C)
    1695  {
    1696    return __builtin_ia32_vcvtqq2ph128_mask ((__v2di) __C, __A, __B);
    1697  }
    1698  
    1699  extern __inline __m128h
    1700  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    1701  _mm_maskz_cvtepi64_ph (__mmask8 __A, __m128i __B)
    1702  {
    1703    return __builtin_ia32_vcvtqq2ph128_mask ((__v2di) __B,
    1704  					   _mm_setzero_ph (),
    1705  					   __A);
    1706  }
    1707  
    1708  extern __inline __m128h
    1709  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    1710  _mm256_cvtepi64_ph (__m256i __A)
    1711  {
    1712    return __builtin_ia32_vcvtqq2ph256_mask ((__v4di) __A,
    1713  					   _mm_setzero_ph (),
    1714  					   (__mmask8) -1);
    1715  }
    1716  
    1717  extern __inline __m128h
    1718  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    1719  _mm256_mask_cvtepi64_ph (__m128h __A, __mmask8 __B, __m256i __C)
    1720  {
    1721    return __builtin_ia32_vcvtqq2ph256_mask ((__v4di) __C, __A, __B);
    1722  }
    1723  
    1724  extern __inline __m128h
    1725  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    1726  _mm256_maskz_cvtepi64_ph (__mmask8 __A, __m256i __B)
    1727  {
    1728    return __builtin_ia32_vcvtqq2ph256_mask ((__v4di) __B,
    1729  					   _mm_setzero_ph (),
    1730  					   __A);
    1731  }
    1732  
    1733  /* Intrinsics vcvtuqq2ph.  */
    1734  extern __inline __m128h
    1735  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    1736  _mm_cvtepu64_ph (__m128i __A)
    1737  {
    1738    return __builtin_ia32_vcvtuqq2ph128_mask ((__v2di) __A,
    1739  					    _mm_setzero_ph (),
    1740  					    (__mmask8) -1);
    1741  }
    1742  
    1743  extern __inline __m128h
    1744  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    1745  _mm_mask_cvtepu64_ph (__m128h __A, __mmask8 __B, __m128i __C)
    1746  {
    1747    return __builtin_ia32_vcvtuqq2ph128_mask ((__v2di) __C, __A, __B);
    1748  }
    1749  
    1750  extern __inline __m128h
    1751  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    1752  _mm_maskz_cvtepu64_ph (__mmask8 __A, __m128i __B)
    1753  {
    1754    return __builtin_ia32_vcvtuqq2ph128_mask ((__v2di) __B,
    1755  					    _mm_setzero_ph (),
    1756  					    __A);
    1757  }
    1758  
    1759  extern __inline __m128h
    1760  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    1761  _mm256_cvtepu64_ph (__m256i __A)
    1762  {
    1763    return __builtin_ia32_vcvtuqq2ph256_mask ((__v4di) __A,
    1764  					    _mm_setzero_ph (),
    1765  					    (__mmask8) -1);
    1766  }
    1767  
    1768  extern __inline __m128h
    1769  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    1770  _mm256_mask_cvtepu64_ph (__m128h __A, __mmask8 __B, __m256i __C)
    1771  {
    1772    return __builtin_ia32_vcvtuqq2ph256_mask ((__v4di) __C, __A, __B);
    1773  }
    1774  
    1775  extern __inline __m128h
    1776  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    1777  _mm256_maskz_cvtepu64_ph (__mmask8 __A, __m256i __B)
    1778  {
    1779    return __builtin_ia32_vcvtuqq2ph256_mask ((__v4di) __B,
    1780  					    _mm_setzero_ph (),
    1781  					    __A);
    1782  }
    1783  
    1784  /* Intrinsics vcvtph2w.  */
    1785  extern __inline __m128i
    1786  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    1787  _mm_cvtph_epi16 (__m128h __A)
    1788  {
    1789    return (__m128i)
    1790      __builtin_ia32_vcvtph2w128_mask (__A,
    1791  				     (__v8hi)
    1792  				     _mm_setzero_si128 (),
    1793  				     (__mmask8) -1);
    1794  }
    1795  
    1796  extern __inline __m128i
    1797  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    1798  _mm_mask_cvtph_epi16 (__m128i __A, __mmask8 __B, __m128h __C)
    1799  {
    1800    return (__m128i)
    1801      __builtin_ia32_vcvtph2w128_mask (__C, ( __v8hi) __A, __B);
    1802  }
    1803  
    1804  extern __inline __m128i
    1805  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    1806  _mm_maskz_cvtph_epi16 (__mmask8 __A, __m128h __B)
    1807  {
    1808    return (__m128i)
    1809      __builtin_ia32_vcvtph2w128_mask (__B,
    1810  				     (__v8hi)
    1811  				     _mm_setzero_si128 (),
    1812  				     __A);
    1813  }
    1814  
    1815  extern __inline __m256i
    1816  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    1817  _mm256_cvtph_epi16 (__m256h __A)
    1818  {
    1819    return (__m256i)
    1820      __builtin_ia32_vcvtph2w256_mask (__A,
    1821  				     (__v16hi)
    1822  				     _mm256_setzero_si256 (),
    1823  				     (__mmask16) -1);
    1824  }
    1825  
    1826  extern __inline __m256i
    1827  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    1828  _mm256_mask_cvtph_epi16 (__m256i __A, __mmask16 __B, __m256h __C)
    1829  {
    1830    return (__m256i)
    1831      __builtin_ia32_vcvtph2w256_mask (__C, ( __v16hi) __A, __B);
    1832  }
    1833  
    1834  extern __inline __m256i
    1835  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    1836  _mm256_maskz_cvtph_epi16 (__mmask16 __A, __m256h __B)
    1837  {
    1838    return (__m256i)
    1839      __builtin_ia32_vcvtph2w256_mask (__B,
    1840  				     (__v16hi)
    1841  				     _mm256_setzero_si256 (),
    1842  				     __A);
    1843  }
    1844  
    1845  /* Intrinsics vcvtph2uw.  */
    1846  extern __inline __m128i
    1847  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    1848  _mm_cvtph_epu16 (__m128h __A)
    1849  {
    1850    return (__m128i)
    1851      __builtin_ia32_vcvtph2uw128_mask (__A,
    1852  				      (__v8hi)
    1853  				      _mm_setzero_si128 (),
    1854  				      (__mmask8) -1);
    1855  }
    1856  
    1857  extern __inline __m128i
    1858  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    1859  _mm_mask_cvtph_epu16 (__m128i __A, __mmask8 __B, __m128h __C)
    1860  {
    1861    return (__m128i)
    1862      __builtin_ia32_vcvtph2uw128_mask (__C, ( __v8hi) __A, __B);
    1863  }
    1864  
    1865  extern __inline __m128i
    1866  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    1867  _mm_maskz_cvtph_epu16 (__mmask8 __A, __m128h __B)
    1868  {
    1869    return (__m128i)
    1870      __builtin_ia32_vcvtph2uw128_mask (__B,
    1871  				      (__v8hi)
    1872  				      _mm_setzero_si128 (),
    1873  				      __A);
    1874  }
    1875  
    1876  extern __inline __m256i
    1877  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    1878  _mm256_cvtph_epu16 (__m256h __A)
    1879  {
    1880    return (__m256i)
    1881      __builtin_ia32_vcvtph2uw256_mask (__A,
    1882  				      (__v16hi)
    1883  				      _mm256_setzero_si256 (),
    1884  				      (__mmask16) -1);
    1885  }
    1886  
    1887  extern __inline __m256i
    1888  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    1889  _mm256_mask_cvtph_epu16 (__m256i __A, __mmask16 __B, __m256h __C)
    1890  {
    1891    return (__m256i)
    1892      __builtin_ia32_vcvtph2uw256_mask (__C, ( __v16hi) __A, __B);
    1893  }
    1894  
    1895  extern __inline __m256i
    1896  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    1897  _mm256_maskz_cvtph_epu16 (__mmask16 __A, __m256h __B)
    1898  {
    1899    return (__m256i)
    1900      __builtin_ia32_vcvtph2uw256_mask (__B,
    1901  				      (__v16hi)
    1902  				      _mm256_setzero_si256 (),
    1903  				      __A);
    1904  }
    1905  
    1906  /* Intrinsics vcvttph2w.  */
    1907  extern __inline __m128i
    1908  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    1909  _mm_cvttph_epi16 (__m128h __A)
    1910  {
    1911    return (__m128i)
    1912      __builtin_ia32_vcvttph2w128_mask (__A,
    1913  				      (__v8hi)
    1914  				      _mm_setzero_si128 (),
    1915  				      (__mmask8) -1);
    1916  }
    1917  
    1918  extern __inline __m128i
    1919  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    1920  _mm_mask_cvttph_epi16 (__m128i __A, __mmask8 __B, __m128h __C)
    1921  {
    1922    return (__m128i)
    1923      __builtin_ia32_vcvttph2w128_mask (__C,
    1924  				      ( __v8hi) __A,
    1925  				      __B);
    1926  }
    1927  
    1928  extern __inline __m128i
    1929  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    1930  _mm_maskz_cvttph_epi16 (__mmask8 __A, __m128h __B)
    1931  {
    1932    return (__m128i)
    1933      __builtin_ia32_vcvttph2w128_mask (__B,
    1934  				      (__v8hi)
    1935  				      _mm_setzero_si128 (),
    1936  				      __A);
    1937  }
    1938  
    1939  extern __inline __m256i
    1940  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    1941  _mm256_cvttph_epi16 (__m256h __A)
    1942  {
    1943    return (__m256i)
    1944      __builtin_ia32_vcvttph2w256_mask (__A,
    1945  				      (__v16hi)
    1946  				      _mm256_setzero_si256 (),
    1947  				      (__mmask16) -1);
    1948  }
    1949  
    1950  extern __inline __m256i
    1951  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    1952  _mm256_mask_cvttph_epi16 (__m256i __A, __mmask16 __B, __m256h __C)
    1953  {
    1954    return (__m256i)
    1955      __builtin_ia32_vcvttph2w256_mask (__C,
    1956  				      ( __v16hi) __A,
    1957  				      __B);
    1958  }
    1959  
    1960  extern __inline __m256i
    1961  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    1962  _mm256_maskz_cvttph_epi16 (__mmask16 __A, __m256h __B)
    1963  {
    1964    return (__m256i)
    1965      __builtin_ia32_vcvttph2w256_mask (__B,
    1966  				      (__v16hi)
    1967  				      _mm256_setzero_si256 (),
    1968  				      __A);
    1969  }
    1970  
    1971  /* Intrinsics vcvttph2uw.  */
    1972  extern __inline __m128i
    1973  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    1974  _mm_cvttph_epu16 (__m128h __A)
    1975  {
    1976    return (__m128i)
    1977      __builtin_ia32_vcvttph2uw128_mask (__A,
    1978  				       (__v8hi)
    1979  				       _mm_setzero_si128 (),
    1980  				       (__mmask8) -1);
    1981  }
    1982  
    1983  extern __inline __m128i
    1984  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    1985  _mm_mask_cvttph_epu16 (__m128i __A, __mmask8 __B, __m128h __C)
    1986  {
    1987    return (__m128i)
    1988      __builtin_ia32_vcvttph2uw128_mask (__C,
    1989  				       ( __v8hi) __A,
    1990  				       __B);
    1991  }
    1992  
    1993  extern __inline __m128i
    1994  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    1995  _mm_maskz_cvttph_epu16 (__mmask8 __A, __m128h __B)
    1996  {
    1997    return (__m128i)
    1998      __builtin_ia32_vcvttph2uw128_mask (__B,
    1999  				       (__v8hi)
    2000  				       _mm_setzero_si128 (),
    2001  				       __A);
    2002  }
    2003  
    2004  extern __inline __m256i
    2005  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2006  _mm256_cvttph_epu16 (__m256h __A)
    2007  {
    2008    return (__m256i)
    2009      __builtin_ia32_vcvttph2uw256_mask (__A,
    2010  				       (__v16hi)
    2011  				       _mm256_setzero_si256 (),
    2012  				       (__mmask16) -1);
    2013  }
    2014  
    2015  extern __inline __m256i
    2016  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2017  _mm256_mask_cvttph_epu16 (__m256i __A, __mmask16 __B, __m256h __C)
    2018  {
    2019    return (__m256i)
    2020      __builtin_ia32_vcvttph2uw256_mask (__C,
    2021  				       ( __v16hi) __A,
    2022  				       __B);
    2023  }
    2024  
    2025  extern __inline __m256i
    2026  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2027  _mm256_maskz_cvttph_epu16 (__mmask16 __A, __m256h __B)
    2028  {
    2029    return (__m256i)
    2030      __builtin_ia32_vcvttph2uw256_mask (__B,
    2031  				       (__v16hi) _mm256_setzero_si256 (),
    2032  				       __A);
    2033  }
    2034  
    2035  /* Intrinsics vcvtw2ph.  */
    2036  extern __inline __m128h
    2037  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2038  _mm_cvtepi16_ph (__m128i __A)
    2039  {
    2040    return __builtin_ia32_vcvtw2ph128_mask ((__v8hi) __A,
    2041  					  _mm_setzero_ph (),
    2042  					  (__mmask8) -1);
    2043  }
    2044  
    2045  extern __inline __m128h
    2046  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2047  _mm_mask_cvtepi16_ph (__m128h __A, __mmask8 __B, __m128i __C)
    2048  {
    2049    return __builtin_ia32_vcvtw2ph128_mask ((__v8hi) __C,
    2050  					  __A,
    2051  					  __B);
    2052  }
    2053  
    2054  extern __inline __m128h
    2055  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2056  _mm_maskz_cvtepi16_ph (__mmask8 __A, __m128i __B)
    2057  {
    2058    return __builtin_ia32_vcvtw2ph128_mask ((__v8hi) __B,
    2059  					  _mm_setzero_ph (),
    2060  					  __A);
    2061  }
    2062  
    2063  extern __inline __m256h
    2064  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2065  _mm256_cvtepi16_ph (__m256i __A)
    2066  {
    2067    return __builtin_ia32_vcvtw2ph256_mask ((__v16hi) __A,
    2068  					  _mm256_setzero_ph (),
    2069  					  (__mmask16) -1);
    2070  }
    2071  
    2072  extern __inline __m256h
    2073  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2074  _mm256_mask_cvtepi16_ph (__m256h __A, __mmask16 __B, __m256i __C)
    2075  {
    2076    return __builtin_ia32_vcvtw2ph256_mask ((__v16hi) __C,
    2077  					  __A,
    2078  					  __B);
    2079  }
    2080  
    2081  extern __inline __m256h
    2082  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2083  _mm256_maskz_cvtepi16_ph (__mmask16 __A, __m256i __B)
    2084  {
    2085    return __builtin_ia32_vcvtw2ph256_mask ((__v16hi) __B,
    2086  					  _mm256_setzero_ph (),
    2087  					  __A);
    2088  }
    2089  
    2090  /* Intrinsics vcvtuw2ph.  */
    2091  extern __inline __m128h
    2092  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2093  _mm_cvtepu16_ph (__m128i __A)
    2094  {
    2095    return __builtin_ia32_vcvtuw2ph128_mask ((__v8hi) __A,
    2096  					   _mm_setzero_ph (),
    2097  					   (__mmask8) -1);
    2098  }
    2099  
    2100  extern __inline __m128h
    2101  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2102  _mm_mask_cvtepu16_ph (__m128h __A, __mmask8 __B, __m128i __C)
    2103  {
    2104    return __builtin_ia32_vcvtuw2ph128_mask ((__v8hi) __C, __A, __B);
    2105  }
    2106  
    2107  extern __inline __m128h
    2108  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2109  _mm_maskz_cvtepu16_ph (__mmask8 __A, __m128i __B)
    2110  {
    2111    return __builtin_ia32_vcvtuw2ph128_mask ((__v8hi) __B,
    2112  					   _mm_setzero_ph (),
    2113  					   __A);
    2114  }
    2115  
    2116  extern __inline __m256h
    2117  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2118  _mm256_cvtepu16_ph (__m256i __A)
    2119  {
    2120    return __builtin_ia32_vcvtuw2ph256_mask ((__v16hi) __A,
    2121  					   _mm256_setzero_ph (),
    2122  					   (__mmask16) -1);
    2123  }
    2124  
    2125  extern __inline __m256h
    2126  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2127  _mm256_mask_cvtepu16_ph (__m256h __A, __mmask16 __B, __m256i __C)
    2128  {
    2129    return __builtin_ia32_vcvtuw2ph256_mask ((__v16hi) __C, __A, __B);
    2130  }
    2131  
    2132  extern __inline __m256h
    2133  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2134  _mm256_maskz_cvtepu16_ph (__mmask16 __A, __m256i __B)
    2135  {
    2136    return __builtin_ia32_vcvtuw2ph256_mask ((__v16hi) __B,
    2137  					   _mm256_setzero_ph (),
    2138  					   __A);
    2139  }
    2140  
    2141  /* Intrinsics vcvtph2pd.  */
    2142  extern __inline __m128d
    2143  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2144  _mm_cvtph_pd (__m128h __A)
    2145  {
    2146    return __builtin_ia32_vcvtph2pd128_mask (__A,
    2147  					   _mm_setzero_pd (),
    2148  					   (__mmask8) -1);
    2149  }
    2150  
    2151  extern __inline __m128d
    2152  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2153  _mm_mask_cvtph_pd (__m128d __A, __mmask8 __B, __m128h __C)
    2154  {
    2155    return __builtin_ia32_vcvtph2pd128_mask (__C, __A, __B);
    2156  }
    2157  
    2158  extern __inline __m128d
    2159  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2160  _mm_maskz_cvtph_pd (__mmask8 __A, __m128h __B)
    2161  {
    2162    return __builtin_ia32_vcvtph2pd128_mask (__B, _mm_setzero_pd (), __A);
    2163  }
    2164  
    2165  extern __inline __m256d
    2166  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2167  _mm256_cvtph_pd (__m128h __A)
    2168  {
    2169    return __builtin_ia32_vcvtph2pd256_mask (__A,
    2170  					   _mm256_setzero_pd (),
    2171  					   (__mmask8) -1);
    2172  }
    2173  
    2174  extern __inline __m256d
    2175  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2176  _mm256_mask_cvtph_pd (__m256d __A, __mmask8 __B, __m128h __C)
    2177  {
    2178    return __builtin_ia32_vcvtph2pd256_mask (__C, __A, __B);
    2179  }
    2180  
    2181  extern __inline __m256d
    2182  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2183  _mm256_maskz_cvtph_pd (__mmask8 __A, __m128h __B)
    2184  {
    2185    return __builtin_ia32_vcvtph2pd256_mask (__B,
    2186  					   _mm256_setzero_pd (),
    2187  					   __A);
    2188  }
    2189  
    2190  /* Intrinsics vcvtph2ps.  */
    2191  extern __inline __m128
    2192  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2193  _mm_cvtxph_ps (__m128h __A)
    2194  {
    2195    return __builtin_ia32_vcvtph2psx128_mask (__A,
    2196  					   _mm_setzero_ps (),
    2197  					   (__mmask8) -1);
    2198  }
    2199  
    2200  extern __inline __m128
    2201  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2202  _mm_mask_cvtxph_ps (__m128 __A, __mmask8 __B, __m128h __C)
    2203  {
    2204    return __builtin_ia32_vcvtph2psx128_mask (__C, __A, __B);
    2205  }
    2206  
    2207  extern __inline __m128
    2208  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2209  _mm_maskz_cvtxph_ps (__mmask8 __A, __m128h __B)
    2210  {
    2211    return __builtin_ia32_vcvtph2psx128_mask (__B, _mm_setzero_ps (), __A);
    2212  }
    2213  
    2214  extern __inline __m256
    2215  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2216  _mm256_cvtxph_ps (__m128h __A)
    2217  {
    2218    return __builtin_ia32_vcvtph2psx256_mask (__A,
    2219  					    _mm256_setzero_ps (),
    2220  					    (__mmask8) -1);
    2221  }
    2222  
    2223  extern __inline __m256
    2224  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2225  _mm256_mask_cvtxph_ps (__m256 __A, __mmask8 __B, __m128h __C)
    2226  {
    2227    return __builtin_ia32_vcvtph2psx256_mask (__C, __A, __B);
    2228  }
    2229  
    2230  extern __inline __m256
    2231  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2232  _mm256_maskz_cvtxph_ps (__mmask8 __A, __m128h __B)
    2233  {
    2234    return __builtin_ia32_vcvtph2psx256_mask (__B,
    2235  					    _mm256_setzero_ps (),
    2236  					    __A);
    2237  }
    2238  
    2239  /* Intrinsics vcvtxps2ph.  */
    2240  extern __inline __m128h
    2241  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2242  _mm_cvtxps_ph (__m128 __A)
    2243  {
    2244    return __builtin_ia32_vcvtps2phx128_mask ((__v4sf) __A,
    2245  					    _mm_setzero_ph (),
    2246  					    (__mmask8) -1);
    2247  }
    2248  
    2249  extern __inline __m128h
    2250  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2251  _mm_mask_cvtxps_ph (__m128h __A, __mmask8 __B, __m128 __C)
    2252  {
    2253    return __builtin_ia32_vcvtps2phx128_mask ((__v4sf) __C, __A, __B);
    2254  }
    2255  
    2256  extern __inline __m128h
    2257  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2258  _mm_maskz_cvtxps_ph (__mmask8 __A, __m128 __B)
    2259  {
    2260    return __builtin_ia32_vcvtps2phx128_mask ((__v4sf) __B,
    2261  					    _mm_setzero_ph (),
    2262  					    __A);
    2263  }
    2264  
    2265  extern __inline __m128h
    2266  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2267  _mm256_cvtxps_ph (__m256 __A)
    2268  {
    2269    return __builtin_ia32_vcvtps2phx256_mask ((__v8sf) __A,
    2270  					    _mm_setzero_ph (),
    2271  					    (__mmask8) -1);
    2272  }
    2273  
    2274  extern __inline __m128h
    2275  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2276  _mm256_mask_cvtxps_ph (__m128h __A, __mmask8 __B, __m256 __C)
    2277  {
    2278    return __builtin_ia32_vcvtps2phx256_mask ((__v8sf) __C, __A, __B);
    2279  }
    2280  
    2281  extern __inline __m128h
    2282  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2283  _mm256_maskz_cvtxps_ph (__mmask8 __A, __m256 __B)
    2284  {
    2285    return __builtin_ia32_vcvtps2phx256_mask ((__v8sf) __B,
    2286  					    _mm_setzero_ph (),
    2287  					    __A);
    2288  }
    2289  
    2290  /* Intrinsics vcvtpd2ph.  */
    2291  extern __inline __m128h
    2292  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2293  _mm_cvtpd_ph (__m128d __A)
    2294  {
    2295    return __builtin_ia32_vcvtpd2ph128_mask ((__v2df) __A,
    2296  					   _mm_setzero_ph (),
    2297  					   (__mmask8) -1);
    2298  }
    2299  
    2300  extern __inline __m128h
    2301  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2302  _mm_mask_cvtpd_ph (__m128h __A, __mmask8 __B, __m128d __C)
    2303  {
    2304    return __builtin_ia32_vcvtpd2ph128_mask ((__v2df) __C, __A, __B);
    2305  }
    2306  
    2307  extern __inline __m128h
    2308  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2309  _mm_maskz_cvtpd_ph (__mmask8 __A, __m128d __B)
    2310  {
    2311    return __builtin_ia32_vcvtpd2ph128_mask ((__v2df) __B,
    2312  					   _mm_setzero_ph (),
    2313  					   __A);
    2314  }
    2315  
    2316  extern __inline __m128h
    2317  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2318  _mm256_cvtpd_ph (__m256d __A)
    2319  {
    2320    return __builtin_ia32_vcvtpd2ph256_mask ((__v4df) __A,
    2321  					   _mm_setzero_ph (),
    2322  					   (__mmask8) -1);
    2323  }
    2324  
    2325  extern __inline __m128h
    2326  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2327  _mm256_mask_cvtpd_ph (__m128h __A, __mmask8 __B, __m256d __C)
    2328  {
    2329    return __builtin_ia32_vcvtpd2ph256_mask ((__v4df) __C, __A, __B);
    2330  }
    2331  
    2332  extern __inline __m128h
    2333  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2334  _mm256_maskz_cvtpd_ph (__mmask8 __A, __m256d __B)
    2335  {
    2336    return __builtin_ia32_vcvtpd2ph256_mask ((__v4df) __B,
    2337  					   _mm_setzero_ph (),
    2338  					   __A);
    2339  }
    2340  
    2341  /* Intrinsics vfmaddsub[132,213,231]ph.  */
    2342  extern __inline __m256h
    2343  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2344  _mm256_fmaddsub_ph (__m256h __A, __m256h __B, __m256h __C)
    2345  {
    2346    return (__m256h)__builtin_ia32_vfmaddsubph256_mask ((__v16hf)__A,
    2347  						      (__v16hf)__B,
    2348  						      (__v16hf)__C,
    2349  						      (__mmask16)-1);
    2350  }
    2351  
    2352  extern __inline __m256h
    2353  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2354  _mm256_mask_fmaddsub_ph (__m256h __A, __mmask16 __U, __m256h __B,
    2355  			 __m256h __C)
    2356  {
    2357    return (__m256h) __builtin_ia32_vfmaddsubph256_mask ((__v16hf) __A,
    2358  						       (__v16hf) __B,
    2359  						       (__v16hf) __C,
    2360  						       (__mmask16) __U);
    2361  }
    2362  
    2363  extern __inline __m256h
    2364  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2365  _mm256_mask3_fmaddsub_ph (__m256h __A, __m256h __B, __m256h __C,
    2366  			  __mmask16 __U)
    2367  {
    2368    return (__m256h) __builtin_ia32_vfmaddsubph256_mask3 ((__v16hf) __A,
    2369  							(__v16hf) __B,
    2370  							(__v16hf) __C,
    2371  							(__mmask16)
    2372  							__U);
    2373  }
    2374  
    2375  extern __inline __m256h
    2376  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2377  _mm256_maskz_fmaddsub_ph (__mmask16 __U, __m256h __A, __m256h __B,
    2378  			  __m256h __C)
    2379  {
    2380    return (__m256h) __builtin_ia32_vfmaddsubph256_maskz ((__v16hf) __A,
    2381  							(__v16hf) __B,
    2382  							(__v16hf) __C,
    2383  							(__mmask16)
    2384  							__U);
    2385  }
    2386  
    2387  extern __inline __m128h
    2388  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2389  _mm_fmaddsub_ph (__m128h __A, __m128h __B, __m128h __C)
    2390  {
    2391    return (__m128h)__builtin_ia32_vfmaddsubph128_mask ((__v8hf)__A,
    2392  						      (__v8hf)__B,
    2393  						      (__v8hf)__C,
    2394  						      (__mmask8)-1);
    2395  }
    2396  
    2397  extern __inline __m128h
    2398  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2399  _mm_mask_fmaddsub_ph (__m128h __A, __mmask8 __U, __m128h __B,
    2400  		      __m128h __C)
    2401  {
    2402    return (__m128h) __builtin_ia32_vfmaddsubph128_mask ((__v8hf) __A,
    2403  						       (__v8hf) __B,
    2404  						       (__v8hf) __C,
    2405  						       (__mmask8) __U);
    2406  }
    2407  
    2408  extern __inline __m128h
    2409  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2410  _mm_mask3_fmaddsub_ph (__m128h __A, __m128h __B, __m128h __C,
    2411  		       __mmask8 __U)
    2412  {
    2413    return (__m128h) __builtin_ia32_vfmaddsubph128_mask3 ((__v8hf) __A,
    2414  							(__v8hf) __B,
    2415  							(__v8hf) __C,
    2416  							(__mmask8)
    2417  							__U);
    2418  }
    2419  
    2420  extern __inline __m128h
    2421  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2422  _mm_maskz_fmaddsub_ph (__mmask8 __U, __m128h __A, __m128h __B,
    2423  		       __m128h __C)
    2424  {
    2425    return (__m128h) __builtin_ia32_vfmaddsubph128_maskz ((__v8hf) __A,
    2426  							(__v8hf) __B,
    2427  							(__v8hf) __C,
    2428  							(__mmask8)
    2429  							__U);
    2430  }
    2431  
    2432  /* Intrinsics vfmsubadd[132,213,231]ph.  */
    2433  extern __inline __m256h
    2434  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2435  _mm256_fmsubadd_ph (__m256h __A, __m256h __B, __m256h __C)
    2436  {
    2437    return (__m256h) __builtin_ia32_vfmsubaddph256_mask ((__v16hf) __A,
    2438  						       (__v16hf) __B,
    2439  						       (__v16hf) __C,
    2440  						       (__mmask16) -1);
    2441  }
    2442  
    2443  extern __inline __m256h
    2444  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2445  _mm256_mask_fmsubadd_ph (__m256h __A, __mmask16 __U, __m256h __B,
    2446  			 __m256h __C)
    2447  {
    2448    return (__m256h) __builtin_ia32_vfmsubaddph256_mask ((__v16hf) __A,
    2449  						       (__v16hf) __B,
    2450  						       (__v16hf) __C,
    2451  						       (__mmask16) __U);
    2452  }
    2453  
    2454  extern __inline __m256h
    2455  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2456  _mm256_mask3_fmsubadd_ph (__m256h __A, __m256h __B, __m256h __C,
    2457  			  __mmask16 __U)
    2458  {
    2459    return (__m256h) __builtin_ia32_vfmsubaddph256_mask3 ((__v16hf) __A,
    2460  							(__v16hf) __B,
    2461  							(__v16hf) __C,
    2462  							(__mmask16)
    2463  							__U);
    2464  }
    2465  
    2466  extern __inline __m256h
    2467  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    2468  _mm256_maskz_fmsubadd_ph (__mmask16 __U, __m256h __A, __m256h __B,
    2469  			  __m256h __C)
    2470  {
    2471    return (__m256h) __builtin_ia32_vfmsubaddph256_maskz ((__v16hf) __A,
    2472  							(__v16hf) __B,
    2473  							(__v16hf) __C,
    2474  							(__mmask16)
    2475  							__U);
    2476  }
    2477  
/* dst = __A*__B with __C alternately subtracted/added per lane, on 8
   FP16 elements; all lanes active (mask -1).  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsubadd_ph (__m128h __A, __m128h __B, __m128h __C)
{
  return (__m128h) __builtin_ia32_vfmsubaddph128_mask ((__v8hf) __A,
						       (__v8hf) __B,
						       (__v8hf) __C,
						       (__mmask8) -1);
}

/* Merge-masking form: lanes with a clear bit in __U keep the
   corresponding element of __A.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmsubadd_ph (__m128h __A, __mmask8 __U, __m128h __B,
		      __m128h __C)
{
  return (__m128h) __builtin_ia32_vfmsubaddph128_mask ((__v8hf) __A,
						       (__v8hf) __B,
						       (__v8hf) __C,
						       (__mmask8) __U);
}

/* mask3 form: lanes with a clear bit in __U keep the corresponding
   element of __C.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fmsubadd_ph (__m128h __A, __m128h __B, __m128h __C,
		       __mmask8 __U)
{
  return (__m128h) __builtin_ia32_vfmsubaddph128_mask3 ((__v8hf) __A,
							(__v8hf) __B,
							(__v8hf) __C,
							(__mmask8)
							__U);
}

/* Zeroing form: lanes with a clear bit in __U are zeroed.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmsubadd_ph (__mmask8 __U, __m128h __A, __m128h __B,
		       __m128h __C)
{
  return (__m128h) __builtin_ia32_vfmsubaddph128_maskz ((__v8hf) __A,
							(__v8hf) __B,
							(__v8hf) __C,
							(__mmask8)
							__U);
}
    2522  
/* Intrinsics vfmadd[132,213,231]ph.
   Fused multiply-add dst = __A*__B + __C on FP16 lanes; _mask keeps
   __A, _mask3 keeps __C and _maskz zeroes lanes whose mask bit is
   clear.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmadd_ph (__m256h __A, __m256h __B, __m256h __C)
{
  return (__m256h) __builtin_ia32_vfmaddph256_mask ((__v16hf) __A,
						       (__v16hf) __B,
						       (__v16hf) __C,
						       (__mmask16) -1);
}

/* Merge-masking form: inactive lanes keep __A.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fmadd_ph (__m256h __A, __mmask16 __U, __m256h __B,
			 __m256h __C)
{
  return (__m256h) __builtin_ia32_vfmaddph256_mask ((__v16hf) __A,
						       (__v16hf) __B,
						       (__v16hf) __C,
						       (__mmask16) __U);
}

/* mask3 form: inactive lanes keep __C.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask3_fmadd_ph (__m256h __A, __m256h __B, __m256h __C,
			  __mmask16 __U)
{
  return (__m256h) __builtin_ia32_vfmaddph256_mask3 ((__v16hf) __A,
							(__v16hf) __B,
							(__v16hf) __C,
							(__mmask16)
							__U);
}

/* Zeroing form: inactive lanes are zeroed.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fmadd_ph (__mmask16 __U, __m256h __A, __m256h __B,
			  __m256h __C)
{
  return (__m256h) __builtin_ia32_vfmaddph256_maskz ((__v16hf) __A,
							(__v16hf) __B,
							(__v16hf) __C,
							(__mmask16)
							__U);
}

/* 128-bit (8-lane) variants of the above.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_ph (__m128h __A, __m128h __B, __m128h __C)
{
  return (__m128h) __builtin_ia32_vfmaddph128_mask ((__v8hf) __A,
						       (__v8hf) __B,
						       (__v8hf) __C,
						       (__mmask8) -1);
}

/* Merge-masking form: inactive lanes keep __A.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmadd_ph (__m128h __A, __mmask8 __U, __m128h __B,
		      __m128h __C)
{
  return (__m128h) __builtin_ia32_vfmaddph128_mask ((__v8hf) __A,
						       (__v8hf) __B,
						       (__v8hf) __C,
						       (__mmask8) __U);
}

/* mask3 form: inactive lanes keep __C.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fmadd_ph (__m128h __A, __m128h __B, __m128h __C,
		       __mmask8 __U)
{
  return (__m128h) __builtin_ia32_vfmaddph128_mask3 ((__v8hf) __A,
							(__v8hf) __B,
							(__v8hf) __C,
							(__mmask8)
							__U);
}

/* Zeroing form: inactive lanes are zeroed.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmadd_ph (__mmask8 __U, __m128h __A, __m128h __B,
		       __m128h __C)
{
  return (__m128h) __builtin_ia32_vfmaddph128_maskz ((__v8hf) __A,
							(__v8hf) __B,
							(__v8hf) __C,
							(__mmask8)
							__U);
}
    2613  
/* Intrinsics vfnmadd[132,213,231]ph.
   Fused negated multiply-add dst = -(__A*__B) + __C on FP16 lanes;
   masking conventions as for the vfmaddph intrinsics above.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fnmadd_ph (__m256h __A, __m256h __B, __m256h __C)
{
  return (__m256h) __builtin_ia32_vfnmaddph256_mask ((__v16hf) __A,
						       (__v16hf) __B,
						       (__v16hf) __C,
						       (__mmask16) -1);
}

/* Merge-masking form: inactive lanes keep __A.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fnmadd_ph (__m256h __A, __mmask16 __U, __m256h __B,
			 __m256h __C)
{
  return (__m256h) __builtin_ia32_vfnmaddph256_mask ((__v16hf) __A,
						       (__v16hf) __B,
						       (__v16hf) __C,
						       (__mmask16) __U);
}

/* mask3 form: inactive lanes keep __C.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask3_fnmadd_ph (__m256h __A, __m256h __B, __m256h __C,
			  __mmask16 __U)
{
  return (__m256h) __builtin_ia32_vfnmaddph256_mask3 ((__v16hf) __A,
							(__v16hf) __B,
							(__v16hf) __C,
							(__mmask16)
							__U);
}

/* Zeroing form: inactive lanes are zeroed.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fnmadd_ph (__mmask16 __U, __m256h __A, __m256h __B,
			  __m256h __C)
{
  return (__m256h) __builtin_ia32_vfnmaddph256_maskz ((__v16hf) __A,
							(__v16hf) __B,
							(__v16hf) __C,
							(__mmask16)
							__U);
}

/* 128-bit (8-lane) variants of the above.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_ph (__m128h __A, __m128h __B, __m128h __C)
{
  return (__m128h) __builtin_ia32_vfnmaddph128_mask ((__v8hf) __A,
						       (__v8hf) __B,
						       (__v8hf) __C,
						       (__mmask8) -1);
}

/* Merge-masking form: inactive lanes keep __A.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fnmadd_ph (__m128h __A, __mmask8 __U, __m128h __B,
		      __m128h __C)
{
  return (__m128h) __builtin_ia32_vfnmaddph128_mask ((__v8hf) __A,
						       (__v8hf) __B,
						       (__v8hf) __C,
						       (__mmask8) __U);
}

/* mask3 form: inactive lanes keep __C.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fnmadd_ph (__m128h __A, __m128h __B, __m128h __C,
		       __mmask8 __U)
{
  return (__m128h) __builtin_ia32_vfnmaddph128_mask3 ((__v8hf) __A,
							(__v8hf) __B,
							(__v8hf) __C,
							(__mmask8)
							__U);
}

/* Zeroing form: inactive lanes are zeroed.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fnmadd_ph (__mmask8 __U, __m128h __A, __m128h __B,
		       __m128h __C)
{
  return (__m128h) __builtin_ia32_vfnmaddph128_maskz ((__v8hf) __A,
							(__v8hf) __B,
							(__v8hf) __C,
							(__mmask8)
							__U);
}
    2704  
/* Intrinsics vfmsub[132,213,231]ph.
   Fused multiply-subtract dst = __A*__B - __C on FP16 lanes; masking
   conventions as for the vfmaddph intrinsics above.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmsub_ph (__m256h __A, __m256h __B, __m256h __C)
{
  return (__m256h) __builtin_ia32_vfmsubph256_mask ((__v16hf) __A,
						       (__v16hf) __B,
						       (__v16hf) __C,
						       (__mmask16) -1);
}

/* Merge-masking form: inactive lanes keep __A.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fmsub_ph (__m256h __A, __mmask16 __U, __m256h __B,
			 __m256h __C)
{
  return (__m256h) __builtin_ia32_vfmsubph256_mask ((__v16hf) __A,
						       (__v16hf) __B,
						       (__v16hf) __C,
						       (__mmask16) __U);
}

/* mask3 form: inactive lanes keep __C.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask3_fmsub_ph (__m256h __A, __m256h __B, __m256h __C,
			  __mmask16 __U)
{
  return (__m256h) __builtin_ia32_vfmsubph256_mask3 ((__v16hf) __A,
							(__v16hf) __B,
							(__v16hf) __C,
							(__mmask16)
							__U);
}

/* Zeroing form: inactive lanes are zeroed.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fmsub_ph (__mmask16 __U, __m256h __A, __m256h __B,
			  __m256h __C)
{
  return (__m256h) __builtin_ia32_vfmsubph256_maskz ((__v16hf) __A,
							(__v16hf) __B,
							(__v16hf) __C,
							(__mmask16)
							__U);
}

/* 128-bit (8-lane) variants of the above.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_ph (__m128h __A, __m128h __B, __m128h __C)
{
  return (__m128h) __builtin_ia32_vfmsubph128_mask ((__v8hf) __A,
						       (__v8hf) __B,
						       (__v8hf) __C,
						       (__mmask8) -1);
}

/* Merge-masking form: inactive lanes keep __A.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmsub_ph (__m128h __A, __mmask8 __U, __m128h __B,
		      __m128h __C)
{
  return (__m128h) __builtin_ia32_vfmsubph128_mask ((__v8hf) __A,
						       (__v8hf) __B,
						       (__v8hf) __C,
						       (__mmask8) __U);
}

/* mask3 form: inactive lanes keep __C.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fmsub_ph (__m128h __A, __m128h __B, __m128h __C,
		       __mmask8 __U)
{
  return (__m128h) __builtin_ia32_vfmsubph128_mask3 ((__v8hf) __A,
							(__v8hf) __B,
							(__v8hf) __C,
							(__mmask8)
							__U);
}

/* Zeroing form: inactive lanes are zeroed.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmsub_ph (__mmask8 __U, __m128h __A, __m128h __B,
		       __m128h __C)
{
  return (__m128h) __builtin_ia32_vfmsubph128_maskz ((__v8hf) __A,
							(__v8hf) __B,
							(__v8hf) __C,
							(__mmask8)
							__U);
}
    2795  
/* Intrinsics vfnmsub[132,213,231]ph.
   Fused negated multiply-subtract dst = -(__A*__B) - __C on FP16
   lanes; masking conventions as for the vfmaddph intrinsics above.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fnmsub_ph (__m256h __A, __m256h __B, __m256h __C)
{
  return (__m256h) __builtin_ia32_vfnmsubph256_mask ((__v16hf) __A,
						       (__v16hf) __B,
						       (__v16hf) __C,
						       (__mmask16) -1);
}

/* Merge-masking form: inactive lanes keep __A.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fnmsub_ph (__m256h __A, __mmask16 __U, __m256h __B,
			 __m256h __C)
{
  return (__m256h) __builtin_ia32_vfnmsubph256_mask ((__v16hf) __A,
						       (__v16hf) __B,
						       (__v16hf) __C,
						       (__mmask16) __U);
}

/* mask3 form: inactive lanes keep __C.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask3_fnmsub_ph (__m256h __A, __m256h __B, __m256h __C,
			  __mmask16 __U)
{
  return (__m256h) __builtin_ia32_vfnmsubph256_mask3 ((__v16hf) __A,
							(__v16hf) __B,
							(__v16hf) __C,
							(__mmask16)
							__U);
}

/* Zeroing form: inactive lanes are zeroed.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fnmsub_ph (__mmask16 __U, __m256h __A, __m256h __B,
			  __m256h __C)
{
  return (__m256h) __builtin_ia32_vfnmsubph256_maskz ((__v16hf) __A,
							(__v16hf) __B,
							(__v16hf) __C,
							(__mmask16)
							__U);
}

/* 128-bit (8-lane) variants of the above.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_ph (__m128h __A, __m128h __B, __m128h __C)
{
  return (__m128h) __builtin_ia32_vfnmsubph128_mask ((__v8hf) __A,
						       (__v8hf) __B,
						       (__v8hf) __C,
						       (__mmask8) -1);
}

/* Merge-masking form: inactive lanes keep __A.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fnmsub_ph (__m128h __A, __mmask8 __U, __m128h __B,
		      __m128h __C)
{
  return (__m128h) __builtin_ia32_vfnmsubph128_mask ((__v8hf) __A,
						       (__v8hf) __B,
						       (__v8hf) __C,
						       (__mmask8) __U);
}

/* mask3 form: inactive lanes keep __C.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fnmsub_ph (__m128h __A, __m128h __B, __m128h __C,
		       __mmask8 __U)
{
  return (__m128h) __builtin_ia32_vfnmsubph128_mask3 ((__v8hf) __A,
							(__v8hf) __B,
							(__v8hf) __C,
							(__mmask8)
							__U);
}

/* Zeroing form: inactive lanes are zeroed.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fnmsub_ph (__mmask8 __U, __m128h __A, __m128h __B,
		       __m128h __C)
{
  return (__m128h) __builtin_ia32_vfnmsubph128_maskz ((__v8hf) __A,
							(__v8hf) __B,
							(__v8hf) __C,
							(__mmask8)
							__U);
}
    2886  
/* Intrinsics vf[,c]maddcph.
   Complex FP16 multiply-add; each 32-bit lane holds one complex
   number (real in the low half).  The mask operand selects complex
   (32-bit) lanes, hence __mmask8 even for the 256-bit forms.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_pch (__m128h __A, __m128h __B, __m128h __C)
{
  return (__m128h) __builtin_ia32_vfmaddcph128 ((__v8hf) __A,
						(__v8hf) __B,
						(__v8hf) __C);
}

/* Merge-masking form: __B is the complex-lane mask; inactive lanes
   keep __A.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmadd_pch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return (__m128h)
    __builtin_ia32_vfmaddcph128_mask ((__v8hf) __A,
				      (__v8hf) __C,
				      (__v8hf) __D, __B);
}

/* mask3 form: __D is the complex-lane mask; inactive lanes keep __C.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fmadd_pch (__m128h __A, __m128h __B, __m128h __C,  __mmask8 __D)
{
  return (__m128h)
    __builtin_ia32_vfmaddcph128_mask3 ((__v8hf) __A,
				       (__v8hf) __B,
				       (__v8hf) __C, __D);
}

/* Zeroing form: __A is the complex-lane mask; inactive lanes are
   zeroed.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmadd_pch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D)
{
  return (__m128h) __builtin_ia32_vfmaddcph128_maskz ((__v8hf) __B,
						      (__v8hf) __C,
						      (__v8hf) __D, __A);
}

/* 256-bit (8 complex lanes) variants of the above.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmadd_pch (__m256h __A, __m256h __B, __m256h __C)
{
  return (__m256h) __builtin_ia32_vfmaddcph256 ((__v16hf) __A,
						(__v16hf) __B,
						(__v16hf) __C);
}

/* Merge-masking form: __B is the complex-lane mask; inactive lanes
   keep __A.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fmadd_pch (__m256h __A, __mmask8 __B, __m256h __C, __m256h __D)
{
  return (__m256h)
     __builtin_ia32_vfmaddcph256_mask ((__v16hf) __A,
				       (__v16hf) __C,
				       (__v16hf) __D, __B);
}

/* mask3 form: __D is the complex-lane mask; inactive lanes keep __C.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask3_fmadd_pch (__m256h __A, __m256h __B, __m256h __C,  __mmask8 __D)
{
  return (__m256h)
    __builtin_ia32_vfmaddcph256_mask3 ((__v16hf) __A,
				       (__v16hf) __B,
				       (__v16hf) __C, __D);
}

/* Zeroing form: __A is the complex-lane mask; inactive lanes are
   zeroed.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fmadd_pch (__mmask8 __A, __m256h __B, __m256h __C, __m256h __D)
{
  return (__m256h)__builtin_ia32_vfmaddcph256_maskz ((__v16hf) __B,
						     (__v16hf) __C,
						     (__v16hf) __D, __A);
}
    2963  
/* Complex-conjugate FP16 multiply-add (vfcmaddcph); the second complex
   source is conjugated before the multiply.  Masking conventions as
   for the vfmaddcph intrinsics above.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fcmadd_pch (__m128h __A, __m128h __B, __m128h __C)
{
  return (__m128h) __builtin_ia32_vfcmaddcph128 ((__v8hf) __A,
						 (__v8hf) __B,
						 (__v8hf) __C);
}

/* Merge-masking form: __B is the complex-lane mask; inactive lanes
   keep __A.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fcmadd_pch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return (__m128h)
     __builtin_ia32_vfcmaddcph128_mask ((__v8hf) __A,
					(__v8hf) __C,
					(__v8hf) __D, __B);
}

/* mask3 form: __D is the complex-lane mask; inactive lanes keep __C.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fcmadd_pch (__m128h __A, __m128h __B, __m128h __C,  __mmask8 __D)
{
  return (__m128h)
    __builtin_ia32_vfcmaddcph128_mask3 ((__v8hf) __A,
					(__v8hf) __B,
					(__v8hf) __C, __D);
}

/* Zeroing form: __A is the complex-lane mask; inactive lanes are
   zeroed.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fcmadd_pch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D)
{
  return (__m128h)__builtin_ia32_vfcmaddcph128_maskz ((__v8hf) __B,
						      (__v8hf) __C,
						      (__v8hf) __D, __A);
}

/* 256-bit (8 complex lanes) variants of the above.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fcmadd_pch (__m256h __A, __m256h __B, __m256h __C)
{
  return (__m256h) __builtin_ia32_vfcmaddcph256 ((__v16hf) __A,
						 (__v16hf) __B,
						 (__v16hf) __C);
}

/* Merge-masking form: __B is the complex-lane mask; inactive lanes
   keep __A.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fcmadd_pch (__m256h __A, __mmask8 __B, __m256h __C, __m256h __D)
{
  return (__m256h)
     __builtin_ia32_vfcmaddcph256_mask ((__v16hf) __A,
					(__v16hf) __C,
					(__v16hf) __D, __B);
}

/* mask3 form: __D is the complex-lane mask; inactive lanes keep __C.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask3_fcmadd_pch (__m256h __A, __m256h __B, __m256h __C,  __mmask8 __D)
{
  return (__m256h)
    __builtin_ia32_vfcmaddcph256_mask3 ((__v16hf) __A,
					(__v16hf) __B,
					(__v16hf) __C, __D);
}

/* Zeroing form: __A is the complex-lane mask; inactive lanes are
   zeroed.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fcmadd_pch (__mmask8 __A, __m256h __B, __m256h __C, __m256h __D)
{
  return (__m256h) __builtin_ia32_vfcmaddcph256_maskz ((__v16hf) __B,
						       (__v16hf) __C,
						       (__v16hf) __D, __A);
}
    3039  
/* Intrinsics vf[,c]mulcph.
   Complex FP16 multiply; the mask operand selects complex (32-bit)
   lanes.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmul_pch (__m128h __A, __m128h __B)
{
  return (__m128h) __builtin_ia32_vfmulcph128 ((__v8hf) __A, (__v8hf) __B);
}

/* Merge-masking form: __B is the complex-lane mask; inactive lanes are
   taken from __A (the passthrough operand, passed third to the
   builtin).  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmul_pch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return (__m128h) __builtin_ia32_vfmulcph128_mask ((__v8hf) __C,
						    (__v8hf) __D,
						    (__v8hf) __A, __B);
}

/* Zeroing form: implemented as a masked multiply with a zero
   passthrough vector.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmul_pch (__mmask8 __A, __m128h __B, __m128h __C)
{
  return (__m128h) __builtin_ia32_vfmulcph128_mask ((__v8hf) __B,
						    (__v8hf) __C,
						    _mm_setzero_ph (),
						    __A);
}

/* 256-bit (8 complex lanes) variants of the above.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmul_pch (__m256h __A, __m256h __B)
{
  return (__m256h) __builtin_ia32_vfmulcph256 ((__v16hf) __A,
					       (__v16hf) __B);
}

/* Merge-masking form: __B is the complex-lane mask; inactive lanes are
   taken from __A.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fmul_pch (__m256h __A, __mmask8 __B, __m256h __C, __m256h __D)
{
  return (__m256h) __builtin_ia32_vfmulcph256_mask ((__v16hf) __C,
						    (__v16hf) __D,
						    (__v16hf) __A, __B);
}

/* Zeroing form: masked multiply with a zero passthrough vector.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fmul_pch (__mmask8 __A, __m256h __B, __m256h __C)
{
  return (__m256h) __builtin_ia32_vfmulcph256_mask ((__v16hf) __B,
						    (__v16hf) __C,
						    _mm256_setzero_ph (),
						    __A);
}
    3093  
/* Complex-conjugate FP16 multiply (vfcmulcph); the second complex
   source is conjugated.  Masking conventions as for the vfmulcph
   intrinsics above.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fcmul_pch (__m128h __A, __m128h __B)
{
  return (__m128h) __builtin_ia32_vfcmulcph128 ((__v8hf) __A,
						(__v8hf) __B);
}

/* Merge-masking form: __B is the complex-lane mask; inactive lanes are
   taken from __A.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fcmul_pch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return (__m128h) __builtin_ia32_vfcmulcph128_mask ((__v8hf) __C,
						     (__v8hf) __D,
						     (__v8hf) __A, __B);
}

/* Zeroing form: masked multiply with a zero passthrough vector.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fcmul_pch (__mmask8 __A, __m128h __B, __m128h __C)
{
  return (__m128h) __builtin_ia32_vfcmulcph128_mask ((__v8hf) __B,
						     (__v8hf) __C,
						     _mm_setzero_ph (),
						     __A);
}

/* 256-bit (8 complex lanes) variants of the above.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fcmul_pch (__m256h __A, __m256h __B)
{
  return (__m256h) __builtin_ia32_vfcmulcph256 ((__v16hf) __A, (__v16hf) __B);
}

/* Merge-masking form: __B is the complex-lane mask; inactive lanes are
   taken from __A.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fcmul_pch (__m256h __A, __mmask8 __B, __m256h __C, __m256h __D)
{
  return (__m256h) __builtin_ia32_vfcmulcph256_mask ((__v16hf) __C,
						     (__v16hf) __D,
						     (__v16hf) __A, __B);
}

/* Zeroing form: masked multiply with a zero passthrough vector.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fcmul_pch (__mmask8 __A, __m256h __B, __m256h __C)
{
  return (__m256h) __builtin_ia32_vfcmulcph256_mask ((__v16hf) __B,
						     (__v16hf) __C,
						     _mm256_setzero_ph (),
						     __A);
}
    3146  
/* Horizontal reduction of the 16 FP16 elements of __A with the infix
   operator OP: combine the two 128-bit halves, then fold vector halves
   of 8 and 4 elements via shuffles; the final two elements are combined
   as scalars.  */
#define _MM256_REDUCE_OP(op)						\
  __m128h __T1 = (__m128h) _mm256_extractf128_pd ((__m256d) __A, 0);	\
  __m128h __T2 = (__m128h) _mm256_extractf128_pd ((__m256d) __A, 1);	\
  __m128h __T3 = (__T1 op __T2);					\
  __m128h __T4 = (__m128h) __builtin_shuffle (__T3,			\
		 (__v8hi) { 4, 5, 6, 7, 0, 1, 2, 3 });			\
  __m128h __T5 = (__T3) op (__T4);					\
  __m128h __T6 = (__m128h) __builtin_shuffle (__T5,			\
		 (__v8hi) { 2, 3, 0, 1, 4, 5, 6, 7 });			\
  __m128h __T7 = __T5 op __T6;						\
  return __T7[0] op __T7[1]

/* Sum of all 16 FP16 elements of __A.  */
extern __inline _Float16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_reduce_add_ph (__m256h __A)
{
  _MM256_REDUCE_OP (+);
}

/* Product of all 16 FP16 elements of __A.  */
extern __inline _Float16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_reduce_mul_ph (__m256h __A)
{
  _MM256_REDUCE_OP (*);
}
    3172  
#undef _MM256_REDUCE_OP
/* Redefinition for min/max: OP names an _mm_<op> intrinsic rather than
   an infix operator; the active element count is halved at each step
   (16 -> 8 -> 4 -> 2 -> 1) via shuffles.  */
#define _MM256_REDUCE_OP(op)						\
  __m128h __T1 = (__m128h) _mm256_extractf128_pd ((__m256d) __A, 0);	\
  __m128h __T2 = (__m128h) _mm256_extractf128_pd ((__m256d) __A, 1);	\
  __m128h __T3 = _mm_##op (__T1, __T2);				\
  __m128h __T4 = (__m128h) __builtin_shuffle (__T3,			\
		 (__v8hi) { 2, 3, 0, 1, 6, 7, 4, 5 });			\
  __m128h __T5 = _mm_##op (__T3, __T4);				\
  __m128h __T6 = (__m128h) __builtin_shuffle (__T5, (__v8hi) { 4, 5 }); \
  __m128h __T7 = _mm_##op (__T5, __T6);				\
  __m128h __T8 = (__m128h) __builtin_shuffle (__T7, (__v8hi) { 1, 0 }); \
  __m128h __T9 = _mm_##op (__T7, __T8);				\
  return __T9[0]

/* Minimum of all 16 FP16 elements of __A.  */
extern __inline _Float16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_reduce_min_ph (__m256h __A)
{
  _MM256_REDUCE_OP (min_ph);
}

/* Maximum of all 16 FP16 elements of __A.  */
extern __inline _Float16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_reduce_max_ph (__m256h __A)
{
  _MM256_REDUCE_OP (max_ph);
}
    3200  
/* Horizontal reduction of the 8 FP16 elements of __A with the infix
   operator OP: fold halves of 8 and 4 elements via shuffles, then
   combine the final two elements as scalars.  */
#define _MM_REDUCE_OP(op) 						\
  __m128h __T1 = (__m128h) __builtin_shuffle (__A,			\
		 (__v8hi) { 4, 5, 6, 7, 0, 1, 2, 3 });			\
  __m128h __T2 = (__A) op (__T1);					\
  __m128h __T3 = (__m128h) __builtin_shuffle (__T2,			\
		 (__v8hi){ 2, 3, 0, 1, 4, 5, 6, 7 });			\
  __m128h __T4 = __T2 op __T3;						\
  return __T4[0] op __T4[1]

/* Sum of all 8 FP16 elements of __A.  */
extern __inline _Float16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_add_ph (__m128h __A)
{
  _MM_REDUCE_OP (+);
}

/* Product of all 8 FP16 elements of __A.  */
extern __inline _Float16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_mul_ph (__m128h __A)
{
  _MM_REDUCE_OP (*);
}
    3223  
#undef _MM_REDUCE_OP
/* Redefinition for min/max: OP names an _mm_<op> intrinsic rather than
   an infix operator; the active element count is halved at each step
   (8 -> 4 -> 2 -> 1) via shuffles.  */
#define _MM_REDUCE_OP(op) 						\
  __m128h __T1 = (__m128h) __builtin_shuffle (__A,			\
		 (__v8hi) { 2, 3, 0, 1, 6, 7, 4, 5 });			\
  __m128h __T2 = _mm_##op (__A, __T1);					\
  __m128h __T3 = (__m128h) __builtin_shuffle (__T2, (__v8hi){ 4, 5 });	\
  __m128h __T4 = _mm_##op (__T2, __T3);				\
  __m128h __T5 = (__m128h) __builtin_shuffle (__T4, (__v8hi){ 1, 0 });	\
  __m128h __T6 = _mm_##op (__T4, __T5);				\
  return __T6[0]

/* Minimum of all 8 FP16 elements of __A.  */
extern __inline _Float16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_min_ph (__m128h __A)
{
  _MM_REDUCE_OP (min_ph);
}

/* Maximum of all 8 FP16 elements of __A.  */
extern __inline _Float16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_max_ph (__m128h __A)
{
  _MM_REDUCE_OP (max_ph);
}
    3248  
    3249  #undef _MM256_REDUCE_OP
    3250  #undef _MM_REDUCE_OP
    3251  
/* Blend: lanes with a set bit in __U are taken from __W, the rest
   from __A.  Implemented via the masked 16-bit move builtin.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_blend_ph (__mmask16 __U, __m256h __A, __m256h __W)
{
  return (__m256h) __builtin_ia32_movdquhi256_mask ((__v16hi) __W,
						    (__v16hi) __A,
						    (__mmask16) __U);

}

/* Select 16-bit elements from the concatenation of __A and __B
   according to the indices in __I.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutex2var_ph (__m256h __A, __m256i __I, __m256h __B)
{
  return (__m256h) __builtin_ia32_vpermi2varhi256_mask ((__v16hi) __A,
						       (__v16hi) __I,
						       (__v16hi) __B,
						       (__mmask16)-1);
}

/* Permute the 16-bit elements of __B using the indices in __A.  */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutexvar_ph (__m256i __A, __m256h __B)
{
  return (__m256h) __builtin_ia32_permvarhi256_mask ((__v16hi) __B,
						     (__v16hi) __A,
						     (__v16hi)
						     (_mm256_setzero_ph ()),
						     (__mmask16)-1);
}
    3282  
/* Blend: lanes with a set bit in __U are taken from __W, the rest
   from __A.  Implemented via the masked 16-bit move builtin.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_blend_ph (__mmask8 __U, __m128h __A, __m128h __W)
{
  return (__m128h) __builtin_ia32_movdquhi128_mask ((__v8hi) __W,
						    (__v8hi) __A,
						    (__mmask8) __U);

}

/* Select 16-bit elements from the concatenation of __A and __B
   according to the indices in __I.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_permutex2var_ph (__m128h __A, __m128i __I, __m128h __B)
{
  return (__m128h) __builtin_ia32_vpermi2varhi128_mask ((__v8hi) __A,
						       (__v8hi) __I,
						       (__v8hi) __B,
						       (__mmask8)-1);
}

/* Permute the 16-bit elements of __B using the indices in __A.  */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_permutexvar_ph (__m128i __A, __m128h __B)
{
  return (__m128h) __builtin_ia32_permvarhi128_mask ((__v8hi) __B,
						     (__v8hi) __A,
						     (__v8hi)
						     (_mm_setzero_ph ()),
						     (__mmask8)-1);
}
    3313  
    3314  extern __inline __m256h
    3315  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    3316  _mm256_set1_pch (_Float16 _Complex __A)
    3317  {
    3318    union
    3319    {
    3320      _Float16 _Complex __a;
    3321      float __b;
    3322    } __u = { .__a = __A };
    3323  
    3324    return (__m256h) _mm256_set1_ps (__u.__b);
    3325  }
    3326  
    3327  extern __inline __m128h
    3328  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
    3329  _mm_set1_pch (_Float16 _Complex __A)
    3330  {
    3331    union
    3332    {
    3333      _Float16 _Complex __a;
    3334      float __b;
    3335    } __u = { .__a = __A };
    3336  
    3337    return (__m128h) _mm_set1_ps (__u.__b);
    3338  }
    3339  
/* The intrinsics below are aliases for the corresponding
   f[,c]mul_pch intrinsics.  */
#define _mm_mul_pch(A, B) _mm_fmul_pch ((A), (B))
#define _mm_mask_mul_pch(W, U, A, B) _mm_mask_fmul_pch ((W), (U), (A), (B))
#define _mm_maskz_mul_pch(U, A, B) _mm_maskz_fmul_pch ((U), (A), (B))
#define _mm256_mul_pch(A, B) _mm256_fmul_pch ((A), (B))
#define _mm256_mask_mul_pch(W, U, A, B)				      \
  _mm256_mask_fmul_pch ((W), (U), (A), (B))
#define _mm256_maskz_mul_pch(U, A, B) _mm256_maskz_fmul_pch ((U), (A), (B))

#define _mm_cmul_pch(A, B) _mm_fcmul_pch ((A), (B))
#define _mm_mask_cmul_pch(W, U, A, B) _mm_mask_fcmul_pch ((W), (U), (A), (B))
#define _mm_maskz_cmul_pch(U, A, B) _mm_maskz_fcmul_pch ((U), (A), (B))
#define _mm256_cmul_pch(A, B) _mm256_fcmul_pch ((A), (B))
#define _mm256_mask_cmul_pch(W, U, A, B)			      \
   _mm256_mask_fcmul_pch ((W), (U), (A), (B))
#define _mm256_maskz_cmul_pch(U, A, B) _mm256_maskz_fcmul_pch((U), (A), (B))
    3356  
    3357  #ifdef __DISABLE_AVX512FP16VL__
    3358  #undef __DISABLE_AVX512FP16VL__
    3359  #pragma GCC pop_options
    3360  #endif /* __DISABLE_AVX512FP16VL__ */
    3361  
    3362  #endif /* __AVX512FP16VLINTRIN_H_INCLUDED */