1  /* { dg-do compile } */
       2  /* { dg-options "-O2 -Werror-implicit-function-declaration -Wuninitialized -march=k8" } */
       3  /* { dg-add-options bind_pic_locally } */
       4  
       5  #include <mm_malloc.h>
       6  
       7  /* Test that the intrinsics compile with optimization.  All of them
       8     are defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h,
       9     mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h,
      10     tbmintrin.h, lwpintrin.h, popcntintrin.h, fmaintrin.h, tsxtrkintrin.h,
      11     avx5124fmapsintrin.h, avx5124vnniwintrin.h, avx512vpopcntdqintrin.h,
      12     avx512bitalgintrin.h, avx512vp2intersectintrin.h,
      13     amxtileintrin.h, amxint8intrin.h, amxbf16intrin.h,
      14     avx512vp2intersectvlintrin.h and mm_malloc.h that reference the proper
      15     builtin functions.
      16     Defining away "extern" and "__inline" results in all of them being
      17     compiled as proper functions.  */
      18  
      19  #define extern
      20  #define __inline
      21  
      22  /* Following intrinsics require immediate arguments. */
      23  
      24  /* ammintrin.h */
      25  #define __builtin_ia32_extrqi(X, I, L)  __builtin_ia32_extrqi(X, 1, 1)
      26  #define __builtin_ia32_insertqi(X, Y, I, L) __builtin_ia32_insertqi(X, Y, 1, 1)
      27  
      28  /* wmmintrin.h */
      29  #define __builtin_ia32_aeskeygenassist128(X, C) __builtin_ia32_aeskeygenassist128(X, 1)
      30  #define __builtin_ia32_pclmulqdq128(X, Y, I) __builtin_ia32_pclmulqdq128(X, Y, 1)
      31  
      32  /* smmintrin.h */
      33  #define __builtin_ia32_roundpd(V, M) __builtin_ia32_roundpd(V, 1)
      34  #define __builtin_ia32_roundsd(D, V, M) __builtin_ia32_roundsd(D, V, 1)
      35  #define __builtin_ia32_roundps(V, M) __builtin_ia32_roundps(V, 1)
      36  #define __builtin_ia32_roundss(D, V, M) __builtin_ia32_roundss(D, V, 1)
      37  
      38  #define __builtin_ia32_pblendw128(X, Y, M) __builtin_ia32_pblendw128 (X, Y, 1)
      39  #define __builtin_ia32_blendps(X, Y, M) __builtin_ia32_blendps(X, Y, 1)
      40  #define __builtin_ia32_blendpd(X, Y, M) __builtin_ia32_blendpd(X, Y, 1)
      41  #define __builtin_ia32_dpps(X, Y, M) __builtin_ia32_dpps(X, Y, 1)
      42  #define __builtin_ia32_dppd(X, Y, M) __builtin_ia32_dppd(X, Y, 1)
      43  #define __builtin_ia32_insertps128(D, S, N) __builtin_ia32_insertps128(D, S, 1)
      44  #define __builtin_ia32_vec_ext_v4sf(X, N) __builtin_ia32_vec_ext_v4sf(X, 1)
      45  #define __builtin_ia32_vec_set_v16qi(D, S, N) __builtin_ia32_vec_set_v16qi(D, S, 1)
      46  #define __builtin_ia32_vec_set_v4si(D, S, N) __builtin_ia32_vec_set_v4si(D, S, 1)
      47  #define __builtin_ia32_vec_set_v2di(D, S, N) __builtin_ia32_vec_set_v2di(D, S, 1)
      48  #define __builtin_ia32_vec_ext_v16qi(X, N) __builtin_ia32_vec_ext_v16qi(X, 1)
      49  #define __builtin_ia32_vec_ext_v4si(X, N) __builtin_ia32_vec_ext_v4si(X, 1)
      50  #define __builtin_ia32_vec_ext_v2di(X, N) __builtin_ia32_vec_ext_v2di(X, 1)
      51  #define __builtin_ia32_mpsadbw128(X, Y, M) __builtin_ia32_mpsadbw128(X, Y, 1)
      52  #define __builtin_ia32_pcmpistrm128(X, Y, M) \
      53    __builtin_ia32_pcmpistrm128(X, Y, 1)
      54  #define __builtin_ia32_pcmpistri128(X, Y, M) \
      55    __builtin_ia32_pcmpistri128(X, Y, 1)
      56  #define __builtin_ia32_pcmpestrm128(X, LX, Y, LY, M) \
      57    __builtin_ia32_pcmpestrm128(X, LX, Y, LY, 1)
      58  #define __builtin_ia32_pcmpestri128(X, LX, Y, LY, M) \
      59    __builtin_ia32_pcmpestri128(X, LX, Y, LY, 1)
      60  #define __builtin_ia32_pcmpistria128(X, Y, M) \
      61    __builtin_ia32_pcmpistria128(X, Y, 1)
      62  #define __builtin_ia32_pcmpistric128(X, Y, M) \
      63    __builtin_ia32_pcmpistric128(X, Y, 1)
      64  #define __builtin_ia32_pcmpistrio128(X, Y, M) \
      65    __builtin_ia32_pcmpistrio128(X, Y, 1)
      66  #define __builtin_ia32_pcmpistris128(X, Y, M) \
      67    __builtin_ia32_pcmpistris128(X, Y, 1)
      68  #define __builtin_ia32_pcmpistriz128(X, Y, M) \
      69    __builtin_ia32_pcmpistriz128(X, Y, 1)
      70  #define __builtin_ia32_pcmpestria128(X, LX, Y, LY, M) \
      71    __builtin_ia32_pcmpestria128(X, LX, Y, LY, 1)
      72  #define __builtin_ia32_pcmpestric128(X, LX, Y, LY, M) \
      73    __builtin_ia32_pcmpestric128(X, LX, Y, LY, 1)
      74  #define __builtin_ia32_pcmpestrio128(X, LX, Y, LY, M) \
      75    __builtin_ia32_pcmpestrio128(X, LX, Y, LY, 1)
      76  #define __builtin_ia32_pcmpestris128(X, LX, Y, LY, M) \
      77    __builtin_ia32_pcmpestris128(X, LX, Y, LY, 1)
      78  #define __builtin_ia32_pcmpestriz128(X, LX, Y, LY, M) \
      79    __builtin_ia32_pcmpestriz128(X, LX, Y, LY, 1)
      80  
      81  /* tmmintrin.h */
      82  #define __builtin_ia32_palignr128(X, Y, N) __builtin_ia32_palignr128(X, Y, 8)
      83  #define __builtin_ia32_palignr(X, Y, N) __builtin_ia32_palignr(X, Y, 8)
      84  
      85  /* emmintrin.h */
      86  #define __builtin_ia32_psrldqi128(A, B) __builtin_ia32_psrldqi128(A, 8)
      87  #define __builtin_ia32_pslldqi128(A, B) __builtin_ia32_pslldqi128(A, 8)
      88  #define __builtin_ia32_pshufhw(A, N) __builtin_ia32_pshufhw(A, 0)
      89  #define __builtin_ia32_pshuflw(A, N) __builtin_ia32_pshuflw(A, 0)
      90  #define __builtin_ia32_pshufd(A, N) __builtin_ia32_pshufd(A, 0)
      91  #define __builtin_ia32_vec_set_v8hi(A, D, N) \
      92    __builtin_ia32_vec_set_v8hi(A, D, 0)
      93  #define __builtin_ia32_vec_ext_v8hi(A, N) __builtin_ia32_vec_ext_v8hi(A, 0)
      94  #define __builtin_ia32_shufpd(A, B, N) __builtin_ia32_shufpd(A, B, 0)
      95  
      96  /* xmmintrin.h */
      97  #define __builtin_ia32_prefetch(A, B, C, D) __builtin_ia32_prefetch(A, 0, 3, 0)
      98  #define __builtin_ia32_pshufw(A, N) __builtin_ia32_pshufw(A, 0)
      99  #define __builtin_ia32_vec_set_v4hi(A, D, N) \
     100    __builtin_ia32_vec_set_v4hi(A, D, 0)
     101  #define __builtin_ia32_vec_ext_v4hi(A, N) __builtin_ia32_vec_ext_v4hi(A, 0)
     102  #define __builtin_ia32_shufps(A, B, N) __builtin_ia32_shufps(A, B, 0)
     103  
     104  /* immintrin.h */
     105  #define __builtin_ia32_blendpd256(X, Y, M) __builtin_ia32_blendpd256(X, Y, 1)
     106  #define __builtin_ia32_blendps256(X, Y, M) __builtin_ia32_blendps256(X, Y, 1)
     107  #define __builtin_ia32_dpps256(X, Y, M) __builtin_ia32_dpps256(X, Y, 1)
     108  #define __builtin_ia32_shufpd256(X, Y, M) __builtin_ia32_shufpd256(X, Y, 1)
     109  #define __builtin_ia32_shufps256(X, Y, M) __builtin_ia32_shufps256(X, Y, 1)
     110  #define __builtin_ia32_cmpsd(X, Y, O) __builtin_ia32_cmpsd(X, Y, 1)
     111  #define __builtin_ia32_cmpss(X, Y, O) __builtin_ia32_cmpss(X, Y, 1)
     112  #define __builtin_ia32_cmppd(X, Y, O) __builtin_ia32_cmppd(X, Y, 1)
     113  #define __builtin_ia32_cmpps(X, Y, O) __builtin_ia32_cmpps(X, Y, 1)
     114  #define __builtin_ia32_cmppd256(X, Y, O) __builtin_ia32_cmppd256(X, Y, 1)
     115  #define __builtin_ia32_cmpps256(X, Y, O) __builtin_ia32_cmpps256(X, Y, 1)
     116  #define __builtin_ia32_vextractf128_pd256(X, N) __builtin_ia32_vextractf128_pd256(X, 1)
     117  #define __builtin_ia32_vextractf128_ps256(X, N) __builtin_ia32_vextractf128_ps256(X, 1)
     118  #define __builtin_ia32_vextractf128_si256(X, N) __builtin_ia32_vextractf128_si256(X, 1)
     119  #define __builtin_ia32_vpermilpd(X, N) __builtin_ia32_vpermilpd(X, 1)
     120  #define __builtin_ia32_vpermilpd256(X, N) __builtin_ia32_vpermilpd256(X, 1)
     121  #define __builtin_ia32_vpermilps(X, N) __builtin_ia32_vpermilps(X, 1)
     122  #define __builtin_ia32_vpermilps256(X, N) __builtin_ia32_vpermilps256(X, 1)
     123  #define __builtin_ia32_vpermil2pd(X, Y, C, I) __builtin_ia32_vpermil2pd(X, Y, C, 1)
     124  #define __builtin_ia32_vpermil2pd256(X, Y, C, I) __builtin_ia32_vpermil2pd256(X, Y, C, 1)
     125  #define __builtin_ia32_vpermil2ps(X, Y, C, I) __builtin_ia32_vpermil2ps(X, Y, C, 1)
     126  #define __builtin_ia32_vpermil2ps256(X, Y, C, I) __builtin_ia32_vpermil2ps256(X, Y, C, 1)
     127  #define __builtin_ia32_vperm2f128_pd256(X, Y, C) __builtin_ia32_vperm2f128_pd256(X, Y, 1)
     128  #define __builtin_ia32_vperm2f128_ps256(X, Y, C) __builtin_ia32_vperm2f128_ps256(X, Y, 1)
     129  #define __builtin_ia32_vperm2f128_si256(X, Y, C) __builtin_ia32_vperm2f128_si256(X, Y, 1)
     130  #define __builtin_ia32_vinsertf128_pd256(X, Y, C) __builtin_ia32_vinsertf128_pd256(X, Y, 1)
     131  #define __builtin_ia32_vinsertf128_ps256(X, Y, C) __builtin_ia32_vinsertf128_ps256(X, Y, 1)
     132  #define __builtin_ia32_vinsertf128_si256(X, Y, C) __builtin_ia32_vinsertf128_si256(X, Y, 1)
     133  #define __builtin_ia32_roundpd256(V, M) __builtin_ia32_roundpd256(V, 1)
     134  #define __builtin_ia32_roundps256(V, M) __builtin_ia32_roundps256(V, 1)
     135  #define __builtin_ia32_vcvtps2ph(A, I) __builtin_ia32_vcvtps2ph(A, 1)
     136  #define __builtin_ia32_vcvtps2ph256(A, I) __builtin_ia32_vcvtps2ph256(A, 1)
     137  
     138  /* xopintrin.h */
     139  #define __builtin_ia32_vprotbi(A, B) __builtin_ia32_vprotbi(A,1)
     140  #define __builtin_ia32_vprotwi(A, B) __builtin_ia32_vprotwi(A,1)
     141  #define __builtin_ia32_vprotdi(A, B) __builtin_ia32_vprotdi(A,1)
     142  #define __builtin_ia32_vprotqi(A, B) __builtin_ia32_vprotqi(A,1)
     143  
     144  /* lwpintrin.h */
     145  #define __builtin_ia32_lwpval32(D2, D1, F) __builtin_ia32_lwpval32 (D2, D1, 1)
     146  #define __builtin_ia32_lwpval64(D2, D1, F) __builtin_ia32_lwpval64 (D2, D1, 1)
     147  #define __builtin_ia32_lwpins32(D2, D1, F) __builtin_ia32_lwpins32 (D2, D1, 1)
     148  #define __builtin_ia32_lwpins64(D2, D1, F) __builtin_ia32_lwpins64 (D2, D1, 1)
     149  
     150  /* tbmintrin.h */
     151  #define __builtin_ia32_bextri_u32(X, Y) __builtin_ia32_bextr_u32 (X, 1)
     152  #define __builtin_ia32_bextri_u64(X, Y) __builtin_ia32_bextr_u64 (X, 1)
     153  
     154  /* avx2intrin.h */
     155  #define __builtin_ia32_mpsadbw256(X, Y, Z) __builtin_ia32_mpsadbw256 (X, Y, 1)
     156  #define __builtin_ia32_palignr256(X, Y, Z) __builtin_ia32_palignr256 (X, Y, 8)
     157  #define __builtin_ia32_pblendw256(X, Y, Z) __builtin_ia32_pblendw256 (X, Y, 1)
     158  #define __builtin_ia32_pshufd256(X, Y) __builtin_ia32_pshufd256(X, 1)
     159  #define __builtin_ia32_pshufhw256(X, Y) __builtin_ia32_pshufhw256(X, 1)
     160  #define __builtin_ia32_pshuflw256(X, Y) __builtin_ia32_pshuflw256(X, 1)
     161  #define __builtin_ia32_pslldqi256(X, Y) __builtin_ia32_pslldqi256(X, 8)
     162  #define __builtin_ia32_psrldqi256(X, Y) __builtin_ia32_psrldqi256(X, 8)
     163  #define __builtin_ia32_pblendd128(X, Y, Z) __builtin_ia32_pblendd128(X, Y, 1)
     164  #define __builtin_ia32_pblendd256(X, Y, Z) __builtin_ia32_pblendd256(X, Y, 1)
     165  #define __builtin_ia32_permdf256(X, Y) __builtin_ia32_permdf256(X, 1)
     166  #define __builtin_ia32_permdi256(X, Y) __builtin_ia32_permdi256(X, 1)
     167  #define __builtin_ia32_permti256(X, Y, Z) __builtin_ia32_permti256(X, Y, 1)
     168  #define __builtin_ia32_extract128i256(X, Y) __builtin_ia32_extract128i256(X, 1)
     169  #define __builtin_ia32_insert128i256(X, Y, Z) __builtin_ia32_insert128i256(X, Y, 1)
     170  #define __builtin_ia32_gathersiv2df(X, Y, Z, K, M) __builtin_ia32_gathersiv2df(X, Y, Z, K, 1)
     171  #define __builtin_ia32_gathersiv4df(X, Y, Z, K, M) __builtin_ia32_gathersiv4df(X, Y, Z, K, 1)
     172  #define __builtin_ia32_gatherdiv2df(X, Y, Z, K, M) __builtin_ia32_gatherdiv2df(X, Y, Z, K, 1)
     173  #define __builtin_ia32_gatherdiv4df(X, Y, Z, K, M) __builtin_ia32_gatherdiv4df(X, Y, Z, K, 1)
     174  #define __builtin_ia32_gathersiv4sf(X, Y, Z, K, M) __builtin_ia32_gathersiv4sf(X, Y, Z, K, 1)
     175  #define __builtin_ia32_gathersiv8sf(X, Y, Z, K, M) __builtin_ia32_gathersiv8sf(X, Y, Z, K, 1)
     176  #define __builtin_ia32_gatherdiv4sf(X, Y, Z, K, M) __builtin_ia32_gatherdiv4sf(X, Y, Z, K, 1)
     177  #define __builtin_ia32_gatherdiv4sf256(X, Y, Z, K, M) __builtin_ia32_gatherdiv4sf256(X, Y, Z, K, 1)
     178  #define __builtin_ia32_gathersiv2di(X, Y, Z, K, M) __builtin_ia32_gathersiv2di(X, Y, Z, K, 1)
     179  #define __builtin_ia32_gathersiv4di(X, Y, Z, K, M) __builtin_ia32_gathersiv4di(X, Y, Z, K, 1)
     180  #define __builtin_ia32_gatherdiv2di(X, Y, Z, K, M) __builtin_ia32_gatherdiv2di(X, Y, Z, K, 1)
     181  #define __builtin_ia32_gatherdiv4di(X, Y, Z, K, M) __builtin_ia32_gatherdiv4di(X, Y, Z, K, 1)
     182  #define __builtin_ia32_gathersiv4si(X, Y, Z, K, M) __builtin_ia32_gathersiv4si(X, Y, Z, K, 1)
     183  #define __builtin_ia32_gathersiv8si(X, Y, Z, K, M) __builtin_ia32_gathersiv8si(X, Y, Z, K, 1)
     184  #define __builtin_ia32_gatherdiv4si(X, Y, Z, K, M) __builtin_ia32_gatherdiv4si(X, Y, Z, K, 1)
     185  #define __builtin_ia32_gatherdiv4si256(X, Y, Z, K, M) __builtin_ia32_gatherdiv4si256(X, Y, Z, K, 1)
     186  
     187  /* rtmintrin.h */
     188  #define __builtin_ia32_xabort(M) __builtin_ia32_xabort(1)
     189  
     190  /* avx512fintrin.h */
     191  #define __builtin_ia32_kshiftlihi(A, B) __builtin_ia32_kshiftlihi(A, 8)
     192  #define __builtin_ia32_kshiftrihi(A, B) __builtin_ia32_kshiftrihi(A, 8)
     193  #define __builtin_ia32_addpd512_mask(A, B, C, D, E) __builtin_ia32_addpd512_mask(A, B, C, D, 8)
     194  #define __builtin_ia32_addps512_mask(A, B, C, D, E) __builtin_ia32_addps512_mask(A, B, C, D, 8)
     195  #define __builtin_ia32_addsd_round(A, B, C) __builtin_ia32_addsd_round(A, B, 8)
     196  #define __builtin_ia32_addsd_mask_round(A, B, C, D, E) __builtin_ia32_addsd_mask_round(A, B, C, D, 8)
     197  #define __builtin_ia32_addss_round(A, B, C) __builtin_ia32_addss_round(A, B, 8)
     198  #define __builtin_ia32_addss_mask_round(A, B, C, D, E) __builtin_ia32_addss_mask_round(A, B, C, D, 8)
     199  #define __builtin_ia32_alignd512_mask(A, B, F, D, E) __builtin_ia32_alignd512_mask(A, B, 1, D, E)
     200  #define __builtin_ia32_alignq512_mask(A, B, F, D, E) __builtin_ia32_alignq512_mask(A, B, 1, D, E)
     201  #define __builtin_ia32_cmpd512_mask(A, B, E, D) __builtin_ia32_cmpd512_mask(A, B, 1, D)
     202  #define __builtin_ia32_cmppd512_mask(A, B, F, D, E) __builtin_ia32_cmppd512_mask(A, B, 1, D, 8)
     203  #define __builtin_ia32_cmpps512_mask(A, B, F, D, E) __builtin_ia32_cmpps512_mask(A, B, 1, D, 8)
     204  #define __builtin_ia32_cmpq512_mask(A, B, E, D) __builtin_ia32_cmpq512_mask(A, B, 1, D)
     205  #define __builtin_ia32_cmpsd_mask(A, B, F, D, E) __builtin_ia32_cmpsd_mask(A, B, 1, D, 8)
     206  #define __builtin_ia32_cmpss_mask(A, B, F, D, E) __builtin_ia32_cmpss_mask(A, B, 1, D, 8)
     207  #define __builtin_ia32_cvtdq2ps512_mask(A, B, C, D) __builtin_ia32_cvtdq2ps512_mask(A, B, C, 8)
     208  #define __builtin_ia32_cvtpd2dq512_mask(A, B, C, D) __builtin_ia32_cvtpd2dq512_mask(A, B, C, 8)
     209  #define __builtin_ia32_cvtpd2ps512_mask(A, B, C, D) __builtin_ia32_cvtpd2ps512_mask(A, B, C, 8)
     210  #define __builtin_ia32_cvtpd2udq512_mask(A, B, C, D) __builtin_ia32_cvtpd2udq512_mask(A, B, C, 8)
     211  #define __builtin_ia32_cvtps2dq512_mask(A, B, C, D) __builtin_ia32_cvtps2dq512_mask(A, B, C, 8)
     212  #define __builtin_ia32_cvtps2pd512_mask(A, B, C, D) __builtin_ia32_cvtps2pd512_mask(A, B, C, 8)
     213  #define __builtin_ia32_cvtps2udq512_mask(A, B, C, D) __builtin_ia32_cvtps2udq512_mask(A, B, C, 8)
     214  #define __builtin_ia32_cvtsd2ss_round(A, B, C) __builtin_ia32_cvtsd2ss_round(A, B, 8)
     215  #define __builtin_ia32_cvtss2sd_round(A, B, C) __builtin_ia32_cvtss2sd_round(A, B, 4)
     216  #define __builtin_ia32_cvtsi2sd64(A, B, C) __builtin_ia32_cvtsi2sd64(A, B, 8)
     217  #define __builtin_ia32_cvtsi2ss32(A, B, C) __builtin_ia32_cvtsi2ss32(A, B, 8)
     218  #define __builtin_ia32_cvtsi2ss64(A, B, C) __builtin_ia32_cvtsi2ss64(A, B, 8)
     219  #define __builtin_ia32_cvttpd2dq512_mask(A, B, C, D) __builtin_ia32_cvttpd2dq512_mask(A, B, C, 8)
     220  #define __builtin_ia32_cvttpd2udq512_mask(A, B, C, D) __builtin_ia32_cvttpd2udq512_mask(A, B, C, 8)
     221  #define __builtin_ia32_cvttps2dq512_mask(A, B, C, D) __builtin_ia32_cvttps2dq512_mask(A, B, C, 8)
     222  #define __builtin_ia32_cvttps2udq512_mask(A, B, C, D) __builtin_ia32_cvttps2udq512_mask(A, B, C, 8)
     223  #define __builtin_ia32_cvtudq2ps512_mask(A, B, C, D) __builtin_ia32_cvtudq2ps512_mask(A, B, C, 8)
     224  #define __builtin_ia32_cvtusi2sd64(A, B, C) __builtin_ia32_cvtusi2sd64(A, B, 8)
     225  #define __builtin_ia32_cvtusi2ss32(A, B, C) __builtin_ia32_cvtusi2ss32(A, B, 8)
     226  #define __builtin_ia32_cvtusi2ss64(A, B, C) __builtin_ia32_cvtusi2ss64(A, B, 8)
     227  #define __builtin_ia32_divpd512_mask(A, B, C, D, E) __builtin_ia32_divpd512_mask(A, B, C, D, 8)
     228  #define __builtin_ia32_divps512_mask(A, B, C, D, E) __builtin_ia32_divps512_mask(A, B, C, D, 8)
     229  #define __builtin_ia32_divsd_round(A, B, C) __builtin_ia32_divsd_round(A, B, 8)
     230  #define __builtin_ia32_divsd_mask_round(A, B, C, D, E) __builtin_ia32_divsd_mask_round(A, B, C, D, 8)
     231  #define __builtin_ia32_divss_round(A, B, C) __builtin_ia32_divss_round(A, B, 8)
     232  #define __builtin_ia32_divss_mask_round(A, B, C, D, E) __builtin_ia32_divss_mask_round(A, B, C, D, 8)
     233  #define __builtin_ia32_extractf32x4_mask(A, E, C, D) __builtin_ia32_extractf32x4_mask(A, 1, C, D)
     234  #define __builtin_ia32_extractf64x4_mask(A, E, C, D) __builtin_ia32_extractf64x4_mask(A, 1, C, D)
     235  #define __builtin_ia32_extracti32x4_mask(A, E, C, D) __builtin_ia32_extracti32x4_mask(A, 1, C, D)
     236  #define __builtin_ia32_extracti64x4_mask(A, E, C, D) __builtin_ia32_extracti64x4_mask(A, 1, C, D)
     237  #define __builtin_ia32_fixupimmpd512_mask(A, B, C, I, E, F) __builtin_ia32_fixupimmpd512_mask(A, B, C, 1, E, 8)
     238  #define __builtin_ia32_fixupimmpd512_maskz(A, B, C, I, E, F) __builtin_ia32_fixupimmpd512_maskz(A, B, C, 1, E, 8)
     239  #define __builtin_ia32_fixupimmps512_mask(A, B, C, I, E, F) __builtin_ia32_fixupimmps512_mask(A, B, C, 1, E, 8)
     240  #define __builtin_ia32_fixupimmps512_maskz(A, B, C, I, E, F) __builtin_ia32_fixupimmps512_maskz(A, B, C, 1, E, 8)
     241  #define __builtin_ia32_fixupimmsd_mask(A, B, C, I, E, F) __builtin_ia32_fixupimmsd_mask(A, B, C, 1, E, 8)
     242  #define __builtin_ia32_fixupimmsd_maskz(A, B, C, I, E, F) __builtin_ia32_fixupimmsd_maskz(A, B, C, 1, E, 8)
     243  #define __builtin_ia32_fixupimmss_mask(A, B, C, I, E, F) __builtin_ia32_fixupimmss_mask(A, B, C, 1, E, 8)
     244  #define __builtin_ia32_fixupimmss_maskz(A, B, C, I, E, F) __builtin_ia32_fixupimmss_maskz(A, B, C, 1, E, 8)
     245  #define __builtin_ia32_gatherdiv8df(A, B, C, D, F) __builtin_ia32_gatherdiv8df(A, B, C, D, 8)
     246  #define __builtin_ia32_gatherdiv8di(A, B, C, D, F) __builtin_ia32_gatherdiv8di(A, B, C, D, 8)
     247  #define __builtin_ia32_gatherdiv16sf(A, B, C, D, F) __builtin_ia32_gatherdiv16sf(A, B, C, D, 8)
     248  #define __builtin_ia32_gatherdiv16si(A, B, C, D, F) __builtin_ia32_gatherdiv16si(A, B, C, D, 8)
     249  #define __builtin_ia32_gathersiv16sf(A, B, C, D, F) __builtin_ia32_gathersiv16sf(A, B, C, D, 8)
     250  #define __builtin_ia32_gathersiv16si(A, B, C, D, F) __builtin_ia32_gathersiv16si(A, B, C, D, 8)
     251  #define __builtin_ia32_gathersiv8df(A, B, C, D, F) __builtin_ia32_gathersiv8df(A, B, C, D, 8)
     252  #define __builtin_ia32_gathersiv8di(A, B, C, D, F) __builtin_ia32_gathersiv8di(A, B, C, D, 8)
     253  #define __builtin_ia32_getexppd512_mask(A, B, C, D) __builtin_ia32_getexppd512_mask(A, B, C, 8)
     254  #define __builtin_ia32_getexpps512_mask(A, B, C, D) __builtin_ia32_getexpps512_mask(A, B, C, 8)
     255  #define __builtin_ia32_getexpsd128_round(A, B, C) __builtin_ia32_getexpsd128_round(A, B, 4)
     256  #define __builtin_ia32_getexpsd_mask_round(A, B, C, D, E) __builtin_ia32_getexpsd_mask_round(A, B, C, D, 4)
     257  #define __builtin_ia32_getexpss128_round(A, B, C) __builtin_ia32_getexpss128_round(A, B, 4)
     258  #define __builtin_ia32_getexpss_mask_round(A, B, C, D, E) __builtin_ia32_getexpss_mask_round(A, B, C, D, 4)
     259  #define __builtin_ia32_getmantpd512_mask(A, F, C, D, E) __builtin_ia32_getmantpd512_mask(A, 1, C, D, 8)
     260  #define __builtin_ia32_getmantps512_mask(A, F, C, D, E) __builtin_ia32_getmantps512_mask(A, 1, C, D, 8)
     261  #define __builtin_ia32_getmantsd_round(A, B, C, D) __builtin_ia32_getmantsd_round(A, B, 1, 4)
     262  #define __builtin_ia32_getmantsd_mask_round(A, B, C, W, U, D) __builtin_ia32_getmantsd_mask_round(A, B, 1, W, U, 4)
     263  #define __builtin_ia32_getmantss_round(A, B, C, D) __builtin_ia32_getmantss_round(A, B, 1, 4)
     264  #define __builtin_ia32_getmantss_mask_round(A, B, C, W, U, D) __builtin_ia32_getmantss_mask_round(A, B, 1, W, U, 4)
     265  #define __builtin_ia32_insertf32x4_mask(A, B, F, D, E) __builtin_ia32_insertf32x4_mask(A, B, 1, D, E)
     266  #define __builtin_ia32_insertf64x4_mask(A, B, F, D, E) __builtin_ia32_insertf64x4_mask(A, B, 1, D, E)
     267  #define __builtin_ia32_inserti32x4_mask(A, B, F, D, E) __builtin_ia32_inserti32x4_mask(A, B, 1, D, E)
     268  #define __builtin_ia32_inserti64x4_mask(A, B, F, D, E) __builtin_ia32_inserti64x4_mask(A, B, 1, D, E)
     269  #define __builtin_ia32_maxpd512_mask(A, B, C, D, E) __builtin_ia32_maxpd512_mask(A, B, C, D, 8)
     270  #define __builtin_ia32_maxps512_mask(A, B, C, D, E) __builtin_ia32_maxps512_mask(A, B, C, D, 8)
     271  #define __builtin_ia32_maxsd_round(A, B, C) __builtin_ia32_maxsd_round(A, B, 4)
     272  #define __builtin_ia32_maxsd_mask_round(A, B, C, D, E) __builtin_ia32_maxsd_mask_round(A, B, C, D, 4)
     273  #define __builtin_ia32_maxss_round(A, B, C) __builtin_ia32_maxss_round(A, B, 4)
     274  #define __builtin_ia32_maxss_mask_round(A, B, C, D, E) __builtin_ia32_maxss_mask_round(A, B, C, D, 4)
     275  #define __builtin_ia32_minpd512_mask(A, B, C, D, E) __builtin_ia32_minpd512_mask(A, B, C, D, 8)
     276  #define __builtin_ia32_minps512_mask(A, B, C, D, E) __builtin_ia32_minps512_mask(A, B, C, D, 8)
     277  #define __builtin_ia32_minsd_round(A, B, C) __builtin_ia32_minsd_round(A, B, 4)
     278  #define __builtin_ia32_minsd_mask_round(A, B, C, D, E) __builtin_ia32_minsd_mask_round(A, B, C, D, 4)
     279  #define __builtin_ia32_minss_round(A, B, C) __builtin_ia32_minss_round(A, B, 4)
     280  #define __builtin_ia32_minss_mask_round(A, B, C, D, E) __builtin_ia32_minss_mask_round(A, B, C, D, 4)
     281  #define __builtin_ia32_mulpd512_mask(A, B, C, D, E) __builtin_ia32_mulpd512_mask(A, B, C, D, 8)
     282  #define __builtin_ia32_mulps512_mask(A, B, C, D, E) __builtin_ia32_mulps512_mask(A, B, C, D, 8)
     283  #define __builtin_ia32_mulsd_round(A, B, C) __builtin_ia32_mulsd_round(A, B, 8)
     284  #define __builtin_ia32_mulsd_mask_round(A, B, C, D, E) __builtin_ia32_mulsd_mask_round(A, B, C, D, 8)
     285  #define __builtin_ia32_mulss_round(A, B, C) __builtin_ia32_mulss_round(A, B, 8)
     286  #define __builtin_ia32_mulss_mask_round(A, B, C, D, E) __builtin_ia32_mulss_mask_round(A, B, C, D, 8)
     287  #define __builtin_ia32_permdf512_mask(A, E, C, D) __builtin_ia32_permdf512_mask(A, 1, C, D)
     288  #define __builtin_ia32_permdi512_mask(A, E, C, D) __builtin_ia32_permdi512_mask(A, 1, C, D)
     289  #define __builtin_ia32_prold512_mask(A, E, C, D) __builtin_ia32_prold512_mask(A, 1, C, D)
     290  #define __builtin_ia32_prolq512_mask(A, E, C, D) __builtin_ia32_prolq512_mask(A, 1, C, D)
     291  #define __builtin_ia32_prord512_mask(A, E, C, D) __builtin_ia32_prord512_mask(A, 1, C, D)
     292  #define __builtin_ia32_prorq512_mask(A, E, C, D) __builtin_ia32_prorq512_mask(A, 1, C, D)
     293  #define __builtin_ia32_pshufd512_mask(A, E, C, D) __builtin_ia32_pshufd512_mask(A, 1, C, D)
     294  #define __builtin_ia32_pslldi512_mask(A, E, C, D) __builtin_ia32_pslldi512_mask(A, 1, C, D)
     295  #define __builtin_ia32_psllqi512_mask(A, E, C, D) __builtin_ia32_psllqi512_mask(A, 1, C, D)
     296  #define __builtin_ia32_psradi512_mask(A, E, C, D) __builtin_ia32_psradi512_mask(A, 1, C, D)
     297  #define __builtin_ia32_psraqi512_mask(A, E, C, D) __builtin_ia32_psraqi512_mask(A, 1, C, D)
     298  #define __builtin_ia32_psrldi512_mask(A, E, C, D) __builtin_ia32_psrldi512_mask(A, 1, C, D)
     299  #define __builtin_ia32_psrlqi512_mask(A, E, C, D) __builtin_ia32_psrlqi512_mask(A, 1, C, D)
     300  #define __builtin_ia32_pternlogd512_mask(A, B, C, F, E) __builtin_ia32_pternlogd512_mask(A, B, C, 1, E)
     301  #define __builtin_ia32_pternlogd512_maskz(A, B, C, F, E) __builtin_ia32_pternlogd512_maskz(A, B, C, 1, E)
     302  #define __builtin_ia32_pternlogq512_mask(A, B, C, F, E) __builtin_ia32_pternlogq512_mask(A, B, C, 1, E)
     303  #define __builtin_ia32_pternlogq512_maskz(A, B, C, F, E) __builtin_ia32_pternlogq512_maskz(A, B, C, 1, E)
     304  #define __builtin_ia32_rndscalepd_mask(A, F, C, D, E) __builtin_ia32_rndscalepd_mask(A, 1, C, D, 8)
     305  #define __builtin_ia32_rndscaleps_mask(A, F, C, D, E) __builtin_ia32_rndscaleps_mask(A, 1, C, D, 8)
     306  #define __builtin_ia32_rndscalesd_mask_round(A, B, C, D, E, F) __builtin_ia32_rndscalesd_mask_round(A, B, 1, D, E, 4)
     307  #define __builtin_ia32_rndscaless_mask_round(A, B, C, D, E, F) __builtin_ia32_rndscaless_mask_round(A, B, 1, D, E, 4)
     308  #define __builtin_ia32_scalefpd512_mask(A, B, C, D, E) __builtin_ia32_scalefpd512_mask(A, B, C, D, 8)
     309  #define __builtin_ia32_scalefps512_mask(A, B, C, D, E) __builtin_ia32_scalefps512_mask(A, B, C, D, 8)
     310  #define __builtin_ia32_scalefsd_mask_round(A, B, C, D, E) __builtin_ia32_scalefsd_mask_round(A, B, C, D, 8)
     311  #define __builtin_ia32_scalefss_mask_round(A, B, C, D, E) __builtin_ia32_scalefss_mask_round(A, B, C, D, 8)
     312  #define __builtin_ia32_scatterdiv8df(A, B, C, D, F) __builtin_ia32_scatterdiv8df(A, B, C, D, 8)
     313  #define __builtin_ia32_scatterdiv8di(A, B, C, D, F) __builtin_ia32_scatterdiv8di(A, B, C, D, 8)
     314  #define __builtin_ia32_scatterdiv16sf(A, B, C, D, F) __builtin_ia32_scatterdiv16sf(A, B, C, D, 8)
     315  #define __builtin_ia32_scatterdiv16si(A, B, C, D, F) __builtin_ia32_scatterdiv16si(A, B, C, D, 8)
     316  #define __builtin_ia32_scattersiv16sf(A, B, C, D, F) __builtin_ia32_scattersiv16sf(A, B, C, D, 8)
     317  #define __builtin_ia32_scattersiv16si(A, B, C, D, F) __builtin_ia32_scattersiv16si(A, B, C, D, 8)
     318  #define __builtin_ia32_scattersiv8df(A, B, C, D, F) __builtin_ia32_scattersiv8df(A, B, C, D, 8)
     319  #define __builtin_ia32_scattersiv8di(A, B, C, D, F) __builtin_ia32_scattersiv8di(A, B, C, D, 8)
     320  #define __builtin_ia32_shuf_f32x4_mask(A, B, F, D, E) __builtin_ia32_shuf_f32x4_mask(A, B, 1, D, E)
     321  #define __builtin_ia32_shuf_f64x2_mask(A, B, F, D, E) __builtin_ia32_shuf_f64x2_mask(A, B, 1, D, E)
     322  #define __builtin_ia32_shuf_i32x4_mask(A, B, F, D, E) __builtin_ia32_shuf_i32x4_mask(A, B, 1, D, E)
     323  #define __builtin_ia32_shuf_i64x2_mask(A, B, F, D, E) __builtin_ia32_shuf_i64x2_mask(A, B, 1, D, E)
     324  #define __builtin_ia32_shufpd512_mask(A, B, F, D, E) __builtin_ia32_shufpd512_mask(A, B, 1, D, E)
     325  #define __builtin_ia32_shufps512_mask(A, B, F, D, E) __builtin_ia32_shufps512_mask(A, B, 1, D, E)
     326  #define __builtin_ia32_sqrtpd512_mask(A, B, C, D) __builtin_ia32_sqrtpd512_mask(A, B, C, 8)
     327  #define __builtin_ia32_sqrtps512_mask(A, B, C, D) __builtin_ia32_sqrtps512_mask(A, B, C, 8)
     328  #define __builtin_ia32_sqrtss_mask_round(A, B, C, D, E) __builtin_ia32_sqrtss_mask_round(A, B, C, D, 8)
     329  #define __builtin_ia32_sqrtsd_mask_round(A, B, C, D, E) __builtin_ia32_sqrtsd_mask_round(A, B, C, D, 8)
     330  #define __builtin_ia32_subpd512_mask(A, B, C, D, E) __builtin_ia32_subpd512_mask(A, B, C, D, 8)
     331  #define __builtin_ia32_subps512_mask(A, B, C, D, E) __builtin_ia32_subps512_mask(A, B, C, D, 8)
     332  #define __builtin_ia32_subsd_round(A, B, C) __builtin_ia32_subsd_round(A, B, 8)
     333  #define __builtin_ia32_subsd_mask_round(A, B, C, D, E) __builtin_ia32_subsd_mask_round(A, B, C, D, 8)
     334  #define __builtin_ia32_subss_round(A, B, C) __builtin_ia32_subss_round(A, B, 8)
     335  #define __builtin_ia32_subss_mask_round(A, B, C, D, E) __builtin_ia32_subss_mask_round(A, B, C, D, 8)
     336  #define __builtin_ia32_ucmpd512_mask(A, B, E, D) __builtin_ia32_ucmpd512_mask(A, B, 1, D)
     337  #define __builtin_ia32_ucmpq512_mask(A, B, E, D) __builtin_ia32_ucmpq512_mask(A, B, 1, D)
     338  #define __builtin_ia32_vcomisd(A, B, C, D) __builtin_ia32_vcomisd(A, B, 1, 8)
     339  #define __builtin_ia32_vcomiss(A, B, C, D) __builtin_ia32_vcomiss(A, B, 1, 8)
     340  #define __builtin_ia32_vcvtph2ps512_mask(A, B, C, D) __builtin_ia32_vcvtph2ps512_mask(A, B, C, 8)
     341  #define __builtin_ia32_vcvtps2ph512_mask(A, E, C, D) __builtin_ia32_vcvtps2ph512_mask(A, 1, C, D)
     342  #define __builtin_ia32_vcvtsd2si32(A, B) __builtin_ia32_vcvtsd2si32(A, 8)
     343  #define __builtin_ia32_vcvtsd2si64(A, B) __builtin_ia32_vcvtsd2si64(A, 8)
     344  #define __builtin_ia32_vcvtsd2usi32(A, B) __builtin_ia32_vcvtsd2usi32(A, 8)
     345  #define __builtin_ia32_vcvtsd2usi64(A, B) __builtin_ia32_vcvtsd2usi64(A, 8)
     346  #define __builtin_ia32_vcvtss2si32(A, B) __builtin_ia32_vcvtss2si32(A, 8)
     347  #define __builtin_ia32_vcvtss2si64(A, B) __builtin_ia32_vcvtss2si64(A, 8)
     348  #define __builtin_ia32_vcvtss2usi32(A, B) __builtin_ia32_vcvtss2usi32(A, 8)
     349  #define __builtin_ia32_vcvtss2usi64(A, B) __builtin_ia32_vcvtss2usi64(A, 8)
     350  #define __builtin_ia32_vcvttsd2si32(A, B) __builtin_ia32_vcvttsd2si32(A, 8)
     351  #define __builtin_ia32_vcvttsd2si64(A, B) __builtin_ia32_vcvttsd2si64(A, 8)
     352  #define __builtin_ia32_vcvttsd2usi32(A, B) __builtin_ia32_vcvttsd2usi32(A, 8)
     353  #define __builtin_ia32_vcvttsd2usi64(A, B) __builtin_ia32_vcvttsd2usi64(A, 8)
     354  #define __builtin_ia32_vcvttss2si32(A, B) __builtin_ia32_vcvttss2si32(A, 8)
     355  #define __builtin_ia32_vcvttss2si64(A, B) __builtin_ia32_vcvttss2si64(A, 8)
     356  #define __builtin_ia32_vcvttss2usi32(A, B) __builtin_ia32_vcvttss2usi32(A, 8)
     357  #define __builtin_ia32_vcvttss2usi64(A, B) __builtin_ia32_vcvttss2usi64(A, 8)
     358  #define __builtin_ia32_vfmaddpd512_mask(A, B, C, D, E) __builtin_ia32_vfmaddpd512_mask(A, B, C, D, 8)
     359  #define __builtin_ia32_vfmaddpd512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddpd512_mask3(A, B, C, D, 8)
     360  #define __builtin_ia32_vfmaddpd512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddpd512_maskz(A, B, C, D, 8)
     361  #define __builtin_ia32_vfmaddps512_mask(A, B, C, D, E) __builtin_ia32_vfmaddps512_mask(A, B, C, D, 8)
     362  #define __builtin_ia32_vfmaddps512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddps512_mask3(A, B, C, D, 8)
     363  #define __builtin_ia32_vfmaddps512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddps512_maskz(A, B, C, D, 8)
     364  #define __builtin_ia32_vfmaddsd3_round(A, B, C, D) __builtin_ia32_vfmaddsd3_round(A, B, C, 8)
     365  #define __builtin_ia32_vfmaddss3_round(A, B, C, D) __builtin_ia32_vfmaddss3_round(A, B, C, 8)
     366  #define __builtin_ia32_vfmaddsubpd512_mask(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_mask(A, B, C, D, 8)
     367  #define __builtin_ia32_vfmaddsubpd512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_mask3(A, B, C, D, 8)
     368  #define __builtin_ia32_vfmaddsubpd512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_maskz(A, B, C, D, 8)
     369  #define __builtin_ia32_vfmaddsubps512_mask(A, B, C, D, E) __builtin_ia32_vfmaddsubps512_mask(A, B, C, D, 8)
     370  #define __builtin_ia32_vfmaddsubps512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsubps512_mask3(A, B, C, D, 8)
     371  #define __builtin_ia32_vfmaddsubps512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsubps512_maskz(A, B, C, D, 8)
     372  #define __builtin_ia32_vfmsubaddpd512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubaddpd512_mask3(A, B, C, D, 8)
     373  #define __builtin_ia32_vfmsubaddps512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubaddps512_mask3(A, B, C, D, 8)
     374  #define __builtin_ia32_vfmsubpd512_mask(A, B, C, D, E) __builtin_ia32_vfmsubpd512_mask(A, B, C, D, 8)
     375  #define __builtin_ia32_vfmsubpd512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubpd512_mask3(A, B, C, D, 8)
     376  #define __builtin_ia32_vfmsubpd512_maskz(A, B, C, D, E) __builtin_ia32_vfmsubpd512_maskz(A, B, C, D, 8)
     377  #define __builtin_ia32_vfmsubps512_mask(A, B, C, D, E) __builtin_ia32_vfmsubps512_mask(A, B, C, D, 8)
     378  #define __builtin_ia32_vfmsubps512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubps512_mask3(A, B, C, D, 8)
     379  #define __builtin_ia32_vfmsubps512_maskz(A, B, C, D, E) __builtin_ia32_vfmsubps512_maskz(A, B, C, D, 8)
     380  #define __builtin_ia32_vfnmaddpd512_mask(A, B, C, D, E) __builtin_ia32_vfnmaddpd512_mask(A, B, C, D, 8)
     381  #define __builtin_ia32_vfnmaddpd512_mask3(A, B, C, D, E) __builtin_ia32_vfnmaddpd512_mask3(A, B, C, D, 8)
     382  #define __builtin_ia32_vfnmaddpd512_maskz(A, B, C, D, E) __builtin_ia32_vfnmaddpd512_maskz(A, B, C, D, 8)
     383  #define __builtin_ia32_vfnmaddps512_mask(A, B, C, D, E) __builtin_ia32_vfnmaddps512_mask(A, B, C, D, 8)
     384  #define __builtin_ia32_vfnmaddps512_mask3(A, B, C, D, E) __builtin_ia32_vfnmaddps512_mask3(A, B, C, D, 8)
     385  #define __builtin_ia32_vfnmaddps512_maskz(A, B, C, D, E) __builtin_ia32_vfnmaddps512_maskz(A, B, C, D, 8)
     386  #define __builtin_ia32_vfnmsubpd512_mask(A, B, C, D, E) __builtin_ia32_vfnmsubpd512_mask(A, B, C, D, 8)
     387  #define __builtin_ia32_vfnmsubpd512_mask3(A, B, C, D, E) __builtin_ia32_vfnmsubpd512_mask3(A, B, C, D, 8)
     388  #define __builtin_ia32_vfnmsubpd512_maskz(A, B, C, D, E) __builtin_ia32_vfnmsubpd512_maskz(A, B, C, D, 8)
     389  #define __builtin_ia32_vfnmsubps512_mask(A, B, C, D, E) __builtin_ia32_vfnmsubps512_mask(A, B, C, D, 8)
     390  #define __builtin_ia32_vfnmsubps512_mask3(A, B, C, D, E) __builtin_ia32_vfnmsubps512_mask3(A, B, C, D, 8)
     391  #define __builtin_ia32_vfnmsubps512_maskz(A, B, C, D, E) __builtin_ia32_vfnmsubps512_maskz(A, B, C, D, 8)
     392  #define __builtin_ia32_vpermilpd512_mask(A, E, C, D) __builtin_ia32_vpermilpd512_mask(A, 1, C, D)
     393  #define __builtin_ia32_vpermilps512_mask(A, E, C, D) __builtin_ia32_vpermilps512_mask(A, 1, C, D)
     394  #define __builtin_ia32_vfmaddsd3_mask(A, B, C, D, E) __builtin_ia32_vfmaddsd3_mask(A, B, C, D, 8)
     395  #define __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, 8)
     396  #define __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, 8)
     397  #define __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, E) __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, 8)
     398  #define __builtin_ia32_vfmaddss3_mask(A, B, C, D, E) __builtin_ia32_vfmaddss3_mask(A, B, C, D, 8)
     399  #define __builtin_ia32_vfmaddss3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddss3_mask3(A, B, C, D, 8)
     400  #define __builtin_ia32_vfmaddss3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddss3_maskz(A, B, C, D, 8)
     401  #define __builtin_ia32_vfmsubss3_mask3(A, B, C, D, E) __builtin_ia32_vfmsubss3_mask3(A, B, C, D, 8)
     402  #define __builtin_ia32_cvtsd2ss_mask_round(A, B, C, D, E) __builtin_ia32_cvtsd2ss_mask_round(A, B, C, D, 8)
     403  #define __builtin_ia32_cvtss2sd_mask_round(A, B, C, D, E) __builtin_ia32_cvtss2sd_mask_round(A, B, C, D, 8)
     404  
     405  /* avx512pfintrin.h */
     406  #define __builtin_ia32_gatherpfdps(A, B, C, D, E) __builtin_ia32_gatherpfdps(A, B, C, 1, _MM_HINT_T0)
     407  #define __builtin_ia32_gatherpfqps(A, B, C, D, E) __builtin_ia32_gatherpfqps(A, B, C, 1, _MM_HINT_T0)
     408  #define __builtin_ia32_scatterpfdps(A, B, C, D, E) __builtin_ia32_scatterpfdps(A, B, C, 1, _MM_HINT_T0)
     409  #define __builtin_ia32_scatterpfqps(A, B, C, D, E) __builtin_ia32_scatterpfqps(A, B, C, 1, _MM_HINT_T0)
     410  #define __builtin_ia32_gatherpfdpd(A, B, C, D, E) __builtin_ia32_gatherpfdpd(A, B, C, 1, _MM_HINT_T0)
     411  #define __builtin_ia32_gatherpfqpd(A, B, C, D, E) __builtin_ia32_gatherpfqpd(A, B, C, 1, _MM_HINT_T0)
     412  #define __builtin_ia32_scatterpfdpd(A, B, C, D, E) __builtin_ia32_scatterpfdpd(A, B, C, 1, _MM_HINT_T0)
     413  #define __builtin_ia32_scatterpfqpd(A, B, C, D, E) __builtin_ia32_scatterpfqpd(A, B, C, 1, _MM_HINT_T0)
     414  
     415  /* avx512erintrin.h */
     416  #define __builtin_ia32_exp2pd_mask(A, B, C, D) __builtin_ia32_exp2pd_mask (A, B, C, 8)
     417  #define __builtin_ia32_exp2ps_mask(A, B, C, D) __builtin_ia32_exp2ps_mask (A, B, C, 8)
     418  #define __builtin_ia32_rcp28pd_mask(A, B, C, D) __builtin_ia32_rcp28pd_mask (A, B, C, 8)
     419  #define __builtin_ia32_rcp28ps_mask(A, B, C, D) __builtin_ia32_rcp28ps_mask (A, B, C, 8)
     420  #define __builtin_ia32_rsqrt28pd_mask(A, B, C, D) __builtin_ia32_rsqrt28pd_mask (A, B, C, 8)
     421  #define __builtin_ia32_rsqrt28ps_mask(A, B, C, D) __builtin_ia32_rsqrt28ps_mask (A, B, C, 8)
     422  #define __builtin_ia32_rcp28sd_round(A, B, C) __builtin_ia32_rcp28sd_round(A, B, 8)
     423  #define __builtin_ia32_rcp28ss_round(A, B, C) __builtin_ia32_rcp28ss_round(A, B, 8)
     424  #define __builtin_ia32_rsqrt28sd_round(A, B, C) __builtin_ia32_rsqrt28sd_round(A, B, 8)
     425  #define __builtin_ia32_rsqrt28ss_round(A, B, C) __builtin_ia32_rsqrt28ss_round(A, B, 8)
     426  #define __builtin_ia32_rcp28sd_mask_round(A, B, C, D, E) __builtin_ia32_rcp28sd_mask_round(A, B, C, D, 8)
     427  #define __builtin_ia32_rcp28ss_mask_round(A, B, C, D, E) __builtin_ia32_rcp28ss_mask_round(A, B, C, D, 8)
     428  #define __builtin_ia32_rsqrt28sd_mask_round(A, B, C, D, E) __builtin_ia32_rsqrt28sd_mask_round(A, B, C, D, 8)
     429  #define __builtin_ia32_rsqrt28ss_mask_round(A, B, C, D, E) __builtin_ia32_rsqrt28ss_mask_round(A, B, C, D, 8)
     430  
     431  /* shaintrin.h */
     432  #define __builtin_ia32_sha1rnds4(A, B, C) __builtin_ia32_sha1rnds4(A, B, 1)
     433  
     434  /* avx512bwintrin.h */
     435  #define __builtin_ia32_kshiftlisi(A, B) __builtin_ia32_kshiftlisi(A, 8)
     436  #define __builtin_ia32_kshiftlidi(A, B) __builtin_ia32_kshiftlidi(A, 8)
     437  #define __builtin_ia32_kshiftrisi(A, B) __builtin_ia32_kshiftrisi(A, 8)
     438  #define __builtin_ia32_kshiftridi(A, B) __builtin_ia32_kshiftridi(A, 8)
     439  #define __builtin_ia32_ucmpw512_mask(A, B, E, D) __builtin_ia32_ucmpw512_mask(A, B, 1, D)
     440  #define __builtin_ia32_ucmpb512_mask(A, B, E, D) __builtin_ia32_ucmpb512_mask(A, B, 1, D)
     441  #define __builtin_ia32_psrlwi512_mask(A, E, C, D) __builtin_ia32_psrlwi512_mask(A, 1, C, D)
     442  #define __builtin_ia32_psrawi512_mask(A, E, C, D) __builtin_ia32_psrawi512_mask(A, 1, C, D)
     443  #define __builtin_ia32_psllwi512_mask(A, E, C, D) __builtin_ia32_psllwi512_mask(A, 1, C, D)
     444  #define __builtin_ia32_pshuflw512_mask(A, E, C, D) __builtin_ia32_pshuflw512_mask(A, 1, C, D)
     445  #define __builtin_ia32_pshufhw512_mask(A, E, C, D) __builtin_ia32_pshufhw512_mask(A, 1, C, D)
     446  #define __builtin_ia32_palignr512_mask(A, B, F, D, E) __builtin_ia32_palignr512_mask(A, B, 8, D, E)
     447  #define __builtin_ia32_palignr512(A, B, D) __builtin_ia32_palignr512(A, B, 8)
     448  #define __builtin_ia32_dbpsadbw512_mask(A, B, F, D, E) __builtin_ia32_dbpsadbw512_mask(A, B, 1, D, E)
     449  #define __builtin_ia32_cmpw512_mask(A, B, E, D) __builtin_ia32_cmpw512_mask(A, B, 1, D)
     450  #define __builtin_ia32_cmpb512_mask(A, B, E, D) __builtin_ia32_cmpb512_mask(A, B, 1, D)
     451  #define __builtin_ia32_psrldq512(A, B) __builtin_ia32_psrldq512(A, 8)
     452  #define __builtin_ia32_pslldq512(A, B) __builtin_ia32_pslldq512(A, 8)
     453  
     454  /* avx512dqintrin.h */
     455  #define __builtin_ia32_kshiftliqi(A, B) __builtin_ia32_kshiftliqi(A, 8)
     456  #define __builtin_ia32_kshiftriqi(A, B) __builtin_ia32_kshiftriqi(A, 8)
     457  #define __builtin_ia32_reducess_mask(A, B, F, W, U) __builtin_ia32_reducess_mask(A, B, 1, W, U)
     458  #define __builtin_ia32_reducesd_mask(A, B, F, W, U) __builtin_ia32_reducesd_mask(A, B, 1, W, U)
     459  #define __builtin_ia32_reduceps512_mask(A, E, C, D) __builtin_ia32_reduceps512_mask(A, 1, C, D)
     460  #define __builtin_ia32_reducepd512_mask(A, E, C, D) __builtin_ia32_reducepd512_mask(A, 1, C, D)
     461  #define __builtin_ia32_rangess128_mask_round(A, B, I, D, E, F) \
     462      __builtin_ia32_rangess128_mask_round(A, B, 1, D, E, 8)
     463  #define __builtin_ia32_rangesd128_mask_round(A, B, I, D, E, F) \
     464      __builtin_ia32_rangesd128_mask_round(A, B, 1, D, E, 8)
     465  #define __builtin_ia32_rangeps512_mask(A, B, I, D, E, F) __builtin_ia32_rangeps512_mask(A, B, 1, D, E, 8)
     466  #define __builtin_ia32_rangepd512_mask(A, B, I, D, E, F) __builtin_ia32_rangepd512_mask(A, B, 1, D, E, 8)
     467  #define __builtin_ia32_inserti64x2_512_mask(A, B, F, D, E) __builtin_ia32_inserti64x2_512_mask(A, B, 1, D, E)
     468  #define __builtin_ia32_inserti32x8_mask(A, B, F, D, E) __builtin_ia32_inserti32x8_mask(A, B, 1, D, E)
     469  #define __builtin_ia32_insertf64x2_512_mask(A, B, F, D, E) __builtin_ia32_insertf64x2_512_mask(A, B, 1, D, E)
     470  #define __builtin_ia32_insertf32x8_mask(A, B, F, D, E) __builtin_ia32_insertf32x8_mask(A, B, 1, D, E)
     471  #define __builtin_ia32_fpclassss_mask(A, D, U) __builtin_ia32_fpclassss_mask(A, 1, U)
     472  #define __builtin_ia32_fpclasssd_mask(A, D, U) __builtin_ia32_fpclasssd_mask(A, 1, U)
     473  #define __builtin_ia32_fpclassps512_mask(A, D, C) __builtin_ia32_fpclassps512_mask(A, 1, C)
     474  #define __builtin_ia32_fpclasspd512_mask(A, D, C) __builtin_ia32_fpclasspd512_mask(A, 1, C)
     475  #define __builtin_ia32_extracti64x2_512_mask(A, E, C, D) __builtin_ia32_extracti64x2_512_mask(A, 1, C, D)
     476  #define __builtin_ia32_extracti32x8_mask(A, E, C, D) __builtin_ia32_extracti32x8_mask(A, 1, C, D)
     477  #define __builtin_ia32_extractf64x2_512_mask(A, E, C, D) __builtin_ia32_extractf64x2_512_mask(A, 1, C, D)
     478  #define __builtin_ia32_extractf32x8_mask(A, E, C, D) __builtin_ia32_extractf32x8_mask(A, 1, C, D)
     479  #define __builtin_ia32_cvtuqq2ps512_mask(A, B, C, D) __builtin_ia32_cvtuqq2ps512_mask(A, B, C, 8)
     480  #define __builtin_ia32_cvtuqq2pd512_mask(A, B, C, D) __builtin_ia32_cvtuqq2pd512_mask(A, B, C, 8)
     481  #define __builtin_ia32_cvttps2uqq512_mask(A, B, C, D) __builtin_ia32_cvttps2uqq512_mask(A, B, C, 8)
     482  #define __builtin_ia32_cvttps2qq512_mask(A, B, C, D) __builtin_ia32_cvttps2qq512_mask(A, B, C, 8)
     483  #define __builtin_ia32_cvttpd2uqq512_mask(A, B, C, D) __builtin_ia32_cvttpd2uqq512_mask(A, B, C, 8)
     484  #define __builtin_ia32_cvttpd2qq512_mask(A, B, C, D) __builtin_ia32_cvttpd2qq512_mask(A, B, C, 8)
     485  #define __builtin_ia32_cvtqq2ps512_mask(A, B, C, D) __builtin_ia32_cvtqq2ps512_mask(A, B, C, 8)
     486  #define __builtin_ia32_cvtqq2pd512_mask(A, B, C, D) __builtin_ia32_cvtqq2pd512_mask(A, B, C, 8)
     487  #define __builtin_ia32_cvtps2uqq512_mask(A, B, C, D) __builtin_ia32_cvtps2uqq512_mask(A, B, C, 8)
     488  #define __builtin_ia32_cvtps2qq512_mask(A, B, C, D) __builtin_ia32_cvtps2qq512_mask(A, B, C, 8)
     489  #define __builtin_ia32_cvtpd2uqq512_mask(A, B, C, D) __builtin_ia32_cvtpd2uqq512_mask(A, B, C, 8)
     490  #define __builtin_ia32_cvtpd2qq512_mask(A, B, C, D) __builtin_ia32_cvtpd2qq512_mask(A, B, C, 8)
     491  #define __builtin_ia32_reducesd_mask_round(A, B, C, D, E, F) __builtin_ia32_reducesd_mask_round(A, B, 8, D, E, 8)
     492  #define __builtin_ia32_reducess_mask_round(A, B, C, D, E, F) __builtin_ia32_reducess_mask_round(A, B, 8, D, E, 8)
     493  #define __builtin_ia32_reducepd512_mask_round(A, B, C, D, E) __builtin_ia32_reducepd512_mask_round(A, 8, C, D, 8)
     494  #define __builtin_ia32_reduceps512_mask_round(A, B, C, D, E) __builtin_ia32_reduceps512_mask_round(A, 8, C, D, 8)
     495  
     496  /* avx512vlintrin.h */
     497  #define __builtin_ia32_vpermilps_mask(A, E, C, D) __builtin_ia32_vpermilps_mask(A, 1, C, D)
     498  #define __builtin_ia32_vpermilps256_mask(A, E, C, D) __builtin_ia32_vpermilps256_mask(A, 1, C, D)
     499  #define __builtin_ia32_vpermilpd_mask(A, E, C, D) __builtin_ia32_vpermilpd_mask(A, 1, C, D)
     500  #define __builtin_ia32_vpermilpd256_mask(A, E, C, D) __builtin_ia32_vpermilpd256_mask(A, 1, C, D)
     501  #define __builtin_ia32_vcvtps2ph_mask(A, E, C, D) __builtin_ia32_vcvtps2ph_mask(A, 1, C, D)
     502  #define __builtin_ia32_vcvtps2ph256_mask(A, E, C, D) __builtin_ia32_vcvtps2ph256_mask(A, 1, C, D)
     503  #define __builtin_ia32_ucmpq256_mask(A, B, E, D) __builtin_ia32_ucmpq256_mask(A, B, 1, D)
     504  #define __builtin_ia32_ucmpq128_mask(A, B, E, D) __builtin_ia32_ucmpq128_mask(A, B, 1, D)
     505  #define __builtin_ia32_ucmpd256_mask(A, B, E, D) __builtin_ia32_ucmpd256_mask(A, B, 1, D)
     506  #define __builtin_ia32_ucmpd128_mask(A, B, E, D) __builtin_ia32_ucmpd128_mask(A, B, 1, D)
     507  #define __builtin_ia32_shufps256_mask(A, B, F, D, E) __builtin_ia32_shufps256_mask(A, B, 1, D, E)
     508  #define __builtin_ia32_shufps128_mask(A, B, F, D, E) __builtin_ia32_shufps128_mask(A, B, 1, D, E)
     509  #define __builtin_ia32_shufpd256_mask(A, B, F, D, E) __builtin_ia32_shufpd256_mask(A, B, 1, D, E)
     510  #define __builtin_ia32_shufpd128_mask(A, B, F, D, E) __builtin_ia32_shufpd128_mask(A, B, 1, D, E)
     511  #define __builtin_ia32_shuf_i64x2_256_mask(A, B, F, D, E) __builtin_ia32_shuf_i64x2_256_mask(A, B, 1, D, E)
     512  #define __builtin_ia32_shuf_i32x4_256_mask(A, B, F, D, E) __builtin_ia32_shuf_i32x4_256_mask(A, B, 1, D, E)
     513  #define __builtin_ia32_shuf_f64x2_256_mask(A, B, F, D, E) __builtin_ia32_shuf_f64x2_256_mask(A, B, 1, D, E)
     514  #define __builtin_ia32_shuf_f32x4_256_mask(A, B, F, D, E) __builtin_ia32_shuf_f32x4_256_mask(A, B, 1, D, E)
     515  #define __builtin_ia32_scattersiv8si(A, B, C, D, F) __builtin_ia32_scattersiv8si(A, B, C, D, 1)
     516  #define __builtin_ia32_scattersiv8sf(A, B, C, D, F) __builtin_ia32_scattersiv8sf(A, B, C, D, 1)
     517  #define __builtin_ia32_scattersiv4si(A, B, C, D, F) __builtin_ia32_scattersiv4si(A, B, C, D, 1)
     518  #define __builtin_ia32_scattersiv4sf(A, B, C, D, F) __builtin_ia32_scattersiv4sf(A, B, C, D, 1)
     519  #define __builtin_ia32_scattersiv4di(A, B, C, D, F) __builtin_ia32_scattersiv4di(A, B, C, D, 1)
     520  #define __builtin_ia32_scattersiv4df(A, B, C, D, F) __builtin_ia32_scattersiv4df(A, B, C, D, 1)
     521  #define __builtin_ia32_scattersiv2di(A, B, C, D, F) __builtin_ia32_scattersiv2di(A, B, C, D, 1)
     522  #define __builtin_ia32_scattersiv2df(A, B, C, D, F) __builtin_ia32_scattersiv2df(A, B, C, D, 1)
     523  #define __builtin_ia32_scatterdiv8si(A, B, C, D, F) __builtin_ia32_scatterdiv8si(A, B, C, D, 1)
     524  #define __builtin_ia32_scatterdiv8sf(A, B, C, D, F) __builtin_ia32_scatterdiv8sf(A, B, C, D, 1)
     525  #define __builtin_ia32_scatterdiv4si(A, B, C, D, F) __builtin_ia32_scatterdiv4si(A, B, C, D, 1)
     526  #define __builtin_ia32_scatterdiv4sf(A, B, C, D, F) __builtin_ia32_scatterdiv4sf(A, B, C, D, 1)
     527  #define __builtin_ia32_scatterdiv4di(A, B, C, D, F) __builtin_ia32_scatterdiv4di(A, B, C, D, 1)
     528  #define __builtin_ia32_scatterdiv4df(A, B, C, D, F) __builtin_ia32_scatterdiv4df(A, B, C, D, 1)
     529  #define __builtin_ia32_scatterdiv2di(A, B, C, D, F) __builtin_ia32_scatterdiv2di(A, B, C, D, 1)
     530  #define __builtin_ia32_scatterdiv2df(A, B, C, D, F) __builtin_ia32_scatterdiv2df(A, B, C, D, 1)
     531  #define __builtin_ia32_rndscaleps_256_mask(A, E, C, D) __builtin_ia32_rndscaleps_256_mask(A, 1, C, D)
     532  #define __builtin_ia32_rndscaleps_128_mask(A, E, C, D) __builtin_ia32_rndscaleps_128_mask(A, 1, C, D)
     533  #define __builtin_ia32_rndscalepd_256_mask(A, E, C, D) __builtin_ia32_rndscalepd_256_mask(A, 1, C, D)
     534  #define __builtin_ia32_rndscalepd_128_mask(A, E, C, D) __builtin_ia32_rndscalepd_128_mask(A, 1, C, D)
     535  #define __builtin_ia32_pternlogq256_maskz(A, B, C, F, E) __builtin_ia32_pternlogq256_maskz(A, B, C, 1, E)
     536  #define __builtin_ia32_pternlogq256_mask(A, B, C, F, E) __builtin_ia32_pternlogq256_mask(A, B, C, 1, E)
     537  #define __builtin_ia32_pternlogq128_maskz(A, B, C, F, E) __builtin_ia32_pternlogq128_maskz(A, B, C, 1, E)
     538  #define __builtin_ia32_pternlogq128_mask(A, B, C, F, E) __builtin_ia32_pternlogq128_mask(A, B, C, 1, E)
     539  #define __builtin_ia32_pternlogd256_maskz(A, B, C, F, E) __builtin_ia32_pternlogd256_maskz(A, B, C, 1, E)
     540  #define __builtin_ia32_pternlogd256_mask(A, B, C, F, E) __builtin_ia32_pternlogd256_mask(A, B, C, 1, E)
     541  #define __builtin_ia32_pternlogd128_maskz(A, B, C, F, E) __builtin_ia32_pternlogd128_maskz(A, B, C, 1, E)
     542  #define __builtin_ia32_pternlogd128_mask(A, B, C, F, E) __builtin_ia32_pternlogd128_mask(A, B, C, 1, E)
     543  #define __builtin_ia32_psrlqi256_mask(A, E, C, D) __builtin_ia32_psrlqi256_mask(A, 1, C, D)
     544  #define __builtin_ia32_psrlqi128_mask(A, E, C, D) __builtin_ia32_psrlqi128_mask(A, 1, C, D)
     545  #define __builtin_ia32_psrldi256_mask(A, E, C, D) __builtin_ia32_psrldi256_mask(A, 1, C, D)
     546  #define __builtin_ia32_psrldi128_mask(A, E, C, D) __builtin_ia32_psrldi128_mask(A, 1, C, D)
     547  #define __builtin_ia32_psraqi256_mask(A, E, C, D) __builtin_ia32_psraqi256_mask(A, 1, C, D)
     548  #define __builtin_ia32_psraqi128_mask(A, E, C, D) __builtin_ia32_psraqi128_mask(A, 1, C, D)
     549  #define __builtin_ia32_psradi256_mask(A, E, C, D) __builtin_ia32_psradi256_mask(A, 1, C, D)
     550  #define __builtin_ia32_psradi128_mask(A, E, C, D) __builtin_ia32_psradi128_mask(A, 1, C, D)
     551  #define __builtin_ia32_psllqi256_mask(A, E, C, D) __builtin_ia32_psllqi256_mask(A, 1, C, D)
     552  #define __builtin_ia32_psllqi128_mask(A, E, C, D) __builtin_ia32_psllqi128_mask(A, 1, C, D)
     553  #define __builtin_ia32_pslldi256_mask(A, E, C, D) __builtin_ia32_pslldi256_mask(A, 1, C, D)
     554  #define __builtin_ia32_pslldi128_mask(A, E, C, D) __builtin_ia32_pslldi128_mask(A, 1, C, D)
     555  #define __builtin_ia32_pshufd256_mask(A, E, C, D) __builtin_ia32_pshufd256_mask(A, 1, C, D)
     556  #define __builtin_ia32_pshufd128_mask(A, E, C, D) __builtin_ia32_pshufd128_mask(A, 1, C, D)
     557  #define __builtin_ia32_prorq256_mask(A, E, C, D) __builtin_ia32_prorq256_mask(A, 1, C, D)
     558  #define __builtin_ia32_prorq128_mask(A, E, C, D) __builtin_ia32_prorq128_mask(A, 1, C, D)
     559  #define __builtin_ia32_prord256_mask(A, E, C, D) __builtin_ia32_prord256_mask(A, 1, C, D)
     560  #define __builtin_ia32_prord128_mask(A, E, C, D) __builtin_ia32_prord128_mask(A, 1, C, D)
     561  #define __builtin_ia32_prolq256_mask(A, E, C, D) __builtin_ia32_prolq256_mask(A, 1, C, D)
     562  #define __builtin_ia32_prolq128_mask(A, E, C, D) __builtin_ia32_prolq128_mask(A, 1, C, D)
     563  #define __builtin_ia32_prold256_mask(A, E, C, D) __builtin_ia32_prold256_mask(A, 1, C, D)
     564  #define __builtin_ia32_prold128_mask(A, E, C, D) __builtin_ia32_prold128_mask(A, 1, C, D)
     565  #define __builtin_ia32_permdi256_mask(A, E, C, D) __builtin_ia32_permdi256_mask(A, 1, C, D)
     566  #define __builtin_ia32_permdf256_mask(A, E, C, D) __builtin_ia32_permdf256_mask(A, 1, C, D)
     567  #define __builtin_ia32_inserti32x4_256_mask(A, B, F, D, E) __builtin_ia32_inserti32x4_256_mask(A, B, 1, D, E)
     568  #define __builtin_ia32_insertf32x4_256_mask(A, B, F, D, E) __builtin_ia32_insertf32x4_256_mask(A, B, 1, D, E)
     569  #define __builtin_ia32_getmantps256_mask(A, E, C, D) __builtin_ia32_getmantps256_mask(A, 1, C, D)
     570  #define __builtin_ia32_getmantps128_mask(A, E, C, D) __builtin_ia32_getmantps128_mask(A, 1, C, D)
     571  #define __builtin_ia32_getmantpd256_mask(A, E, C, D) __builtin_ia32_getmantpd256_mask(A, 1, C, D)
     572  #define __builtin_ia32_getmantpd128_mask(A, E, C, D) __builtin_ia32_getmantpd128_mask(A, 1, C, D)
     573  #define __builtin_ia32_gather3siv8si(A, B, C, D, F) __builtin_ia32_gather3siv8si(A, B, C, D, 1)
     574  #define __builtin_ia32_gather3siv8sf(A, B, C, D, F) __builtin_ia32_gather3siv8sf(A, B, C, D, 1)
     575  #define __builtin_ia32_gather3siv4si(A, B, C, D, F) __builtin_ia32_gather3siv4si(A, B, C, D, 1)
     576  #define __builtin_ia32_gather3siv4sf(A, B, C, D, F) __builtin_ia32_gather3siv4sf(A, B, C, D, 1)
     577  #define __builtin_ia32_gather3siv4di(A, B, C, D, F) __builtin_ia32_gather3siv4di(A, B, C, D, 1)
     578  #define __builtin_ia32_gather3siv4df(A, B, C, D, F) __builtin_ia32_gather3siv4df(A, B, C, D, 1)
     579  #define __builtin_ia32_gather3siv2di(A, B, C, D, F) __builtin_ia32_gather3siv2di(A, B, C, D, 1)
     580  #define __builtin_ia32_gather3siv2df(A, B, C, D, F) __builtin_ia32_gather3siv2df(A, B, C, D, 1)
     581  #define __builtin_ia32_gather3div8si(A, B, C, D, F) __builtin_ia32_gather3div8si(A, B, C, D, 1)
     582  #define __builtin_ia32_gather3div8sf(A, B, C, D, F) __builtin_ia32_gather3div8sf(A, B, C, D, 1)
     583  #define __builtin_ia32_gather3div4si(A, B, C, D, F) __builtin_ia32_gather3div4si(A, B, C, D, 1)
     584  #define __builtin_ia32_gather3div4sf(A, B, C, D, F) __builtin_ia32_gather3div4sf(A, B, C, D, 1)
     585  #define __builtin_ia32_gather3div4di(A, B, C, D, F) __builtin_ia32_gather3div4di(A, B, C, D, 1)
     586  #define __builtin_ia32_gather3div4df(A, B, C, D, F) __builtin_ia32_gather3div4df(A, B, C, D, 1)
     587  #define __builtin_ia32_gather3div2di(A, B, C, D, F) __builtin_ia32_gather3div2di(A, B, C, D, 1)
     588  #define __builtin_ia32_gather3div2df(A, B, C, D, F) __builtin_ia32_gather3div2df(A, B, C, D, 1)
     589  #define __builtin_ia32_fixupimmps256_maskz(A, B, C, F, E) __builtin_ia32_fixupimmps256_maskz(A, B, C, 1, E)
     590  #define __builtin_ia32_fixupimmps256_mask(A, B, C, F, E) __builtin_ia32_fixupimmps256_mask(A, B, C, 1, E)
     591  #define __builtin_ia32_fixupimmps128_maskz(A, B, C, F, E) __builtin_ia32_fixupimmps128_maskz(A, B, C, 1, E)
     592  #define __builtin_ia32_fixupimmps128_mask(A, B, C, F, E) __builtin_ia32_fixupimmps128_mask(A, B, C, 1, E)
     593  #define __builtin_ia32_fixupimmpd256_maskz(A, B, C, F, E) __builtin_ia32_fixupimmpd256_maskz(A, B, C, 1, E)
     594  #define __builtin_ia32_fixupimmpd256_mask(A, B, C, F, E) __builtin_ia32_fixupimmpd256_mask(A, B, C, 1, E)
     595  #define __builtin_ia32_fixupimmpd128_maskz(A, B, C, F, E) __builtin_ia32_fixupimmpd128_maskz(A, B, C, 1, E)
     596  #define __builtin_ia32_fixupimmpd128_mask(A, B, C, F, E) __builtin_ia32_fixupimmpd128_mask(A, B, C, 1, E)
     597  #define __builtin_ia32_extracti32x4_256_mask(A, E, C, D) __builtin_ia32_extracti32x4_256_mask(A, 1, C, D)
     598  #define __builtin_ia32_extractf32x4_256_mask(A, E, C, D) __builtin_ia32_extractf32x4_256_mask(A, 1, C, D)
     599  #define __builtin_ia32_cmpq256_mask(A, B, E, D) __builtin_ia32_cmpq256_mask(A, B, 1, D)
     600  #define __builtin_ia32_cmpq128_mask(A, B, E, D) __builtin_ia32_cmpq128_mask(A, B, 1, D)
     601  #define __builtin_ia32_cmpps256_mask(A, B, E, D) __builtin_ia32_cmpps256_mask(A, B, 1, D)
     602  #define __builtin_ia32_cmpps128_mask(A, B, E, D) __builtin_ia32_cmpps128_mask(A, B, 1, D)
     603  #define __builtin_ia32_cmppd256_mask(A, B, E, D) __builtin_ia32_cmppd256_mask(A, B, 1, D)
     604  #define __builtin_ia32_cmppd128_mask(A, B, E, D) __builtin_ia32_cmppd128_mask(A, B, 1, D)
     605  #define __builtin_ia32_cmpd256_mask(A, B, E, D) __builtin_ia32_cmpd256_mask(A, B, 1, D)
     606  #define __builtin_ia32_cmpd128_mask(A, B, E, D) __builtin_ia32_cmpd128_mask(A, B, 1, D)
     607  #define __builtin_ia32_alignq256_mask(A, B, F, D, E) __builtin_ia32_alignq256_mask(A, B, 1, D, E)
     608  #define __builtin_ia32_alignq128_mask(A, B, F, D, E) __builtin_ia32_alignq128_mask(A, B, 1, D, E)
     609  #define __builtin_ia32_alignd256_mask(A, B, F, D, E) __builtin_ia32_alignd256_mask(A, B, 1, D, E)
     610  #define __builtin_ia32_alignd128_mask(A, B, F, D, E) __builtin_ia32_alignd128_mask(A, B, 1, D, E)
     611  
     612  /* avx512vlbwintrin.h */
     613  #define __builtin_ia32_ucmpw256_mask(A, B, E, D) __builtin_ia32_ucmpw256_mask(A, B, 1, D)
     614  #define __builtin_ia32_ucmpw128_mask(A, B, E, D) __builtin_ia32_ucmpw128_mask(A, B, 1, D)
     615  #define __builtin_ia32_ucmpb256_mask(A, B, E, D) __builtin_ia32_ucmpb256_mask(A, B, 1, D)
     616  #define __builtin_ia32_ucmpb128_mask(A, B, E, D) __builtin_ia32_ucmpb128_mask(A, B, 1, D)
     617  #define __builtin_ia32_psrlwi256_mask(A, E, C, D) __builtin_ia32_psrlwi256_mask(A, 1, C, D)
     618  #define __builtin_ia32_psrlwi128_mask(A, E, C, D) __builtin_ia32_psrlwi128_mask(A, 1, C, D)
     619  #define __builtin_ia32_psrawi256_mask(A, E, C, D) __builtin_ia32_psrawi256_mask(A, 1, C, D)
     620  #define __builtin_ia32_psrawi128_mask(A, E, C, D) __builtin_ia32_psrawi128_mask(A, 1, C, D)
     621  #define __builtin_ia32_psllwi256_mask(A, E, C, D) __builtin_ia32_psllwi256_mask(A, 1, C, D)
     622  #define __builtin_ia32_psllwi128_mask(A, E, C, D) __builtin_ia32_psllwi128_mask(A, 1, C, D)
     623  #define __builtin_ia32_pshuflw256_mask(A, E, C, D) __builtin_ia32_pshuflw256_mask(A, 1, C, D)
     624  #define __builtin_ia32_pshuflw128_mask(A, E, C, D) __builtin_ia32_pshuflw128_mask(A, 1, C, D)
     625  #define __builtin_ia32_pshufhw256_mask(A, E, C, D) __builtin_ia32_pshufhw256_mask(A, 1, C, D)
     626  #define __builtin_ia32_pshufhw128_mask(A, E, C, D) __builtin_ia32_pshufhw128_mask(A, 1, C, D)
     627  #define __builtin_ia32_palignr256_mask(A, B, F, D, E) __builtin_ia32_palignr256_mask(A, B, 8, D, E)
     628  #define __builtin_ia32_palignr128_mask(A, B, F, D, E) __builtin_ia32_palignr128_mask(A, B, 8, D, E)
     629  #define __builtin_ia32_dbpsadbw256_mask(A, B, F, D, E) __builtin_ia32_dbpsadbw256_mask(A, B, 1, D, E)
     630  #define __builtin_ia32_dbpsadbw128_mask(A, B, F, D, E) __builtin_ia32_dbpsadbw128_mask(A, B, 1, D, E)
     631  #define __builtin_ia32_cmpw256_mask(A, B, E, D) __builtin_ia32_cmpw256_mask(A, B, 1, D)
     632  #define __builtin_ia32_cmpw128_mask(A, B, E, D) __builtin_ia32_cmpw128_mask(A, B, 1, D)
     633  #define __builtin_ia32_cmpb256_mask(A, B, E, D) __builtin_ia32_cmpb256_mask(A, B, 1, D)
     634  #define __builtin_ia32_cmpb128_mask(A, B, E, D) __builtin_ia32_cmpb128_mask(A, B, 1, D)
     635  
     636  /* avx512vldqintrin.h */
     637  #define __builtin_ia32_reduceps256_mask(A, E, C, D) __builtin_ia32_reduceps256_mask(A, 1, C, D)
     638  #define __builtin_ia32_reduceps128_mask(A, E, C, D) __builtin_ia32_reduceps128_mask(A, 1, C, D)
     639  #define __builtin_ia32_reducepd256_mask(A, E, C, D) __builtin_ia32_reducepd256_mask(A, 1, C, D)
     640  #define __builtin_ia32_reducepd128_mask(A, E, C, D) __builtin_ia32_reducepd128_mask(A, 1, C, D)
     641  #define __builtin_ia32_rangeps256_mask(A, B, F, D, E) __builtin_ia32_rangeps256_mask(A, B, 1, D, E)
     642  #define __builtin_ia32_rangeps128_mask(A, B, F, D, E) __builtin_ia32_rangeps128_mask(A, B, 1, D, E)
     643  #define __builtin_ia32_rangepd256_mask(A, B, F, D, E) __builtin_ia32_rangepd256_mask(A, B, 1, D, E)
     644  #define __builtin_ia32_rangepd128_mask(A, B, F, D, E) __builtin_ia32_rangepd128_mask(A, B, 1, D, E)
     645  #define __builtin_ia32_inserti64x2_256_mask(A, B, F, D, E) __builtin_ia32_inserti64x2_256_mask(A, B, 1, D, E)
     646  #define __builtin_ia32_insertf64x2_256_mask(A, B, F, D, E) __builtin_ia32_insertf64x2_256_mask(A, B, 1, D, E)
     647  #define __builtin_ia32_fpclassps256_mask(A, D, C) __builtin_ia32_fpclassps256_mask(A, 1, C)
     648  #define __builtin_ia32_fpclassps128_mask(A, D, C) __builtin_ia32_fpclassps128_mask(A, 1, C)
     649  #define __builtin_ia32_fpclasspd256_mask(A, D, C) __builtin_ia32_fpclasspd256_mask(A, 1, C)
     650  #define __builtin_ia32_fpclasspd128_mask(A, D, C) __builtin_ia32_fpclasspd128_mask(A, 1, C)
     651  #define __builtin_ia32_extracti64x2_256_mask(A, E, C, D) __builtin_ia32_extracti64x2_256_mask(A, 1, C, D)
     652  #define __builtin_ia32_extractf64x2_256_mask(A, E, C, D) __builtin_ia32_extractf64x2_256_mask(A, 1, C, D)
     653  
     654  /* gfniintrin.h */
     655  #define __builtin_ia32_vgf2p8affineinvqb_v16qi(A, B, C) __builtin_ia32_vgf2p8affineinvqb_v16qi(A, B, 1) 
     656  #define __builtin_ia32_vgf2p8affineinvqb_v32qi(A, B, C) __builtin_ia32_vgf2p8affineinvqb_v32qi(A, B, 1)
     657  #define __builtin_ia32_vgf2p8affineinvqb_v64qi(A, B, C) __builtin_ia32_vgf2p8affineinvqb_v64qi(A, B, 1)
     658  #define __builtin_ia32_vgf2p8affineinvqb_v16qi_mask(A, B, C, D, E) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask(A, B, 1, D, E) 
     659  #define __builtin_ia32_vgf2p8affineinvqb_v32qi_mask(A, B, C, D, E) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask(A, B, 1, D, E) 
     660  #define __builtin_ia32_vgf2p8affineinvqb_v64qi_mask(A, B, C, D, E) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask(A, B, 1, D, E) 
     661  #define __builtin_ia32_vgf2p8affineqb_v16qi(A, B, C) __builtin_ia32_vgf2p8affineqb_v16qi(A, B, 1) 
     662  #define __builtin_ia32_vgf2p8affineqb_v32qi(A, B, C) __builtin_ia32_vgf2p8affineqb_v32qi(A, B, 1)
     663  #define __builtin_ia32_vgf2p8affineqb_v64qi(A, B, C) __builtin_ia32_vgf2p8affineqb_v64qi(A, B, 1)
     664  #define __builtin_ia32_vgf2p8affineqb_v16qi_mask(A, B, C, D, E) __builtin_ia32_vgf2p8affineqb_v16qi_mask(A, B, 1, D, E) 
     665  #define __builtin_ia32_vgf2p8affineqb_v32qi_mask(A, B, C, D, E) __builtin_ia32_vgf2p8affineqb_v32qi_mask(A, B, 1, D, E) 
     666  #define __builtin_ia32_vgf2p8affineqb_v64qi_mask(A, B, C, D, E) __builtin_ia32_vgf2p8affineqb_v64qi_mask(A, B, 1, D, E) 
     667  
     668  /* avx512vbmi2intrin.h */
     669  #define __builtin_ia32_vpshrd_v32hi(A, B, C) __builtin_ia32_vpshrd_v32hi(A, B, 1)
     670  #define __builtin_ia32_vpshrd_v32hi_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v32hi_mask(A, B, 1, D, E)
     671  #define __builtin_ia32_vpshrd_v16si(A, B, C) __builtin_ia32_vpshrd_v16si(A, B, 1)
     672  #define __builtin_ia32_vpshrd_v16si_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v16si_mask(A, B, 1, D, E)
     673  #define __builtin_ia32_vpshrd_v8di(A, B, C) __builtin_ia32_vpshrd_v8di(A, B, 1)
     674  #define __builtin_ia32_vpshrd_v8di_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v8di_mask(A, B, 1, D, E)
     675  #define __builtin_ia32_vpshrd_v16hi(A, B, C) __builtin_ia32_vpshrd_v16hi(A, B, 1)
     676  #define __builtin_ia32_vpshrd_v16hi_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v16hi_mask(A, B, 1, D, E)
     677  #define __builtin_ia32_vpshrd_v8si(A, B, C) __builtin_ia32_vpshrd_v8si(A, B, 1)
     678  #define __builtin_ia32_vpshrd_v8si_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v8si_mask(A, B, 1, D, E)
     679  #define __builtin_ia32_vpshrd_v4di(A, B, C) __builtin_ia32_vpshrd_v4di(A, B, 1)
     680  #define __builtin_ia32_vpshrd_v4di_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v4di_mask(A, B, 1, D, E)
     681  #define __builtin_ia32_vpshrd_v8hi(A, B, C) __builtin_ia32_vpshrd_v8hi(A, B, 1)
     682  #define __builtin_ia32_vpshrd_v8hi_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v8hi_mask(A, B, 1, D, E)
     683  #define __builtin_ia32_vpshrd_v4si(A, B, C) __builtin_ia32_vpshrd_v4si(A, B, 1)
     684  #define __builtin_ia32_vpshrd_v4si_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v4si_mask(A, B, 1, D, E)
     685  #define __builtin_ia32_vpshrd_v2di(A, B, C) __builtin_ia32_vpshrd_v2di(A, B, 1)
     686  #define __builtin_ia32_vpshrd_v2di_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v2di_mask(A, B, 1, D, E)
     687  #define __builtin_ia32_vpshld_v32hi(A, B, C) __builtin_ia32_vpshld_v32hi(A, B, 1)
     688  #define __builtin_ia32_vpshld_v32hi_mask(A, B, C, D, E)  __builtin_ia32_vpshld_v32hi_mask(A, B, 1, D, E)
     689  #define __builtin_ia32_vpshld_v16si(A, B, C) __builtin_ia32_vpshld_v16si(A, B, 1)
     690  #define __builtin_ia32_vpshld_v16si_mask(A, B, C, D, E)  __builtin_ia32_vpshld_v16si_mask(A, B, 1, D, E)
     691  #define __builtin_ia32_vpshld_v8di(A, B, C) __builtin_ia32_vpshld_v8di(A, B, 1)
     692  #define __builtin_ia32_vpshld_v8di_mask(A, B, C, D, E)  __builtin_ia32_vpshld_v8di_mask(A, B, 1, D, E)
     693  #define __builtin_ia32_vpshld_v16hi(A, B, C) __builtin_ia32_vpshld_v16hi(A, B, 1)
     694  #define __builtin_ia32_vpshld_v16hi_mask(A, B, C, D, E)  __builtin_ia32_vpshld_v16hi_mask(A, B, 1, D, E)
     695  #define __builtin_ia32_vpshld_v8si(A, B, C) __builtin_ia32_vpshld_v8si(A, B, 1)
     696  #define __builtin_ia32_vpshld_v8si_mask(A, B, C, D, E)  __builtin_ia32_vpshld_v8si_mask(A, B, 1, D, E)
     697  #define __builtin_ia32_vpshld_v4di(A, B, C) __builtin_ia32_vpshld_v4di(A, B, 1)
     698  #define __builtin_ia32_vpshld_v4di_mask(A, B, C, D, E)  __builtin_ia32_vpshld_v4di_mask(A, B, 1, D, E)
     699  #define __builtin_ia32_vpshld_v8hi(A, B, C) __builtin_ia32_vpshld_v8hi(A, B, 1)
     700  #define __builtin_ia32_vpshld_v8hi_mask(A, B, C, D, E)  __builtin_ia32_vpshld_v8hi_mask(A, B, 1, D, E)
     701  #define __builtin_ia32_vpshld_v4si(A, B, C) __builtin_ia32_vpshld_v4si(A, B, 1)
     702  #define __builtin_ia32_vpshld_v4si_mask(A, B, C, D, E)  __builtin_ia32_vpshld_v4si_mask(A, B, 1, D, E)
     703  #define __builtin_ia32_vpshld_v2di(A, B, C) __builtin_ia32_vpshld_v2di(A, B, 1)
     704  #define __builtin_ia32_vpshld_v2di_mask(A, B, C, D, E)  __builtin_ia32_vpshld_v2di_mask(A, B, 1, D, E)
     705  
     706  /* avx512fp16intrin.h */
     707  #define __builtin_ia32_addph512_mask_round(A, B, C, D, E) __builtin_ia32_addph512_mask_round(A, B, C, D, 8)
     708  #define __builtin_ia32_subph512_mask_round(A, B, C, D, E) __builtin_ia32_subph512_mask_round(A, B, C, D, 8)
     709  #define __builtin_ia32_mulph512_mask_round(A, B, C, D, E) __builtin_ia32_mulph512_mask_round(A, B, C, D, 8)
     710  #define __builtin_ia32_divph512_mask_round(A, B, C, D, E) __builtin_ia32_divph512_mask_round(A, B, C, D, 8)
     711  #define __builtin_ia32_addsh_mask_round(A, B, C, D, E) __builtin_ia32_addsh_mask_round(A, B, C, D, 8)
     712  #define __builtin_ia32_subsh_mask_round(A, B, C, D, E) __builtin_ia32_subsh_mask_round(A, B, C, D, 8)
     713  #define __builtin_ia32_mulsh_mask_round(A, B, C, D, E) __builtin_ia32_mulsh_mask_round(A, B, C, D, 8)
     714  #define __builtin_ia32_divsh_mask_round(A, B, C, D, E) __builtin_ia32_divsh_mask_round(A, B, C, D, 8)
     715  #define __builtin_ia32_maxph512_mask_round(A, B, C, D, E) __builtin_ia32_maxph512_mask_round(A, B, C, D, 8)
     716  #define __builtin_ia32_minph512_mask_round(A, B, C, D, E) __builtin_ia32_minph512_mask_round(A, B, C, D, 8)
     717  #define __builtin_ia32_maxsh_mask_round(A, B, C, D, E) __builtin_ia32_maxsh_mask_round(A, B, C, D, 8)
     718  #define __builtin_ia32_minsh_mask_round(A, B, C, D, E) __builtin_ia32_minsh_mask_round(A, B, C, D, 8)
     719  #define __builtin_ia32_cmpph512_mask(A, B, C, D) __builtin_ia32_cmpph512_mask(A, B, 1, D)
     720  #define __builtin_ia32_cmpph512_mask_round(A, B, C, D, E) __builtin_ia32_cmpph512_mask_round(A, B, 1, D, 8)
     721  #define __builtin_ia32_cmpsh_mask_round(A, B, C, D, E) __builtin_ia32_cmpsh_mask_round(A, B, 1, D, 8)
     722  #define __builtin_ia32_sqrtph512_mask_round(C, A, B, D) __builtin_ia32_sqrtph512_mask_round(C, A, B, 8)
     723  #define __builtin_ia32_sqrtsh_mask_round(D, C, A, B, E) __builtin_ia32_sqrtsh_mask_round(D, C, A, B, 8)
     724  #define __builtin_ia32_scalefph512_mask_round(A, B, C, D, E) __builtin_ia32_scalefph512_mask_round(A, B, C, D, 8)
     725  #define __builtin_ia32_scalefsh_mask_round(A, B, C, D, E) __builtin_ia32_scalefsh_mask_round(A, B, C, D, 8)
     726  #define __builtin_ia32_reduceph512_mask_round(A, B, C, D, E) __builtin_ia32_reduceph512_mask_round(A, 123, C, D, 8)
     727  #define __builtin_ia32_reduceph128_mask(A, B, C, D) __builtin_ia32_reduceph128_mask(A, 123, C, D)
     728  #define __builtin_ia32_reduceph256_mask(A, B, C, D) __builtin_ia32_reduceph256_mask(A, 123, C, D)
     729  #define __builtin_ia32_reducesh_mask_round(A, B, C, D, E, F) __builtin_ia32_reducesh_mask_round(A, B, 123, D, E, 8)
     730  #define __builtin_ia32_rndscaleph512_mask_round(A, B, C, D, E) __builtin_ia32_rndscaleph512_mask_round(A, 123, C, D, 8)
     731  #define __builtin_ia32_rndscaleph128_mask(A, B, C, D) __builtin_ia32_rndscaleph128_mask(A, 123, C, D)
     732  #define __builtin_ia32_rndscaleph256_mask(A, B, C, D) __builtin_ia32_rndscaleph256_mask(A, 123, C, D)
     733  #define __builtin_ia32_rndscalesh_mask_round(A, B, C, D, E, F) __builtin_ia32_rndscalesh_mask_round(A, B, 123, D, E, 8)
     734  #define __builtin_ia32_fpclassph512_mask(A, D, C) __builtin_ia32_fpclassph512_mask(A, 1, C)
     735  #define __builtin_ia32_fpclasssh_mask(A, D, U) __builtin_ia32_fpclasssh_mask(A, 1, U)
     736  #define __builtin_ia32_getexpph512_mask(A, B, C, D) __builtin_ia32_getexpph512_mask(A, B, C, 8)
     737  #define __builtin_ia32_getexpsh_mask_round(A, B, C, D, E) __builtin_ia32_getexpsh_mask_round(A, B, C, D, 4)
     738  #define __builtin_ia32_getmantph512_mask(A, F, C, D, E) __builtin_ia32_getmantph512_mask(A, 1, C, D, 8)
     739  #define __builtin_ia32_getmantsh_mask_round(A, B, C, W, U, D) __builtin_ia32_getmantsh_mask_round(A, B, 1, W, U, 4)
     740  #define __builtin_ia32_vcvtph2dq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2dq512_mask_round(A, B, C, 8)
     741  #define __builtin_ia32_vcvtph2udq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2udq512_mask_round(A, B, C, 8)
     742  #define __builtin_ia32_vcvtph2qq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2qq512_mask_round(A, B, C, 8)
     743  #define __builtin_ia32_vcvtph2uqq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2uqq512_mask_round(A, B, C, 8)
     744  #define __builtin_ia32_vcvttph2dq512_mask_round(A, B, C, D) __builtin_ia32_vcvttph2dq512_mask_round(A, B, C, 8)
     745  #define __builtin_ia32_vcvttph2udq512_mask_round(A, B, C, D) __builtin_ia32_vcvttph2udq512_mask_round(A, B, C, 8)
     746  #define __builtin_ia32_vcvttph2qq512_mask_round(A, B, C, D) __builtin_ia32_vcvttph2qq512_mask_round(A, B, C, 8)
     747  #define __builtin_ia32_vcvttph2uqq512_mask_round(A, B, C, D) __builtin_ia32_vcvttph2uqq512_mask_round(A, B, C, 8)
     748  #define __builtin_ia32_vcvtph2w512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2w512_mask_round(A, B, C, 8)
     749  #define __builtin_ia32_vcvtph2uw512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2uw512_mask_round(A, B, C, 8)
     750  #define __builtin_ia32_vcvttph2w512_mask_round(A, B, C, D) __builtin_ia32_vcvttph2w512_mask_round(A, B, C, 8)
     751  #define __builtin_ia32_vcvttph2uw512_mask_round(A, B, C, D) __builtin_ia32_vcvttph2uw512_mask_round(A, B, C, 8)
     752  #define __builtin_ia32_vcvtw2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtw2ph512_mask_round(A, B, C, 8)
     753  #define __builtin_ia32_vcvtuw2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtuw2ph512_mask_round(A, B, C, 8)
     754  #define __builtin_ia32_vcvtdq2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtdq2ph512_mask_round(A, B, C, 8)
     755  #define __builtin_ia32_vcvtudq2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtudq2ph512_mask_round(A, B, C, 8)
     756  #define __builtin_ia32_vcvtqq2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtqq2ph512_mask_round(A, B, C, 8)
     757  #define __builtin_ia32_vcvtuqq2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtuqq2ph512_mask_round(A, B, C, 8)
     758  #define __builtin_ia32_vcvtsh2si32_round(A, B) __builtin_ia32_vcvtsh2si32_round(A, 8)
     759  #define __builtin_ia32_vcvtsh2si64_round(A, B) __builtin_ia32_vcvtsh2si64_round(A, 8)
     760  #define __builtin_ia32_vcvtsh2usi32_round(A, B) __builtin_ia32_vcvtsh2usi32_round(A, 8)
     761  #define __builtin_ia32_vcvtsh2usi64_round(A, B) __builtin_ia32_vcvtsh2usi64_round(A, 8)
     762  #define __builtin_ia32_vcvttsh2si32_round(A, B) __builtin_ia32_vcvttsh2si32_round(A, 8)
     763  #define __builtin_ia32_vcvttsh2si64_round(A, B) __builtin_ia32_vcvttsh2si64_round(A, 8)
     764  #define __builtin_ia32_vcvttsh2usi32_round(A, B) __builtin_ia32_vcvttsh2usi32_round(A, 8)
     765  #define __builtin_ia32_vcvttsh2usi64_round(A, B) __builtin_ia32_vcvttsh2usi64_round(A, 8)
     766  #define __builtin_ia32_vcvtsi2sh32_round(A, B, C) __builtin_ia32_vcvtsi2sh32_round(A, B, 8)
     767  #define __builtin_ia32_vcvtsi2sh64_round(A, B, C) __builtin_ia32_vcvtsi2sh64_round(A, B, 8)
     768  #define __builtin_ia32_vcvtusi2sh32_round(A, B, C) __builtin_ia32_vcvtusi2sh32_round(A, B, 8)
     769  #define __builtin_ia32_vcvtusi2sh64_round(A, B, C) __builtin_ia32_vcvtusi2sh64_round(A, B, 8)
     770  #define __builtin_ia32_vcvtph2pd512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2pd512_mask_round(A, B, C, 8)
     771  #define __builtin_ia32_vcvtph2psx512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2psx512_mask_round(A, B, C, 8)
     772  #define __builtin_ia32_vcvtpd2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtpd2ph512_mask_round(A, B, C, 8)
     773  #define __builtin_ia32_vcvtps2phx512_mask_round(A, B, C, D) __builtin_ia32_vcvtps2phx512_mask_round(A, B, C, 8)
     774  #define __builtin_ia32_vcvtsh2ss_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsh2ss_mask_round(A, B, C, D, 8)
     775  #define __builtin_ia32_vcvtsh2sd_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsh2sd_mask_round(A, B, C, D, 8)
     776  #define __builtin_ia32_vcvtss2sh_mask_round(A, B, C, D, E) __builtin_ia32_vcvtss2sh_mask_round(A, B, C, D, 8)
     777  #define __builtin_ia32_vcvtsd2sh_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsd2sh_mask_round(A, B, C, D, 8)
     778  #define __builtin_ia32_vfmaddsubph512_mask(A, B, C, D, E) __builtin_ia32_vfmaddsubph512_mask(A, B, C, D, 8)
     779  #define __builtin_ia32_vfmaddsubph512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsubph512_mask3(A, B, C, D, 8)
     780  #define __builtin_ia32_vfmaddsubph512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsubph512_maskz(A, B, C, D, 8)
     781  #define __builtin_ia32_vfmsubaddph512_mask(A, B, C, D, E) __builtin_ia32_vfmsubaddph512_mask(A, B, C, D, 8)
     782  #define __builtin_ia32_vfmsubaddph512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubaddph512_mask3(A, B, C, D, 8)
     783  #define __builtin_ia32_vfmsubaddph512_maskz(A, B, C, D, E) __builtin_ia32_vfmsubaddph512_maskz(A, B, C, D, 8)
     784  #define __builtin_ia32_vfmaddph512_mask(A, B, C, D, E) __builtin_ia32_vfmaddph512_mask(A, B, C, D, 8)
     785  #define __builtin_ia32_vfmaddph512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddph512_mask3(A, B, C, D, 8)
     786  #define __builtin_ia32_vfmaddph512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddph512_maskz(A, B, C, D, 8)
     787  #define __builtin_ia32_vfnmaddph512_mask(A, B, C, D, E) __builtin_ia32_vfnmaddph512_mask(A, B, C, D, 8)
     788  #define __builtin_ia32_vfnmaddph512_mask3(A, B, C, D, E) __builtin_ia32_vfnmaddph512_mask3(A, B, C, D, 8)
     789  #define __builtin_ia32_vfnmaddph512_maskz(A, B, C, D, E) __builtin_ia32_vfnmaddph512_maskz(A, B, C, D, 8)
     790  #define __builtin_ia32_vfmsubph512_mask(A, B, C, D, E) __builtin_ia32_vfmsubph512_mask(A, B, C, D, 8)
     791  #define __builtin_ia32_vfmsubph512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubph512_mask3(A, B, C, D, 8)
     792  #define __builtin_ia32_vfmsubph512_maskz(A, B, C, D, E) __builtin_ia32_vfmsubph512_maskz(A, B, C, D, 8)
     793  #define __builtin_ia32_vfnmsubph512_mask(A, B, C, D, E) __builtin_ia32_vfnmsubph512_mask(A, B, C, D, 8)
     794  #define __builtin_ia32_vfnmsubph512_mask3(A, B, C, D, E) __builtin_ia32_vfnmsubph512_mask3(A, B, C, D, 8)
     795  #define __builtin_ia32_vfnmsubph512_maskz(A, B, C, D, E) __builtin_ia32_vfnmsubph512_maskz(A, B, C, D, 8)
     796  #define __builtin_ia32_vfmaddsh3_mask(A, B, C, D, E) __builtin_ia32_vfmaddsh3_mask(A, B, C, D, 8)
     797  #define __builtin_ia32_vfmaddsh3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsh3_mask3(A, B, C, D, 8)
     798  #define __builtin_ia32_vfmaddsh3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsh3_maskz(A, B, C, D, 8)
     799  #define __builtin_ia32_vfnmaddsh3_mask(A, B, C, D, E) __builtin_ia32_vfnmaddsh3_mask(A, B, C, D, 8)
     800  #define __builtin_ia32_vfnmaddsh3_mask3(A, B, C, D, E) __builtin_ia32_vfnmaddsh3_mask3(A, B, C, D, 8)
     801  #define __builtin_ia32_vfnmaddsh3_maskz(A, B, C, D, E) __builtin_ia32_vfnmaddsh3_maskz(A, B, C, D, 8)
     802  #define __builtin_ia32_vfmsubsh3_mask(A, B, C, D, E) __builtin_ia32_vfmsubsh3_mask(A, B, C, D, 8)
     803  #define __builtin_ia32_vfmsubsh3_mask3(A, B, C, D, E) __builtin_ia32_vfmsubsh3_mask3(A, B, C, D, 8)
     804  #define __builtin_ia32_vfmsubsh3_maskz(A, B, C, D, E) __builtin_ia32_vfmsubsh3_maskz(A, B, C, D, 8)
     805  #define __builtin_ia32_vfnmsubsh3_mask(A, B, C, D, E) __builtin_ia32_vfnmsubsh3_mask(A, B, C, D, 8)
     806  #define __builtin_ia32_vfnmsubsh3_mask3(A, B, C, D, E) __builtin_ia32_vfnmsubsh3_mask3(A, B, C, D, 8)
     807  #define __builtin_ia32_vfnmsubsh3_maskz(A, B, C, D, E) __builtin_ia32_vfnmsubsh3_maskz(A, B, C, D, 8)
     808  #define __builtin_ia32_vfcmaddcph512_round(A, B, C, D) __builtin_ia32_vfcmaddcph512_round(A, B, C, 8)
     809  #define __builtin_ia32_vfcmaddcph512_mask_round(A, C, D, B, E) __builtin_ia32_vfcmaddcph512_mask_round(A, C, D, B, 8)
     810  #define __builtin_ia32_vfcmaddcph512_mask3_round(A, C, D, B, E) __builtin_ia32_vfcmaddcph512_mask3_round(A, C, D, B, 8)
     811  #define __builtin_ia32_vfcmaddcph512_maskz_round(B, C, D, A, E) __builtin_ia32_vfcmaddcph512_maskz_round(B, C, D, A, 8)
     812  #define __builtin_ia32_vfmaddcph512_round(A, B, C, D) __builtin_ia32_vfmaddcph512_round(A, B, C, 8)
     813  #define __builtin_ia32_vfmaddcph512_mask_round(A, C, D, B, E) __builtin_ia32_vfmaddcph512_mask_round(A, C, D, B, 8)
     814  #define __builtin_ia32_vfmaddcph512_mask3_round(A, C, D, B, E) __builtin_ia32_vfmaddcph512_mask3_round(A, C, D, B, 8)
     815  #define __builtin_ia32_vfmaddcph512_maskz_round(B, C, D, A, E) __builtin_ia32_vfmaddcph512_maskz_round(B, C, D, A, 8)
     816  #define __builtin_ia32_vfmulcph512_round(A, B, C) __builtin_ia32_vfmulcph512_round(A, B, 8)
     817  #define __builtin_ia32_vfmulcph512_mask_round(A, C, D, B, E) __builtin_ia32_vfmulcph512_mask_round(A, C, D, B, 8)
     818  #define __builtin_ia32_vfcmulcph512_round(A, B, C) __builtin_ia32_vfcmulcph512_round(A, B, 8)
     819  #define __builtin_ia32_vfcmulcph512_mask_round(A, C, D, B, E) __builtin_ia32_vfcmulcph512_mask_round(A, C, D, B, 8)
     820  #define __builtin_ia32_vfmaddcsh_round(A, B, C, D) __builtin_ia32_vfmaddcsh_round(A, B, C, 8)
     821  #define __builtin_ia32_vfmaddcsh_mask_round(A, C, D, B, E) __builtin_ia32_vfmaddcsh_mask_round(A, C, D, B, 8)
     822  #define __builtin_ia32_vfmaddcsh_mask3_round(A, C, D, B, E) __builtin_ia32_vfmaddcsh_mask3_round(A, C, D, B, 8)
     823  #define __builtin_ia32_vfmaddcsh_maskz_round(B, C, D, A, E) __builtin_ia32_vfmaddcsh_maskz_round(B, C, D, A, 8)
     824  #define __builtin_ia32_vfcmaddcsh_round(A, B, C, D) __builtin_ia32_vfcmaddcsh_round(A, B, C, 8)
     825  #define __builtin_ia32_vfcmaddcsh_mask_round(A, C, D, B, E) __builtin_ia32_vfcmaddcsh_mask_round(A, C, D, B, 8)
     826  #define __builtin_ia32_vfcmaddcsh_mask3_round(A, C, D, B, E) __builtin_ia32_vfcmaddcsh_mask3_round(A, C, D, B, 8)
     827  #define __builtin_ia32_vfcmaddcsh_maskz_round(B, C, D, A, E) __builtin_ia32_vfcmaddcsh_maskz_round(B, C, D, A, 8)
     828  #define __builtin_ia32_vfmulcsh_round(A, B, C) __builtin_ia32_vfmulcsh_round(A, B, 8)
     829  #define __builtin_ia32_vfmulcsh_mask_round(A, C, D, B, E) __builtin_ia32_vfmulcsh_mask_round(A, C, D, B, 8)
     830  #define __builtin_ia32_vfcmulcsh_round(A, B, C) __builtin_ia32_vfcmulcsh_round(A, B, 8)
     831  #define __builtin_ia32_vfcmulcsh_mask_round(A, C, D, B, E) __builtin_ia32_vfcmulcsh_mask_round(A, C, D, B, 8)
     832  
     833  /* avx512fp16vlintrin.h */
     834  #define __builtin_ia32_cmpph128_mask(A, B, C, D) __builtin_ia32_cmpph128_mask(A, B, 1, D)
     835  #define __builtin_ia32_cmpph256_mask(A, B, C, D) __builtin_ia32_cmpph256_mask(A, B, 1, D)
     836  #define __builtin_ia32_fpclassph256_mask(A, D, C) __builtin_ia32_fpclassph256_mask(A, 1, C)
     837  #define __builtin_ia32_fpclassph128_mask(A, D, C) __builtin_ia32_fpclassph128_mask(A, 1, C)
     838  #define __builtin_ia32_getmantph256_mask(A, E, C, D) __builtin_ia32_getmantph256_mask(A, 1, C, D)
     839  #define __builtin_ia32_getmantph128_mask(A, E, C, D) __builtin_ia32_getmantph128_mask(A, 1, C, D)
     840  
     841  /* vpclmulqdqintrin.h */
     842  #define __builtin_ia32_vpclmulqdq_v4di(A, B, C)  __builtin_ia32_vpclmulqdq_v4di(A, B, 1) 
     843  #define __builtin_ia32_vpclmulqdq_v2di(A, B, C)  __builtin_ia32_vpclmulqdq_v2di(A, B, 1) 
     844  #define __builtin_ia32_vpclmulqdq_v8di(A, B, C)  __builtin_ia32_vpclmulqdq_v8di(A, B, 1) 
     845  
     846  /* cmpccxadd.h */
     847  #define __builtin_ia32_cmpccxadd(A, B, C, D) __builtin_ia32_cmpccxadd(A, B, C, 1)
     848  #define __builtin_ia32_cmpccxadd64(A, B, C, D) __builtin_ia32_cmpccxadd64(A, B, C, 1)
     849  
     850  #pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8,avxneconvert,cmpccxadd,amx-fp16,prefetchi,raoint,amx-complex")
     851  
     852  #include <x86intrin.h>