1  /* Copyright (C) 2007-2023 Free Software Foundation, Inc.
       2  
       3     This file is part of GCC.
       4  
       5     GCC is free software; you can redistribute it and/or modify
       6     it under the terms of the GNU General Public License as published by
       7     the Free Software Foundation; either version 3, or (at your option)
       8     any later version.
       9  
      10     GCC is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13     GNU General Public License for more details.
      14  
      15     Under Section 7 of GPL version 3, you are granted additional
      16     permissions described in the GCC Runtime Library Exception, version
      17     3.1, as published by the Free Software Foundation.
      18  
      19     You should have received a copy of the GNU General Public License and
      20     a copy of the GCC Runtime Library Exception along with this program;
      21     see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
      22     <http://www.gnu.org/licenses/>.  */
      23  
      24  #ifndef _X86INTRIN_H_INCLUDED
      25  # error "Never use <xopintrin.h> directly; include <x86intrin.h> instead."
      26  #endif
      27  
      28  #ifndef _XOPMMINTRIN_H_INCLUDED
      29  #define _XOPMMINTRIN_H_INCLUDED
      30  
      31  #include <fma4intrin.h>
      32  
      33  #ifndef __XOP__
      34  #pragma GCC push_options
      35  #pragma GCC target("xop")
      36  #define __DISABLE_XOP__
      37  #endif /* __XOP__ */
      38  
      39  /* Integer multiply/add instructions. */
      40  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      41  _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
      42  {
      43    return (__m128i) __builtin_ia32_vpmacssww ((__v8hi)__A,(__v8hi)__B, (__v8hi)__C);
      44  }
      45  
      46  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      47  _mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C)
      48  {
      49    return (__m128i) __builtin_ia32_vpmacsww ((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
      50  }
      51  
      52  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      53  _mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C)
      54  {
      55    return  (__m128i) __builtin_ia32_vpmacsswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
      56  }
      57  
      58  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      59  _mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)
      60  {
      61    return  (__m128i) __builtin_ia32_vpmacswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
      62  }
      63  
      64  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      65  _mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C)
      66  {
      67    return  (__m128i) __builtin_ia32_vpmacssdd ((__v4si)__A, (__v4si)__B, (__v4si)__C);
      68  }
      69  
      70  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      71  _mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C)
      72  {
      73    return  (__m128i) __builtin_ia32_vpmacsdd ((__v4si)__A, (__v4si)__B, (__v4si)__C);
      74  }
      75  
      76  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      77  _mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C)
      78  {
      79    return  (__m128i) __builtin_ia32_vpmacssdql ((__v4si)__A, (__v4si)__B, (__v2di)__C);
      80  }
      81  
      82  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      83  _mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)
      84  {
      85    return  (__m128i) __builtin_ia32_vpmacsdql ((__v4si)__A, (__v4si)__B, (__v2di)__C);
      86  }
      87  
      88  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      89  _mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)
      90  {
      91    return  (__m128i) __builtin_ia32_vpmacssdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
      92  }
      93  
      94  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      95  _mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)
      96  {
      97    return  (__m128i) __builtin_ia32_vpmacsdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
      98  }
      99  
     100  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     101  _mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C)
     102  {
     103    return  (__m128i) __builtin_ia32_vpmadcsswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C);
     104  }
     105  
     106  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     107  _mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)
     108  {
     109    return  (__m128i) __builtin_ia32_vpmadcswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C);
     110  }
     111  
     112  /* Packed Integer Horizontal Add and Subtract */
     113  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     114  _mm_haddw_epi8(__m128i __A)
     115  {
     116    return  (__m128i) __builtin_ia32_vphaddbw ((__v16qi)__A);
     117  }
     118  
     119  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     120  _mm_haddd_epi8(__m128i __A)
     121  {
     122    return  (__m128i) __builtin_ia32_vphaddbd ((__v16qi)__A);
     123  }
     124  
     125  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     126  _mm_haddq_epi8(__m128i __A)
     127  {
     128    return  (__m128i) __builtin_ia32_vphaddbq ((__v16qi)__A);
     129  }
     130  
     131  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     132  _mm_haddd_epi16(__m128i __A)
     133  {
     134    return  (__m128i) __builtin_ia32_vphaddwd ((__v8hi)__A);
     135  }
     136  
     137  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     138  _mm_haddq_epi16(__m128i __A)
     139  {
     140    return  (__m128i) __builtin_ia32_vphaddwq ((__v8hi)__A);
     141  }
     142  
     143  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     144  _mm_haddq_epi32(__m128i __A)
     145  {
     146    return  (__m128i) __builtin_ia32_vphadddq ((__v4si)__A);
     147  }
     148  
     149  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     150  _mm_haddw_epu8(__m128i __A)
     151  {
     152    return  (__m128i) __builtin_ia32_vphaddubw ((__v16qi)__A);
     153  }
     154  
     155  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     156  _mm_haddd_epu8(__m128i __A)
     157  {
     158    return  (__m128i) __builtin_ia32_vphaddubd ((__v16qi)__A);
     159  }
     160  
     161  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     162  _mm_haddq_epu8(__m128i __A)
     163  {
     164    return  (__m128i) __builtin_ia32_vphaddubq ((__v16qi)__A);
     165  }
     166  
     167  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     168  _mm_haddd_epu16(__m128i __A)
     169  {
     170    return  (__m128i) __builtin_ia32_vphadduwd ((__v8hi)__A);
     171  }
     172  
     173  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     174  _mm_haddq_epu16(__m128i __A)
     175  {
     176    return  (__m128i) __builtin_ia32_vphadduwq ((__v8hi)__A);
     177  }
     178  
     179  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     180  _mm_haddq_epu32(__m128i __A)
     181  {
     182    return  (__m128i) __builtin_ia32_vphaddudq ((__v4si)__A);
     183  }
     184  
     185  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     186  _mm_hsubw_epi8(__m128i __A)
     187  {
     188    return  (__m128i) __builtin_ia32_vphsubbw ((__v16qi)__A);
     189  }
     190  
     191  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     192  _mm_hsubd_epi16(__m128i __A)
     193  {
     194    return  (__m128i) __builtin_ia32_vphsubwd ((__v8hi)__A);
     195  }
     196  
     197  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     198  _mm_hsubq_epi32(__m128i __A)
     199  {
     200    return  (__m128i) __builtin_ia32_vphsubdq ((__v4si)__A);
     201  }
     202  
     203  /* Vector conditional move and permute */
     204  
     205  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     206  _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)
     207  {
     208    return  (__m128i) __builtin_ia32_vpcmov (__A, __B, __C);
     209  }
     210  
     211  extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     212  _mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C)
     213  {
     214    return  (__m256i) __builtin_ia32_vpcmov256 (__A, __B, __C);
     215  }
     216  
     217  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     218  _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
     219  {
     220    return  (__m128i) __builtin_ia32_vpperm ((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
     221  }
     222  
     223  /* Packed Integer Rotates and Shifts
     224     Rotates - Non-Immediate form */
     225  
     226  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     227  _mm_rot_epi8(__m128i __A,  __m128i __B)
     228  {
     229    return  (__m128i) __builtin_ia32_vprotb ((__v16qi)__A, (__v16qi)__B);
     230  }
     231  
     232  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     233  _mm_rot_epi16(__m128i __A,  __m128i __B)
     234  {
     235    return  (__m128i) __builtin_ia32_vprotw ((__v8hi)__A, (__v8hi)__B);
     236  }
     237  
     238  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     239  _mm_rot_epi32(__m128i __A,  __m128i __B)
     240  {
     241    return  (__m128i) __builtin_ia32_vprotd ((__v4si)__A, (__v4si)__B);
     242  }
     243  
     244  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     245  _mm_rot_epi64(__m128i __A,  __m128i __B)
     246  {
     247    return (__m128i)  __builtin_ia32_vprotq ((__v2di)__A, (__v2di)__B);
     248  }
     249  
     250  /* Rotates - Immediate form */
     251  
     252  #ifdef __OPTIMIZE__
     253  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     254  _mm_roti_epi8(__m128i __A, const int __B)
     255  {
     256    return  (__m128i) __builtin_ia32_vprotbi ((__v16qi)__A, __B);
     257  }
     258  
     259  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     260  _mm_roti_epi16(__m128i __A, const int __B)
     261  {
     262    return  (__m128i) __builtin_ia32_vprotwi ((__v8hi)__A, __B);
     263  }
     264  
     265  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     266  _mm_roti_epi32(__m128i __A, const int __B)
     267  {
     268    return  (__m128i) __builtin_ia32_vprotdi ((__v4si)__A, __B);
     269  }
     270  
     271  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     272  _mm_roti_epi64(__m128i __A, const int __B)
     273  {
     274    return  (__m128i) __builtin_ia32_vprotqi ((__v2di)__A, __B);
     275  }
     276  #else
/* Without __OPTIMIZE__ the immediate-form rotates are macros instead of
   inline functions, so N is substituted textually into the builtin call.
   NOTE(review): presumably because the builtin needs a literal rotate
   count, which the inline wrappers cannot guarantee without optimization
   -- confirm.  Each macro casts A through __m128i to the element vector
   type used by its builtin and casts N to int.  */
#define _mm_roti_epi8(A, N) \
  ((__m128i) __builtin_ia32_vprotbi ((__v16qi)(__m128i)(A), (int)(N)))
#define _mm_roti_epi16(A, N) \
  ((__m128i) __builtin_ia32_vprotwi ((__v8hi)(__m128i)(A), (int)(N)))
#define _mm_roti_epi32(A, N) \
  ((__m128i) __builtin_ia32_vprotdi ((__v4si)(__m128i)(A), (int)(N)))
#define _mm_roti_epi64(A, N) \
  ((__m128i) __builtin_ia32_vprotqi ((__v2di)(__m128i)(A), (int)(N)))
     285  #endif
     286  
     287  /* Shifts */
     288  
     289  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     290  _mm_shl_epi8(__m128i __A,  __m128i __B)
     291  {
     292    return  (__m128i) __builtin_ia32_vpshlb ((__v16qi)__A, (__v16qi)__B);
     293  }
     294  
     295  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     296  _mm_shl_epi16(__m128i __A,  __m128i __B)
     297  {
     298    return  (__m128i) __builtin_ia32_vpshlw ((__v8hi)__A, (__v8hi)__B);
     299  }
     300  
     301  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     302  _mm_shl_epi32(__m128i __A,  __m128i __B)
     303  {
     304    return  (__m128i) __builtin_ia32_vpshld ((__v4si)__A, (__v4si)__B);
     305  }
     306  
     307  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     308  _mm_shl_epi64(__m128i __A,  __m128i __B)
     309  {
     310    return  (__m128i) __builtin_ia32_vpshlq ((__v2di)__A, (__v2di)__B);
     311  }
     312  
     313  
     314  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     315  _mm_sha_epi8(__m128i __A,  __m128i __B)
     316  {
     317    return  (__m128i) __builtin_ia32_vpshab ((__v16qi)__A, (__v16qi)__B);
     318  }
     319  
     320  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     321  _mm_sha_epi16(__m128i __A,  __m128i __B)
     322  {
     323    return  (__m128i) __builtin_ia32_vpshaw ((__v8hi)__A, (__v8hi)__B);
     324  }
     325  
     326  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     327  _mm_sha_epi32(__m128i __A,  __m128i __B)
     328  {
     329    return  (__m128i) __builtin_ia32_vpshad ((__v4si)__A, (__v4si)__B);
     330  }
     331  
     332  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     333  _mm_sha_epi64(__m128i __A,  __m128i __B)
     334  {
     335    return  (__m128i) __builtin_ia32_vpshaq ((__v2di)__A, (__v2di)__B);
     336  }
     337  
     338  /* Compare and Predicate Generation
     339     pcom (integer, unsigned bytes) */
     340  
     341  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     342  _mm_comlt_epu8(__m128i __A, __m128i __B)
     343  {
     344    return (__m128i) __builtin_ia32_vpcomltub ((__v16qi)__A, (__v16qi)__B);
     345  }
     346  
     347  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     348  _mm_comle_epu8(__m128i __A, __m128i __B)
     349  {
     350    return (__m128i) __builtin_ia32_vpcomleub ((__v16qi)__A, (__v16qi)__B);
     351  }
     352  
     353  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     354  _mm_comgt_epu8(__m128i __A, __m128i __B)
     355  {
     356    return (__m128i) __builtin_ia32_vpcomgtub ((__v16qi)__A, (__v16qi)__B);
     357  }
     358  
     359  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     360  _mm_comge_epu8(__m128i __A, __m128i __B)
     361  {
     362    return (__m128i) __builtin_ia32_vpcomgeub ((__v16qi)__A, (__v16qi)__B);
     363  }
     364  
     365  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     366  _mm_comeq_epu8(__m128i __A, __m128i __B)
     367  {
     368    return (__m128i) __builtin_ia32_vpcomequb ((__v16qi)__A, (__v16qi)__B);
     369  }
     370  
     371  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     372  _mm_comneq_epu8(__m128i __A, __m128i __B)
     373  {
     374    return (__m128i) __builtin_ia32_vpcomnequb ((__v16qi)__A, (__v16qi)__B);
     375  }
     376  
     377  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     378  _mm_comfalse_epu8(__m128i __A, __m128i __B)
     379  {
     380    return (__m128i) __builtin_ia32_vpcomfalseub ((__v16qi)__A, (__v16qi)__B);
     381  }
     382  
     383  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     384  _mm_comtrue_epu8(__m128i __A, __m128i __B)
     385  {
     386    return (__m128i) __builtin_ia32_vpcomtrueub ((__v16qi)__A, (__v16qi)__B);
     387  }
     388  
/* pcom (integer, unsigned words) */
     390  
     391  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     392  _mm_comlt_epu16(__m128i __A, __m128i __B)
     393  {
     394    return (__m128i) __builtin_ia32_vpcomltuw ((__v8hi)__A, (__v8hi)__B);
     395  }
     396  
     397  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     398  _mm_comle_epu16(__m128i __A, __m128i __B)
     399  {
     400    return (__m128i) __builtin_ia32_vpcomleuw ((__v8hi)__A, (__v8hi)__B);
     401  }
     402  
     403  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     404  _mm_comgt_epu16(__m128i __A, __m128i __B)
     405  {
     406    return (__m128i) __builtin_ia32_vpcomgtuw ((__v8hi)__A, (__v8hi)__B);
     407  }
     408  
     409  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     410  _mm_comge_epu16(__m128i __A, __m128i __B)
     411  {
     412    return (__m128i) __builtin_ia32_vpcomgeuw ((__v8hi)__A, (__v8hi)__B);
     413  }
     414  
     415  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     416  _mm_comeq_epu16(__m128i __A, __m128i __B)
     417  {
     418    return (__m128i) __builtin_ia32_vpcomequw ((__v8hi)__A, (__v8hi)__B);
     419  }
     420  
     421  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     422  _mm_comneq_epu16(__m128i __A, __m128i __B)
     423  {
     424    return (__m128i) __builtin_ia32_vpcomnequw ((__v8hi)__A, (__v8hi)__B);
     425  }
     426  
     427  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     428  _mm_comfalse_epu16(__m128i __A, __m128i __B)
     429  {
     430    return (__m128i) __builtin_ia32_vpcomfalseuw ((__v8hi)__A, (__v8hi)__B);
     431  }
     432  
     433  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     434  _mm_comtrue_epu16(__m128i __A, __m128i __B)
     435  {
     436    return (__m128i) __builtin_ia32_vpcomtrueuw ((__v8hi)__A, (__v8hi)__B);
     437  }
     438  
/* pcom (integer, unsigned double words) */
     440  
     441  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     442  _mm_comlt_epu32(__m128i __A, __m128i __B)
     443  {
     444    return (__m128i) __builtin_ia32_vpcomltud ((__v4si)__A, (__v4si)__B);
     445  }
     446  
     447  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     448  _mm_comle_epu32(__m128i __A, __m128i __B)
     449  {
     450    return (__m128i) __builtin_ia32_vpcomleud ((__v4si)__A, (__v4si)__B);
     451  }
     452  
     453  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     454  _mm_comgt_epu32(__m128i __A, __m128i __B)
     455  {
     456    return (__m128i) __builtin_ia32_vpcomgtud ((__v4si)__A, (__v4si)__B);
     457  }
     458  
     459  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     460  _mm_comge_epu32(__m128i __A, __m128i __B)
     461  {
     462    return (__m128i) __builtin_ia32_vpcomgeud ((__v4si)__A, (__v4si)__B);
     463  }
     464  
     465  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     466  _mm_comeq_epu32(__m128i __A, __m128i __B)
     467  {
     468    return (__m128i) __builtin_ia32_vpcomequd ((__v4si)__A, (__v4si)__B);
     469  }
     470  
     471  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     472  _mm_comneq_epu32(__m128i __A, __m128i __B)
     473  {
     474    return (__m128i) __builtin_ia32_vpcomnequd ((__v4si)__A, (__v4si)__B);
     475  }
     476  
     477  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     478  _mm_comfalse_epu32(__m128i __A, __m128i __B)
     479  {
     480    return (__m128i) __builtin_ia32_vpcomfalseud ((__v4si)__A, (__v4si)__B);
     481  }
     482  
     483  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     484  _mm_comtrue_epu32(__m128i __A, __m128i __B)
     485  {
     486    return (__m128i) __builtin_ia32_vpcomtrueud ((__v4si)__A, (__v4si)__B);
     487  }
     488  
/* pcom (integer, unsigned quad words) */
     490  
     491  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     492  _mm_comlt_epu64(__m128i __A, __m128i __B)
     493  {
     494    return (__m128i) __builtin_ia32_vpcomltuq ((__v2di)__A, (__v2di)__B);
     495  }
     496  
     497  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     498  _mm_comle_epu64(__m128i __A, __m128i __B)
     499  {
     500    return (__m128i) __builtin_ia32_vpcomleuq ((__v2di)__A, (__v2di)__B);
     501  }
     502  
     503  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     504  _mm_comgt_epu64(__m128i __A, __m128i __B)
     505  {
     506    return (__m128i) __builtin_ia32_vpcomgtuq ((__v2di)__A, (__v2di)__B);
     507  }
     508  
     509  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     510  _mm_comge_epu64(__m128i __A, __m128i __B)
     511  {
     512    return (__m128i) __builtin_ia32_vpcomgeuq ((__v2di)__A, (__v2di)__B);
     513  }
     514  
     515  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     516  _mm_comeq_epu64(__m128i __A, __m128i __B)
     517  {
     518    return (__m128i) __builtin_ia32_vpcomequq ((__v2di)__A, (__v2di)__B);
     519  }
     520  
     521  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     522  _mm_comneq_epu64(__m128i __A, __m128i __B)
     523  {
     524    return (__m128i) __builtin_ia32_vpcomnequq ((__v2di)__A, (__v2di)__B);
     525  }
     526  
     527  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     528  _mm_comfalse_epu64(__m128i __A, __m128i __B)
     529  {
     530    return (__m128i) __builtin_ia32_vpcomfalseuq ((__v2di)__A, (__v2di)__B);
     531  }
     532  
     533  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     534  _mm_comtrue_epu64(__m128i __A, __m128i __B)
     535  {
     536    return (__m128i) __builtin_ia32_vpcomtrueuq ((__v2di)__A, (__v2di)__B);
     537  }
     538  
/* pcom (integer, signed bytes) */
     540  
     541  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     542  _mm_comlt_epi8(__m128i __A, __m128i __B)
     543  {
     544    return (__m128i) __builtin_ia32_vpcomltb ((__v16qi)__A, (__v16qi)__B);
     545  }
     546  
     547  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     548  _mm_comle_epi8(__m128i __A, __m128i __B)
     549  {
     550    return (__m128i) __builtin_ia32_vpcomleb ((__v16qi)__A, (__v16qi)__B);
     551  }
     552  
     553  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     554  _mm_comgt_epi8(__m128i __A, __m128i __B)
     555  {
     556    return (__m128i) __builtin_ia32_vpcomgtb ((__v16qi)__A, (__v16qi)__B);
     557  }
     558  
     559  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     560  _mm_comge_epi8(__m128i __A, __m128i __B)
     561  {
     562    return (__m128i) __builtin_ia32_vpcomgeb ((__v16qi)__A, (__v16qi)__B);
     563  }
     564  
     565  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     566  _mm_comeq_epi8(__m128i __A, __m128i __B)
     567  {
     568    return (__m128i) __builtin_ia32_vpcomeqb ((__v16qi)__A, (__v16qi)__B);
     569  }
     570  
     571  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     572  _mm_comneq_epi8(__m128i __A, __m128i __B)
     573  {
     574    return (__m128i) __builtin_ia32_vpcomneqb ((__v16qi)__A, (__v16qi)__B);
     575  }
     576  
     577  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     578  _mm_comfalse_epi8(__m128i __A, __m128i __B)
     579  {
     580    return (__m128i) __builtin_ia32_vpcomfalseb ((__v16qi)__A, (__v16qi)__B);
     581  }
     582  
     583  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     584  _mm_comtrue_epi8(__m128i __A, __m128i __B)
     585  {
     586    return (__m128i) __builtin_ia32_vpcomtrueb ((__v16qi)__A, (__v16qi)__B);
     587  }
     588  
/* pcom (integer, signed words) */
     590  
     591  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     592  _mm_comlt_epi16(__m128i __A, __m128i __B)
     593  {
     594    return (__m128i) __builtin_ia32_vpcomltw ((__v8hi)__A, (__v8hi)__B);
     595  }
     596  
     597  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     598  _mm_comle_epi16(__m128i __A, __m128i __B)
     599  {
     600    return (__m128i) __builtin_ia32_vpcomlew ((__v8hi)__A, (__v8hi)__B);
     601  }
     602  
     603  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     604  _mm_comgt_epi16(__m128i __A, __m128i __B)
     605  {
     606    return (__m128i) __builtin_ia32_vpcomgtw ((__v8hi)__A, (__v8hi)__B);
     607  }
     608  
     609  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     610  _mm_comge_epi16(__m128i __A, __m128i __B)
     611  {
     612    return (__m128i) __builtin_ia32_vpcomgew ((__v8hi)__A, (__v8hi)__B);
     613  }
     614  
     615  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     616  _mm_comeq_epi16(__m128i __A, __m128i __B)
     617  {
     618    return (__m128i) __builtin_ia32_vpcomeqw ((__v8hi)__A, (__v8hi)__B);
     619  }
     620  
     621  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     622  _mm_comneq_epi16(__m128i __A, __m128i __B)
     623  {
     624    return (__m128i) __builtin_ia32_vpcomneqw ((__v8hi)__A, (__v8hi)__B);
     625  }
     626  
     627  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     628  _mm_comfalse_epi16(__m128i __A, __m128i __B)
     629  {
     630    return (__m128i) __builtin_ia32_vpcomfalsew ((__v8hi)__A, (__v8hi)__B);
     631  }
     632  
     633  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     634  _mm_comtrue_epi16(__m128i __A, __m128i __B)
     635  {
     636    return (__m128i) __builtin_ia32_vpcomtruew ((__v8hi)__A, (__v8hi)__B);
     637  }
     638  
/* pcom (integer, signed double words) */
     640  
     641  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     642  _mm_comlt_epi32(__m128i __A, __m128i __B)
     643  {
     644    return (__m128i) __builtin_ia32_vpcomltd ((__v4si)__A, (__v4si)__B);
     645  }
     646  
     647  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     648  _mm_comle_epi32(__m128i __A, __m128i __B)
     649  {
     650    return (__m128i) __builtin_ia32_vpcomled ((__v4si)__A, (__v4si)__B);
     651  }
     652  
     653  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     654  _mm_comgt_epi32(__m128i __A, __m128i __B)
     655  {
     656    return (__m128i) __builtin_ia32_vpcomgtd ((__v4si)__A, (__v4si)__B);
     657  }
     658  
     659  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     660  _mm_comge_epi32(__m128i __A, __m128i __B)
     661  {
     662    return (__m128i) __builtin_ia32_vpcomged ((__v4si)__A, (__v4si)__B);
     663  }
     664  
     665  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     666  _mm_comeq_epi32(__m128i __A, __m128i __B)
     667  {
     668    return (__m128i) __builtin_ia32_vpcomeqd ((__v4si)__A, (__v4si)__B);
     669  }
     670  
     671  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     672  _mm_comneq_epi32(__m128i __A, __m128i __B)
     673  {
     674    return (__m128i) __builtin_ia32_vpcomneqd ((__v4si)__A, (__v4si)__B);
     675  }
     676  
     677  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     678  _mm_comfalse_epi32(__m128i __A, __m128i __B)
     679  {
     680    return (__m128i) __builtin_ia32_vpcomfalsed ((__v4si)__A, (__v4si)__B);
     681  }
     682  
     683  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     684  _mm_comtrue_epi32(__m128i __A, __m128i __B)
     685  {
     686    return (__m128i) __builtin_ia32_vpcomtrued ((__v4si)__A, (__v4si)__B);
     687  }
     688  
/* pcom (integer, signed quad words) */
     690  
     691  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     692  _mm_comlt_epi64(__m128i __A, __m128i __B)
     693  {
     694    return (__m128i) __builtin_ia32_vpcomltq ((__v2di)__A, (__v2di)__B);
     695  }
     696  
     697  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     698  _mm_comle_epi64(__m128i __A, __m128i __B)
     699  {
     700    return (__m128i) __builtin_ia32_vpcomleq ((__v2di)__A, (__v2di)__B);
     701  }
     702  
     703  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     704  _mm_comgt_epi64(__m128i __A, __m128i __B)
     705  {
     706    return (__m128i) __builtin_ia32_vpcomgtq ((__v2di)__A, (__v2di)__B);
     707  }
     708  
     709  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     710  _mm_comge_epi64(__m128i __A, __m128i __B)
     711  {
     712    return (__m128i) __builtin_ia32_vpcomgeq ((__v2di)__A, (__v2di)__B);
     713  }
     714  
     715  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     716  _mm_comeq_epi64(__m128i __A, __m128i __B)
     717  {
     718    return (__m128i) __builtin_ia32_vpcomeqq ((__v2di)__A, (__v2di)__B);
     719  }
     720  
     721  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     722  _mm_comneq_epi64(__m128i __A, __m128i __B)
     723  {
     724    return (__m128i) __builtin_ia32_vpcomneqq ((__v2di)__A, (__v2di)__B);
     725  }
     726  
     727  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     728  _mm_comfalse_epi64(__m128i __A, __m128i __B)
     729  {
     730    return (__m128i) __builtin_ia32_vpcomfalseq ((__v2di)__A, (__v2di)__B);
     731  }
     732  
     733  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     734  _mm_comtrue_epi64(__m128i __A, __m128i __B)
     735  {
     736    return (__m128i) __builtin_ia32_vpcomtrueq ((__v2di)__A, (__v2di)__B);
     737  }
     738  
     739  /* FRCZ */
     740  
     741  extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     742  _mm_frcz_ps (__m128 __A)
     743  {
     744    return (__m128) __builtin_ia32_vfrczps ((__v4sf)__A);
     745  }
     746  
     747  extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     748  _mm_frcz_pd (__m128d __A)
     749  {
     750    return (__m128d) __builtin_ia32_vfrczpd ((__v2df)__A);
     751  }
     752  
     753  extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     754  _mm_frcz_ss (__m128 __A, __m128 __B)
     755  {
     756    return (__m128) __builtin_ia32_movss ((__v4sf)__A,
     757  					(__v4sf)
     758  					__builtin_ia32_vfrczss ((__v4sf)__B));
     759  }
     760  
     761  extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     762  _mm_frcz_sd (__m128d __A, __m128d __B)
     763  {
     764    return (__m128d) __builtin_ia32_movsd ((__v2df)__A,
     765  					 (__v2df)
     766  					 __builtin_ia32_vfrczsd ((__v2df)__B));
     767  }
     768  
     769  extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     770  _mm256_frcz_ps (__m256 __A)
     771  {
     772    return (__m256) __builtin_ia32_vfrczps256 ((__v8sf)__A);
     773  }
     774  
     775  extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     776  _mm256_frcz_pd (__m256d __A)
     777  {
     778    return (__m256d) __builtin_ia32_vfrczpd256 ((__v4df)__A);
     779  }
     780  
     781  /* PERMIL2 */
     782  
     783  #ifdef __OPTIMIZE__
     784  extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     785  _mm_permute2_pd (__m128d __X, __m128d __Y, __m128i __C, const int __I)
     786  {
     787    return (__m128d) __builtin_ia32_vpermil2pd ((__v2df)__X,
     788  					      (__v2df)__Y,
     789  					      (__v2di)__C,
     790  					      __I);
     791  }
     792  
     793  extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     794  _mm256_permute2_pd (__m256d __X, __m256d __Y, __m256i __C, const int __I)
     795  {
     796    return (__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)__X,
     797  						 (__v4df)__Y,
     798  						 (__v4di)__C,
     799  						 __I);
     800  }
     801  
     802  extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     803  _mm_permute2_ps (__m128 __X, __m128 __Y, __m128i __C, const int __I)
     804  {
     805    return (__m128) __builtin_ia32_vpermil2ps ((__v4sf)__X,
     806  					     (__v4sf)__Y,
     807  					     (__v4si)__C,
     808  					     __I);
     809  }
     810  
     811  extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     812  _mm256_permute2_ps (__m256 __X, __m256 __Y, __m256i __C, const int __I)
     813  {
     814    return (__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)__X,
     815  						(__v8sf)__Y,
     816  						(__v8si)__C,
     817  						__I);
     818  }
     819  #else
     820  #define _mm_permute2_pd(X, Y, C, I)					\
     821    ((__m128d) __builtin_ia32_vpermil2pd ((__v2df)(__m128d)(X),		\
     822  					(__v2df)(__m128d)(Y),		\
     823  					(__v2di)(__m128i)(C),		\
     824  					(int)(I)))
     825  
     826  #define _mm256_permute2_pd(X, Y, C, I)					\
     827    ((__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)(__m256d)(X),	\
     828  					   (__v4df)(__m256d)(Y),	\
     829  					   (__v4di)(__m256i)(C),	\
     830  					   (int)(I)))
     831  
     832  #define _mm_permute2_ps(X, Y, C, I)					\
     833    ((__m128) __builtin_ia32_vpermil2ps ((__v4sf)(__m128)(X),		\
     834  				       (__v4sf)(__m128)(Y),		\
     835  				       (__v4si)(__m128i)(C),		\
     836  				       (int)(I)))
     837  
     838  #define _mm256_permute2_ps(X, Y, C, I)					\
     839    ((__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)(__m256)(X),		\
     840  					  (__v8sf)(__m256)(Y),  	\
     841  					  (__v8si)(__m256i)(C),		\
     842   					  (int)(I)))
     843  #endif /* __OPTIMIZE__ */
     844  
     845  #ifdef __DISABLE_XOP__
     846  #undef __DISABLE_XOP__
     847  #pragma GCC pop_options
     848  #endif /* __DISABLE_XOP__ */
     849  
     850  #endif /* _XOPMMINTRIN_H_INCLUDED */