1  /* Copyright (C) 2017-2023 Free Software Foundation, Inc.
       2  
       3     This file is part of GCC.
       4  
       5     GCC is free software; you can redistribute it and/or modify
       6     it under the terms of the GNU General Public License as published by
       7     the Free Software Foundation; either version 3, or (at your option)
       8     any later version.
       9  
      10     GCC is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13     GNU General Public License for more details.
      14  
      15     Under Section 7 of GPL version 3, you are granted additional
      16     permissions described in the GCC Runtime Library Exception, version
      17     3.1, as published by the Free Software Foundation.
      18  
      19     You should have received a copy of the GNU General Public License and
      20     a copy of the GCC Runtime Library Exception along with this program;
      21     see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
      22     <http://www.gnu.org/licenses/>.  */
      23  
      24  #ifndef _IMMINTRIN_H_INCLUDED
      25  #error "Never use <gfniintrin.h> directly; include <immintrin.h> instead."
      26  #endif
      27  
      28  #ifndef _GFNIINTRIN_H_INCLUDED
      29  #define _GFNIINTRIN_H_INCLUDED
      30  
      31  #if !defined(__GFNI__) || !defined(__SSE2__)
      32  #pragma GCC push_options
      33  #pragma GCC target("gfni,sse2")
      34  #define __DISABLE_GFNI__
      35  #endif /* __GFNI__ */
      36  
      37  extern __inline __m128i
      38  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      39  _mm_gf2p8mul_epi8 (__m128i __A, __m128i __B)
      40  {
      41    return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi((__v16qi) __A,
      42  						   (__v16qi) __B);
      43  }
      44  
      45  #ifdef __OPTIMIZE__
      46  extern __inline __m128i
      47  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      48  _mm_gf2p8affineinv_epi64_epi8 (__m128i __A, __m128i __B, const int __C)
      49  {
      50    return (__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi ((__v16qi) __A,
      51  							   (__v16qi) __B,
      52  							    __C);
      53  }
      54  
      55  extern __inline __m128i
      56  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      57  _mm_gf2p8affine_epi64_epi8 (__m128i __A, __m128i __B, const int __C)
      58  {
      59    return (__m128i) __builtin_ia32_vgf2p8affineqb_v16qi ((__v16qi) __A,
      60  							(__v16qi) __B, __C);
      61  }
      62  #else
      63  #define _mm_gf2p8affineinv_epi64_epi8(A, B, C)				   \
      64    ((__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \
      65  					   (__v16qi)(__m128i)(B), (int)(C)))
      66  #define _mm_gf2p8affine_epi64_epi8(A, B, C)				   \
      67    ((__m128i) __builtin_ia32_vgf2p8affineqb_v16qi ((__v16qi)(__m128i)(A),   \
      68  					   (__v16qi)(__m128i)(B), (int)(C)))
      69  #endif
      70  
      71  #ifdef __DISABLE_GFNI__
      72  #undef __DISABLE_GFNI__
      73  #pragma GCC pop_options
      74  #endif /* __DISABLE_GFNI__ */
      75  
      76  #if !defined(__GFNI__) || !defined(__AVX__)
      77  #pragma GCC push_options
      78  #pragma GCC target("gfni,avx")
      79  #define __DISABLE_GFNIAVX__
      80  #endif /* __GFNIAVX__ */
      81  
      82  extern __inline __m256i
      83  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      84  _mm256_gf2p8mul_epi8 (__m256i __A, __m256i __B)
      85  {
      86    return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi ((__v32qi) __A,
      87  						    (__v32qi) __B);
      88  }
      89  
      90  #ifdef __OPTIMIZE__
      91  extern __inline __m256i
      92  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      93  _mm256_gf2p8affineinv_epi64_epi8 (__m256i __A, __m256i __B, const int __C)
      94  {
      95    return (__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi ((__v32qi) __A,
      96  							   (__v32qi) __B,
      97  							    __C);
      98  }
      99  
     100  extern __inline __m256i
     101  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     102  _mm256_gf2p8affine_epi64_epi8 (__m256i __A, __m256i __B, const int __C)
     103  {
     104    return (__m256i) __builtin_ia32_vgf2p8affineqb_v32qi ((__v32qi) __A,
     105  							(__v32qi) __B, __C);
     106  }
     107  #else
     108  #define _mm256_gf2p8affineinv_epi64_epi8(A, B, C)			   \
     109    ((__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A), \
     110  						    (__v32qi)(__m256i)(B), \
     111  						    (int)(C)))
     112  #define _mm256_gf2p8affine_epi64_epi8(A, B, C)				   \
     113    ((__m256i) __builtin_ia32_vgf2p8affineqb_v32qi ((__v32qi)(__m256i)(A),   \
     114  					(   __v32qi)(__m256i)(B), (int)(C)))
     115  #endif
     116  
     117  #ifdef __DISABLE_GFNIAVX__
     118  #undef __DISABLE_GFNIAVX__
     119  #pragma GCC pop_options
#endif /* __DISABLE_GFNIAVX__ */
     121  
     122  #if !defined(__GFNI__) || !defined(__AVX512VL__)
     123  #pragma GCC push_options
     124  #pragma GCC target("gfni,avx512vl")
     125  #define __DISABLE_GFNIAVX512VL__
     126  #endif /* __GFNIAVX512VL__ */
     127  
     128  extern __inline __m128i
     129  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     130  _mm_mask_gf2p8mul_epi8 (__m128i __A, __mmask16 __B, __m128i __C, __m128i __D)
     131  {
     132    return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi_mask ((__v16qi) __C,
     133  							 (__v16qi) __D,
     134  							 (__v16qi)__A, __B);
     135  }
     136  
     137  extern __inline __m128i
     138  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     139  _mm_maskz_gf2p8mul_epi8 (__mmask16 __A, __m128i __B, __m128i __C)
     140  {
     141    return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi_mask ((__v16qi) __B,
     142  			(__v16qi) __C, (__v16qi) _mm_setzero_si128 (), __A);
     143  }
     144  
     145  #ifdef __OPTIMIZE__
     146  extern __inline __m128i
     147  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     148  _mm_mask_gf2p8affineinv_epi64_epi8 (__m128i __A, __mmask16 __B, __m128i __C,
     149  				    __m128i __D, const int __E)
     150  {
     151    return (__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask ((__v16qi) __C,
     152  								(__v16qi) __D,
     153  								 __E,
     154  								(__v16qi)__A,
     155  								 __B);
     156  }
     157  
     158  extern __inline __m128i
     159  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     160  _mm_maskz_gf2p8affineinv_epi64_epi8 (__mmask16 __A, __m128i __B, __m128i __C,
     161  				     const int __D)
     162  {
     163    return (__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask ((__v16qi) __B,
     164  						(__v16qi) __C, __D,
     165  						(__v16qi) _mm_setzero_si128 (),
     166  						 __A);
     167  }
     168  
     169  extern __inline __m128i
     170  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     171  _mm_mask_gf2p8affine_epi64_epi8 (__m128i __A, __mmask16 __B, __m128i __C,
     172  				 __m128i __D, const int __E)
     173  {
     174    return (__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask ((__v16qi) __C,
     175  					(__v16qi) __D, __E, (__v16qi)__A, __B);
     176  }
     177  
     178  extern __inline __m128i
     179  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     180  _mm_maskz_gf2p8affine_epi64_epi8 (__mmask16 __A, __m128i __B, __m128i __C,
     181  				  const int __D)
     182  {
     183    return (__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask ((__v16qi) __B,
     184  		     (__v16qi) __C, __D, (__v16qi) _mm_setzero_si128 (), __A);
     185  }
     186  #else
     187  #define _mm_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E) 		   \
     188    ((__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask(		   \
     189  			(__v16qi)(__m128i)(C), (__v16qi)(__m128i)(D),      \
     190  			(int)(E), (__v16qi)(__m128i)(A), (__mmask16)(B)))
     191  #define _mm_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D) \
     192    ((__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask(		   \
     193  			(__v16qi)(__m128i)(B), (__v16qi)(__m128i)(C),	   \
     194  			(int)(D), (__v16qi)(__m128i) _mm_setzero_si128 (), \
     195  			(__mmask16)(A)))
     196  #define _mm_mask_gf2p8affine_epi64_epi8(A, B, C, D, E) \
     197    ((__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask((__v16qi)(__m128i)(C),\
     198        (__v16qi)(__m128i)(D), (int)(E), (__v16qi)(__m128i)(A), (__mmask16)(B)))
     199  #define _mm_maskz_gf2p8affine_epi64_epi8(A, B, C, D)			    \
     200    ((__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask((__v16qi)(__m128i)(B),\
     201  		(__v16qi)(__m128i)(C), (int)(D),			    \
     202  		(__v16qi)(__m128i) _mm_setzero_si128 (), (__mmask16)(A)))
     203  #endif
     204  
     205  #ifdef __DISABLE_GFNIAVX512VL__
     206  #undef __DISABLE_GFNIAVX512VL__
     207  #pragma GCC pop_options
#endif /* __DISABLE_GFNIAVX512VL__ */
     209  
     210  #if !defined(__GFNI__) || !defined(__AVX512VL__) || !defined(__AVX512BW__)
     211  #pragma GCC push_options
     212  #pragma GCC target("gfni,avx512vl,avx512bw")
     213  #define __DISABLE_GFNIAVX512VLBW__
     214  #endif /* __GFNIAVX512VLBW__ */
     215  
     216  extern __inline __m256i
     217  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     218  _mm256_mask_gf2p8mul_epi8 (__m256i __A, __mmask32 __B, __m256i __C,
     219  			   __m256i __D)
     220  {
     221    return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi_mask ((__v32qi) __C,
     222  							 (__v32qi) __D,
     223  							 (__v32qi)__A, __B);
     224  }
     225  
     226  extern __inline __m256i
     227  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     228  _mm256_maskz_gf2p8mul_epi8 (__mmask32 __A, __m256i __B, __m256i __C)
     229  {
     230    return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi_mask ((__v32qi) __B,
     231  			(__v32qi) __C, (__v32qi) _mm256_setzero_si256 (), __A);
     232  }
     233  
     234  #ifdef __OPTIMIZE__
     235  extern __inline __m256i
     236  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     237  _mm256_mask_gf2p8affineinv_epi64_epi8 (__m256i __A, __mmask32 __B,
     238  				       __m256i __C, __m256i __D, const int __E)
     239  {
     240    return (__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask ((__v32qi) __C,
     241  								(__v32qi) __D,
     242  							 	 __E,
     243  								(__v32qi)__A,
     244  								 __B);
     245  }
     246  
     247  extern __inline __m256i
     248  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     249  _mm256_maskz_gf2p8affineinv_epi64_epi8 (__mmask32 __A, __m256i __B,
     250  					__m256i __C, const int __D)
     251  {
     252    return (__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask ((__v32qi) __B,
     253  				      (__v32qi) __C, __D,
     254  				      (__v32qi) _mm256_setzero_si256 (), __A);
     255  }
     256  
     257  extern __inline __m256i
     258  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     259  _mm256_mask_gf2p8affine_epi64_epi8 (__m256i __A, __mmask32 __B, __m256i __C,
     260  				    __m256i __D, const int __E)
     261  {
     262    return (__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask ((__v32qi) __C,
     263  							     (__v32qi) __D,
     264  							      __E,
     265  							     (__v32qi)__A,
     266  							      __B);
     267  }
     268  
     269  extern __inline __m256i
     270  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     271  _mm256_maskz_gf2p8affine_epi64_epi8 (__mmask32 __A, __m256i __B,
     272  				     __m256i __C, const int __D)
     273  {
     274    return (__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask ((__v32qi) __B,
     275  		(__v32qi) __C, __D, (__v32qi)_mm256_setzero_si256 (), __A);
     276  }
     277  #else
     278  #define _mm256_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E)		\
     279    ((__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask(		\
     280  	(__v32qi)(__m256i)(C), (__v32qi)(__m256i)(D), (int)(E),		\
     281  	(__v32qi)(__m256i)(A), (__mmask32)(B)))
     282  #define _mm256_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D)		\
     283    ((__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask(		\
     284  	(__v32qi)(__m256i)(B), (__v32qi)(__m256i)(C), (int)(D),		\
     285  	(__v32qi)(__m256i) _mm256_setzero_si256 (), (__mmask32)(A)))
     286  #define _mm256_mask_gf2p8affine_epi64_epi8(A, B, C, D, E) 		    \
     287    ((__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask((__v32qi)(__m256i)(C),\
     288  	(__v32qi)(__m256i)(D), (int)(E), (__v32qi)(__m256i)(A), (__mmask32)(B)))
     289  #define _mm256_maskz_gf2p8affine_epi64_epi8(A, B, C, D)			    \
     290    ((__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask((__v32qi)(__m256i)(B),\
     291  	 (__v32qi)(__m256i)(C), (int)(D),				    \
     292  	 (__v32qi)(__m256i) _mm256_setzero_si256 (), (__mmask32)(A)))
     293  #endif
     294  
     295  #ifdef __DISABLE_GFNIAVX512VLBW__
     296  #undef __DISABLE_GFNIAVX512VLBW__
     297  #pragma GCC pop_options
#endif /* __DISABLE_GFNIAVX512VLBW__ */
     299  
     300  #if !defined(__GFNI__) || !defined(__AVX512F__) || !defined(__AVX512BW__)
     301  #pragma GCC push_options
     302  #pragma GCC target("gfni,avx512f,avx512bw")
     303  #define __DISABLE_GFNIAVX512FBW__
     304  #endif /* __GFNIAVX512FBW__ */
     305  
     306  extern __inline __m512i
     307  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     308  _mm512_mask_gf2p8mul_epi8 (__m512i __A, __mmask64 __B, __m512i __C,
     309  			   __m512i __D)
     310  {
     311    return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi_mask ((__v64qi) __C,
     312  					(__v64qi) __D, (__v64qi)__A, __B);
     313  }
     314  
     315  extern __inline __m512i
     316  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     317  _mm512_maskz_gf2p8mul_epi8 (__mmask64 __A, __m512i __B, __m512i __C)
     318  {
     319    return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi_mask ((__v64qi) __B,
     320  			(__v64qi) __C, (__v64qi) _mm512_setzero_si512 (), __A);
     321  }
     322  extern __inline __m512i
     323  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     324  _mm512_gf2p8mul_epi8 (__m512i __A, __m512i __B)
     325  {
     326    return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi ((__v64qi) __A,
     327  						    (__v64qi) __B);
     328  }
     329  
     330  #ifdef __OPTIMIZE__
     331  extern __inline __m512i
     332  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     333  _mm512_mask_gf2p8affineinv_epi64_epi8 (__m512i __A, __mmask64 __B, __m512i __C,
     334  				       __m512i __D, const int __E)
     335  {
     336    return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask ((__v64qi) __C,
     337  								(__v64qi) __D,
     338  								 __E,
     339  								(__v64qi)__A,
     340  								 __B);
     341  }
     342  
     343  extern __inline __m512i
     344  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     345  _mm512_maskz_gf2p8affineinv_epi64_epi8 (__mmask64 __A, __m512i __B,
     346  					__m512i __C, const int __D)
     347  {
     348    return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask ((__v64qi) __B,
     349  				(__v64qi) __C, __D,
     350  				(__v64qi) _mm512_setzero_si512 (), __A);
     351  }
     352  
     353  extern __inline __m512i
     354  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     355  _mm512_gf2p8affineinv_epi64_epi8 (__m512i __A, __m512i __B, const int __C)
     356  {
     357    return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi ((__v64qi) __A,
     358  							   (__v64qi) __B, __C);
     359  }
     360  
     361  extern __inline __m512i
     362  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     363  _mm512_mask_gf2p8affine_epi64_epi8 (__m512i __A, __mmask64 __B, __m512i __C,
     364  				    __m512i __D, const int __E)
     365  {
     366    return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask ((__v64qi) __C,
     367  					(__v64qi) __D, __E, (__v64qi)__A, __B);
     368  }
     369  
     370  extern __inline __m512i
     371  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     372  _mm512_maskz_gf2p8affine_epi64_epi8 (__mmask64 __A, __m512i __B, __m512i __C,
     373  				     const int __D)
     374  {
     375    return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask ((__v64qi) __B,
     376  		  (__v64qi) __C, __D, (__v64qi) _mm512_setzero_si512 (), __A);
     377  }
     378  extern __inline __m512i
     379  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     380  _mm512_gf2p8affine_epi64_epi8 (__m512i __A, __m512i __B, const int __C)
     381  {
     382    return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi ((__v64qi) __A,
     383  							(__v64qi) __B, __C);
     384  }
     385  #else
     386  #define _mm512_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E) 		\
     387    ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask(		\
     388  	(__v64qi)(__m512i)(C), (__v64qi)(__m512i)(D), (int)(E),		\
     389  	(__v64qi)(__m512i)(A), (__mmask64)(B)))
     390  #define _mm512_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D)		\
     391    ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask(		\
     392  	(__v64qi)(__m512i)(B), (__v64qi)(__m512i)(C), (int)(D),		\
     393  	(__v64qi)(__m512i) _mm512_setzero_si512 (), (__mmask64)(A)))
     394  #define _mm512_gf2p8affineinv_epi64_epi8(A, B, C)			\
     395    ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi (			\
     396  	(__v64qi)(__m512i)(A), (__v64qi)(__m512i)(B), (int)(C)))
     397  #define _mm512_mask_gf2p8affine_epi64_epi8(A, B, C, D, E)		    \
     398    ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask((__v64qi)(__m512i)(C),\
     399       (__v64qi)(__m512i)(D), (int)(E), (__v64qi)(__m512i)(A), (__mmask64)(B)))
     400  #define _mm512_maskz_gf2p8affine_epi64_epi8(A, B, C, D)			    \
     401    ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask((__v64qi)(__m512i)(B),\
     402  	 (__v64qi)(__m512i)(C), (int)(D),				    \
     403  	 (__v64qi)(__m512i) _mm512_setzero_si512 (), (__mmask64)(A)))
     404  #define _mm512_gf2p8affine_epi64_epi8(A, B, C)				    \
     405    ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi ((__v64qi)(__m512i)(A),    \
     406  	 (__v64qi)(__m512i)(B), (int)(C)))
     407  #endif
     408  
     409  #ifdef __DISABLE_GFNIAVX512FBW__
     410  #undef __DISABLE_GFNIAVX512FBW__
     411  #pragma GCC pop_options
#endif /* __DISABLE_GFNIAVX512FBW__ */
     413  
     414  #endif /* _GFNIINTRIN_H_INCLUDED */