(root)/
gcc-13.2.0/
gcc/
config/
i386/
avx512erintrin.h
       1  /* Copyright (C) 2013-2023 Free Software Foundation, Inc.
       2  
       3     This file is part of GCC.
       4  
       5     GCC is free software; you can redistribute it and/or modify
       6     it under the terms of the GNU General Public License as published by
       7     the Free Software Foundation; either version 3, or (at your option)
       8     any later version.
       9  
      10     GCC is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13     GNU General Public License for more details.
      14  
      15     Under Section 7 of GPL version 3, you are granted additional
      16     permissions described in the GCC Runtime Library Exception, version
      17     3.1, as published by the Free Software Foundation.
      18  
      19     You should have received a copy of the GNU General Public License and
      20     a copy of the GCC Runtime Library Exception along with this program;
      21     see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
      22     <http://www.gnu.org/licenses/>.  */
      23  
      24  #ifndef _IMMINTRIN_H_INCLUDED
      25  #error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
      26  #endif
      27  
      28  #ifndef _AVX512ERINTRIN_H_INCLUDED
      29  #define _AVX512ERINTRIN_H_INCLUDED
      30  
      31  #ifndef __AVX512ER__
      32  #pragma GCC push_options
      33  #pragma GCC target("avx512er")
      34  #define __DISABLE_AVX512ER__
      35  #endif /* __AVX512ER__ */
      36  
/* Internal data types for implementing the intrinsics.  */
typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));

/* Write-mask types: one bit per vector element (8 bits cover a __v8df,
   16 bits cover a __v16sf).  */
typedef unsigned char  __mmask8;
typedef unsigned short __mmask16;
      48  
      49  #ifdef __OPTIMIZE__
/* Approximate 2^x of each double element of __A (23 bits of accuracy,
   per the "a23" suffix), with rounding/SAE mode __R; unmasked (mask of
   all ones), destination pass-through left undefined.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_exp2a23_round_pd (__m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
					       (__v8df) _mm512_undefined_pd (),
					       (__mmask8) -1, __R);
}
      58  
/* Merge-masked 2^x: elements whose bit in __U is clear are copied
   from __W instead of being computed.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_exp2a23_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
					       (__v8df) __W,
					       (__mmask8) __U, __R);
}
      67  
/* Zero-masked 2^x: elements whose bit in __U is clear are zeroed.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_exp2a23_round_pd (__mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
					       (__v8df) _mm512_setzero_pd (),
					       (__mmask8) __U, __R);
}
      76  
/* Single-precision counterpart of _mm512_exp2a23_round_pd: approximate
   2^x of each float element of __A, unmasked.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_exp2a23_round_ps (__m512 __A, int __R)
{
  return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
					      (__v16sf) _mm512_undefined_ps (),
					      (__mmask16) -1, __R);
}
      85  
/* Merge-masked single-precision 2^x: unselected elements come from __W.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_exp2a23_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
					      (__v16sf) __W,
					      (__mmask16) __U, __R);
}
      94  
/* Zero-masked single-precision 2^x: unselected elements are zeroed.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_exp2a23_round_ps (__mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
					      (__v16sf) _mm512_setzero_ps (),
					      (__mmask16) __U, __R);
}
     103  
/* Approximate reciprocal of each double element of __A (28 bits of
   accuracy, per the "28" suffix), rounding/SAE mode __R, unmasked.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp28_round_pd (__m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
						(__v8df) _mm512_undefined_pd (),
						(__mmask8) -1, __R);
}
     112  
/* Merge-masked reciprocal: unselected elements come from __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
						(__v8df) __W,
						(__mmask8) __U, __R);
}
     121  
/* Zero-masked reciprocal: unselected elements are zeroed.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp28_round_pd (__mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
						(__v8df) _mm512_setzero_pd (),
						(__mmask8) __U, __R);
}
     130  
/* Single-precision counterpart of _mm512_rcp28_round_pd, unmasked.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp28_round_ps (__m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
					       (__v16sf) _mm512_undefined_ps (),
					       (__mmask16) -1, __R);
}
     139  
/* Merge-masked single-precision reciprocal: unselected elements from __W.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
					       (__v16sf) __W,
					       (__mmask16) __U, __R);
}
     148  
/* Zero-masked single-precision reciprocal.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp28_round_ps (__mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
					       (__v16sf) _mm512_setzero_ps (),
					       (__mmask16) __U, __R);
}
     157  
/* Approximate reciprocal of the low double of __B; the upper element of
   the result is copied from __A.  Note the builtin takes the operated-on
   source (__B) first and the pass-through source (__A) second.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rcp28sd_round ((__v2df) __B,
						 (__v2df) __A,
						 __R);
}
     166  
/* Masked scalar reciprocal: if bit 0 of __U is clear, the low result
   element is taken from __W; upper element always comes from __A.
   Builtin operand order is (__B, __A, __W, __U, __R).  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rcp28_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
			 __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rcp28sd_mask_round ((__v2df) __B,
						      (__v2df) __A,
						      (__v2df) __W,
						      __U,
						      __R);
}
     178  
/* Zero-masked scalar reciprocal: if bit 0 of __U is clear, the low
   result element is zeroed; upper element comes from __A.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rcp28_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rcp28sd_mask_round ((__v2df) __B,
						      (__v2df) __A,
						      (__v2df)
						      _mm_setzero_pd (),
						      __U,
						      __R);
}
     190  
/* Approximate reciprocal of the low float of __B; upper three result
   elements are copied from __A (builtin operand order is (__B, __A)).  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rcp28ss_round ((__v4sf) __B,
						(__v4sf) __A,
						__R);
}
     199  
/* Masked scalar single-precision reciprocal: low result element comes
   from __W when bit 0 of __U is clear; upper elements from __A.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rcp28_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
			 __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rcp28ss_mask_round ((__v4sf) __B,
						     (__v4sf) __A,
						     (__v4sf) __W,
						     __U,
						     __R);
}
     211  
/* Zero-masked scalar single-precision reciprocal: low result element is
   zeroed when bit 0 of __U is clear; upper elements from __A.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rcp28_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rcp28ss_mask_round ((__v4sf) __B,
						     (__v4sf) __A,
						     (__v4sf)
						     _mm_setzero_ps (),
						     __U,
						     __R);
}
     223  
/* Approximate reciprocal square root of each double element of __A
   (28 bits of accuracy), rounding/SAE mode __R, unmasked.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt28_round_pd (__m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
						  (__v8df) _mm512_undefined_pd (),
						  (__mmask8) -1, __R);
}
     232  
/* Merge-masked reciprocal square root: unselected elements from __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
						  (__v8df) __W,
						  (__mmask8) __U, __R);
}
     241  
/* Zero-masked reciprocal square root: unselected elements zeroed.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt28_round_pd (__mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
						  (__v8df) _mm512_setzero_pd (),
						  (__mmask8) __U, __R);
}
     250  
/* Single-precision counterpart of _mm512_rsqrt28_round_pd, unmasked.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt28_round_ps (__m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
						 (__v16sf) _mm512_undefined_ps (),
						 (__mmask16) -1, __R);
}
     259  
/* Merge-masked single-precision reciprocal square root.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
						 (__v16sf) __W,
						 (__mmask16) __U, __R);
}
     268  
/* Zero-masked single-precision reciprocal square root.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt28_round_ps (__mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
						 (__v16sf) _mm512_setzero_ps (),
						 (__mmask16) __U, __R);
}
     277  
/* Approximate reciprocal square root of the low double of __B; upper
   result element copied from __A (builtin operand order is (__B, __A)).  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rsqrt28sd_round ((__v2df) __B,
						   (__v2df) __A,
						   __R);
}
     286  
/* Masked scalar reciprocal square root: low result element comes from
   __W when bit 0 of __U is clear; upper element from __A.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rsqrt28_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
			   __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rsqrt28sd_mask_round ((__v2df) __B,
							(__v2df) __A,
							(__v2df) __W,
							__U,
							__R);
}
     298  
/* Zero-masked scalar reciprocal square root: low result element zeroed
   when bit 0 of __U is clear; upper element from __A.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rsqrt28_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rsqrt28sd_mask_round ((__v2df) __B,
							(__v2df) __A,
							(__v2df)
							_mm_setzero_pd (),
							__U,
							__R);
}
     310  
/* Approximate reciprocal square root of the low float of __B; upper
   three result elements copied from __A.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rsqrt28ss_round ((__v4sf) __B,
						  (__v4sf) __A,
						  __R);
}
     319  
/* Masked scalar single-precision reciprocal square root: low result
   element comes from __W when bit 0 of __U is clear.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rsqrt28_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
			   __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rsqrt28ss_mask_round ((__v4sf) __B,
						       (__v4sf) __A,
						       (__v4sf) __W,
						       __U,
						       __R);
}
     331  
/* Zero-masked scalar single-precision reciprocal square root: low result
   element zeroed when bit 0 of __U is clear.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rsqrt28_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rsqrt28ss_mask_round ((__v4sf) __B,
						       (__v4sf) __A,
						       (__v4sf)
						       _mm_setzero_ps (),
						       __U,
						       __R);
}
     343  
     344  #else
/* Without optimization the rounding-mode argument cannot be proven to be
   a compile-time constant through the inline functions above, so the
   intrinsics are provided as macros instead.  The unmasked forms use a
   zeroed (rather than undefined) pass-through source.  */
#define _mm512_exp2a23_round_pd(A, C)            \
    __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)

#define _mm512_mask_exp2a23_round_pd(W, U, A, C) \
    __builtin_ia32_exp2pd_mask(A, W, U, C)

#define _mm512_maskz_exp2a23_round_pd(U, A, C)   \
    __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_exp2a23_round_ps(A, C)            \
    __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)

#define _mm512_mask_exp2a23_round_ps(W, U, A, C) \
    __builtin_ia32_exp2ps_mask(A, W, U, C)

#define _mm512_maskz_exp2a23_round_ps(U, A, C)   \
    __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm512_rcp28_round_pd(A, C)            \
    __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)

#define _mm512_mask_rcp28_round_pd(W, U, A, C) \
    __builtin_ia32_rcp28pd_mask(A, W, U, C)

#define _mm512_maskz_rcp28_round_pd(U, A, C)   \
    __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_rcp28_round_ps(A, C)            \
    __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)

#define _mm512_mask_rcp28_round_ps(W, U, A, C) \
    __builtin_ia32_rcp28ps_mask(A, W, U, C)

#define _mm512_maskz_rcp28_round_ps(U, A, C)   \
    __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm512_rsqrt28_round_pd(A, C)            \
    __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)

#define _mm512_mask_rsqrt28_round_pd(W, U, A, C) \
    __builtin_ia32_rsqrt28pd_mask(A, W, U, C)

#define _mm512_maskz_rsqrt28_round_pd(U, A, C)   \
    __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_rsqrt28_round_ps(A, C)            \
    __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)

#define _mm512_mask_rsqrt28_round_ps(W, U, A, C) \
    __builtin_ia32_rsqrt28ps_mask(A, W, U, C)

#define _mm512_maskz_rsqrt28_round_ps(U, A, C)   \
    __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
     398  
     399  #define _mm_rcp28_round_sd(A, B, R)	\
     400      __builtin_ia32_rcp28sd_round(A, B, R)
     401  
     402  #define _mm_mask_rcp28_round_sd(W, U, A, B, R)	\
     403      __builtin_ia32_rcp28sd_mask_round ((A), (B), (W), (U), (R))
     404  
     405  #define _mm_maskz_rcp28_round_sd(U, A, B, R)	\
     406      __builtin_ia32_rcp28sd_mask_round ((A), (B), (__v2df) _mm_setzero_pd (), \
     407  				       (U), (R))
     408  
     409  #define _mm_rcp28_round_ss(A, B, R)	\
     410      __builtin_ia32_rcp28ss_round(A, B, R)
     411  
     412  #define _mm_mask_rcp28_round_ss(W, U, A, B, R)	\
     413      __builtin_ia32_rcp28ss_mask_round ((A), (B), (W), (U), (R))
     414  
     415  #define _mm_maskz_rcp28_round_ss(U, A, B, R)	\
     416      __builtin_ia32_rcp28ss_mask_round ((A), (B), (__v4sf) _mm_setzero_ps (), \
     417  				       (U), (R))
     418  
     419  #define _mm_rsqrt28_round_sd(A, B, R)	\
     420      __builtin_ia32_rsqrt28sd_round(A, B, R)
     421  
     422  #define _mm_mask_rsqrt28_round_sd(W, U, A, B, R)	\
     423      __builtin_ia32_rsqrt28sd_mask_round ((A), (B), (W), (U), (R))
     424  
     425  #define _mm_maskz_rsqrt28_round_sd(U, A, B, R)	\
     426      __builtin_ia32_rsqrt28sd_mask_round ((A), (B), (__v2df) _mm_setzero_pd (),\
     427  					 (U), (R))
     428  
     429  #define _mm_rsqrt28_round_ss(A, B, R)	\
     430      __builtin_ia32_rsqrt28ss_round(A, B, R)
     431  
     432  #define _mm_mask_rsqrt28_round_ss(W, U, A, B, R)	\
     433      __builtin_ia32_rsqrt28ss_mask_round ((A), (B), (W), (U), (R))
     434  
     435  #define _mm_maskz_rsqrt28_round_ss(U, A, B, R)	\
     436      __builtin_ia32_rsqrt28ss_mask_round ((A), (B), (__v4sf) _mm_setzero_ps (),\
     437  					 (U), (R))
     438  
     439  #endif
     440  
/* Current-rounding-direction variants of the masked scalar intrinsics:
   forward to the _round forms with _MM_FROUND_CUR_DIRECTION.  */
#define _mm_mask_rcp28_sd(W, U, A, B)\
    _mm_mask_rcp28_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_sd(U, A, B)\
    _mm_maskz_rcp28_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rcp28_ss(W, U, A, B)\
    _mm_mask_rcp28_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_ss(U, A, B)\
    _mm_maskz_rcp28_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_sd(W, U, A, B)\
    _mm_mask_rsqrt28_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_sd(U, A, B)\
    _mm_maskz_rsqrt28_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_ss(W, U, A, B)\
    _mm_mask_rsqrt28_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_ss(U, A, B)\
    _mm_maskz_rsqrt28_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
     464  
/* Current-rounding-direction variants of the 512-bit intrinsics:
   forward to the _round forms with _MM_FROUND_CUR_DIRECTION.  */
#define _mm512_exp2a23_pd(A)                    \
    _mm512_exp2a23_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_pd(W, U, A)   \
    _mm512_mask_exp2a23_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_pd(U, A)     \
    _mm512_maskz_exp2a23_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_exp2a23_ps(A)                    \
    _mm512_exp2a23_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_ps(W, U, A)   \
    _mm512_mask_exp2a23_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_ps(U, A)     \
    _mm512_maskz_exp2a23_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_pd(A)                    \
    _mm512_rcp28_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_pd(W, U, A)   \
    _mm512_mask_rcp28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_pd(U, A)     \
    _mm512_maskz_rcp28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_ps(A)                    \
    _mm512_rcp28_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_ps(W, U, A)   \
    _mm512_mask_rcp28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_ps(U, A)     \
    _mm512_maskz_rcp28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_pd(A)                    \
    _mm512_rsqrt28_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_pd(W, U, A)   \
    _mm512_mask_rsqrt28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_pd(U, A)     \
    _mm512_maskz_rsqrt28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_ps(A)                    \
    _mm512_rsqrt28_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_ps(W, U, A)   \
    _mm512_mask_rsqrt28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_ps(U, A)     \
    _mm512_maskz_rsqrt28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
     518  
/* Unmasked scalar current-rounding-direction forms.  These call the
   builtins directly with the (B, A) operand order: B supplies the low
   element operated on, A the pass-through upper element.  */
#define _mm_rcp28_sd(A, B)	\
    __builtin_ia32_rcp28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rcp28_ss(A, B)	\
    __builtin_ia32_rcp28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_sd(A, B)	\
    __builtin_ia32_rsqrt28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_ss(A, B)	\
    __builtin_ia32_rsqrt28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
     530  
     531  #ifdef __DISABLE_AVX512ER__
     532  #undef __DISABLE_AVX512ER__
     533  #pragma GCC pop_options
     534  #endif /* __DISABLE_AVX512ER__ */
     535  
     536  #endif /* _AVX512ERINTRIN_H_INCLUDED */