1  /* Copyright (C) 2013-2023 Free Software Foundation, Inc.
       2  
       3     This file is part of GCC.
       4  
       5     GCC is free software; you can redistribute it and/or modify
       6     it under the terms of the GNU General Public License as published by
       7     the Free Software Foundation; either version 3, or (at your option)
       8     any later version.
       9  
      10     GCC is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13     GNU General Public License for more details.
      14  
      15     Under Section 7 of GPL version 3, you are granted additional
      16     permissions described in the GCC Runtime Library Exception, version
      17     3.1, as published by the Free Software Foundation.
      18  
      19     You should have received a copy of the GNU General Public License and
      20     a copy of the GCC Runtime Library Exception along with this program;
      21     see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
      22     <http://www.gnu.org/licenses/>.  */
      23  
      24  #ifndef _IMMINTRIN_H_INCLUDED
      25  #error "Never use <avx512pfintrin.h> directly; include <immintrin.h> instead."
      26  #endif
      27  
      28  #ifndef _AVX512PFINTRIN_H_INCLUDED
      29  #define _AVX512PFINTRIN_H_INCLUDED
      30  
      31  #ifndef __AVX512PF__
      32  #pragma GCC push_options
      33  #pragma GCC target("avx512pf")
      34  #define __DISABLE_AVX512PF__
      35  #endif /* __AVX512PF__ */
      36  
/* Internal data types for implementing the intrinsics.  Both are 64-byte
   GNU vector types; the intrinsics below cast their __m512i index argument
   to one of them before handing it to the corresponding builtin.  */
typedef long long __v8di __attribute__ ((__vector_size__ (64)));
typedef int __v16si __attribute__ ((__vector_size__ (64)));

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));

/* Mask types.  In this file __mmask8 accompanies the __v8si and __v8di
   index forms and __mmask16 accompanies the __v16si index forms.  */
typedef unsigned char  __mmask8;
typedef unsigned short __mmask16;
      47  
      48  #ifdef __OPTIMIZE__
      49  extern __inline void
      50  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
      51  _mm512_prefetch_i32gather_pd (__m256i __index, void const *__addr,
      52  			      int __scale, int __hint)
      53  {
      54    __builtin_ia32_gatherpfdpd ((__mmask8) 0xFF, (__v8si) __index, __addr,
      55  			      __scale, __hint);
      56  }
      57  
      58  extern __inline void
      59  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
      60  _mm512_prefetch_i32gather_ps (__m512i __index, void const *__addr,
      61  			      int __scale, int __hint)
      62  {
      63    __builtin_ia32_gatherpfdps ((__mmask16) 0xFFFF, (__v16si) __index, __addr,
      64  			      __scale, __hint);
      65  }
      66  
      67  extern __inline void
      68  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
      69  _mm512_mask_prefetch_i32gather_pd (__m256i __index, __mmask8 __mask,
      70  				   void const *__addr, int __scale, int __hint)
      71  {
      72    __builtin_ia32_gatherpfdpd (__mask, (__v8si) __index, __addr, __scale,
      73  			      __hint);
      74  }
      75  
      76  extern __inline void
      77  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
      78  _mm512_mask_prefetch_i32gather_ps (__m512i __index, __mmask16 __mask,
      79  				   void const *__addr, int __scale, int __hint)
      80  {
      81    __builtin_ia32_gatherpfdps (__mask, (__v16si) __index, __addr, __scale,
      82  			      __hint);
      83  }
      84  
      85  extern __inline void
      86  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
      87  _mm512_prefetch_i64gather_pd (__m512i __index, void const *__addr,
      88  			      int __scale, int __hint)
      89  {
      90    __builtin_ia32_gatherpfqpd ((__mmask8) 0xFF, (__v8di) __index, __addr,
      91  			      __scale, __hint);
      92  }
      93  
      94  extern __inline void
      95  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
      96  _mm512_prefetch_i64gather_ps (__m512i __index, void const *__addr,
      97  			      int __scale, int __hint)
      98  {
      99    __builtin_ia32_gatherpfqps ((__mmask8) 0xFF, (__v8di) __index, __addr,
     100  			      __scale, __hint);
     101  }
     102  
     103  extern __inline void
     104  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     105  _mm512_mask_prefetch_i64gather_pd (__m512i __index, __mmask8 __mask,
     106  				   void const *__addr, int __scale, int __hint)
     107  {
     108    __builtin_ia32_gatherpfqpd (__mask, (__v8di) __index, __addr, __scale,
     109  			      __hint);
     110  }
     111  
     112  extern __inline void
     113  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     114  _mm512_mask_prefetch_i64gather_ps (__m512i __index, __mmask8 __mask,
     115  				   void const *__addr, int __scale, int __hint)
     116  {
     117    __builtin_ia32_gatherpfqps (__mask, (__v8di) __index, __addr, __scale,
     118  			      __hint);
     119  }
     120  
     121  extern __inline void
     122  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     123  _mm512_prefetch_i32scatter_pd (void *__addr, __m256i __index, int __scale,
     124  			       int __hint)
     125  {
     126    __builtin_ia32_scatterpfdpd ((__mmask8) 0xFF, (__v8si) __index, __addr,
     127  			      __scale, __hint);
     128  }
     129  
     130  extern __inline void
     131  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     132  _mm512_prefetch_i32scatter_ps (void *__addr, __m512i __index, int __scale,
     133  			       int __hint)
     134  {
     135    __builtin_ia32_scatterpfdps ((__mmask16) 0xFFFF, (__v16si) __index, __addr,
     136  			      __scale, __hint);
     137  }
     138  
     139  extern __inline void
     140  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     141  _mm512_mask_prefetch_i32scatter_pd (void *__addr, __mmask8 __mask,
     142  				    __m256i __index, int __scale, int __hint)
     143  {
     144    __builtin_ia32_scatterpfdpd (__mask, (__v8si) __index, __addr, __scale,
     145  			       __hint);
     146  }
     147  
     148  extern __inline void
     149  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     150  _mm512_mask_prefetch_i32scatter_ps (void *__addr, __mmask16 __mask,
     151  				    __m512i __index, int __scale, int __hint)
     152  {
     153    __builtin_ia32_scatterpfdps (__mask, (__v16si) __index, __addr, __scale,
     154  			       __hint);
     155  }
     156  
     157  extern __inline void
     158  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     159  _mm512_prefetch_i64scatter_pd (void *__addr, __m512i __index, int __scale,
     160  			       int __hint)
     161  {
     162    __builtin_ia32_scatterpfqpd ((__mmask8) 0xFF, (__v8di) __index,__addr,
     163  			      __scale, __hint);
     164  }
     165  
     166  extern __inline void
     167  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     168  _mm512_prefetch_i64scatter_ps (void *__addr, __m512i __index, int __scale,
     169  			       int __hint)
     170  {
     171    __builtin_ia32_scatterpfqps ((__mmask8) 0xFF, (__v8di) __index, __addr,
     172  			      __scale, __hint);
     173  }
     174  
     175  extern __inline void
     176  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     177  _mm512_mask_prefetch_i64scatter_pd (void *__addr, __mmask8 __mask,
     178  				    __m512i __index, int __scale, int __hint)
     179  {
     180    __builtin_ia32_scatterpfqpd (__mask, (__v8di) __index, __addr, __scale,
     181  			       __hint);
     182  }
     183  
     184  extern __inline void
     185  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     186  _mm512_mask_prefetch_i64scatter_ps (void *__addr, __mmask8 __mask,
     187  				    __m512i __index, int __scale, int __hint)
     188  {
     189    __builtin_ia32_scatterpfqps (__mask, (__v8di) __index, __addr, __scale,
     190  			       __hint);
     191  }
     192  
     193  #else
     194  #define _mm512_prefetch_i32gather_pd(INDEX, ADDR, SCALE, HINT)		     \
     195    __builtin_ia32_gatherpfdpd ((__mmask8)0xFF, (__v8si)(__m256i) (INDEX),     \
     196  			      (void const *) (ADDR), (int) (SCALE),	     \
     197  			      (int) (HINT))
     198  
     199  #define _mm512_prefetch_i32gather_ps(INDEX, ADDR, SCALE, HINT)		     \
     200    __builtin_ia32_gatherpfdps ((__mmask16)0xFFFF, (__v16si)(__m512i) (INDEX), \
     201  			      (void const *) (ADDR), (int) (SCALE),	     \
     202  			      (int) (HINT))
     203  
     204  #define _mm512_mask_prefetch_i32gather_pd(INDEX, MASK, ADDR, SCALE, HINT)    \
     205    __builtin_ia32_gatherpfdpd ((__mmask8) (MASK), (__v8si)(__m256i) (INDEX),  \
     206  			      (void const *) (ADDR), (int) (SCALE),	     \
     207  			      (int) (HINT))
     208  
     209  #define _mm512_mask_prefetch_i32gather_ps(INDEX, MASK, ADDR, SCALE, HINT)    \
     210    __builtin_ia32_gatherpfdps ((__mmask16) (MASK), (__v16si)(__m512i) (INDEX),\
     211  			      (void const *) (ADDR), (int) (SCALE),	     \
     212  			      (int) (HINT))
     213  
     214  #define _mm512_prefetch_i64gather_pd(INDEX, ADDR, SCALE, HINT)		     \
     215    __builtin_ia32_gatherpfqpd ((__mmask8)0xFF, (__v8di)(__m512i) (INDEX),     \
     216  			      (void *) (ADDR), (int) (SCALE), (int) (HINT))
     217  
     218  #define _mm512_prefetch_i64gather_ps(INDEX, ADDR, SCALE, HINT)		     \
     219    __builtin_ia32_gatherpfqps ((__mmask8)0xFF, (__v8di)(__m512i) (INDEX),     \
     220  			      (void *) (ADDR), (int) (SCALE), (int) (HINT))
     221  
     222  #define _mm512_mask_prefetch_i64gather_pd(INDEX, MASK, ADDR, SCALE, HINT)    \
     223    __builtin_ia32_gatherpfqpd ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX),  \
     224  			      (void *) (ADDR), (int) (SCALE), (int) (HINT))
     225  
     226  #define _mm512_mask_prefetch_i64gather_ps(INDEX, MASK, ADDR, SCALE, HINT)    \
     227    __builtin_ia32_gatherpfqps ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX),  \
     228  			      (void *) (ADDR), (int) (SCALE), (int) (HINT))
     229  
     230  #define _mm512_prefetch_i32scatter_pd(ADDR, INDEX, SCALE, HINT)              \
     231    __builtin_ia32_scatterpfdpd ((__mmask8)0xFF, (__v8si)(__m256i) (INDEX),    \
     232  			       (void *) (ADDR), (int) (SCALE), (int) (HINT))
     233  
     234  #define _mm512_prefetch_i32scatter_ps(ADDR, INDEX, SCALE, HINT)              \
     235    __builtin_ia32_scatterpfdps ((__mmask16)0xFFFF, (__v16si)(__m512i) (INDEX),\
     236  			       (void *) (ADDR), (int) (SCALE), (int) (HINT))
     237  
     238  #define _mm512_mask_prefetch_i32scatter_pd(ADDR, MASK, INDEX, SCALE, HINT)   \
     239    __builtin_ia32_scatterpfdpd ((__mmask8) (MASK), (__v8si)(__m256i) (INDEX), \
     240  			       (void *) (ADDR), (int) (SCALE), (int) (HINT))
     241  
     242  #define _mm512_mask_prefetch_i32scatter_ps(ADDR, MASK, INDEX, SCALE, HINT)   \
     243    __builtin_ia32_scatterpfdps ((__mmask16) (MASK),			     \
     244  			       (__v16si)(__m512i) (INDEX),		     \
     245  			       (void *) (ADDR), (int) (SCALE), (int) (HINT))
     246  
     247  #define _mm512_prefetch_i64scatter_pd(ADDR, INDEX, SCALE, HINT)              \
     248    __builtin_ia32_scatterpfqpd ((__mmask8)0xFF, (__v8di)(__m512i) (INDEX),    \
     249  			       (void *) (ADDR), (int) (SCALE), (int) (HINT))
     250  
     251  #define _mm512_prefetch_i64scatter_ps(ADDR, INDEX, SCALE, HINT)              \
     252    __builtin_ia32_scatterpfqps ((__mmask8)0xFF, (__v8di)(__m512i) (INDEX),    \
     253  			       (void *) (ADDR), (int) (SCALE), (int) (HINT))
     254  
     255  #define _mm512_mask_prefetch_i64scatter_pd(ADDR, MASK, INDEX, SCALE, HINT)   \
     256    __builtin_ia32_scatterpfqpd ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX), \
     257  			       (void *) (ADDR), (int) (SCALE), (int) (HINT))
     258  
     259  #define _mm512_mask_prefetch_i64scatter_ps(ADDR, MASK, INDEX, SCALE, HINT)   \
     260    __builtin_ia32_scatterpfqps ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX), \
     261  			       (void *) (ADDR), (int) (SCALE), (int) (HINT))
     262  #endif
     263  
     264  #ifdef __DISABLE_AVX512PF__
     265  #undef __DISABLE_AVX512PF__
     266  #pragma GCC pop_options
     267  #endif /* __DISABLE_AVX512PF__ */
     268  
     269  #endif /* _AVX512PFINTRIN_H_INCLUDED */