1  /* Copyright (C) 2019-2023 Free Software Foundation, Inc.
       2  
       3     This file is part of GCC.
       4  
       5     GCC is free software; you can redistribute it and/or modify
       6     it under the terms of the GNU General Public License as published by
       7     the Free Software Foundation; either version 3, or (at your option)
       8     any later version.
       9  
      10     GCC is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13     GNU General Public License for more details.
      14  
      15     Under Section 7 of GPL version 3, you are granted additional
      16     permissions described in the GCC Runtime Library Exception, version
      17     3.1, as published by the Free Software Foundation.
      18  
      19     You should have received a copy of the GNU General Public License and
      20     a copy of the GCC Runtime Library Exception along with this program;
      21     see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
      22     <http://www.gnu.org/licenses/>.  */
      23  
      24  #ifndef _IMMINTRIN_H_INCLUDED
      25  #error "Never use <avx512bf16intrin.h> directly; include <immintrin.h> instead."
      26  #endif
      27  
      28  #ifndef _AVX512BF16INTRIN_H_INCLUDED
      29  #define _AVX512BF16INTRIN_H_INCLUDED
      30  
/* If the translation unit is not already being compiled with AVX512BF16
   enabled, save the current target options and turn the feature on for
   the definitions that follow.  __DISABLE_AVX512BF16__ records that the
   options were pushed here so that the matching block at the end of this
   header knows it has to pop them again.  */
#ifndef __AVX512BF16__
#pragma GCC push_options
#pragma GCC target("avx512bf16")
#define __DISABLE_AVX512BF16__
#endif /* __AVX512BF16__ */
      36  
      37  /* Internal data types for implementing the intrinsics.  */
      38  typedef __bf16 __v32bf __attribute__ ((__vector_size__ (64)));
      39  
      40  /* The Intel API is flexible enough that we must allow aliasing with other
      41     vector types, and their scalar components.  */
      42  typedef __bf16 __m512bh __attribute__ ((__vector_size__ (64), __may_alias__));
      43  
      44  /* Convert One BF16 Data to One Single Float Data.  */
      45  extern __inline float
      46  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
      47  _mm_cvtsbh_ss (__bf16 __A)
      48  {
      49    return __builtin_ia32_cvtbf2sf (__A);
      50  }
      51  
      52  /* vcvtne2ps2bf16 */
      53  
      54  extern __inline __m512bh
      55  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      56  _mm512_cvtne2ps_pbh (__m512 __A, __m512 __B)
      57  {
      58    return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32bf(__A, __B);
      59  }
      60  
      61  extern __inline __m512bh
      62  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      63  _mm512_mask_cvtne2ps_pbh (__m512bh __A, __mmask32 __B, __m512 __C, __m512 __D)
      64  {
      65    return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32bf_mask(__C, __D, __A, __B);
      66  }
      67  
      68  extern __inline __m512bh
      69  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      70  _mm512_maskz_cvtne2ps_pbh (__mmask32 __A, __m512 __B, __m512 __C)
      71  {
      72    return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32bf_maskz(__B, __C, __A);
      73  }
      74  
      75  /* vcvtneps2bf16 */
      76  
      77  extern __inline __m256bh
      78  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      79  _mm512_cvtneps_pbh (__m512 __A)
      80  {
      81    return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf(__A);
      82  }
      83  
      84  extern __inline __m256bh
      85  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      86  _mm512_mask_cvtneps_pbh (__m256bh __A, __mmask16 __B, __m512 __C)
      87  {
      88    return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf_mask(__C, __A, __B);
      89  }
      90  
      91  extern __inline __m256bh
      92  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      93  _mm512_maskz_cvtneps_pbh (__mmask16 __A, __m512 __B)
      94  {
      95    return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf_maskz(__B, __A);
      96  }
      97  
      98  /* vdpbf16ps */
      99  
     100  extern __inline __m512
     101  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     102  _mm512_dpbf16_ps (__m512 __A, __m512bh __B, __m512bh __C)
     103  {
     104    return (__m512)__builtin_ia32_dpbf16ps_v16sf(__A, __B, __C);
     105  }
     106  
     107  extern __inline __m512
     108  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     109  _mm512_mask_dpbf16_ps (__m512 __A, __mmask16 __B, __m512bh __C, __m512bh __D)
     110  {
     111    return (__m512)__builtin_ia32_dpbf16ps_v16sf_mask(__A, __C, __D, __B);
     112  }
     113  
     114  extern __inline __m512
     115  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     116  _mm512_maskz_dpbf16_ps (__mmask16 __A, __m512 __B, __m512bh __C, __m512bh __D)
     117  {
     118    return (__m512)__builtin_ia32_dpbf16ps_v16sf_maskz(__B, __C, __D, __A);
     119  }
     120  
     121  extern __inline __m512
     122  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     123  _mm512_cvtpbh_ps (__m256bh __A)
     124  {
     125    return (__m512)_mm512_castsi512_ps ((__m512i)_mm512_slli_epi32 (
     126  	 (__m512i)_mm512_cvtepi16_epi32 ((__m256i)__A), 16));
     127  }
     128  
     129  extern __inline __m512
     130  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     131  _mm512_maskz_cvtpbh_ps (__mmask16 __U, __m256bh __A)
     132  {
     133    return (__m512)_mm512_castsi512_ps ((__m512i) _mm512_slli_epi32 (
     134  	 (__m512i)_mm512_maskz_cvtepi16_epi32 (
     135  	 (__mmask16)__U, (__m256i)__A), 16));
     136  }
     137  
     138  extern __inline __m512
     139  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     140  _mm512_mask_cvtpbh_ps (__m512 __S, __mmask16 __U, __m256bh __A)
     141  {
     142    return (__m512)_mm512_castsi512_ps ((__m512i)(_mm512_mask_slli_epi32 (
     143  	 (__m512i)__S, (__mmask16)__U,
     144  	 (__m512i)_mm512_cvtepi16_epi32 ((__m256i)__A), 16)));
     145  }
     146  
/* Undo the target-option change made near the top of this header, but
   only when this header was the one that pushed the options (signalled
   by __DISABLE_AVX512BF16__ being defined).  */
#ifdef __DISABLE_AVX512BF16__
#undef __DISABLE_AVX512BF16__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512BF16__ */
     151  
     152  #endif /* _AVX512BF16INTRIN_H_INCLUDED */