(root)/
glibc-2.38/
sysdeps/
powerpc/
fpu/
fenv_libc.h
       1  /* Internal libc stuff for floating point environment routines.
       2     Copyright (C) 1997-2023 Free Software Foundation, Inc.
       3     This file is part of the GNU C Library.
       4  
       5     The GNU C Library is free software; you can redistribute it and/or
       6     modify it under the terms of the GNU Lesser General Public
       7     License as published by the Free Software Foundation; either
       8     version 2.1 of the License, or (at your option) any later version.
       9  
      10     The GNU C Library is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      13     Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public
      16     License along with the GNU C Library; if not, see
      17     <https://www.gnu.org/licenses/>.  */
      18  
      19  #ifndef _FENV_LIBC_H
      20  #define _FENV_LIBC_H	1
      21  
      22  #include <fenv.h>
      23  #include <ldsodefs.h>
      24  #include <sysdep.h>
      25  
/* Invoked by __TEST_AND_EXIT_NON_STOP below when an environment change goes
   from no enabled exceptions to at least one enabled exception.  That macro
   discards the returned pointer.  */
extern const fenv_t *__fe_nomask_env_priv (void);

/* Invoked by __TEST_AND_ENTER_NON_STOP below when an environment change goes
   from at least one enabled exception to none.  That macro discards the
   returned pointer.  */
extern const fenv_t *__fe_mask_env (void) attribute_hidden;
      29  
      30  /* If the old env had any enabled exceptions and the new env has no enabled
      31     exceptions, then mask SIGFPE in the MSR FE0/FE1 bits.  This may allow the
      32     FPU to run faster because it always takes the default action and can not
      33     generate SIGFPE.  */
      34  #define __TEST_AND_ENTER_NON_STOP(old, new) \
      35    do { \
      36      if (((old) & FPSCR_ENABLES_MASK) != 0 && ((new) & FPSCR_ENABLES_MASK) == 0) \
      37        (void) __fe_mask_env (); \
      38    } while (0)
      39  
      40  /* If the old env has no enabled exceptions and the new env has any enabled
      41     exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits.  This will put the
      42     hardware into "precise mode" and may cause the FPU to run slower on some
      43     hardware.  */
      44  #define __TEST_AND_EXIT_NON_STOP(old, new) \
      45    do { \
      46      if (((old) & FPSCR_ENABLES_MASK) == 0 && ((new) & FPSCR_ENABLES_MASK) != 0) \
      47        (void) __fe_nomask_env_priv (); \
      48    } while (0)
      49  
      50  /* The sticky bits in the FPSCR indicating exceptions have occurred.  */
      51  #define FPSCR_STICKY_BITS ((FE_ALL_EXCEPT | FE_ALL_INVALID) & ~FE_INVALID)
      52  
      53  /* Equivalent to fegetenv, but returns a fenv_t instead of taking a
      54     pointer.  */
      55  #define fegetenv_register() __builtin_mffs()
      56  
      57  /* Equivalent to fegetenv_register, but only returns bits for
      58     status, exception enables, and mode.
      59     Nicely, it turns out that the 'mffsl' instruction will decode to
      60     'mffs' on architectures older than "power9" because the additional
      61     bits set for 'mffsl' are "don't care" for 'mffs'.  'mffs' is a superset
      62     of 'mffsl'.  */
      63  #define fegetenv_control()					\
      64    ({register double __fr;						\
      65      __asm__ __volatile__ (						\
      66        ".machine push; .machine \"power9\"; mffsl %0; .machine pop"	\
      67        : "=f" (__fr));							\
      68      __fr;								\
      69    })
      70  
      71  #define __fe_mffscrn(rn)						\
      72    ({register fenv_union_t __fr;						\
      73      if (__builtin_constant_p (rn))					\
      74        __asm__ __volatile__ (						\
      75          ".machine push; .machine \"power9\"; mffscrni %0,%1; .machine pop" \
      76          : "=f" (__fr.fenv) : "n" (rn));					\
      77      else								\
      78      {									\
      79        __fr.l = (rn);							\
      80        __asm__ __volatile__ (						\
      81          ".machine push; .machine \"power9\"; mffscrn %0,%1; .machine pop" \
      82          : "=f" (__fr.fenv) : "f" (__fr.fenv));				\
      83      }									\
      84      __fr.fenv;								\
      85    })
      86  
      87  /* Like fegetenv_control, but also sets the rounding mode.  */
      88  #ifdef _ARCH_PWR9
      89  #define fegetenv_and_set_rn(rn) __fe_mffscrn (rn)
      90  #else
      91  /* 'mffscrn' will decode to 'mffs' on ARCH < 3_00, which is still necessary
      92     but not sufficient, because it does not set the rounding mode.
      93     Explicitly set the rounding mode when 'mffscrn' actually doesn't.  */
      94  #define fegetenv_and_set_rn(rn)						\
      95    ({register fenv_union_t __fr;						\
      96      __fr.fenv = __fe_mffscrn (rn);					\
      97      if (__glibc_unlikely (!(GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00)))	\
      98        __fesetround_inline (rn);						\
      99      __fr.fenv;								\
     100    })
     101  #endif
     102  
     103  /* Equivalent to fesetenv, but takes a fenv_t instead of a pointer.  */
     104  #define fesetenv_register(env) \
     105  	do { \
     106  	  double d = (env); \
     107  	  if(GLRO(dl_hwcap) & PPC_FEATURE_HAS_DFP) \
     108  	    asm volatile (".machine push; " \
     109  			  ".machine \"power6\"; " \
     110  			  "mtfsf 0xff,%0,1,0; " \
     111  			  ".machine pop" : : "f" (d)); \
     112  	  else \
     113  	    __builtin_mtfsf (0xff, d); \
     114  	} while(0)
     115  
     116  /* Set the last 2 nibbles of the FPSCR, which contain the
     117     exception enables and the rounding mode.
     118     'fegetenv_control' retrieves these bits by reading the FPSCR.  */
     119  #define fesetenv_control(env) __builtin_mtfsf (0b00000011, (env));
     120  
     121  /* This very handy macro:
     122     - Sets the rounding mode to 'round to nearest';
     123     - Sets the processor into IEEE mode; and
     124     - Prevents exceptions from being raised for inexact results.
     125     These things happen to be exactly what you need for typical elementary
     126     functions.  */
     127  #define relax_fenv_state() \
     128  	do { \
     129  	   if (GLRO(dl_hwcap) & PPC_FEATURE_HAS_DFP) \
     130  	     asm volatile (".machine push; .machine \"power6\"; " \
     131  		  "mtfsfi 7,0,1; .machine pop"); \
     132  	   asm volatile ("mtfsfi 7,0"); \
     133  	} while(0)
     134  
     135  /* Set/clear a particular FPSCR bit (for instance,
     136     reset_fpscr_bit(FPSCR_VE);
     137     prevents INVALID exceptions from being raised).  */
     138  #define set_fpscr_bit(x) asm volatile ("mtfsb1 %0" : : "n"(x))
     139  #define reset_fpscr_bit(x) asm volatile ("mtfsb0 %0" : : "n"(x))
     140  
/* Two views of one floating-point environment value.  The macros in this
   file write one member and read the other to move the value between its
   fenv_t form and its integer bit pattern.  */
typedef union
{
  /* Form used as a floating-point register operand ("f" constraint) by the
     inline asm in this file.  */
  fenv_t fenv;
  /* The same storage as an integer, for bit tests and bit manipulation.  */
  unsigned long long l;
} fenv_union_t;
     146  
     147  
     148  static inline int
     149  __fesetround_inline (int round)
     150  {
     151  #ifdef _ARCH_PWR9
     152    __fe_mffscrn (round);
     153  #else
     154    if (__glibc_likely (GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))
     155      __fe_mffscrn (round);
     156    else if ((unsigned int) round < 2)
     157      {
     158         asm volatile ("mtfsb0 30");
     159         if ((unsigned int) round == 0)
     160           asm volatile ("mtfsb0 31");
     161         else
     162           asm volatile ("mtfsb1 31");
     163      }
     164    else
     165      {
     166         asm volatile ("mtfsb1 30");
     167         if ((unsigned int) round == 2)
     168           asm volatile ("mtfsb0 31");
     169         else
     170           asm volatile ("mtfsb1 31");
     171      }
     172  #endif
     173    return 0;
     174  }
     175  
     176  /* Same as __fesetround_inline, however without runtime check to use DFP
     177     mtfsfi syntax (as relax_fenv_state) or if round value is valid.  */
     178  static inline void
     179  __fesetround_inline_nocheck (const int round)
     180  {
     181  #ifdef _ARCH_PWR9
     182    __fe_mffscrn (round);
     183  #else
     184    if (__glibc_likely (GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))
     185      __fe_mffscrn (round);
     186    else
     187      asm volatile ("mtfsfi 7,%0" : : "n" (round));
     188  #endif
     189  }
     190  
     191  #define FPSCR_MASK(bit) (1 << (31 - (bit)))
     192  
     193  /* Definitions of all the FPSCR bit numbers */
     194  enum {
     195    FPSCR_FX = 0,    /* exception summary */
     196  #define FPSCR_FX_MASK (FPSCR_MASK (FPSCR_FX))
     197    FPSCR_FEX,       /* enabled exception summary */
     198  #define FPSCR_FEX_MASK (FPSCR_MASK FPSCR_FEX))
     199    FPSCR_VX,        /* invalid operation summary */
     200  #define FPSCR_VX_MASK (FPSCR_MASK (FPSCR_VX))
     201    FPSCR_OX,        /* overflow */
     202  #define FPSCR_OX_MASK (FPSCR_MASK (FPSCR_OX))
     203    FPSCR_UX,        /* underflow */
     204  #define FPSCR_UX_MASK (FPSCR_MASK (FPSCR_UX))
     205    FPSCR_ZX,        /* zero divide */
     206  #define FPSCR_ZX_MASK (FPSCR_MASK (FPSCR_ZX))
     207    FPSCR_XX,        /* inexact */
     208  #define FPSCR_XX_MASK (FPSCR_MASK (FPSCR_XX))
     209    FPSCR_VXSNAN,    /* invalid operation for sNaN */
     210  #define FPSCR_VXSNAN_MASK (FPSCR_MASK (FPSCR_VXSNAN))
     211    FPSCR_VXISI,     /* invalid operation for Inf-Inf */
     212  #define FPSCR_VXISI_MASK (FPSCR_MASK (FPSCR_VXISI))
     213    FPSCR_VXIDI,     /* invalid operation for Inf/Inf */
     214  #define FPSCR_VXIDI_MASK (FPSCR_MASK (FPSCR_VXIDI))
     215    FPSCR_VXZDZ,     /* invalid operation for 0/0 */
     216  #define FPSCR_VXZDZ_MASK (FPSCR_MASK (FPSCR_VXZDZ))
     217    FPSCR_VXIMZ,     /* invalid operation for Inf*0 */
     218  #define FPSCR_VXIMZ_MASK (FPSCR_MASK (FPSCR_VXIMZ))
     219    FPSCR_VXVC,      /* invalid operation for invalid compare */
     220  #define FPSCR_VXVC_MASK (FPSCR_MASK (FPSCR_VXVC))
     221    FPSCR_FR,        /* fraction rounded [fraction was incremented by round] */
     222  #define FPSCR_FR_MASK (FPSCR_MASK (FPSCR_FR))
     223    FPSCR_FI,        /* fraction inexact */
     224  #define FPSCR_FI_MASK (FPSCR_MASK (FPSCR_FI))
     225    FPSCR_FPRF_C,    /* result class descriptor */
     226  #define FPSCR_FPRF_C_MASK (FPSCR_MASK (FPSCR_FPRF_C))
     227    FPSCR_FPRF_FL,   /* result less than (usually, less than 0) */
     228  #define FPSCR_FPRF_FL_MASK (FPSCR_MASK (FPSCR_FPRF_FL))
     229    FPSCR_FPRF_FG,   /* result greater than */
     230  #define FPSCR_FPRF_FG_MASK (FPSCR_MASK (FPSCR_FPRF_FG))
     231    FPSCR_FPRF_FE,   /* result equal to */
     232  #define FPSCR_FPRF_FE_MASK (FPSCR_MASK (FPSCR_FPRF_FE))
     233    FPSCR_FPRF_FU,   /* result unordered */
     234  #define FPSCR_FPRF_FU_MASK (FPSCR_MASK (FPSCR_FPRF_FU))
     235    FPSCR_20,        /* reserved */
     236    FPSCR_VXSOFT,    /* invalid operation set by software */
     237  #define FPSCR_VXSOFT_MASK (FPSCR_MASK (FPSCR_VXSOFT))
     238    FPSCR_VXSQRT,    /* invalid operation for square root */
     239  #define FPSCR_VXSQRT_MASK (FPSCR_MASK (FPSCR_VXSQRT))
     240    FPSCR_VXCVI,     /* invalid operation for invalid integer convert */
     241  #define FPSCR_VXCVI_MASK (FPSCR_MASK (FPSCR_VXCVI))
     242    FPSCR_VE,        /* invalid operation exception enable */
     243  #define FPSCR_VE_MASK (FPSCR_MASK (FPSCR_VE))
     244    FPSCR_OE,        /* overflow exception enable */
     245  #define FPSCR_OE_MASK (FPSCR_MASK (FPSCR_OE))
     246    FPSCR_UE,        /* underflow exception enable */
     247  #define FPSCR_UE_MASK (FPSCR_MASK (FPSCR_UE))
     248    FPSCR_ZE,        /* zero divide exception enable */
     249  #define FPSCR_ZE_MASK (FPSCR_MASK (FPSCR_ZE))
     250    FPSCR_XE,        /* inexact exception enable */
     251  #define FPSCR_XE_MASK (FPSCR_MASK (FPSCR_XE))
     252  #ifdef _ARCH_PWR6
     253    FPSCR_29,        /* Reserved in ISA 2.05  */
     254  #define FPSCR_NI_MASK (FPSCR_MASK (FPSCR_29))
     255  #else
     256    FPSCR_NI,        /* non-IEEE mode (typically, no denormalised numbers) */
     257  #define FPSCR_NI_MASK (FPSCR_MASK (FPSCR_NI))
     258  #endif /* _ARCH_PWR6 */
     259    /* the remaining two least-significant bits keep the rounding mode */
     260    FPSCR_RN_hi,
     261  #define FPSCR_RN_hi_MASK (FPSCR_MASK (FPSCR_RN_hi))
     262    FPSCR_RN_lo
     263  #define FPSCR_RN_lo_MASK (FPSCR_MASK (FPSCR_RN_lo))
     264  };
     265  
     266  #define FPSCR_RN_MASK (FPSCR_RN_hi_MASK|FPSCR_RN_lo_MASK)
     267  #define FPSCR_ENABLES_MASK \
     268    (FPSCR_VE_MASK|FPSCR_OE_MASK|FPSCR_UE_MASK|FPSCR_ZE_MASK|FPSCR_XE_MASK)
     269  #define FPSCR_BASIC_EXCEPTIONS_MASK \
     270    (FPSCR_VX_MASK|FPSCR_OX_MASK|FPSCR_UX_MASK|FPSCR_ZX_MASK|FPSCR_XX_MASK)
     271  #define FPSCR_EXCEPTIONS_MASK (FPSCR_BASIC_EXCEPTIONS_MASK| \
     272    FPSCR_VXSNAN_MASK|FPSCR_VXISI_MASK|FPSCR_VXIDI_MASK|FPSCR_VXZDZ_MASK| \
     273    FPSCR_VXIMZ_MASK|FPSCR_VXVC_MASK|FPSCR_VXSOFT_MASK|FPSCR_VXSQRT_MASK| \
     274    FPSCR_VXCVI_MASK)
     275  #define FPSCR_FPRF_MASK \
     276    (FPSCR_FPRF_C_MASK|FPSCR_FPRF_FL_MASK|FPSCR_FPRF_FG_MASK| \
     277     FPSCR_FPRF_FE_MASK|FPSCR_FPRF_FU_MASK)
     278  #define FPSCR_CONTROL_MASK (FPSCR_ENABLES_MASK|FPSCR_NI_MASK|FPSCR_RN_MASK)
     279  #define FPSCR_STATUS_MASK (FPSCR_FR_MASK|FPSCR_FI_MASK|FPSCR_FPRF_MASK)
     280  
/* The exception bits of the fenv(3) interface correspond one-to-one with
   bits in the FPSCR, albeit shifted to different but corresponding
   locations.  Similarly, the exception indicator bits in the FPSCR
   correspond one-to-one with the exception enable bits.  It is thus
   possible to map the fenv exceptions directly to the FPSCR enables with a
   simple mask and shift, and vice versa.
   The shift is the distance between an indicator bit and its enable bit in
   the FPSCR bit enum above: FPSCR_VX is bit 2 and FPSCR_VE is bit 24,
   FPSCR_XX is bit 6 and FPSCR_XE is bit 28.  */
#define FPSCR_EXCEPT_TO_ENABLE_SHIFT 22
     288  
     289  static inline int
     290  fenv_reg_to_exceptions (unsigned long long l)
     291  {
     292    return (((int)l) & FPSCR_ENABLES_MASK) << FPSCR_EXCEPT_TO_ENABLE_SHIFT;
     293  }
     294  
     295  static inline unsigned long long
     296  fenv_exceptions_to_reg (int excepts)
     297  {
     298    return (unsigned long long)
     299      (excepts & FE_ALL_EXCEPT) >> FPSCR_EXCEPT_TO_ENABLE_SHIFT;
     300  }
     301  
#ifdef _ARCH_PWR6
  /* Not supported in ISA 2.05.  Provided for source compat only.
     With _ARCH_PWR6 defined, the FPSCR bit enum names bit 29 FPSCR_29
     instead of FPSCR_NI, so the old name is supplied here as a macro with
     the same bit number.  */
# define FPSCR_NI 29
#endif /* _ARCH_PWR6 */
     306  
     307  /* This operation (i) sets the appropriate FPSCR bits for its
     308     parameter, (ii) converts sNaN to the corresponding qNaN, and (iii)
     309     otherwise passes its parameter through unchanged (in particular, -0
     310     and +0 stay as they were).  The `obvious' way to do this is optimised
     311     out by gcc.  */
     312  #define f_wash(x) \
     313     ({ double d; asm volatile ("fmul %0,%1,%2" \
     314  			      : "=f"(d) \
     315  			      : "f" (x), "f"((float)1.0)); d; })
     316  #define f_washf(x) \
     317     ({ float f; asm volatile ("fmuls %0,%1,%2" \
     318  			     : "=f"(f) \
     319  			     : "f" (x), "f"((float)1.0)); f; })
     320  
     321  #endif /* fenv_libc.h */