1  /* FPU-related code for x86 and x86_64 processors.
       2     Copyright (C) 2005-2023 Free Software Foundation, Inc.
       3     Contributed by Francois-Xavier Coudert <coudert@clipper.ens.fr>
       4  
       5  This file is part of the GNU Fortran 95 runtime library (libgfortran).
       6  
       7  Libgfortran is free software; you can redistribute it and/or
       8  modify it under the terms of the GNU General Public
       9  License as published by the Free Software Foundation; either
      10  version 3 of the License, or (at your option) any later version.
      11  
      12  Libgfortran is distributed in the hope that it will be useful,
      13  but WITHOUT ANY WARRANTY; without even the implied warranty of
      14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      15  GNU General Public License for more details.
      16  
      17  Under Section 7 of GPL version 3, you are granted additional
      18  permissions described in the GCC Runtime Library Exception, version
      19  3.1, as published by the Free Software Foundation.
      20  
      21  You should have received a copy of the GNU General Public License and
      22  a copy of the GCC Runtime Library Exception along with this program;
      23  see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
      24  <http://www.gnu.org/licenses/>.  */
      25  
      26  #ifndef __SSE_MATH__
      27  #include "cpuid.h"
      28  #endif
      29  
      30  static int
      31  has_sse (void)
      32  {
      33  #ifndef __SSE_MATH__
      34    unsigned int eax, ebx, ecx, edx;
      35  
      36    if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
      37      return 0;
      38  
      39    return edx & bit_SSE;
      40  #else
      41    return 1;
      42  #endif
      43  }
      44  
      45  /* i387 exceptions -- see linux <fpu_control.h> header file for details.  */
      46  #define _FPU_MASK_IM  0x01
      47  #define _FPU_MASK_DM  0x02
      48  #define _FPU_MASK_ZM  0x04
      49  #define _FPU_MASK_OM  0x08
      50  #define _FPU_MASK_UM  0x10
      51  #define _FPU_MASK_PM  0x20
      52  #define _FPU_MASK_ALL 0x3f
      53  
      54  #define _FPU_EX_ALL   0x3f
      55  
      56  /* i387 rounding modes.  */
      57  
      58  #define _FPU_RC_NEAREST 0x0
      59  #define _FPU_RC_DOWN    0x1
      60  #define _FPU_RC_UP      0x2
      61  #define _FPU_RC_ZERO    0x3
      62  
      63  #define _FPU_RC_MASK    0x3
      64  
      65  /* Enable flush to zero mode.  */
      66  
      67  #define MXCSR_FTZ (1 << 15)
      68  
      69  
      70  /* This structure corresponds to the layout of the block
      71     written by FSTENV.  */
      72  struct fenv
      73  {
      74    unsigned short int __control_word;
      75    unsigned short int __unused1;
      76    unsigned short int __status_word;
      77    unsigned short int __unused2;
      78    unsigned short int __tags;
      79    unsigned short int __unused3;
      80    unsigned int __eip;
      81    unsigned short int __cs_selector;
      82    unsigned int __opcode:11;
      83    unsigned int __unused4:5;
      84    unsigned int __data_offset;
      85    unsigned short int __data_selector;
      86    unsigned short int __unused5;
      87    unsigned int __mxcsr;
      88  } __attribute__ ((gcc_struct));
      89  
      90  /* Check we can actually store the FPU state in the allocated size.  */
      91  _Static_assert (sizeof(struct fenv) <= (size_t) GFC_FPE_STATE_BUFFER_SIZE,
      92  		"GFC_FPE_STATE_BUFFER_SIZE is too small");
      93  
      94  #ifdef __SSE_MATH__
      95  # define __math_force_eval_div(x, y)					\
      96    do {									\
      97      __asm__ ("" : "+x" (x)); __asm__ __volatile__ ("" : : "x" (x / y));	\
      98    } while (0)
      99  #else
     100  # define __math_force_eval_div(x, y)					\
     101    do {									\
     102      __asm__ ("" : "+t" (x)); __asm__ __volatile__ ("" : : "f" (x / y));	\
     103    } while (0)
     104  #endif
     105  
     106  /* Raise the supported floating-point exceptions from EXCEPTS.  Other
     107     bits in EXCEPTS are ignored.  Code originally borrowed from
     108     libatomic/config/x86/fenv.c.  */
     109  
     110  static void
     111  local_feraiseexcept (int excepts)
     112  {
     113    struct fenv temp;
     114  
     115    if (excepts & _FPU_MASK_IM)
     116      {
     117        float f = 0.0f;
     118        __math_force_eval_div (f, f);
     119      }
     120    if (excepts & _FPU_MASK_DM)
     121      {
     122        __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
     123        temp.__status_word |= _FPU_MASK_DM;
     124        __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
     125        __asm__ __volatile__ ("fwait");
     126      }
     127    if (excepts & _FPU_MASK_ZM)
     128      {
     129        float f = 1.0f, g = 0.0f;
     130        __math_force_eval_div (f, g);
     131      }
     132    if (excepts & _FPU_MASK_OM)
     133      {
     134        __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
     135        temp.__status_word |= _FPU_MASK_OM;
     136        __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
     137        __asm__ __volatile__ ("fwait");
     138      }
     139    if (excepts & _FPU_MASK_UM)
     140      {
     141        __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
     142        temp.__status_word |= _FPU_MASK_UM;
     143        __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
     144        __asm__ __volatile__ ("fwait");
     145      }
     146    if (excepts & _FPU_MASK_PM)
     147      {
     148        float f = 1.0f, g = 3.0f;
     149        __math_force_eval_div (f, g);
     150      }
     151  }
     152  
     153  
     154  void
     155  set_fpu_trap_exceptions (int trap, int notrap)
     156  {
     157    int exc_set = 0, exc_clr = 0;
     158    unsigned short cw;
     159  
     160    if (trap & GFC_FPE_INVALID) exc_set |= _FPU_MASK_IM;
     161    if (trap & GFC_FPE_DENORMAL) exc_set |= _FPU_MASK_DM;
     162    if (trap & GFC_FPE_ZERO) exc_set |= _FPU_MASK_ZM;
     163    if (trap & GFC_FPE_OVERFLOW) exc_set |= _FPU_MASK_OM;
     164    if (trap & GFC_FPE_UNDERFLOW) exc_set |= _FPU_MASK_UM;
     165    if (trap & GFC_FPE_INEXACT) exc_set |= _FPU_MASK_PM;
     166  
     167    if (notrap & GFC_FPE_INVALID) exc_clr |= _FPU_MASK_IM;
     168    if (notrap & GFC_FPE_DENORMAL) exc_clr |= _FPU_MASK_DM;
     169    if (notrap & GFC_FPE_ZERO) exc_clr |= _FPU_MASK_ZM;
     170    if (notrap & GFC_FPE_OVERFLOW) exc_clr |= _FPU_MASK_OM;
     171    if (notrap & GFC_FPE_UNDERFLOW) exc_clr |= _FPU_MASK_UM;
     172    if (notrap & GFC_FPE_INEXACT) exc_clr |= _FPU_MASK_PM;
     173  
     174    __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
     175  
     176    cw |= exc_clr;
     177    cw &= ~exc_set;
     178  
     179    __asm__ __volatile__ ("fnclex\n\tfldcw\t%0" : : "m" (cw));
     180  
     181    if (has_sse())
     182      {
     183        unsigned int cw_sse;
     184  
     185        __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
     186  
     187        /* The SSE exception masks are shifted by 7 bits.  */
     188        cw_sse |= (exc_clr << 7);
     189        cw_sse &= ~(exc_set << 7);
     190  
     191        /* Clear stalled exception flags.  */
     192        cw_sse &= ~_FPU_EX_ALL;
     193  
     194        __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
     195      }
     196  }
     197  
     198  void
     199  set_fpu (void)
     200  {
     201    set_fpu_trap_exceptions (options.fpe, 0);
     202  }
     203  
     204  int
     205  get_fpu_trap_exceptions (void)
     206  {
     207    unsigned short cw;
     208    int mask;
     209    int res = 0;
     210  
     211    __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
     212    mask = cw;
     213  
     214    if (has_sse())
     215      {
     216        unsigned int cw_sse;
     217  
     218        __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
     219  
     220        /* The SSE exception masks are shifted by 7 bits.  */
     221        mask |= (cw_sse >> 7);
     222      }
     223  
     224    mask = ~mask & _FPU_MASK_ALL;
     225  
     226    if (mask & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
     227    if (mask & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
     228    if (mask & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
     229    if (mask & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
     230    if (mask & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
     231    if (mask & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;
     232  
     233    return res;
     234  }
     235  
     236  int
     237  support_fpu_trap (int flag __attribute__((unused)))
     238  {
     239    return 1;
     240  }
     241  
     242  int
     243  get_fpu_except_flags (void)
     244  {
     245    unsigned short cw;
     246    int excepts;
     247    int res = 0;
     248  
     249    __asm__ __volatile__ ("fnstsw\t%0" : "=am" (cw));
     250    excepts = cw;
     251  
     252    if (has_sse())
     253      {
     254        unsigned int cw_sse;
     255  
     256        __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
     257        excepts |= cw_sse;
     258      }
     259  
     260    excepts &= _FPU_EX_ALL;
     261  
     262    if (excepts & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
     263    if (excepts & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
     264    if (excepts & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
     265    if (excepts & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
     266    if (excepts & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
     267    if (excepts & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;
     268  
     269    return res;
     270  }
     271  
     272  void
     273  set_fpu_except_flags (int set, int clear)
     274  {
     275    struct fenv temp;
     276    int exc_set = 0, exc_clr = 0;
     277  
     278    /* Translate from GFC_PE_* values to _FPU_MASK_* values.  */
     279    if (set & GFC_FPE_INVALID)
     280      exc_set |= _FPU_MASK_IM;
     281    if (clear & GFC_FPE_INVALID)
     282      exc_clr |= _FPU_MASK_IM;
     283  
     284    if (set & GFC_FPE_DENORMAL)
     285      exc_set |= _FPU_MASK_DM;
     286    if (clear & GFC_FPE_DENORMAL)
     287      exc_clr |= _FPU_MASK_DM;
     288  
     289    if (set & GFC_FPE_ZERO)
     290      exc_set |= _FPU_MASK_ZM;
     291    if (clear & GFC_FPE_ZERO)
     292      exc_clr |= _FPU_MASK_ZM;
     293  
     294    if (set & GFC_FPE_OVERFLOW)
     295      exc_set |= _FPU_MASK_OM;
     296    if (clear & GFC_FPE_OVERFLOW)
     297      exc_clr |= _FPU_MASK_OM;
     298  
     299    if (set & GFC_FPE_UNDERFLOW)
     300      exc_set |= _FPU_MASK_UM;
     301    if (clear & GFC_FPE_UNDERFLOW)
     302      exc_clr |= _FPU_MASK_UM;
     303  
     304    if (set & GFC_FPE_INEXACT)
     305      exc_set |= _FPU_MASK_PM;
     306    if (clear & GFC_FPE_INEXACT)
     307      exc_clr |= _FPU_MASK_PM;
     308  
     309  
     310    /* Change the flags. This is tricky on 387 (unlike SSE), because we have
     311       FNSTSW but no FLDSW instruction.  */
     312    __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
     313    temp.__status_word &= ~exc_clr;
     314    __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
     315  
     316    /* Change the flags on SSE.  */
     317  
     318    if (has_sse())
     319    {
     320      unsigned int cw_sse;
     321  
     322      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
     323      cw_sse &= ~exc_clr;
     324      __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
     325    }
     326  
     327    local_feraiseexcept (exc_set);
     328  }
     329  
     330  int
     331  support_fpu_flag (int flag __attribute__((unused)))
     332  {
     333    return 1;
     334  }
     335  
     336  void
     337  set_fpu_rounding_mode (int round)
     338  {
     339    int round_mode;
     340    unsigned short cw;
     341  
     342    switch (round)
     343      {
     344      case GFC_FPE_TONEAREST:
     345        round_mode = _FPU_RC_NEAREST;
     346        break;
     347      case GFC_FPE_UPWARD:
     348        round_mode = _FPU_RC_UP;
     349        break;
     350      case GFC_FPE_DOWNWARD:
     351        round_mode = _FPU_RC_DOWN;
     352        break;
     353      case GFC_FPE_TOWARDZERO:
     354        round_mode = _FPU_RC_ZERO;
     355        break;
     356      default:
     357        return; /* Should be unreachable.  */
     358      }
     359  
     360    __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
     361  
     362    /* The x87 round control bits are shifted by 10 bits.  */
     363    cw &= ~(_FPU_RC_MASK << 10);
     364    cw |= round_mode << 10;
     365  
     366    __asm__ __volatile__ ("fldcw\t%0" : : "m" (cw));
     367  
     368    if (has_sse())
     369      {
     370        unsigned int cw_sse;
     371  
     372        __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
     373  
     374        /* The SSE round control bits are shifted by 13 bits.  */
     375        cw_sse &= ~(_FPU_RC_MASK << 13);
     376        cw_sse |= round_mode << 13;
     377  
     378        __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
     379      }
     380  }
     381  
     382  int
     383  get_fpu_rounding_mode (void)
     384  {
     385    int round_mode;
     386  
     387  #ifdef __SSE_MATH__
     388    unsigned int cw;
     389  
     390    __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw));
     391  
     392    /* The SSE round control bits are shifted by 13 bits.  */
     393    round_mode = cw >> 13;
     394  #else
     395    unsigned short cw;
     396  
     397    __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
     398  
     399    /* The x87 round control bits are shifted by 10 bits.  */
     400    round_mode = cw >> 10;
     401  #endif
     402  
     403    round_mode &= _FPU_RC_MASK;
     404  
     405    switch (round_mode)
     406      {
     407      case _FPU_RC_NEAREST:
     408        return GFC_FPE_TONEAREST;
     409      case _FPU_RC_UP:
     410        return GFC_FPE_UPWARD;
     411      case _FPU_RC_DOWN:
     412        return GFC_FPE_DOWNWARD;
     413      case _FPU_RC_ZERO:
     414        return GFC_FPE_TOWARDZERO;
     415      default:
     416        return 0; /* Should be unreachable.  */
     417      }
     418  }
     419  
     420  int
     421  support_fpu_rounding_mode (int mode)
     422  {
     423    if (mode == GFC_FPE_AWAY)
     424      return 0;
     425    else
     426      return 1;
     427  }
     428  
     429  void
     430  get_fpu_state (void *state)
     431  {
     432    struct fenv *envp = state;
     433  
     434    __asm__ __volatile__ ("fnstenv\t%0" : "=m" (*envp));
     435  
     436    /* fnstenv has the side effect of masking all exceptions, so we need
     437       to restore the control word after that.  */
     438    __asm__ __volatile__ ("fldcw\t%0" : : "m" (envp->__control_word));
     439  
     440    if (has_sse())
     441      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (envp->__mxcsr));
     442  }
     443  
     444  void
     445  set_fpu_state (void *state)
     446  {
     447    struct fenv *envp = state;
     448  
     449    /* glibc sources (sysdeps/x86_64/fpu/fesetenv.c) do something more
     450       complex than this, but I think it suffices in our case.  */
     451    __asm__ __volatile__ ("fldenv\t%0" : : "m" (*envp));
     452  
     453    if (has_sse())
     454      __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (envp->__mxcsr));
     455  }
     456  
     457  
     458  int
     459  support_fpu_underflow_control (int kind)
     460  {
     461    if (!has_sse())
     462      return 0;
     463  
     464    return (kind == 4 || kind == 8) ? 1 : 0;
     465  }
     466  
     467  
     468  int
     469  get_fpu_underflow_mode (void)
     470  {
     471    unsigned int cw_sse;
     472  
     473    if (!has_sse())
     474      return 1;
     475  
     476    __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
     477  
     478    /* Return 0 for abrupt underflow (flush to zero), 1 for gradual underflow.  */
     479    return (cw_sse & MXCSR_FTZ) ? 0 : 1;
     480  }
     481  
     482  
     483  void
     484  set_fpu_underflow_mode (int gradual)
     485  {
     486    unsigned int cw_sse;
     487  
     488    if (!has_sse())
     489      return;
     490  
     491    __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
     492  
     493    if (gradual)
     494      cw_sse &= ~MXCSR_FTZ;
     495    else
     496      cw_sse |= MXCSR_FTZ;
     497  
     498    __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
     499  }
     500