(root)/
gmp-6.3.0/
mpn/
x86_64/
fat/
fat.c
       1  /* x86_64 fat binary initializers.
       2  
       3     Contributed to the GNU project by Kevin Ryde (original x86_32 code) and
       4     Torbjorn Granlund (port to x86_64)
       5  
       6     THE FUNCTIONS AND VARIABLES IN THIS FILE ARE FOR INTERNAL USE ONLY.
       7     THEY'RE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR
       8     COMPLETELY IN FUTURE GNU MP RELEASES.
       9  
      10  Copyright 2003, 2004, 2009, 2011-2015, 2017 Free Software Foundation, Inc.
      11  
      12  This file is part of the GNU MP Library.
      13  
      14  The GNU MP Library is free software; you can redistribute it and/or modify
      15  it under the terms of either:
      16  
      17    * the GNU Lesser General Public License as published by the Free
      18      Software Foundation; either version 3 of the License, or (at your
      19      option) any later version.
      20  
      21  or
      22  
      23    * the GNU General Public License as published by the Free Software
      24      Foundation; either version 2 of the License, or (at your option) any
      25      later version.
      26  
      27  or both in parallel, as here.
      28  
      29  The GNU MP Library is distributed in the hope that it will be useful, but
      30  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      31  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      32  for more details.
      33  
      34  You should have received copies of the GNU General Public License and the
      35  GNU Lesser General Public License along with the GNU MP Library.  If not,
      36  see https://www.gnu.org/licenses/.  */
      37  
      38  #include <stdio.h>    /* for printf */
      39  #include <stdlib.h>   /* for getenv */
      40  #include <string.h>
      41  
      42  #include "gmp-impl.h"
      43  
      44  /* Change this to "#define TRACE(x) x" for some traces. */
      45  #define TRACE(x)
      46  
      47  
/* fat_entry.asm

   Execute cpuid for the leaf given as second argument.  As used by the
   callers in this file: the return value is treated as eax, and the
   12-byte buffer is read as three 32-bit words holding ebx, edx, ecx in
   that order (which is why leaf 0 yields the vendor string directly).
   NOTE(review): the implementation lives in assembly outside this file;
   confirm the layout there.  */
long __gmpn_cpuid (char [12], int);
      50  
      51  
      52  #if WANT_FAKE_CPUID
      53  /* The "name"s in the table are values for the GMP_CPU_TYPE environment
      54     variable.  Anything can be used, but for now it's the canonical cpu types
      55     as per config.guess/config.sub.  */
      56  
      57  #define __gmpn_cpuid            fake_cpuid
      58  
      59  #define MAKE_FMS(family, model)						\
      60    ((((family) & 0xf) << 8) + (((family) & 0xff0) << 20)			\
      61     + (((model) & 0xf) << 4) + (((model)  &  0xf0) << 12))
      62  
      63  static struct {
      64    const char  *name;
      65    const char  *vendor;
      66    unsigned    fms;
      67  } fake_cpuid_table[] = {
      68    { "core2",      "GenuineIntel", MAKE_FMS (6, 0xf) },
      69    { "nehalem",    "GenuineIntel", MAKE_FMS (6, 0x1a) },
      70    { "nhm",        "GenuineIntel", MAKE_FMS (6, 0x1a) },
      71    { "atom",       "GenuineIntel", MAKE_FMS (6, 0x1c) },
      72    { "westmere",   "GenuineIntel", MAKE_FMS (6, 0x25) },
      73    { "wsm",        "GenuineIntel", MAKE_FMS (6, 0x25) },
      74    { "sandybridge","GenuineIntel", MAKE_FMS (6, 0x2a) },
      75    { "sbr",        "GenuineIntel", MAKE_FMS (6, 0x2a) },
      76    { "silvermont", "GenuineIntel", MAKE_FMS (6, 0x37) },
      77    { "slm",        "GenuineIntel", MAKE_FMS (6, 0x37) },
      78    { "haswell",    "GenuineIntel", MAKE_FMS (6, 0x3c) },
      79    { "hwl",        "GenuineIntel", MAKE_FMS (6, 0x3c) },
      80    { "broadwell",  "GenuineIntel", MAKE_FMS (6, 0x3d) },
      81    { "bwl",        "GenuineIntel", MAKE_FMS (6, 0x3d) },
      82    { "skylake",    "GenuineIntel", MAKE_FMS (6, 0x5e) },
      83    { "sky",        "GenuineIntel", MAKE_FMS (6, 0x5e) },
      84    { "pentium4",   "GenuineIntel", MAKE_FMS (15, 3) },
      85  
      86    { "k8",         "AuthenticAMD", MAKE_FMS (15, 0) },
      87    { "k10",        "AuthenticAMD", MAKE_FMS (16, 0) },
      88    { "bobcat",     "AuthenticAMD", MAKE_FMS (20, 1) },
      89    { "bulldozer",  "AuthenticAMD", MAKE_FMS (21, 1) },
      90    { "piledriver", "AuthenticAMD", MAKE_FMS (21, 2) },
      91    { "steamroller","AuthenticAMD", MAKE_FMS (21, 0x30) },
      92    { "excavator",  "AuthenticAMD", MAKE_FMS (21, 0x60) },
      93    { "jaguar",     "AuthenticAMD", MAKE_FMS (22, 1) },
      94    { "zen",        "AuthenticAMD", MAKE_FMS (23, 1) },
      95  
      96    { "nano",       "CentaurHauls", MAKE_FMS (6, 15) },
      97  };
      98  
      99  static int
     100  fake_cpuid_lookup (void)
     101  {
     102    char  *s;
     103    int   i;
     104  
     105    s = getenv ("GMP_CPU_TYPE");
     106    if (s == NULL)
     107      {
     108        printf ("Need GMP_CPU_TYPE environment variable for fake cpuid\n");
     109        abort ();
     110      }
     111  
     112    for (i = 0; i < numberof (fake_cpuid_table); i++)
     113      if (strcmp (s, fake_cpuid_table[i].name) == 0)
     114        return i;
     115  
     116    printf ("GMP_CPU_TYPE=%s unknown\n", s);
     117    abort ();
     118  }
     119  
     120  static long
     121  fake_cpuid (char dst[12], unsigned int id)
     122  {
     123    int  i = fake_cpuid_lookup();
     124  
     125    switch (id) {
     126    case 0:
     127      memcpy (dst, fake_cpuid_table[i].vendor, 12);
     128      return 0;
     129    case 1:
     130      return fake_cpuid_table[i].fms;
     131    case 7:
     132      dst[0] = 0xff;				/* BMI1, AVX2, etc */
     133      dst[1] = 0xff;				/* BMI2, etc */
     134      return 0;
     135    case 0x80000001:
     136      dst[4 + 29 / 8] = (1 << (29 % 8));		/* "long" mode */
     137      return 0;
     138    default:
     139      printf ("fake_cpuid(): oops, unknown id %d\n", id);
     140      abort ();
     141    }
     142  }
     143  #endif
     144  
     145  
/* Function pointer types for the preinv_divrem_1 and preinv_mod_1 slots.
   __gmpn_cpuvec_init uses them to cast the plain divrem_1 and mod_1
   pointers when no dedicated preinv routine has been selected.  */
typedef DECL_preinv_divrem_1 ((*preinv_divrem_1_t));
typedef DECL_preinv_mod_1    ((*preinv_mod_1_t));

/* The dispatch vector.  Every function slot starts out pointing at the
   corresponding *_init routine; __gmpn_cpuvec_init later overwrites the
   vector with the routines chosen for the detected CPU.
   NOTE(review): the *_init routines are defined outside this file;
   presumably each one runs __gmpn_cpuvec_init and then continues into the
   real function -- confirm in fat_entry.asm.
   The initializers are positional, so their order must match the member
   order of struct cpuvec_t.  */
struct cpuvec_t __gmpn_cpuvec = {
  __MPN(add_n_init),
  __MPN(addlsh1_n_init),
  __MPN(addlsh2_n_init),
  __MPN(addmul_1_init),
  __MPN(addmul_2_init),
  __MPN(bdiv_dbm1c_init),
  __MPN(cnd_add_n_init),
  __MPN(cnd_sub_n_init),
  __MPN(com_init),
  __MPN(copyd_init),
  __MPN(copyi_init),
  __MPN(divexact_1_init),
  __MPN(divrem_1_init),
  __MPN(gcd_11_init),
  __MPN(lshift_init),
  __MPN(lshiftc_init),
  __MPN(mod_1_init),
  __MPN(mod_1_1p_init),
  __MPN(mod_1_1p_cps_init),
  __MPN(mod_1s_2p_init),
  __MPN(mod_1s_2p_cps_init),
  __MPN(mod_1s_4p_init),
  __MPN(mod_1s_4p_cps_init),
  __MPN(mod_34lsub1_init),
  __MPN(modexact_1c_odd_init),
  __MPN(mul_1_init),
  __MPN(mul_basecase_init),
  __MPN(mullo_basecase_init),
  __MPN(preinv_divrem_1_init),
  __MPN(preinv_mod_1_init),
  __MPN(redc_1_init),
  __MPN(redc_2_init),
  __MPN(rshift_init),
  __MPN(sqr_basecase_init),
  __MPN(sub_n_init),
  __MPN(sublsh1_n_init),
  __MPN(submul_1_init),
  /* First member after the function pointers; any members after it are
     zero-initialized implicitly.  */
  0
};

/* Zero until __gmpn_cpuvec_init has installed the final vector, at which
   point that function sets it to 1.  */
int __gmpn_cpuvec_initialized = 0;
     191  
     192  /* The following setups start with generic x86, then overwrite with
     193     specifics for a chip, and higher versions of that chip.
     194  
     195     The arrangement of the setups here will normally be the same as the $path
     196     selections in configure.in for the respective chips.
     197  
     198     This code is reentrant and thread safe.  We always calculate the same
     199     decided_cpuvec, so if two copies of the code are running it doesn't
     200     matter which completes first, both write the same to __gmpn_cpuvec.
     201  
     202     We need to go via decided_cpuvec because if one thread has completed
     203     __gmpn_cpuvec then it may be making use of the threshold values in that
     204     vector.  If another thread is still running __gmpn_cpuvec_init then we
     205     don't want it to write different values to those fields since some of the
     206     asm routines only operate correctly up to their own defined threshold,
     207     not an arbitrary value.  */
     208  
     209  static int
     210  gmp_workaround_skylake_cpuid_bug ()
     211  {
     212    char feature_string[49];
     213    char processor_name_string[49];
     214    static const char *bad_cpus[] = {" G44", " G45", " G39" /* , "6600" */ };
     215    int i;
     216  
     217    /* Example strings:                                   */
     218    /* "Intel(R) Pentium(R) CPU G4400 @ 3.30GHz"          */
     219    /* "Intel(R) Core(TM) i5-6600K CPU @ 3.50GHz"         */
     220    /*                  ^               ^               ^ */
     221    /*     0x80000002       0x80000003      0x80000004    */
     222    /* We match out just the 0x80000003 part here. */
     223  
     224    /* In their infinitive wisdom, Intel decided to use one register order for
     225       the vendor string, and another for the processor name string.  We shuffle
     226       things about here, rather than write a new variant of our assembly cpuid.
     227    */
     228  
     229    unsigned int eax, ebx, ecx, edx;
     230    eax = __gmpn_cpuid (feature_string, 0x80000003);
     231    ebx = ((unsigned int *)feature_string)[0];
     232    edx = ((unsigned int *)feature_string)[1];
     233    ecx = ((unsigned int *)feature_string)[2];
     234  
     235    ((unsigned int *) (processor_name_string))[0] = eax;
     236    ((unsigned int *) (processor_name_string))[1] = ebx;
     237    ((unsigned int *) (processor_name_string))[2] = ecx;
     238    ((unsigned int *) (processor_name_string))[3] = edx;
     239  
     240    processor_name_string[16] = 0;
     241  
     242    for (i = 0; i < sizeof (bad_cpus) / sizeof (char *); i++)
     243      {
     244        if (strstr (processor_name_string, bad_cpus[i]) != 0)
     245  	return 1;
     246      }
     247    return 0;
     248  }
     249  
     250  enum {BMI2_BIT = 8};
     251  
     252  void
     253  __gmpn_cpuvec_init (void)
     254  {
     255    struct cpuvec_t  decided_cpuvec;
     256    char vendor_string[13];
     257    char dummy_string[12];
     258    long fms;
     259    int family, model;
     260  
     261    TRACE (printf ("__gmpn_cpuvec_init:\n"));
     262  
     263    memset (&decided_cpuvec, '\0', sizeof (decided_cpuvec));
     264  
     265    CPUVEC_SETUP_x86_64;
     266    CPUVEC_SETUP_fat;
     267  
     268    __gmpn_cpuid (vendor_string, 0);
     269    vendor_string[12] = 0;
     270  
     271    fms = __gmpn_cpuid (dummy_string, 1);
     272    family = ((fms >> 8) & 0xf) + ((fms >> 20) & 0xff);
     273    model = ((fms >> 4) & 0xf) + ((fms >> 12) & 0xf0);
     274  
     275    /* Check extended feature flags */
     276    __gmpn_cpuid (dummy_string, 0x80000001);
     277    if ((dummy_string[4 + 29 / 8] & (1 << (29 % 8))) == 0)
     278      abort (); /* longmode-capable-bit turned off! */
     279  
     280    /*********************************************************/
     281    /*** WARNING: keep this list in sync with config.guess ***/
     282    /*********************************************************/
     283    if (strcmp (vendor_string, "GenuineIntel") == 0)
     284      {
     285        switch (family)
     286  	{
     287  	case 6:
     288  	  switch (model)
     289  	    {
     290  	    case 0x0f:		/* Conroe Merom Kentsfield Allendale */
     291  	    case 0x10:
     292  	    case 0x11:
     293  	    case 0x12:
     294  	    case 0x13:
     295  	    case 0x14:
     296  	    case 0x15:
     297  	    case 0x16:
     298  	    case 0x17:		/* PNR Wolfdale Yorkfield */
     299  	    case 0x18:
     300  	    case 0x19:
     301  	    case 0x1d:		/* PNR Dunnington */
     302  	      CPUVEC_SETUP_core2;
     303  	      break;
     304  
     305  	    case 0x1c:		/* Atom Silverthorne */
     306  	    case 0x26:		/* Atom Lincroft */
     307  	    case 0x27:		/* Atom Saltwell? */
     308  	    case 0x36:		/* Atom Cedarview/Saltwell */
     309  	      CPUVEC_SETUP_atom;
     310  	      break;
     311  
     312  	    case 0x1a:		/* NHM Gainestown */
     313  	    case 0x1b:
     314  	    case 0x1e:		/* NHM Lynnfield/Jasper */
     315  	    case 0x1f:
     316  	    case 0x20:
     317  	    case 0x21:
     318  	    case 0x22:
     319  	    case 0x23:
     320  	    case 0x24:
     321  	    case 0x25:		/* WSM Clarkdale/Arrandale */
     322  	    case 0x28:
     323  	    case 0x29:
     324  	    case 0x2b:
     325  	    case 0x2c:		/* WSM Gulftown */
     326  	    case 0x2e:		/* NHM Beckton */
     327  	    case 0x2f:		/* WSM Eagleton */
     328  	      CPUVEC_SETUP_core2;
     329  	      CPUVEC_SETUP_coreinhm;
     330  	      break;
     331  
     332  	    case 0x37:		/* Silvermont */
     333  	    case 0x4a:		/* Silvermont */
     334  	    case 0x4c:		/* Airmont */
     335  	    case 0x4d:		/* Silvermont/Avoton */
     336  	    case 0x5a:		/* Silvermont */
     337  	      CPUVEC_SETUP_atom;
     338  	      CPUVEC_SETUP_silvermont;
     339  	      break;
     340  
     341  	    case 0x5c:		/* Goldmont */
     342  	    case 0x5f:		/* Goldmont */
     343  	    case 0x7a:		/* Goldmont Plus */
     344  	      CPUVEC_SETUP_atom;
     345  	      CPUVEC_SETUP_silvermont;
     346  	      CPUVEC_SETUP_goldmont;
     347  	      break;
     348  
     349  	    case 0x2a:		/* SB */
     350  	    case 0x2d:		/* SBC-EP */
     351  	    case 0x3a:		/* IBR */
     352  	    case 0x3e:		/* IBR Ivytown */
     353  	      CPUVEC_SETUP_core2;
     354  	      CPUVEC_SETUP_coreinhm;
     355  	      CPUVEC_SETUP_coreisbr;
     356  	      break;
     357  	    case 0x3c:		/* Haswell client */
     358  	    case 0x3f:		/* Haswell server */
     359  	    case 0x45:		/* Haswell ULT */
     360  	    case 0x46:		/* Crystal Well */
     361  	      CPUVEC_SETUP_core2;
     362  	      CPUVEC_SETUP_coreinhm;
     363  	      CPUVEC_SETUP_coreisbr;
     364  	      /* Some Haswells lack BMI2.  Let them appear as Sandybridges for
     365  		 now.  */
     366  	      __gmpn_cpuid (dummy_string, 7);
     367  	      if ((dummy_string[0 + BMI2_BIT / 8] & (1 << (BMI2_BIT % 8))) == 0)
     368  		break;
     369  	      CPUVEC_SETUP_coreihwl;
     370  	      break;
     371  	    case 0x3d:		/* Broadwell */
     372  	    case 0x47:		/* Broadwell */
     373  	    case 0x4f:		/* Broadwell server */
     374  	    case 0x56:		/* Broadwell microserver */
     375  	      CPUVEC_SETUP_core2;
     376  	      CPUVEC_SETUP_coreinhm;
     377  	      CPUVEC_SETUP_coreisbr;
     378  	      if ((dummy_string[0 + BMI2_BIT / 8] & (1 << (BMI2_BIT % 8))) == 0)
     379  		break;
     380  	      CPUVEC_SETUP_coreihwl;
     381  	      CPUVEC_SETUP_coreibwl;
     382  	      break;
     383  	    case 0x4e:		/* Skylake client */
     384  	    case 0x55:		/* Skylake server */
     385  	    case 0x5e:		/* Skylake */
     386  	    case 0x8e:		/* Kabylake */
     387  	    case 0x9e:		/* Kabylake */
     388  	      CPUVEC_SETUP_core2;
     389  	      CPUVEC_SETUP_coreinhm;
     390  	      CPUVEC_SETUP_coreisbr;
     391  	      if ((dummy_string[0 + BMI2_BIT / 8] & (1 << (BMI2_BIT % 8))) == 0)
     392  		break;
     393  	      if (gmp_workaround_skylake_cpuid_bug ())
     394  		break;
     395  	      CPUVEC_SETUP_coreihwl;
     396  	      CPUVEC_SETUP_coreibwl;
     397  	      CPUVEC_SETUP_skylake;
     398  	      break;
     399  	    }
     400  	  break;
     401  
     402  	case 15:
     403  	  CPUVEC_SETUP_pentium4;
     404  	  break;
     405  	}
     406      }
     407    else if (strcmp (vendor_string, "AuthenticAMD") == 0)
     408      {
     409        switch (family)
     410  	{
     411  	case 0x0f:		/* k8 */
     412  	case 0x11:		/* "fam 11h", mix of k8 and k10 */
     413  	case 0x13:
     414  	  CPUVEC_SETUP_k8;
     415  	  break;
     416  
     417  	case 0x10:		/* k10 */
     418  	case 0x12:		/* k10 (llano) */
     419  	  CPUVEC_SETUP_k8;
     420  	  CPUVEC_SETUP_k10;
     421  	  break;
     422  
     423  	case 0x14:		/* bobcat */
     424  	  CPUVEC_SETUP_k8;
     425  	  CPUVEC_SETUP_k10;
     426  	  CPUVEC_SETUP_bt1;
     427  	  break;
     428  
     429  	case 0x16:		/* jaguar */
     430  	  CPUVEC_SETUP_k8;
     431  	  CPUVEC_SETUP_k10;
     432  	  CPUVEC_SETUP_bt1;
     433  	  CPUVEC_SETUP_bt2;
     434  	  break;
     435  
     436  	case 0x15:		/* bulldozer, piledriver, steamroller, excavator */
     437  	  CPUVEC_SETUP_k8;
     438  	  CPUVEC_SETUP_k10;
     439  	  CPUVEC_SETUP_bd1;
     440  	  break;
     441  
     442  	case 0x17:		/* zen */
     443  	case 0x19:		/* zen3 */
     444  	  CPUVEC_SETUP_zen;
     445  	  break;
     446  	}
     447      }
     448    else if (strcmp (vendor_string, "CentaurHauls") == 0)
     449      {
     450        switch (family)
     451  	{
     452  	case 6:
     453  	  if (model >= 15)
     454  	    CPUVEC_SETUP_nano;
     455  	  break;
     456  	}
     457      }
     458  
     459    /* There's no x86 generic mpn_preinv_divrem_1 or mpn_preinv_mod_1.
     460       Instead default to the plain versions from whichever CPU we detected.
     461       The function arguments are compatible, no need for any glue code.  */
     462    if (decided_cpuvec.preinv_divrem_1 == NULL)
     463      decided_cpuvec.preinv_divrem_1 =(preinv_divrem_1_t)decided_cpuvec.divrem_1;
     464    if (decided_cpuvec.preinv_mod_1 == NULL)
     465      decided_cpuvec.preinv_mod_1    =(preinv_mod_1_t)   decided_cpuvec.mod_1;
     466  
     467    ASSERT_CPUVEC (decided_cpuvec);
     468    CPUVEC_INSTALL (decided_cpuvec);
     469  
     470    /* Set this once the threshold fields are ready.
     471       Use volatile to prevent it getting moved.  */
     472    *((volatile int *) &__gmpn_cpuvec_initialized) = 1;
     473  }