glibc-2.38/sysdeps/x86/cpu-features.c
       1  /* Initialize CPU feature data.
       2     This file is part of the GNU C Library.
       3     Copyright (C) 2008-2023 Free Software Foundation, Inc.
       4  
       5     The GNU C Library is free software; you can redistribute it and/or
       6     modify it under the terms of the GNU Lesser General Public
       7     License as published by the Free Software Foundation; either
       8     version 2.1 of the License, or (at your option) any later version.
       9  
      10     The GNU C Library is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      13     Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public
      16     License along with the GNU C Library; if not, see
      17     <https://www.gnu.org/licenses/>.  */
      18  
      19  #include <dl-hwcap.h>
      20  #include <libc-pointer-arith.h>
      21  #include <get-isa-level.h>
      22  #include <cacheinfo.h>
      23  #include <dl-cacheinfo.h>
      24  #include <dl-minsigstacksize.h>
      25  #include <dl-hwcap2.h>
      26  
      27  extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *)
      28    attribute_hidden;
      29  
      30  #ifdef __LP64__
      31  static void
      32  TUNABLE_CALLBACK (set_prefer_map_32bit_exec) (tunable_val_t *valp)
      33  {
      34    if (valp->numval)
      35      GLRO(dl_x86_cpu_features).preferred[index_arch_Prefer_MAP_32BIT_EXEC]
      36        |= bit_arch_Prefer_MAP_32BIT_EXEC;
      37  }
      38  #endif
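           /* Illustrative usage note (hedged): the callback above is hooked up
              via TUNABLE_GET further down in init_cpu_features, so, assuming the
              tunable is exported as glibc.cpu.prefer_map_32bit_exec as described
              in the glibc manual, it can be exercised on 64-bit builds with e.g.
              GLIBC_TUNABLES=glibc.cpu.prefer_map_32bit_exec=1 ./a.out.  */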
      39  
      40  #if CET_ENABLED
      41  extern void TUNABLE_CALLBACK (set_x86_ibt) (tunable_val_t *)
      42    attribute_hidden;
      43  extern void TUNABLE_CALLBACK (set_x86_shstk) (tunable_val_t *)
      44    attribute_hidden;
      45  
      46  # include <dl-cet.h>
      47  #endif
      48  
      49  static void
      50  update_active (struct cpu_features *cpu_features)
      51  {
       52    /* Copy the cpuid bits to active bits for CPU features whose usability
       53       in user space can be detected without additional OS support.  */
      54    CPU_FEATURE_SET_ACTIVE (cpu_features, SSE3);
      55    CPU_FEATURE_SET_ACTIVE (cpu_features, PCLMULQDQ);
      56    CPU_FEATURE_SET_ACTIVE (cpu_features, SSSE3);
      57    CPU_FEATURE_SET_ACTIVE (cpu_features, CMPXCHG16B);
      58    CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4_1);
      59    CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4_2);
      60    CPU_FEATURE_SET_ACTIVE (cpu_features, MOVBE);
      61    CPU_FEATURE_SET_ACTIVE (cpu_features, POPCNT);
      62    CPU_FEATURE_SET_ACTIVE (cpu_features, AES);
      63    CPU_FEATURE_SET_ACTIVE (cpu_features, OSXSAVE);
      64    CPU_FEATURE_SET_ACTIVE (cpu_features, TSC);
      65    CPU_FEATURE_SET_ACTIVE (cpu_features, CX8);
      66    CPU_FEATURE_SET_ACTIVE (cpu_features, CMOV);
      67    CPU_FEATURE_SET_ACTIVE (cpu_features, CLFSH);
      68    CPU_FEATURE_SET_ACTIVE (cpu_features, MMX);
      69    CPU_FEATURE_SET_ACTIVE (cpu_features, FXSR);
      70    CPU_FEATURE_SET_ACTIVE (cpu_features, SSE);
      71    CPU_FEATURE_SET_ACTIVE (cpu_features, SSE2);
      72    CPU_FEATURE_SET_ACTIVE (cpu_features, HTT);
      73    CPU_FEATURE_SET_ACTIVE (cpu_features, BMI1);
      74    CPU_FEATURE_SET_ACTIVE (cpu_features, HLE);
      75    CPU_FEATURE_SET_ACTIVE (cpu_features, BMI2);
      76    CPU_FEATURE_SET_ACTIVE (cpu_features, ERMS);
      77    CPU_FEATURE_SET_ACTIVE (cpu_features, RDSEED);
      78    CPU_FEATURE_SET_ACTIVE (cpu_features, ADX);
      79    CPU_FEATURE_SET_ACTIVE (cpu_features, CLFLUSHOPT);
      80    CPU_FEATURE_SET_ACTIVE (cpu_features, CLWB);
      81    CPU_FEATURE_SET_ACTIVE (cpu_features, SHA);
      82    CPU_FEATURE_SET_ACTIVE (cpu_features, PREFETCHWT1);
      83    CPU_FEATURE_SET_ACTIVE (cpu_features, OSPKE);
      84    CPU_FEATURE_SET_ACTIVE (cpu_features, WAITPKG);
      85    CPU_FEATURE_SET_ACTIVE (cpu_features, GFNI);
      86    CPU_FEATURE_SET_ACTIVE (cpu_features, RDPID);
      87    CPU_FEATURE_SET_ACTIVE (cpu_features, RDRAND);
      88    CPU_FEATURE_SET_ACTIVE (cpu_features, CLDEMOTE);
      89    CPU_FEATURE_SET_ACTIVE (cpu_features, MOVDIRI);
      90    CPU_FEATURE_SET_ACTIVE (cpu_features, MOVDIR64B);
      91    CPU_FEATURE_SET_ACTIVE (cpu_features, FSRM);
      92    CPU_FEATURE_SET_ACTIVE (cpu_features, RTM_ALWAYS_ABORT);
      93    CPU_FEATURE_SET_ACTIVE (cpu_features, SERIALIZE);
      94    CPU_FEATURE_SET_ACTIVE (cpu_features, TSXLDTRK);
      95    CPU_FEATURE_SET_ACTIVE (cpu_features, LAHF64_SAHF64);
      96    CPU_FEATURE_SET_ACTIVE (cpu_features, LZCNT);
      97    CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4A);
      98    CPU_FEATURE_SET_ACTIVE (cpu_features, PREFETCHW);
      99    CPU_FEATURE_SET_ACTIVE (cpu_features, TBM);
     100    CPU_FEATURE_SET_ACTIVE (cpu_features, RDTSCP);
     101    CPU_FEATURE_SET_ACTIVE (cpu_features, WBNOINVD);
     102    CPU_FEATURE_SET_ACTIVE (cpu_features, RAO_INT);
     103    CPU_FEATURE_SET_ACTIVE (cpu_features, CMPCCXADD);
     104    CPU_FEATURE_SET_ACTIVE (cpu_features, FZLRM);
     105    CPU_FEATURE_SET_ACTIVE (cpu_features, FSRS);
     106    CPU_FEATURE_SET_ACTIVE (cpu_features, FSRCS);
     107    CPU_FEATURE_SET_ACTIVE (cpu_features, PREFETCHI);
     108    CPU_FEATURE_SET_ACTIVE (cpu_features, PTWRITE);
     109  
     110    if (!CPU_FEATURES_CPU_P (cpu_features, RTM_ALWAYS_ABORT))
     111      CPU_FEATURE_SET_ACTIVE (cpu_features, RTM);
     112  
     113  #if CET_ENABLED
     114    CPU_FEATURE_SET_ACTIVE (cpu_features, IBT);
     115    CPU_FEATURE_SET_ACTIVE (cpu_features, SHSTK);
     116  #endif
     117  
     118    /* Can we call xgetbv?  */
     119    if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE))
     120      {
     121        unsigned int xcrlow;
     122        unsigned int xcrhigh;
     123        asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
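                 /* Illustrative note (hedged, not part of the original logic):
                    XGETBV with ECX = 0 reads XCR0 into EDX:EAX.  Bit 1 covers the
                    XMM (SSE) state and bit 2 the YMM (AVX) state, so both must be
                    set before the AVX paths below are taken.  With a toolchain
                    that provides <immintrin.h>, a rough equivalent sketch
                    (assuming the _xgetbv intrinsic is available) would be
                        unsigned long long xcr0 = _xgetbv (0);
                    but the inline asm above avoids that compiler dependency.  */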
      124        /* Are the YMM and XMM states usable?  */
     125        if ((xcrlow & (bit_YMM_state | bit_XMM_state))
     126  	  == (bit_YMM_state | bit_XMM_state))
     127  	{
     128  	  /* Determine if AVX is usable.  */
     129  	  if (CPU_FEATURES_CPU_P (cpu_features, AVX))
     130  	    {
     131  	      CPU_FEATURE_SET (cpu_features, AVX);
     132  	      /* The following features depend on AVX being usable.  */
     133  	      /* Determine if AVX2 is usable.  */
     134  	      if (CPU_FEATURES_CPU_P (cpu_features, AVX2))
     135  		{
     136  		  CPU_FEATURE_SET (cpu_features, AVX2);
     137  
      138  		  /* Unaligned loads with 256-bit AVX registers are faster
     139  		     on Intel/AMD processors with AVX2.  */
     140  		  cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
     141  		    |= bit_arch_AVX_Fast_Unaligned_Load;
     142  		}
     143  	      /* Determine if AVX-IFMA is usable.  */
     144  	      CPU_FEATURE_SET_ACTIVE (cpu_features, AVX_IFMA);
     145  	      /* Determine if AVX-NE-CONVERT is usable.  */
     146  	      CPU_FEATURE_SET_ACTIVE (cpu_features, AVX_NE_CONVERT);
     147  	      /* Determine if AVX-VNNI is usable.  */
     148  	      CPU_FEATURE_SET_ACTIVE (cpu_features, AVX_VNNI);
     149  	      /* Determine if AVX-VNNI-INT8 is usable.  */
     150  	      CPU_FEATURE_SET_ACTIVE (cpu_features, AVX_VNNI_INT8);
     151  	      /* Determine if FMA is usable.  */
     152  	      CPU_FEATURE_SET_ACTIVE (cpu_features, FMA);
     153  	      /* Determine if VAES is usable.  */
     154  	      CPU_FEATURE_SET_ACTIVE (cpu_features, VAES);
     155  	      /* Determine if VPCLMULQDQ is usable.  */
     156  	      CPU_FEATURE_SET_ACTIVE (cpu_features, VPCLMULQDQ);
     157  	      /* Determine if XOP is usable.  */
     158  	      CPU_FEATURE_SET_ACTIVE (cpu_features, XOP);
     159  	      /* Determine if F16C is usable.  */
     160  	      CPU_FEATURE_SET_ACTIVE (cpu_features, F16C);
     161  	    }
     162  
      163  	  /* Check if the Opmask state, the upper 256 bits of ZMM0-ZMM15,
      164  	     and the ZMM16-ZMM31 state are enabled.  */
     165  	  if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state
     166  			 | bit_ZMM16_31_state))
     167  	      == (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
     168  	    {
     169  	      /* Determine if AVX512F is usable.  */
     170  	      if (CPU_FEATURES_CPU_P (cpu_features, AVX512F))
     171  		{
     172  		  CPU_FEATURE_SET (cpu_features, AVX512F);
     173  		  /* Determine if AVX512CD is usable.  */
     174  		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512CD);
     175  		  /* Determine if AVX512ER is usable.  */
     176  		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512ER);
     177  		  /* Determine if AVX512PF is usable.  */
     178  		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512PF);
     179  		  /* Determine if AVX512VL is usable.  */
     180  		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512VL);
     181  		  /* Determine if AVX512DQ is usable.  */
     182  		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512DQ);
     183  		  /* Determine if AVX512BW is usable.  */
     184  		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512BW);
     185  		  /* Determine if AVX512_4FMAPS is usable.  */
     186  		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_4FMAPS);
     187  		  /* Determine if AVX512_4VNNIW is usable.  */
     188  		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_4VNNIW);
     189  		  /* Determine if AVX512_BITALG is usable.  */
     190  		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_BITALG);
     191  		  /* Determine if AVX512_IFMA is usable.  */
     192  		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_IFMA);
     193  		  /* Determine if AVX512_VBMI is usable.  */
     194  		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VBMI);
     195  		  /* Determine if AVX512_VBMI2 is usable.  */
     196  		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VBMI2);
      197  		  /* Determine if AVX512_VNNI is usable.  */
     198  		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VNNI);
     199  		  /* Determine if AVX512_VPOPCNTDQ is usable.  */
     200  		  CPU_FEATURE_SET_ACTIVE (cpu_features,
     201  					  AVX512_VPOPCNTDQ);
     202  		  /* Determine if AVX512_VP2INTERSECT is usable.  */
     203  		  CPU_FEATURE_SET_ACTIVE (cpu_features,
     204  					  AVX512_VP2INTERSECT);
     205  		  /* Determine if AVX512_BF16 is usable.  */
     206  		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_BF16);
     207  		  /* Determine if AVX512_FP16 is usable.  */
     208  		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_FP16);
     209  		}
     210  	    }
     211  	}
     212  
     213        /* Are XTILECFG and XTILEDATA states usable?  */
     214        if ((xcrlow & (bit_XTILECFG_state | bit_XTILEDATA_state))
     215  	  == (bit_XTILECFG_state | bit_XTILEDATA_state))
     216  	{
     217  	  /* Determine if AMX_BF16 is usable.  */
     218  	  CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_BF16);
     219  	  /* Determine if AMX_TILE is usable.  */
     220  	  CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_TILE);
     221  	  /* Determine if AMX_INT8 is usable.  */
     222  	  CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_INT8);
     223  	  /* Determine if AMX_FP16 is usable.  */
     224  	  CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_FP16);
     225  	  /* Determine if AMX_COMPLEX is usable.  */
     226  	  CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_COMPLEX);
     227  	}
     228  
      229        /* APX is usable only if the APX state is supported by the kernel.  */
     230        if ((xcrlow & bit_APX_state) != 0)
     231  	CPU_FEATURE_SET_ACTIVE (cpu_features, APX_F);
     232  
     233        /* These features are usable only when OSXSAVE is enabled.  */
     234        CPU_FEATURE_SET (cpu_features, XSAVE);
     235        CPU_FEATURE_SET_ACTIVE (cpu_features, XSAVEOPT);
     236        CPU_FEATURE_SET_ACTIVE (cpu_features, XSAVEC);
     237        CPU_FEATURE_SET_ACTIVE (cpu_features, XGETBV_ECX_1);
     238        CPU_FEATURE_SET_ACTIVE (cpu_features, XFD);
     239  
     240        /* For _dl_runtime_resolve, set xsave_state_size to xsave area
     241  	 size + integer register save size and align it to 64 bytes.  */
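                 /* Illustrative worked example (EBX value assumed): if CPUID
                    leaf 0xd reports EBX = 0x340 (832 bytes) for the enabled XSAVE
                    states, the code below stores
                    ALIGN_UP (0x340 + STATE_SAVE_OFFSET, 64), where
                    ALIGN_UP (x, 64) rounds x up to the next multiple of 64, i.e.
                    (x + 63) & ~63 for this power-of-two case, so the register
                    save area used by _dl_runtime_resolve stays 64-byte aligned.  */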
     242        if (cpu_features->basic.max_cpuid >= 0xd)
     243  	{
     244  	  unsigned int eax, ebx, ecx, edx;
     245  
     246  	  __cpuid_count (0xd, 0, eax, ebx, ecx, edx);
     247  	  if (ebx != 0)
     248  	    {
     249  	      unsigned int xsave_state_full_size
     250  		= ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);
     251  
     252  	      cpu_features->xsave_state_size
     253  		= xsave_state_full_size;
     254  	      cpu_features->xsave_state_full_size
     255  		= xsave_state_full_size;
     256  
     257  	      /* Check if XSAVEC is available.  */
     258  	      if (CPU_FEATURES_CPU_P (cpu_features, XSAVEC))
     259  		{
     260  		  unsigned int xstate_comp_offsets[32];
     261  		  unsigned int xstate_comp_sizes[32];
     262  		  unsigned int i;
     263  
     264  		  xstate_comp_offsets[0] = 0;
     265  		  xstate_comp_offsets[1] = 160;
     266  		  xstate_comp_offsets[2] = 576;
     267  		  xstate_comp_sizes[0] = 160;
     268  		  xstate_comp_sizes[1] = 256;
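                             /* Illustrative note: these fixed values mirror the
                                XSAVE layout assumed here: component 0 (x87)
                                occupies the first 160 bytes, component 1
                                (SSE/XMM, 16 registers of 16 bytes) the next 256
                                bytes, and compacted extended components start at
                                offset 576, i.e. after the 512-byte legacy region
                                plus the 64-byte XSAVE header.  */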
     269  
     270  		  for (i = 2; i < 32; i++)
     271  		    {
     272  		      if ((STATE_SAVE_MASK & (1 << i)) != 0)
     273  			{
     274  			  __cpuid_count (0xd, i, eax, ebx, ecx, edx);
     275  			  xstate_comp_sizes[i] = eax;
     276  			}
     277  		      else
     278  			{
     279  			  ecx = 0;
     280  			  xstate_comp_sizes[i] = 0;
     281  			}
     282  
     283  		      if (i > 2)
     284  			{
     285  			  xstate_comp_offsets[i]
     286  			    = (xstate_comp_offsets[i - 1]
      287  			       + xstate_comp_sizes[i - 1]);
     288  			  if ((ecx & (1 << 1)) != 0)
     289  			    xstate_comp_offsets[i]
     290  			      = ALIGN_UP (xstate_comp_offsets[i], 64);
     291  			}
     292  		    }
     293  
     294  		  /* Use XSAVEC.  */
     295  		  unsigned int size
     296  		    = xstate_comp_offsets[31] + xstate_comp_sizes[31];
     297  		  if (size)
     298  		    {
     299  		      cpu_features->xsave_state_size
     300  			= ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
     301  		      CPU_FEATURE_SET (cpu_features, XSAVEC);
     302  		    }
     303  		}
     304  	    }
     305  	}
     306      }
     307  
     308    /* Determine if PKU is usable.  */
     309    if (CPU_FEATURES_CPU_P (cpu_features, OSPKE))
     310      CPU_FEATURE_SET (cpu_features, PKU);
     311  
     312    /* Determine if Key Locker instructions are usable.  */
     313    if (CPU_FEATURES_CPU_P (cpu_features, AESKLE))
     314      {
     315        CPU_FEATURE_SET (cpu_features, AESKLE);
     316        CPU_FEATURE_SET_ACTIVE (cpu_features, KL);
     317        CPU_FEATURE_SET_ACTIVE (cpu_features, WIDE_KL);
     318      }
     319  
     320    dl_check_hwcap2 (cpu_features);
     321  
     322    cpu_features->isa_1 = get_isa_level (cpu_features);
     323  }
     324  
     325  static void
     326  get_extended_indices (struct cpu_features *cpu_features)
     327  {
     328    unsigned int eax, ebx, ecx, edx;
     329    __cpuid (0x80000000, eax, ebx, ecx, edx);
     330    if (eax >= 0x80000001)
     331      __cpuid (0x80000001,
     332  	     cpu_features->features[CPUID_INDEX_80000001].cpuid.eax,
     333  	     cpu_features->features[CPUID_INDEX_80000001].cpuid.ebx,
     334  	     cpu_features->features[CPUID_INDEX_80000001].cpuid.ecx,
     335  	     cpu_features->features[CPUID_INDEX_80000001].cpuid.edx);
     336    if (eax >= 0x80000007)
     337      __cpuid (0x80000007,
     338  	     cpu_features->features[CPUID_INDEX_80000007].cpuid.eax,
     339  	     cpu_features->features[CPUID_INDEX_80000007].cpuid.ebx,
     340  	     cpu_features->features[CPUID_INDEX_80000007].cpuid.ecx,
     341  	     cpu_features->features[CPUID_INDEX_80000007].cpuid.edx);
     342    if (eax >= 0x80000008)
     343      __cpuid (0x80000008,
     344  	     cpu_features->features[CPUID_INDEX_80000008].cpuid.eax,
     345  	     cpu_features->features[CPUID_INDEX_80000008].cpuid.ebx,
     346  	     cpu_features->features[CPUID_INDEX_80000008].cpuid.ecx,
     347  	     cpu_features->features[CPUID_INDEX_80000008].cpuid.edx);
     348  }
     349  
     350  static void
     351  get_common_indices (struct cpu_features *cpu_features,
     352  		    unsigned int *family, unsigned int *model,
     353  		    unsigned int *extended_model, unsigned int *stepping)
     354  {
     355    if (family)
     356      {
     357        unsigned int eax;
     358        __cpuid (1, eax,
     359  	       cpu_features->features[CPUID_INDEX_1].cpuid.ebx,
     360  	       cpu_features->features[CPUID_INDEX_1].cpuid.ecx,
     361  	       cpu_features->features[CPUID_INDEX_1].cpuid.edx);
     362        cpu_features->features[CPUID_INDEX_1].cpuid.eax = eax;
     363        *family = (eax >> 8) & 0x0f;
     364        *model = (eax >> 4) & 0x0f;
     365        *extended_model = (eax >> 12) & 0xf0;
     366        *stepping = eax & 0x0f;
     367        if (*family == 0x0f)
     368  	{
     369  	  *family += (eax >> 20) & 0xff;
     370  	  *model += *extended_model;
     371  	}
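                 /* Illustrative worked example (signature value assumed):
                    EAX = 0x000906ea decodes to stepping 0xa, model 0xe,
                    family 0x6 and extended_model 0x90.  Only family 0x0f is
                    widened here; for Intel family 0x06 the caller adds
                    extended_model to model later, giving 0x9e in this example,
                    which intel_get_fam6_microarch classifies as
                    INTEL_BIGCORE_KABYLAKE.  */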
     372      }
     373  
     374    if (cpu_features->basic.max_cpuid >= 7)
     375      {
     376        __cpuid_count (7, 0,
     377  		     cpu_features->features[CPUID_INDEX_7].cpuid.eax,
     378  		     cpu_features->features[CPUID_INDEX_7].cpuid.ebx,
     379  		     cpu_features->features[CPUID_INDEX_7].cpuid.ecx,
     380  		     cpu_features->features[CPUID_INDEX_7].cpuid.edx);
     381        __cpuid_count (7, 1,
     382  		     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.eax,
     383  		     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.ebx,
     384  		     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.ecx,
     385  		     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.edx);
     386      }
     387  
     388    if (cpu_features->basic.max_cpuid >= 0xd)
     389      __cpuid_count (0xd, 1,
     390  		   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.eax,
     391  		   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.ebx,
     392  		   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.ecx,
     393  		   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.edx);
     394  
     395    if (cpu_features->basic.max_cpuid >= 0x14)
     396      __cpuid_count (0x14, 0,
     397  		   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.eax,
     398  		   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.ebx,
     399  		   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.ecx,
     400  		   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.edx);
     401  
     402    if (cpu_features->basic.max_cpuid >= 0x19)
     403      __cpuid_count (0x19, 0,
     404  		   cpu_features->features[CPUID_INDEX_19].cpuid.eax,
     405  		   cpu_features->features[CPUID_INDEX_19].cpuid.ebx,
     406  		   cpu_features->features[CPUID_INDEX_19].cpuid.ecx,
     407  		   cpu_features->features[CPUID_INDEX_19].cpuid.edx);
     408  
     409    dl_check_minsigstacksize (cpu_features);
     410  }
     411  
     412  _Static_assert (((index_arch_Fast_Unaligned_Load
     413  		  == index_arch_Fast_Unaligned_Copy)
     414  		 && (index_arch_Fast_Unaligned_Load
     415  		     == index_arch_Prefer_PMINUB_for_stringop)
     416  		 && (index_arch_Fast_Unaligned_Load
     417  		     == index_arch_Slow_SSE4_2)
     418  		 && (index_arch_Fast_Unaligned_Load
     419  		     == index_arch_Fast_Rep_String)
     420  		 && (index_arch_Fast_Unaligned_Load
     421  		     == index_arch_Fast_Copy_Backward)),
     422  		"Incorrect index_arch_Fast_Unaligned_Load");
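           /* Illustrative note: the assertion above matters because the tuning
              code in init_cpu_features ORs bit_arch_Fast_Unaligned_Copy,
              bit_arch_Prefer_PMINUB_for_stringop, bit_arch_Slow_SSE4_2,
              bit_arch_Fast_Rep_String and bit_arch_Fast_Copy_Backward into the
              single preferred[] word selected by index_arch_Fast_Unaligned_Load;
              that shortcut is only valid while all of these bits share one word.  */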
     423  
     424  
     425  /* Intel Family-6 microarch list.  */
     426  enum
     427  {
     428    /* Atom processors.  */
     429    INTEL_ATOM_BONNELL,
     430    INTEL_ATOM_SILVERMONT,
     431    INTEL_ATOM_AIRMONT,
     432    INTEL_ATOM_GOLDMONT,
     433    INTEL_ATOM_GOLDMONT_PLUS,
     434    INTEL_ATOM_SIERRAFOREST,
     435    INTEL_ATOM_GRANDRIDGE,
     436    INTEL_ATOM_TREMONT,
     437  
     438    /* Bigcore processors.  */
     439    INTEL_BIGCORE_MEROM,
     440    INTEL_BIGCORE_PENRYN,
     441    INTEL_BIGCORE_DUNNINGTON,
     442    INTEL_BIGCORE_NEHALEM,
     443    INTEL_BIGCORE_WESTMERE,
     444    INTEL_BIGCORE_SANDYBRIDGE,
     445    INTEL_BIGCORE_IVYBRIDGE,
     446    INTEL_BIGCORE_HASWELL,
     447    INTEL_BIGCORE_BROADWELL,
     448    INTEL_BIGCORE_SKYLAKE,
     449    INTEL_BIGCORE_KABYLAKE,
     450    INTEL_BIGCORE_COMETLAKE,
     451    INTEL_BIGCORE_SKYLAKE_AVX512,
     452    INTEL_BIGCORE_CANNONLAKE,
     453    INTEL_BIGCORE_ICELAKE,
     454    INTEL_BIGCORE_TIGERLAKE,
     455    INTEL_BIGCORE_ROCKETLAKE,
     456    INTEL_BIGCORE_SAPPHIRERAPIDS,
     457    INTEL_BIGCORE_RAPTORLAKE,
     458    INTEL_BIGCORE_EMERALDRAPIDS,
     459    INTEL_BIGCORE_METEORLAKE,
     460    INTEL_BIGCORE_LUNARLAKE,
     461    INTEL_BIGCORE_ARROWLAKE,
     462    INTEL_BIGCORE_GRANITERAPIDS,
     463  
     464    /* Mixed (bigcore + atom SOC).  */
     465    INTEL_MIXED_LAKEFIELD,
     466    INTEL_MIXED_ALDERLAKE,
     467  
     468    /* KNL.  */
     469    INTEL_KNIGHTS_MILL,
     470    INTEL_KNIGHTS_LANDING,
     471  
     472    /* Unknown.  */
     473    INTEL_UNKNOWN,
     474  };
     475  
     476  static unsigned int
     477  intel_get_fam6_microarch (unsigned int model,
     478  			  __attribute__ ((unused)) unsigned int stepping)
     479  {
     480    switch (model)
     481      {
     482      case 0x1C:
     483      case 0x26:
     484        return INTEL_ATOM_BONNELL;
     485      case 0x27:
     486      case 0x35:
     487      case 0x36:
     488        /* Really Saltwell, but Saltwell is just a die shrink of Bonnell
     489           (microarchitecturally identical).  */
     490        return INTEL_ATOM_BONNELL;
     491      case 0x37:
     492      case 0x4A:
     493      case 0x4D:
     494      case 0x5D:
     495        return INTEL_ATOM_SILVERMONT;
     496      case 0x4C:
     497      case 0x5A:
     498      case 0x75:
     499        return INTEL_ATOM_AIRMONT;
     500      case 0x5C:
     501      case 0x5F:
     502        return INTEL_ATOM_GOLDMONT;
     503      case 0x7A:
     504        return INTEL_ATOM_GOLDMONT_PLUS;
     505      case 0xAF:
     506        return INTEL_ATOM_SIERRAFOREST;
     507      case 0xB6:
     508        return INTEL_ATOM_GRANDRIDGE;
     509      case 0x86:
     510      case 0x96:
     511      case 0x9C:
     512        return INTEL_ATOM_TREMONT;
     513      case 0x0F:
     514      case 0x16:
     515        return INTEL_BIGCORE_MEROM;
     516      case 0x17:
     517        return INTEL_BIGCORE_PENRYN;
     518      case 0x1D:
     519        return INTEL_BIGCORE_DUNNINGTON;
     520      case 0x1A:
     521      case 0x1E:
     522      case 0x1F:
     523      case 0x2E:
     524        return INTEL_BIGCORE_NEHALEM;
     525      case 0x25:
     526      case 0x2C:
     527      case 0x2F:
     528        return INTEL_BIGCORE_WESTMERE;
     529      case 0x2A:
     530      case 0x2D:
     531        return INTEL_BIGCORE_SANDYBRIDGE;
     532      case 0x3A:
     533      case 0x3E:
     534        return INTEL_BIGCORE_IVYBRIDGE;
     535      case 0x3C:
     536      case 0x3F:
     537      case 0x45:
     538      case 0x46:
     539        return INTEL_BIGCORE_HASWELL;
     540      case 0x3D:
     541      case 0x47:
     542      case 0x4F:
     543      case 0x56:
     544        return INTEL_BIGCORE_BROADWELL;
     545      case 0x4E:
     546      case 0x5E:
     547        return INTEL_BIGCORE_SKYLAKE;
     548      case 0x8E:
     549      /*
     550       Stepping = {9}
     551          -> Amberlake
     552       Stepping = {10}
     553          -> Coffeelake
     554       Stepping = {11, 12}
     555          -> Whiskeylake
     556       else
     557          -> Kabylake
     558  
     559       All of these are derivatives of Kabylake (Skylake client).
     560       */
      561        return INTEL_BIGCORE_KABYLAKE;
     562      case 0x9E:
     563      /*
     564       Stepping = {10, 11, 12, 13}
     565          -> Coffeelake
     566       else
     567          -> Kabylake
     568  
      569       Coffeelake is a derivative of Kabylake (Skylake client).
      570       */
      571        return INTEL_BIGCORE_KABYLAKE;
     572      case 0xA5:
     573      case 0xA6:
     574        return INTEL_BIGCORE_COMETLAKE;
     575      case 0x66:
     576        return INTEL_BIGCORE_CANNONLAKE;
     577      case 0x55:
     578      /*
     579       Stepping = {6, 7}
     580          -> Cascadelake
     581       Stepping = {11}
     582          -> Cooperlake
     583       else
     584          -> Skylake-avx512
     585  
     586       These are all microarchitecturally identical, so use
     587       Skylake-avx512 for all of them.
     588       */
     589        return INTEL_BIGCORE_SKYLAKE_AVX512;
     590      case 0x6A:
     591      case 0x6C:
     592      case 0x7D:
     593      case 0x7E:
     594      case 0x9D:
     595        return INTEL_BIGCORE_ICELAKE;
     596      case 0x8C:
     597      case 0x8D:
     598        return INTEL_BIGCORE_TIGERLAKE;
     599      case 0xA7:
     600        return INTEL_BIGCORE_ROCKETLAKE;
     601      case 0x8F:
     602        return INTEL_BIGCORE_SAPPHIRERAPIDS;
     603      case 0xB7:
     604      case 0xBA:
     605      case 0xBF:
     606        return INTEL_BIGCORE_RAPTORLAKE;
     607      case 0xCF:
     608        return INTEL_BIGCORE_EMERALDRAPIDS;
     609      case 0xAA:
     610      case 0xAC:
     611        return INTEL_BIGCORE_METEORLAKE;
      612      case 0xBD:
      613        return INTEL_BIGCORE_LUNARLAKE;
      614      case 0xC6:
     615        return INTEL_BIGCORE_ARROWLAKE;
     616      case 0xAD:
     617      case 0xAE:
     618        return INTEL_BIGCORE_GRANITERAPIDS;
     619      case 0x8A:
     620        return INTEL_MIXED_LAKEFIELD;
     621      case 0x97:
     622      case 0x9A:
     623      case 0xBE:
     624        return INTEL_MIXED_ALDERLAKE;
     625      case 0x85:
     626        return INTEL_KNIGHTS_MILL;
     627      case 0x57:
     628        return INTEL_KNIGHTS_LANDING;
     629      default:
     630        return INTEL_UNKNOWN;
     631      }
     632  }
     633  
     634  static inline void
     635  init_cpu_features (struct cpu_features *cpu_features)
     636  {
     637    unsigned int ebx, ecx, edx;
     638    unsigned int family = 0;
     639    unsigned int model = 0;
     640    unsigned int stepping = 0;
     641    enum cpu_features_kind kind;
     642  
     643    cpu_features->cachesize_non_temporal_divisor = 4;
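             /* Illustrative note (hedged): this divisor is consumed by
                dl_init_cacheinfo in dl-cacheinfo.h, where the non-temporal store
                threshold is derived from the shared cache size divided by this
                value; the per-microarch cases below only adjust the divisor
                (for example 8 on older parts and 2 on newer ones), not the
                threshold itself.  */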
     644  #if !HAS_CPUID
     645    if (__get_cpuid_max (0, 0) == 0)
     646      {
     647        kind = arch_kind_other;
     648        goto no_cpuid;
     649      }
     650  #endif
     651  
     652    __cpuid (0, cpu_features->basic.max_cpuid, ebx, ecx, edx);
     653  
     654    /* This spells out "GenuineIntel".  */
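             /* Illustrative note: each CPUID leaf-0 output register holds four
                ASCII bytes in little-endian order: EBX = 0x756e6547 is "Genu",
                EDX = 0x49656e69 is "ineI" and ECX = 0x6c65746e is "ntel", so
                reading EBX, EDX, ECX in that order yields the 12-byte vendor
                string.  The AMD/Hygon and Centaur/Zhaoxin checks below follow
                the same pattern.  */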
     655    if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
     656      {
     657        unsigned int extended_model;
     658  
     659        kind = arch_kind_intel;
     660  
     661        get_common_indices (cpu_features, &family, &model, &extended_model,
     662  			  &stepping);
     663  
     664        get_extended_indices (cpu_features);
     665  
     666        update_active (cpu_features);
     667  
     668        if (family == 0x06)
     669  	{
     670  	  model += extended_model;
     671  	  unsigned int microarch
     672  	      = intel_get_fam6_microarch (model, stepping);
     673  
     674  	  switch (microarch)
     675  	    {
     676  	      /* Atom / KNL tuning.  */
     677  	    case INTEL_ATOM_BONNELL:
     678  	      /* BSF is slow on Bonnell.  */
     679  	      cpu_features->preferred[index_arch_Slow_BSF]
     680  		  |= bit_arch_Slow_BSF;
     681  	      break;
     682  
      683  	      /* Unaligned load versions are faster than the SSSE3 versions
      684  		 on Airmont, Silvermont, Goldmont, and Goldmont Plus.  */
     685  	    case INTEL_ATOM_AIRMONT:
     686  	    case INTEL_ATOM_SILVERMONT:
     687  	    case INTEL_ATOM_GOLDMONT:
     688  	    case INTEL_ATOM_GOLDMONT_PLUS:
     689  
     690            /* Knights Landing.  Enable Silvermont optimizations.  */
     691  	    case INTEL_KNIGHTS_LANDING:
     692  
     693  	      cpu_features->preferred[index_arch_Fast_Unaligned_Load]
     694  		  |= (bit_arch_Fast_Unaligned_Load
     695  		      | bit_arch_Fast_Unaligned_Copy
     696  		      | bit_arch_Prefer_PMINUB_for_stringop
     697  		      | bit_arch_Slow_SSE4_2);
     698  	      break;
     699  
     700  	    case INTEL_ATOM_TREMONT:
     701  	      /* Enable rep string instructions, unaligned load, unaligned
     702  		 copy, pminub and avoid SSE 4.2 on Tremont.  */
     703  	      cpu_features->preferred[index_arch_Fast_Rep_String]
     704  		  |= (bit_arch_Fast_Rep_String
     705  		      | bit_arch_Fast_Unaligned_Load
     706  		      | bit_arch_Fast_Unaligned_Copy
     707  		      | bit_arch_Prefer_PMINUB_for_stringop
     708  		      | bit_arch_Slow_SSE4_2);
     709  	      break;
     710  
     711  	   /*
     712  	    Default tuned Knights microarch.
     713  	    case INTEL_KNIGHTS_MILL:
     714          */
     715  
     716  	   /*
     717  	    Default tuned atom microarch.
     718  	    case INTEL_ATOM_SIERRAFOREST:
     719  	    case INTEL_ATOM_GRANDRIDGE:
     720  	   */
     721  
     722  	      /* Bigcore/Default Tuning.  */
     723  	    default:
     724  	    default_tuning:
      725  	      /* Unknown family 0x06 processors.  Assume this is one of
      726  		 the Core i3/i5/i7 processors if AVX is available.  */
     727  	      if (!CPU_FEATURES_CPU_P (cpu_features, AVX))
     728  		break;
     729  
     730  	    enable_modern_features:
     731  	      /* Rep string instructions, unaligned load, unaligned copy,
     732  		 and pminub are fast on Intel Core i3, i5 and i7.  */
     733  	      cpu_features->preferred[index_arch_Fast_Rep_String]
     734  		  |= (bit_arch_Fast_Rep_String
     735  		      | bit_arch_Fast_Unaligned_Load
     736  		      | bit_arch_Fast_Unaligned_Copy
     737  		      | bit_arch_Prefer_PMINUB_for_stringop);
     738  	      break;
     739  
     740  	    case INTEL_BIGCORE_NEHALEM:
     741  	    case INTEL_BIGCORE_WESTMERE:
     742  	      /* Older CPUs prefer non-temporal stores at lower threshold.  */
     743  	      cpu_features->cachesize_non_temporal_divisor = 8;
     744  	      goto enable_modern_features;
     745  
     746  	      /* Older Bigcore microarch (smaller non-temporal store
     747  		 threshold).  */
     748  	    case INTEL_BIGCORE_SANDYBRIDGE:
     749  	    case INTEL_BIGCORE_IVYBRIDGE:
     750  	    case INTEL_BIGCORE_HASWELL:
     751  	    case INTEL_BIGCORE_BROADWELL:
     752  	      cpu_features->cachesize_non_temporal_divisor = 8;
     753  	      goto default_tuning;
     754  
     755  	      /* Newer Bigcore microarch (larger non-temporal store
     756  		 threshold).  */
     757  	    case INTEL_BIGCORE_SKYLAKE:
     758  	    case INTEL_BIGCORE_KABYLAKE:
     759  	    case INTEL_BIGCORE_COMETLAKE:
     760  	    case INTEL_BIGCORE_SKYLAKE_AVX512:
     761  	    case INTEL_BIGCORE_CANNONLAKE:
     762  	    case INTEL_BIGCORE_ICELAKE:
     763  	    case INTEL_BIGCORE_TIGERLAKE:
     764  	    case INTEL_BIGCORE_ROCKETLAKE:
     765  	    case INTEL_BIGCORE_RAPTORLAKE:
     766  	    case INTEL_BIGCORE_METEORLAKE:
     767  	    case INTEL_BIGCORE_LUNARLAKE:
     768  	    case INTEL_BIGCORE_ARROWLAKE:
     769  	    case INTEL_BIGCORE_SAPPHIRERAPIDS:
     770  	    case INTEL_BIGCORE_EMERALDRAPIDS:
     771  	    case INTEL_BIGCORE_GRANITERAPIDS:
     772  	      cpu_features->cachesize_non_temporal_divisor = 2;
     773  	      goto default_tuning;
     774  
     775  	      /* Default tuned Mixed (bigcore + atom SOC). */
     776  	    case INTEL_MIXED_LAKEFIELD:
     777  	    case INTEL_MIXED_ALDERLAKE:
     778  	      cpu_features->cachesize_non_temporal_divisor = 2;
     779  	      goto default_tuning;
     780  	    }
     781  
     782  	      /* Disable TSX on some processors to avoid TSX on kernels that
     783  		 weren't updated with the latest microcode package (which
      784  		 disables the broken feature by default).  */
     785  	  switch (microarch)
     786  	    {
     787  	    case INTEL_BIGCORE_SKYLAKE_AVX512:
     788  	      /* 0x55 (Skylake-avx512) && stepping <= 5 disable TSX. */
     789  	      if (stepping <= 5)
     790  		goto disable_tsx;
     791  	      break;
     792  
     793  	    case INTEL_BIGCORE_KABYLAKE:
      794  	      /* NB: Although the errata documents that for model == 0x8e
      795  		 (Kaby Lake/Skylake client) only steppings 0xb and lower are
      796  		 impacted, the intent of the errata was to disable TSX on
      797  		 all client processors on all steppings.  Include stepping
      798  		 0xc, which is an Intel Core i7-8665U, a client mobile
      799  		 processor.  */
     800  	      if (stepping > 0xc)
     801  		break;
     802  	      /* Fall through.  */
     803  	    case INTEL_BIGCORE_SKYLAKE:
     804  		/* Disable Intel TSX and enable RTM_ALWAYS_ABORT for
     805  		   processors listed in:
     806  
     807  https://www.intel.com/content/www/us/en/support/articles/000059422/processors.html
     808  		 */
     809  	    disable_tsx:
     810  		CPU_FEATURE_UNSET (cpu_features, HLE);
     811  		CPU_FEATURE_UNSET (cpu_features, RTM);
     812  		CPU_FEATURE_SET (cpu_features, RTM_ALWAYS_ABORT);
     813  		break;
     814  
     815  	    case INTEL_BIGCORE_HASWELL:
      816  		/* Xeon E7 v3 (model == 0x3f) with stepping >= 4 has working
      817  		   TSX.  Haswell also includes other model numbers that have
      818  		   working TSX.  */
      819  		if (model == 0x3f && stepping >= 4)
      820  		  break;
     821  
     822  		CPU_FEATURE_UNSET (cpu_features, RTM);
     823  		break;
     824  	    }
     825  	}
     826  
     827  
      828        /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER
      829  	 if AVX512ER is available.  If AVX512ER isn't available, prefer
      830  	 not to use AVX512, to avoid lowering the CPU frequency.  */
     831        if (CPU_FEATURES_CPU_P (cpu_features, AVX512ER))
     832  	cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
     833  	  |= bit_arch_Prefer_No_VZEROUPPER;
     834        else
     835  	{
     836  	  /* Processors with AVX512 and AVX-VNNI won't lower CPU frequency
     837  	     when ZMM load and store instructions are used.  */
     838  	  if (!CPU_FEATURES_CPU_P (cpu_features, AVX_VNNI))
     839  	    cpu_features->preferred[index_arch_Prefer_No_AVX512]
     840  	      |= bit_arch_Prefer_No_AVX512;
     841  
     842  	  /* Avoid RTM abort triggered by VZEROUPPER inside a
     843  	     transactionally executing RTM region.  */
     844  	  if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
     845  	    cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
     846  	      |= bit_arch_Prefer_No_VZEROUPPER;
     847  	}
     848  
      849        /* Avoid short-distance REP MOVSB on processors with FSRM.  */
     850        if (CPU_FEATURES_CPU_P (cpu_features, FSRM))
     851  	cpu_features->preferred[index_arch_Avoid_Short_Distance_REP_MOVSB]
     852  	  |= bit_arch_Avoid_Short_Distance_REP_MOVSB;
     853      }
     854    /* This spells out "AuthenticAMD" or "HygonGenuine".  */
     855    else if ((ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
     856  	   || (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e))
     857      {
     858        unsigned int extended_model;
     859  
     860        kind = arch_kind_amd;
     861  
     862        get_common_indices (cpu_features, &family, &model, &extended_model,
     863  			  &stepping);
     864  
     865        get_extended_indices (cpu_features);
     866  
     867        update_active (cpu_features);
     868  
     869        ecx = cpu_features->features[CPUID_INDEX_1].cpuid.ecx;
     870  
     871        if (CPU_FEATURE_USABLE_P (cpu_features, AVX))
     872  	{
     873  	  /* Since the FMA4 bit is in CPUID_INDEX_80000001 and
     874  	     FMA4 requires AVX, determine if FMA4 is usable here.  */
     875  	  CPU_FEATURE_SET_ACTIVE (cpu_features, FMA4);
     876  	}
     877  
     878        if (family == 0x15)
     879  	{
     880  	  /* "Excavator"   */
     881  	  if (model >= 0x60 && model <= 0x7f)
     882  	  {
     883  	    cpu_features->preferred[index_arch_Fast_Unaligned_Load]
     884  	      |= (bit_arch_Fast_Unaligned_Load
     885  		  | bit_arch_Fast_Copy_Backward);
     886  
      887  	    /* Unaligned AVX loads are slower.  */
     888  	    cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
     889  	      &= ~bit_arch_AVX_Fast_Unaligned_Load;
     890  	  }
     891  	}
     892      }
     893    /* This spells out "CentaurHauls" or " Shanghai ".  */
     894    else if ((ebx == 0x746e6543 && ecx == 0x736c7561 && edx == 0x48727561)
     895  	   || (ebx == 0x68532020 && ecx == 0x20206961 && edx == 0x68676e61))
     896      {
     897        unsigned int extended_model, stepping;
     898  
     899        kind = arch_kind_zhaoxin;
     900  
     901        get_common_indices (cpu_features, &family, &model, &extended_model,
     902  			  &stepping);
     903  
     904        get_extended_indices (cpu_features);
     905  
     906        update_active (cpu_features);
     907  
     908        model += extended_model;
     909        if (family == 0x6)
     910          {
     911            if (model == 0xf || model == 0x19)
     912              {
     913  	      CPU_FEATURE_UNSET (cpu_features, AVX);
     914  	      CPU_FEATURE_UNSET (cpu_features, AVX2);
     915  
     916                cpu_features->preferred[index_arch_Slow_SSE4_2]
     917                  |= bit_arch_Slow_SSE4_2;
     918  
     919  	      cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
     920  		&= ~bit_arch_AVX_Fast_Unaligned_Load;
     921              }
     922          }
     923        else if (family == 0x7)
     924          {
     925  	  if (model == 0x1b)
     926  	    {
     927  	      CPU_FEATURE_UNSET (cpu_features, AVX);
     928  	      CPU_FEATURE_UNSET (cpu_features, AVX2);
     929  
     930  	      cpu_features->preferred[index_arch_Slow_SSE4_2]
     931  		|= bit_arch_Slow_SSE4_2;
     932  
     933  	      cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
     934  		&= ~bit_arch_AVX_Fast_Unaligned_Load;
     935  	    }
     936  	  else if (model == 0x3b)
     937  	    {
     938  	      CPU_FEATURE_UNSET (cpu_features, AVX);
     939  	      CPU_FEATURE_UNSET (cpu_features, AVX2);
     940  
     941  	      cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
     942  		&= ~bit_arch_AVX_Fast_Unaligned_Load;
     943  	    }
     944  	}
     945      }
     946    else
     947      {
     948        kind = arch_kind_other;
     949        get_common_indices (cpu_features, NULL, NULL, NULL, NULL);
     950        update_active (cpu_features);
     951      }
     952  
     953    /* Support i586 if CX8 is available.  */
     954    if (CPU_FEATURES_CPU_P (cpu_features, CX8))
     955      cpu_features->preferred[index_arch_I586] |= bit_arch_I586;
     956  
     957    /* Support i686 if CMOV is available.  */
     958    if (CPU_FEATURES_CPU_P (cpu_features, CMOV))
     959      cpu_features->preferred[index_arch_I686] |= bit_arch_I686;
     960  
     961  #if !HAS_CPUID
     962  no_cpuid:
     963  #endif
     964  
     965    cpu_features->basic.kind = kind;
     966    cpu_features->basic.family = family;
     967    cpu_features->basic.model = model;
     968    cpu_features->basic.stepping = stepping;
     969  
     970    dl_init_cacheinfo (cpu_features);
     971  
     972    TUNABLE_GET (hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));
     973  
     974  #ifdef __LP64__
     975    TUNABLE_GET (prefer_map_32bit_exec, tunable_val_t *,
     976  	       TUNABLE_CALLBACK (set_prefer_map_32bit_exec));
     977  #endif
     978  
     979    bool disable_xsave_features = false;
     980  
     981    if (!CPU_FEATURE_USABLE_P (cpu_features, OSXSAVE))
     982      {
     983        /* These features are usable only if OSXSAVE is usable.  */
     984        CPU_FEATURE_UNSET (cpu_features, XSAVE);
     985        CPU_FEATURE_UNSET (cpu_features, XSAVEOPT);
     986        CPU_FEATURE_UNSET (cpu_features, XSAVEC);
     987        CPU_FEATURE_UNSET (cpu_features, XGETBV_ECX_1);
     988        CPU_FEATURE_UNSET (cpu_features, XFD);
     989  
     990        disable_xsave_features = true;
     991      }
     992  
     993    if (disable_xsave_features
     994        || (!CPU_FEATURE_USABLE_P (cpu_features, XSAVE)
     995  	  && !CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)))
     996      {
      997        /* Clear xsave_state_size if neither XSAVE nor XSAVEC is usable.  */
     998        cpu_features->xsave_state_size = 0;
     999  
    1000        CPU_FEATURE_UNSET (cpu_features, AVX);
    1001        CPU_FEATURE_UNSET (cpu_features, AVX2);
    1002        CPU_FEATURE_UNSET (cpu_features, AVX_VNNI);
    1003        CPU_FEATURE_UNSET (cpu_features, FMA);
    1004        CPU_FEATURE_UNSET (cpu_features, VAES);
    1005        CPU_FEATURE_UNSET (cpu_features, VPCLMULQDQ);
    1006        CPU_FEATURE_UNSET (cpu_features, XOP);
    1007        CPU_FEATURE_UNSET (cpu_features, F16C);
    1008        CPU_FEATURE_UNSET (cpu_features, AVX512F);
    1009        CPU_FEATURE_UNSET (cpu_features, AVX512CD);
    1010        CPU_FEATURE_UNSET (cpu_features, AVX512ER);
    1011        CPU_FEATURE_UNSET (cpu_features, AVX512PF);
    1012        CPU_FEATURE_UNSET (cpu_features, AVX512VL);
    1013        CPU_FEATURE_UNSET (cpu_features, AVX512DQ);
    1014        CPU_FEATURE_UNSET (cpu_features, AVX512BW);
    1015        CPU_FEATURE_UNSET (cpu_features, AVX512_4FMAPS);
    1016        CPU_FEATURE_UNSET (cpu_features, AVX512_4VNNIW);
    1017        CPU_FEATURE_UNSET (cpu_features, AVX512_BITALG);
    1018        CPU_FEATURE_UNSET (cpu_features, AVX512_IFMA);
    1019        CPU_FEATURE_UNSET (cpu_features, AVX512_VBMI);
    1020        CPU_FEATURE_UNSET (cpu_features, AVX512_VBMI2);
    1021        CPU_FEATURE_UNSET (cpu_features, AVX512_VNNI);
    1022        CPU_FEATURE_UNSET (cpu_features, AVX512_VPOPCNTDQ);
    1023        CPU_FEATURE_UNSET (cpu_features, AVX512_VP2INTERSECT);
    1024        CPU_FEATURE_UNSET (cpu_features, AVX512_BF16);
    1025        CPU_FEATURE_UNSET (cpu_features, AVX512_FP16);
    1026        CPU_FEATURE_UNSET (cpu_features, AMX_BF16);
    1027        CPU_FEATURE_UNSET (cpu_features, AMX_TILE);
    1028        CPU_FEATURE_UNSET (cpu_features, AMX_INT8);
    1029  
    1030        CPU_FEATURE_UNSET (cpu_features, FMA4);
    1031      }
    1032  
    1033  #ifdef __x86_64__
    1034    GLRO(dl_hwcap) = HWCAP_X86_64;
    1035    if (cpu_features->basic.kind == arch_kind_intel)
    1036      {
    1037        const char *platform = NULL;
    1038  
    1039        if (CPU_FEATURE_USABLE_P (cpu_features, AVX512CD))
    1040  	{
    1041  	  if (CPU_FEATURE_USABLE_P (cpu_features, AVX512ER))
    1042  	    {
    1043  	      if (CPU_FEATURE_USABLE_P (cpu_features, AVX512PF))
    1044  		platform = "xeon_phi";
    1045  	    }
    1046  	  else
    1047  	    {
    1048  	      if (CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
    1049  		  && CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
    1050  		  && CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
    1051  		GLRO(dl_hwcap) |= HWCAP_X86_AVX512_1;
    1052  	    }
    1053  	}
    1054  
    1055        if (platform == NULL
    1056  	  && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
    1057  	  && CPU_FEATURE_USABLE_P (cpu_features, FMA)
    1058  	  && CPU_FEATURE_USABLE_P (cpu_features, BMI1)
    1059  	  && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
    1060  	  && CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
    1061  	  && CPU_FEATURE_USABLE_P (cpu_features, MOVBE)
    1062  	  && CPU_FEATURE_USABLE_P (cpu_features, POPCNT))
    1063  	platform = "haswell";
    1064  
    1065        if (platform != NULL)
    1066  	GLRO(dl_platform) = platform;
    1067      }
    1068  #else
    1069    GLRO(dl_hwcap) = 0;
    1070    if (CPU_FEATURE_USABLE_P (cpu_features, SSE2))
    1071      GLRO(dl_hwcap) |= HWCAP_X86_SSE2;
    1072  
    1073    if (CPU_FEATURES_ARCH_P (cpu_features, I686))
    1074      GLRO(dl_platform) = "i686";
    1075    else if (CPU_FEATURES_ARCH_P (cpu_features, I586))
    1076      GLRO(dl_platform) = "i586";
    1077  #endif
    1078  
    1079  #if CET_ENABLED
    1080    TUNABLE_GET (x86_ibt, tunable_val_t *,
    1081  	       TUNABLE_CALLBACK (set_x86_ibt));
    1082    TUNABLE_GET (x86_shstk, tunable_val_t *,
    1083  	       TUNABLE_CALLBACK (set_x86_shstk));
    1084  
    1085    /* Check CET status.  */
    1086    unsigned int cet_status = get_cet_status ();
    1087  
    1088    if ((cet_status & GNU_PROPERTY_X86_FEATURE_1_IBT) == 0)
    1089      CPU_FEATURE_UNSET (cpu_features, IBT)
    1090    if ((cet_status & GNU_PROPERTY_X86_FEATURE_1_SHSTK) == 0)
    1091      CPU_FEATURE_UNSET (cpu_features, SHSTK)
    1092  
    1093    if (cet_status)
    1094      {
    1095        GL(dl_x86_feature_1) = cet_status;
    1096  
    1097  # ifndef SHARED
     1098        /* Check if IBT and SHSTK are enabled by the kernel.  */
    1099        if ((cet_status & GNU_PROPERTY_X86_FEATURE_1_IBT)
    1100  	  || (cet_status & GNU_PROPERTY_X86_FEATURE_1_SHSTK))
    1101  	{
     1102  	  /* Disable IBT and/or SHSTK if they are enabled by the kernel,
     1103  	     but disabled by the environment variable:
    1104  
    1105  	     GLIBC_TUNABLES=glibc.cpu.hwcaps=-IBT,-SHSTK
    1106  	   */
    1107  	  unsigned int cet_feature = 0;
    1108  	  if (!CPU_FEATURE_USABLE (IBT))
    1109  	    cet_feature |= GNU_PROPERTY_X86_FEATURE_1_IBT;
    1110  	  if (!CPU_FEATURE_USABLE (SHSTK))
    1111  	    cet_feature |= GNU_PROPERTY_X86_FEATURE_1_SHSTK;
    1112  
    1113  	  if (cet_feature)
    1114  	    {
    1115  	      int res = dl_cet_disable_cet (cet_feature);
    1116  
    1117  	      /* Clear the disabled bits in dl_x86_feature_1.  */
    1118  	      if (res == 0)
    1119  		GL(dl_x86_feature_1) &= ~cet_feature;
    1120  	    }
    1121  
     1122  	  /* Lock CET if IBT or SHSTK is enabled in the executable.  Don't
     1123  	     lock CET if IBT or SHSTK is enabled permissively.  */
    1124  	  if (GL(dl_x86_feature_control).ibt != cet_permissive
    1125  	      && GL(dl_x86_feature_control).shstk != cet_permissive)
    1126  	    dl_cet_lock_cet ();
    1127  	}
    1128  # endif
    1129      }
    1130  #endif
    1131  
    1132  #ifndef SHARED
    1133    /* NB: In libc.a, call init_cacheinfo.  */
    1134    init_cacheinfo ();
    1135  #endif
    1136  }