(root)/
glibc-2.38/
locale/
loadarchive.c
       1  /* Code to load locale data from the locale archive file.
       2     Copyright (C) 2002-2023 Free Software Foundation, Inc.
       3     This file is part of the GNU C Library.
       4  
       5     The GNU C Library is free software; you can redistribute it and/or
       6     modify it under the terms of the GNU Lesser General Public
       7     License as published by the Free Software Foundation; either
       8     version 2.1 of the License, or (at your option) any later version.
       9  
      10     The GNU C Library is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      13     Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public
      16     License along with the GNU C Library; if not, see
      17     <https://www.gnu.org/licenses/>.  */
      18  
      19  #include <locale.h>
      20  #include <stddef.h>
      21  #include <stdlib.h>
      22  #include <stdbool.h>
      23  #include <errno.h>
      24  #include <assert.h>
      25  #include <string.h>
      26  #include <fcntl.h>
      27  #include <unistd.h>
      28  #include <stdint.h>
      29  #include <sys/mman.h>
      30  #include <sys/stat.h>
      31  #include <sys/param.h>
      32  
      33  #include "localeinfo.h"
      34  #include "locarchive.h"
      35  #include <not-cancel.h>
      36  
      37  /* Define the hash function.  We define the function as static inline:
              while "hashval.h" is being included, compute_hashval expands to
              `static inline compute_hashval' and hashval_t expands to uint32_t.
              The compute_hashval macro is removed again right after the
              include; hashval_t stays defined.
              NOTE(review): this presumes hashval.h defines a function named
              compute_hashval in terms of hashval_t -- confirm against that
              header.  */
      38  #define compute_hashval static inline compute_hashval
      39  #define hashval_t uint32_t
      40  #include "hashval.h"
      41  #undef compute_hashval
      42  
      43  
      44  /* Name of the locale archive file: COMPLOCALEDIR with "/locale-archive"
              appended.  _nl_load_locale_from_archive opens this path read-only.  */
      45  static const char archfname[] = COMPLOCALEDIR "/locale-archive";
      46  
      47  /* Size of initial mapping window, optimal if large enough to
      48     cover the header plus the initial locale.  The value is 2 MiB.
              The loader applies this limit only when pointers are no wider
              than four bytes; otherwise it maps the whole file at once.  */
      49  #define ARCHIVE_MAPPING_WINDOW	(2 * 1024 * 1024)
      50  
      51  #ifndef MAP_COPY
      52  /* Fall back to MAP_PRIVATE when MAP_COPY is not defined.
              This is not quite as good as MAP_COPY since unexamined pages
      53     can change out from under us and give us inconsistent data.
      54     But we rely on the user not to diddle the system's live archive.
      55     Even though we only ever use PROT_READ, using MAP_SHARED would
      56     not give the system sufficient freedom to e.g. let the on disk
      57     file go away because it doesn't know we won't call mprotect later.  */
      58  # define MAP_COPY MAP_PRIVATE
      59  #endif
      60  #ifndef MAP_FILE
      61   /* Some systems do not have this flag; it is superfluous.  Defining it
              as 0 makes it a no-op when OR-ed into the mmap flags below.  */
      62  # define MAP_FILE 0
      63  #endif
      64  
      65  /* Record of contiguous pages already mapped from the locale archive.  */
      66  struct archmapped
      67  {
      68    void *ptr;                    /* Address of the mapping.  */
      69    uint32_t from;                /* Archive file offset where the mapping starts.  */
      70    uint32_t len;                 /* Length of the mapping in bytes.  */
      71    struct archmapped *next;      /* Next record in the list, or NULL.  */
      72  };
      73  static struct archmapped *archmapped; /* List head: NULL until the first attempt to open the archive, then &headmap.  */
      74  
      75  /* This describes the mapping at the beginning of the file that contains
      76     the header data.  There could be data in the following partial page,
      77     so this is searched like any other.  Once the archive has been used,
      78     ARCHMAPPED points to this; if mapping the archive header failed,
      79     then headmap.ptr is null.  The loader never assigns headmap.from; it
              relies on the zero initialization of this static object.  */
      80  static struct archmapped headmap;
      81  static struct __stat64_t64 archive_stat; /* stat of archive when header mapped; its st_size bounds record offsets and the whole struct is compared when the file is reopened.  */
      82  
      83  /* Record of locales that we have already loaded from the archive.
              Records are pushed onto the front of the ARCHLOADED list and are
              released only by _nl_archive_subfreeres.  */
      84  struct locale_in_archive
      85  {
      86    struct locale_in_archive *next;       /* Next cached locale, or NULL.  */
      87    char *name;                           /* Heap copy of the locale name (made with __strdup).  */
      88    struct __locale_data *data[__LC_LAST]; /* Per-category data, indexed by category; an entry is NULL if interning failed.  No locale data is stored in the LC_ALL slot.  */
      89  };
      90  static struct locale_in_archive *archloaded; /* Head of the list of cached locales.  */
      91  
      92  
      93  /* Local structure and subroutine of _nl_load_archive, see below.  */
      94  struct range
      95  {
      96    uint32_t from;
      97    uint32_t len;
      98    int category;
      99    void *result;
     100  };
     101  
     102  static int
     103  rangecmp (const void *p1, const void *p2)
     104  {
     105    return ((struct range *) p1)->from - ((struct range *) p2)->from;
     106  }
     107  
     108  
     109  /* Calculate the amount of space needed for all the tables described
     110     by the given header.  Note we do not include the empty table space
     111     that has been preallocated in the file, so our mapping may not be
     112     large enough if localedef adds data to the file in place.  However,
     113     doing that would permute the header fields while we are accessing
     114     them and thus not be safe anyway, so we don't allow for that.  */
     115  static inline off_t
     116  calculate_head_size (const struct locarhead *h)
     117  {
     118    off_t namehash_end = (h->namehash_offset
     119  			+ h->namehash_size * sizeof (struct namehashent));
     120    off_t string_end =  h->string_offset + h->string_used;
     121    off_t locrectab_end = (h->locrectab_offset
     122  			 + h->locrectab_used * sizeof (struct locrecent));
     123    return MAX (namehash_end, MAX (string_end, locrectab_end));
     124  }
     125  
     126  
     127  /* Find the locale *NAMEP in the locale archive, and return the
     128     internalized data structure for its CATEGORY data.  If this locale has
     129     already been loaded from the archive, just returns the existing data
     130     structure.  If successful, sets *NAMEP to point directly into the mapped
     131     archive string table; that way, the next call can short-circuit strcmp.  */
     132  struct __locale_data *
     133  _nl_load_locale_from_archive (int category, const char **namep)
     134  {
     135    const char *name = *namep;
     136    struct
     137    {
     138      void *addr;
     139      size_t len;
     140    } results[__LC_LAST];
     141    struct locale_in_archive *lia;
     142    struct locarhead *head;
     143    struct namehashent *namehashtab;
     144    struct locrecent *locrec;
     145    struct archmapped *mapped;
     146    struct archmapped *last;
     147    unsigned long int hval;
     148    size_t idx;
     149    size_t incr;
     150    struct range ranges[__LC_LAST - 1];
     151    int nranges;
     152    int cnt;
     153    size_t ps = __sysconf (_SC_PAGE_SIZE);
     154    int fd = -1;
     155  
     156    /* Check if we have already loaded this locale from the archive.
     157       If we previously loaded the locale but found bogons in the data,
     158       then we will have stored a null pointer to return here.  */
     159    for (lia = archloaded; lia != NULL; lia = lia->next)
     160      if (name == lia->name || !strcmp (name, lia->name))
     161        {
     162  	*namep = lia->name;
     163  	return lia->data[category];
     164        }
     165  
     166    {
     167      /* If the name contains a codeset, then we normalize the name before
     168         doing the lookup.  */
     169      const char *p = strchr (name, '.');
     170      if (p != NULL && p[1] != '@' && p[1] != '\0')
     171        {
     172  	const char *rest = __strchrnul (++p, '@');
     173  	const char *normalized_codeset = _nl_normalize_codeset (p, rest - p);
     174  	if (normalized_codeset == NULL)	/* malloc failure */
     175  	  return NULL;
     176  	if (strncmp (normalized_codeset, p, rest - p) != 0
     177  	    || normalized_codeset[rest - p] != '\0')
     178  	  {
     179  	    /* There is a normalized codeset name that is different from
     180  	       what was specified; reconstruct a new locale name using it.  */
     181  	    size_t normlen = strlen (normalized_codeset);
     182  	    size_t restlen = strlen (rest) + 1;
     183  	    char *newname = alloca (p - name + normlen + restlen);
     184  	    memcpy (__mempcpy (__mempcpy (newname, name, p - name),
     185  			       normalized_codeset, normlen),
     186  		    rest, restlen);
     187  	    name = newname;
     188  	  }
     189  	free ((char *) normalized_codeset);
     190        }
     191    }
     192  
     193    /* Make sure the archive is loaded.  */
     194    if (archmapped == NULL)
     195      {
     196        void *result;
     197        size_t headsize, mapsize;
     198  
     199        /* We do this early as a sign that we have tried to open the archive.
     200  	 If headmap.ptr remains null, that's an indication that we tried
     201  	 and failed, so we won't try again.  */
     202        archmapped = &headmap;
     203  
     204        /* The archive has never been opened.  */
     205        fd = __open_nocancel (archfname, O_RDONLY|O_LARGEFILE|O_CLOEXEC);
     206        if (fd < 0)
     207  	/* Cannot open the archive, for whatever reason.  */
     208  	return NULL;
     209  
     210        if (__fstat64_time64 (fd, &archive_stat) == -1)
     211  	{
     212  	  /* stat failed, very strange.  */
     213  	close_and_out:
     214  	  if (fd >= 0)
     215  	    __close_nocancel_nostatus (fd);
     216  	  return NULL;
     217  	}
     218  
     219  
     220        /* Map an initial window probably large enough to cover the header
     221  	 and the first locale's data.  With a large address space, we can
     222  	 just map the whole file and be sure everything is covered.  */
     223  
     224        mapsize = (sizeof (void *) > 4 ? archive_stat.st_size
     225  		 : MIN (archive_stat.st_size, ARCHIVE_MAPPING_WINDOW));
     226  
     227        result = __mmap64 (NULL, mapsize, PROT_READ, MAP_FILE|MAP_COPY, fd, 0);
     228        if (result == MAP_FAILED)
     229  	goto close_and_out;
     230  
     231        /* Check whether the file is large enough for the sizes given in
     232  	 the header.  Theoretically an archive could be so large that
     233  	 just the header fails to fit in our initial mapping window.  */
     234        headsize = calculate_head_size ((const struct locarhead *) result);
     235        if (headsize > mapsize)
     236  	{
     237  	  (void) __munmap (result, mapsize);
     238  	  if (sizeof (void *) > 4 || headsize > archive_stat.st_size)
     239  	    /* The file is not big enough for the header.  Bogus.  */
     240  	    goto close_and_out;
     241  
     242  	  /* Freakishly long header.  */
     243  	  /* XXX could use mremap when available */
     244  	  mapsize = (headsize + ps - 1) & ~(ps - 1);
     245  	  result = __mmap64 (NULL, mapsize, PROT_READ, MAP_FILE|MAP_COPY,
     246  			     fd, 0);
     247  	  if (result == MAP_FAILED)
     248  	    goto close_and_out;
     249  	}
     250  
     251        if (sizeof (void *) > 4 || mapsize >= archive_stat.st_size)
     252  	{
     253  	  /* We've mapped the whole file already, so we can be
     254  	     sure we won't need this file descriptor later.  */
     255  	  __close_nocancel_nostatus (fd);
     256  	  fd = -1;
     257  	}
     258  
     259        headmap.ptr = result;
     260        /* headmap.from already initialized to zero.  */
     261        headmap.len = mapsize;
     262      }
     263  
     264    /* If there is no archive or it cannot be loaded for some reason fail.  */
     265    if (__glibc_unlikely (headmap.ptr == NULL))
     266      goto close_and_out;
     267  
     268    /* We have the archive available.  To find the name we first have to
     269       determine its hash value.  */
     270    hval = compute_hashval (name, strlen (name));
     271  
     272    head = headmap.ptr;
     273    namehashtab = (struct namehashent *) ((char *) head
     274  					+ head->namehash_offset);
     275  
     276    /* Avoid division by 0 if the file is corrupted.  */
     277    if (__glibc_unlikely (head->namehash_size <= 2))
     278      goto close_and_out;
     279  
     280    idx = hval % head->namehash_size;
     281    incr = 1 + hval % (head->namehash_size - 2);
     282  
     283    /* If the name_offset field is zero this means this is a
     284       deleted entry and therefore no entry can be found.  */
     285    while (1)
     286      {
     287        if (namehashtab[idx].name_offset == 0)
     288  	/* Not found.  */
     289  	goto close_and_out;
     290  
     291        if (namehashtab[idx].hashval == hval
     292  	  && strcmp (name, headmap.ptr + namehashtab[idx].name_offset) == 0)
     293  	/* Found the entry.  */
     294  	break;
     295  
     296        idx += incr;
     297        if (idx >= head->namehash_size)
     298  	idx -= head->namehash_size;
     299      }
     300  
     301    /* We found an entry.  It might be a placeholder for a removed one.  */
     302    if (namehashtab[idx].locrec_offset == 0)
     303      goto close_and_out;
     304  
     305    locrec = (struct locrecent *) (headmap.ptr + namehashtab[idx].locrec_offset);
     306  
     307    if (sizeof (void *) > 4 /* || headmap.len == archive_stat.st_size */)
     308      {
     309        /* We already have the whole locale archive mapped in.  */
     310        assert (headmap.len == archive_stat.st_size);
     311        for (cnt = 0; cnt < __LC_LAST; ++cnt)
     312  	if (cnt != LC_ALL)
     313  	  {
     314  	    if (locrec->record[cnt].offset + locrec->record[cnt].len
     315  		> headmap.len)
     316  	      /* The archive locrectab contains bogus offsets.  */
     317  	      goto close_and_out;
     318  	    results[cnt].addr = headmap.ptr + locrec->record[cnt].offset;
     319  	    results[cnt].len = locrec->record[cnt].len;
     320  	  }
     321      }
     322    else
     323      {
     324        /* Get the offsets of the data files and sort them.  */
     325        for (cnt = nranges = 0; cnt < __LC_LAST; ++cnt)
     326  	if (cnt != LC_ALL)
     327  	  {
     328  	    ranges[nranges].from = locrec->record[cnt].offset;
     329  	    ranges[nranges].len = locrec->record[cnt].len;
     330  	    ranges[nranges].category = cnt;
     331  	    ranges[nranges].result = NULL;
     332  
     333  	    ++nranges;
     334  	  }
     335  
     336        qsort (ranges, nranges, sizeof (ranges[0]), rangecmp);
     337  
     338        /* The information about mmap'd blocks is kept in a list.
     339  	 Skip over the blocks which are before the data we need.  */
     340        last = mapped = archmapped;
     341        for (cnt = 0; cnt < nranges; ++cnt)
     342  	{
     343  	  int upper;
     344  	  size_t from;
     345  	  size_t to;
     346  	  void *addr;
     347  	  struct archmapped *newp;
     348  
     349  	  /* Determine whether the appropriate page is already mapped.  */
     350  	  while (mapped != NULL
     351  		 && (mapped->from + mapped->len
     352  		     <= ranges[cnt].from + ranges[cnt].len))
     353  	    {
     354  	      last = mapped;
     355  	      mapped = mapped->next;
     356  	    }
     357  
     358  	  /* Do we have a match?  */
     359  	  if (mapped != NULL
     360  	      && mapped->from <= ranges[cnt].from
     361  	      && (ranges[cnt].from + ranges[cnt].len
     362  		  <= mapped->from + mapped->len))
     363  	    {
     364  	      /* Yep, already loaded.  */
     365  	      results[ranges[cnt].category].addr = ((char *) mapped->ptr
     366  						    + ranges[cnt].from
     367  						    - mapped->from);
     368  	      results[ranges[cnt].category].len = ranges[cnt].len;
     369  	      continue;
     370  	    }
     371  
     372  	  /* Map the range with the locale data from the file.  We will
     373  	     try to cover as much of the locale as possible.  I.e., if the
     374  	     next category (next as in "next offset") is on the current or
     375  	     immediately following page we use it as well.  */
     376  	  assert (powerof2 (ps));
     377  	  from = ranges[cnt].from & ~(ps - 1);
     378  	  upper = cnt;
     379  	  do
     380  	    {
     381  	      to = ranges[upper].from + ranges[upper].len;
     382  	      if (to > (size_t) archive_stat.st_size)
     383  		/* The archive locrectab contains bogus offsets.  */
     384  		goto close_and_out;
     385  	      to = (to + ps - 1) & ~(ps - 1);
     386  
     387  	      /* If a range is already mmaped in, stop.	 */
     388  	      if (mapped != NULL && ranges[upper].from >= mapped->from)
     389  		break;
     390  
     391  	      ++upper;
     392  	    }
     393  	  /* Loop while still in contiguous pages. */
     394  	  while (upper < nranges && ranges[upper].from < to + ps);
     395  
     396  	  /* Open the file if it hasn't happened yet.  */
     397  	  if (fd == -1)
     398  	    {
     399  	      struct __stat64_t64 st;
     400  	      fd = __open_nocancel (archfname,
     401  				    O_RDONLY|O_LARGEFILE|O_CLOEXEC);
     402  	      if (fd == -1)
     403  		/* Cannot open the archive, for whatever reason.  */
     404  		return NULL;
     405  	      /* Now verify we think this is really the same archive file
     406  		 we opened before.  If it has been changed we cannot trust
     407  		 the header we read previously.  */
     408  	      if (__fstat64_time64 (fd, &st) < 0
     409  		  || st.st_size != archive_stat.st_size
     410  		  || st.st_mtime != archive_stat.st_mtime
     411  		  || st.st_dev != archive_stat.st_dev
     412  		  || st.st_ino != archive_stat.st_ino)
     413  		goto close_and_out;
     414  	    }
     415  
     416  	  /* Map the range from the archive.  */
     417  	  addr = __mmap64 (NULL, to - from, PROT_READ, MAP_FILE|MAP_COPY,
     418  			   fd, from);
     419  	  if (addr == MAP_FAILED)
     420  	    goto close_and_out;
     421  
     422  	  /* Allocate a record for this mapping.  */
     423  	  newp = (struct archmapped *) malloc (sizeof (struct archmapped));
     424  	  if (newp == NULL)
     425  	    {
     426  	      (void) __munmap (addr, to - from);
     427  	      goto close_and_out;
     428  	    }
     429  
     430  	  /* And queue it.  */
     431  	  newp->ptr = addr;
     432  	  newp->from = from;
     433  	  newp->len = to - from;
     434  	  assert (last->next == mapped);
     435  	  newp->next = mapped;
     436  	  last->next = newp;
     437  	  last = newp;
     438  
     439  	  /* Determine the load addresses for the category data.  */
     440  	  do
     441  	    {
     442  	      assert (ranges[cnt].from >= from);
     443  	      results[ranges[cnt].category].addr = ((char *) addr
     444  						    + ranges[cnt].from - from);
     445  	      results[ranges[cnt].category].len = ranges[cnt].len;
     446  	    }
     447  	  while (++cnt < upper);
     448  	  --cnt;		/* The 'for' will increase 'cnt' again.  */
     449  	}
     450      }
     451  
     452    /* We don't need the file descriptor any longer.  */
     453    if (fd >= 0)
     454      __close_nocancel_nostatus (fd);
     455    fd = -1;
     456  
     457    /* We succeeded in mapping all the necessary regions of the archive.
     458       Now we need the expected data structures to point into the data.  */
     459  
     460    lia = malloc (sizeof *lia);
     461    if (__glibc_unlikely (lia == NULL))
     462      return NULL;
     463  
     464    lia->name = __strdup (*namep);
     465    if (__glibc_unlikely (lia->name == NULL))
     466      {
     467        free (lia);
     468        return NULL;
     469      }
     470  
     471    lia->next = archloaded;
     472    archloaded = lia;
     473  
     474    for (cnt = 0; cnt < __LC_LAST; ++cnt)
     475      if (cnt != LC_ALL)
     476        {
     477  	lia->data[cnt] = _nl_intern_locale_data (cnt,
     478  						 results[cnt].addr,
     479  						 results[cnt].len);
     480  	if (__glibc_likely (lia->data[cnt] != NULL))
     481  	  {
     482  	    /* _nl_intern_locale_data leaves us these fields to initialize.  */
     483  	    lia->data[cnt]->alloc = ld_archive;
     484  	    lia->data[cnt]->name = lia->name;
     485  
     486  	    /* We do this instead of bumping the count each time we return
     487  	       this data because the mappings stay around forever anyway
     488  	       and we might as well hold on to a little more memory and not
     489  	       have to rebuild it on the next lookup of the same thing.
     490  	       If we were to maintain the usage_count normally and let the
     491  	       structures be freed, we would have to remove the elements
     492  	       from archloaded too.  */
     493  	    lia->data[cnt]->usage_count = UNDELETABLE;
     494  	  }
     495        }
     496  
     497    *namep = lia->name;
     498    return lia->data[category];
     499  }
     500  
     501  void
     502  _nl_archive_subfreeres (void)
     503  {
     504    struct locale_in_archive *lia;
     505    struct archmapped *am;
     506  
     507    /* Toss out our cached locales.  */
     508    lia = archloaded;
     509    while (lia != NULL)
     510      {
     511        int category;
     512        struct locale_in_archive *dead = lia;
     513        lia = lia->next;
     514  
     515        free (dead->name);
     516        for (category = 0; category < __LC_LAST; ++category)
     517  	if (category != LC_ALL && dead->data[category] != NULL)
     518  	  _nl_unload_locale (category, dead->data[category]);
     519        free (dead);
     520      }
     521    archloaded = NULL;
     522  
     523    if (archmapped != NULL)
     524      {
     525        /* Now toss all the mapping windows, which we know nothing is using any
     526  	 more because we just tossed all the locales that point into them.  */
     527  
     528        assert (archmapped == &headmap);
     529        archmapped = NULL;
     530        (void) __munmap (headmap.ptr, headmap.len);
     531        am = headmap.next;
     532        while (am != NULL)
     533  	{
     534  	  struct archmapped *dead = am;
     535  	  am = am->next;
     536  	  (void) __munmap (dead->ptr, dead->len);
     537  	  free (dead);
     538  	}
     539      }
     540  }