(root)/
glibc-2.38/
iconv/
gconv_conf.c
       1  /* Handle configuration data.
       2     Copyright (C) 1997-2023 Free Software Foundation, Inc.
       3     This file is part of the GNU C Library.
       4  
       5     The GNU C Library is free software; you can redistribute it and/or
       6     modify it under the terms of the GNU Lesser General Public
       7     License as published by the Free Software Foundation; either
       8     version 2.1 of the License, or (at your option) any later version.
       9  
      10     The GNU C Library is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      13     Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public
      16     License along with the GNU C Library; if not, see
      17     <https://www.gnu.org/licenses/>.  */
      18  
      19  #include <assert.h>
      20  #include <ctype.h>
      21  #include <errno.h>
      22  #include <limits.h>
      23  #include <locale.h>
      24  #include <search.h>
      25  #include <stddef.h>
      26  #include <stdio.h>
      27  #include <stdio_ext.h>
      28  #include <stdlib.h>
      29  #include <string.h>
      30  #include <unistd.h>
      31  #include <sys/param.h>
      32  
      33  #include <libc-lock.h>
      34  #include <gconv_int.h>
      35  #include <gconv_parseconfdir.h>
      36  
      37  /* This is the default path where we look for module lists.  */
      38  static const char default_gconv_path[] = GCONV_PATH;
      39  
/* Type to represent one element of the module search path.  */
struct path_elem
{
  /* Directory name.  Entries built by __gconv_get_path end in a slash;
     the entry terminating a list has a NULL name.  */
  const char *name;
  /* Length of NAME in bytes, not counting the terminating NUL byte.  */
  size_t len;
};
      46  
/* The path elements, as determined by the __gconv_get_path function.
   All path elements end in a slash.  The array is terminated by an
   entry whose name is NULL.  */
struct path_elem *__gconv_path_elem;
/* Maximum length of a single path element in __gconv_path_elem.  */
size_t __gconv_max_path_elem_len;

/* We use the following struct if we couldn't allocate memory.  It is a
   list consisting only of the terminating entry, so code walking
   __gconv_path_elem sees an empty list.  It is not heap-allocated;
   __gconv_conf_freemem checks for it before calling free.  */
static const struct path_elem empty_path_elem = { NULL, 0 };
      55  
/* Filename extension for the modules.  MODULE_EXT may be defined before
   this point to override the default.  add_module appends this
   extension to module file names that do not already end in it.  */
#ifndef MODULE_EXT
# define MODULE_EXT ".so"
#endif
static const char gconv_module_ext[] = MODULE_EXT;
      61  
/* We have a few builtin transformations.

   The table is generated from gconv_builtin.h: every
   BUILTIN_TRANSFORMATION entry in that header expands to one initializer
   (only the From, To, Cost and Name arguments are used here) and every
   BUILTIN_ALIAS entry expands to nothing.  cost_lo is set to INT_MAX for
   all builtin entries; insert_module uses cost_lo to break ties between
   modules with equal cost_hi.

   The array is not const because __gconv_read_conf passes its elements
   to insert_module, which links them into the module tree through their
   left/right/same members.  */
static struct gconv_module builtin_modules[] =
{
#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
			       MinF, MaxF, MinT, MaxT) \
  {									      \
    .from_string = From,						      \
    .to_string = To,							      \
    .cost_hi = Cost,							      \
    .cost_lo = INT_MAX,							      \
    .module_name = Name							      \
  },
#define BUILTIN_ALIAS(From, To)

#include "gconv_builtin.h"

#undef BUILTIN_TRANSFORMATION
#undef BUILTIN_ALIAS
};
      81  
/* Aliases for the builtin transformations, generated from
   gconv_builtin.h.  Here BUILTIN_TRANSFORMATION expands to nothing and
   every BUILTIN_ALIAS entry expands to the string literal "From\0To\0".
   The adjacent literals are concatenated into a single character array
   of consecutive NUL-terminated name pairs.  The implicit terminator of
   the combined literal leaves an empty string after the last pair,
   which is the end marker __gconv_read_conf looks for.  */
static const char builtin_aliases[] =
{
#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
			       MinF, MaxF, MinT, MaxT)
#define BUILTIN_ALIAS(From, To) From "\0" To "\0"

#include "gconv_builtin.h"

#undef BUILTIN_TRANSFORMATION
#undef BUILTIN_ALIAS
};
      93  
      94  
/* Value of the GCONV_PATH environment variable.  When this is NULL,
   __gconv_get_path searches only the default path; otherwise the
   default path is appended to this value.  Nothing in this file assigns
   it -- presumably it is set by the gconv initialization code
   elsewhere.  */
const char *__gconv_path_envvar;
      97  
      98  
      99  /* Test whether there is already a matching module known.  */
     100  static int
     101  detect_conflict (const char *alias)
     102  {
     103    struct gconv_module *node = __gconv_modules_db;
     104  
     105    while (node != NULL)
     106      {
     107        int cmpres = strcmp (alias, node->from_string);
     108  
     109        if (cmpres == 0)
     110  	/* We have a conflict.  */
     111  	return 1;
     112        else if (cmpres < 0)
     113  	node = node->left;
     114        else
     115  	node = node->right;
     116      }
     117  
     118    return node != NULL;
     119  }
     120  
     121  
     122  /* The actual code to add aliases.  */
     123  static void
     124  add_alias2 (const char *from, const char *to, const char *wp)
     125  {
     126    /* Test whether this alias conflicts with any available module.  */
     127    if (detect_conflict (from))
     128      /* It does conflict, don't add the alias.  */
     129      return;
     130  
     131    struct gconv_alias *new_alias = (struct gconv_alias *)
     132      malloc (sizeof (struct gconv_alias) + (wp - from));
     133    if (new_alias != NULL)
     134      {
     135        void **inserted;
     136  
     137        new_alias->fromname = memcpy ((char *) new_alias
     138  				    + sizeof (struct gconv_alias),
     139  				    from, wp - from);
     140        new_alias->toname = new_alias->fromname + (to - from);
     141  
     142        inserted = (void **) __tsearch (new_alias, &__gconv_alias_db,
     143  				      __gconv_alias_compare);
     144        if (inserted == NULL || *inserted != new_alias)
     145  	/* Something went wrong, free this entry.  */
     146  	free (new_alias);
     147      }
     148  }
     149  
     150  
     151  /* Add new alias.  */
     152  static void
     153  add_alias (char *rp)
     154  {
     155    /* We now expect two more string.  The strings are normalized
     156       (converted to UPPER case) and stored in the alias database.  */
     157    char *from, *to, *wp;
     158  
     159    while (__isspace_l (*rp, _nl_C_locobj_ptr))
     160      ++rp;
     161    from = wp = rp;
     162    while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr))
     163      *wp++ = __toupper_l (*rp++, _nl_C_locobj_ptr);
     164    if (*rp == '\0')
     165      /* There is no `to' string on the line.  Ignore it.  */
     166      return;
     167    *wp++ = '\0';
     168    to = ++rp;
     169    while (__isspace_l (*rp, _nl_C_locobj_ptr))
     170      ++rp;
     171    while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr))
     172      *wp++ = __toupper_l (*rp++, _nl_C_locobj_ptr);
     173    if (to == wp)
     174      /* No `to' string, ignore the line.  */
     175      return;
     176    *wp++ = '\0';
     177  
     178    add_alias2 (from, to, wp);
     179  }
     180  
     181  
     182  /* Insert a data structure for a new module in the search tree.  */
     183  static void
     184  insert_module (struct gconv_module *newp, int tobefreed)
     185  {
     186    struct gconv_module **rootp = &__gconv_modules_db;
     187  
     188    while (*rootp != NULL)
     189      {
     190        struct gconv_module *root = *rootp;
     191        int cmpres;
     192  
     193        cmpres = strcmp (newp->from_string, root->from_string);
     194        if (cmpres == 0)
     195  	{
     196  	  /* Both strings are identical.  Insert the string at the
     197  	     end of the `same' list if it is not already there.  */
     198  	  while (strcmp (newp->from_string, root->from_string) != 0
     199  		 || strcmp (newp->to_string, root->to_string) != 0)
     200  	    {
     201  	      rootp = &root->same;
     202  	      root = *rootp;
     203  	      if (root == NULL)
     204  		break;
     205  	    }
     206  
     207  	  if (root != NULL)
     208  	    {
     209  	      /* This is a no new conversion.  But maybe the cost is
     210  		 better.  */
     211  	      if (newp->cost_hi < root->cost_hi
     212  		  || (newp->cost_hi == root->cost_hi
     213  		      && newp->cost_lo < root->cost_lo))
     214  		{
     215  		  newp->left = root->left;
     216  		  newp->right = root->right;
     217  		  newp->same = root->same;
     218  		  *rootp = newp;
     219  
     220  		  free (root);
     221  		}
     222  	      else if (tobefreed)
     223  		free (newp);
     224  	      return;
     225  	    }
     226  
     227  	  break;
     228  	}
     229        else if (cmpres < 0)
     230  	rootp = &root->left;
     231        else
     232  	rootp = &root->right;
     233      }
     234  
     235    /* Plug in the new node here.  */
     236    *rootp = newp;
     237  }
     238  
     239  
/* Add new module.

   RP points to a modifiable, NUL-terminated string describing the
   module (see the field list below); it is parsed and rewritten in
   place.  DIRECTORY (of length DIR_LEN) is prepended to the module file
   name unless that name starts with a slash.  MODCOUNTER is stored as
   the cost_lo value of the new module, which insert_module uses to
   break ties between modules with equal cost_hi.

   Nothing is added if a field up to and including the module file name
   is missing, if an alias with the `from' name already exists, or if
   memory cannot be allocated.  */
static void
add_module (char *rp, const char *directory, size_t dir_len, int modcounter)
{
  /* We expect now
     1. `from' name
     2. `to' name
     3. filename of the module
     4. an optional cost value
  */
  struct gconv_alias fake_alias;
  struct gconv_module *new_module;
  char *from, *to, *module, *wp;
  int need_ext;
  int cost_hi;

  /* Skip leading whitespace, then convert the `from' name to upper case
     in place.  */
  while (__isspace_l (*rp, _nl_C_locobj_ptr))
    ++rp;
  from = rp;
  while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr))
    {
      *rp = __toupper_l (*rp, _nl_C_locobj_ptr);
      ++rp;
    }
  if (*rp == '\0')
    /* Only one field on the line.  */
    return;
  *rp++ = '\0';
  /* The `to' name is converted to upper case and moved down so that it
     starts directly behind the terminator of the `from' name.  WP never
     gets ahead of RP, so nothing unread is overwritten.  */
  to = wp = rp;
  while (__isspace_l (*rp, _nl_C_locobj_ptr))
    ++rp;
  while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr))
    *wp++ = __toupper_l (*rp++, _nl_C_locobj_ptr);
  if (*rp == '\0')
    /* No module file name on the line.  */
    return;
  *wp++ = '\0';
  /* Step over the separator (which the terminator above may just have
     overwritten) and any further whitespace.  */
  do
    ++rp;
  while (__isspace_l (*rp, _nl_C_locobj_ptr));
  /* The module file name is copied unchanged (no case conversion) to
     directly behind the `to' name.  */
  module = wp;
  while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr))
    *wp++ = *rp++;
  if (*rp == '\0')
    {
      /* There is no cost, use one by default.  */
      *wp++ = '\0';
      cost_hi = 1;
    }
  else
    {
      /* There might be a cost value.  */
      char *endp;

      *wp++ = '\0';
      cost_hi = strtol (rp, &endp, 10);
      if (rp == endp || cost_hi < 1)
	/* No useful information.  */
	cost_hi = 1;
    }

  /* At this point FROM, TO and MODULE are three consecutive
     NUL-terminated strings and WP points just past the last
     terminator.  */

  if (module[0] == '\0')
    /* No module name given.  */
    return;
  if (module[0] == '/')
    /* Absolute file name: do not prepend DIRECTORY.  */
    dir_len = 0;

  /* See whether we must add the ending.  The comparison includes the
     terminating NUL byte of both strings, so the extension only matches
     at the very end of the module name.  */
  need_ext = 0;
  if (wp - module < (ptrdiff_t) sizeof (gconv_module_ext)
      || memcmp (wp - sizeof (gconv_module_ext), gconv_module_ext,
		 sizeof (gconv_module_ext)) != 0)
    /* We must add the module extension.  */
    need_ext = sizeof (gconv_module_ext) - 1;

  /* See whether we have already an alias with this name defined.  The
     lookup key is a temporary copy made by strndupa; only the fromname
     member of FAKE_ALIAS is set.  */
  fake_alias.fromname = strndupa (from, to - from);

  if (__tfind (&fake_alias, &__gconv_alias_db, __gconv_alias_compare) != NULL)
    /* This module duplicates an alias.  */
    return;

  /* One allocation holds the structure followed by the three strings
     (WP - FROM bytes including their terminators), the directory prefix
     and, if needed, the extension.  calloc zero-fills the block,
     including the structure members not assigned below.  */
  new_module = (struct gconv_module *) calloc (1,
					       sizeof (struct gconv_module)
					       + (wp - from)
					       + dir_len + need_ext);
  if (new_module != NULL)
    {
      char *tmp;

      /* The strings are stored right behind the structure.  */
      new_module->from_string = tmp = (char *) (new_module + 1);
      tmp = __mempcpy (tmp, from, to - from);

      new_module->to_string = tmp;
      tmp = __mempcpy (tmp, to, module - to);

      new_module->cost_hi = cost_hi;
      new_module->cost_lo = modcounter;

      new_module->module_name = tmp;

      if (dir_len != 0)
	tmp = __mempcpy (tmp, directory, dir_len);

      tmp = __mempcpy (tmp, module, wp - module);

      if (need_ext)
	/* Overwrite the terminator just copied with the extension,
	   which brings its own terminator.  */
	memcpy (tmp - 1, gconv_module_ext, sizeof (gconv_module_ext));

      /* Now insert the new module data structure in our search tree.
	 The nonzero second argument lets insert_module free the new
	 module if an equal conversion with no worse cost exists.  */
      insert_module (new_module, 1);
    }
}
     351  
     352  
     353  /* Determine the directories we are looking for data in.  This function should
     354     only be called from __gconv_read_conf.  */
     355  static void
     356  __gconv_get_path (void)
     357  {
     358    struct path_elem *result;
     359  
     360    /* This function is only ever called when __gconv_path_elem is NULL.  */
     361    result = __gconv_path_elem;
     362    assert (result == NULL);
     363  
     364    /* Determine the complete path first.  */
     365    char *gconv_path;
     366    size_t gconv_path_len;
     367    char *elem;
     368    char *oldp;
     369    char *cp;
     370    int nelems;
     371    char *cwd;
     372    size_t cwdlen;
     373  
     374    if (__gconv_path_envvar == NULL)
     375      {
     376        /* No user-defined path.  Make a modifiable copy of the
     377           default path.  */
     378        gconv_path = strdupa (default_gconv_path);
     379        gconv_path_len = sizeof (default_gconv_path);
     380        cwd = NULL;
     381        cwdlen = 0;
     382      }
     383    else
     384      {
     385        /* Append the default path to the user-defined path.  */
     386        size_t user_len = strlen (__gconv_path_envvar);
     387  
     388        gconv_path_len = user_len + 1 + sizeof (default_gconv_path);
     389        gconv_path = alloca (gconv_path_len);
     390        __mempcpy (__mempcpy (__mempcpy (gconv_path, __gconv_path_envvar,
     391                                         user_len),
     392                              ":", 1),
     393                   default_gconv_path, sizeof (default_gconv_path));
     394        cwd = __getcwd (NULL, 0);
     395        cwdlen = __glibc_unlikely (cwd == NULL) ? 0 : strlen (cwd);
     396      }
     397    assert (default_gconv_path[0] == '/');
     398  
     399    /* In a first pass we calculate the number of elements.  */
     400    oldp = NULL;
     401    cp = strchr (gconv_path, ':');
     402    nelems = 1;
     403    while (cp != NULL)
     404      {
     405        if (cp != oldp + 1)
     406          ++nelems;
     407        oldp = cp;
     408        cp = strchr (cp + 1, ':');
     409      }
     410  
     411    /* Allocate the memory for the result.  */
     412    result = malloc ((nelems + 1)
     413                                * sizeof (struct path_elem)
     414                                + gconv_path_len + nelems
     415                                + (nelems - 1) * (cwdlen + 1));
     416    if (result != NULL)
     417      {
     418        char *strspace = (char *) &result[nelems + 1];
     419        int n = 0;
     420  
     421        /* Separate the individual parts.  */
     422        __gconv_max_path_elem_len = 0;
     423        elem = __strtok_r (gconv_path, ":", &gconv_path);
     424        assert (elem != NULL);
     425        do
     426          {
     427            result[n].name = strspace;
     428            if (elem[0] != '/')
     429              {
     430                assert (cwd != NULL);
     431                strspace = __mempcpy (strspace, cwd, cwdlen);
     432                *strspace++ = '/';
     433              }
     434            strspace = __stpcpy (strspace, elem);
     435            if (strspace[-1] != '/')
     436              *strspace++ = '/';
     437  
     438            result[n].len = strspace - result[n].name;
     439            if (result[n].len > __gconv_max_path_elem_len)
     440              __gconv_max_path_elem_len = result[n].len;
     441  
     442            *strspace++ = '\0';
     443            ++n;
     444          }
     445        while ((elem = __strtok_r (NULL, ":", &gconv_path)) != NULL);
     446  
     447        result[n].name = NULL;
     448        result[n].len = 0;
     449      }
     450  
     451    __gconv_path_elem = result ?: (struct path_elem *) &empty_path_elem;
     452  
     453    free (cwd);
     454  }
     455  
     456  
     457  /* Read all configuration files found in the user-specified and the default
     458     path.  This function should only be called once during the program's
     459     lifetime.  It disregards locking and synchronization because its only
     460     caller, __gconv_load_conf, handles this.  */
     461  static void
     462  __gconv_read_conf (void)
     463  {
     464    int save_errno = errno;
     465    size_t cnt;
     466  
     467    /* First see whether we should use the cache.  */
     468    if (__gconv_load_cache () == 0)
     469      {
     470        /* Yes, we are done.  */
     471        __set_errno (save_errno);
     472        return;
     473      }
     474  
     475  #ifndef STATIC_GCONV
     476    /* Find out where we have to look.  */
     477    __gconv_get_path ();
     478  
     479    for (cnt = 0; __gconv_path_elem[cnt].name != NULL; ++cnt)
     480      gconv_parseconfdir (NULL, __gconv_path_elem[cnt].name,
     481  			__gconv_path_elem[cnt].len);
     482  #endif
     483  
     484    /* Add the internal modules.  */
     485    for (cnt = 0; cnt < sizeof (builtin_modules) / sizeof (builtin_modules[0]);
     486         ++cnt)
     487      {
     488        struct gconv_alias fake_alias;
     489  
     490        fake_alias.fromname = (char *) builtin_modules[cnt].from_string;
     491  
     492        if (__tfind (&fake_alias, &__gconv_alias_db, __gconv_alias_compare)
     493  	  != NULL)
     494  	/* It'll conflict so don't add it.  */
     495  	continue;
     496  
     497        insert_module (&builtin_modules[cnt], 0);
     498      }
     499  
     500    /* Add aliases for builtin conversions.  */
     501    const char *cp = builtin_aliases;
     502    do
     503      {
     504        const char *from = cp;
     505        const char *to = strchr (from, '\0') + 1;
     506        cp = strchr (to, '\0') + 1;
     507  
     508        add_alias2 (from, to, cp);
     509      }
     510    while (*cp != '\0');
     511  
     512    /* Restore the error number.  */
     513    __set_errno (save_errno);
     514  }
     515  
     516  
/* This "once" variable is used to do a one-time load of the configuration.
   It is the control object __gconv_load_conf passes to __libc_once.  */
__libc_once_define (static, once);
     519  
     520  
/* Read all configuration files found in the user-specified and the default
   path, but do it only "once" using __gconv_read_conf to do the actual
   work.  This is the function that must be called when reading iconv
   configuration.  It takes no arguments and returns nothing; the
   results are left in the global module and alias databases.  */
void
__gconv_load_conf (void)
{
  /* __gconv_read_conf is run through the `once' control object defined
     above.  */
  __libc_once (once, __gconv_read_conf);
}
     530  
     531  
     532  /* Free all resources if necessary.  */
     533  void
     534  __gconv_conf_freemem (void)
     535  {
     536    if (__gconv_path_elem != NULL && __gconv_path_elem != &empty_path_elem)
     537      free ((void *) __gconv_path_elem);
     538  }