(root)/
glib-2.79.0/
glib/
gcharset.c
       1  /* gcharset.c - Charset information
       2   *
       3   * Copyright (C) 2011 Red Hat, Inc.
       4   *
       5   * SPDX-License-Identifier: LGPL-2.1-or-later
       6   *
       7   * This library is free software; you can redistribute it and/or
       8   * modify it under the terms of the GNU Lesser General Public
       9   * License as published by the Free Software Foundation; either
      10   * version 2.1 of the License, or (at your option) any later version.
      11   *
      12   * This library is distributed in the hope that it will be useful,
      13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
      14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      15   * Lesser General Public License for more details.
      16   *
      17   * You should have received a copy of the GNU Lesser General Public
      18   * License along with this library; if not, see <http://www.gnu.org/licenses/>.
      19   */
      20  
      21  #include "config.h"
      22  
      23  #include "gcharset.h"
      24  #include "gcharsetprivate.h"
      25  
      26  #include "garray.h"
      27  #include "genviron.h"
      28  #include "ghash.h"
      29  #include "gmessages.h"
      30  #include "gstrfuncs.h"
      31  #include "gthread.h"
      32  #include "gthreadprivate.h"
      33  #ifdef G_OS_WIN32
      34  #include "gwin32.h"
      35  #endif
      36  
      37  #include "libcharset/libcharset.h"
      38  
      39  #include <string.h>
      40  #include <stdio.h>
      41  
      42  #if (HAVE_LANGINFO_TIME_CODESET || HAVE_LANGINFO_CODESET)
      43  #include <langinfo.h>
      44  #endif
      45  
      46  #include <locale.h>
      47  #ifdef G_OS_WIN32
      48  #define WIN32_LEAN_AND_MEAN
      49  #include <windows.h>
      50  #endif
      51  
      52  G_LOCK_DEFINE_STATIC (aliases);
      53  
      54  static GHashTable *
      55  get_alias_hash (void)
      56  {
      57    static GHashTable *alias_hash = NULL;
      58    const char *aliases;
      59  
      60    G_LOCK (aliases);
      61  
      62    if (!alias_hash)
      63      {
      64        alias_hash = g_hash_table_new (g_str_hash, g_str_equal);
      65  
      66        aliases = _g_locale_get_charset_aliases ();
      67        while (*aliases != '\0')
      68          {
      69            const char *canonical;
      70            const char *alias;
      71            const char **alias_array;
      72            int count = 0;
      73  
      74            alias = aliases;
      75            aliases += strlen (aliases) + 1;
      76            canonical = aliases;
      77            aliases += strlen (aliases) + 1;
      78  
      79            alias_array = g_hash_table_lookup (alias_hash, canonical);
      80            if (alias_array)
      81              {
      82                while (alias_array[count])
      83                  count++;
      84              }
      85  
      86            alias_array = g_renew (const char *, alias_array, count + 2);
      87            alias_array[count] = alias;
      88            alias_array[count + 1] = NULL;
      89  
      90            g_hash_table_insert (alias_hash, (char *)canonical, alias_array);
      91          }
      92      }
      93  
      94    G_UNLOCK (aliases);
      95  
      96    return alias_hash;
      97  }
      98  
      99  /* As an abuse of the alias table, the following routines gets
     100   * the charsets that are aliases for the canonical name.
     101   */
     102  const char **
     103  _g_charset_get_aliases (const char *canonical_name)
     104  {
     105    GHashTable *alias_hash = get_alias_hash ();
     106  
     107    return g_hash_table_lookup (alias_hash, canonical_name);
     108  }
     109  
     110  static gboolean
     111  g_utf8_get_charset_internal (const char  *raw_data,
     112                               const char **a)
     113  {
     114    /* Allow CHARSET to override the charset of any locale category. Users should
     115     * probably never be setting this — instead, just add the charset after a `.`
     116     * in `LANGUAGE`/`LC_ALL`/`LC_*`/`LANG`. I can’t find any reference (in
     117     * `git log`, code comments, or man pages) to this environment variable being
     118     * standardised or documented or even used anywhere outside GLib. Perhaps it
     119     * should eventually be removed. */
     120    const char *charset = g_getenv ("CHARSET");
     121  
     122    if (charset && *charset)
     123      {
     124        *a = charset;
     125  
     126        if (charset && strstr (charset, "UTF-8"))
     127          return TRUE;
     128        else
     129          return FALSE;
     130      }
     131  
     132    /* The libcharset code tries to be thread-safe without
     133     * a lock, but has a memory leak and a missing memory
     134     * barrier, so we lock for it
     135     */
     136    G_LOCK (aliases);
     137    charset = _g_locale_charset_unalias (raw_data);
     138    G_UNLOCK (aliases);
     139  
     140    if (charset && *charset)
     141      {
     142        *a = charset;
     143  
     144        if (charset && strstr (charset, "UTF-8"))
     145          return TRUE;
     146        else
     147          return FALSE;
     148      }
     149  
     150    /* Assume this for compatibility at present.  */
     151    *a = "US-ASCII";
     152  
     153    return FALSE;
     154  }
     155  
typedef struct _GCharsetCache GCharsetCache;

/* Result of the most recent charset lookup. The functions in this file
 * that use it keep one instance per thread in a GPrivate and recompute it
 * whenever the raw charset string differs from @raw.
 */
struct _GCharsetCache {
  gboolean is_utf8;  /* value returned by g_utf8_get_charset_internal() for @raw */
  gchar *raw;        /* copy of the raw charset string this entry was computed from */
  gchar *charset;    /* copy of the resolved charset name handed out to callers */
};
     163  
     164  static void
     165  charset_cache_free (gpointer data)
     166  {
     167    GCharsetCache *cache = data;
     168    g_free (cache->raw);
     169    g_free (cache->charset);
     170    g_free (cache);
     171  }
     172  
     173  /**
     174   * g_get_charset:
     175   * @charset: (out) (optional) (transfer none): return location for character set
     176   *   name, or %NULL.
     177   *
     178   * Obtains the character set for the [current locale][setlocale]; you
     179   * might use this character set as an argument to g_convert(), to convert
     180   * from the current locale's encoding to some other encoding. (Frequently
     181   * g_locale_to_utf8() and g_locale_from_utf8() are nice shortcuts, though.)
     182   *
     183   * On Windows the character set returned by this function is the
     184   * so-called system default ANSI code-page. That is the character set
     185   * used by the "narrow" versions of C library and Win32 functions that
     186   * handle file names. It might be different from the character set
     187   * used by the C library's current locale.
     188   *
     189   * On Linux, the character set is found by consulting nl_langinfo() if
     190   * available. If not, the environment variables `LC_ALL`, `LC_CTYPE`, `LANG`
     191   * and `CHARSET` are queried in order. nl_langinfo() returns the C locale if
     192   * no locale has been loaded by setlocale().
     193   *
     194   * The return value is %TRUE if the locale's encoding is UTF-8, in that
     195   * case you can perhaps avoid calling g_convert().
     196   *
     197   * The string returned in @charset is not allocated, and should not be
     198   * freed.
     199   *
     200   * Returns: %TRUE if the returned charset is UTF-8
     201   */
     202  gboolean
     203  g_get_charset (const char **charset)
     204  {
     205    static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free);
     206    GCharsetCache *cache = g_private_get (&cache_private);
     207    const gchar *raw;
     208  
     209    if (!cache)
     210      cache = g_private_set_alloc0 (&cache_private, sizeof (GCharsetCache));
     211  
     212    G_LOCK (aliases);
     213    raw = _g_locale_charset_raw ();
     214    G_UNLOCK (aliases);
     215  
     216    if (cache->raw == NULL || strcmp (cache->raw, raw) != 0)
     217      {
     218        const gchar *new_charset;
     219  
     220        g_free (cache->raw);
     221        g_free (cache->charset);
     222        cache->raw = g_strdup (raw);
     223        cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset);
     224        cache->charset = g_strdup (new_charset);
     225      }
     226  
     227    if (charset)
     228      *charset = cache->charset;
     229  
     230    return cache->is_utf8;
     231  }
     232  
     233  /*
     234   * Do the same as g_get_charset() but it temporarily set locale (LC_ALL to
     235   * LC_TIME) to correctly check for charset about time conversion relatives.
     236   *
     237   * Returns: %TRUE if the returned charset is UTF-8
     238   */
     239  gboolean
     240  _g_get_time_charset (const char **charset)
     241  {
     242    static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free);
     243    GCharsetCache *cache = g_private_get (&cache_private);
     244    const gchar *raw;
     245  
     246    if (!cache)
     247      cache = g_private_set_alloc0 (&cache_private, sizeof (GCharsetCache));
     248  
     249  #ifdef HAVE_LANGINFO_TIME_CODESET
     250    raw = nl_langinfo (_NL_TIME_CODESET);
     251  #else
     252    G_LOCK (aliases);
     253    raw = _g_locale_charset_raw ();
     254    G_UNLOCK (aliases);
     255  #endif
     256  
     257    if (cache->raw == NULL || strcmp (cache->raw, raw) != 0)
     258      {
     259        const gchar *new_charset;
     260  
     261        g_free (cache->raw);
     262        g_free (cache->charset);
     263        cache->raw = g_strdup (raw);
     264        cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset);
     265        cache->charset = g_strdup (new_charset);
     266      }
     267  
     268    if (charset)
     269      *charset = cache->charset;
     270  
     271    return cache->is_utf8;
     272  }
     273  /*
     274   * Do the same as g_get_charset() but it temporarily set locale (LC_ALL to
     275   * LC_CTYPE) to correctly check for charset about CTYPE conversion relatives.
     276   *
     277   * Returns: %TRUE if the returned charset is UTF-8
     278   */
     279  gboolean
     280  _g_get_ctype_charset (const char **charset)
     281  {
     282    static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free);
     283    GCharsetCache *cache = g_private_get (&cache_private);
     284    const gchar *raw;
     285  
     286    if (!cache)
     287      cache = g_private_set_alloc0 (&cache_private, sizeof (GCharsetCache));
     288  
     289  #ifdef HAVE_LANGINFO_CODESET
     290    raw = nl_langinfo (CODESET);
     291  #else
     292    G_LOCK (aliases);
     293    raw = _g_locale_charset_raw ();
     294    G_UNLOCK (aliases);
     295  #endif
     296  
     297    if (cache->raw == NULL || strcmp (cache->raw, raw) != 0)
     298      {
     299        const gchar *new_charset;
     300  
     301        g_free (cache->raw);
     302        g_free (cache->charset);
     303        cache->raw = g_strdup (raw);
     304        cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset);
     305        cache->charset = g_strdup (new_charset);
     306      }
     307  
     308    if (charset)
     309      *charset = cache->charset;
     310  
     311    return cache->is_utf8;
     312  }
     313  
     314  /**
     315   * g_get_codeset:
     316   *
     317   * Gets the character set for the current locale.
     318   *
     319   * Returns: a newly allocated string containing the name
     320   *     of the character set. This string must be freed with g_free().
     321   */
     322  gchar *
     323  g_get_codeset (void)
     324  {
     325    const gchar *charset;
     326  
     327    g_get_charset (&charset);
     328  
     329    return g_strdup (charset);
     330  }
     331  
     332  /**
     333   * g_get_console_charset:
     334   * @charset: (out) (optional) (transfer none): return location for character set
     335   *   name, or %NULL.
     336   *
     337   * Obtains the character set used by the console attached to the process,
     338   * which is suitable for printing output to the terminal.
     339   *
     340   * Usually this matches the result returned by g_get_charset(), but in
     341   * environments where the locale's character set does not match the encoding
     342   * of the console this function tries to guess a more suitable value instead.
     343   *
     344   * On Windows the character set returned by this function is the
     345   * output code page used by the console associated with the calling process.
     346   * If the codepage can't be determined (for example because there is no
     347   * console attached) UTF-8 is assumed.
     348   *
     349   * The return value is %TRUE if the locale's encoding is UTF-8, in that
     350   * case you can perhaps avoid calling g_convert().
     351   *
     352   * The string returned in @charset is not allocated, and should not be
     353   * freed.
     354   *
     355   * Returns: %TRUE if the returned charset is UTF-8
     356   *
     357   * Since: 2.62
     358   */
     359  gboolean
     360  g_get_console_charset (const char **charset)
     361  {
     362  #ifdef G_OS_WIN32
     363    static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free);
     364    GCharsetCache *cache = g_private_get (&cache_private);
     365    const gchar *locale;
     366    unsigned int cp;
     367    char buf[2 + 20 + 1]; /* "CP" + G_MAXUINT64 (to be safe) in decimal form (20 bytes) + "\0" */
     368    const gchar *raw = NULL;
     369  
     370    if (!cache)
     371      cache = g_private_set_alloc0 (&cache_private, sizeof (GCharsetCache));
     372  
     373    /* first try to query $LANG (works for Cygwin/MSYS/MSYS2 and others using mintty) */
     374    locale = g_getenv ("LANG");
     375    if (locale != NULL && locale[0] != '\0')
     376      {
     377        /* If the locale name contains an encoding after the dot, return it.  */
     378        const char *dot = strchr (locale, '.');
     379  
     380        if (dot != NULL)
     381          {
     382            const char *modifier;
     383  
     384            dot++;
     385            /* Look for the possible @... trailer and remove it, if any.  */
     386            modifier = strchr (dot, '@');
     387            if (modifier == NULL)
     388              raw = dot;
     389            else if ((gsize) (modifier - dot) < sizeof (buf))
     390              {
     391                memcpy (buf, dot, modifier - dot);
     392                buf[modifier - dot] = '\0';
     393                raw = buf;
     394              }
     395          }
     396      }
     397    /* next try querying console codepage using native win32 API */
     398    if (raw == NULL)
     399      {
     400        cp = GetConsoleOutputCP ();
     401        if (cp)
     402          {
     403            sprintf (buf, "CP%u", cp);
     404            raw = buf;
     405          }
     406        else if (GetLastError () != ERROR_INVALID_HANDLE)
     407          {
     408            gchar *emsg = g_win32_error_message (GetLastError ());
     409            g_warning ("Failed to determine console output code page: %s. "
     410                       "Falling back to UTF-8", emsg);
     411            g_free (emsg);
     412          }
     413      }
     414    /* fall-back to UTF-8 if the rest failed (it's a universal default) */
     415    if (raw == NULL)
     416      raw = "UTF-8";
     417  
     418    if (cache->raw == NULL || strcmp (cache->raw, raw) != 0)
     419      {
     420        const gchar *new_charset;
     421  
     422        g_free (cache->raw);
     423        g_free (cache->charset);
     424        cache->raw = g_strdup (raw);
     425        cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset);
     426        cache->charset = g_strdup (new_charset);
     427      }
     428  
     429    if (charset)
     430      *charset = cache->charset;
     431  
     432    return cache->is_utf8;
     433  #else
     434    /* assume the locale settings match the console encoding on non-Windows OSs */
     435    return g_get_charset (charset);
     436  #endif
     437  }
     438  
     439  #ifndef G_OS_WIN32
     440  
     441  /* read an alias file for the locales */
     442  static void
     443  read_aliases (const gchar *file,
     444                GHashTable  *alias_table)
     445  {
     446    FILE *fp;
     447    char buf[256];
     448  
     449    fp = fopen (file, "re");
     450    if (!fp)
     451      return;
     452    while (fgets (buf, 256, fp))
     453      {
     454        char *p, *q;
     455  
     456        g_strstrip (buf);
     457  
     458        /* Line is a comment */
     459        if ((buf[0] == '#') || (buf[0] == '\0'))
     460          continue;
     461  
     462        /* Reads first column */
     463        for (p = buf, q = NULL; *p; p++) {
     464          if ((*p == '\t') || (*p == ' ') || (*p == ':')) {
     465            *p = '\0';
     466            q = p+1;
     467            while ((*q == '\t') || (*q == ' ')) {
     468              q++;
     469            }
     470            break;
     471          }
     472        }
     473        /* The line only had one column */
     474        if (!q || *q == '\0')
     475          continue;
     476  
     477        /* Read second column */
     478        for (p = q; *p; p++) {
     479          if ((*p == '\t') || (*p == ' ')) {
     480            *p = '\0';
     481            break;
     482          }
     483        }
     484  
     485        /* Add to alias table if necessary */
     486        if (!g_hash_table_lookup (alias_table, buf)) {
     487          g_hash_table_insert (alias_table, g_strdup (buf), g_strdup (q));
     488        }
     489      }
     490    fclose (fp);
     491  }
     492  
     493  #endif
     494  
     495  static char *
     496  unalias_lang (char *lang)
     497  {
     498  #ifndef G_OS_WIN32
     499    static GHashTable *alias_table = NULL;
     500    char *p;
     501    int i;
     502  
     503    if (g_once_init_enter_pointer (&alias_table))
     504      {
     505        GHashTable *table = g_hash_table_new (g_str_hash, g_str_equal);
     506        read_aliases ("/usr/share/locale/locale.alias", table);
     507        g_once_init_leave_pointer (&alias_table, table);
     508      }
     509  
     510    i = 0;
     511    while ((p = g_hash_table_lookup (alias_table, lang)) && (strcmp (p, lang) != 0))
     512      {
     513        lang = p;
     514        if (i++ == 30)
     515          {
     516            static gboolean said_before = FALSE;
     517            if (!said_before)
     518              g_warning ("Too many alias levels for a locale, "
     519                         "may indicate a loop");
     520            said_before = TRUE;
     521            return lang;
     522          }
     523      }
     524  #endif
     525    return lang;
     526  }
     527  
/* Mask for components of locale spec. The ordering here is from
 * least significant to most significant.
 *
 * explode_locale() returns an OR of these bits to say which optional
 * parts it found, and append_locale_variants() uses the same bits to
 * choose which parts go into each generated variant.
 */
enum
{
  COMPONENT_CODESET =   1 << 0, /* the ".codeset" part */
  COMPONENT_TERRITORY = 1 << 1, /* the "_territory" part */
  COMPONENT_MODIFIER =  1 << 2  /* the "@modifier" part */
};
     537  
     538  /* Break an X/Open style locale specification into components
     539   */
     540  static guint
     541  explode_locale (const gchar *locale,
     542                  gchar      **language,
     543                  gchar      **territory,
     544                  gchar      **codeset,
     545                  gchar      **modifier)
     546  {
     547    const gchar *uscore_pos;
     548    const gchar *at_pos;
     549    const gchar *dot_pos;
     550  
     551    guint mask = 0;
     552  
     553    uscore_pos = strchr (locale, '_');
     554    dot_pos = strchr (uscore_pos ? uscore_pos : locale, '.');
     555    at_pos = strchr (dot_pos ? dot_pos : (uscore_pos ? uscore_pos : locale), '@');
     556  
     557    if (at_pos)
     558      {
     559        mask |= COMPONENT_MODIFIER;
     560        *modifier = g_strdup (at_pos);
     561      }
     562    else
     563      at_pos = locale + strlen (locale);
     564  
     565    if (dot_pos)
     566      {
     567        mask |= COMPONENT_CODESET;
     568        *codeset = g_strndup (dot_pos, at_pos - dot_pos);
     569      }
     570    else
     571      dot_pos = at_pos;
     572  
     573    if (uscore_pos)
     574      {
     575        mask |= COMPONENT_TERRITORY;
     576        *territory = g_strndup (uscore_pos, dot_pos - uscore_pos);
     577      }
     578    else
     579      uscore_pos = dot_pos;
     580  
     581    *language = g_strndup (locale, uscore_pos - locale);
     582  
     583    return mask;
     584  }
     585  
     586  /*
     587   * Compute all interesting variants for a given locale name -
     588   * by stripping off different components of the value.
     589   *
     590   * For simplicity, we assume that the locale is in
     591   * X/Open format: language[_territory][.codeset][@modifier]
     592   *
     593   * TODO: Extend this to handle the CEN format (see the GNUlibc docs)
     594   *       as well. We could just copy the code from glibc wholesale
     595   *       but it is big, ugly, and complicated, so I'm reluctant
     596   *       to do so when this should handle 99% of the time...
     597   */
     598  static void
     599  append_locale_variants (GPtrArray *array,
     600                          const gchar *locale)
     601  {
     602    gchar *language = NULL;
     603    gchar *territory = NULL;
     604    gchar *codeset = NULL;
     605    gchar *modifier = NULL;
     606  
     607    guint mask;
     608    guint i, j;
     609  
     610    g_return_if_fail (locale != NULL);
     611  
     612    mask = explode_locale (locale, &language, &territory, &codeset, &modifier);
     613  
     614    /* Iterate through all possible combinations, from least attractive
     615     * to most attractive.
     616     */
     617    for (j = 0; j <= mask; ++j)
     618      {
     619        i = mask - j;
     620  
     621        if ((i & ~mask) == 0)
     622          {
     623            gchar *val = g_strconcat (language,
     624                                      (i & COMPONENT_TERRITORY) ? territory : "",
     625                                      (i & COMPONENT_CODESET) ? codeset : "",
     626                                      (i & COMPONENT_MODIFIER) ? modifier : "",
     627                                      NULL);
     628            g_ptr_array_add (array, val);
     629          }
     630      }
     631  
     632    g_free (language);
     633    if (mask & COMPONENT_CODESET)
     634      g_free (codeset);
     635    if (mask & COMPONENT_TERRITORY)
     636      g_free (territory);
     637    if (mask & COMPONENT_MODIFIER)
     638      g_free (modifier);
     639  }
     640  
     641  /**
     642   * g_get_locale_variants:
     643   * @locale: a locale identifier
     644   *
     645   * Returns a list of derived variants of @locale, which can be used to
     646   * e.g. construct locale-dependent filenames or search paths. The returned
     647   * list is sorted from most desirable to least desirable.
     648   * This function handles territory, charset and extra locale modifiers. See
     649   * [`setlocale(3)`](man:setlocale) for information about locales and their format.
     650   *
     651   * @locale itself is guaranteed to be returned in the output.
     652   *
     653   * For example, if @locale is `fr_BE`, then the returned list
     654   * is `fr_BE`, `fr`. If @locale is `en_GB.UTF-8@euro`, then the returned list
     655   * is `en_GB.UTF-8@euro`, `en_GB.UTF-8`, `en_GB@euro`, `en_GB`, `en.UTF-8@euro`,
     656   * `en.UTF-8`, `en@euro`, `en`.
     657   *
     658   * If you need the list of variants for the current locale,
     659   * use g_get_language_names().
     660   *
     661   * Returns: (transfer full) (array zero-terminated=1) (element-type utf8): a newly
     662   *   allocated array of newly allocated strings with the locale variants. Free with
     663   *   g_strfreev().
     664   *
     665   * Since: 2.28
     666   */
     667  gchar **
     668  g_get_locale_variants (const gchar *locale)
     669  {
     670    GPtrArray *array;
     671  
     672    g_return_val_if_fail (locale != NULL, NULL);
     673  
     674    array = g_ptr_array_sized_new (8);
     675    append_locale_variants (array, locale);
     676    g_ptr_array_add (array, NULL);
     677  
     678    return (gchar **) g_ptr_array_free (array, FALSE);
     679  }
     680  
     681  /* The following is (partly) taken from the gettext package.
     682     Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc.  */
     683  
     684  static const gchar *
     685  guess_category_value (const gchar *category_name)
     686  {
     687    const gchar *retval;
     688  
     689    /* The highest priority value is the 'LANGUAGE' environment
     690       variable.  This is a GNU extension.  */
     691    retval = g_getenv ("LANGUAGE");
     692    if ((retval != NULL) && (retval[0] != '\0'))
     693      return retval;
     694  
     695    /* 'LANGUAGE' is not set.  So we have to proceed with the POSIX
     696       methods of looking to 'LC_ALL', 'LC_xxx', and 'LANG'.  On some
     697       systems this can be done by the 'setlocale' function itself.  */
     698  
     699    /* Setting of LC_ALL overwrites all other.  */
     700    retval = g_getenv ("LC_ALL");
     701    if ((retval != NULL) && (retval[0] != '\0'))
     702      return retval;
     703  
     704    /* Next comes the name of the desired category.  */
     705    retval = g_getenv (category_name);
     706    if ((retval != NULL) && (retval[0] != '\0'))
     707      return retval;
     708  
     709    /* Last possibility is the LANG environment variable.  */
     710    retval = g_getenv ("LANG");
     711    if ((retval != NULL) && (retval[0] != '\0'))
     712      return retval;
     713  
     714  #ifdef G_PLATFORM_WIN32
     715    /* g_win32_getlocale() first checks for LC_ALL, LC_MESSAGES and
     716     * LANG, which we already did above. Oh well. The main point of
     717     * calling g_win32_getlocale() is to get the thread's locale as used
     718     * by Windows and the Microsoft C runtime (in the "English_United
     719     * States" format) translated into the Unixish format.
     720     */
     721    {
     722      char *locale = g_win32_getlocale ();
     723      retval = g_intern_string (locale);
     724      g_free (locale);
     725      return retval;
     726    }
     727  #endif
     728  
     729    return NULL;
     730  }
     731  
typedef struct _GLanguageNamesCache GLanguageNamesCache;

/* Cached language list for one locale category; stored as a value in the
 * per-thread hash table of g_get_language_names_with_category().
 */
struct _GLanguageNamesCache {
  gchar *languages;        /* copy of the language string the list was built from */
  gchar **language_names;  /* NULL-terminated array of locale names built from it */
};
     738  
     739  static void
     740  language_names_cache_free (gpointer data)
     741  {
     742    GLanguageNamesCache *cache = data;
     743    g_free (cache->languages);
     744    g_strfreev (cache->language_names);
     745    g_free (cache);
     746  }
     747  
     748  /**
     749   * g_get_language_names:
     750   *
     751   * Computes a list of applicable locale names, which can be used to
     752   * e.g. construct locale-dependent filenames or search paths. The returned
     753   * list is sorted from most desirable to least desirable and always contains
     754   * the default locale "C".
     755   *
     756   * For example, if LANGUAGE=de:en_US, then the returned list is
     757   * "de", "en_US", "en", "C".
     758   *
     759   * This function consults the environment variables `LANGUAGE`, `LC_ALL`,
     760   * `LC_MESSAGES` and `LANG` to find the list of locales specified by the
     761   * user.
     762   *
     763   * Returns: (array zero-terminated=1) (transfer none): a %NULL-terminated array of strings owned by GLib
     764   *    that must not be modified or freed.
     765   *
     766   * Since: 2.6
     767   */
     768  const gchar * const *
     769  g_get_language_names (void)
     770  {
     771    return g_get_language_names_with_category ("LC_MESSAGES");
     772  }
     773  
     774  /**
     775   * g_get_language_names_with_category:
     776   * @category_name: a locale category name
     777   *
     778   * Computes a list of applicable locale names with a locale category name,
     779   * which can be used to construct the fallback locale-dependent filenames
     780   * or search paths. The returned list is sorted from most desirable to
     781   * least desirable and always contains the default locale "C".
     782   *
     783   * This function consults the environment variables `LANGUAGE`, `LC_ALL`,
     784   * @category_name, and `LANG` to find the list of locales specified by the
     785   * user.
     786   *
     787   * g_get_language_names() returns g_get_language_names_with_category("LC_MESSAGES").
     788   *
     789   * Returns: (array zero-terminated=1) (transfer none): a %NULL-terminated array of strings owned by
     790   *    the thread g_get_language_names_with_category was called from.
     791   *    It must not be modified or freed. It must be copied if planned to be used in another thread.
     792   *
     793   * Since: 2.58
     794   */
     795  const gchar * const *
     796  g_get_language_names_with_category (const gchar *category_name)
     797  {
     798    static GPrivate cache_private = G_PRIVATE_INIT ((void (*)(gpointer)) g_hash_table_unref);
     799    GHashTable *cache = g_private_get (&cache_private);
     800    const gchar *languages;
     801    GLanguageNamesCache *name_cache;
     802  
     803    g_return_val_if_fail (category_name != NULL, NULL);
     804  
     805    if (!cache)
     806      {
     807        cache = g_hash_table_new_full (g_str_hash, g_str_equal,
     808                                       g_free, language_names_cache_free);
     809        g_private_set (&cache_private, cache);
     810      }
     811  
     812    languages = guess_category_value (category_name);
     813    if (!languages)
     814      languages = "C";
     815  
     816    name_cache = (GLanguageNamesCache *) g_hash_table_lookup (cache, category_name);
     817    if (!(name_cache && name_cache->languages &&
     818          strcmp (name_cache->languages, languages) == 0))
     819      {
     820        GPtrArray *array;
     821        gchar **alist, **a;
     822  
     823        g_hash_table_remove (cache, category_name);
     824  
     825        array = g_ptr_array_sized_new (8);
     826  
     827        alist = g_strsplit (languages, ":", 0);
     828        for (a = alist; *a; a++)
     829          append_locale_variants (array, unalias_lang (*a));
     830        g_strfreev (alist);
     831        g_ptr_array_add (array, g_strdup ("C"));
     832        g_ptr_array_add (array, NULL);
     833  
     834        name_cache = g_new0 (GLanguageNamesCache, 1);
     835        name_cache->languages = g_strdup (languages);
     836        name_cache->language_names = (gchar **) g_ptr_array_free (array, FALSE);
     837        g_hash_table_insert (cache, g_strdup (category_name), name_cache);
     838      }
     839  
     840    return (const gchar * const *) name_cache->language_names;
     841  }