1  /* Determine a canonical name for the current locale's character encoding.
       2  
       3     Copyright (C) 2000-2006, 2008-2023 Free Software Foundation, Inc.
       4  
       5     This file is free software: you can redistribute it and/or modify
       6     it under the terms of the GNU Lesser General Public License as
       7     published by the Free Software Foundation; either version 2.1 of the
       8     License, or (at your option) any later version.
       9  
      10     This file is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13     GNU Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public License
      16     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      17  
      18  /* Written by Bruno Haible <bruno@clisp.org>.  */
      19  
      20  #include <config.h>
      21  
      22  /* Specification.  */
      23  #include "localcharset.h"
      24  
      25  #include <stddef.h>
      26  #include <stdio.h>
      27  #include <string.h>
      28  #include <stdlib.h>
      29  
      30  #if defined __APPLE__ && defined __MACH__ && HAVE_LANGINFO_CODESET
      31  # define DARWIN7 /* Darwin 7 or newer, i.e. Mac OS X 10.3 or newer */
      32  #endif
      33  
      34  #if defined _WIN32 && !defined __CYGWIN__
      35  # define WINDOWS_NATIVE
      36  # include <locale.h>
      37  #endif
      38  
      39  #if defined __EMX__
      40  /* Assume EMX program runs on OS/2, even if compiled under DOS.  */
      41  # ifndef OS2
      42  #  define OS2
      43  # endif
      44  #endif
      45  
      46  #if !defined WINDOWS_NATIVE
      47  # if HAVE_LANGINFO_CODESET
      48  #  include <langinfo.h>
      49  # else
      50  #  if 0 /* see comment regarding use of setlocale(), below */
      51  #   include <locale.h>
      52  #  endif
      53  # endif
      54  # ifdef __CYGWIN__
      55  #  define WIN32_LEAN_AND_MEAN
      56  #  include <windows.h>
      57  # endif
      58  #elif defined WINDOWS_NATIVE
      59  # define WIN32_LEAN_AND_MEAN
      60  # include <windows.h>
      61    /* For the use of setlocale() below, the Gnulib override in setlocale.c is
      62       not needed; see the platform lists in setlocale_null.m4.  */
      63  # undef setlocale
      64  #endif
      65  #if defined OS2
      66  # define INCL_DOS
      67  # include <os2.h>
      68  #endif
      69  
      70  /* For MB_CUR_MAX_L */
      71  #if defined DARWIN7
      72  # include <xlocale.h>
      73  #endif
      74  
      75  
      76  #if HAVE_LANGINFO_CODESET || defined WINDOWS_NATIVE || defined OS2
      77  
      78  /* On these platforms, we use a mapping from non-canonical encoding name
      79     to GNU canonical encoding name.  */
      80  
      81  /* With glibc-2.1 or newer, we don't need any canonicalization,
      82     because glibc has iconv and both glibc and libiconv support all
      83     GNU canonical names directly.  */
      84  # if !((defined __GNU_LIBRARY__ && __GLIBC__ >= 2) || defined __UCLIBC__)
      85  
      86  struct table_entry
      87  {
      88    const char alias[11+1];
      89    const char canonical[11+1];
      90  };
      91  
/* Table of platform-dependent mappings from a non-canonical encoding name
   ("alias") to the GNU canonical encoding name ("canonical").

   Layout:
   - Each platform contributes one section, guarded by its own #if.  No
     comma separates one section from the next, so the sections are
     mutually exclusive: a build in which two of them were active would
     not compile.
   - Within a section the entries are sorted in ascending order of the
     alias, in plain ASCII order (digits before upper case before lower
     case, and e.g. "ISO8859-15" before "ISO8859-2").  Keep that order when
     adding entries; presumably the lookup code, which is not part of this
     table, depends on it.
   - Both strings of an entry must fit into the fixed-size fields of
     struct table_entry.
   - Entries that are commented out are not part of the table; they are
     kept for reference only.
   - Every section defines alias_table_defined, which suppresses the dummy
     entry at the end of the table.  */
static const struct table_entry alias_table[] =
  {
#  if defined __FreeBSD__                                   /* FreeBSD */
  /*{ "ARMSCII-8",  "ARMSCII-8" },*/
    { "Big5",       "BIG5" },
    { "C",          "ASCII" },
  /*{ "CP1131",     "CP1131" },*/
  /*{ "CP1251",     "CP1251" },*/
  /*{ "CP866",      "CP866" },*/
  /*{ "GB18030",    "GB18030" },*/
  /*{ "GB2312",     "GB2312" },*/
  /*{ "GBK",        "GBK" },*/
  /*{ "ISCII-DEV",  "?" },*/
    { "ISO8859-1",  "ISO-8859-1" },
    { "ISO8859-13", "ISO-8859-13" },
    { "ISO8859-15", "ISO-8859-15" },
    { "ISO8859-2",  "ISO-8859-2" },
    { "ISO8859-5",  "ISO-8859-5" },
    { "ISO8859-7",  "ISO-8859-7" },
    { "ISO8859-9",  "ISO-8859-9" },
  /*{ "KOI8-R",     "KOI8-R" },*/
  /*{ "KOI8-U",     "KOI8-U" },*/
    { "SJIS",       "SHIFT_JIS" },
    { "US-ASCII",   "ASCII" },
    { "eucCN",      "GB2312" },
    { "eucJP",      "EUC-JP" },
    { "eucKR",      "EUC-KR" }
    /* Record that a platform section supplied entries, so that the dummy
       entry at the end of the table is omitted.  The same marker is set
       by every section below.  */
#   define alias_table_defined
#  endif
#  if defined __NetBSD__                                    /* NetBSD */
    { "646",        "ASCII" },
  /*{ "ARMSCII-8",  "ARMSCII-8" },*/
  /*{ "BIG5",       "BIG5" },*/
    { "Big5-HKSCS", "BIG5-HKSCS" },
  /*{ "CP1251",     "CP1251" },*/
  /*{ "CP866",      "CP866" },*/
  /*{ "GB18030",    "GB18030" },*/
  /*{ "GB2312",     "GB2312" },*/
    { "ISO8859-1",  "ISO-8859-1" },
    { "ISO8859-13", "ISO-8859-13" },
    { "ISO8859-15", "ISO-8859-15" },
    { "ISO8859-2",  "ISO-8859-2" },
    { "ISO8859-4",  "ISO-8859-4" },
    { "ISO8859-5",  "ISO-8859-5" },
    { "ISO8859-7",  "ISO-8859-7" },
  /*{ "KOI8-R",     "KOI8-R" },*/
  /*{ "KOI8-U",     "KOI8-U" },*/
  /*{ "PT154",      "PT154" },*/
    { "SJIS",       "SHIFT_JIS" },
    { "eucCN",      "GB2312" },
    { "eucJP",      "EUC-JP" },
    { "eucKR",      "EUC-KR" },
    { "eucTW",      "EUC-TW" }
#   define alias_table_defined
#  endif
#  if defined __OpenBSD__                                   /* OpenBSD */
    { "646",        "ASCII" },
    { "ISO8859-1",  "ISO-8859-1" },
    { "ISO8859-13", "ISO-8859-13" },
    { "ISO8859-15", "ISO-8859-15" },
    { "ISO8859-2",  "ISO-8859-2" },
    { "ISO8859-4",  "ISO-8859-4" },
    { "ISO8859-5",  "ISO-8859-5" },
    { "ISO8859-7",  "ISO-8859-7" },
    { "US-ASCII",   "ASCII" }
#   define alias_table_defined
#  endif
#  if defined __APPLE__ && defined __MACH__                 /* Mac OS X */
    /* Darwin 7.5 has nl_langinfo(CODESET), but sometimes its value is
       useless:
       - It returns the empty string when LANG is set to a locale of the
         form ll_CC, although ll_CC/LC_CTYPE is a symlink to an UTF-8
         LC_CTYPE file.
       - The environment variables LANG, LC_CTYPE, LC_ALL are not set by
         the system; nl_langinfo(CODESET) returns "US-ASCII" in this case.
       - The documentation says:
           "... all code that calls BSD system routines should ensure
            that the const *char parameters of these routines are in UTF-8
            encoding. All BSD system functions expect their string
            parameters to be in UTF-8 encoding and nothing else."
         It also says
           "An additional caveat is that string parameters for files,
            paths, and other file-system entities must be in canonical
            UTF-8. In a canonical UTF-8 Unicode string, all decomposable
            characters are decomposed ..."
         but this is not true: You can pass non-decomposed UTF-8 strings
         to file system functions, and it is the OS which will convert
         them to decomposed UTF-8 before accessing the file system.
       - The Apple Terminal application displays UTF-8 by default.
       - However, other applications are free to use different encodings:
         - xterm uses ISO-8859-1 by default.
         - TextEdit uses MacRoman by default.
       We prefer UTF-8 over decomposed UTF-8-MAC because one should
       minimize the use of decomposed Unicode. Unfortunately, through the
       Darwin file system, decomposed UTF-8 strings are leaked into user
       space nevertheless.
       Then there are also the locales with encodings other than US-ASCII
       and UTF-8. These locales can be occasionally useful to users (e.g.
       when grepping through ISO-8859-1 encoded text files), when all their
       file names are in US-ASCII.
     */
    /* Unlike in the other BSD sections, the entries that map a name to
       itself (e.g. "CP866") are active here rather than commented out.  */
    { "ARMSCII-8",  "ARMSCII-8" },
    { "Big5",       "BIG5" },
    { "Big5HKSCS",  "BIG5-HKSCS" },
    { "CP1131",     "CP1131" },
    { "CP1251",     "CP1251" },
    { "CP866",      "CP866" },
    { "CP949",      "CP949" },
    { "GB18030",    "GB18030" },
    { "GB2312",     "GB2312" },
    { "GBK",        "GBK" },
  /*{ "ISCII-DEV",  "?" },*/
    { "ISO8859-1",  "ISO-8859-1" },
    { "ISO8859-13", "ISO-8859-13" },
    { "ISO8859-15", "ISO-8859-15" },
    { "ISO8859-2",  "ISO-8859-2" },
    { "ISO8859-4",  "ISO-8859-4" },
    { "ISO8859-5",  "ISO-8859-5" },
    { "ISO8859-7",  "ISO-8859-7" },
    { "ISO8859-9",  "ISO-8859-9" },
    { "KOI8-R",     "KOI8-R" },
    { "KOI8-U",     "KOI8-U" },
    { "PT154",      "PT154" },
    { "SJIS",       "SHIFT_JIS" },
    { "eucCN",      "GB2312" },
    { "eucJP",      "EUC-JP" },
    { "eucKR",      "EUC-KR" }
#   define alias_table_defined
#  endif
#  if defined _AIX                                          /* AIX */
  /*{ "GBK",        "GBK" },*/
    { "IBM-1046",   "CP1046" },
    { "IBM-1124",   "CP1124" },
    { "IBM-1129",   "CP1129" },
    { "IBM-1252",   "CP1252" },
    { "IBM-850",    "CP850" },
    { "IBM-856",    "CP856" },
    { "IBM-921",    "ISO-8859-13" },
    { "IBM-922",    "CP922" },
    { "IBM-932",    "CP932" },
    { "IBM-943",    "CP943" },
    { "IBM-eucCN",  "GB2312" },
    { "IBM-eucJP",  "EUC-JP" },
    { "IBM-eucKR",  "EUC-KR" },
    { "IBM-eucTW",  "EUC-TW" },
    { "ISO8859-1",  "ISO-8859-1" },
    { "ISO8859-15", "ISO-8859-15" },
    { "ISO8859-2",  "ISO-8859-2" },
    { "ISO8859-5",  "ISO-8859-5" },
    { "ISO8859-6",  "ISO-8859-6" },
    { "ISO8859-7",  "ISO-8859-7" },
    { "ISO8859-8",  "ISO-8859-8" },
    { "ISO8859-9",  "ISO-8859-9" },
    { "TIS-620",    "TIS-620" },
  /*{ "UTF-8",      "UTF-8" },*/
    { "big5",       "BIG5" }
#   define alias_table_defined
#  endif
#  if defined __hpux                                        /* HP-UX */
    { "SJIS",      "SHIFT_JIS" },
    { "arabic8",   "HP-ARABIC8" },
    { "big5",      "BIG5" },
    { "cp1251",    "CP1251" },
    { "eucJP",     "EUC-JP" },
    { "eucKR",     "EUC-KR" },
    { "eucTW",     "EUC-TW" },
    { "gb18030",   "GB18030" },
    { "greek8",    "HP-GREEK8" },
    { "hebrew8",   "HP-HEBREW8" },
    { "hkbig5",    "BIG5-HKSCS" },
    { "hp15CN",    "GB2312" },
    { "iso88591",  "ISO-8859-1" },
    { "iso885913", "ISO-8859-13" },
    { "iso885915", "ISO-8859-15" },
    { "iso88592",  "ISO-8859-2" },
    { "iso88594",  "ISO-8859-4" },
    { "iso88595",  "ISO-8859-5" },
    { "iso88596",  "ISO-8859-6" },
    { "iso88597",  "ISO-8859-7" },
    { "iso88598",  "ISO-8859-8" },
    { "iso88599",  "ISO-8859-9" },
    { "kana8",     "HP-KANA8" },
    { "koi8r",     "KOI8-R" },
    { "roman8",    "HP-ROMAN8" },
    { "tis620",    "TIS-620" },
    { "turkish8",  "HP-TURKISH8" },
    { "utf8",      "UTF-8" }
#   define alias_table_defined
#  endif
#  if defined __sgi                                         /* IRIX */
    { "ISO8859-1",  "ISO-8859-1" },
    { "ISO8859-15", "ISO-8859-15" },
    { "ISO8859-2",  "ISO-8859-2" },
    { "ISO8859-5",  "ISO-8859-5" },
    { "ISO8859-7",  "ISO-8859-7" },
    { "ISO8859-9",  "ISO-8859-9" },
    { "eucCN",      "GB2312" },
    { "eucJP",      "EUC-JP" },
    { "eucKR",      "EUC-KR" },
    { "eucTW",      "EUC-TW" }
#   define alias_table_defined
#  endif
#  if defined __osf__                                       /* OSF/1 */
  /*{ "GBK",        "GBK" },*/
    { "ISO8859-1",  "ISO-8859-1" },
    { "ISO8859-15", "ISO-8859-15" },
    { "ISO8859-2",  "ISO-8859-2" },
    { "ISO8859-4",  "ISO-8859-4" },
    { "ISO8859-5",  "ISO-8859-5" },
    { "ISO8859-7",  "ISO-8859-7" },
    { "ISO8859-8",  "ISO-8859-8" },
    { "ISO8859-9",  "ISO-8859-9" },
    { "KSC5601",    "CP949" },
    { "SJIS",       "SHIFT_JIS" },
    { "TACTIS",     "TIS-620" },
  /*{ "UTF-8",      "UTF-8" },*/
    { "big5",       "BIG5" },
    { "cp850",      "CP850" },
    { "dechanyu",   "DEC-HANYU" },
    { "dechanzi",   "GB2312" },
    { "deckanji",   "DEC-KANJI" },
    { "deckorean",  "EUC-KR" },
    { "eucJP",      "EUC-JP" },
    { "eucKR",      "EUC-KR" },
    { "eucTW",      "EUC-TW" },
    { "sdeckanji",  "EUC-JP" }
#   define alias_table_defined
#  endif
#  if defined __sun                                         /* Solaris */
    { "5601",        "EUC-KR" },
    { "646",         "ASCII" },
  /*{ "BIG5",        "BIG5" },*/
    { "Big5-HKSCS",  "BIG5-HKSCS" },
    { "GB18030",     "GB18030" },
  /*{ "GBK",         "GBK" },*/
    { "ISO8859-1",   "ISO-8859-1" },
    { "ISO8859-11",  "TIS-620" },
    { "ISO8859-13",  "ISO-8859-13" },
    { "ISO8859-15",  "ISO-8859-15" },
    { "ISO8859-2",   "ISO-8859-2" },
    { "ISO8859-3",   "ISO-8859-3" },
    { "ISO8859-4",   "ISO-8859-4" },
    { "ISO8859-5",   "ISO-8859-5" },
    { "ISO8859-6",   "ISO-8859-6" },
    { "ISO8859-7",   "ISO-8859-7" },
    { "ISO8859-8",   "ISO-8859-8" },
    { "ISO8859-9",   "ISO-8859-9" },
    { "PCK",         "SHIFT_JIS" },
    { "TIS620.2533", "TIS-620" },
  /*{ "UTF-8",       "UTF-8" },*/
    { "ansi-1251",   "CP1251" },
    { "cns11643",    "EUC-TW" },
    { "eucJP",       "EUC-JP" },
    { "gb2312",      "GB2312" },
    { "koi8-r",      "KOI8-R" }
#   define alias_table_defined
#  endif
#  if defined __minix                                       /* Minix */
    { "646", "ASCII" }
#   define alias_table_defined
#  endif
#  if defined WINDOWS_NATIVE || defined __CYGWIN__          /* Windows */
    /* Every alias in this section has the form "CP<number>".  */
    { "CP1361",  "JOHAB" },
    { "CP20127", "ASCII" },
    { "CP20866", "KOI8-R" },
    { "CP20936", "GB2312" },
    { "CP21866", "KOI8-RU" },
    { "CP28591", "ISO-8859-1" },
    { "CP28592", "ISO-8859-2" },
    { "CP28593", "ISO-8859-3" },
    { "CP28594", "ISO-8859-4" },
    { "CP28595", "ISO-8859-5" },
    { "CP28596", "ISO-8859-6" },
    { "CP28597", "ISO-8859-7" },
    { "CP28598", "ISO-8859-8" },
    { "CP28599", "ISO-8859-9" },
    { "CP28605", "ISO-8859-15" },
    { "CP38598", "ISO-8859-8" },
    { "CP51932", "EUC-JP" },
    { "CP51936", "GB2312" },
    { "CP51949", "EUC-KR" },
    { "CP51950", "EUC-TW" },
    { "CP54936", "GB18030" },
    { "CP65001", "UTF-8" },
    { "CP936",   "GBK" }
#   define alias_table_defined
#  endif
#  if defined OS2                                           /* OS/2 */
    /* The list of encodings is taken from "List of OS/2 Codepages"
       by Alex Taylor:
       <http://altsan.org/os2/toolkits/uls/index.html#codepages>.
       See also "__convcp() of kLIBC":
       <https://github.com/bitwiseworks/libc/blob/master/src/emx/src/lib/locale/__convcp.c>.  */
    { "CP1004",        "CP1252" },
  /*{ "CP1041",        "CP943" },*/
  /*{ "CP1088",        "CP949" },*/
    { "CP1089",        "ISO-8859-6" },
  /*{ "CP1114",        "CP950" },*/
  /*{ "CP1115",        "GB2312" },*/
    { "CP1208",        "UTF-8" },
  /*{ "CP1380",        "GB2312" },*/
    { "CP1381",        "GB2312" },
    { "CP1383",        "GB2312" },
    { "CP1386",        "GBK" },
  /*{ "CP301",         "CP943" },*/
    { "CP3372",        "EUC-JP" },
    { "CP4946",        "CP850" },
  /*{ "CP5048",        "JIS_X0208-1990" },*/
  /*{ "CP5049",        "JIS_X0212-1990" },*/
  /*{ "CP5067",        "KS_C_5601-1987" },*/
    { "CP813",         "ISO-8859-7" },
    { "CP819",         "ISO-8859-1" },
    { "CP878",         "KOI8-R" },
  /*{ "CP897",         "CP943" },*/
    { "CP912",         "ISO-8859-2" },
    { "CP913",         "ISO-8859-3" },
    { "CP914",         "ISO-8859-4" },
    { "CP915",         "ISO-8859-5" },
    { "CP916",         "ISO-8859-8" },
    { "CP920",         "ISO-8859-9" },
    { "CP921",         "ISO-8859-13" },
    { "CP923",         "ISO-8859-15" },
  /*{ "CP941",         "CP943" },*/
  /*{ "CP947",         "CP950" },*/
  /*{ "CP951",         "CP949" },*/
  /*{ "CP952",         "JIS_X0208-1990" },*/
  /*{ "CP953",         "JIS_X0212-1990" },*/
    { "CP954",         "EUC-JP" },
    { "CP964",         "EUC-TW" },
    { "CP970",         "EUC-KR" },
  /*{ "CP971",         "KS_C_5601-1987" },*/
    { "IBM-1004",      "CP1252" },
  /*{ "IBM-1006",      "?" },*/
  /*{ "IBM-1008",      "?" },*/
  /*{ "IBM-1041",      "CP943" },*/
  /*{ "IBM-1051",      "?" },*/
  /*{ "IBM-1088",      "CP949" },*/
    { "IBM-1089",      "ISO-8859-6" },
  /*{ "IBM-1098",      "?" },*/
  /*{ "IBM-1114",      "CP950" },*/
  /*{ "IBM-1115",      "GB2312" },*/
  /*{ "IBM-1116",      "?" },*/
  /*{ "IBM-1117",      "?" },*/
  /*{ "IBM-1118",      "?" },*/
  /*{ "IBM-1119",      "?" },*/
    { "IBM-1124",      "CP1124" },
    { "IBM-1125",      "CP1125" },
    { "IBM-1131",      "CP1131" },
    { "IBM-1208",      "UTF-8" },
    { "IBM-1250",      "CP1250" },
    { "IBM-1251",      "CP1251" },
    { "IBM-1252",      "CP1252" },
    { "IBM-1253",      "CP1253" },
    { "IBM-1254",      "CP1254" },
    { "IBM-1255",      "CP1255" },
    { "IBM-1256",      "CP1256" },
    { "IBM-1257",      "CP1257" },
  /*{ "IBM-1275",      "?" },*/
  /*{ "IBM-1276",      "?" },*/
  /*{ "IBM-1277",      "?" },*/
  /*{ "IBM-1280",      "?" },*/
  /*{ "IBM-1281",      "?" },*/
  /*{ "IBM-1282",      "?" },*/
  /*{ "IBM-1283",      "?" },*/
  /*{ "IBM-1380",      "GB2312" },*/
    { "IBM-1381",      "GB2312" },
    { "IBM-1383",      "GB2312" },
    { "IBM-1386",      "GBK" },
  /*{ "IBM-301",       "CP943" },*/
    { "IBM-3372",      "EUC-JP" },
    { "IBM-367",       "ASCII" },
    { "IBM-437",       "CP437" },
    { "IBM-4946",      "CP850" },
  /*{ "IBM-5048",      "JIS_X0208-1990" },*/
  /*{ "IBM-5049",      "JIS_X0212-1990" },*/
  /*{ "IBM-5067",      "KS_C_5601-1987" },*/
    { "IBM-813",       "ISO-8859-7" },
    { "IBM-819",       "ISO-8859-1" },
    { "IBM-850",       "CP850" },
  /*{ "IBM-851",       "?" },*/
    { "IBM-852",       "CP852" },
    { "IBM-855",       "CP855" },
    { "IBM-856",       "CP856" },
    { "IBM-857",       "CP857" },
  /*{ "IBM-859",       "?" },*/
    { "IBM-860",       "CP860" },
    { "IBM-861",       "CP861" },
    { "IBM-862",       "CP862" },
    { "IBM-863",       "CP863" },
    { "IBM-864",       "CP864" },
    { "IBM-865",       "CP865" },
    { "IBM-866",       "CP866" },
  /*{ "IBM-868",       "?" },*/
    { "IBM-869",       "CP869" },
    { "IBM-874",       "CP874" },
    { "IBM-878",       "KOI8-R" },
  /*{ "IBM-895",       "?" },*/
  /*{ "IBM-897",       "CP943" },*/
  /*{ "IBM-907",       "?" },*/
  /*{ "IBM-909",       "?" },*/
    { "IBM-912",       "ISO-8859-2" },
    { "IBM-913",       "ISO-8859-3" },
    { "IBM-914",       "ISO-8859-4" },
    { "IBM-915",       "ISO-8859-5" },
    { "IBM-916",       "ISO-8859-8" },
    { "IBM-920",       "ISO-8859-9" },
    { "IBM-921",       "ISO-8859-13" },
    { "IBM-922",       "CP922" },
    { "IBM-923",       "ISO-8859-15" },
    { "IBM-932",       "CP932" },
  /*{ "IBM-941",       "CP943" },*/
  /*{ "IBM-942",       "?" },*/
    { "IBM-943",       "CP943" },
  /*{ "IBM-947",       "CP950" },*/
    { "IBM-949",       "CP949" },
    { "IBM-950",       "CP950" },
  /*{ "IBM-951",       "CP949" },*/
  /*{ "IBM-952",       "JIS_X0208-1990" },*/
  /*{ "IBM-953",       "JIS_X0212-1990" },*/
    { "IBM-954",       "EUC-JP" },
  /*{ "IBM-955",       "?" },*/
    { "IBM-964",       "EUC-TW" },
    { "IBM-970",       "EUC-KR" },
  /*{ "IBM-971",       "KS_C_5601-1987" },*/
    { "IBM-eucCN",     "GB2312" },
    { "IBM-eucJP",     "EUC-JP" },
    { "IBM-eucKR",     "EUC-KR" },
    { "IBM-eucTW",     "EUC-TW" },
    { "IBM33722",      "EUC-JP" },
    { "ISO8859-1",     "ISO-8859-1" },
    { "ISO8859-2",     "ISO-8859-2" },
    { "ISO8859-3",     "ISO-8859-3" },
    { "ISO8859-4",     "ISO-8859-4" },
    { "ISO8859-5",     "ISO-8859-5" },
    { "ISO8859-6",     "ISO-8859-6" },
    { "ISO8859-7",     "ISO-8859-7" },
    { "ISO8859-8",     "ISO-8859-8" },
    { "ISO8859-9",     "ISO-8859-9" },
  /*{ "JISX0201-1976", "JISX0201-1976" },*/
  /*{ "JISX0208-1978", "?" },*/
  /*{ "JISX0208-1983", "JIS_X0208-1983" },*/
  /*{ "JISX0208-1990", "JIS_X0208-1990" },*/
  /*{ "JISX0212-1990", "JIS_X0212-1990" },*/
  /*{ "KSC5601-1987",  "KS_C_5601-1987" },*/
    { "SJIS-1",        "CP943" },
    { "SJIS-2",        "CP943" },
    { "eucJP",         "EUC-JP" },
    { "eucKR",         "EUC-KR" },
    { "eucTW-1993",    "EUC-TW" }
#   define alias_table_defined
#  endif
#  if defined VMS                                           /* OpenVMS */
    /* The list of encodings is taken from the OpenVMS 7.3-1 documentation
       "Compaq C Run-Time Library Reference Manual for OpenVMS systems"
       section 10.7 "Handling Different Character Sets".  */
    { "DECHANYU",  "DEC-HANYU" },
    { "DECHANZI",  "GB2312" },
    { "DECKANJI",  "DEC-KANJI" },
    { "DECKOREAN", "EUC-KR" },
    { "ISO8859-1", "ISO-8859-1" },
    { "ISO8859-2", "ISO-8859-2" },
    { "ISO8859-5", "ISO-8859-5" },
    { "ISO8859-7", "ISO-8859-7" },
    { "ISO8859-8", "ISO-8859-8" },
    { "ISO8859-9", "ISO-8859-9" },
    { "SDECKANJI", "EUC-JP" },
    { "SJIS",      "SHIFT_JIS" },
    { "eucJP",     "EUC-JP" },
    { "eucTW",     "EUC-TW" }
#   define alias_table_defined
#  endif
#  ifndef alias_table_defined
    /* None of the platform sections above was compiled in.  Provide a
       single dummy entry, because an empty initializer list would be a
       C syntax error.  */
    { "", "" }
#  endif
  };
     569  
     570  # endif
     571  
     572  #else
     573  
     574  /* On these platforms, we use a mapping from locale name to GNU canonical
     575     encoding name.  */
     576  
     577  struct table_entry
     578  {
     579    const char locale[17+1];
     580    const char canonical[11+1];
     581  };
     582  
     583  /* Table of platform-dependent mappings, sorted in ascending order.  */
     584  static const struct table_entry locale_table[] =
     585    {
     586  # if defined __FreeBSD__                                    /* FreeBSD 4.2 */
     587      { "cs_CZ.ISO_8859-2",  "ISO-8859-2" },
     588      { "da_DK.DIS_8859-15", "ISO-8859-15" },
     589      { "da_DK.ISO_8859-1",  "ISO-8859-1" },
     590      { "de_AT.DIS_8859-15", "ISO-8859-15" },
     591      { "de_AT.ISO_8859-1",  "ISO-8859-1" },
     592      { "de_CH.DIS_8859-15", "ISO-8859-15" },
     593      { "de_CH.ISO_8859-1",  "ISO-8859-1" },
     594      { "de_DE.DIS_8859-15", "ISO-8859-15" },
     595      { "de_DE.ISO_8859-1",  "ISO-8859-1" },
     596      { "en_AU.DIS_8859-15", "ISO-8859-15" },
     597      { "en_AU.ISO_8859-1",  "ISO-8859-1" },
     598      { "en_CA.DIS_8859-15", "ISO-8859-15" },
     599      { "en_CA.ISO_8859-1",  "ISO-8859-1" },
     600      { "en_GB.DIS_8859-15", "ISO-8859-15" },
     601      { "en_GB.ISO_8859-1",  "ISO-8859-1" },
     602      { "en_US.DIS_8859-15", "ISO-8859-15" },
     603      { "en_US.ISO_8859-1",  "ISO-8859-1" },
     604      { "es_ES.DIS_8859-15", "ISO-8859-15" },
     605      { "es_ES.ISO_8859-1",  "ISO-8859-1" },
     606      { "fi_FI.DIS_8859-15", "ISO-8859-15" },
     607      { "fi_FI.ISO_8859-1",  "ISO-8859-1" },
     608      { "fr_BE.DIS_8859-15", "ISO-8859-15" },
     609      { "fr_BE.ISO_8859-1",  "ISO-8859-1" },
     610      { "fr_CA.DIS_8859-15", "ISO-8859-15" },
     611      { "fr_CA.ISO_8859-1",  "ISO-8859-1" },
     612      { "fr_CH.DIS_8859-15", "ISO-8859-15" },
     613      { "fr_CH.ISO_8859-1",  "ISO-8859-1" },
     614      { "fr_FR.DIS_8859-15", "ISO-8859-15" },
     615      { "fr_FR.ISO_8859-1",  "ISO-8859-1" },
     616      { "hr_HR.ISO_8859-2",  "ISO-8859-2" },
     617      { "hu_HU.ISO_8859-2",  "ISO-8859-2" },
     618      { "is_IS.DIS_8859-15", "ISO-8859-15" },
     619      { "is_IS.ISO_8859-1",  "ISO-8859-1" },
     620      { "it_CH.DIS_8859-15", "ISO-8859-15" },
     621      { "it_CH.ISO_8859-1",  "ISO-8859-1" },
     622      { "it_IT.DIS_8859-15", "ISO-8859-15" },
     623      { "it_IT.ISO_8859-1",  "ISO-8859-1" },
     624      { "ja_JP.EUC",         "EUC-JP" },
     625      { "ja_JP.SJIS",        "SHIFT_JIS" },
     626      { "ja_JP.Shift_JIS",   "SHIFT_JIS" },
     627      { "ko_KR.EUC",         "EUC-KR" },
     628      { "la_LN.ASCII",       "ASCII" },
     629      { "la_LN.DIS_8859-15", "ISO-8859-15" },
     630      { "la_LN.ISO_8859-1",  "ISO-8859-1" },
     631      { "la_LN.ISO_8859-2",  "ISO-8859-2" },
     632      { "la_LN.ISO_8859-4",  "ISO-8859-4" },
     633      { "lt_LN.ASCII",       "ASCII" },
     634      { "lt_LN.DIS_8859-15", "ISO-8859-15" },
     635      { "lt_LN.ISO_8859-1",  "ISO-8859-1" },
     636      { "lt_LN.ISO_8859-2",  "ISO-8859-2" },
     637      { "lt_LT.ISO_8859-4",  "ISO-8859-4" },
     638      { "nl_BE.DIS_8859-15", "ISO-8859-15" },
     639      { "nl_BE.ISO_8859-1",  "ISO-8859-1" },
     640      { "nl_NL.DIS_8859-15", "ISO-8859-15" },
     641      { "nl_NL.ISO_8859-1",  "ISO-8859-1" },
     642      { "no_NO.DIS_8859-15", "ISO-8859-15" },
     643      { "no_NO.ISO_8859-1",  "ISO-8859-1" },
     644      { "pl_PL.ISO_8859-2",  "ISO-8859-2" },
     645      { "pt_PT.DIS_8859-15", "ISO-8859-15" },
     646      { "pt_PT.ISO_8859-1",  "ISO-8859-1" },
     647      { "ru_RU.CP866",       "CP866" },
     648      { "ru_RU.ISO_8859-5",  "ISO-8859-5" },
     649      { "ru_RU.KOI8-R",      "KOI8-R" },
     650      { "ru_SU.CP866",       "CP866" },
     651      { "ru_SU.ISO_8859-5",  "ISO-8859-5" },
     652      { "ru_SU.KOI8-R",      "KOI8-R" },
     653      { "sl_SI.ISO_8859-2",  "ISO-8859-2" },
     654      { "sv_SE.DIS_8859-15", "ISO-8859-15" },
     655      { "sv_SE.ISO_8859-1",  "ISO-8859-1" },
     656      { "uk_UA.KOI8-U",      "KOI8-U" },
     657      { "zh_CN.EUC",         "GB2312" },
     658      { "zh_TW.BIG5",        "BIG5" },
     659      { "zh_TW.Big5",        "BIG5" }
     660  #  define locale_table_defined
     661  # endif
     662  # if defined __DJGPP__                                      /* DOS / DJGPP 2.03 */
     663      /* The encodings given here may not all be correct.
     664         If you find that the encoding given for your language and
     665         country is not the one your DOS machine actually uses, just
     666         correct it in this file, and send a mail to
     667         Juan Manuel Guerrero <juan.guerrero@gmx.de>
     668         and <bug-gnulib@gnu.org>.  */
     669      { "C",     "ASCII" },
     670      { "ar",    "CP864" },
     671      { "ar_AE", "CP864" },
     672      { "ar_DZ", "CP864" },
     673      { "ar_EG", "CP864" },
     674      { "ar_IQ", "CP864" },
     675      { "ar_IR", "CP864" },
     676      { "ar_JO", "CP864" },
     677      { "ar_KW", "CP864" },
     678      { "ar_MA", "CP864" },
     679      { "ar_OM", "CP864" },
     680      { "ar_QA", "CP864" },
     681      { "ar_SA", "CP864" },
     682      { "ar_SY", "CP864" },
     683      { "be",    "CP866" },
     684      { "be_BE", "CP866" },
     685      { "bg",    "CP866" }, /* not CP855 ?? */
     686      { "bg_BG", "CP866" }, /* not CP855 ?? */
     687      { "ca",    "CP850" },
     688      { "ca_ES", "CP850" },
     689      { "cs",    "CP852" },
     690      { "cs_CZ", "CP852" },
     691      { "da",    "CP865" }, /* not CP850 ?? */
     692      { "da_DK", "CP865" }, /* not CP850 ?? */
     693      { "de",    "CP850" },
     694      { "de_AT", "CP850" },
     695      { "de_CH", "CP850" },
     696      { "de_DE", "CP850" },
     697      { "el",    "CP869" },
     698      { "el_GR", "CP869" },
     699      { "en",    "CP850" },
     700      { "en_AU", "CP850" }, /* not CP437 ?? */
     701      { "en_CA", "CP850" },
     702      { "en_GB", "CP850" },
     703      { "en_NZ", "CP437" },
     704      { "en_US", "CP437" },
     705      { "en_ZA", "CP850" }, /* not CP437 ?? */
     706      { "eo",    "CP850" },
     707      { "eo_EO", "CP850" },
     708      { "es",    "CP850" },
     709      { "es_AR", "CP850" },
     710      { "es_BO", "CP850" },
     711      { "es_CL", "CP850" },
     712      { "es_CO", "CP850" },
     713      { "es_CR", "CP850" },
     714      { "es_CU", "CP850" },
     715      { "es_DO", "CP850" },
     716      { "es_EC", "CP850" },
     717      { "es_ES", "CP850" },
     718      { "es_GT", "CP850" },
     719      { "es_HN", "CP850" },
     720      { "es_MX", "CP850" },
     721      { "es_NI", "CP850" },
     722      { "es_PA", "CP850" },
     723      { "es_PE", "CP850" },
     724      { "es_PY", "CP850" },
     725      { "es_SV", "CP850" },
     726      { "es_UY", "CP850" },
     727      { "es_VE", "CP850" },
     728      { "et",    "CP850" },
     729      { "et_EE", "CP850" },
     730      { "eu",    "CP850" },
     731      { "eu_ES", "CP850" },
     732      { "fi",    "CP850" },
     733      { "fi_FI", "CP850" },
     734      { "fr",    "CP850" },
     735      { "fr_BE", "CP850" },
     736      { "fr_CA", "CP850" },
     737      { "fr_CH", "CP850" },
     738      { "fr_FR", "CP850" },
     739      { "ga",    "CP850" },
     740      { "ga_IE", "CP850" },
     741      { "gd",    "CP850" },
     742      { "gd_GB", "CP850" },
     743      { "gl",    "CP850" },
     744      { "gl_ES", "CP850" },
     745      { "he",    "CP862" },
     746      { "he_IL", "CP862" },
     747      { "hr",    "CP852" },
     748      { "hr_HR", "CP852" },
     749      { "hu",    "CP852" },
     750      { "hu_HU", "CP852" },
     751      { "id",    "CP850" }, /* not CP437 ?? */
     752      { "id_ID", "CP850" }, /* not CP437 ?? */
     753      { "is",    "CP861" }, /* not CP850 ?? */
     754      { "is_IS", "CP861" }, /* not CP850 ?? */
     755      { "it",    "CP850" },
     756      { "it_CH", "CP850" },
     757      { "it_IT", "CP850" },
     758      { "ja",    "CP932" },
     759      { "ja_JP", "CP932" },
     760      { "kr",    "CP949" }, /* not CP934 ?? */
     761      { "kr_KR", "CP949" }, /* not CP934 ?? */
     762      { "lt",    "CP775" },
     763      { "lt_LT", "CP775" },
     764      { "lv",    "CP775" },
     765      { "lv_LV", "CP775" },
     766      { "mk",    "CP866" }, /* not CP855 ?? */
     767      { "mk_MK", "CP866" }, /* not CP855 ?? */
     768      { "mt",    "CP850" },
     769      { "mt_MT", "CP850" },
     770      { "nb",    "CP865" }, /* not CP850 ?? */
     771      { "nb_NO", "CP865" }, /* not CP850 ?? */
     772      { "nl",    "CP850" },
     773      { "nl_BE", "CP850" },
     774      { "nl_NL", "CP850" },
     775      { "nn",    "CP865" }, /* not CP850 ?? */
     776      { "nn_NO", "CP865" }, /* not CP850 ?? */
     777      { "no",    "CP865" }, /* not CP850 ?? */
     778      { "no_NO", "CP865" }, /* not CP850 ?? */
     779      { "pl",    "CP852" },
     780      { "pl_PL", "CP852" },
     781      { "pt",    "CP850" },
     782      { "pt_BR", "CP850" },
     783      { "pt_PT", "CP850" },
     784      { "ro",    "CP852" },
     785      { "ro_RO", "CP852" },
     786      { "ru",    "CP866" },
     787      { "ru_RU", "CP866" },
     788      { "sk",    "CP852" },
     789      { "sk_SK", "CP852" },
     790      { "sl",    "CP852" },
     791      { "sl_SI", "CP852" },
     792      { "sq",    "CP852" },
     793      { "sq_AL", "CP852" },
     794      { "sr",    "CP852" }, /* CP852 or CP866 or CP855 ?? */
     795      { "sr_CS", "CP852" }, /* CP852 or CP866 or CP855 ?? */
     796      { "sr_YU", "CP852" }, /* CP852 or CP866 or CP855 ?? */
     797      { "sv",    "CP850" },
     798      { "sv_SE", "CP850" },
     799      { "th",    "CP874" },
     800      { "th_TH", "CP874" },
     801      { "tr",    "CP857" },
     802      { "tr_TR", "CP857" },
     803      { "uk",    "CP1125" },
     804      { "uk_UA", "CP1125" },
     805      { "zh_CN", "GBK" },
     806      { "zh_TW", "CP950" } /* not CP938 ?? */
     807  #  define locale_table_defined
     808  # endif
     809  # ifndef locale_table_defined
     810      /* Just a dummy entry, to avoid a C syntax error.  */
     811      { "", "" }
     812  # endif
     813    };
     814  
     815  #endif
     816  
     817  
     818  /* Determine the current locale's character encoding, and canonicalize it
     819     into one of the canonical names listed below.
     820     The result must not be freed; it is statically allocated.  The result
     821     becomes invalid when setlocale() is used to change the global locale, or
     822     when the value of one of the environment variables LC_ALL, LC_CTYPE, LANG
     823     is changed; threads in multithreaded programs should not do this.
     824     If the canonical name cannot be determined, the result is a non-canonical
     825     name.  */
     826  
     827  #ifdef STATIC
     828  STATIC
     829  #endif
     830  const char *
     831  locale_charset (void)
     832  {
     833    const char *codeset;
     834  
     835    /* This function must be multithread-safe.  To achieve this without using
     836       thread-local storage, we use a simple strcpy or memcpy to fill this static
     837       buffer.  Filling it through, for example, strcpy + strcat would not be
     838       guaranteed to leave the buffer's contents intact if another thread is
     839       currently accessing it.  If necessary, the contents is first assembled in
     840       a stack-allocated buffer.  */
     841  
     842  #if HAVE_LANGINFO_CODESET || defined WINDOWS_NATIVE || defined OS2
     843  
     844  # if HAVE_LANGINFO_CODESET
     845  
     846    /* Most systems support nl_langinfo (CODESET) nowadays.  */
     847    codeset = nl_langinfo (CODESET);
     848  
     849  #  ifdef __CYGWIN__
     850    /* Cygwin < 1.7 does not have locales.  nl_langinfo (CODESET) always
     851       returns "US-ASCII".  Return the suffix of the locale name from the
     852       environment variables (if present) or the codepage as a number.  */
     853    if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0)
     854      {
     855        const char *locale;
     856        static char resultbuf[2 + 10 + 1];
     857  
     858        locale = getenv ("LC_ALL");
     859        if (locale == NULL || locale[0] == '\0')
     860          {
     861            locale = getenv ("LC_CTYPE");
     862            if (locale == NULL || locale[0] == '\0')
     863              locale = getenv ("LANG");
     864          }
     865        if (locale != NULL && locale[0] != '\0')
     866          {
     867            /* If the locale name contains an encoding after the dot, return
     868               it.  */
     869            const char *dot = strchr (locale, '.');
     870  
     871            if (dot != NULL)
     872              {
     873                const char *modifier;
     874  
     875                dot++;
     876                /* Look for the possible @... trailer and remove it, if any.  */
     877                modifier = strchr (dot, '@');
     878                if (modifier == NULL)
     879                  return dot;
     880                if (modifier - dot < sizeof (resultbuf))
     881                  {
     882                    /* This way of filling resultbuf is multithread-safe.  */
     883                    memcpy (resultbuf, dot, modifier - dot);
     884                    resultbuf [modifier - dot] = '\0';
     885                    return resultbuf;
     886                  }
     887              }
     888          }
     889  
     890        /* The Windows API has a function returning the locale's codepage as a
     891           number: GetACP().  This encoding is used by Cygwin, unless the user
     892           has set the environment variable CYGWIN=codepage:oem (which very few
     893           people do).
     894           Output directed to console windows needs to be converted (to
     895           GetOEMCP() if the console is using a raster font, or to
     896           GetConsoleOutputCP() if it is using a TrueType font).  Cygwin does
     897           this conversion transparently (see winsup/cygwin/fhandler_console.cc),
     898           converting to GetConsoleOutputCP().  This leads to correct results,
     899           except when SetConsoleOutputCP has been called and a raster font is
     900           in use.  */
     901        {
     902          char buf[2 + 10 + 1];
     903  
     904          sprintf (buf, "CP%u", GetACP ());
     905          strcpy (resultbuf, buf);
     906          codeset = resultbuf;
     907        }
     908      }
     909  #  endif
     910  
     911    if (codeset == NULL)
     912      /* The canonical name cannot be determined.  */
     913      codeset = "";
     914  
     915  # elif defined WINDOWS_NATIVE
     916  
     917    char buf[2 + 10 + 1];
     918    static char resultbuf[2 + 10 + 1];
     919  
     920    /* The Windows API has a function returning the locale's codepage as
     921       a number, but the value doesn't change according to what the
     922       'setlocale' call specified.  So we use it as a last resort, in
     923       case the string returned by 'setlocale' doesn't specify the
     924       codepage.  */
     925    char *current_locale = setlocale (LC_CTYPE, NULL);
     926    char *pdot = strrchr (current_locale, '.');
     927  
     928    if (pdot && 2 + strlen (pdot + 1) + 1 <= sizeof (buf))
     929      sprintf (buf, "CP%s", pdot + 1);
     930    else
     931      {
     932        /* The Windows API has a function returning the locale's codepage as a
     933           number: GetACP().
     934           When the output goes to a console window, it needs to be provided in
     935           GetOEMCP() encoding if the console is using a raster font, or in
     936           GetConsoleOutputCP() encoding if it is using a TrueType font.
     937           But in GUI programs and for output sent to files and pipes, GetACP()
     938           encoding is the best bet.  */
     939        sprintf (buf, "CP%u", GetACP ());
     940      }
     941    /* For a locale name such as "French_France.65001", in Windows 10,
     942       setlocale now returns "French_France.utf8" instead.  */
     943    if (strcmp (buf + 2, "65001") == 0 || strcmp (buf + 2, "utf8") == 0)
     944      codeset = "UTF-8";
     945    else
     946      {
     947        strcpy (resultbuf, buf);
     948        codeset = resultbuf;
     949      }
     950  
     951  # elif defined OS2
     952  
     953    const char *locale;
     954    static char resultbuf[2 + 10 + 1];
     955    ULONG cp[3];
     956    ULONG cplen;
     957  
     958    codeset = NULL;
     959  
     960    /* Allow user to override the codeset, as set in the operating system,
     961       with standard language environment variables.  */
     962    locale = getenv ("LC_ALL");
     963    if (locale == NULL || locale[0] == '\0')
     964      {
     965        locale = getenv ("LC_CTYPE");
     966        if (locale == NULL || locale[0] == '\0')
     967          locale = getenv ("LANG");
     968      }
     969    if (locale != NULL && locale[0] != '\0')
     970      {
     971        /* If the locale name contains an encoding after the dot, return it.  */
     972        const char *dot = strchr (locale, '.');
     973  
     974        if (dot != NULL)
     975          {
     976            const char *modifier;
     977  
     978            dot++;
     979            /* Look for the possible @... trailer and remove it, if any.  */
     980            modifier = strchr (dot, '@');
     981            if (modifier == NULL)
     982              return dot;
     983            if (modifier - dot < sizeof (resultbuf))
     984              {
     985                /* This way of filling resultbuf is multithread-safe.  */
     986                memcpy (resultbuf, dot, modifier - dot);
     987                resultbuf [modifier - dot] = '\0';
     988                return resultbuf;
     989              }
     990          }
     991  
     992        /* For the POSIX locale, don't use the system's codepage.  */
     993        if (strcmp (locale, "C") == 0 || strcmp (locale, "POSIX") == 0)
     994          codeset = "";
     995      }
     996  
     997    if (codeset == NULL)
     998      {
     999        /* OS/2 has a function returning the locale's codepage as a number.  */
    1000        if (DosQueryCp (sizeof (cp), cp, &cplen))
    1001          codeset = "";
    1002        else
    1003          {
    1004            char buf[2 + 10 + 1];
    1005  
    1006            sprintf (buf, "CP%u", cp[0]);
    1007            strcpy (resultbuf, buf);
    1008            codeset = resultbuf;
    1009          }
    1010      }
    1011  
    1012  # else
    1013  
    1014  #  error "Add code for other platforms here."
    1015  
    1016  # endif
    1017  
    1018    /* Resolve alias.  */
    1019    {
    1020  # ifdef alias_table_defined
    1021      /* On some platforms, UTF-8 locales are the most frequently used ones.
    1022         Speed up the common case and slow down the less common cases by
    1023         testing for this case first.  */
    1024  #  if defined __OpenBSD__ || (defined __APPLE__ && defined __MACH__) || defined __sun || defined __CYGWIN__
    1025      if (strcmp (codeset, "UTF-8") == 0)
    1026        goto done_table_lookup;
    1027      else
    1028  #  endif
    1029        {
    1030          const struct table_entry * const table = alias_table;
    1031          size_t const table_size =
    1032            sizeof (alias_table) / sizeof (struct table_entry);
    1033          /* The table is sorted.  Perform a binary search.  */
    1034          size_t hi = table_size;
    1035          size_t lo = 0;
    1036          while (lo < hi)
    1037            {
    1038              /* Invariant:
    1039                 for i < lo, strcmp (table[i].alias, codeset) < 0,
    1040                 for i >= hi, strcmp (table[i].alias, codeset) > 0.  */
    1041              size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
    1042              int cmp = strcmp (table[mid].alias, codeset);
    1043              if (cmp < 0)
    1044                lo = mid + 1;
    1045              else if (cmp > 0)
    1046                hi = mid;
    1047              else
    1048                {
    1049                  /* Found an i with
    1050                       strcmp (table[i].alias, codeset) == 0.  */
    1051                  codeset = table[mid].canonical;
    1052                  goto done_table_lookup;
    1053                }
    1054            }
    1055        }
    1056      if (0)
    1057        done_table_lookup: ;
    1058      else
    1059  # endif
    1060        {
    1061          /* Did not find it in the table.  */
    1062          /* On Mac OS X, all modern locales use the UTF-8 encoding.
    1063             BeOS and Haiku have a single locale, and it has UTF-8 encoding.  */
    1064  # if (defined __APPLE__ && defined __MACH__) || defined __BEOS__ || defined __HAIKU__
    1065          codeset = "UTF-8";
    1066  # else
    1067          /* Don't return an empty string.  GNU libc and GNU libiconv interpret
    1068             the empty string as denoting "the locale's character encoding",
    1069             thus GNU libiconv would call this function a second time.  */
    1070          if (codeset[0] == '\0')
    1071            codeset = "ASCII";
    1072  # endif
    1073        }
    1074    }
    1075  
    1076  #else
    1077  
    1078    /* On old systems which lack it, use setlocale or getenv.  */
    1079    const char *locale = NULL;
    1080  
    1081    /* But most old systems don't have a complete set of locales.  Some
    1082       (like DJGPP) have only the C locale.  Therefore we don't use setlocale
    1083       here; it would return "C" when it doesn't support the locale name the
    1084       user has set.  */
    1085  # if 0
    1086    locale = setlocale (LC_CTYPE, NULL);
    1087  # endif
    1088    if (locale == NULL || locale[0] == '\0')
    1089      {
    1090        locale = getenv ("LC_ALL");
    1091        if (locale == NULL || locale[0] == '\0')
    1092          {
    1093            locale = getenv ("LC_CTYPE");
    1094            if (locale == NULL || locale[0] == '\0')
    1095              locale = getenv ("LANG");
    1096              if (locale == NULL)
    1097                locale = "";
    1098          }
    1099      }
    1100  
    1101    /* Map locale name to canonical encoding name.  */
    1102    {
    1103  # ifdef locale_table_defined
    1104      const struct table_entry * const table = locale_table;
    1105      size_t const table_size =
    1106        sizeof (locale_table) / sizeof (struct table_entry);
    1107      /* The table is sorted.  Perform a binary search.  */
    1108      size_t hi = table_size;
    1109      size_t lo = 0;
    1110      while (lo < hi)
    1111        {
    1112          /* Invariant:
    1113             for i < lo, strcmp (table[i].locale, locale) < 0,
    1114             for i >= hi, strcmp (table[i].locale, locale) > 0.  */
    1115          size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
    1116          int cmp = strcmp (table[mid].locale, locale);
    1117          if (cmp < 0)
    1118            lo = mid + 1;
    1119          else if (cmp > 0)
    1120            hi = mid;
    1121          else
    1122            {
    1123              /* Found an i with
    1124                   strcmp (table[i].locale, locale) == 0.  */
    1125              codeset = table[mid].canonical;
    1126              goto done_table_lookup;
    1127            }
    1128        }
    1129      if (0)
    1130        done_table_lookup: ;
    1131      else
    1132  # endif
    1133        {
    1134          /* Did not find it in the table.  */
    1135          /* On Mac OS X, all modern locales use the UTF-8 encoding.
    1136             BeOS and Haiku have a single locale, and it has UTF-8 encoding.  */
    1137  # if (defined __APPLE__ && defined __MACH__) || defined __BEOS__ || defined __HAIKU__
    1138          codeset = "UTF-8";
    1139  # else
    1140          /* The canonical name cannot be determined.  */
    1141          /* Don't return an empty string.  GNU libc and GNU libiconv interpret
    1142             the empty string as denoting "the locale's character encoding",
    1143             thus GNU libiconv would call this function a second time.  */
    1144          codeset = "ASCII";
    1145  # endif
    1146        }
    1147    }
    1148  
    1149  #endif
    1150  
    1151  #ifdef DARWIN7
    1152    /* Mac OS X sets MB_CUR_MAX to 1 when LC_ALL=C, and "UTF-8"
    1153       (the default codeset) does not work when MB_CUR_MAX is 1.  */
    1154    if (strcmp (codeset, "UTF-8") == 0 && MB_CUR_MAX_L (uselocale (NULL)) <= 1)
    1155      codeset = "ASCII";
    1156  #endif
    1157  
    1158    return codeset;
    1159  }