1  """Locale support module.
       2  
       3  The module provides low-level access to the C lib's locale APIs and adds high
       4  level number formatting APIs as well as a locale aliasing engine to complement
       5  these.
       6  
       7  The aliasing engine includes support for many commonly used locale names and
       8  maps them to values suitable for passing to the C lib's setlocale() function. It
       9  also includes default encodings for all supported locale names.
      10  
      11  """
      12  
      13  import sys
      14  import encodings
      15  import encodings.aliases
      16  import re
      17  import _collections_abc
      18  from builtins import str as _builtin_str
      19  import functools
      20  
      21  # Try importing the _locale module.
      22  #
      23  # If this fails, fall back on a basic 'C' locale emulation.
      24  
      25  # Yuck:  LC_MESSAGES is non-standard:  can't tell whether it exists before
      26  # trying the import.  So __all__ is also fiddled at the end of the file.
      27  __all__ = ["getlocale", "getdefaultlocale", "getpreferredencoding", "Error",
      28             "setlocale", "resetlocale", "localeconv", "strcoll", "strxfrm",
      29             "str", "atof", "atoi", "format_string", "currency",
      30             "normalize", "LC_CTYPE", "LC_COLLATE", "LC_TIME", "LC_MONETARY",
      31             "LC_NUMERIC", "LC_ALL", "CHAR_MAX", "getencoding"]
      32  
      33  def _strcoll(a,b):
      34      """ strcoll(string,string) -> int.
      35          Compares two strings according to the locale.
      36      """
      37      return (a > b) - (a < b)
      38  
      39  def _strxfrm(s):
      40      """ strxfrm(string) -> string.
      41          Returns a string that behaves for cmp locale-aware.
      42      """
      43      return s
      44  
# Pull in the C implementation when it is available; everything in the
# except branch is a pure-Python stand-in that only knows the 'C' locale.
try:

    from _locale import *

except ImportError:

    # Locale emulation

    # Category constants and limits used by the rest of this module.
    CHAR_MAX = 127
    LC_ALL = 6
    LC_COLLATE = 3
    LC_CTYPE = 0
    LC_MESSAGES = 5
    LC_MONETARY = 4
    LC_NUMERIC = 1
    LC_TIME = 2
    # Locale errors are reported as plain ValueError in the emulation.
    Error = ValueError

    def localeconv():
        """ localeconv() -> dict.
            Returns numeric and monetary locale-specific parameters.
        """
        # 'C' locale default values
        # (127 marks a value as "not available"; currency() checks the
        # fractional-digit entries against this value.)
        return {'grouping': [127],
                'currency_symbol': '',
                'n_sign_posn': 127,
                'p_cs_precedes': 127,
                'n_cs_precedes': 127,
                'mon_grouping': [],
                'n_sep_by_space': 127,
                'decimal_point': '.',
                'negative_sign': '',
                'positive_sign': '',
                'p_sep_by_space': 127,
                'int_curr_symbol': '',
                'p_sign_posn': 127,
                'thousands_sep': '',
                'mon_thousands_sep': '',
                'frac_digits': 127,
                'mon_decimal_point': '',
                'int_frac_digits': 127}

    def setlocale(category, value=None):
        """ setlocale(integer,string=None) -> string.
            Activates/queries locale processing.
        """
        # Only a query (None), the default ('') or an explicit 'C' request
        # is accepted; the answer is always 'C'.
        if value not in (None, '', 'C'):
            raise Error('_locale emulation only supports "C" locale')
        return 'C'
      94  
# These may or may not exist in _locale, so be sure to set them.
# (The star import above binds them as module globals when it provides them;
# otherwise the pure-Python fallbacks defined earlier are used.)
if 'strxfrm' not in globals():
    strxfrm = _strxfrm
if 'strcoll' not in globals():
    strcoll = _strcoll


# Keep a reference to the low-level localeconv(); the public name is
# rebound below to a wrapper that applies _override_localeconv.
_localeconv = localeconv

# With this dict, you can override some items of localeconv's return value.
# This is useful for testing purposes.
_override_localeconv = {}
     107  
     108  @functools.wraps(_localeconv)
     109  def localeconv():
     110      d = _localeconv()
     111      if _override_localeconv:
     112          d.update(_override_localeconv)
     113      return d
     114  
     115  
     116  ### Number formatting APIs
     117  
     118  # Author: Martin von Loewis
     119  # improved by Georg Brandl
     120  
     121  # Iterate over grouping intervals
     122  def _grouping_intervals(grouping):
     123      last_interval = None
     124      for interval in grouping:
     125          # if grouping is -1, we are done
     126          if interval == CHAR_MAX:
     127              return
     128          # 0: re-use last group ad infinitum
     129          if interval == 0:
     130              if last_interval is None:
     131                  raise ValueError("invalid grouping")
     132              while True:
     133                  yield last_interval
     134          yield interval
     135          last_interval = interval
     136  
     137  #perform the grouping from right to left
     138  def _group(s, monetary=False):
     139      conv = localeconv()
     140      thousands_sep = conv[monetary and 'mon_thousands_sep' or 'thousands_sep']
     141      grouping = conv[monetary and 'mon_grouping' or 'grouping']
     142      if not grouping:
     143          return (s, 0)
     144      if s[-1] == ' ':
     145          stripped = s.rstrip()
     146          right_spaces = s[len(stripped):]
     147          s = stripped
     148      else:
     149          right_spaces = ''
     150      left_spaces = ''
     151      groups = []
     152      for interval in _grouping_intervals(grouping):
     153          if not s or s[-1] not in "0123456789":
     154              # only non-digit characters remain (sign, spaces)
     155              left_spaces = s
     156              s = ''
     157              break
     158          groups.append(s[-interval:])
     159          s = s[:-interval]
     160      if s:
     161          groups.append(s)
     162      groups.reverse()
     163      return (
     164          left_spaces + thousands_sep.join(groups) + right_spaces,
     165          len(thousands_sep) * (len(groups) - 1)
     166      )
     167  
     168  # Strip a given amount of excess padding from the given string
     169  def _strip_padding(s, amount):
     170      lpos = 0
     171      while amount and s[lpos] == ' ':
     172          lpos += 1
     173          amount -= 1
     174      rpos = len(s) - 1
     175      while amount and s[rpos] == ' ':
     176          rpos -= 1
     177          amount -= 1
     178      return s[lpos:rpos+1]
     179  
# Matches one %-style conversion specifier: an optional parenthesised
# mapping key (group 'key'), optional flag/width/precision/length characters
# (group 'modifiers'), and the conversion character -- which may be a
# literal '%'.
_percent_re = re.compile(r'%(?:\((?P<key>.*?)\))?'
                         r'(?P<modifiers>[-#0-9 +*.hlL]*?)[eEfFgGdiouxXcrs%]')
     182  
     183  def _format(percent, value, grouping=False, monetary=False, *additional):
     184      if additional:
     185          formatted = percent % ((value,) + additional)
     186      else:
     187          formatted = percent % value
     188      if percent[-1] in 'eEfFgGdiu':
     189          formatted = _localize(formatted, grouping, monetary)
     190      return formatted
     191  
     192  # Transform formatted as locale number according to the locale settings
     193  def _localize(formatted, grouping=False, monetary=False):
     194      # floats and decimal ints need special action!
     195      if '.' in formatted:
     196          seps = 0
     197          parts = formatted.split('.')
     198          if grouping:
     199              parts[0], seps = _group(parts[0], monetary=monetary)
     200          decimal_point = localeconv()[monetary and 'mon_decimal_point'
     201                                                or 'decimal_point']
     202          formatted = decimal_point.join(parts)
     203          if seps:
     204              formatted = _strip_padding(formatted, seps)
     205      else:
     206          seps = 0
     207          if grouping:
     208              formatted, seps = _group(formatted, monetary=monetary)
     209          if seps:
     210              formatted = _strip_padding(formatted, seps)
     211      return formatted
     212  
     213  def format_string(f, val, grouping=False, monetary=False):
     214      """Formats a string in the same way that the % formatting would use,
     215      but takes the current locale into account.
     216  
     217      Grouping is applied if the third parameter is true.
     218      Conversion uses monetary thousands separator and grouping strings if
     219      forth parameter monetary is true."""
     220      percents = list(_percent_re.finditer(f))
     221      new_f = _percent_re.sub('%s', f)
     222  
     223      if isinstance(val, _collections_abc.Mapping):
     224          new_val = []
     225          for perc in percents:
     226              if perc.group()[-1]=='%':
     227                  new_val.append('%')
     228              else:
     229                  new_val.append(_format(perc.group(), val, grouping, monetary))
     230      else:
     231          if not isinstance(val, tuple):
     232              val = (val,)
     233          new_val = []
     234          i = 0
     235          for perc in percents:
     236              if perc.group()[-1]=='%':
     237                  new_val.append('%')
     238              else:
     239                  starcount = perc.group('modifiers').count('*')
     240                  new_val.append(_format(perc.group(),
     241                                        val[i],
     242                                        grouping,
     243                                        monetary,
     244                                        *val[i+1:i+1+starcount]))
     245                  i += (1 + starcount)
     246      val = tuple(new_val)
     247  
     248      return new_f % val
     249  
     250  def currency(val, symbol=True, grouping=False, international=False):
     251      """Formats val according to the currency settings
     252      in the current locale."""
     253      conv = localeconv()
     254  
     255      # check for illegal values
     256      digits = conv[international and 'int_frac_digits' or 'frac_digits']
     257      if digits == 127:
     258          raise ValueError("Currency formatting is not possible using "
     259                           "the 'C' locale.")
     260  
     261      s = _localize(f'{abs(val):.{digits}f}', grouping, monetary=True)
     262      # '<' and '>' are markers if the sign must be inserted between symbol and value
     263      s = '<' + s + '>'
     264  
     265      if symbol:
     266          smb = conv[international and 'int_curr_symbol' or 'currency_symbol']
     267          precedes = conv[val<0 and 'n_cs_precedes' or 'p_cs_precedes']
     268          separated = conv[val<0 and 'n_sep_by_space' or 'p_sep_by_space']
     269  
     270          if precedes:
     271              s = smb + (separated and ' ' or '') + s
     272          else:
     273              if international and smb[-1] == ' ':
     274                  smb = smb[:-1]
     275              s = s + (separated and ' ' or '') + smb
     276  
     277      sign_pos = conv[val<0 and 'n_sign_posn' or 'p_sign_posn']
     278      sign = conv[val<0 and 'negative_sign' or 'positive_sign']
     279  
     280      if sign_pos == 0:
     281          s = '(' + s + ')'
     282      elif sign_pos == 1:
     283          s = sign + s
     284      elif sign_pos == 2:
     285          s = s + sign
     286      elif sign_pos == 3:
     287          s = s.replace('<', sign)
     288      elif sign_pos == 4:
     289          s = s.replace('>', sign)
     290      else:
     291          # the default if nothing specified;
     292          # this should be the most fitting sign position
     293          s = sign + s
     294  
     295      return s.replace('<', '').replace('>', '')
     296  
     297  def str(val):
     298      """Convert float to string, taking the locale into account."""
     299      return _format("%.12g", val)
     300  
     301  def delocalize(string):
     302      "Parses a string as a normalized number according to the locale settings."
     303  
     304      conv = localeconv()
     305  
     306      #First, get rid of the grouping
     307      ts = conv['thousands_sep']
     308      if ts:
     309          string = string.replace(ts, '')
     310  
     311      #next, replace the decimal point with a dot
     312      dd = conv['decimal_point']
     313      if dd:
     314          string = string.replace(dd, '.')
     315      return string
     316  
     317  def localize(string, grouping=False, monetary=False):
     318      """Parses a string as locale number according to the locale settings."""
     319      return _localize(string, grouping, monetary)
     320  
     321  def atof(string, func=float):
     322      "Parses a string as a float according to the locale settings."
     323      return func(delocalize(string))
     324  
     325  def atoi(string):
     326      "Converts a string to an integer according to the locale settings."
     327      return int(delocalize(string))
     328  
     329  def _test():
     330      setlocale(LC_ALL, "")
     331      #do grouping
     332      s1 = format_string("%d", 123456789,1)
     333      print(s1, "is", atoi(s1))
     334      #standard formatting
     335      s1 = str(3.14)
     336      print(s1, "is", atof(s1))
     337  
     338  ### Locale name aliasing engine
     339  
     340  # Author: Marc-Andre Lemburg, mal@lemburg.com
     341  # Various tweaks by Fredrik Lundh <fredrik@pythonware.com>
     342  
# Store away the low-level version of setlocale (it is overridden below
# by a wrapper that also accepts non-string locale arguments).
_setlocale = setlocale
     346  
     347  def _replace_encoding(code, encoding):
     348      if '.' in code:
     349          langname = code[:code.index('.')]
     350      else:
     351          langname = code
     352      # Convert the encoding to a C lib compatible encoding string
     353      norm_encoding = encodings.normalize_encoding(encoding)
     354      #print('norm encoding: %r' % norm_encoding)
     355      norm_encoding = encodings.aliases.aliases.get(norm_encoding.lower(),
     356                                                    norm_encoding)
     357      #print('aliased encoding: %r' % norm_encoding)
     358      encoding = norm_encoding
     359      norm_encoding = norm_encoding.lower()
     360      if norm_encoding in locale_encoding_alias:
     361          encoding = locale_encoding_alias[norm_encoding]
     362      else:
     363          norm_encoding = norm_encoding.replace('_', '')
     364          norm_encoding = norm_encoding.replace('-', '')
     365          if norm_encoding in locale_encoding_alias:
     366              encoding = locale_encoding_alias[norm_encoding]
     367      #print('found encoding %r' % encoding)
     368      return langname + '.' + encoding
     369  
     370  def _append_modifier(code, modifier):
     371      if modifier == 'euro':
     372          if '.' not in code:
     373              return code + '.ISO8859-15'
     374          _, _, encoding = code.partition('.')
     375          if encoding in ('ISO8859-15', 'UTF-8'):
     376              return code
     377          if encoding == 'ISO8859-1':
     378              return _replace_encoding(code, 'ISO8859-15')
     379      return code + '@' + modifier
     380  
def normalize(localename):

    """ Returns a normalized locale code for the given locale
        name.

        The name is lower-cased and split into language, encoding and
        modifier; the locale_alias table is then consulted with
        progressively less specific keys (full name, name without
        modifier, name without encoding, bare language name).  The
        requested encoding and modifier are re-applied to the alias
        that was found.

        If normalization fails, the original name is returned
        unchanged.

    """
    # Normalize the locale name and extract the encoding and modifier
    code = localename.lower()
    if ':' in code:
        # ':' is sometimes used as encoding delimiter.
        code = code.replace(':', '.')
    if '@' in code:
        code, modifier = code.split('@', 1)
    else:
        modifier = ''
    if '.' in code:
        # anything after a second '.' is ignored
        langname, encoding = code.split('.')[:2]
    else:
        langname = code
        encoding = ''

    # First lookup: fullname (possibly with encoding and modifier)
    lang_enc = langname
    if encoding:
        # table keys carry the encoding without '-' and '_'
        norm_encoding = encoding.replace('-', '')
        norm_encoding = norm_encoding.replace('_', '')
        lang_enc += '.' + norm_encoding
    lookup_name = lang_enc
    if modifier:
        lookup_name += '@' + modifier
    code = locale_alias.get(lookup_name, None)
    if code is not None:
        return code

    if modifier:
        # Second try: fullname without modifier (possibly with encoding)
        code = locale_alias.get(lang_enc, None)
        if code is not None:
            if '@' not in code:
                return _append_modifier(code, modifier)
            # the alias already carries a modifier: accept it only if it
            # matches the requested one
            if code.split('@', 1)[1].lower() == modifier:
                return code

    if encoding:
        # Third try: langname (without encoding, possibly with modifier)
        lookup_name = langname
        if modifier:
            lookup_name += '@' + modifier
        code = locale_alias.get(lookup_name, None)
        if code is not None:
            if '@' not in code:
                return _replace_encoding(code, encoding)
            # swap the encoding in front of the alias's own modifier
            code, modifier = code.split('@', 1)
            return _replace_encoding(code, encoding) + '@' + modifier

        if modifier:
            # Fourth try: langname (without encoding and modifier)
            code = locale_alias.get(langname, None)
            if code is not None:
                if '@' not in code:
                    code = _replace_encoding(code, encoding)
                    return _append_modifier(code, modifier)
                code, defmod = code.split('@', 1)
                if defmod.lower() == modifier:
                    return _replace_encoding(code, encoding) + '@' + defmod

    # every lookup failed: hand the name back untouched
    return localename
     463  
     464  def _parse_localename(localename):
     465  
     466      """ Parses the locale code for localename and returns the
     467          result as tuple (language code, encoding).
     468  
     469          The localename is normalized and passed through the locale
     470          alias engine. A ValueError is raised in case the locale name
     471          cannot be parsed.
     472  
     473          The language code corresponds to RFC 1766.  code and encoding
     474          can be None in case the values cannot be determined or are
     475          unknown to this implementation.
     476  
     477      """
     478      code = normalize(localename)
     479      if '@' in code:
     480          # Deal with locale modifiers
     481          code, modifier = code.split('@', 1)
     482          if modifier == 'euro' and '.' not in code:
     483              # Assume Latin-9 for @euro locales. This is bogus,
     484              # since some systems may use other encodings for these
     485              # locales. Also, we ignore other modifiers.
     486              return code, 'iso-8859-15'
     487  
     488      if '.' in code:
     489          return tuple(code.split('.')[:2])
     490      elif code == 'C':
     491          return None, None
     492      elif code == 'UTF-8':
     493          # On macOS "LC_CTYPE=UTF-8" is a valid locale setting
     494          # for getting UTF-8 handling for text.
     495          return None, 'UTF-8'
     496      raise ValueError('unknown locale: %s' % localename)
     497  
     498  def _build_localename(localetuple):
     499  
     500      """ Builds a locale code from the given tuple (language code,
     501          encoding).
     502  
     503          No aliasing or normalizing takes place.
     504  
     505      """
     506      try:
     507          language, encoding = localetuple
     508  
     509          if language is None:
     510              language = 'C'
     511          if encoding is None:
     512              return language
     513          else:
     514              return language + '.' + encoding
     515      except (TypeError, ValueError):
     516          raise TypeError('Locale must be None, a string, or an iterable of '
     517                          'two strings -- language code, encoding.') from None
     518  
     519  def getdefaultlocale(envvars=('LC_ALL', 'LC_CTYPE', 'LANG', 'LANGUAGE')):
     520  
     521      """ Tries to determine the default locale settings and returns
     522          them as tuple (language code, encoding).
     523  
     524          According to POSIX, a program which has not called
     525          setlocale(LC_ALL, "") runs using the portable 'C' locale.
     526          Calling setlocale(LC_ALL, "") lets it use the default locale as
     527          defined by the LANG variable. Since we don't want to interfere
     528          with the current locale setting we thus emulate the behavior
     529          in the way described above.
     530  
     531          To maintain compatibility with other platforms, not only the
     532          LANG variable is tested, but a list of variables given as
     533          envvars parameter. The first found to be defined will be
     534          used. envvars defaults to the search path used in GNU gettext;
     535          it must always contain the variable name 'LANG'.
     536  
     537          Except for the code 'C', the language code corresponds to RFC
     538          1766.  code and encoding can be None in case the values cannot
     539          be determined.
     540  
     541      """
     542  
     543      import warnings
     544      warnings.warn(
     545          "Use setlocale(), getencoding() and getlocale() instead",
     546          DeprecationWarning, stacklevel=2
     547      )
     548      return _getdefaultlocale(envvars)
     549  
     550  def _getdefaultlocale(envvars=('LC_ALL', 'LC_CTYPE', 'LANG', 'LANGUAGE')):
     551      try:
     552          # check if it's supported by the _locale module
     553          import _locale
     554          code, encoding = _locale._getdefaultlocale()
     555      except (ImportError, AttributeError):
     556          pass
     557      else:
     558          # make sure the code/encoding values are valid
     559          if sys.platform == "win32" and code and code[:2] == "0x":
     560              # map windows language identifier to language name
     561              code = windows_locale.get(int(code, 0))
     562          # ...add other platform-specific processing here, if
     563          # necessary...
     564          return code, encoding
     565  
     566      # fall back on POSIX behaviour
     567      import os
     568      lookup = os.environ.get
     569      for variable in envvars:
     570          localename = lookup(variable,None)
     571          if localename:
     572              if variable == 'LANGUAGE':
     573                  localename = localename.split(':')[0]
     574              break
     575      else:
     576          localename = 'C'
     577      return _parse_localename(localename)
     578  
     579  
     580  def getlocale(category=LC_CTYPE):
     581  
     582      """ Returns the current setting for the given locale category as
     583          tuple (language code, encoding).
     584  
     585          category may be one of the LC_* value except LC_ALL. It
     586          defaults to LC_CTYPE.
     587  
     588          Except for the code 'C', the language code corresponds to RFC
     589          1766.  code and encoding can be None in case the values cannot
     590          be determined.
     591  
     592      """
     593      localename = _setlocale(category)
     594      if category == LC_ALL and ';' in localename:
     595          raise TypeError('category LC_ALL is not supported')
     596      return _parse_localename(localename)
     597  
     598  def setlocale(category, locale=None):
     599  
     600      """ Set the locale for the given category.  The locale can be
     601          a string, an iterable of two strings (language code and encoding),
     602          or None.
     603  
     604          Iterables are converted to strings using the locale aliasing
     605          engine.  Locale strings are passed directly to the C lib.
     606  
     607          category may be given as one of the LC_* values.
     608  
     609      """
     610      if locale and not isinstance(locale, _builtin_str):
     611          # convert to string
     612          locale = normalize(_build_localename(locale))
     613      return _setlocale(category, locale)
     614  
     615  def resetlocale(category=LC_ALL):
     616  
     617      """ Sets the locale for category to the default setting.
     618  
     619          The default setting is determined by calling
     620          getdefaultlocale(). category defaults to LC_ALL.
     621  
     622      """
     623      import warnings
     624      warnings.warn(
     625          'Use locale.setlocale(locale.LC_ALL, "") instead',
     626          DeprecationWarning, stacklevel=2
     627      )
     628  
     629      with warnings.catch_warnings():
     630          warnings.simplefilter('ignore', category=DeprecationWarning)
     631          loc = getdefaultlocale()
     632  
     633      _setlocale(category, _build_localename(loc))
     634  
     635  
# Prefer the C implementation of getencoding(); otherwise derive the
# encoding from the default locale.
try:
    from _locale import getencoding
except ImportError:
    def getencoding():
        """Return the encoding of the default locale, or 'utf-8' if it
        cannot be determined."""
        if hasattr(sys, 'getandroidapilevel'):
            # On Android langinfo.h and CODESET are missing, and UTF-8 is
            # always used in mbstowcs() and wcstombs().
            return 'utf-8'
        # second item of the (language code, encoding) tuple
        encoding = _getdefaultlocale()[1]
        if encoding is None:
            # LANG not set, default to UTF-8
            encoding = 'utf-8'
        return encoding
     649  
# Pick the getpreferredencoding() variant depending on whether the
# CODESET name is defined in this module's namespace.
try:
    CODESET
except NameError:
    def getpreferredencoding(do_setlocale=True):
        """Return the charset that the user is likely using."""
        # do_setlocale is accepted for signature compatibility; this
        # variant never changes the locale.
        if sys.flags.warn_default_encoding:
            import warnings
            warnings.warn(
                "UTF-8 Mode affects locale.getpreferredencoding(). Consider locale.getencoding() instead.",
                EncodingWarning, 2)
        if sys.flags.utf8_mode:
            return 'utf-8'
        return getencoding()
else:
    # On Unix, if CODESET is available, use that.
    def getpreferredencoding(do_setlocale=True):
        """Return the charset that the user is likely using,
        according to the system configuration."""

        if sys.flags.warn_default_encoding:
            import warnings
            warnings.warn(
                "UTF-8 Mode affects locale.getpreferredencoding(). Consider locale.getencoding() instead.",
                EncodingWarning, 2)
        if sys.flags.utf8_mode:
            return 'utf-8'

        if not do_setlocale:
            return getencoding()

        # Temporarily switch LC_CTYPE to the user's default locale, query
        # the encoding, and always restore the previous setting.
        old_loc = setlocale(LC_CTYPE)
        try:
            try:
                setlocale(LC_CTYPE, "")
            except Error:
                # best effort: keep the current locale if the default
                # one cannot be activated
                pass
            return getencoding()
        finally:
            setlocale(LC_CTYPE, old_loc)
     689  
     690  
     691  ### Database
     692  #
     693  # The following data was extracted from the locale.alias file which
     694  # comes with X11 and then hand edited removing the explicit encoding
     695  # definitions and adding some more aliases. The file is usually
     696  # available as /usr/lib/X11/locale/locale.alias.
     697  #
     698  
     699  #
# The locale_encoding_alias table maps lowercase encoding alias names
# to C locale encoding names (case-sensitive). Note that normalize()
# (through _replace_encoding()) first looks up the encoding in the
# encodings.aliases dictionary and then applies this mapping to find
# the correct C lib name for the encoding.
     705  #
# Keys are lowercase encoding names; values are the spellings handed to
# the C library.
locale_encoding_alias = {

    # Mappings for non-standard encoding names used in locale names
    '437':                          'C',
    'c':                            'C',
    'en':                           'ISO8859-1',
    'jis':                          'JIS7',
    'jis7':                         'JIS7',
    'ajec':                         'eucJP',
    'koi8c':                        'KOI8-C',
    'microsoftcp1251':              'CP1251',
    'microsoftcp1255':              'CP1255',
    'microsoftcp1256':              'CP1256',
    '88591':                        'ISO8859-1',
    '88592':                        'ISO8859-2',
    '88595':                        'ISO8859-5',
    '885915':                       'ISO8859-15',

    # Mappings from Python codec names to C lib encoding names
    'ascii':                        'ISO8859-1',
    'latin_1':                      'ISO8859-1',
    'iso8859_1':                    'ISO8859-1',
    'iso8859_10':                   'ISO8859-10',
    'iso8859_11':                   'ISO8859-11',
    'iso8859_13':                   'ISO8859-13',
    'iso8859_14':                   'ISO8859-14',
    'iso8859_15':                   'ISO8859-15',
    'iso8859_16':                   'ISO8859-16',
    'iso8859_2':                    'ISO8859-2',
    'iso8859_3':                    'ISO8859-3',
    'iso8859_4':                    'ISO8859-4',
    'iso8859_5':                    'ISO8859-5',
    'iso8859_6':                    'ISO8859-6',
    'iso8859_7':                    'ISO8859-7',
    'iso8859_8':                    'ISO8859-8',
    'iso8859_9':                    'ISO8859-9',
    'iso2022_jp':                   'JIS7',
    'shift_jis':                    'SJIS',
    'tactis':                       'TACTIS',
    'euc_jp':                       'eucJP',
    'euc_kr':                       'eucKR',
    'utf_8':                        'UTF-8',
    'koi8_r':                       'KOI8-R',
    'koi8_t':                       'KOI8-T',
    'koi8_u':                       'KOI8-U',
    'kz1048':                       'RK1048',
    'cp1251':                       'CP1251',
    'cp1255':                       'CP1255',
    'cp1256':                       'CP1256',

    # XXX This list is still incomplete. If you know more
    # mappings, please file a bug report. Thanks.
}

# Also register every key with its underscores removed, without
# overwriting an entry that already exists under that spelling.
for k, v in sorted(locale_encoding_alias.items()):
    k = k.replace('_', '')
    locale_encoding_alias.setdefault(k, v)
# do not leave the loop variables behind in the module namespace
del k, v
     764  
     765  #
     766  # The locale_alias table maps lowercase alias names to C locale names
     767  # (case-sensitive). Encodings are always separated from the locale
     768  # name using a dot ('.'); they should only be given in case the
     769  # language name is needed to interpret the given encoding alias
     770  # correctly (CJK codes often have this need).
     771  #
# Note that the normalize() function which uses this table
     773  # removes '_' and '-' characters from the encoding part of the
     774  # locale name before doing the lookup. This saves a lot of
     775  # space in the table.
     776  #
     777  # MAL 2004-12-10:
     778  # Updated alias mapping to most recent locale.alias file
     779  # from X.org distribution using makelocalealias.py.
     780  #
     781  # These are the differences compared to the old mapping (Python 2.4
     782  # and older):
     783  #
     784  #    updated 'bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
     785  #    updated 'bg_bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
     786  #    updated 'bulgarian' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
     787  #    updated 'cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2'
     788  #    updated 'cz_cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2'
     789  #    updated 'czech' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2'
     790  #    updated 'dutch' -> 'nl_BE.ISO8859-1' to 'nl_NL.ISO8859-1'
     791  #    updated 'et' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15'
     792  #    updated 'et_ee' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15'
     793  #    updated 'fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15'
     794  #    updated 'fi_fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15'
     795  #    updated 'iw' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
     796  #    updated 'iw_il' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
     797  #    updated 'japanese' -> 'ja_JP.SJIS' to 'ja_JP.eucJP'
     798  #    updated 'lt' -> 'lt_LT.ISO8859-4' to 'lt_LT.ISO8859-13'
     799  #    updated 'lv' -> 'lv_LV.ISO8859-4' to 'lv_LV.ISO8859-13'
     800  #    updated 'sl' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2'
     801  #    updated 'slovene' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2'
     802  #    updated 'th_th' -> 'th_TH.TACTIS' to 'th_TH.ISO8859-11'
     803  #    updated 'zh_cn' -> 'zh_CN.eucCN' to 'zh_CN.gb2312'
     804  #    updated 'zh_cn.big5' -> 'zh_TW.eucTW' to 'zh_TW.big5'
     805  #    updated 'zh_tw' -> 'zh_TW.eucTW' to 'zh_TW.big5'
     806  #
     807  # MAL 2008-05-30:
     808  # Updated alias mapping to most recent locale.alias file
     809  # from X.org distribution using makelocalealias.py.
     810  #
     811  # These are the differences compared to the old mapping (Python 2.5
     812  # and older):
     813  #
     814  #    updated 'cs_cs.iso88592' -> 'cs_CZ.ISO8859-2' to 'cs_CS.ISO8859-2'
     815  #    updated 'serbocroatian' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
     816  #    updated 'sh' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
     817  #    updated 'sh_hr.iso88592' -> 'sh_HR.ISO8859-2' to 'hr_HR.ISO8859-2'
     818  #    updated 'sh_sp' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
     819  #    updated 'sh_yu' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
     820  #    updated 'sp' -> 'sp_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
     821  #    updated 'sp_yu' -> 'sp_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
     822  #    updated 'sr' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
     823  #    updated 'sr@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
     824  #    updated 'sr_sp' -> 'sr_SP.ISO8859-2' to 'sr_CS.ISO8859-2'
     825  #    updated 'sr_yu' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
     826  #    updated 'sr_yu.cp1251@cyrillic' -> 'sr_YU.CP1251' to 'sr_CS.CP1251'
     827  #    updated 'sr_yu.iso88592' -> 'sr_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
     828  #    updated 'sr_yu.iso88595' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
     829  #    updated 'sr_yu.iso88595@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
     830  #    updated 'sr_yu.microsoftcp1251@cyrillic' -> 'sr_YU.CP1251' to 'sr_CS.CP1251'
     831  #    updated 'sr_yu.utf8@cyrillic' -> 'sr_YU.UTF-8' to 'sr_CS.UTF-8'
     832  #    updated 'sr_yu@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
     833  #
     834  # AP 2010-04-12:
     835  # Updated alias mapping to most recent locale.alias file
     836  # from X.org distribution using makelocalealias.py.
     837  #
     838  # These are the differences compared to the old mapping (Python 2.6.5
     839  # and older):
     840  #
     841  #    updated 'ru' -> 'ru_RU.ISO8859-5' to 'ru_RU.UTF-8'
     842  #    updated 'ru_ru' -> 'ru_RU.ISO8859-5' to 'ru_RU.UTF-8'
     843  #    updated 'serbocroatian' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
     844  #    updated 'sh' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
     845  #    updated 'sh_yu' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
     846  #    updated 'sr' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
     847  #    updated 'sr@cyrillic' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
     848  #    updated 'sr@latn' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
     849  #    updated 'sr_cs.utf8@latn' -> 'sr_CS.UTF-8' to 'sr_RS.UTF-8@latin'
     850  #    updated 'sr_cs@latn' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
     851  #    updated 'sr_yu' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8@latin'
     852  #    updated 'sr_yu.utf8@cyrillic' -> 'sr_CS.UTF-8' to 'sr_RS.UTF-8'
     853  #    updated 'sr_yu@cyrillic' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
     854  #
     855  # SS 2013-12-20:
     856  # Updated alias mapping to most recent locale.alias file
     857  # from X.org distribution using makelocalealias.py.
     858  #
     859  # These are the differences compared to the old mapping (Python 3.3.3
     860  # and older):
     861  #
     862  #    updated 'a3' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
     863  #    updated 'a3_az' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
     864  #    updated 'a3_az.koi8c' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
     865  #    updated 'cs_cs.iso88592' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2'
     866  #    updated 'hebrew' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
     867  #    updated 'hebrew.iso88598' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
     868  #    updated 'sd' -> 'sd_IN@devanagari.UTF-8' to 'sd_IN.UTF-8'
     869  #    updated 'sr@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
     870  #    updated 'sr_cs' -> 'sr_RS.UTF-8' to 'sr_CS.UTF-8'
     871  #    updated 'sr_cs.utf8@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
     872  #    updated 'sr_cs@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
     873  #
     874  # SS 2014-10-01:
     875  # Updated alias mapping with glibc 2.19 supported locales.
     876  #
     877  # SS 2018-05-05:
     878  # Updated alias mapping with glibc 2.27 supported locales.
     879  #
     880  # These are the differences compared to the old mapping (Python 3.6.5
     881  # and older):
     882  #
     883  #    updated 'ca_es@valencia' -> 'ca_ES.ISO8859-15@valencia' to 'ca_ES.UTF-8@valencia'
     884  #    updated 'kk_kz' -> 'kk_KZ.RK1048' to 'kk_KZ.ptcp154'
     885  #    updated 'russian' -> 'ru_RU.ISO8859-5' to 'ru_RU.KOI8-R'
     886  
     887  locale_alias = {
     888      'a3':                                   'az_AZ.KOI8-C',
     889      'a3_az':                                'az_AZ.KOI8-C',
     890      'a3_az.koic':                           'az_AZ.KOI8-C',
     891      'aa_dj':                                'aa_DJ.ISO8859-1',
     892      'aa_er':                                'aa_ER.UTF-8',
     893      'aa_et':                                'aa_ET.UTF-8',
     894      'af':                                   'af_ZA.ISO8859-1',
     895      'af_za':                                'af_ZA.ISO8859-1',
     896      'agr_pe':                               'agr_PE.UTF-8',
     897      'ak_gh':                                'ak_GH.UTF-8',
     898      'am':                                   'am_ET.UTF-8',
     899      'am_et':                                'am_ET.UTF-8',
     900      'american':                             'en_US.ISO8859-1',
     901      'an_es':                                'an_ES.ISO8859-15',
     902      'anp_in':                               'anp_IN.UTF-8',
     903      'ar':                                   'ar_AA.ISO8859-6',
     904      'ar_aa':                                'ar_AA.ISO8859-6',
     905      'ar_ae':                                'ar_AE.ISO8859-6',
     906      'ar_bh':                                'ar_BH.ISO8859-6',
     907      'ar_dz':                                'ar_DZ.ISO8859-6',
     908      'ar_eg':                                'ar_EG.ISO8859-6',
     909      'ar_in':                                'ar_IN.UTF-8',
     910      'ar_iq':                                'ar_IQ.ISO8859-6',
     911      'ar_jo':                                'ar_JO.ISO8859-6',
     912      'ar_kw':                                'ar_KW.ISO8859-6',
     913      'ar_lb':                                'ar_LB.ISO8859-6',
     914      'ar_ly':                                'ar_LY.ISO8859-6',
     915      'ar_ma':                                'ar_MA.ISO8859-6',
     916      'ar_om':                                'ar_OM.ISO8859-6',
     917      'ar_qa':                                'ar_QA.ISO8859-6',
     918      'ar_sa':                                'ar_SA.ISO8859-6',
     919      'ar_sd':                                'ar_SD.ISO8859-6',
     920      'ar_ss':                                'ar_SS.UTF-8',
     921      'ar_sy':                                'ar_SY.ISO8859-6',
     922      'ar_tn':                                'ar_TN.ISO8859-6',
     923      'ar_ye':                                'ar_YE.ISO8859-6',
     924      'arabic':                               'ar_AA.ISO8859-6',
     925      'as':                                   'as_IN.UTF-8',
     926      'as_in':                                'as_IN.UTF-8',
     927      'ast_es':                               'ast_ES.ISO8859-15',
     928      'ayc_pe':                               'ayc_PE.UTF-8',
     929      'az':                                   'az_AZ.ISO8859-9E',
     930      'az_az':                                'az_AZ.ISO8859-9E',
     931      'az_az.iso88599e':                      'az_AZ.ISO8859-9E',
     932      'az_ir':                                'az_IR.UTF-8',
     933      'be':                                   'be_BY.CP1251',
     934      'be@latin':                             'be_BY.UTF-8@latin',
     935      'be_bg.utf8':                           'bg_BG.UTF-8',
     936      'be_by':                                'be_BY.CP1251',
     937      'be_by@latin':                          'be_BY.UTF-8@latin',
     938      'bem_zm':                               'bem_ZM.UTF-8',
     939      'ber_dz':                               'ber_DZ.UTF-8',
     940      'ber_ma':                               'ber_MA.UTF-8',
     941      'bg':                                   'bg_BG.CP1251',
     942      'bg_bg':                                'bg_BG.CP1251',
     943      'bhb_in.utf8':                          'bhb_IN.UTF-8',
     944      'bho_in':                               'bho_IN.UTF-8',
     945      'bho_np':                               'bho_NP.UTF-8',
     946      'bi_vu':                                'bi_VU.UTF-8',
     947      'bn_bd':                                'bn_BD.UTF-8',
     948      'bn_in':                                'bn_IN.UTF-8',
     949      'bo_cn':                                'bo_CN.UTF-8',
     950      'bo_in':                                'bo_IN.UTF-8',
     951      'bokmal':                               'nb_NO.ISO8859-1',
     952      'bokm\xe5l':                            'nb_NO.ISO8859-1',
     953      'br':                                   'br_FR.ISO8859-1',
     954      'br_fr':                                'br_FR.ISO8859-1',
     955      'brx_in':                               'brx_IN.UTF-8',
     956      'bs':                                   'bs_BA.ISO8859-2',
     957      'bs_ba':                                'bs_BA.ISO8859-2',
     958      'bulgarian':                            'bg_BG.CP1251',
     959      'byn_er':                               'byn_ER.UTF-8',
     960      'c':                                    'C',
     961      'c-french':                             'fr_CA.ISO8859-1',
     962      'c.ascii':                              'C',
     963      'c.en':                                 'C',
     964      'c.iso88591':                           'en_US.ISO8859-1',
     965      'c.utf8':                               'C.UTF-8',
     966      'c_c':                                  'C',
     967      'c_c.c':                                'C',
     968      'ca':                                   'ca_ES.ISO8859-1',
     969      'ca_ad':                                'ca_AD.ISO8859-1',
     970      'ca_es':                                'ca_ES.ISO8859-1',
     971      'ca_es@valencia':                       'ca_ES.UTF-8@valencia',
     972      'ca_fr':                                'ca_FR.ISO8859-1',
     973      'ca_it':                                'ca_IT.ISO8859-1',
     974      'catalan':                              'ca_ES.ISO8859-1',
     975      'ce_ru':                                'ce_RU.UTF-8',
     976      'cextend':                              'en_US.ISO8859-1',
     977      'chinese-s':                            'zh_CN.eucCN',
     978      'chinese-t':                            'zh_TW.eucTW',
     979      'chr_us':                               'chr_US.UTF-8',
     980      'ckb_iq':                               'ckb_IQ.UTF-8',
     981      'cmn_tw':                               'cmn_TW.UTF-8',
     982      'crh_ua':                               'crh_UA.UTF-8',
     983      'croatian':                             'hr_HR.ISO8859-2',
     984      'cs':                                   'cs_CZ.ISO8859-2',
     985      'cs_cs':                                'cs_CZ.ISO8859-2',
     986      'cs_cz':                                'cs_CZ.ISO8859-2',
     987      'csb_pl':                               'csb_PL.UTF-8',
     988      'cv_ru':                                'cv_RU.UTF-8',
     989      'cy':                                   'cy_GB.ISO8859-1',
     990      'cy_gb':                                'cy_GB.ISO8859-1',
     991      'cz':                                   'cs_CZ.ISO8859-2',
     992      'cz_cz':                                'cs_CZ.ISO8859-2',
     993      'czech':                                'cs_CZ.ISO8859-2',
     994      'da':                                   'da_DK.ISO8859-1',
     995      'da_dk':                                'da_DK.ISO8859-1',
     996      'danish':                               'da_DK.ISO8859-1',
     997      'dansk':                                'da_DK.ISO8859-1',
     998      'de':                                   'de_DE.ISO8859-1',
     999      'de_at':                                'de_AT.ISO8859-1',
    1000      'de_be':                                'de_BE.ISO8859-1',
    1001      'de_ch':                                'de_CH.ISO8859-1',
    1002      'de_de':                                'de_DE.ISO8859-1',
    1003      'de_it':                                'de_IT.ISO8859-1',
    1004      'de_li.utf8':                           'de_LI.UTF-8',
    1005      'de_lu':                                'de_LU.ISO8859-1',
    1006      'deutsch':                              'de_DE.ISO8859-1',
    1007      'doi_in':                               'doi_IN.UTF-8',
    1008      'dutch':                                'nl_NL.ISO8859-1',
    1009      'dutch.iso88591':                       'nl_BE.ISO8859-1',
    1010      'dv_mv':                                'dv_MV.UTF-8',
    1011      'dz_bt':                                'dz_BT.UTF-8',
    1012      'ee':                                   'ee_EE.ISO8859-4',
    1013      'ee_ee':                                'ee_EE.ISO8859-4',
    1014      'eesti':                                'et_EE.ISO8859-1',
    1015      'el':                                   'el_GR.ISO8859-7',
    1016      'el_cy':                                'el_CY.ISO8859-7',
    1017      'el_gr':                                'el_GR.ISO8859-7',
    1018      'el_gr@euro':                           'el_GR.ISO8859-15',
    1019      'en':                                   'en_US.ISO8859-1',
    1020      'en_ag':                                'en_AG.UTF-8',
    1021      'en_au':                                'en_AU.ISO8859-1',
    1022      'en_be':                                'en_BE.ISO8859-1',
    1023      'en_bw':                                'en_BW.ISO8859-1',
    1024      'en_ca':                                'en_CA.ISO8859-1',
    1025      'en_dk':                                'en_DK.ISO8859-1',
    1026      'en_dl.utf8':                           'en_DL.UTF-8',
    1027      'en_gb':                                'en_GB.ISO8859-1',
    1028      'en_hk':                                'en_HK.ISO8859-1',
    1029      'en_ie':                                'en_IE.ISO8859-1',
    1030      'en_il':                                'en_IL.UTF-8',
    1031      'en_in':                                'en_IN.ISO8859-1',
    1032      'en_ng':                                'en_NG.UTF-8',
    1033      'en_nz':                                'en_NZ.ISO8859-1',
    1034      'en_ph':                                'en_PH.ISO8859-1',
    1035      'en_sc.utf8':                           'en_SC.UTF-8',
    1036      'en_sg':                                'en_SG.ISO8859-1',
    1037      'en_uk':                                'en_GB.ISO8859-1',
    1038      'en_us':                                'en_US.ISO8859-1',
    1039      'en_us@euro@euro':                      'en_US.ISO8859-15',
    1040      'en_za':                                'en_ZA.ISO8859-1',
    1041      'en_zm':                                'en_ZM.UTF-8',
    1042      'en_zw':                                'en_ZW.ISO8859-1',
    1043      'en_zw.utf8':                           'en_ZS.UTF-8',
    1044      'eng_gb':                               'en_GB.ISO8859-1',
    1045      'english':                              'en_EN.ISO8859-1',
    1046      'english.iso88591':                     'en_US.ISO8859-1',
    1047      'english_uk':                           'en_GB.ISO8859-1',
    1048      'english_united-states':                'en_US.ISO8859-1',
    1049      'english_united-states.437':            'C',
    1050      'english_us':                           'en_US.ISO8859-1',
    1051      'eo':                                   'eo_XX.ISO8859-3',
    1052      'eo.utf8':                              'eo.UTF-8',
    1053      'eo_eo':                                'eo_EO.ISO8859-3',
    1054      'eo_us.utf8':                           'eo_US.UTF-8',
    1055      'eo_xx':                                'eo_XX.ISO8859-3',
    1056      'es':                                   'es_ES.ISO8859-1',
    1057      'es_ar':                                'es_AR.ISO8859-1',
    1058      'es_bo':                                'es_BO.ISO8859-1',
    1059      'es_cl':                                'es_CL.ISO8859-1',
    1060      'es_co':                                'es_CO.ISO8859-1',
    1061      'es_cr':                                'es_CR.ISO8859-1',
    1062      'es_cu':                                'es_CU.UTF-8',
    1063      'es_do':                                'es_DO.ISO8859-1',
    1064      'es_ec':                                'es_EC.ISO8859-1',
    1065      'es_es':                                'es_ES.ISO8859-1',
    1066      'es_gt':                                'es_GT.ISO8859-1',
    1067      'es_hn':                                'es_HN.ISO8859-1',
    1068      'es_mx':                                'es_MX.ISO8859-1',
    1069      'es_ni':                                'es_NI.ISO8859-1',
    1070      'es_pa':                                'es_PA.ISO8859-1',
    1071      'es_pe':                                'es_PE.ISO8859-1',
    1072      'es_pr':                                'es_PR.ISO8859-1',
    1073      'es_py':                                'es_PY.ISO8859-1',
    1074      'es_sv':                                'es_SV.ISO8859-1',
    1075      'es_us':                                'es_US.ISO8859-1',
    1076      'es_uy':                                'es_UY.ISO8859-1',
    1077      'es_ve':                                'es_VE.ISO8859-1',
    1078      'estonian':                             'et_EE.ISO8859-1',
    1079      'et':                                   'et_EE.ISO8859-15',
    1080      'et_ee':                                'et_EE.ISO8859-15',
    1081      'eu':                                   'eu_ES.ISO8859-1',
    1082      'eu_es':                                'eu_ES.ISO8859-1',
    1083      'eu_fr':                                'eu_FR.ISO8859-1',
    1084      'fa':                                   'fa_IR.UTF-8',
    1085      'fa_ir':                                'fa_IR.UTF-8',
    1086      'fa_ir.isiri3342':                      'fa_IR.ISIRI-3342',
    1087      'ff_sn':                                'ff_SN.UTF-8',
    1088      'fi':                                   'fi_FI.ISO8859-15',
    1089      'fi_fi':                                'fi_FI.ISO8859-15',
    1090      'fil_ph':                               'fil_PH.UTF-8',
    1091      'finnish':                              'fi_FI.ISO8859-1',
    1092      'fo':                                   'fo_FO.ISO8859-1',
    1093      'fo_fo':                                'fo_FO.ISO8859-1',
    1094      'fr':                                   'fr_FR.ISO8859-1',
    1095      'fr_be':                                'fr_BE.ISO8859-1',
    1096      'fr_ca':                                'fr_CA.ISO8859-1',
    1097      'fr_ch':                                'fr_CH.ISO8859-1',
    1098      'fr_fr':                                'fr_FR.ISO8859-1',
    1099      'fr_lu':                                'fr_LU.ISO8859-1',
    1100      'fran\xe7ais':                          'fr_FR.ISO8859-1',
    1101      'fre_fr':                               'fr_FR.ISO8859-1',
    1102      'french':                               'fr_FR.ISO8859-1',
    1103      'french.iso88591':                      'fr_CH.ISO8859-1',
    1104      'french_france':                        'fr_FR.ISO8859-1',
    1105      'fur_it':                               'fur_IT.UTF-8',
    1106      'fy_de':                                'fy_DE.UTF-8',
    1107      'fy_nl':                                'fy_NL.UTF-8',
    1108      'ga':                                   'ga_IE.ISO8859-1',
    1109      'ga_ie':                                'ga_IE.ISO8859-1',
    1110      'galego':                               'gl_ES.ISO8859-1',
    1111      'galician':                             'gl_ES.ISO8859-1',
    1112      'gd':                                   'gd_GB.ISO8859-1',
    1113      'gd_gb':                                'gd_GB.ISO8859-1',
    1114      'ger_de':                               'de_DE.ISO8859-1',
    1115      'german':                               'de_DE.ISO8859-1',
    1116      'german.iso88591':                      'de_CH.ISO8859-1',
    1117      'german_germany':                       'de_DE.ISO8859-1',
    1118      'gez_er':                               'gez_ER.UTF-8',
    1119      'gez_et':                               'gez_ET.UTF-8',
    1120      'gl':                                   'gl_ES.ISO8859-1',
    1121      'gl_es':                                'gl_ES.ISO8859-1',
    1122      'greek':                                'el_GR.ISO8859-7',
    1123      'gu_in':                                'gu_IN.UTF-8',
    1124      'gv':                                   'gv_GB.ISO8859-1',
    1125      'gv_gb':                                'gv_GB.ISO8859-1',
    1126      'ha_ng':                                'ha_NG.UTF-8',
    1127      'hak_tw':                               'hak_TW.UTF-8',
    1128      'he':                                   'he_IL.ISO8859-8',
    1129      'he_il':                                'he_IL.ISO8859-8',
    1130      'hebrew':                               'he_IL.ISO8859-8',
    1131      'hi':                                   'hi_IN.ISCII-DEV',
    1132      'hi_in':                                'hi_IN.ISCII-DEV',
    1133      'hi_in.isciidev':                       'hi_IN.ISCII-DEV',
    1134      'hif_fj':                               'hif_FJ.UTF-8',
    1135      'hne':                                  'hne_IN.UTF-8',
    1136      'hne_in':                               'hne_IN.UTF-8',
    1137      'hr':                                   'hr_HR.ISO8859-2',
    1138      'hr_hr':                                'hr_HR.ISO8859-2',
    1139      'hrvatski':                             'hr_HR.ISO8859-2',
    1140      'hsb_de':                               'hsb_DE.ISO8859-2',
    1141      'ht_ht':                                'ht_HT.UTF-8',
    1142      'hu':                                   'hu_HU.ISO8859-2',
    1143      'hu_hu':                                'hu_HU.ISO8859-2',
    1144      'hungarian':                            'hu_HU.ISO8859-2',
    1145      'hy_am':                                'hy_AM.UTF-8',
    1146      'hy_am.armscii8':                       'hy_AM.ARMSCII_8',
    1147      'ia':                                   'ia.UTF-8',
    1148      'ia_fr':                                'ia_FR.UTF-8',
    1149      'icelandic':                            'is_IS.ISO8859-1',
    1150      'id':                                   'id_ID.ISO8859-1',
    1151      'id_id':                                'id_ID.ISO8859-1',
    1152      'ig_ng':                                'ig_NG.UTF-8',
    1153      'ik_ca':                                'ik_CA.UTF-8',
    1154      'in':                                   'id_ID.ISO8859-1',
    1155      'in_id':                                'id_ID.ISO8859-1',
    1156      'is':                                   'is_IS.ISO8859-1',
    1157      'is_is':                                'is_IS.ISO8859-1',
    1158      'iso-8859-1':                           'en_US.ISO8859-1',
    1159      'iso-8859-15':                          'en_US.ISO8859-15',
    1160      'iso8859-1':                            'en_US.ISO8859-1',
    1161      'iso8859-15':                           'en_US.ISO8859-15',
    1162      'iso_8859_1':                           'en_US.ISO8859-1',
    1163      'iso_8859_15':                          'en_US.ISO8859-15',
    1164      'it':                                   'it_IT.ISO8859-1',
    1165      'it_ch':                                'it_CH.ISO8859-1',
    1166      'it_it':                                'it_IT.ISO8859-1',
    1167      'italian':                              'it_IT.ISO8859-1',
    1168      'iu':                                   'iu_CA.NUNACOM-8',
    1169      'iu_ca':                                'iu_CA.NUNACOM-8',
    1170      'iu_ca.nunacom8':                       'iu_CA.NUNACOM-8',
    1171      'iw':                                   'he_IL.ISO8859-8',
    1172      'iw_il':                                'he_IL.ISO8859-8',
    1173      'iw_il.utf8':                           'iw_IL.UTF-8',
    1174      'ja':                                   'ja_JP.eucJP',
    1175      'ja_jp':                                'ja_JP.eucJP',
    1176      'ja_jp.euc':                            'ja_JP.eucJP',
    1177      'ja_jp.mscode':                         'ja_JP.SJIS',
    1178      'ja_jp.pck':                            'ja_JP.SJIS',
    1179      'japan':                                'ja_JP.eucJP',
    1180      'japanese':                             'ja_JP.eucJP',
    1181      'japanese-euc':                         'ja_JP.eucJP',
    1182      'japanese.euc':                         'ja_JP.eucJP',
    1183      'jp_jp':                                'ja_JP.eucJP',
    1184      'ka':                                   'ka_GE.GEORGIAN-ACADEMY',
    1185      'ka_ge':                                'ka_GE.GEORGIAN-ACADEMY',
    1186      'ka_ge.georgianacademy':                'ka_GE.GEORGIAN-ACADEMY',
    1187      'ka_ge.georgianps':                     'ka_GE.GEORGIAN-PS',
    1188      'ka_ge.georgianrs':                     'ka_GE.GEORGIAN-ACADEMY',
    1189      'kab_dz':                               'kab_DZ.UTF-8',
    1190      'kk_kz':                                'kk_KZ.ptcp154',
    1191      'kl':                                   'kl_GL.ISO8859-1',
    1192      'kl_gl':                                'kl_GL.ISO8859-1',
    1193      'km_kh':                                'km_KH.UTF-8',
    1194      'kn':                                   'kn_IN.UTF-8',
    1195      'kn_in':                                'kn_IN.UTF-8',
    1196      'ko':                                   'ko_KR.eucKR',
    1197      'ko_kr':                                'ko_KR.eucKR',
    1198      'ko_kr.euc':                            'ko_KR.eucKR',
    1199      'kok_in':                               'kok_IN.UTF-8',
    1200      'korean':                               'ko_KR.eucKR',
    1201      'korean.euc':                           'ko_KR.eucKR',
    1202      'ks':                                   'ks_IN.UTF-8',
    1203      'ks_in':                                'ks_IN.UTF-8',
    1204      'ks_in@devanagari.utf8':                'ks_IN.UTF-8@devanagari',
    1205      'ku_tr':                                'ku_TR.ISO8859-9',
    1206      'kw':                                   'kw_GB.ISO8859-1',
    1207      'kw_gb':                                'kw_GB.ISO8859-1',
    1208      'ky':                                   'ky_KG.UTF-8',
    1209      'ky_kg':                                'ky_KG.UTF-8',
    1210      'lb_lu':                                'lb_LU.UTF-8',
    1211      'lg_ug':                                'lg_UG.ISO8859-10',
    1212      'li_be':                                'li_BE.UTF-8',
    1213      'li_nl':                                'li_NL.UTF-8',
    1214      'lij_it':                               'lij_IT.UTF-8',
    1215      'lithuanian':                           'lt_LT.ISO8859-13',
    1216      'ln_cd':                                'ln_CD.UTF-8',
    1217      'lo':                                   'lo_LA.MULELAO-1',
    1218      'lo_la':                                'lo_LA.MULELAO-1',
    1219      'lo_la.cp1133':                         'lo_LA.IBM-CP1133',
    1220      'lo_la.ibmcp1133':                      'lo_LA.IBM-CP1133',
    1221      'lo_la.mulelao1':                       'lo_LA.MULELAO-1',
    1222      'lt':                                   'lt_LT.ISO8859-13',
    1223      'lt_lt':                                'lt_LT.ISO8859-13',
    1224      'lv':                                   'lv_LV.ISO8859-13',
    1225      'lv_lv':                                'lv_LV.ISO8859-13',
    1226      'lzh_tw':                               'lzh_TW.UTF-8',
    1227      'mag_in':                               'mag_IN.UTF-8',
    1228      'mai':                                  'mai_IN.UTF-8',
    1229      'mai_in':                               'mai_IN.UTF-8',
    1230      'mai_np':                               'mai_NP.UTF-8',
    1231      'mfe_mu':                               'mfe_MU.UTF-8',
    1232      'mg_mg':                                'mg_MG.ISO8859-15',
    1233      'mhr_ru':                               'mhr_RU.UTF-8',
    1234      'mi':                                   'mi_NZ.ISO8859-1',
    1235      'mi_nz':                                'mi_NZ.ISO8859-1',
    1236      'miq_ni':                               'miq_NI.UTF-8',
    1237      'mjw_in':                               'mjw_IN.UTF-8',
    1238      'mk':                                   'mk_MK.ISO8859-5',
    1239      'mk_mk':                                'mk_MK.ISO8859-5',
    1240      'ml':                                   'ml_IN.UTF-8',
    1241      'ml_in':                                'ml_IN.UTF-8',
    1242      'mn_mn':                                'mn_MN.UTF-8',
    1243      'mni_in':                               'mni_IN.UTF-8',
    1244      'mr':                                   'mr_IN.UTF-8',
    1245      'mr_in':                                'mr_IN.UTF-8',
    1246      'ms':                                   'ms_MY.ISO8859-1',
    1247      'ms_my':                                'ms_MY.ISO8859-1',
    1248      'mt':                                   'mt_MT.ISO8859-3',
    1249      'mt_mt':                                'mt_MT.ISO8859-3',
    1250      'my_mm':                                'my_MM.UTF-8',
    1251      'nan_tw':                               'nan_TW.UTF-8',
    1252      'nb':                                   'nb_NO.ISO8859-1',
    1253      'nb_no':                                'nb_NO.ISO8859-1',
    1254      'nds_de':                               'nds_DE.UTF-8',
    1255      'nds_nl':                               'nds_NL.UTF-8',
    1256      'ne_np':                                'ne_NP.UTF-8',
    1257      'nhn_mx':                               'nhn_MX.UTF-8',
    1258      'niu_nu':                               'niu_NU.UTF-8',
    1259      'niu_nz':                               'niu_NZ.UTF-8',
    1260      'nl':                                   'nl_NL.ISO8859-1',
    1261      'nl_aw':                                'nl_AW.UTF-8',
    1262      'nl_be':                                'nl_BE.ISO8859-1',
    1263      'nl_nl':                                'nl_NL.ISO8859-1',
    1264      'nn':                                   'nn_NO.ISO8859-1',
    1265      'nn_no':                                'nn_NO.ISO8859-1',
    1266      'no':                                   'no_NO.ISO8859-1',
    1267      'no@nynorsk':                           'ny_NO.ISO8859-1',
    1268      'no_no':                                'no_NO.ISO8859-1',
    1269      'no_no.iso88591@bokmal':                'no_NO.ISO8859-1',
    1270      'no_no.iso88591@nynorsk':               'no_NO.ISO8859-1',
    1271      'norwegian':                            'no_NO.ISO8859-1',
    1272      'nr':                                   'nr_ZA.ISO8859-1',
    1273      'nr_za':                                'nr_ZA.ISO8859-1',
    1274      'nso':                                  'nso_ZA.ISO8859-15',
    1275      'nso_za':                               'nso_ZA.ISO8859-15',
    1276      'ny':                                   'ny_NO.ISO8859-1',
    1277      'ny_no':                                'ny_NO.ISO8859-1',
    1278      'nynorsk':                              'nn_NO.ISO8859-1',
    1279      'oc':                                   'oc_FR.ISO8859-1',
    1280      'oc_fr':                                'oc_FR.ISO8859-1',
    1281      'om_et':                                'om_ET.UTF-8',
    1282      'om_ke':                                'om_KE.ISO8859-1',
    1283      'or':                                   'or_IN.UTF-8',
    1284      'or_in':                                'or_IN.UTF-8',
    1285      'os_ru':                                'os_RU.UTF-8',
    1286      'pa':                                   'pa_IN.UTF-8',
    1287      'pa_in':                                'pa_IN.UTF-8',
    1288      'pa_pk':                                'pa_PK.UTF-8',
    1289      'pap_an':                               'pap_AN.UTF-8',
    1290      'pap_aw':                               'pap_AW.UTF-8',
    1291      'pap_cw':                               'pap_CW.UTF-8',
    1292      'pd':                                   'pd_US.ISO8859-1',
    1293      'pd_de':                                'pd_DE.ISO8859-1',
    1294      'pd_us':                                'pd_US.ISO8859-1',
    1295      'ph':                                   'ph_PH.ISO8859-1',
    1296      'ph_ph':                                'ph_PH.ISO8859-1',
    1297      'pl':                                   'pl_PL.ISO8859-2',
    1298      'pl_pl':                                'pl_PL.ISO8859-2',
    1299      'polish':                               'pl_PL.ISO8859-2',
    1300      'portuguese':                           'pt_PT.ISO8859-1',
    1301      'portuguese_brazil':                    'pt_BR.ISO8859-1',
    1302      'posix':                                'C',
    1303      'posix-utf2':                           'C',
    1304      'pp':                                   'pp_AN.ISO8859-1',
    1305      'pp_an':                                'pp_AN.ISO8859-1',
    1306      'ps_af':                                'ps_AF.UTF-8',
    1307      'pt':                                   'pt_PT.ISO8859-1',
    1308      'pt_br':                                'pt_BR.ISO8859-1',
    1309      'pt_pt':                                'pt_PT.ISO8859-1',
    1310      'quz_pe':                               'quz_PE.UTF-8',
    1311      'raj_in':                               'raj_IN.UTF-8',
    1312      'ro':                                   'ro_RO.ISO8859-2',
    1313      'ro_ro':                                'ro_RO.ISO8859-2',
    1314      'romanian':                             'ro_RO.ISO8859-2',
    1315      'ru':                                   'ru_RU.UTF-8',
    1316      'ru_ru':                                'ru_RU.UTF-8',
    1317      'ru_ua':                                'ru_UA.KOI8-U',
    1318      'rumanian':                             'ro_RO.ISO8859-2',
    1319      'russian':                              'ru_RU.KOI8-R',
    1320      'rw':                                   'rw_RW.ISO8859-1',
    1321      'rw_rw':                                'rw_RW.ISO8859-1',
    1322      'sa_in':                                'sa_IN.UTF-8',
    1323      'sat_in':                               'sat_IN.UTF-8',
    1324      'sc_it':                                'sc_IT.UTF-8',
    1325      'sd':                                   'sd_IN.UTF-8',
    1326      'sd_in':                                'sd_IN.UTF-8',
    1327      'sd_in@devanagari.utf8':                'sd_IN.UTF-8@devanagari',
    1328      'sd_pk':                                'sd_PK.UTF-8',
    1329      'se_no':                                'se_NO.UTF-8',
    1330      'serbocroatian':                        'sr_RS.UTF-8@latin',
    1331      'sgs_lt':                               'sgs_LT.UTF-8',
    1332      'sh':                                   'sr_RS.UTF-8@latin',
    1333      'sh_ba.iso88592@bosnia':                'sr_CS.ISO8859-2',
    1334      'sh_hr':                                'sh_HR.ISO8859-2',
    1335      'sh_hr.iso88592':                       'hr_HR.ISO8859-2',
    1336      'sh_sp':                                'sr_CS.ISO8859-2',
    1337      'sh_yu':                                'sr_RS.UTF-8@latin',
    1338      'shn_mm':                               'shn_MM.UTF-8',
    1339      'shs_ca':                               'shs_CA.UTF-8',
    1340      'si':                                   'si_LK.UTF-8',
    1341      'si_lk':                                'si_LK.UTF-8',
    1342      'sid_et':                               'sid_ET.UTF-8',
    1343      'sinhala':                              'si_LK.UTF-8',
    1344      'sk':                                   'sk_SK.ISO8859-2',
    1345      'sk_sk':                                'sk_SK.ISO8859-2',
    1346      'sl':                                   'sl_SI.ISO8859-2',
    1347      'sl_cs':                                'sl_CS.ISO8859-2',
    1348      'sl_si':                                'sl_SI.ISO8859-2',
    1349      'slovak':                               'sk_SK.ISO8859-2',
    1350      'slovene':                              'sl_SI.ISO8859-2',
    1351      'slovenian':                            'sl_SI.ISO8859-2',
    1352      'sm_ws':                                'sm_WS.UTF-8',
    1353      'so_dj':                                'so_DJ.ISO8859-1',
    1354      'so_et':                                'so_ET.UTF-8',
    1355      'so_ke':                                'so_KE.ISO8859-1',
    1356      'so_so':                                'so_SO.ISO8859-1',
    1357      'sp':                                   'sr_CS.ISO8859-5',
    1358      'sp_yu':                                'sr_CS.ISO8859-5',
    1359      'spanish':                              'es_ES.ISO8859-1',
    1360      'spanish_spain':                        'es_ES.ISO8859-1',
    1361      'sq':                                   'sq_AL.ISO8859-2',
    1362      'sq_al':                                'sq_AL.ISO8859-2',
    1363      'sq_mk':                                'sq_MK.UTF-8',
    1364      'sr':                                   'sr_RS.UTF-8',
    1365      'sr@cyrillic':                          'sr_RS.UTF-8',
    1366      'sr@latn':                              'sr_CS.UTF-8@latin',
    1367      'sr_cs':                                'sr_CS.UTF-8',
    1368      'sr_cs.iso88592@latn':                  'sr_CS.ISO8859-2',
    1369      'sr_cs@latn':                           'sr_CS.UTF-8@latin',
    1370      'sr_me':                                'sr_ME.UTF-8',
    1371      'sr_rs':                                'sr_RS.UTF-8',
    1372      'sr_rs@latn':                           'sr_RS.UTF-8@latin',
    1373      'sr_sp':                                'sr_CS.ISO8859-2',
    1374      'sr_yu':                                'sr_RS.UTF-8@latin',
    1375      'sr_yu.cp1251@cyrillic':                'sr_CS.CP1251',
    1376      'sr_yu.iso88592':                       'sr_CS.ISO8859-2',
    1377      'sr_yu.iso88595':                       'sr_CS.ISO8859-5',
    1378      'sr_yu.iso88595@cyrillic':              'sr_CS.ISO8859-5',
    1379      'sr_yu.microsoftcp1251@cyrillic':       'sr_CS.CP1251',
    1380      'sr_yu.utf8':                           'sr_RS.UTF-8',
    1381      'sr_yu.utf8@cyrillic':                  'sr_RS.UTF-8',
    1382      'sr_yu@cyrillic':                       'sr_RS.UTF-8',
    1383      'ss':                                   'ss_ZA.ISO8859-1',
    1384      'ss_za':                                'ss_ZA.ISO8859-1',
    1385      'st':                                   'st_ZA.ISO8859-1',
    1386      'st_za':                                'st_ZA.ISO8859-1',
    1387      'sv':                                   'sv_SE.ISO8859-1',
    1388      'sv_fi':                                'sv_FI.ISO8859-1',
    1389      'sv_se':                                'sv_SE.ISO8859-1',
    1390      'sw_ke':                                'sw_KE.UTF-8',
    1391      'sw_tz':                                'sw_TZ.UTF-8',
    1392      'swedish':                              'sv_SE.ISO8859-1',
    1393      'szl_pl':                               'szl_PL.UTF-8',
    1394      'ta':                                   'ta_IN.TSCII-0',
    1395      'ta_in':                                'ta_IN.TSCII-0',
    1396      'ta_in.tscii':                          'ta_IN.TSCII-0',
    1397      'ta_in.tscii0':                         'ta_IN.TSCII-0',
    1398      'ta_lk':                                'ta_LK.UTF-8',
    1399      'tcy_in.utf8':                          'tcy_IN.UTF-8',
    1400      'te':                                   'te_IN.UTF-8',
    1401      'te_in':                                'te_IN.UTF-8',
    1402      'tg':                                   'tg_TJ.KOI8-C',
    1403      'tg_tj':                                'tg_TJ.KOI8-C',
    1404      'th':                                   'th_TH.ISO8859-11',
    1405      'th_th':                                'th_TH.ISO8859-11',
    1406      'th_th.tactis':                         'th_TH.TIS620',
    1407      'th_th.tis620':                         'th_TH.TIS620',
    1408      'thai':                                 'th_TH.ISO8859-11',
    1409      'the_np':                               'the_NP.UTF-8',
    1410      'ti_er':                                'ti_ER.UTF-8',
    1411      'ti_et':                                'ti_ET.UTF-8',
    1412      'tig_er':                               'tig_ER.UTF-8',
    1413      'tk_tm':                                'tk_TM.UTF-8',
    1414      'tl':                                   'tl_PH.ISO8859-1',
    1415      'tl_ph':                                'tl_PH.ISO8859-1',
    1416      'tn':                                   'tn_ZA.ISO8859-15',
    1417      'tn_za':                                'tn_ZA.ISO8859-15',
    1418      'to_to':                                'to_TO.UTF-8',
    1419      'tpi_pg':                               'tpi_PG.UTF-8',
    1420      'tr':                                   'tr_TR.ISO8859-9',
    1421      'tr_cy':                                'tr_CY.ISO8859-9',
    1422      'tr_tr':                                'tr_TR.ISO8859-9',
    1423      'ts':                                   'ts_ZA.ISO8859-1',
    1424      'ts_za':                                'ts_ZA.ISO8859-1',
    1425      'tt':                                   'tt_RU.TATAR-CYR',
    1426      'tt_ru':                                'tt_RU.TATAR-CYR',
    1427      'tt_ru.tatarcyr':                       'tt_RU.TATAR-CYR',
    1428      'tt_ru@iqtelif':                        'tt_RU.UTF-8@iqtelif',
    1429      'turkish':                              'tr_TR.ISO8859-9',
    1430      'ug_cn':                                'ug_CN.UTF-8',
    1431      'uk':                                   'uk_UA.KOI8-U',
    1432      'uk_ua':                                'uk_UA.KOI8-U',
    1433      'univ':                                 'en_US.utf',
    1434      'universal':                            'en_US.utf',
    1435      'universal.utf8@ucs4':                  'en_US.UTF-8',
    1436      'unm_us':                               'unm_US.UTF-8',
    1437      'ur':                                   'ur_PK.CP1256',
    1438      'ur_in':                                'ur_IN.UTF-8',
    1439      'ur_pk':                                'ur_PK.CP1256',
    1440      'uz':                                   'uz_UZ.UTF-8',
    1441      'uz_uz':                                'uz_UZ.UTF-8',
    1442      'uz_uz@cyrillic':                       'uz_UZ.UTF-8',
    1443      've':                                   've_ZA.UTF-8',
    1444      've_za':                                've_ZA.UTF-8',
    1445      'vi':                                   'vi_VN.TCVN',
    1446      'vi_vn':                                'vi_VN.TCVN',
    1447      'vi_vn.tcvn':                           'vi_VN.TCVN',
    1448      'vi_vn.tcvn5712':                       'vi_VN.TCVN',
    1449      'vi_vn.viscii':                         'vi_VN.VISCII',
    1450      'vi_vn.viscii111':                      'vi_VN.VISCII',
    1451      'wa':                                   'wa_BE.ISO8859-1',
    1452      'wa_be':                                'wa_BE.ISO8859-1',
    1453      'wae_ch':                               'wae_CH.UTF-8',
    1454      'wal_et':                               'wal_ET.UTF-8',
    1455      'wo_sn':                                'wo_SN.UTF-8',
    1456      'xh':                                   'xh_ZA.ISO8859-1',
    1457      'xh_za':                                'xh_ZA.ISO8859-1',
    1458      'yi':                                   'yi_US.CP1255',
    1459      'yi_us':                                'yi_US.CP1255',
    1460      'yo_ng':                                'yo_NG.UTF-8',
    1461      'yue_hk':                               'yue_HK.UTF-8',
    1462      'yuw_pg':                               'yuw_PG.UTF-8',
    1463      'zh':                                   'zh_CN.eucCN',
    1464      'zh_cn':                                'zh_CN.gb2312',
    1465      'zh_cn.big5':                           'zh_TW.big5',
    1466      'zh_cn.euc':                            'zh_CN.eucCN',
    1467      'zh_hk':                                'zh_HK.big5hkscs',
    1468      'zh_hk.big5hk':                         'zh_HK.big5hkscs',
    1469      'zh_sg':                                'zh_SG.GB2312',
    1470      'zh_sg.gbk':                            'zh_SG.GBK',
    1471      'zh_tw':                                'zh_TW.big5',
    1472      'zh_tw.euc':                            'zh_TW.eucTW',
    1473      'zh_tw.euctw':                          'zh_TW.eucTW',
    1474      'zu':                                   'zu_ZA.ISO8859-1',
    1475      'zu_za':                                'zu_ZA.ISO8859-1',
    1476  }
    1477  
    1478  #
    1479  # This maps Windows language identifiers to locale strings.
    1480  #
    1481  # This list has been updated from
    1482  # http://msdn.microsoft.com/library/default.asp?url=/library/en-us/intl/nls_238z.asp
    1483  # to include every locale up to Windows Vista.
    1484  #
    1485  # NOTE: this mapping is incomplete.  If your language is missing, please
    1486  # submit a bug report to the Python bug tracker at http://bugs.python.org/
    1487  # Make sure you include the missing language identifier and the suggested
    1488  # locale code.
    1489  #
    1490  
    1491  windows_locale = {
    1492      0x0436: "af_ZA", # Afrikaans
    1493      0x041c: "sq_AL", # Albanian
    1494      0x0484: "gsw_FR",# Alsatian - France
    1495      0x045e: "am_ET", # Amharic - Ethiopia
    1496      0x0401: "ar_SA", # Arabic - Saudi Arabia
    1497      0x0801: "ar_IQ", # Arabic - Iraq
    1498      0x0c01: "ar_EG", # Arabic - Egypt
    1499      0x1001: "ar_LY", # Arabic - Libya
    1500      0x1401: "ar_DZ", # Arabic - Algeria
    1501      0x1801: "ar_MA", # Arabic - Morocco
    1502      0x1c01: "ar_TN", # Arabic - Tunisia
    1503      0x2001: "ar_OM", # Arabic - Oman
    1504      0x2401: "ar_YE", # Arabic - Yemen
    1505      0x2801: "ar_SY", # Arabic - Syria
    1506      0x2c01: "ar_JO", # Arabic - Jordan
    1507      0x3001: "ar_LB", # Arabic - Lebanon
    1508      0x3401: "ar_KW", # Arabic - Kuwait
    1509      0x3801: "ar_AE", # Arabic - United Arab Emirates
    1510      0x3c01: "ar_BH", # Arabic - Bahrain
    1511      0x4001: "ar_QA", # Arabic - Qatar
    1512      0x042b: "hy_AM", # Armenian
    1513      0x044d: "as_IN", # Assamese - India
    1514      0x042c: "az_AZ", # Azeri - Latin
    1515      0x082c: "az_AZ", # Azeri - Cyrillic
    1516      0x046d: "ba_RU", # Bashkir
    1517      0x042d: "eu_ES", # Basque - Russia
    1518      0x0423: "be_BY", # Belarusian
    1519      0x0445: "bn_IN", # Begali
    1520      0x201a: "bs_BA", # Bosnian - Cyrillic
    1521      0x141a: "bs_BA", # Bosnian - Latin
    1522      0x047e: "br_FR", # Breton - France
    1523      0x0402: "bg_BG", # Bulgarian
    1524  #    0x0455: "my_MM", # Burmese - Not supported
    1525      0x0403: "ca_ES", # Catalan
    1526      0x0004: "zh_CHS",# Chinese - Simplified
    1527      0x0404: "zh_TW", # Chinese - Taiwan
    1528      0x0804: "zh_CN", # Chinese - PRC
    1529      0x0c04: "zh_HK", # Chinese - Hong Kong S.A.R.
    1530      0x1004: "zh_SG", # Chinese - Singapore
    1531      0x1404: "zh_MO", # Chinese - Macao S.A.R.
    1532      0x7c04: "zh_CHT",# Chinese - Traditional
    1533      0x0483: "co_FR", # Corsican - France
    1534      0x041a: "hr_HR", # Croatian
    1535      0x101a: "hr_BA", # Croatian - Bosnia
    1536      0x0405: "cs_CZ", # Czech
    1537      0x0406: "da_DK", # Danish
    1538      0x048c: "gbz_AF",# Dari - Afghanistan
    1539      0x0465: "div_MV",# Divehi - Maldives
    1540      0x0413: "nl_NL", # Dutch - The Netherlands
    1541      0x0813: "nl_BE", # Dutch - Belgium
    1542      0x0409: "en_US", # English - United States
    1543      0x0809: "en_GB", # English - United Kingdom
    1544      0x0c09: "en_AU", # English - Australia
    1545      0x1009: "en_CA", # English - Canada
    1546      0x1409: "en_NZ", # English - New Zealand
    1547      0x1809: "en_IE", # English - Ireland
    1548      0x1c09: "en_ZA", # English - South Africa
    1549      0x2009: "en_JA", # English - Jamaica
    1550      0x2409: "en_CB", # English - Caribbean
    1551      0x2809: "en_BZ", # English - Belize
    1552      0x2c09: "en_TT", # English - Trinidad
    1553      0x3009: "en_ZW", # English - Zimbabwe
    1554      0x3409: "en_PH", # English - Philippines
    1555      0x4009: "en_IN", # English - India
    1556      0x4409: "en_MY", # English - Malaysia
    1557      0x4809: "en_IN", # English - Singapore
    1558      0x0425: "et_EE", # Estonian
    1559      0x0438: "fo_FO", # Faroese
    1560      0x0464: "fil_PH",# Filipino
    1561      0x040b: "fi_FI", # Finnish
    1562      0x040c: "fr_FR", # French - France
    1563      0x080c: "fr_BE", # French - Belgium
    1564      0x0c0c: "fr_CA", # French - Canada
    1565      0x100c: "fr_CH", # French - Switzerland
    1566      0x140c: "fr_LU", # French - Luxembourg
    1567      0x180c: "fr_MC", # French - Monaco
    1568      0x0462: "fy_NL", # Frisian - Netherlands
    1569      0x0456: "gl_ES", # Galician
    1570      0x0437: "ka_GE", # Georgian
    1571      0x0407: "de_DE", # German - Germany
    1572      0x0807: "de_CH", # German - Switzerland
    1573      0x0c07: "de_AT", # German - Austria
    1574      0x1007: "de_LU", # German - Luxembourg
    1575      0x1407: "de_LI", # German - Liechtenstein
    1576      0x0408: "el_GR", # Greek
    1577      0x046f: "kl_GL", # Greenlandic - Greenland
    1578      0x0447: "gu_IN", # Gujarati
    1579      0x0468: "ha_NG", # Hausa - Latin
    1580      0x040d: "he_IL", # Hebrew
    1581      0x0439: "hi_IN", # Hindi
    1582      0x040e: "hu_HU", # Hungarian
    1583      0x040f: "is_IS", # Icelandic
    1584      0x0421: "id_ID", # Indonesian
    1585      0x045d: "iu_CA", # Inuktitut - Syllabics
    1586      0x085d: "iu_CA", # Inuktitut - Latin
    1587      0x083c: "ga_IE", # Irish - Ireland
    1588      0x0410: "it_IT", # Italian - Italy
    1589      0x0810: "it_CH", # Italian - Switzerland
    1590      0x0411: "ja_JP", # Japanese
    1591      0x044b: "kn_IN", # Kannada - India
    1592      0x043f: "kk_KZ", # Kazakh
    1593      0x0453: "kh_KH", # Khmer - Cambodia
    1594      0x0486: "qut_GT",# K'iche - Guatemala
    1595      0x0487: "rw_RW", # Kinyarwanda - Rwanda
    1596      0x0457: "kok_IN",# Konkani
    1597      0x0412: "ko_KR", # Korean
    1598      0x0440: "ky_KG", # Kyrgyz
    1599      0x0454: "lo_LA", # Lao - Lao PDR
    1600      0x0426: "lv_LV", # Latvian
    1601      0x0427: "lt_LT", # Lithuanian
    1602      0x082e: "dsb_DE",# Lower Sorbian - Germany
    1603      0x046e: "lb_LU", # Luxembourgish
    1604      0x042f: "mk_MK", # FYROM Macedonian
    1605      0x043e: "ms_MY", # Malay - Malaysia
    1606      0x083e: "ms_BN", # Malay - Brunei Darussalam
    1607      0x044c: "ml_IN", # Malayalam - India
    1608      0x043a: "mt_MT", # Maltese
    1609      0x0481: "mi_NZ", # Maori
    1610      0x047a: "arn_CL",# Mapudungun
    1611      0x044e: "mr_IN", # Marathi
    1612      0x047c: "moh_CA",# Mohawk - Canada
    1613      0x0450: "mn_MN", # Mongolian - Cyrillic
    1614      0x0850: "mn_CN", # Mongolian - PRC
    1615      0x0461: "ne_NP", # Nepali
    1616      0x0414: "nb_NO", # Norwegian - Bokmal
    1617      0x0814: "nn_NO", # Norwegian - Nynorsk
    1618      0x0482: "oc_FR", # Occitan - France
    1619      0x0448: "or_IN", # Oriya - India
    1620      0x0463: "ps_AF", # Pashto - Afghanistan
    1621      0x0429: "fa_IR", # Persian
    1622      0x0415: "pl_PL", # Polish
    1623      0x0416: "pt_BR", # Portuguese - Brazil
    1624      0x0816: "pt_PT", # Portuguese - Portugal
    1625      0x0446: "pa_IN", # Punjabi
    1626      0x046b: "quz_BO",# Quechua (Bolivia)
    1627      0x086b: "quz_EC",# Quechua (Ecuador)
    1628      0x0c6b: "quz_PE",# Quechua (Peru)
    1629      0x0418: "ro_RO", # Romanian - Romania
    1630      0x0417: "rm_CH", # Romansh
    1631      0x0419: "ru_RU", # Russian
    1632      0x243b: "smn_FI",# Sami Finland
    1633      0x103b: "smj_NO",# Sami Norway
    1634      0x143b: "smj_SE",# Sami Sweden
    1635      0x043b: "se_NO", # Sami Northern Norway
    1636      0x083b: "se_SE", # Sami Northern Sweden
    1637      0x0c3b: "se_FI", # Sami Northern Finland
    1638      0x203b: "sms_FI",# Sami Skolt
    1639      0x183b: "sma_NO",# Sami Southern Norway
    1640      0x1c3b: "sma_SE",# Sami Southern Sweden
    1641      0x044f: "sa_IN", # Sanskrit
    1642      0x0c1a: "sr_SP", # Serbian - Cyrillic
    1643      0x1c1a: "sr_BA", # Serbian - Bosnia Cyrillic
    1644      0x081a: "sr_SP", # Serbian - Latin
    1645      0x181a: "sr_BA", # Serbian - Bosnia Latin
    1646      0x045b: "si_LK", # Sinhala - Sri Lanka
    1647      0x046c: "ns_ZA", # Northern Sotho
    1648      0x0432: "tn_ZA", # Setswana - Southern Africa
    1649      0x041b: "sk_SK", # Slovak
    1650      0x0424: "sl_SI", # Slovenian
    1651      0x040a: "es_ES", # Spanish - Spain
    1652      0x080a: "es_MX", # Spanish - Mexico
    1653      0x0c0a: "es_ES", # Spanish - Spain (Modern)
    1654      0x100a: "es_GT", # Spanish - Guatemala
    1655      0x140a: "es_CR", # Spanish - Costa Rica
    1656      0x180a: "es_PA", # Spanish - Panama
    1657      0x1c0a: "es_DO", # Spanish - Dominican Republic
    1658      0x200a: "es_VE", # Spanish - Venezuela
    1659      0x240a: "es_CO", # Spanish - Colombia
    1660      0x280a: "es_PE", # Spanish - Peru
    1661      0x2c0a: "es_AR", # Spanish - Argentina
    1662      0x300a: "es_EC", # Spanish - Ecuador
    1663      0x340a: "es_CL", # Spanish - Chile
    1664      0x380a: "es_UR", # Spanish - Uruguay
    1665      0x3c0a: "es_PY", # Spanish - Paraguay
    1666      0x400a: "es_BO", # Spanish - Bolivia
    1667      0x440a: "es_SV", # Spanish - El Salvador
    1668      0x480a: "es_HN", # Spanish - Honduras
    1669      0x4c0a: "es_NI", # Spanish - Nicaragua
    1670      0x500a: "es_PR", # Spanish - Puerto Rico
    1671      0x540a: "es_US", # Spanish - United States
    1672  #    0x0430: "", # Sutu - Not supported
    1673      0x0441: "sw_KE", # Swahili
    1674      0x041d: "sv_SE", # Swedish - Sweden
    1675      0x081d: "sv_FI", # Swedish - Finland
    1676      0x045a: "syr_SY",# Syriac
    1677      0x0428: "tg_TJ", # Tajik - Cyrillic
    1678      0x085f: "tmz_DZ",# Tamazight - Latin
    1679      0x0449: "ta_IN", # Tamil
    1680      0x0444: "tt_RU", # Tatar
    1681      0x044a: "te_IN", # Telugu
    1682      0x041e: "th_TH", # Thai
    1683      0x0851: "bo_BT", # Tibetan - Bhutan
    1684      0x0451: "bo_CN", # Tibetan - PRC
    1685      0x041f: "tr_TR", # Turkish
    1686      0x0442: "tk_TM", # Turkmen - Cyrillic
    1687      0x0480: "ug_CN", # Uighur - Arabic
    1688      0x0422: "uk_UA", # Ukrainian
    1689      0x042e: "wen_DE",# Upper Sorbian - Germany
    1690      0x0420: "ur_PK", # Urdu
    1691      0x0820: "ur_IN", # Urdu - India
    1692      0x0443: "uz_UZ", # Uzbek - Latin
    1693      0x0843: "uz_UZ", # Uzbek - Cyrillic
    1694      0x042a: "vi_VN", # Vietnamese
    1695      0x0452: "cy_GB", # Welsh
    1696      0x0488: "wo_SN", # Wolof - Senegal
    1697      0x0434: "xh_ZA", # Xhosa - South Africa
    1698      0x0485: "sah_RU",# Yakut - Cyrillic
    1699      0x0478: "ii_CN", # Yi - PRC
    1700      0x046a: "yo_NG", # Yoruba - Nigeria
    1701      0x0435: "zu_ZA", # Zulu
    1702  }
    1703  
    1704  def _print_locale():
    1705  
    1706      """ Test function.
    1707      """
    1708      categories = {}
    1709      def _init_categories(categories=categories):
    1710          for k,v in globals().items():
    1711              if k[:3] == 'LC_':
    1712                  categories[k] = v
    1713      _init_categories()
    1714      del categories['LC_ALL']
    1715  
    1716      print('Locale defaults as determined by getdefaultlocale():')
    1717      print('-'*72)
    1718      lang, enc = getdefaultlocale()
    1719      print('Language: ', lang or '(undefined)')
    1720      print('Encoding: ', enc or '(undefined)')
    1721      print()
    1722  
    1723      print('Locale settings on startup:')
    1724      print('-'*72)
    1725      for name,category in categories.items():
    1726          print(name, '...')
    1727          lang, enc = getlocale(category)
    1728          print('   Language: ', lang or '(undefined)')
    1729          print('   Encoding: ', enc or '(undefined)')
    1730          print()
    1731  
    1732      print()
    1733      print('Locale settings after calling resetlocale():')
    1734      print('-'*72)
    1735      resetlocale()
    1736      for name,category in categories.items():
    1737          print(name, '...')
    1738          lang, enc = getlocale(category)
    1739          print('   Language: ', lang or '(undefined)')
    1740          print('   Encoding: ', enc or '(undefined)')
    1741          print()
    1742  
    1743      try:
    1744          setlocale(LC_ALL, "")
    1745      except:
    1746          print('NOTE:')
    1747          print('setlocale(LC_ALL, "") does not support the default locale')
    1748          print('given in the OS environment variables.')
    1749      else:
    1750          print()
    1751          print('Locale settings after calling setlocale(LC_ALL, ""):')
    1752          print('-'*72)
    1753          for name,category in categories.items():
    1754              print(name, '...')
    1755              lang, enc = getlocale(category)
    1756              print('   Language: ', lang or '(undefined)')
    1757              print('   Encoding: ', enc or '(undefined)')
    1758              print()
    1759  
    1760  ###
    1761  
    1762  try:
    1763      LC_MESSAGES
    1764  except NameError:
    1765      pass
    1766  else:
    1767      __all__.append("LC_MESSAGES")
    1768  
    1769  if __name__=='__main__':
    1770      print('Locale aliasing:')
    1771      print()
    1772      _print_locale()
    1773      print()
    1774      print('Number formatting:')
    1775      print()
    1776      _test()