1 """Locale support module.
2
3 The module provides low-level access to the C lib's locale APIs and adds high
4 level number formatting APIs as well as a locale aliasing engine to complement
5 these.
6
7 The aliasing engine includes support for many commonly used locale names and
8 maps them to values suitable for passing to the C lib's setlocale() function. It
9 also includes default encodings for all supported locale names.
10
11 """
12
13 import sys
14 import encodings
15 import encodings.aliases
16 import re
17 import _collections_abc
18 from builtins import str as _builtin_str
19 import functools
20
21 # Try importing the _locale module.
22 #
23 # If this fails, fall back on a basic 'C' locale emulation.
24
# Yuck:  LC_MESSAGES is non-standard:  can't tell whether it exists before
# trying the import.  So __all__ is also fiddled at the end of the file.
#
# Public names exported by "from locale import *".
__all__ = ["getlocale", "getdefaultlocale", "getpreferredencoding", "Error",
           "setlocale", "resetlocale", "localeconv", "strcoll", "strxfrm",
           "str", "atof", "atoi", "format_string", "currency",
           "normalize", "LC_CTYPE", "LC_COLLATE", "LC_TIME", "LC_MONETARY",
           "LC_NUMERIC", "LC_ALL", "CHAR_MAX", "getencoding"]
32
33 def _strcoll(a,b):
34 """ strcoll(string,string) -> int.
35 Compares two strings according to the locale.
36 """
37 return (a > b) - (a < b)
38
39 def _strxfrm(s):
40 """ strxfrm(string) -> string.
41 Returns a string that behaves for cmp locale-aware.
42 """
43 return s
44
try:

    from _locale import *

except ImportError:

    # The C extension is unavailable: emulate a fixed 'C' locale.

    CHAR_MAX = 127
    LC_CTYPE = 0
    LC_NUMERIC = 1
    LC_TIME = 2
    LC_COLLATE = 3
    LC_MONETARY = 4
    LC_MESSAGES = 5
    LC_ALL = 6
    Error = ValueError

    def localeconv():
        """ localeconv() -> dict.

            Returns the numeric and monetary formatting parameters of the
            emulated 'C' locale.  A fresh dict is built on every call.
        """
        unset = 127  # same value as CHAR_MAX above
        return {
            'grouping': [unset],
            'currency_symbol': '',
            'n_sign_posn': unset,
            'p_cs_precedes': unset,
            'n_cs_precedes': unset,
            'mon_grouping': [],
            'n_sep_by_space': unset,
            'decimal_point': '.',
            'negative_sign': '',
            'positive_sign': '',
            'p_sep_by_space': unset,
            'int_curr_symbol': '',
            'p_sign_posn': unset,
            'thousands_sep': '',
            'mon_thousands_sep': '',
            'frac_digits': unset,
            'mon_decimal_point': '',
            'int_frac_digits': unset,
        }

    def setlocale(category, value=None):
        """ setlocale(integer,string=None) -> string.

            Query or "set" the emulated locale.  Only None, '' and 'C' are
            accepted as value; anything else raises Error.  The category
            argument is not inspected.  Always returns 'C'.
        """
        if value in (None, '', 'C'):
            return 'C'
        raise Error('_locale emulation only supports "C" locale')
94
# _locale does not necessarily provide these two; fall back on the
# pure-Python versions for whichever one is still undefined.
try:
    strxfrm
except NameError:
    strxfrm = _strxfrm
try:
    strcoll
except NameError:
    strcoll = _strcoll
100
101
# Remember the underlying implementation before the name is rebound below.
_localeconv = localeconv

# Entries placed in this dict replace the corresponding items of
# localeconv()'s return value.  This is useful for testing purposes.
_override_localeconv = {}

@functools.wraps(_localeconv)
def localeconv():
    conv = _localeconv()
    if _override_localeconv:
        conv.update(_override_localeconv)
    return conv
114
115
116 ### Number formatting APIs
117
118 # Author: Martin von Loewis
119 # improved by Georg Brandl
120
121 # Iterate over grouping intervals
122 def _grouping_intervals(grouping):
123 last_interval = None
124 for interval in grouping:
125 # if grouping is -1, we are done
126 if interval == CHAR_MAX:
127 return
128 # 0: re-use last group ad infinitum
129 if interval == 0:
130 if last_interval is None:
131 raise ValueError("invalid grouping")
132 while True:
133 yield last_interval
134 yield interval
135 last_interval = interval
136
137 #perform the grouping from right to left
138 def _group(s, monetary=False):
139 conv = localeconv()
140 thousands_sep = conv[monetary and 'mon_thousands_sep' or 'thousands_sep']
141 grouping = conv[monetary and 'mon_grouping' or 'grouping']
142 if not grouping:
143 return (s, 0)
144 if s[-1] == ' ':
145 stripped = s.rstrip()
146 right_spaces = s[len(stripped):]
147 s = stripped
148 else:
149 right_spaces = ''
150 left_spaces = ''
151 groups = []
152 for interval in _grouping_intervals(grouping):
153 if not s or s[-1] not in "0123456789":
154 # only non-digit characters remain (sign, spaces)
155 left_spaces = s
156 s = ''
157 break
158 groups.append(s[-interval:])
159 s = s[:-interval]
160 if s:
161 groups.append(s)
162 groups.reverse()
163 return (
164 left_spaces + thousands_sep.join(groups) + right_spaces,
165 len(thousands_sep) * (len(groups) - 1)
166 )
167
168 # Strip a given amount of excess padding from the given string
169 def _strip_padding(s, amount):
170 lpos = 0
171 while amount and s[lpos] == ' ':
172 lpos += 1
173 amount -= 1
174 rpos = len(s) - 1
175 while amount and s[rpos] == ' ':
176 rpos -= 1
177 amount -= 1
178 return s[lpos:rpos+1]
179
# Matches a single %-style conversion specifier: '%', an optional
# parenthesised mapping key (group 'key'), optional flag/width/precision/
# length characters (group 'modifiers') and the conversion character.
# The literal '%%' is matched as well.
_percent_re = re.compile(r'%(?:\((?P<key>.*?)\))?'
                         r'(?P<modifiers>[-#0-9 +*.hlL]*?)[eEfFgGdiouxXcrs%]')
182
183 def _format(percent, value, grouping=False, monetary=False, *additional):
184 if additional:
185 formatted = percent % ((value,) + additional)
186 else:
187 formatted = percent % value
188 if percent[-1] in 'eEfFgGdiu':
189 formatted = _localize(formatted, grouping, monetary)
190 return formatted
191
192 # Transform formatted as locale number according to the locale settings
193 def _localize(formatted, grouping=False, monetary=False):
194 # floats and decimal ints need special action!
195 if '.' in formatted:
196 seps = 0
197 parts = formatted.split('.')
198 if grouping:
199 parts[0], seps = _group(parts[0], monetary=monetary)
200 decimal_point = localeconv()[monetary and 'mon_decimal_point'
201 or 'decimal_point']
202 formatted = decimal_point.join(parts)
203 if seps:
204 formatted = _strip_padding(formatted, seps)
205 else:
206 seps = 0
207 if grouping:
208 formatted, seps = _group(formatted, monetary=monetary)
209 if seps:
210 formatted = _strip_padding(formatted, seps)
211 return formatted
212
def format_string(f, val, grouping=False, monetary=False):
    """Formats a string in the same way that the % formatting would use,
    but takes the current locale into account.

    val may be a single value, a tuple of values or a mapping (for
    '%(key)s' style specifiers).
    Grouping is applied if the third parameter is true.
    Conversion uses monetary thousands separator and grouping strings if
    the fourth parameter, monetary, is true."""
    specs = list(_percent_re.finditer(f))
    # Every specifier becomes a plain '%s'; the localized pieces are
    # substituted into this skeleton at the end.
    skeleton = _percent_re.sub('%s', f)
    pieces = []

    if isinstance(val, _collections_abc.Mapping):
        for match in specs:
            spec = match.group()
            if spec.endswith('%'):
                pieces.append('%')
            else:
                pieces.append(_format(spec, val, grouping, monetary))
    else:
        values = val if isinstance(val, tuple) else (val,)
        pos = 0
        for match in specs:
            spec = match.group()
            if spec.endswith('%'):
                pieces.append('%')
                continue
            # Each '*' in the modifiers consumes one extra argument.
            stars = match.group('modifiers').count('*')
            following = pos + 1 + stars
            pieces.append(_format(spec,
                                  values[pos],
                                  grouping,
                                  monetary,
                                  *values[pos + 1:following]))
            pos = following

    return skeleton % tuple(pieces)
249
def currency(val, symbol=True, grouping=False, international=False):
    """Formats val according to the currency settings
    in the current locale.

    symbol -- include the currency symbol in the result.
    grouping -- insert monetary thousands separators.
    international -- use the international currency symbol and its
        number of fractional digits instead of the local ones.

    Raises ValueError when the locale does not define the number of
    fractional digits (as is the case for the 'C' locale)."""
    conv = localeconv()
    negative = val < 0

    # check for illegal values
    digits = conv['int_frac_digits' if international else 'frac_digits']
    # An undefined value is reported as CHAR_MAX; compare against that
    # constant (as _grouping_intervals() does) rather than a literal 127.
    if digits == CHAR_MAX:
        raise ValueError("Currency formatting is not possible using "
                         "the 'C' locale.")

    s = _localize(f'{abs(val):.{digits}f}', grouping, monetary=True)
    # '<' and '>' are markers if the sign must be inserted between symbol and value
    s = '<' + s + '>'

    if symbol:
        smb = conv['int_curr_symbol' if international else 'currency_symbol']
        precedes = conv['n_cs_precedes' if negative else 'p_cs_precedes']
        separated = conv['n_sep_by_space' if negative else 'p_sep_by_space']
        gap = ' ' if separated else ''

        if precedes:
            s = smb + gap + s
        else:
            # endswith() also copes with an empty symbol, where smb[-1]
            # would raise IndexError.
            if international and smb.endswith(' '):
                smb = smb[:-1]
            s = s + gap + smb

    sign_pos = conv['n_sign_posn' if negative else 'p_sign_posn']
    sign = conv['negative_sign' if negative else 'positive_sign']

    if sign_pos == 0:
        s = '(' + s + ')'
    elif sign_pos == 1:
        s = sign + s
    elif sign_pos == 2:
        s = s + sign
    elif sign_pos == 3:
        s = s.replace('<', sign)
    elif sign_pos == 4:
        s = s.replace('>', sign)
    else:
        # the default if nothing specified;
        # this should be the most fitting sign position
        s = sign + s

    # Drop whichever markers were not replaced by the sign.
    return s.replace('<', '').replace('>', '')
296
def str(val):
    """Return val formatted with "%.12g" and the locale's decimal point."""
    return _format("%.12g", val)
300
def delocalize(string):
    """Turn a locale-formatted number string into a normalized one.

    The locale's thousands separator is removed and its decimal point is
    replaced with '.'.  Separators that the locale leaves empty are
    skipped."""
    conv = localeconv()

    # (localeconv key, replacement): grouping goes first, then the
    # decimal point.
    for key, replacement in (('thousands_sep', ''), ('decimal_point', '.')):
        separator = conv[key]
        if separator:
            string = string.replace(separator, replacement)
    return string
316
def localize(string, grouping=False, monetary=False):
    """Format a normalized number string according to the locale settings.

    Public wrapper around _localize(); grouping and monetary are passed
    through unchanged."""
    localized = _localize(string, grouping, monetary)
    return localized
320
def atof(string, func=float):
    """Parse a locale-formatted string as a float.

    The string is first normalized with delocalize() and then converted
    with func (float by default)."""
    normalized = delocalize(string)
    return func(normalized)
324
def atoi(string):
    """Parse a locale-formatted string as an integer.

    The string is normalized with delocalize() before int() is applied."""
    normalized = delocalize(string)
    return int(normalized)
328
def _test():
    """Small manual check: format two numbers and parse them back."""
    setlocale(LC_ALL, "")
    # integer formatted with grouping, then read back
    grouped = format_string("%d", 123456789,1)
    print(grouped, "is", atoi(grouped))
    # float with standard formatting, then read back
    plain = str(3.14)
    print(plain, "is", atof(plain))
337
338 ### Locale name aliasing engine
339
340 # Author: Marc-Andre Lemburg, mal@lemburg.com
341 # Various tweaks by Fredrik Lundh <fredrik@pythonware.com>
342
# Keep a reference to the low-level version of setlocale(); the public
# name is rebound further down to a wrapper that also accepts
# (language code, encoding) iterables.
_setlocale = setlocale
346
def _replace_encoding(code, encoding):
    """Return code with its encoding part replaced by `encoding`.

    The encoding is normalized through encodings.normalize_encoding() and
    the Python codec aliases, then mapped to a C lib encoding name via
    locale_encoding_alias where an entry exists.
    """
    langname = code.partition('.')[0]
    # Convert the encoding to a C lib compatible encoding string
    normalized = encodings.normalize_encoding(encoding)
    normalized = encodings.aliases.aliases.get(normalized.lower(),
                                               normalized)
    key = normalized.lower()
    if key not in locale_encoding_alias:
        # Second chance: look the name up without '_' and '-'.
        key = key.replace('_', '').replace('-', '')
    # No alias entry at all: keep the codec-normalized name as it is.
    encoding = locale_encoding_alias.get(key, normalized)
    return langname + '.' + encoding
369
370 def _append_modifier(code, modifier):
371 if modifier == 'euro':
372 if '.' not in code:
373 return code + '.ISO8859-15'
374 _, _, encoding = code.partition('.')
375 if encoding in ('ISO8859-15', 'UTF-8'):
376 return code
377 if encoding == 'ISO8859-1':
378 return _replace_encoding(code, 'ISO8859-15')
379 return code + '@' + modifier
380
def normalize(localename):

    """ Returns a normalized locale code for the given locale
        name.

        The returned locale code is formatted for use with
        setlocale().

        If normalization fails, the original name is returned
        unchanged.

        If the given encoding is not known, the function defaults to
        the default encoding for the locale code just like setlocale()
        does.

        The name is lowercased and split into language, encoding and
        modifier ("lang.encoding@modifier"; ':' is accepted in place of
        '.').  Up to four lookups are then made in locale_alias, from
        the most specific form of the name to the least specific one.

    """
    # Normalize the locale name and extract the encoding and modifier
    code = localename.lower()
    if ':' in code:
        # ':' is sometimes used as encoding delimiter.
        code = code.replace(':', '.')
    if '@' in code:
        code, modifier = code.split('@', 1)
    else:
        modifier = ''
    if '.' in code:
        # Anything after a second '.' is discarded.
        langname, encoding = code.split('.')[:2]
    else:
        langname = code
        encoding = ''

    # First lookup: fullname (possibly with encoding and modifier)
    lang_enc = langname
    if encoding:
        # The table keys carry encodings without '-' and '_'.
        norm_encoding = encoding.replace('-', '')
        norm_encoding = norm_encoding.replace('_', '')
        lang_enc += '.' + norm_encoding
    lookup_name = lang_enc
    if modifier:
        lookup_name += '@' + modifier
    code = locale_alias.get(lookup_name, None)
    if code is not None:
        return code
    #print('first lookup failed')

    if modifier:
        # Second try: fullname without modifier (possibly with encoding)
        code = locale_alias.get(lang_enc, None)
        if code is not None:
            #print('lookup without modifier succeeded')
            if '@' not in code:
                return _append_modifier(code, modifier)
            # The alias already carries a modifier: accept it only if it
            # is the one that was asked for.
            if code.split('@', 1)[1].lower() == modifier:
                return code
        #print('second lookup failed')

    if encoding:
        # Third try: langname (without encoding, possibly with modifier)
        lookup_name = langname
        if modifier:
            lookup_name += '@' + modifier
        code = locale_alias.get(lookup_name, None)
        if code is not None:
            #print('lookup without encoding succeeded')
            if '@' not in code:
                return _replace_encoding(code, encoding)
            # Swap the encoding in front of the alias's own modifier.
            code, modifier = code.split('@', 1)
            return _replace_encoding(code, encoding) + '@' + modifier

        if modifier:
            # Fourth try: langname (without encoding and modifier)
            code = locale_alias.get(langname, None)
            if code is not None:
                #print('lookup without modifier and encoding succeeded')
                if '@' not in code:
                    code = _replace_encoding(code, encoding)
                    return _append_modifier(code, modifier)
                code, defmod = code.split('@', 1)
                if defmod.lower() == modifier:
                    return _replace_encoding(code, encoding) + '@' + defmod

    # Nothing matched: hand the name back exactly as it was given.
    return localename
463
def _parse_localename(localename):

    """ Parses the locale code for localename and returns the
        result as tuple (language code, encoding).

        The localename is first run through normalize().  A
        ValueError is raised in case the normalized name cannot be
        parsed.

        The language code corresponds to RFC 1766.  code and encoding
        can be None in case the values cannot be determined or are
        unknown to this implementation.

    """
    # Split off a locale modifier, if there is one.
    code, has_modifier, modifier = normalize(localename).partition('@')
    if has_modifier and modifier == 'euro' and '.' not in code:
        # Assume Latin-9 for @euro locales. This is bogus,
        # since some systems may use other encodings for these
        # locales. Also, we ignore other modifiers.
        return code, 'iso-8859-15'

    if '.' in code:
        language, encoding = code.split('.')[:2]
        return language, encoding
    if code == 'C':
        return None, None
    if code == 'UTF-8':
        # On macOS "LC_CTYPE=UTF-8" is a valid locale setting
        # for getting UTF-8 handling for text.
        return None, 'UTF-8'
    raise ValueError('unknown locale: %s' % localename)
497
498 def _build_localename(localetuple):
499
500 """ Builds a locale code from the given tuple (language code,
501 encoding).
502
503 No aliasing or normalizing takes place.
504
505 """
506 try:
507 language, encoding = localetuple
508
509 if language is None:
510 language = 'C'
511 if encoding is None:
512 return language
513 else:
514 return language + '.' + encoding
515 except (TypeError, ValueError):
516 raise TypeError('Locale must be None, a string, or an iterable of '
517 'two strings -- language code, encoding.') from None
518
def getdefaultlocale(envvars=('LC_ALL', 'LC_CTYPE', 'LANG', 'LANGUAGE')):

    """ Tries to determine the default locale settings and returns
        them as tuple (language code, encoding).

        Deprecated: a DeprecationWarning is issued on every call.

        According to POSIX, a program which has not called
        setlocale(LC_ALL, "") runs using the portable 'C' locale.
        Calling setlocale(LC_ALL, "") lets it use the default locale as
        defined by the LANG variable.  Since the current locale setting
        must not be disturbed, that behavior is emulated here instead.

        To maintain compatibility with other platforms, not only the
        LANG variable is tested, but the list of variables given as
        envvars parameter.  The first one found to be defined is used.
        envvars defaults to the search path used in GNU gettext; it
        must always contain the variable name 'LANG'.

        Except for the code 'C', the language code corresponds to RFC
        1766.  code and encoding can be None in case the values cannot
        be determined.

    """

    import warnings
    message = "Use setlocale(), getencoding() and getlocale() instead"
    # stacklevel=2 attributes the warning to the caller of this function.
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return _getdefaultlocale(envvars)
549
def _getdefaultlocale(envvars=('LC_ALL', 'LC_CTYPE', 'LANG', 'LANGUAGE')):
    """Implementation of getdefaultlocale() without the deprecation warning.

    Uses _locale._getdefaultlocale() when the C module offers it;
    otherwise the first non-empty environment variable named in envvars
    is parsed, defaulting to 'C'.
    """
    try:
        # check if it's supported by the _locale module
        import _locale
        code, encoding = _locale._getdefaultlocale()
    except (ImportError, AttributeError):
        pass
    else:
        # make sure the code/encoding values are valid
        is_windows_id = (sys.platform == "win32" and code
                         and code[:2] == "0x")
        if is_windows_id:
            # map windows language identifier to language name
            code = windows_locale.get(int(code, 0))
        # ...add other platform-specific processing here, if
        # necessary...
        return code, encoding

    # fall back on POSIX behaviour
    import os
    localename = 'C'
    for variable in envvars:
        value = os.environ.get(variable, None)
        if not value:
            continue
        if variable == 'LANGUAGE':
            # LANGUAGE may hold a ':'-separated list; use its first entry.
            value = value.split(':')[0]
        localename = value
        break
    return _parse_localename(localename)
578
579
def getlocale(category=LC_CTYPE):

    """ Returns the current setting for the given locale category as
        tuple (language code, encoding).

        category may be one of the LC_* value except LC_ALL.  It
        defaults to LC_CTYPE.  Querying LC_ALL raises TypeError when
        the C library reports a composite (';'-separated) setting.

        Except for the code 'C', the language code corresponds to RFC
        1766.  code and encoding can be None in case the values cannot
        be determined.

    """
    current = _setlocale(category)
    is_composite = category == LC_ALL and ';' in current
    if is_composite:
        raise TypeError('category LC_ALL is not supported')
    return _parse_localename(current)
597
def setlocale(category, locale=None):

    """ Set the locale for the given category.  The locale can be
        a string, an iterable of two strings (language code and encoding),
        or None.

        Iterables are converted to strings using the locale aliasing
        engine.  Locale strings are passed directly to the C lib.

        category may be given as one of the LC_* values.

        Returns whatever the low-level setlocale() returns.

    """
    if locale and not isinstance(locale, _builtin_str):
        # (language code, encoding) pair: build the name, then normalize it
        localename = _build_localename(locale)
        locale = normalize(localename)
    return _setlocale(category, locale)
614
def resetlocale(category=LC_ALL):

    """ Sets the locale for category to the default setting.

        Deprecated: a DeprecationWarning is issued on every call.

        The default setting is determined by calling
        getdefaultlocale().  category defaults to LC_ALL.

    """
    import warnings
    message = 'Use locale.setlocale(locale.LC_ALL, "") instead'
    warnings.warn(message, DeprecationWarning, stacklevel=2)

    # getdefaultlocale() is deprecated too; silence its warning so that
    # only the one above is reported.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', category=DeprecationWarning)
        default = getdefaultlocale()

    _setlocale(category, _build_localename(default))
634
635
636 try:
637 from _locale import getencoding
638 except ImportError:
639 def getencoding():
640 if hasattr(sys, 'getandroidapilevel'):
641 # On Android langinfo.h and CODESET are missing, and UTF-8 is
642 # always used in mbstowcs() and wcstombs().
643 return 'utf-8'
644 encoding = _getdefaultlocale()[1]
645 if encoding is None:
646 # LANG not set, default to UTF-8
647 encoding = 'utf-8'
648 return encoding
649
650 try:
651 CODESET
652 except NameError:
653 def getpreferredencoding(do_setlocale=True):
654 """Return the charset that the user is likely using."""
655 if sys.flags.warn_default_encoding:
656 import warnings
657 warnings.warn(
658 "UTF-8 Mode affects locale.getpreferredencoding(). Consider locale.getencoding() instead.",
659 EncodingWarning, 2)
660 if sys.flags.utf8_mode:
661 return 'utf-8'
662 return getencoding()
663 else:
664 # On Unix, if CODESET is available, use that.
665 def getpreferredencoding(do_setlocale=True):
666 """Return the charset that the user is likely using,
667 according to the system configuration."""
668
669 if sys.flags.warn_default_encoding:
670 import warnings
671 warnings.warn(
672 "UTF-8 Mode affects locale.getpreferredencoding(). Consider locale.getencoding() instead.",
673 EncodingWarning, 2)
674 if sys.flags.utf8_mode:
675 return 'utf-8'
676
677 if not do_setlocale:
678 return getencoding()
679
680 old_loc = setlocale(LC_CTYPE)
681 try:
682 try:
683 setlocale(LC_CTYPE, "")
684 except Error:
685 pass
686 return getencoding()
687 finally:
688 setlocale(LC_CTYPE, old_loc)
689
690
691 ### Database
692 #
693 # The following data was extracted from the locale.alias file which
694 # comes with X11 and then hand edited removing the explicit encoding
695 # definitions and adding some more aliases. The file is usually
696 # available as /usr/lib/X11/locale/locale.alias.
697 #
698
699 #
# The locale_encoding_alias table maps lowercase encoding alias names
# to C locale encoding names (case-sensitive). Note that normalize()
# first looks up the encoding in the encodings.aliases dictionary and
# then applies this mapping to find the correct C lib name for the
# encoding.
705 #
# Keys are lowercase encoding aliases; values are the encoding names used
# in the locale codes this module produces.
locale_encoding_alias = {

    # Mappings for non-standard encoding names used in locale names
    '437':                          'C',
    'c':                            'C',
    'en':                           'ISO8859-1',
    'jis':                          'JIS7',
    'jis7':                         'JIS7',
    'ajec':                         'eucJP',
    'koi8c':                        'KOI8-C',
    'microsoftcp1251':              'CP1251',
    'microsoftcp1255':              'CP1255',
    'microsoftcp1256':              'CP1256',
    '88591':                        'ISO8859-1',
    '88592':                        'ISO8859-2',
    '88595':                        'ISO8859-5',
    '885915':                       'ISO8859-15',

    # Mappings from Python codec names to C lib encoding names
    'ascii':                        'ISO8859-1',
    'latin_1':                      'ISO8859-1',
    'iso8859_1':                    'ISO8859-1',
    'iso8859_10':                   'ISO8859-10',
    'iso8859_11':                   'ISO8859-11',
    'iso8859_13':                   'ISO8859-13',
    'iso8859_14':                   'ISO8859-14',
    'iso8859_15':                   'ISO8859-15',
    'iso8859_16':                   'ISO8859-16',
    'iso8859_2':                    'ISO8859-2',
    'iso8859_3':                    'ISO8859-3',
    'iso8859_4':                    'ISO8859-4',
    'iso8859_5':                    'ISO8859-5',
    'iso8859_6':                    'ISO8859-6',
    'iso8859_7':                    'ISO8859-7',
    'iso8859_8':                    'ISO8859-8',
    'iso8859_9':                    'ISO8859-9',
    'iso2022_jp':                   'JIS7',
    'shift_jis':                    'SJIS',
    'tactis':                       'TACTIS',
    'euc_jp':                       'eucJP',
    'euc_kr':                       'eucKR',
    'utf_8':                        'UTF-8',
    'koi8_r':                       'KOI8-R',
    'koi8_t':                       'KOI8-T',
    'koi8_u':                       'KOI8-U',
    'kz1048':                       'RK1048',
    'cp1251':                       'CP1251',
    'cp1255':                       'CP1255',
    'cp1256':                       'CP1256',

    # XXX This list is still incomplete. If you know more
    # mappings, please file a bug report. Thanks.
}
759
# Also register every alias under its underscore-free spelling, without
# overriding entries that are already present.  Iterating over a sorted
# copy keeps the result deterministic and allows the dict to be modified.
for _alias, _target in sorted(locale_encoding_alias.items()):
    locale_encoding_alias.setdefault(_alias.replace('_', ''), _target)
del _alias, _target
764
765 #
766 # The locale_alias table maps lowercase alias names to C locale names
767 # (case-sensitive). Encodings are always separated from the locale
768 # name using a dot ('.'); they should only be given in case the
769 # language name is needed to interpret the given encoding alias
770 # correctly (CJK codes often have this need).
771 #
# Note that the normalize() function, which uses this table,
# removes '_' and '-' characters from the encoding part of the
# locale name before doing the lookup. This saves a lot of
# space in the table.
776 #
777 # MAL 2004-12-10:
778 # Updated alias mapping to most recent locale.alias file
779 # from X.org distribution using makelocalealias.py.
780 #
781 # These are the differences compared to the old mapping (Python 2.4
782 # and older):
783 #
784 # updated 'bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
785 # updated 'bg_bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
786 # updated 'bulgarian' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
787 # updated 'cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2'
788 # updated 'cz_cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2'
789 # updated 'czech' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2'
790 # updated 'dutch' -> 'nl_BE.ISO8859-1' to 'nl_NL.ISO8859-1'
791 # updated 'et' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15'
792 # updated 'et_ee' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15'
793 # updated 'fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15'
794 # updated 'fi_fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15'
795 # updated 'iw' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
796 # updated 'iw_il' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
797 # updated 'japanese' -> 'ja_JP.SJIS' to 'ja_JP.eucJP'
798 # updated 'lt' -> 'lt_LT.ISO8859-4' to 'lt_LT.ISO8859-13'
799 # updated 'lv' -> 'lv_LV.ISO8859-4' to 'lv_LV.ISO8859-13'
800 # updated 'sl' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2'
801 # updated 'slovene' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2'
802 # updated 'th_th' -> 'th_TH.TACTIS' to 'th_TH.ISO8859-11'
803 # updated 'zh_cn' -> 'zh_CN.eucCN' to 'zh_CN.gb2312'
804 # updated 'zh_cn.big5' -> 'zh_TW.eucTW' to 'zh_TW.big5'
805 # updated 'zh_tw' -> 'zh_TW.eucTW' to 'zh_TW.big5'
806 #
807 # MAL 2008-05-30:
808 # Updated alias mapping to most recent locale.alias file
809 # from X.org distribution using makelocalealias.py.
810 #
811 # These are the differences compared to the old mapping (Python 2.5
812 # and older):
813 #
814 # updated 'cs_cs.iso88592' -> 'cs_CZ.ISO8859-2' to 'cs_CS.ISO8859-2'
815 # updated 'serbocroatian' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
816 # updated 'sh' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
817 # updated 'sh_hr.iso88592' -> 'sh_HR.ISO8859-2' to 'hr_HR.ISO8859-2'
818 # updated 'sh_sp' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
819 # updated 'sh_yu' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
820 # updated 'sp' -> 'sp_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
821 # updated 'sp_yu' -> 'sp_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
822 # updated 'sr' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
823 # updated 'sr@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
824 # updated 'sr_sp' -> 'sr_SP.ISO8859-2' to 'sr_CS.ISO8859-2'
825 # updated 'sr_yu' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
826 # updated 'sr_yu.cp1251@cyrillic' -> 'sr_YU.CP1251' to 'sr_CS.CP1251'
827 # updated 'sr_yu.iso88592' -> 'sr_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
828 # updated 'sr_yu.iso88595' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
829 # updated 'sr_yu.iso88595@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
830 # updated 'sr_yu.microsoftcp1251@cyrillic' -> 'sr_YU.CP1251' to 'sr_CS.CP1251'
831 # updated 'sr_yu.utf8@cyrillic' -> 'sr_YU.UTF-8' to 'sr_CS.UTF-8'
832 # updated 'sr_yu@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
833 #
834 # AP 2010-04-12:
835 # Updated alias mapping to most recent locale.alias file
836 # from X.org distribution using makelocalealias.py.
837 #
838 # These are the differences compared to the old mapping (Python 2.6.5
839 # and older):
840 #
841 # updated 'ru' -> 'ru_RU.ISO8859-5' to 'ru_RU.UTF-8'
842 # updated 'ru_ru' -> 'ru_RU.ISO8859-5' to 'ru_RU.UTF-8'
843 # updated 'serbocroatian' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
844 # updated 'sh' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
845 # updated 'sh_yu' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
846 # updated 'sr' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
847 # updated 'sr@cyrillic' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
848 # updated 'sr@latn' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
849 # updated 'sr_cs.utf8@latn' -> 'sr_CS.UTF-8' to 'sr_RS.UTF-8@latin'
850 # updated 'sr_cs@latn' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
851 # updated 'sr_yu' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8@latin'
852 # updated 'sr_yu.utf8@cyrillic' -> 'sr_CS.UTF-8' to 'sr_RS.UTF-8'
853 # updated 'sr_yu@cyrillic' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
854 #
855 # SS 2013-12-20:
856 # Updated alias mapping to most recent locale.alias file
857 # from X.org distribution using makelocalealias.py.
858 #
859 # These are the differences compared to the old mapping (Python 3.3.3
860 # and older):
861 #
862 # updated 'a3' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
863 # updated 'a3_az' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
864 # updated 'a3_az.koi8c' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
865 # updated 'cs_cs.iso88592' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2'
866 # updated 'hebrew' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
867 # updated 'hebrew.iso88598' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
868 # updated 'sd' -> 'sd_IN@devanagari.UTF-8' to 'sd_IN.UTF-8'
869 # updated 'sr@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
870 # updated 'sr_cs' -> 'sr_RS.UTF-8' to 'sr_CS.UTF-8'
871 # updated 'sr_cs.utf8@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
872 # updated 'sr_cs@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
873 #
874 # SS 2014-10-01:
875 # Updated alias mapping with glibc 2.19 supported locales.
876 #
877 # SS 2018-05-05:
878 # Updated alias mapping with glibc 2.27 supported locales.
879 #
880 # These are the differences compared to the old mapping (Python 3.6.5
881 # and older):
882 #
883 # updated 'ca_es@valencia' -> 'ca_ES.ISO8859-15@valencia' to 'ca_ES.UTF-8@valencia'
884 # updated 'kk_kz' -> 'kk_KZ.RK1048' to 'kk_KZ.ptcp154'
885 # updated 'russian' -> 'ru_RU.ISO8859-5' to 'ru_RU.KOI8-R'
886
887 locale_alias = {
888 'a3': 'az_AZ.KOI8-C',
889 'a3_az': 'az_AZ.KOI8-C',
890 'a3_az.koic': 'az_AZ.KOI8-C',
891 'aa_dj': 'aa_DJ.ISO8859-1',
892 'aa_er': 'aa_ER.UTF-8',
893 'aa_et': 'aa_ET.UTF-8',
894 'af': 'af_ZA.ISO8859-1',
895 'af_za': 'af_ZA.ISO8859-1',
896 'agr_pe': 'agr_PE.UTF-8',
897 'ak_gh': 'ak_GH.UTF-8',
898 'am': 'am_ET.UTF-8',
899 'am_et': 'am_ET.UTF-8',
900 'american': 'en_US.ISO8859-1',
901 'an_es': 'an_ES.ISO8859-15',
902 'anp_in': 'anp_IN.UTF-8',
903 'ar': 'ar_AA.ISO8859-6',
904 'ar_aa': 'ar_AA.ISO8859-6',
905 'ar_ae': 'ar_AE.ISO8859-6',
906 'ar_bh': 'ar_BH.ISO8859-6',
907 'ar_dz': 'ar_DZ.ISO8859-6',
908 'ar_eg': 'ar_EG.ISO8859-6',
909 'ar_in': 'ar_IN.UTF-8',
910 'ar_iq': 'ar_IQ.ISO8859-6',
911 'ar_jo': 'ar_JO.ISO8859-6',
912 'ar_kw': 'ar_KW.ISO8859-6',
913 'ar_lb': 'ar_LB.ISO8859-6',
914 'ar_ly': 'ar_LY.ISO8859-6',
915 'ar_ma': 'ar_MA.ISO8859-6',
916 'ar_om': 'ar_OM.ISO8859-6',
917 'ar_qa': 'ar_QA.ISO8859-6',
918 'ar_sa': 'ar_SA.ISO8859-6',
919 'ar_sd': 'ar_SD.ISO8859-6',
920 'ar_ss': 'ar_SS.UTF-8',
921 'ar_sy': 'ar_SY.ISO8859-6',
922 'ar_tn': 'ar_TN.ISO8859-6',
923 'ar_ye': 'ar_YE.ISO8859-6',
924 'arabic': 'ar_AA.ISO8859-6',
925 'as': 'as_IN.UTF-8',
926 'as_in': 'as_IN.UTF-8',
927 'ast_es': 'ast_ES.ISO8859-15',
928 'ayc_pe': 'ayc_PE.UTF-8',
929 'az': 'az_AZ.ISO8859-9E',
930 'az_az': 'az_AZ.ISO8859-9E',
931 'az_az.iso88599e': 'az_AZ.ISO8859-9E',
932 'az_ir': 'az_IR.UTF-8',
933 'be': 'be_BY.CP1251',
934 'be@latin': 'be_BY.UTF-8@latin',
935 'be_bg.utf8': 'bg_BG.UTF-8',
936 'be_by': 'be_BY.CP1251',
937 'be_by@latin': 'be_BY.UTF-8@latin',
938 'bem_zm': 'bem_ZM.UTF-8',
939 'ber_dz': 'ber_DZ.UTF-8',
940 'ber_ma': 'ber_MA.UTF-8',
941 'bg': 'bg_BG.CP1251',
942 'bg_bg': 'bg_BG.CP1251',
943 'bhb_in.utf8': 'bhb_IN.UTF-8',
944 'bho_in': 'bho_IN.UTF-8',
945 'bho_np': 'bho_NP.UTF-8',
946 'bi_vu': 'bi_VU.UTF-8',
947 'bn_bd': 'bn_BD.UTF-8',
948 'bn_in': 'bn_IN.UTF-8',
949 'bo_cn': 'bo_CN.UTF-8',
950 'bo_in': 'bo_IN.UTF-8',
951 'bokmal': 'nb_NO.ISO8859-1',
952 'bokm\xe5l': 'nb_NO.ISO8859-1',
953 'br': 'br_FR.ISO8859-1',
954 'br_fr': 'br_FR.ISO8859-1',
955 'brx_in': 'brx_IN.UTF-8',
956 'bs': 'bs_BA.ISO8859-2',
957 'bs_ba': 'bs_BA.ISO8859-2',
958 'bulgarian': 'bg_BG.CP1251',
959 'byn_er': 'byn_ER.UTF-8',
960 'c': 'C',
961 'c-french': 'fr_CA.ISO8859-1',
962 'c.ascii': 'C',
963 'c.en': 'C',
964 'c.iso88591': 'en_US.ISO8859-1',
965 'c.utf8': 'C.UTF-8',
966 'c_c': 'C',
967 'c_c.c': 'C',
968 'ca': 'ca_ES.ISO8859-1',
969 'ca_ad': 'ca_AD.ISO8859-1',
970 'ca_es': 'ca_ES.ISO8859-1',
971 'ca_es@valencia': 'ca_ES.UTF-8@valencia',
972 'ca_fr': 'ca_FR.ISO8859-1',
973 'ca_it': 'ca_IT.ISO8859-1',
974 'catalan': 'ca_ES.ISO8859-1',
975 'ce_ru': 'ce_RU.UTF-8',
976 'cextend': 'en_US.ISO8859-1',
977 'chinese-s': 'zh_CN.eucCN',
978 'chinese-t': 'zh_TW.eucTW',
979 'chr_us': 'chr_US.UTF-8',
980 'ckb_iq': 'ckb_IQ.UTF-8',
981 'cmn_tw': 'cmn_TW.UTF-8',
982 'crh_ua': 'crh_UA.UTF-8',
983 'croatian': 'hr_HR.ISO8859-2',
984 'cs': 'cs_CZ.ISO8859-2',
985 'cs_cs': 'cs_CZ.ISO8859-2',
986 'cs_cz': 'cs_CZ.ISO8859-2',
987 'csb_pl': 'csb_PL.UTF-8',
988 'cv_ru': 'cv_RU.UTF-8',
989 'cy': 'cy_GB.ISO8859-1',
990 'cy_gb': 'cy_GB.ISO8859-1',
991 'cz': 'cs_CZ.ISO8859-2',
992 'cz_cz': 'cs_CZ.ISO8859-2',
993 'czech': 'cs_CZ.ISO8859-2',
994 'da': 'da_DK.ISO8859-1',
995 'da_dk': 'da_DK.ISO8859-1',
996 'danish': 'da_DK.ISO8859-1',
997 'dansk': 'da_DK.ISO8859-1',
998 'de': 'de_DE.ISO8859-1',
999 'de_at': 'de_AT.ISO8859-1',
1000 'de_be': 'de_BE.ISO8859-1',
1001 'de_ch': 'de_CH.ISO8859-1',
1002 'de_de': 'de_DE.ISO8859-1',
1003 'de_it': 'de_IT.ISO8859-1',
1004 'de_li.utf8': 'de_LI.UTF-8',
1005 'de_lu': 'de_LU.ISO8859-1',
1006 'deutsch': 'de_DE.ISO8859-1',
1007 'doi_in': 'doi_IN.UTF-8',
1008 'dutch': 'nl_NL.ISO8859-1',
1009 'dutch.iso88591': 'nl_BE.ISO8859-1',
1010 'dv_mv': 'dv_MV.UTF-8',
1011 'dz_bt': 'dz_BT.UTF-8',
1012 'ee': 'ee_EE.ISO8859-4',
1013 'ee_ee': 'ee_EE.ISO8859-4',
1014 'eesti': 'et_EE.ISO8859-1',
1015 'el': 'el_GR.ISO8859-7',
1016 'el_cy': 'el_CY.ISO8859-7',
1017 'el_gr': 'el_GR.ISO8859-7',
1018 'el_gr@euro': 'el_GR.ISO8859-15',
1019 'en': 'en_US.ISO8859-1',
1020 'en_ag': 'en_AG.UTF-8',
1021 'en_au': 'en_AU.ISO8859-1',
1022 'en_be': 'en_BE.ISO8859-1',
1023 'en_bw': 'en_BW.ISO8859-1',
1024 'en_ca': 'en_CA.ISO8859-1',
1025 'en_dk': 'en_DK.ISO8859-1',
1026 'en_dl.utf8': 'en_DL.UTF-8',
1027 'en_gb': 'en_GB.ISO8859-1',
1028 'en_hk': 'en_HK.ISO8859-1',
1029 'en_ie': 'en_IE.ISO8859-1',
1030 'en_il': 'en_IL.UTF-8',
1031 'en_in': 'en_IN.ISO8859-1',
1032 'en_ng': 'en_NG.UTF-8',
1033 'en_nz': 'en_NZ.ISO8859-1',
1034 'en_ph': 'en_PH.ISO8859-1',
1035 'en_sc.utf8': 'en_SC.UTF-8',
1036 'en_sg': 'en_SG.ISO8859-1',
1037 'en_uk': 'en_GB.ISO8859-1',
1038 'en_us': 'en_US.ISO8859-1',
1039 'en_us@euro@euro': 'en_US.ISO8859-15',
1040 'en_za': 'en_ZA.ISO8859-1',
1041 'en_zm': 'en_ZM.UTF-8',
1042 'en_zw': 'en_ZW.ISO8859-1',
1043 'en_zw.utf8': 'en_ZS.UTF-8',
1044 'eng_gb': 'en_GB.ISO8859-1',
1045 'english': 'en_EN.ISO8859-1',
1046 'english.iso88591': 'en_US.ISO8859-1',
1047 'english_uk': 'en_GB.ISO8859-1',
1048 'english_united-states': 'en_US.ISO8859-1',
1049 'english_united-states.437': 'C',
1050 'english_us': 'en_US.ISO8859-1',
1051 'eo': 'eo_XX.ISO8859-3',
1052 'eo.utf8': 'eo.UTF-8',
1053 'eo_eo': 'eo_EO.ISO8859-3',
1054 'eo_us.utf8': 'eo_US.UTF-8',
1055 'eo_xx': 'eo_XX.ISO8859-3',
1056 'es': 'es_ES.ISO8859-1',
1057 'es_ar': 'es_AR.ISO8859-1',
1058 'es_bo': 'es_BO.ISO8859-1',
1059 'es_cl': 'es_CL.ISO8859-1',
1060 'es_co': 'es_CO.ISO8859-1',
1061 'es_cr': 'es_CR.ISO8859-1',
1062 'es_cu': 'es_CU.UTF-8',
1063 'es_do': 'es_DO.ISO8859-1',
1064 'es_ec': 'es_EC.ISO8859-1',
1065 'es_es': 'es_ES.ISO8859-1',
1066 'es_gt': 'es_GT.ISO8859-1',
1067 'es_hn': 'es_HN.ISO8859-1',
1068 'es_mx': 'es_MX.ISO8859-1',
1069 'es_ni': 'es_NI.ISO8859-1',
1070 'es_pa': 'es_PA.ISO8859-1',
1071 'es_pe': 'es_PE.ISO8859-1',
1072 'es_pr': 'es_PR.ISO8859-1',
1073 'es_py': 'es_PY.ISO8859-1',
1074 'es_sv': 'es_SV.ISO8859-1',
1075 'es_us': 'es_US.ISO8859-1',
1076 'es_uy': 'es_UY.ISO8859-1',
1077 'es_ve': 'es_VE.ISO8859-1',
1078 'estonian': 'et_EE.ISO8859-1',
1079 'et': 'et_EE.ISO8859-15',
1080 'et_ee': 'et_EE.ISO8859-15',
1081 'eu': 'eu_ES.ISO8859-1',
1082 'eu_es': 'eu_ES.ISO8859-1',
1083 'eu_fr': 'eu_FR.ISO8859-1',
1084 'fa': 'fa_IR.UTF-8',
1085 'fa_ir': 'fa_IR.UTF-8',
1086 'fa_ir.isiri3342': 'fa_IR.ISIRI-3342',
1087 'ff_sn': 'ff_SN.UTF-8',
1088 'fi': 'fi_FI.ISO8859-15',
1089 'fi_fi': 'fi_FI.ISO8859-15',
1090 'fil_ph': 'fil_PH.UTF-8',
1091 'finnish': 'fi_FI.ISO8859-1',
1092 'fo': 'fo_FO.ISO8859-1',
1093 'fo_fo': 'fo_FO.ISO8859-1',
1094 'fr': 'fr_FR.ISO8859-1',
1095 'fr_be': 'fr_BE.ISO8859-1',
1096 'fr_ca': 'fr_CA.ISO8859-1',
1097 'fr_ch': 'fr_CH.ISO8859-1',
1098 'fr_fr': 'fr_FR.ISO8859-1',
1099 'fr_lu': 'fr_LU.ISO8859-1',
1100 'fran\xe7ais': 'fr_FR.ISO8859-1',
1101 'fre_fr': 'fr_FR.ISO8859-1',
1102 'french': 'fr_FR.ISO8859-1',
1103 'french.iso88591': 'fr_CH.ISO8859-1',
1104 'french_france': 'fr_FR.ISO8859-1',
1105 'fur_it': 'fur_IT.UTF-8',
1106 'fy_de': 'fy_DE.UTF-8',
1107 'fy_nl': 'fy_NL.UTF-8',
1108 'ga': 'ga_IE.ISO8859-1',
1109 'ga_ie': 'ga_IE.ISO8859-1',
1110 'galego': 'gl_ES.ISO8859-1',
1111 'galician': 'gl_ES.ISO8859-1',
1112 'gd': 'gd_GB.ISO8859-1',
1113 'gd_gb': 'gd_GB.ISO8859-1',
1114 'ger_de': 'de_DE.ISO8859-1',
1115 'german': 'de_DE.ISO8859-1',
1116 'german.iso88591': 'de_CH.ISO8859-1',
1117 'german_germany': 'de_DE.ISO8859-1',
1118 'gez_er': 'gez_ER.UTF-8',
1119 'gez_et': 'gez_ET.UTF-8',
1120 'gl': 'gl_ES.ISO8859-1',
1121 'gl_es': 'gl_ES.ISO8859-1',
1122 'greek': 'el_GR.ISO8859-7',
1123 'gu_in': 'gu_IN.UTF-8',
1124 'gv': 'gv_GB.ISO8859-1',
1125 'gv_gb': 'gv_GB.ISO8859-1',
1126 'ha_ng': 'ha_NG.UTF-8',
1127 'hak_tw': 'hak_TW.UTF-8',
1128 'he': 'he_IL.ISO8859-8',
1129 'he_il': 'he_IL.ISO8859-8',
1130 'hebrew': 'he_IL.ISO8859-8',
1131 'hi': 'hi_IN.ISCII-DEV',
1132 'hi_in': 'hi_IN.ISCII-DEV',
1133 'hi_in.isciidev': 'hi_IN.ISCII-DEV',
1134 'hif_fj': 'hif_FJ.UTF-8',
1135 'hne': 'hne_IN.UTF-8',
1136 'hne_in': 'hne_IN.UTF-8',
1137 'hr': 'hr_HR.ISO8859-2',
1138 'hr_hr': 'hr_HR.ISO8859-2',
1139 'hrvatski': 'hr_HR.ISO8859-2',
1140 'hsb_de': 'hsb_DE.ISO8859-2',
1141 'ht_ht': 'ht_HT.UTF-8',
1142 'hu': 'hu_HU.ISO8859-2',
1143 'hu_hu': 'hu_HU.ISO8859-2',
1144 'hungarian': 'hu_HU.ISO8859-2',
1145 'hy_am': 'hy_AM.UTF-8',
1146 'hy_am.armscii8': 'hy_AM.ARMSCII_8',
1147 'ia': 'ia.UTF-8',
1148 'ia_fr': 'ia_FR.UTF-8',
1149 'icelandic': 'is_IS.ISO8859-1',
1150 'id': 'id_ID.ISO8859-1',
1151 'id_id': 'id_ID.ISO8859-1',
1152 'ig_ng': 'ig_NG.UTF-8',
1153 'ik_ca': 'ik_CA.UTF-8',
1154 'in': 'id_ID.ISO8859-1',
1155 'in_id': 'id_ID.ISO8859-1',
1156 'is': 'is_IS.ISO8859-1',
1157 'is_is': 'is_IS.ISO8859-1',
1158 'iso-8859-1': 'en_US.ISO8859-1',
1159 'iso-8859-15': 'en_US.ISO8859-15',
1160 'iso8859-1': 'en_US.ISO8859-1',
1161 'iso8859-15': 'en_US.ISO8859-15',
1162 'iso_8859_1': 'en_US.ISO8859-1',
1163 'iso_8859_15': 'en_US.ISO8859-15',
1164 'it': 'it_IT.ISO8859-1',
1165 'it_ch': 'it_CH.ISO8859-1',
1166 'it_it': 'it_IT.ISO8859-1',
1167 'italian': 'it_IT.ISO8859-1',
1168 'iu': 'iu_CA.NUNACOM-8',
1169 'iu_ca': 'iu_CA.NUNACOM-8',
1170 'iu_ca.nunacom8': 'iu_CA.NUNACOM-8',
1171 'iw': 'he_IL.ISO8859-8',
1172 'iw_il': 'he_IL.ISO8859-8',
1173 'iw_il.utf8': 'iw_IL.UTF-8',
1174 'ja': 'ja_JP.eucJP',
1175 'ja_jp': 'ja_JP.eucJP',
1176 'ja_jp.euc': 'ja_JP.eucJP',
1177 'ja_jp.mscode': 'ja_JP.SJIS',
1178 'ja_jp.pck': 'ja_JP.SJIS',
1179 'japan': 'ja_JP.eucJP',
1180 'japanese': 'ja_JP.eucJP',
1181 'japanese-euc': 'ja_JP.eucJP',
1182 'japanese.euc': 'ja_JP.eucJP',
1183 'jp_jp': 'ja_JP.eucJP',
1184 'ka': 'ka_GE.GEORGIAN-ACADEMY',
1185 'ka_ge': 'ka_GE.GEORGIAN-ACADEMY',
1186 'ka_ge.georgianacademy': 'ka_GE.GEORGIAN-ACADEMY',
1187 'ka_ge.georgianps': 'ka_GE.GEORGIAN-PS',
1188 'ka_ge.georgianrs': 'ka_GE.GEORGIAN-ACADEMY',
1189 'kab_dz': 'kab_DZ.UTF-8',
1190 'kk_kz': 'kk_KZ.ptcp154',
1191 'kl': 'kl_GL.ISO8859-1',
1192 'kl_gl': 'kl_GL.ISO8859-1',
1193 'km_kh': 'km_KH.UTF-8',
1194 'kn': 'kn_IN.UTF-8',
1195 'kn_in': 'kn_IN.UTF-8',
1196 'ko': 'ko_KR.eucKR',
1197 'ko_kr': 'ko_KR.eucKR',
1198 'ko_kr.euc': 'ko_KR.eucKR',
1199 'kok_in': 'kok_IN.UTF-8',
1200 'korean': 'ko_KR.eucKR',
1201 'korean.euc': 'ko_KR.eucKR',
1202 'ks': 'ks_IN.UTF-8',
1203 'ks_in': 'ks_IN.UTF-8',
1204 'ks_in@devanagari.utf8': 'ks_IN.UTF-8@devanagari',
1205 'ku_tr': 'ku_TR.ISO8859-9',
1206 'kw': 'kw_GB.ISO8859-1',
1207 'kw_gb': 'kw_GB.ISO8859-1',
1208 'ky': 'ky_KG.UTF-8',
1209 'ky_kg': 'ky_KG.UTF-8',
1210 'lb_lu': 'lb_LU.UTF-8',
1211 'lg_ug': 'lg_UG.ISO8859-10',
1212 'li_be': 'li_BE.UTF-8',
1213 'li_nl': 'li_NL.UTF-8',
1214 'lij_it': 'lij_IT.UTF-8',
1215 'lithuanian': 'lt_LT.ISO8859-13',
1216 'ln_cd': 'ln_CD.UTF-8',
1217 'lo': 'lo_LA.MULELAO-1',
1218 'lo_la': 'lo_LA.MULELAO-1',
1219 'lo_la.cp1133': 'lo_LA.IBM-CP1133',
1220 'lo_la.ibmcp1133': 'lo_LA.IBM-CP1133',
1221 'lo_la.mulelao1': 'lo_LA.MULELAO-1',
1222 'lt': 'lt_LT.ISO8859-13',
1223 'lt_lt': 'lt_LT.ISO8859-13',
1224 'lv': 'lv_LV.ISO8859-13',
1225 'lv_lv': 'lv_LV.ISO8859-13',
1226 'lzh_tw': 'lzh_TW.UTF-8',
1227 'mag_in': 'mag_IN.UTF-8',
1228 'mai': 'mai_IN.UTF-8',
1229 'mai_in': 'mai_IN.UTF-8',
1230 'mai_np': 'mai_NP.UTF-8',
1231 'mfe_mu': 'mfe_MU.UTF-8',
1232 'mg_mg': 'mg_MG.ISO8859-15',
1233 'mhr_ru': 'mhr_RU.UTF-8',
1234 'mi': 'mi_NZ.ISO8859-1',
1235 'mi_nz': 'mi_NZ.ISO8859-1',
1236 'miq_ni': 'miq_NI.UTF-8',
1237 'mjw_in': 'mjw_IN.UTF-8',
1238 'mk': 'mk_MK.ISO8859-5',
1239 'mk_mk': 'mk_MK.ISO8859-5',
1240 'ml': 'ml_IN.UTF-8',
1241 'ml_in': 'ml_IN.UTF-8',
1242 'mn_mn': 'mn_MN.UTF-8',
1243 'mni_in': 'mni_IN.UTF-8',
1244 'mr': 'mr_IN.UTF-8',
1245 'mr_in': 'mr_IN.UTF-8',
1246 'ms': 'ms_MY.ISO8859-1',
1247 'ms_my': 'ms_MY.ISO8859-1',
1248 'mt': 'mt_MT.ISO8859-3',
1249 'mt_mt': 'mt_MT.ISO8859-3',
1250 'my_mm': 'my_MM.UTF-8',
1251 'nan_tw': 'nan_TW.UTF-8',
1252 'nb': 'nb_NO.ISO8859-1',
1253 'nb_no': 'nb_NO.ISO8859-1',
1254 'nds_de': 'nds_DE.UTF-8',
1255 'nds_nl': 'nds_NL.UTF-8',
1256 'ne_np': 'ne_NP.UTF-8',
1257 'nhn_mx': 'nhn_MX.UTF-8',
1258 'niu_nu': 'niu_NU.UTF-8',
1259 'niu_nz': 'niu_NZ.UTF-8',
1260 'nl': 'nl_NL.ISO8859-1',
1261 'nl_aw': 'nl_AW.UTF-8',
1262 'nl_be': 'nl_BE.ISO8859-1',
1263 'nl_nl': 'nl_NL.ISO8859-1',
1264 'nn': 'nn_NO.ISO8859-1',
1265 'nn_no': 'nn_NO.ISO8859-1',
1266 'no': 'no_NO.ISO8859-1',
1267 'no@nynorsk': 'ny_NO.ISO8859-1',
1268 'no_no': 'no_NO.ISO8859-1',
1269 'no_no.iso88591@bokmal': 'no_NO.ISO8859-1',
1270 'no_no.iso88591@nynorsk': 'no_NO.ISO8859-1',
1271 'norwegian': 'no_NO.ISO8859-1',
1272 'nr': 'nr_ZA.ISO8859-1',
1273 'nr_za': 'nr_ZA.ISO8859-1',
1274 'nso': 'nso_ZA.ISO8859-15',
1275 'nso_za': 'nso_ZA.ISO8859-15',
1276 'ny': 'ny_NO.ISO8859-1',
1277 'ny_no': 'ny_NO.ISO8859-1',
1278 'nynorsk': 'nn_NO.ISO8859-1',
1279 'oc': 'oc_FR.ISO8859-1',
1280 'oc_fr': 'oc_FR.ISO8859-1',
1281 'om_et': 'om_ET.UTF-8',
1282 'om_ke': 'om_KE.ISO8859-1',
1283 'or': 'or_IN.UTF-8',
1284 'or_in': 'or_IN.UTF-8',
1285 'os_ru': 'os_RU.UTF-8',
1286 'pa': 'pa_IN.UTF-8',
1287 'pa_in': 'pa_IN.UTF-8',
1288 'pa_pk': 'pa_PK.UTF-8',
1289 'pap_an': 'pap_AN.UTF-8',
1290 'pap_aw': 'pap_AW.UTF-8',
1291 'pap_cw': 'pap_CW.UTF-8',
1292 'pd': 'pd_US.ISO8859-1',
1293 'pd_de': 'pd_DE.ISO8859-1',
1294 'pd_us': 'pd_US.ISO8859-1',
1295 'ph': 'ph_PH.ISO8859-1',
1296 'ph_ph': 'ph_PH.ISO8859-1',
1297 'pl': 'pl_PL.ISO8859-2',
1298 'pl_pl': 'pl_PL.ISO8859-2',
1299 'polish': 'pl_PL.ISO8859-2',
1300 'portuguese': 'pt_PT.ISO8859-1',
1301 'portuguese_brazil': 'pt_BR.ISO8859-1',
1302 'posix': 'C',
1303 'posix-utf2': 'C',
1304 'pp': 'pp_AN.ISO8859-1',
1305 'pp_an': 'pp_AN.ISO8859-1',
1306 'ps_af': 'ps_AF.UTF-8',
1307 'pt': 'pt_PT.ISO8859-1',
1308 'pt_br': 'pt_BR.ISO8859-1',
1309 'pt_pt': 'pt_PT.ISO8859-1',
1310 'quz_pe': 'quz_PE.UTF-8',
1311 'raj_in': 'raj_IN.UTF-8',
1312 'ro': 'ro_RO.ISO8859-2',
1313 'ro_ro': 'ro_RO.ISO8859-2',
1314 'romanian': 'ro_RO.ISO8859-2',
1315 'ru': 'ru_RU.UTF-8',
1316 'ru_ru': 'ru_RU.UTF-8',
1317 'ru_ua': 'ru_UA.KOI8-U',
1318 'rumanian': 'ro_RO.ISO8859-2',
1319 'russian': 'ru_RU.KOI8-R',
1320 'rw': 'rw_RW.ISO8859-1',
1321 'rw_rw': 'rw_RW.ISO8859-1',
1322 'sa_in': 'sa_IN.UTF-8',
1323 'sat_in': 'sat_IN.UTF-8',
1324 'sc_it': 'sc_IT.UTF-8',
1325 'sd': 'sd_IN.UTF-8',
1326 'sd_in': 'sd_IN.UTF-8',
1327 'sd_in@devanagari.utf8': 'sd_IN.UTF-8@devanagari',
1328 'sd_pk': 'sd_PK.UTF-8',
1329 'se_no': 'se_NO.UTF-8',
1330 'serbocroatian': 'sr_RS.UTF-8@latin',
1331 'sgs_lt': 'sgs_LT.UTF-8',
1332 'sh': 'sr_RS.UTF-8@latin',
1333 'sh_ba.iso88592@bosnia': 'sr_CS.ISO8859-2',
1334 'sh_hr': 'sh_HR.ISO8859-2',
1335 'sh_hr.iso88592': 'hr_HR.ISO8859-2',
1336 'sh_sp': 'sr_CS.ISO8859-2',
1337 'sh_yu': 'sr_RS.UTF-8@latin',
1338 'shn_mm': 'shn_MM.UTF-8',
1339 'shs_ca': 'shs_CA.UTF-8',
1340 'si': 'si_LK.UTF-8',
1341 'si_lk': 'si_LK.UTF-8',
1342 'sid_et': 'sid_ET.UTF-8',
1343 'sinhala': 'si_LK.UTF-8',
1344 'sk': 'sk_SK.ISO8859-2',
1345 'sk_sk': 'sk_SK.ISO8859-2',
1346 'sl': 'sl_SI.ISO8859-2',
1347 'sl_cs': 'sl_CS.ISO8859-2',
1348 'sl_si': 'sl_SI.ISO8859-2',
1349 'slovak': 'sk_SK.ISO8859-2',
1350 'slovene': 'sl_SI.ISO8859-2',
1351 'slovenian': 'sl_SI.ISO8859-2',
1352 'sm_ws': 'sm_WS.UTF-8',
1353 'so_dj': 'so_DJ.ISO8859-1',
1354 'so_et': 'so_ET.UTF-8',
1355 'so_ke': 'so_KE.ISO8859-1',
1356 'so_so': 'so_SO.ISO8859-1',
1357 'sp': 'sr_CS.ISO8859-5',
1358 'sp_yu': 'sr_CS.ISO8859-5',
1359 'spanish': 'es_ES.ISO8859-1',
1360 'spanish_spain': 'es_ES.ISO8859-1',
1361 'sq': 'sq_AL.ISO8859-2',
1362 'sq_al': 'sq_AL.ISO8859-2',
1363 'sq_mk': 'sq_MK.UTF-8',
1364 'sr': 'sr_RS.UTF-8',
1365 'sr@cyrillic': 'sr_RS.UTF-8',
1366 'sr@latn': 'sr_CS.UTF-8@latin',
1367 'sr_cs': 'sr_CS.UTF-8',
1368 'sr_cs.iso88592@latn': 'sr_CS.ISO8859-2',
1369 'sr_cs@latn': 'sr_CS.UTF-8@latin',
1370 'sr_me': 'sr_ME.UTF-8',
1371 'sr_rs': 'sr_RS.UTF-8',
1372 'sr_rs@latn': 'sr_RS.UTF-8@latin',
1373 'sr_sp': 'sr_CS.ISO8859-2',
1374 'sr_yu': 'sr_RS.UTF-8@latin',
1375 'sr_yu.cp1251@cyrillic': 'sr_CS.CP1251',
1376 'sr_yu.iso88592': 'sr_CS.ISO8859-2',
1377 'sr_yu.iso88595': 'sr_CS.ISO8859-5',
1378 'sr_yu.iso88595@cyrillic': 'sr_CS.ISO8859-5',
1379 'sr_yu.microsoftcp1251@cyrillic': 'sr_CS.CP1251',
1380 'sr_yu.utf8': 'sr_RS.UTF-8',
1381 'sr_yu.utf8@cyrillic': 'sr_RS.UTF-8',
1382 'sr_yu@cyrillic': 'sr_RS.UTF-8',
1383 'ss': 'ss_ZA.ISO8859-1',
1384 'ss_za': 'ss_ZA.ISO8859-1',
1385 'st': 'st_ZA.ISO8859-1',
1386 'st_za': 'st_ZA.ISO8859-1',
1387 'sv': 'sv_SE.ISO8859-1',
1388 'sv_fi': 'sv_FI.ISO8859-1',
1389 'sv_se': 'sv_SE.ISO8859-1',
1390 'sw_ke': 'sw_KE.UTF-8',
1391 'sw_tz': 'sw_TZ.UTF-8',
1392 'swedish': 'sv_SE.ISO8859-1',
1393 'szl_pl': 'szl_PL.UTF-8',
1394 'ta': 'ta_IN.TSCII-0',
1395 'ta_in': 'ta_IN.TSCII-0',
1396 'ta_in.tscii': 'ta_IN.TSCII-0',
1397 'ta_in.tscii0': 'ta_IN.TSCII-0',
1398 'ta_lk': 'ta_LK.UTF-8',
1399 'tcy_in.utf8': 'tcy_IN.UTF-8',
1400 'te': 'te_IN.UTF-8',
1401 'te_in': 'te_IN.UTF-8',
1402 'tg': 'tg_TJ.KOI8-C',
1403 'tg_tj': 'tg_TJ.KOI8-C',
1404 'th': 'th_TH.ISO8859-11',
1405 'th_th': 'th_TH.ISO8859-11',
1406 'th_th.tactis': 'th_TH.TIS620',
1407 'th_th.tis620': 'th_TH.TIS620',
1408 'thai': 'th_TH.ISO8859-11',
1409 'the_np': 'the_NP.UTF-8',
1410 'ti_er': 'ti_ER.UTF-8',
1411 'ti_et': 'ti_ET.UTF-8',
1412 'tig_er': 'tig_ER.UTF-8',
1413 'tk_tm': 'tk_TM.UTF-8',
1414 'tl': 'tl_PH.ISO8859-1',
1415 'tl_ph': 'tl_PH.ISO8859-1',
1416 'tn': 'tn_ZA.ISO8859-15',
1417 'tn_za': 'tn_ZA.ISO8859-15',
1418 'to_to': 'to_TO.UTF-8',
1419 'tpi_pg': 'tpi_PG.UTF-8',
1420 'tr': 'tr_TR.ISO8859-9',
1421 'tr_cy': 'tr_CY.ISO8859-9',
1422 'tr_tr': 'tr_TR.ISO8859-9',
1423 'ts': 'ts_ZA.ISO8859-1',
1424 'ts_za': 'ts_ZA.ISO8859-1',
1425 'tt': 'tt_RU.TATAR-CYR',
1426 'tt_ru': 'tt_RU.TATAR-CYR',
1427 'tt_ru.tatarcyr': 'tt_RU.TATAR-CYR',
1428 'tt_ru@iqtelif': 'tt_RU.UTF-8@iqtelif',
1429 'turkish': 'tr_TR.ISO8859-9',
1430 'ug_cn': 'ug_CN.UTF-8',
1431 'uk': 'uk_UA.KOI8-U',
1432 'uk_ua': 'uk_UA.KOI8-U',
1433 'univ': 'en_US.utf',
1434 'universal': 'en_US.utf',
1435 'universal.utf8@ucs4': 'en_US.UTF-8',
1436 'unm_us': 'unm_US.UTF-8',
1437 'ur': 'ur_PK.CP1256',
1438 'ur_in': 'ur_IN.UTF-8',
1439 'ur_pk': 'ur_PK.CP1256',
1440 'uz': 'uz_UZ.UTF-8',
1441 'uz_uz': 'uz_UZ.UTF-8',
1442 'uz_uz@cyrillic': 'uz_UZ.UTF-8',
1443 've': 've_ZA.UTF-8',
1444 've_za': 've_ZA.UTF-8',
1445 'vi': 'vi_VN.TCVN',
1446 'vi_vn': 'vi_VN.TCVN',
1447 'vi_vn.tcvn': 'vi_VN.TCVN',
1448 'vi_vn.tcvn5712': 'vi_VN.TCVN',
1449 'vi_vn.viscii': 'vi_VN.VISCII',
1450 'vi_vn.viscii111': 'vi_VN.VISCII',
1451 'wa': 'wa_BE.ISO8859-1',
1452 'wa_be': 'wa_BE.ISO8859-1',
1453 'wae_ch': 'wae_CH.UTF-8',
1454 'wal_et': 'wal_ET.UTF-8',
1455 'wo_sn': 'wo_SN.UTF-8',
1456 'xh': 'xh_ZA.ISO8859-1',
1457 'xh_za': 'xh_ZA.ISO8859-1',
1458 'yi': 'yi_US.CP1255',
1459 'yi_us': 'yi_US.CP1255',
1460 'yo_ng': 'yo_NG.UTF-8',
1461 'yue_hk': 'yue_HK.UTF-8',
1462 'yuw_pg': 'yuw_PG.UTF-8',
1463 'zh': 'zh_CN.eucCN',
1464 'zh_cn': 'zh_CN.gb2312',
1465 'zh_cn.big5': 'zh_TW.big5',
1466 'zh_cn.euc': 'zh_CN.eucCN',
1467 'zh_hk': 'zh_HK.big5hkscs',
1468 'zh_hk.big5hk': 'zh_HK.big5hkscs',
1469 'zh_sg': 'zh_SG.GB2312',
1470 'zh_sg.gbk': 'zh_SG.GBK',
1471 'zh_tw': 'zh_TW.big5',
1472 'zh_tw.euc': 'zh_TW.eucTW',
1473 'zh_tw.euctw': 'zh_TW.eucTW',
1474 'zu': 'zu_ZA.ISO8859-1',
1475 'zu_za': 'zu_ZA.ISO8859-1',
1476 }
1477
1478 #
1479 # This maps Windows language identifiers to locale strings.
1480 #
1481 # This list has been updated from
1482 # http://msdn.microsoft.com/library/default.asp?url=/library/en-us/intl/nls_238z.asp
1483 # to include every locale up to Windows Vista.
1484 #
1485 # NOTE: this mapping is incomplete. If your language is missing, please
1486 # submit a bug report to the Python bug tracker at http://bugs.python.org/
1487 # Make sure you include the missing language identifier and the suggested
1488 # locale code.
1489 #
1490
# Keys are Windows language identifiers (integers, written in hex); values
# are the corresponding "language_TERRITORY" locale strings.
#
# NOTE(review): several values (e.g. "wen_DE", "div_MV", "ns_ZA", "gbz_AF",
# "tmz_DZ", "sr_SP", "en_CB") do not use the ISO language/territory codes.
# They look like legacy Windows tags and are left unchanged here -- confirm
# before normalizing them.
windows_locale = {
    0x0436: "af_ZA",  # Afrikaans
    0x041c: "sq_AL",  # Albanian
    0x0484: "gsw_FR",  # Alsatian - France
    0x045e: "am_ET",  # Amharic - Ethiopia
    0x0401: "ar_SA",  # Arabic - Saudi Arabia
    0x0801: "ar_IQ",  # Arabic - Iraq
    0x0c01: "ar_EG",  # Arabic - Egypt
    0x1001: "ar_LY",  # Arabic - Libya
    0x1401: "ar_DZ",  # Arabic - Algeria
    0x1801: "ar_MA",  # Arabic - Morocco
    0x1c01: "ar_TN",  # Arabic - Tunisia
    0x2001: "ar_OM",  # Arabic - Oman
    0x2401: "ar_YE",  # Arabic - Yemen
    0x2801: "ar_SY",  # Arabic - Syria
    0x2c01: "ar_JO",  # Arabic - Jordan
    0x3001: "ar_LB",  # Arabic - Lebanon
    0x3401: "ar_KW",  # Arabic - Kuwait
    0x3801: "ar_AE",  # Arabic - United Arab Emirates
    0x3c01: "ar_BH",  # Arabic - Bahrain
    0x4001: "ar_QA",  # Arabic - Qatar
    0x042b: "hy_AM",  # Armenian
    0x044d: "as_IN",  # Assamese - India
    0x042c: "az_AZ",  # Azeri - Latin
    0x082c: "az_AZ",  # Azeri - Cyrillic
    0x046d: "ba_RU",  # Bashkir
    0x042d: "eu_ES",  # Basque - Spain
    0x0423: "be_BY",  # Belarusian
    0x0445: "bn_IN",  # Bengali
    0x201a: "bs_BA",  # Bosnian - Cyrillic
    0x141a: "bs_BA",  # Bosnian - Latin
    0x047e: "br_FR",  # Breton - France
    0x0402: "bg_BG",  # Bulgarian
    # 0x0455: "my_MM",  # Burmese - Not supported
    0x0403: "ca_ES",  # Catalan
    0x0004: "zh_CHS",  # Chinese - Simplified
    0x0404: "zh_TW",  # Chinese - Taiwan
    0x0804: "zh_CN",  # Chinese - PRC
    0x0c04: "zh_HK",  # Chinese - Hong Kong S.A.R.
    0x1004: "zh_SG",  # Chinese - Singapore
    0x1404: "zh_MO",  # Chinese - Macao S.A.R.
    0x7c04: "zh_CHT",  # Chinese - Traditional
    0x0483: "co_FR",  # Corsican - France
    0x041a: "hr_HR",  # Croatian
    0x101a: "hr_BA",  # Croatian - Bosnia
    0x0405: "cs_CZ",  # Czech
    0x0406: "da_DK",  # Danish
    0x048c: "gbz_AF",  # Dari - Afghanistan
    0x0465: "div_MV",  # Divehi - Maldives
    0x0413: "nl_NL",  # Dutch - The Netherlands
    0x0813: "nl_BE",  # Dutch - Belgium
    0x0409: "en_US",  # English - United States
    0x0809: "en_GB",  # English - United Kingdom
    0x0c09: "en_AU",  # English - Australia
    0x1009: "en_CA",  # English - Canada
    0x1409: "en_NZ",  # English - New Zealand
    0x1809: "en_IE",  # English - Ireland
    0x1c09: "en_ZA",  # English - South Africa
    0x2009: "en_JM",  # English - Jamaica
    0x2409: "en_CB",  # English - Caribbean
    0x2809: "en_BZ",  # English - Belize
    0x2c09: "en_TT",  # English - Trinidad
    0x3009: "en_ZW",  # English - Zimbabwe
    0x3409: "en_PH",  # English - Philippines
    0x4009: "en_IN",  # English - India
    0x4409: "en_MY",  # English - Malaysia
    0x4809: "en_SG",  # English - Singapore
    0x0425: "et_EE",  # Estonian
    0x0438: "fo_FO",  # Faroese
    0x0464: "fil_PH",  # Filipino
    0x040b: "fi_FI",  # Finnish
    0x040c: "fr_FR",  # French - France
    0x080c: "fr_BE",  # French - Belgium
    0x0c0c: "fr_CA",  # French - Canada
    0x100c: "fr_CH",  # French - Switzerland
    0x140c: "fr_LU",  # French - Luxembourg
    0x180c: "fr_MC",  # French - Monaco
    0x0462: "fy_NL",  # Frisian - Netherlands
    0x0456: "gl_ES",  # Galician
    0x0437: "ka_GE",  # Georgian
    0x0407: "de_DE",  # German - Germany
    0x0807: "de_CH",  # German - Switzerland
    0x0c07: "de_AT",  # German - Austria
    0x1007: "de_LU",  # German - Luxembourg
    0x1407: "de_LI",  # German - Liechtenstein
    0x0408: "el_GR",  # Greek
    0x046f: "kl_GL",  # Greenlandic - Greenland
    0x0447: "gu_IN",  # Gujarati
    0x0468: "ha_NG",  # Hausa - Latin
    0x040d: "he_IL",  # Hebrew
    0x0439: "hi_IN",  # Hindi
    0x040e: "hu_HU",  # Hungarian
    0x040f: "is_IS",  # Icelandic
    0x0421: "id_ID",  # Indonesian
    0x045d: "iu_CA",  # Inuktitut - Syllabics
    0x085d: "iu_CA",  # Inuktitut - Latin
    0x083c: "ga_IE",  # Irish - Ireland
    0x0410: "it_IT",  # Italian - Italy
    0x0810: "it_CH",  # Italian - Switzerland
    0x0411: "ja_JP",  # Japanese
    0x044b: "kn_IN",  # Kannada - India
    0x043f: "kk_KZ",  # Kazakh
    0x0453: "km_KH",  # Khmer - Cambodia
    0x0486: "qut_GT",  # K'iche - Guatemala
    0x0487: "rw_RW",  # Kinyarwanda - Rwanda
    0x0457: "kok_IN",  # Konkani
    0x0412: "ko_KR",  # Korean
    0x0440: "ky_KG",  # Kyrgyz
    0x0454: "lo_LA",  # Lao - Lao PDR
    0x0426: "lv_LV",  # Latvian
    0x0427: "lt_LT",  # Lithuanian
    0x082e: "dsb_DE",  # Lower Sorbian - Germany
    0x046e: "lb_LU",  # Luxembourgish
    0x042f: "mk_MK",  # FYROM Macedonian
    0x043e: "ms_MY",  # Malay - Malaysia
    0x083e: "ms_BN",  # Malay - Brunei Darussalam
    0x044c: "ml_IN",  # Malayalam - India
    0x043a: "mt_MT",  # Maltese
    0x0481: "mi_NZ",  # Maori
    0x047a: "arn_CL",  # Mapudungun
    0x044e: "mr_IN",  # Marathi
    0x047c: "moh_CA",  # Mohawk - Canada
    0x0450: "mn_MN",  # Mongolian - Cyrillic
    0x0850: "mn_CN",  # Mongolian - PRC
    0x0461: "ne_NP",  # Nepali
    0x0414: "nb_NO",  # Norwegian - Bokmal
    0x0814: "nn_NO",  # Norwegian - Nynorsk
    0x0482: "oc_FR",  # Occitan - France
    0x0448: "or_IN",  # Oriya - India
    0x0463: "ps_AF",  # Pashto - Afghanistan
    0x0429: "fa_IR",  # Persian
    0x0415: "pl_PL",  # Polish
    0x0416: "pt_BR",  # Portuguese - Brazil
    0x0816: "pt_PT",  # Portuguese - Portugal
    0x0446: "pa_IN",  # Punjabi
    0x046b: "quz_BO",  # Quechua (Bolivia)
    0x086b: "quz_EC",  # Quechua (Ecuador)
    0x0c6b: "quz_PE",  # Quechua (Peru)
    0x0418: "ro_RO",  # Romanian - Romania
    0x0417: "rm_CH",  # Romansh
    0x0419: "ru_RU",  # Russian
    0x243b: "smn_FI",  # Sami Finland
    0x103b: "smj_NO",  # Sami Norway
    0x143b: "smj_SE",  # Sami Sweden
    0x043b: "se_NO",  # Sami Northern Norway
    0x083b: "se_SE",  # Sami Northern Sweden
    0x0c3b: "se_FI",  # Sami Northern Finland
    0x203b: "sms_FI",  # Sami Skolt
    0x183b: "sma_NO",  # Sami Southern Norway
    0x1c3b: "sma_SE",  # Sami Southern Sweden
    0x044f: "sa_IN",  # Sanskrit
    0x0c1a: "sr_SP",  # Serbian - Cyrillic
    0x1c1a: "sr_BA",  # Serbian - Bosnia Cyrillic
    0x081a: "sr_SP",  # Serbian - Latin
    0x181a: "sr_BA",  # Serbian - Bosnia Latin
    0x045b: "si_LK",  # Sinhala - Sri Lanka
    0x046c: "ns_ZA",  # Northern Sotho
    0x0432: "tn_ZA",  # Setswana - Southern Africa
    0x041b: "sk_SK",  # Slovak
    0x0424: "sl_SI",  # Slovenian
    0x040a: "es_ES",  # Spanish - Spain
    0x080a: "es_MX",  # Spanish - Mexico
    0x0c0a: "es_ES",  # Spanish - Spain (Modern)
    0x100a: "es_GT",  # Spanish - Guatemala
    0x140a: "es_CR",  # Spanish - Costa Rica
    0x180a: "es_PA",  # Spanish - Panama
    0x1c0a: "es_DO",  # Spanish - Dominican Republic
    0x200a: "es_VE",  # Spanish - Venezuela
    0x240a: "es_CO",  # Spanish - Colombia
    0x280a: "es_PE",  # Spanish - Peru
    0x2c0a: "es_AR",  # Spanish - Argentina
    0x300a: "es_EC",  # Spanish - Ecuador
    0x340a: "es_CL",  # Spanish - Chile
    0x380a: "es_UY",  # Spanish - Uruguay
    0x3c0a: "es_PY",  # Spanish - Paraguay
    0x400a: "es_BO",  # Spanish - Bolivia
    0x440a: "es_SV",  # Spanish - El Salvador
    0x480a: "es_HN",  # Spanish - Honduras
    0x4c0a: "es_NI",  # Spanish - Nicaragua
    0x500a: "es_PR",  # Spanish - Puerto Rico
    0x540a: "es_US",  # Spanish - United States
    # 0x0430: "",  # Sutu - Not supported
    0x0441: "sw_KE",  # Swahili
    0x041d: "sv_SE",  # Swedish - Sweden
    0x081d: "sv_FI",  # Swedish - Finland
    0x045a: "syr_SY",  # Syriac
    0x0428: "tg_TJ",  # Tajik - Cyrillic
    0x085f: "tmz_DZ",  # Tamazight - Latin
    0x0449: "ta_IN",  # Tamil
    0x0444: "tt_RU",  # Tatar
    0x044a: "te_IN",  # Telugu
    0x041e: "th_TH",  # Thai
    0x0851: "bo_BT",  # Tibetan - Bhutan
    0x0451: "bo_CN",  # Tibetan - PRC
    0x041f: "tr_TR",  # Turkish
    0x0442: "tk_TM",  # Turkmen - Cyrillic
    0x0480: "ug_CN",  # Uighur - Arabic
    0x0422: "uk_UA",  # Ukrainian
    0x042e: "wen_DE",  # Upper Sorbian - Germany
    0x0420: "ur_PK",  # Urdu
    0x0820: "ur_IN",  # Urdu - India
    0x0443: "uz_UZ",  # Uzbek - Latin
    0x0843: "uz_UZ",  # Uzbek - Cyrillic
    0x042a: "vi_VN",  # Vietnamese
    0x0452: "cy_GB",  # Welsh
    0x0488: "wo_SN",  # Wolof - Senegal
    0x0434: "xh_ZA",  # Xhosa - South Africa
    0x0485: "sah_RU",  # Yakut - Cyrillic
    0x0478: "ii_CN",  # Yi - PRC
    0x046a: "yo_NG",  # Yoruba - Nigeria
    0x0435: "zu_ZA",  # Zulu
}
1703
def _print_locale():
    """ Test function.

        Prints a locale report to stdout in four sections: the defaults
        returned by getdefaultlocale(), the per-category settings at
        startup, the settings after resetlocale(), and the settings after
        setlocale(LC_ALL, "").  If that last call raises Error, a note is
        printed instead of the fourth section.

        Takes no arguments and returns None.  Note that it changes the
        process locale as a side effect (via resetlocale() and setlocale()).
    """
    # Every module-level LC_* constant except LC_ALL, in definition order.
    # NOTE(review): LC_ALL is presumably skipped because getlocale() cannot
    # report one value for the composite category -- confirm in getlocale().
    categories = {name: value for name, value in globals().items()
                  if name.startswith('LC_') and name != 'LC_ALL'}
    rule = '-'*72

    def _print_settings():
        # Print the current language/encoding pair of each category.
        for name, category in categories.items():
            print(name, '...')
            lang, enc = getlocale(category)
            print(' Language: ', lang or '(undefined)')
            print(' Encoding: ', enc or '(undefined)')
            print()

    print('Locale defaults as determined by getdefaultlocale():')
    print(rule)
    lang, enc = getdefaultlocale()
    print('Language: ', lang or '(undefined)')
    print('Encoding: ', enc or '(undefined)')
    print()

    print('Locale settings on startup:')
    print(rule)
    _print_settings()

    print()
    print('Locale settings after calling resetlocale():')
    print(rule)
    resetlocale()
    _print_settings()

    try:
        setlocale(LC_ALL, "")
    except Error:
        # Only an unsupported locale is expected here; anything else
        # (KeyboardInterrupt, programming errors) must propagate.
        print('NOTE:')
        print('setlocale(LC_ALL, "") does not support the default locale')
        print('given in the OS environment variables.')
    else:
        print()
        print('Locale settings after calling setlocale(LC_ALL, ""):')
        print(rule)
        _print_settings()
1759
1760 ###
1761
# LC_MESSAGES is non-standard, so it only exists when the import at the top
# of the module (or the emulation fallback) defined it; export it only then.
if "LC_MESSAGES" in globals():
    __all__.append("LC_MESSAGES")
1768
if __name__ == '__main__':
    # Run as a script: print the locale report, then the number formatting
    # test output.  Each title is followed by one blank line.
    print('Locale aliasing:', end='\n\n')
    _print_locale()
    print()
    print('Number formatting:', end='\n\n')
    _test()