1  /****************************************************************************
       2   *                                                                          *
       3   *                         GNAT COMPILER COMPONENTS                         *
       4   *                                                                          *
       5   *                             L O C A L E S                                *
       6   *                                                                          *
       7   *                          C Implementation File                           *
       8   *                                                                          *
       9   *             Copyright (C) 2010-2023, Free Software Foundation, Inc.      *
      10   *                                                                          *
      11   * GNAT is free software;  you can  redistribute it  and/or modify it under *
      12   * terms of the  GNU General Public License as published  by the Free Soft- *
      13   * ware  Foundation;  either version 3,  or (at your option) any later ver- *
      14   * sion.  GNAT is distributed in the hope that it will be useful, but WITH- *
      15   * OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY *
      16   * or FITNESS FOR A PARTICULAR PURPOSE.                                     *
      17   *                                                                          *
      18   * As a special exception under Section 7 of GPL version 3, you are granted *
      19   * additional permissions described in the GCC Runtime Library Exception,   *
      20   * version 3.1, as published by the Free Software Foundation.               *
      21   *                                                                          *
      22   * You should have received a copy of the GNU General Public License and    *
      23   * a copy of the GCC Runtime Library Exception along with this program;     *
      24   * see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    *
      25   * <http://www.gnu.org/licenses/>.                                          *
      26   *                                                                          *
      27   * GNAT was originally developed  by the GNAT team at  New York University. *
      28   * Extensive contributions were provided by Ada Core Technologies Inc.      *
      29   *                                                                          *
      30   ****************************************************************************/
      31  
      32  /*  This file provides OS-dependent support for the Ada.Locales package.    */
      33  
      34  #include <locale.h>
      35  #include <ctype.h>
      36  #include <stddef.h>
      37  
/* Number of elements in the array A.  The count is derived from sizeof, so A
   must be an actual array object and not a pointer to its first element. */
#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))
      39  
/* Four-character buffer type used for the output parameter of
   c_get_language_code and c_get_country_code. */
typedef char char4 [4];
      41  
/* Table containing equivalences between ISO 639-1 codes and their ISO 639-3
   alpha-3 code plus their language name.

   The array is flat: every language occupies three consecutive strings, in
   the order

        ISO 639-1 alpha-2 code, ISO 639-3 alpha-3 code, language name

   The lookup routines in this file walk the array three entries at a time,
   so every row must supply all three strings. */

static char* iso_639[] =
{
  "aa", "aar", "Afar",
  "ab", "abk", "Abkhazian",
  "ae", "ave", "Avestan",
  "af", "afr", "Afrikaans",
  "ak", "aka", "Akan",
  "am", "amh", "Amharic",
  "an", "arg", "Aragonese",
  "ar", "ara", "Arabic",
  "as", "asm", "Assamese",
  "av", "ava", "Avaric",
  "ay", "aym", "Aymara",
  "az", "aze", "Azerbaijani",

  "ba", "bak", "Bashkir",
  "be", "bel", "Belarusian",
  "bg", "bul", "Bulgarian",
  "bi", "bis", "Bislama",
  "bm", "bam", "Bambara",
  "bn", "ben", "Bengali",
  "bo", "bod", "Tibetan",
  "br", "bre", "Breton",
  "bs", "bos", "Bosnian",

  "ca", "cat", "Catalan",
  "ce", "che", "Chechen",
  "ch", "cha", "Chamorro",
  "co", "cos", "Corsican",
  "cr", "cre", "Cree",
  "cs", "ces", "Czech",
  "cu", "chu", "Church Slavic",
  "cv", "chv", "Chuvash",
  "cy", "cym", "Welsh",

  "da", "dan", "Danish",
  "de", "deu", "German",
  "dv", "div", "Divehi",
  "dz", "dzo", "Dzongkha",

  "ee", "ewe", "Ewe",
  "el", "ell", "Modern Greek",
  "en", "eng", "English",
  "eo", "epo", "Esperanto",
  "es", "spa", "Spanish",
  "et", "est", "Estonian",
  "eu", "eus", "Basque",

  "fa", "fas", "Persian",
  "ff", "ful", "Fulah",
  "fi", "fin", "Finnish",
  "fj", "fij", "Fijian",
  "fo", "fao", "Faroese",
  "fr", "fra", "French",
  "fy", "fry", "Western Frisian",

  "ga", "gle", "Irish",
  "gd", "gla", "Scottish Gaelic",
  "gl", "glg", "Galician",
  "gn", "grn", "Guarani",
  "gu", "guj", "Gujarati",
  "gv", "glv", "Manx",

  "ha", "hau", "Hausa",
  "he", "heb", "Hebrew",
  "hi", "hin", "Hindi",
  "ho", "hmo", "Hiri Motu",
  "hr", "hrv", "Croatian",
  "ht", "hat", "Haitian",
  "hu", "hun", "Hungarian",
  "hy", "hye", "Armenian",
  "hz", "her", "Herero",

  "ia", "ina", "Interlingua",
  "id", "ind", "Indonesian",
  "ie", "ile", "Interlingue",
  "ig", "ibo", "Igbo",
  "ii", "iii", "Sichuan Yi",
  "ik", "ipk", "Inupiaq",
  "io", "ido", "Ido",
  "is", "isl", "Icelandic",
  "it", "ita", "Italian",
  "iu", "iku", "Inuktitut",

  "ja", "jpn", "Japanese",
  "jv", "jav", "Javanese",

  "ka", "kat", "Georgian",
  "kg", "kon", "Kongo",
  "ki", "kik", "Kikuyu",
  "kj", "kua", "Kuanyama",
  "kk", "kaz", "Kazakh",
  "kl", "kal", "Kalaallisut",
  "km", "khm", "Central Khmer",
  "kn", "kan", "Kannada",
  "ko", "kor", "Korean",
  "kr", "kau", "Kanuri",
  "ks", "kas", "Kashmiri",
  "ku", "kur", "Kurdish",
  "kv", "kom", "Komi",
  "kw", "cor", "Cornish",
  "ky", "kir", "Kirghiz",

  "la", "lat", "Latin",
  "lb", "ltz", "Luxembourgish",
  "lg", "lug", "Ganda",
  "li", "lim", "Limburgan",
  "ln", "lin", "Lingala",
  "lo", "lao", "Lao",
  "lt", "lit", "Lithuanian",
  "lu", "lub", "Luba-Katanga",
  "lv", "lav", "Latvian",

  "mg", "mlg", "Malagasy",
  "mh", "mah", "Marshallese",
  "mi", "mri", "Maori",
  "mk", "mkd", "Macedonian",
  "ml", "mal", "Malayalam",
  "mn", "mon", "Mongolian",
  "mr", "mar", "Marathi",
  "ms", "msa", "Malay",
  "mt", "mlt", "Maltese",
  "my", "mya", "Burmese",

  "na", "nau", "Nauru",
  "nb", "nob", "Norwegian Bokmal",
  "nd", "nde", "North Ndebele",
  "ne", "nep", "Nepali",
  "ng", "ndo", "Ndonga",
  "nl", "nld", "Dutch",
  "nn", "nno", "Norwegian Nynorsk",
  "no", "nor", "Norwegian",
  "nr", "nbl", "South Ndebele",
  "nv", "nav", "Navajo",
  "ny", "nya", "Nyanja",

  "oc", "oci", "Occitan",
  "oj", "oji", "Ojibwa",
  "om", "orm", "Oromo",
  "or", "ori", "Oriya",
  "os", "oss", "Ossetian",

  "pa", "pan", "Panjabi",
  "pi", "pli", "Pali",
  "pl", "pol", "Polish",
  "ps", "pus", "Pushto",
  "pt", "por", "Portuguese",

  "qu", "que", "Quechua",

  "rm", "roh", "Romansh",
  "rn", "run", "Rundi",
  "ro", "ron", "Romanian",
  "ru", "rus", "Russian",
  "rw", "kin", "Kinyarwanda",

  "sa", "san", "Sanskrit",
  "sc", "srd", "Sardinian",
  "sd", "snd", "Sindhi",
  "se", "sme", "Northern Sami",
  "sg", "sag", "Sango",
  "sh", "hbs", "Serbo-Croatian",
  "si", "sin", "Sinhala",
  "sk", "slk", "Slovak",
  "sl", "slv", "Slovenian",
  "sm", "smo", "Samoan",
  "sn", "sna", "Shona",
  "so", "som", "Somali",
  "sq", "sqi", "Albanian",
  "sr", "srp", "Serbian",
  "ss", "ssw", "Swati",
  "st", "sot", "Southern Sotho",
  "su", "sun", "Sundanese",
  "sv", "swe", "Swedish",
  "sw", "swa", "Swahili",

  "ta", "tam", "Tamil",
  "te", "tel", "Telugu",
  "tg", "tgk", "Tajik",
  "th", "tha", "Thai",
  "ti", "tir", "Tigrinya",
  "tk", "tuk", "Turkmen",
  "tl", "tgl", "Tagalog",
  "tn", "tsn", "Tswana",
  "to", "ton", "Tonga",
  "tr", "tur", "Turkish",
  "ts", "tso", "Tsonga",
  "tt", "tat", "Tatar",
  "tw", "twi", "Twi",
  "ty", "tah", "Tahitian",

  "ug", "uig", "Uighur",
  "uk", "ukr", "Ukrainian",
  "ur", "urd", "Urdu",
  "uz", "uzb", "Uzbek",

  "ve", "ven", "Venda",
  "vi", "vie", "Vietnamese",
  "vo", "vol", "Volapuk",

  "wa", "wln", "Walloon",
  "wo", "wol", "Wolof",

  "xh", "xho", "Xhosa",

  "yi", "yid", "Yiddish",
  "yo", "yor", "Yoruba",

  "za", "zha", "Zhuang",
  "zh", "zho", "Chinese",
  "zu", "zul", "Zulu"
};
     257  
     258  /* Table containing equivalences between ISO_3166 alpha-2 codes and country
     259     names. This table has several entries for codes that have several valid
     260     country names. */
     261  
     262  static char* iso_3166[] =
     263  {
     264    "AU", "Australia",
     265    "AD", "Andorra",
     266    "AE", "United Arab Emirates",
     267    "AF", "Afghanistan",
     268    "AG", "Antigua and Barbuda",
     269    "AI", "Anguilla",
     270    "AL", "Albania",
     271    "AM", "Armenia",
     272    "AN", "Netherlands Antilles",
     273    "AO", "Angola",
     274    "AQ", "Antarctica",
     275    "AR", "Argentina",
     276    "AS", "American Samoa",
     277    "AT", "Austria",
     278    "AU", "Australia",
     279    "AW", "Aruba",
     280    "AX", "Aland Islands",
     281    "AZ", "Azerbaijan",
     282  
     283    "BA", "Bosnia and Herzegovina",
     284    "BB", "Barbados",
     285    "BD", "Bangladesh",
     286    "BE", "Belgium",
     287    "BF", "Burkina Faso",
     288    "BG", "Bulgaria",
     289    "BH", "Bahrain",
     290    "BI", "Burundi",
     291    "BJ", "Benin",
     292    "BL", "Saint Barthélemy",
     293    "BM", "Bermuda",
     294    "BN", "Brunei Darussalam",
     295    "BO", "Bolivia, Plurinational State of",
     296    "BQ", "Bonaire, Sint Eustatius and Saba",
     297    "BR", "Brazil",
     298    "BS", "Bahamas",
     299    "BT", "Bhutan",
     300    "BV", "Bouvet Island",
     301    "BW", "Botswana",
     302    "BY", "Belarus",
     303    "BZ", "Belize",
     304  
     305    "CA", "Canada",
     306    "CC", "Cocos (Keeling) Islands",
     307    "CD", "Congo, Democratic Republic of the",
     308    "CF", "Central African Republic",
     309    "CG", "Congo",
     310    "CH", "Switzerland",
     311    "CI", "Côte d'Ivoire",
     312    "CK", "Cook Islands",
     313    "CL", "Chile",
     314    "CM", "Cameroon",
     315    "CN", "China",
     316    "CN", "People’s Republic of China",
     317    "CN", "PR China",
     318    "CN", "PR-China",
     319    "CO", "Colombia",
     320    "CR", "Costa Rica",
     321    "CS", "Czechoslovakia",
     322    "CU", "Cuba",
     323    "CV", "Cape Verde",
     324    "CW", "Curaçao",
     325    "CX", "Christmas Island",
     326    "CY", "Cyprus",
     327    "CZ", "Czech Republic",
     328  
     329    "DE", "Germany",
     330    "DJ", "Djibouti",
     331    "DK", "Denmark",
     332    "DM", "Dominica",
     333    "DO", "Dominican Republic",
     334    "DZ", "Algeria",
     335  
     336    "EC", "Ecuador",
     337    "EE", "Estonia",
     338    "EG", "Egypt",
     339    "EH", "Western Sahara",
     340    "ER", "Eritrea",
     341    "ES", "Spain",
     342    "ET", "Ethiopia",
     343  
     344    "FI", "Finland",
     345    "FG", "Fiji",
     346    "FK", "Falkland Islands (Malvinas)",
     347    "FM", "Micronesia, Federated States of",
     348    "FO", "Faroe Islands",
     349    "FR", "France",
     350  
     351    "GA", "Gabon",
     352    "GB", "United Kingdom",
     353    "GB", "United-Kingdom",
     354    "GB", "England",
     355    "GB", "Britain",
     356    "GB", "Great Britain",
     357    "GD", "Grenada",
     358    "GE", "Georgia",
     359    "GF", "French Guiana",
     360    "GG", "Guernsey",
     361    "GH", "Ghana",
     362    "GI", "Gibraltar",
     363    "GL", "Greenland",
     364    "GM", "Gambia",
     365    "GN", "Guinea",
     366    "GP", "Guadeloupe",
     367    "GQ", "Equatorial Guinea",
     368    "GR", "Greece",
     369    "GS", "South Georgia and the South Sandwich Islands",
     370    "GT", "Guatemala",
     371    "GU", "Guam",
     372    "GW", "Guinea-Bissau",
     373    "GY", "Guyana",
     374  
     375    "HK", "Hong Kong",
     376    "HK", "Hong-Kong",
     377    "HM", "Heard Island and McDonald Islands",
     378    "HN", "Honduras",
     379    "HR", "Croatia",
     380    "HT", "Haiti",
     381    "HU", "Hungary",
     382  
     383    "ID", "Indonesia",
     384    "IE", "Ireland",
     385    "IL", "Israel",
     386    "IM", "Isle of Man",
     387    "IN", "India",
     388    "IO", "British Indian Ocean Territory",
     389    "IQ", "Iraq",
     390    "IR", "Iran",
     391    "IR", "Iran, Islamic Republic of",
     392    "IS", "Iceland",
     393    "IT", "Italy",
     394  
     395    "JE", "Jersey",
     396    "JM", "Jamaica",
     397    "JO", "Jordan",
     398    "JP", "Japan",
     399  
     400    "KE", "Kenya",
     401    "KG", "Kyrgyzstan",
     402    "KH", "Cambodia",
     403    "KI", "Kiribati",
     404    "KM", "Comoros",
     405    "KN", "Saint Kitts and Nevis",
     406    "KP", "Korea, Democratic People's Republic of",
     407    "KR", "Korea, Republic of",
     408    "KW", "Kuwait",
     409    "KY", "Cayman Islands",
     410    "KZ", "Kazakhstan",
     411  
     412    "LA", "Lao People's Democratic Republic",
     413    "LB", "Lebanon",
     414    "LC", "Saint Lucia",
     415    "LI", "Liechtenstein",
     416    "LK", "Sri Lanka",
     417    "LR", "Liberia",
     418    "LS", "Lesotho",
     419    "LT", "Lithuania",
     420    "LU", "Luxembourg",
     421    "LV", "Latvia",
     422    "LY", "Libya",
     423  
     424    "MA", "Morocco",
     425    "MC", "Monaco",
     426    "MD", "Moldova, Republic of",
     427    "ME", "Montenegro",
     428    "MF", "Saint Martin",
     429    "MG", "Madagascar",
     430    "MH", "Marshall Islands",
     431    "MK", "Macedonia",
     432    "ML", "Mali",
     433    "MM", "Myanmar",
     434    "MN", "Mongolia",
     435    "MO", "Macao",
     436    "MP", "Northern Mariana Islands",
     437    "MQ", "Martinique",
     438    "MR", "Mauritania",
     439    "MS", "Montserrat",
     440    "MT", "Malta",
     441    "MU", "Mauritius",
     442    "MV", "Maldives",
     443    "MW", "Malawi",
     444    "MX", "Mexico",
     445    "MY", "Malaysia",
     446    "MZ", "Mozambique",
     447  
     448    "NA", "Namibia",
     449    "NC", "New Caledonia",
     450    "NE", "Niger",
     451    "NF", "Norfolk Island",
     452    "NG", "Nigeria",
     453    "NI", "Nicaragua",
     454    "NL", "Netherlands",
     455    "NL", "Holland",
     456    "NO", "Norway",
     457    "NP", "Nepal",
     458    "NR", "Nauru",
     459    "NU", "Niue",
     460    "NZ", "New Zealand",
     461    "NZ", "New-Zealand",
     462  
     463    "OM", "Oman",
     464  
     465    "PA", "Panama",
     466    "PE", "Peru",
     467    "PF", "French Polynesia",
     468    "PG", "Papua New Guinea",
     469    "PH", "Philippines",
     470    "PK", "Pakistan",
     471    "PL", "Poland",
     472    "PM", "Saint Pierre and Miquelon",
     473    "PN", "Pitcairn",
     474    "PR", "Puerto Rico",
     475    "PS", "Palestine, State of",
     476    "PT", "Portugal",
     477    "PW", "Palau",
     478    "PY", "Paraguay",
     479  
     480    "QA", "Qatar",
     481  
     482    "RE", "Réunion",
     483    "RO", "Romania",
     484    "RS", "Serbia",
     485    "RU", "Russian Federation",
     486    "RW", "Rwanda",
     487  
     488    "SA", "Saudi Arabia",
     489    "SB", "Solomon Islands",
     490    "SC", "Seychelles",
     491    "SD", "Sudan",
     492    "SE", "Sweden",
     493    "SG", "Singapore",
     494    "SH", "Saint Helena, Ascension and Tristan da Cunha",
     495    "SI", "Slovenia",
     496    "SJ", "Svalbard and Jan Mayen",
     497    "SK", "Slovakia",
     498    "SL", "Sierra Leone",
     499    "SM", "San Marino",
     500    "SN", "Senegal",
     501    "SO", "Somalia",
     502    "SR", "Suriname",
     503    "SS", "South Sudan",
     504    "SV", "El Salvador",
     505    "SX", "Sint Maarten (Dutch part)",
     506    "SY", "Syrian Arab Republic",
     507    "SZ", "Swaziland",
     508  
     509    "TC", "Turks and Caicos Islands",
     510    "TD", "Chad",
     511    "TF", "French Southern Territories",
     512    "TG", "Togo",
     513    "TH", "Thailand",
     514    "TJ", "Tajikistan",
     515    "TK", "Tokelau",
     516    "TL", "Timor-Leste",
     517    "TM", "Turkmenistan",
     518    "TN", "Tunisia",
     519    "TO", "Tonga",
     520    "TP", "East Timor",
     521    "TR", "Turkey",
     522    "TT", "Trinidad and Tobago",
     523    "TV", "Tuvalu",
     524    "TW", "Taiwan",
     525    "TW", "Taiwan, Province of China",
     526    "TZ", "Tanzania",
     527    "TZ", "Tanzania, United Republic of",
     528  
     529    "UA", "Ukraine",
     530    "UG", "Uganda",
     531    "UM", "United States Minor Outlying Islands",
     532    "US", "United States",
     533    "US", "United States of America",
     534    "US", "United-States",
     535    "UY", "Uruguay",
     536    "UZ", "Uzbekistan",
     537  
     538    "VA", "Holy See (Vatican City State)",
     539    "VC", "Saint Vincent and the Grenadines",
     540    "VE", "Venezuela",
     541    "VE", "Venezuela, Bolivarian Republic of",
     542    "VG", "Virgin Islands, British",
     543    "VI", "Virgin Islands, U.S.",
     544    "VN", "Viet Nam",
     545    "VU", "Vanuatu",
     546    "WF", "Wallis and Futuna",
     547    "WS", "Samoa",
     548  
     549    "YE", "Yemen",
     550    "YT", "Mayotte",
     551    "YU", "Yugoslavia",
     552  
     553    "ZA", "South Africa",
     554    "ZM", "Zambia",
     555    "ZW", "Zimbabwe"
     556  };
     557  
     558  /* Utility function to perform case insensitive string comparison. Returns 1
     559     if both strings are equal and 0 otherwise. */
     560  
     561  static int
     562  str_case_equals (const char *s1, const char *s2) {
     563    while (*s1 != '\0' && *s2 != '\0' && tolower(*s1) == tolower(*s2)) {
     564      s1++;
     565      s2++;
     566    }
     567  
     568    return (*s1 == '\0') && (*s2 == '\0');
     569  }
     570  
     571  /* Utility function to copy length characters of a string. The target string
     572     must have space to store the extra string null terminator. */
     573  
     574  static void
     575  str_copy (char *target, char *source, int length) {
     576    for (; length > 0; source++, target++, length--) {
     577      *target = *source;
     578    }
     579  
     580    *target = '\0';
     581  }
     582  
     583  /* Utility function to search for the last byte of the lc_all string to be
     584     processed. Required because in some targets (for example, AIX), the
     585     string returned by setlocale() has duplicates. */
     586  
     587  static char*
     588  str_get_last_byte (char *lc_all) {
     589    char* first_space = NULL;
     590    char* second_space = NULL;
     591    char* last_byte = NULL;
     592    char* s1 = lc_all;
     593  
     594    /* Search for the 1st space (if any) */
     595    while (*s1 != ' ' && *s1 != '\0')
     596      s1++;
     597  
     598    if (*s1 == '\0') {
     599      last_byte = s1;
     600  
     601    } else {
     602      first_space = s1;
     603  
     604      /* Skip this space and search for the 2nd one (if available) */
     605      s1++;
     606      while (*s1 != ' ' && *s1 != '\0')
     607        s1++;
     608  
     609      if (*s1 == '\0') {
     610        last_byte = s1;
     611  
     612      } else {
     613        second_space=s1;
     614  
     615        /* Search for the last byte of lc_all */
     616        while (*s1 != '\0')
     617          s1++;
     618  
     619        last_byte = s1;
     620  
     621        /* Check if the two strings match */
     622        {
     623          int len1 = first_space - lc_all;
     624          int len2 = second_space - first_space - 1;
     625  
     626          if (len1 == len2) {
     627            char* p1 = lc_all;
     628            char* p2 = first_space + 1;
     629  
     630            /* Compare their contents */
     631            while (*p1 == *p2 && p2 != second_space) {
     632              p1++;
     633              p2++;
     634            }
     635  
     636            /* if the two strings match then update the last byte */
     637  
     638            if (p2 == second_space) {
     639              last_byte = first_space;
     640            }
     641          }
     642        }
     643      }
     644    }
     645  
     646    return last_byte;
     647  }
     648  
     649  /* Utility function to search in the iso_639_1 table for an iso-639-1 code;
     650     returns the corresponding iso-639-3 code or NULL if not found. */
     651  
     652  static char*
     653  iso_639_1_to_639_3(char* iso_639_1_code) {
     654    int len = ARRAY_SIZE (iso_639);
     655    char **p = iso_639;
     656    int j;
     657  
     658    for (j=0; j < len/3; j++) {
     659      char* s1 = iso_639_1_code;
     660      char* s2 = *p;
     661  
     662      if (s1[0]==s2[0] && s1[1]==s2[1]) {
     663        p++;
     664        return *p;
     665      }
     666  
     667      p = p + 3;
     668    }
     669  
     670    return NULL;
     671  }
     672  
     673  /* Utility function to search in the iso_639_1 table for a language name;
     674     returns the corresponding iso-639-3 code or NULL if not found. */
     675  
     676  static char*
     677  language_name_to_639_3(char* name) {
     678    int len = ARRAY_SIZE (iso_639);
     679    char **p = iso_639;
     680    int j;
     681  
     682    p = p + 2;
     683    for (j=0; j < len/3; j++) {
     684      if (str_case_equals(name, *p)) {
     685        p--;
     686        return *p;
     687      }
     688  
     689      p = p + 3;
     690    }
     691  
     692    return NULL;
     693  }
     694  
     695  /* Utility function to search in the iso_3166 table for a country name;
     696     returns the corresponding iso-3166 code or NULL if not found. */
     697  
     698  static char*
     699  country_name_to_3166 (char* name) {
     700    int len = ARRAY_SIZE (iso_3166);
     701    char **p = iso_3166;
     702    int j;
     703  
     704    p++;
     705    for (j=0; j < len/2; j++) {
     706      if (str_case_equals(name, *p)) {
     707        p--;
     708        return *p;
     709      }
     710  
     711      p = p + 2;
     712    }
     713  
     714    return NULL;
     715  }
     716  
     717  /*
     718    c_get_language_code needs to fill in the Alpha-3 encoding of the
     719    language code (3 lowercase letters). That should be "und" if the
     720    language is unknown. [see Ada.Locales]
     721  */
     722  void
     723  c_get_language_code (char4 p) {
     724    char* Saved_Locale = setlocale(LC_ALL, NULL);
     725    char  iso_639_3_code[] = "und";                        /* Language Unknown */
     726    char* lc_all;
     727    char* result;
     728  
     729    /* Get locales set in the environment */
     730  
     731    setlocale(LC_ALL, "");
     732    lc_all = setlocale(LC_ALL, NULL);
     733  
     734    /* The string returned by setlocale has the following format:
     735  
     736             language[_territory][.code-set][@modifier]
     737  
     738       where language is an ISO 639 language code, territory is an ISO 3166
     739       country code, and codeset is a character set or encoding identifier
     740       like ISO-8859-1 or UTF-8.
     741    */
     742  
     743    if (lc_all != NULL) {
     744      char* s = lc_all;
     745      int lang_length = 0;
     746  
     747      /* Copy the language part (which may be an ISO-639-1 code, an ISO-639-3
     748         code, or a language name) adding a string terminator */
     749  
     750      while (*s != '_' && *s != '.' && *s != '@' && *s != '\0')
     751        s++;
     752  
     753      lang_length = s - lc_all;
     754  
     755      /* Handle conversion of ISO-639-1 to ISO-639-3 */
     756  
     757      if (lang_length == 2) {
     758        char  iso_639_1[3];
     759        char* to_iso_639_3;
     760  
     761        /* Duplicate the ISO-639-1 code adding the null terminator required to
     762           search for the equivalent ISO-639-3 code; we cannot just append the
     763           null terminator since the pointer may reference non-writable memory.
     764        */
     765  
     766        str_copy(iso_639_1, lc_all, lang_length);
     767        to_iso_639_3 = iso_639_1_to_639_3(iso_639_1);
     768  
     769        if (to_iso_639_3)
     770          str_copy(iso_639_3_code, to_iso_639_3, 3);
     771  
     772      /* Copy the ISO-639-3 code (adding a null terminator) */
     773  
     774      } else if (lang_length == 3) {
     775        str_copy(iso_639_3_code, lc_all, lang_length);
     776  
     777      /* Handle conversion of language name to ISO-639-3 */
     778  
     779      } else if (lang_length > 3) {
     780        char  name_copy[lang_length + 1];
     781        char* to_iso_639_3;
     782  
     783        /* Duplicate the ISO-639-1 code adding the null terminator required to
     784           search for the equivalent ISO-639-3 code; we cannot just append the
     785           null terminator since the pointer may reference non-writable memory.
     786        */
     787  
     788        str_copy(name_copy, lc_all, lang_length);
     789        to_iso_639_3 = language_name_to_639_3(name_copy);
     790  
     791        if (to_iso_639_3)
     792          str_copy(iso_639_3_code, to_iso_639_3, 3);
     793      }
     794    }
     795  
     796    /* Copy out the computed ISO_639_3 code */
     797  
     798    result = iso_639_3_code;
     799    for (; *result != '\0'; p++, result++)
     800      *p = *result;
     801  
     802    /* Restore the original locale settings */
     803  
     804    setlocale(LC_ALL, Saved_Locale);
     805  
     806    return;
     807  }
     808  
     809  /*
     810    c_get_country_code needs to fill in the Alpha-2 encoding of the
     811    country code (2 uppercase letters). That should be "ZZ" if the
     812    country is unknown. [see Ada.Locales]
     813  */
     814  void
     815  c_get_country_code (char4 p) {
     816    char* Saved_Locale = setlocale(LC_ALL, NULL);
     817    char  iso_3166_code[] = "ZZ";                           /* Country Unknown */
     818    char* lc_all;
     819    char* result;
     820  
     821    /* Get locales set in the environment */
     822  
     823    setlocale(LC_ALL, "");
     824    lc_all = setlocale(LC_ALL, NULL);
     825  
     826    /* The string returned by setlocale has the following format:
     827  
     828             language[_territory][.code-set][@modifier]
     829  
     830       where language is an ISO 639 language code, territory is an ISO 3166
     831       country code, and codeset is a character set or encoding identifier
     832       like ISO-8859-1 or UTF-8.
     833    */
     834  
     835    if (lc_all != NULL) {
     836      char* s1 = lc_all;
     837      char* s2 = NULL;
     838      char* last_byte = str_get_last_byte(lc_all);
     839      int country_length = 0;
     840  
     841      /* Search for the beginning of the country code */
     842  
     843      s1 = lc_all;
     844      while (*s1 != '_' && *s1 != '.' && *s1 != '@' && s1 != last_byte)
     845        s1++;
     846  
     847      if (*s1 == '_') {
     848        s1++;
     849        s2 = s1;
     850  
     851        while (*s2 != '.' && *s2 != '@' && s2 != last_byte)
     852          s2++;
     853  
     854        country_length = s2 - s1;
     855  
     856        if (country_length == 2) {
     857          str_copy(iso_3166_code, s1, country_length);
     858  
     859        /* setlocale returned us the country name */
     860  
     861        } else if (country_length > 3) {
     862          char  name_copy[country_length + 1];
     863          char* to_3166;
     864  
     865          str_copy(name_copy, s1, country_length);
     866          to_3166 = country_name_to_3166(name_copy);
     867  
     868          if (to_3166)
     869            str_copy(iso_3166_code, to_3166, 2);
     870        }
     871      }
     872    }
     873  
     874    /* Copy out the computed ISO_3166 code */
     875  
     876    result = iso_3166_code;
     877    for (; *result != '\0'; p++, result++)
     878      *p = *result;
     879  
     880    /* Restore the original locale settings */
     881  
     882    setlocale(LC_ALL, Saved_Locale);
     883  
     884    return;
     885  }