1  /* Charset handling while reading PO files.
       2     Copyright (C) 2001-2007, 2010, 2019-2021 Free Software Foundation, Inc.
       3     Written by Bruno Haible <haible@clisp.cons.org>, 2001.
       4  
       5     This program is free software: you can redistribute it and/or modify
       6     it under the terms of the GNU General Public License as published by
       7     the Free Software Foundation; either version 3 of the License, or
       8     (at your option) any later version.
       9  
      10     This program is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13     GNU General Public License for more details.
      14  
      15     You should have received a copy of the GNU General Public License
      16     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      17  
      18  
      19  #ifdef HAVE_CONFIG_H
      20  # include "config.h"
      21  #endif
      22  #include <alloca.h>
      23  
      24  /* Specification.  */
      25  #include "po-charset.h"
      26  
      27  #include <stdlib.h>
      28  #include <string.h>
      29  
      30  #include "xmalloca.h"
      31  #include "xvasprintf.h"
      32  #include "po-xerror.h"
      33  #if !IN_LIBGETTEXTPO
      34  # include "basename-lgpl.h"
      35  # include "progname.h"
      36  #endif
      37  #include "c-strstr.h"
      38  #include "c-strcase.h"
      39  #include "gettext.h"
      40  
/* Shorthand for gettext (str), used to wrap the user-visible messages
   in this file.  */
#define _(str) gettext (str)

/* Number of elements of the array A.  A must be an actual array object;
   applied to a pointer (e.g. an array function parameter) the result is
   meaningless.  */
#define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
      44  
/* Storage for the canonical name "ASCII".  The same array is referenced by
   po_charset_ascii and by the first entry of the table in
   po_charset_canonicalize, so both yield the same pointer.  */
static const char ascii[] = "ASCII";

/* The canonicalized encoding name for ASCII.  */
const char *po_charset_ascii = ascii;

/* Storage for the canonical name "UTF-8".  The same array is referenced by
   po_charset_utf8 and by the last entry of the table in
   po_charset_canonicalize; po_charset_character_iterator compares its
   argument against the address of this array.  */
static const char utf8[] = "UTF-8";

/* The canonicalized encoding name for UTF-8.  */
const char *po_charset_utf8 = utf8;
      54  
      55  /* Canonicalize an encoding name.  */
      56  const char *
      57  po_charset_canonicalize (const char *charset)
      58  {
      59    /* The list of charsets supported by glibc's iconv() and by the portable
      60       iconv() across platforms.  Taken from intl/localcharset.h.  */
      61    static const char *standard_charsets[] =
      62    {
      63      ascii, "ANSI_X3.4-1968", "US-ASCII",        /* i = 0..2 */
      64      "ISO-8859-1", "ISO_8859-1",                 /* i = 3, 4 */
      65      "ISO-8859-2", "ISO_8859-2",
      66      "ISO-8859-3", "ISO_8859-3",
      67      "ISO-8859-4", "ISO_8859-4",
      68      "ISO-8859-5", "ISO_8859-5",
      69      "ISO-8859-6", "ISO_8859-6",
      70      "ISO-8859-7", "ISO_8859-7",
      71      "ISO-8859-8", "ISO_8859-8",
      72      "ISO-8859-9", "ISO_8859-9",
      73      "ISO-8859-13", "ISO_8859-13",
      74      "ISO-8859-14", "ISO_8859-14",
      75      "ISO-8859-15", "ISO_8859-15",               /* i = 25, 26 */
      76      "KOI8-R",
      77      "KOI8-U",
      78      "KOI8-T",
      79      "CP850",
      80      "CP866",
      81      "CP874",
      82      "CP932",
      83      "CP949",
      84      "CP950",
      85      "CP1250",
      86      "CP1251",
      87      "CP1252",
      88      "CP1253",
      89      "CP1254",
      90      "CP1255",
      91      "CP1256",
      92      "CP1257",
      93      "GB2312",
      94      "EUC-JP",
      95      "EUC-KR",
      96      "EUC-TW",
      97      "BIG5",
      98      "BIG5-HKSCS",
      99      "GBK",
     100      "GB18030",
     101      "SHIFT_JIS",
     102      "JOHAB",
     103      "TIS-620",
     104      "VISCII",
     105      "GEORGIAN-PS",
     106      utf8
     107    };
     108    size_t i;
     109  
     110    for (i = 0; i < SIZEOF (standard_charsets); i++)
     111      if (c_strcasecmp (charset, standard_charsets[i]) == 0)
     112        return standard_charsets[i < 3 ? 0 : i < 27 ? ((i - 3) & ~1) + 3 : i];
     113    return NULL;
     114  }
     115  
     116  /* Test for ASCII compatibility.  */
     117  bool
     118  po_charset_ascii_compatible (const char *canon_charset)
     119  {
     120    /* There are only a few exceptions to ASCII compatibility.  */
     121    if (strcmp (canon_charset, "SHIFT_JIS") == 0
     122        || strcmp (canon_charset, "JOHAB") == 0
     123        || strcmp (canon_charset, "VISCII") == 0)
     124      return false;
     125    else
     126      return true;
     127  }
     128  
     129  /* Test for a weird encoding, i.e. an encoding which has double-byte
     130     characters ending in 0x5C.  */
     131  bool po_is_charset_weird (const char *canon_charset)
     132  {
     133    static const char *weird_charsets[] =
     134    {
     135      "BIG5",
     136      "BIG5-HKSCS",
     137      "GBK",
     138      "GB18030",
     139      "SHIFT_JIS",
     140      "JOHAB"
     141    };
     142    size_t i;
     143  
     144    for (i = 0; i < SIZEOF (weird_charsets); i++)
     145      if (strcmp (canon_charset, weird_charsets[i]) == 0)
     146        return true;
     147    return false;
     148  }
     149  
     150  /* Test for a weird CJK encoding, i.e. a weird encoding with CJK structure.
     151     An encoding has CJK structure if every valid character stream is composed
     152     of single bytes in the range 0x{00..7F} and of byte pairs in the range
     153     0x{80..FF}{30..FF}.  */
     154  bool po_is_charset_weird_cjk (const char *canon_charset)
     155  {
     156    static const char *weird_cjk_charsets[] =
     157    {                     /* single bytes   double bytes       */
     158      "BIG5",             /* 0x{00..7F},    0x{A1..F9}{40..FE} */
     159      "BIG5-HKSCS",       /* 0x{00..7F},    0x{88..FE}{40..FE} */
     160      "GBK",              /* 0x{00..7F},    0x{81..FE}{40..FE} */
     161      "GB18030",          /* 0x{00..7F},    0x{81..FE}{30..FE} */
     162      "SHIFT_JIS",        /* 0x{00..7F},    0x{81..F9}{40..FC} */
     163      "JOHAB"             /* 0x{00..7F},    0x{84..F9}{31..FE} */
     164    };
     165    size_t i;
     166  
     167    for (i = 0; i < SIZEOF (weird_cjk_charsets); i++)
     168      if (strcmp (canon_charset, weird_cjk_charsets[i]) == 0)
     169        return true;
     170    return false;
     171  }
     172  
     173  /* Hardcoded iterator functions for all kinds of encodings.
     174     We could also implement a general iterator function with iconv(),
     175     but we need a fast one.  */
     176  
     177  /* Character iterator for 8-bit encodings.  */
     178  static size_t
     179  char_iterator (const char *s)
     180  {
     181    return 1;
     182  }
     183  
     184  /* Character iterator for GB2312.  See libiconv/lib/euc_cn.h.  */
     185  /* Character iterator for EUC-KR.  See libiconv/lib/euc_kr.h.  */
     186  static size_t
     187  euc_character_iterator (const char *s)
     188  {
     189    unsigned char c = *s;
     190    if (c >= 0xa1 && c < 0xff)
     191      {
     192        unsigned char c2 = s[1];
     193        if (c2 >= 0xa1 && c2 < 0xff)
     194          return 2;
     195      }
     196    return 1;
     197  }
     198  
     199  /* Character iterator for EUC-JP.  See libiconv/lib/euc_jp.h.  */
     200  static size_t
     201  euc_jp_character_iterator (const char *s)
     202  {
     203    unsigned char c = *s;
     204    if (c >= 0xa1 && c < 0xff)
     205      {
     206        unsigned char c2 = s[1];
     207        if (c2 >= 0xa1 && c2 < 0xff)
     208          return 2;
     209      }
     210    else if (c == 0x8e)
     211      {
     212        unsigned char c2 = s[1];
     213        if (c2 >= 0xa1 && c2 < 0xe0)
     214          return 2;
     215      }
     216    else if (c == 0x8f)
     217      {
     218        unsigned char c2 = s[1];
     219        if (c2 >= 0xa1 && c2 < 0xff)
     220          {
     221            unsigned char c3 = s[2];
     222            if (c3 >= 0xa1 && c3 < 0xff)
     223              return 3;
     224          }
     225      }
     226    return 1;
     227  }
     228  
     229  /* Character iterator for EUC-TW.  See libiconv/lib/euc_tw.h.  */
     230  static size_t
     231  euc_tw_character_iterator (const char *s)
     232  {
     233    unsigned char c = *s;
     234    if (c >= 0xa1 && c < 0xff)
     235      {
     236        unsigned char c2 = s[1];
     237        if (c2 >= 0xa1 && c2 < 0xff)
     238          return 2;
     239      }
     240    else if (c == 0x8e)
     241      {
     242        unsigned char c2 = s[1];
     243        if (c2 >= 0xa1 && c2 <= 0xb0)
     244          {
     245            unsigned char c3 = s[2];
     246            if (c3 >= 0xa1 && c3 < 0xff)
     247              {
     248                unsigned char c4 = s[3];
     249                if (c4 >= 0xa1 && c4 < 0xff)
     250                  return 4;
     251              }
     252          }
     253      }
     254    return 1;
     255  }
     256  
     257  /* Character iterator for BIG5.  See libiconv/lib/ces_big5.h.  */
     258  static size_t
     259  big5_character_iterator (const char *s)
     260  {
     261    unsigned char c = *s;
     262    if (c >= 0xa1 && c < 0xff)
     263      {
     264        unsigned char c2 = s[1];
     265        if ((c2 >= 0x40 && c2 < 0x7f) || (c2 >= 0xa1 && c2 < 0xff))
     266          return 2;
     267      }
     268    return 1;
     269  }
     270  
     271  /* Character iterator for BIG5-HKSCS.  See libiconv/lib/big5hkscs.h.  */
     272  static size_t
     273  big5hkscs_character_iterator (const char *s)
     274  {
     275    unsigned char c = *s;
     276    if (c >= 0x88 && c < 0xff)
     277      {
     278        unsigned char c2 = s[1];
     279        if ((c2 >= 0x40 && c2 < 0x7f) || (c2 >= 0xa1 && c2 < 0xff))
     280          return 2;
     281      }
     282    return 1;
     283  }
     284  
     285  /* Character iterator for GBK.  See libiconv/lib/ces_gbk.h and
     286     libiconv/lib/gbk.h.  */
     287  static size_t
     288  gbk_character_iterator (const char *s)
     289  {
     290    unsigned char c = *s;
     291    if (c >= 0x81 && c < 0xff)
     292      {
     293        unsigned char c2 = s[1];
     294        if ((c2 >= 0x40 && c2 < 0x7f) || (c2 >= 0x80 && c2 < 0xff))
     295          return 2;
     296      }
     297    return 1;
     298  }
     299  
     300  /* Character iterator for GB18030.  See libiconv/lib/gb18030.h.  */
     301  static size_t
     302  gb18030_character_iterator (const char *s)
     303  {
     304    unsigned char c = *s;
     305    if (c >= 0x81 && c < 0xff)
     306      {
     307        unsigned char c2 = s[1];
     308        if ((c2 >= 0x40 && c2 < 0x7f) || (c2 >= 0x80 && c2 < 0xff))
     309          return 2;
     310      }
     311    if (c >= 0x81 && c <= 0x84)
     312      {
     313        unsigned char c2 = s[1];
     314        if (c2 >= 0x30 && c2 <= 0x39)
     315          {
     316            unsigned char c3 = s[2];
     317            if (c3 >= 0x81 && c3 < 0xff)
     318              {
     319                unsigned char c4 = s[3];
     320                if (c4 >= 0x30 && c4 <= 0x39)
     321                  return 4;
     322              }
     323          }
     324      }
     325    return 1;
     326  }
     327  
     328  /* Character iterator for SHIFT_JIS.  See libiconv/lib/sjis.h.  */
     329  static size_t
     330  shift_jis_character_iterator (const char *s)
     331  {
     332    unsigned char c = *s;
     333    if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xf9))
     334      {
     335        unsigned char c2 = s[1];
     336        if ((c2 >= 0x40 && c2 <= 0x7e) || (c2 >= 0x80 && c2 <= 0xfc))
     337          return 2;
     338      }
     339    return 1;
     340  }
     341  
     342  /* Character iterator for JOHAB.  See libiconv/lib/johab.h and
     343     libiconv/lib/johab_hangul.h.  */
     344  static size_t
     345  johab_character_iterator (const char *s)
     346  {
     347    unsigned char c = *s;
     348    if (c >= 0x84 && c <= 0xd3)
     349      {
     350        unsigned char c2 = s[1];
     351        if ((c2 >= 0x41 && c2 < 0x7f) || (c2 >= 0x81 && c2 < 0xff))
     352          return 2;
     353      }
     354    else if (c >= 0xd9 && c <= 0xf9)
     355      {
     356        unsigned char c2 = s[1];
     357        if ((c2 >= 0x31 && c2 <= 0x7e) || (c2 >= 0x91 && c2 <= 0xfe))
     358          return 2;
     359      }
     360    return 1;
     361  }
     362  
     363  /* Character iterator for UTF-8.  See libiconv/lib/utf8.h.  */
     364  static size_t
     365  utf8_character_iterator (const char *s)
     366  {
     367    unsigned char c = *s;
     368    if (c >= 0xc2)
     369      {
     370        if (c < 0xe0)
     371          {
     372            unsigned char c2 = s[1];
     373            if (c2 >= 0x80 && c2 < 0xc0)
     374              return 2;
     375          }
     376        else if (c < 0xf0)
     377          {
     378            unsigned char c2 = s[1];
     379            if (c2 >= 0x80 && c2 < 0xc0)
     380              {
     381                unsigned char c3 = s[2];
     382                if (c3 >= 0x80 && c3 < 0xc0)
     383                  return 3;
     384              }
     385          }
     386        else if (c < 0xf8)
     387          {
     388            unsigned char c2 = s[1];
     389            if (c2 >= 0x80 && c2 < 0xc0)
     390              {
     391                unsigned char c3 = s[2];
     392                if (c3 >= 0x80 && c3 < 0xc0)
     393                  {
     394                    unsigned char c4 = s[3];
     395                    if (c4 >= 0x80 && c4 < 0xc0)
     396                      return 4;
     397                  }
     398              }
     399          }
     400      }
     401    return 1;
     402  }
     403  
     404  /* Returns a character iterator for a given encoding.
     405     Given a pointer into a string, it returns the number occupied by the next
     406     single character.  If the piece of string is not valid or if the *s == '\0',
     407     it returns 1.  */
     408  character_iterator_t
     409  po_charset_character_iterator (const char *canon_charset)
     410  {
     411    if (canon_charset == utf8)
     412      return utf8_character_iterator;
     413    if (strcmp (canon_charset, "GB2312") == 0
     414        || strcmp (canon_charset, "EUC-KR") == 0)
     415      return euc_character_iterator;
     416    if (strcmp (canon_charset, "EUC-JP") == 0)
     417      return euc_jp_character_iterator;
     418    if (strcmp (canon_charset, "EUC-TW") == 0)
     419      return euc_tw_character_iterator;
     420    if (strcmp (canon_charset, "BIG5") == 0)
     421      return big5_character_iterator;
     422    if (strcmp (canon_charset, "BIG5-HKSCS") == 0)
     423      return big5hkscs_character_iterator;
     424    if (strcmp (canon_charset, "GBK") == 0)
     425      return gbk_character_iterator;
     426    if (strcmp (canon_charset, "GB18030") == 0)
     427      return gb18030_character_iterator;
     428    if (strcmp (canon_charset, "SHIFT_JIS") == 0)
     429      return shift_jis_character_iterator;
     430    if (strcmp (canon_charset, "JOHAB") == 0)
     431      return johab_character_iterator;
     432    return char_iterator;
     433  }
     434  
     435  
/* The PO file's encoding, as specified in the header entry.
   po_lex_charset_set stores the canonical name here when it recognizes the
   charset; po_lex_charset_init and po_lex_charset_close set it to NULL.  */
const char *po_lex_charset;

/* Representation of U+2068 FIRST STRONG ISOLATE (FSI) in the PO file's
   encoding, or NULL if not available.  */
const char *po_lex_isolate_start;
/* Representation of U+2069 POP DIRECTIONAL ISOLATE (PDI) in the PO file's
   encoding, or NULL if not available.  */
const char *po_lex_isolate_end;

#if HAVE_ICONV
/* Converter from the PO file's encoding to UTF-8.
   The value (iconv_t)(-1) means that no converter is open.  */
iconv_t po_lex_iconv;
#endif
/* If no converter is available, some information about the structure of the
   PO file's encoding.  Set from po_is_charset_weird_cjk by
   po_lex_charset_set; false after init and close.  */
bool po_lex_weird_cjk;
     453  
     454  void
     455  po_lex_charset_init ()
     456  {
     457    po_lex_charset = NULL;
     458    po_lex_isolate_start = NULL;
     459    po_lex_isolate_end = NULL;
     460  #if HAVE_ICONV
     461    po_lex_iconv = (iconv_t)(-1);
     462  #endif
     463    po_lex_weird_cjk = false;
     464  }
     465  
/* Determine the PO file's encoding from its header entry and set up the
   charset-related lexer state accordingly.
   HEADER_ENTRY is the text of the header entry; it is searched for the
   substring "charset=".
   FILENAME is the name of the PO file; it is used in warnings and to
   recognize POT files by their ".pot" suffix.
   Outcomes:
     - No "charset=" found: a warning is issued, unless FILENAME ends in
       ".pot".  The lexer state is left untouched.
     - Charset name not known to po_charset_canonicalize: a warning is
       issued, unless FILENAME ends in ".pot" and the name is the
       placeholder "CHARSET".  The lexer state is left untouched.
     - Known charset: po_lex_charset, po_lex_isolate_start and
       po_lex_isolate_end are set, a previously opened converter is closed,
       and po_lex_iconv / po_lex_weird_cjk are set up as described in the
       body.  */
void
po_lex_charset_set (const char *header_entry, const char *filename)
{
  /* Verify the validity of CHARSET.  It is necessary
     1. for the correct treatment of multibyte characters containing
        0x5C bytes in the PO lexer,
     2. so that at run time, gettext() can call iconv() to convert
        msgstr.  */
  const char *charsetstr = c_strstr (header_entry, "charset=");

  if (charsetstr != NULL)
    {
      size_t len;
      char *charset;
      const char *canon_charset;

      /* Make a NUL-terminated temporary copy of the charset name, which
         extends up to the first space, tab or newline.  The copy is
         released with freea at the end of this branch.  */
      charsetstr += strlen ("charset=");
      len = strcspn (charsetstr, " \t\n");
      charset = (char *) xmalloca (len + 1);
      memcpy (charset, charsetstr, len);
      charset[len] = '\0';

      canon_charset = po_charset_canonicalize (charset);
      if (canon_charset == NULL)
        {
          /* Don't warn for POT files, because POT files usually contain
             only ASCII msgids.  */
          size_t filenamelen = strlen (filename);

          if (!(filenamelen >= 4
                && memcmp (filename + filenamelen - 4, ".pot", 4) == 0
                && strcmp (charset, "CHARSET") == 0))
            {
              char *warning_message =
                xasprintf (_("\
Charset \"%s\" is not a portable encoding name.\n\
Message conversion to user's charset might not work.\n"),
                           charset);
              po_xerror (PO_SEVERITY_WARNING, NULL,
                         filename, (size_t)(-1), (size_t)(-1), true,
                         warning_message);
              free (warning_message);
            }
        }
      else
        {
          const char *envval;

          po_lex_charset = canon_charset;

          /* The isolate strings are the byte sequences of U+2068 and
             U+2069 in the respective encoding.  */
          if (strcmp (canon_charset, "UTF-8") == 0)
            {
              po_lex_isolate_start = "\xE2\x81\xA8";
              po_lex_isolate_end = "\xE2\x81\xA9";
            }
          else if (strcmp (canon_charset, "GB18030") == 0)
            {
              po_lex_isolate_start = "\x81\x36\xAC\x34";
              po_lex_isolate_end = "\x81\x36\xAC\x35";
            }
          else
            {
              /* The other encodings don't contain U+2068, U+2069.  */
              po_lex_isolate_start = NULL;
              po_lex_isolate_end = NULL;
            }

#if HAVE_ICONV
          /* Release the converter left over from an earlier call, if any,
             before po_lex_iconv is assigned anew below.  */
          if (po_lex_iconv != (iconv_t)(-1))
            iconv_close (po_lex_iconv);
#endif

          /* The old Solaris/openwin msgfmt and GNU msgfmt <= 0.10.35
             don't know about multibyte encodings, and require a spurious
             backslash after every multibyte character whose last byte is
             0x5C.  Some programs, like vim, distribute PO files in this
             broken format.  GNU msgfmt must continue to support this old
             PO file format when the Makefile requests it.  */
          envval = getenv ("OLD_PO_FILE_INPUT");
          if (envval != NULL && *envval != '\0')
            {
              /* Assume the PO file is in old format, with extraneous
                 backslashes.  */
#if HAVE_ICONV
              po_lex_iconv = (iconv_t)(-1);
#endif
              po_lex_weird_cjk = false;
            }
          else
            {
              /* Use iconv() to parse multibyte characters.  */
#if HAVE_ICONV
              /* The conditionally compiled "if ... else" fragments below
                 chain onto the iconv_open statement: on the affected
                 platforms the listed encodings get no converter, all
                 other cases fall through to iconv_open.  */
              /* Avoid glibc-2.1 bug with EUC-KR.  */
# if ((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
     && !defined _LIBICONV_VERSION
              if (strcmp (po_lex_charset, "EUC-KR") == 0)
                po_lex_iconv = (iconv_t)(-1);
              else
# endif
              /* Avoid Solaris 2.9 bug with GB2312, EUC-TW, BIG5, BIG5-HKSCS,
                 GBK, GB18030.  */
# if defined __sun && !defined _LIBICONV_VERSION
              if (   strcmp (po_lex_charset, "GB2312") == 0
                  || strcmp (po_lex_charset, "EUC-TW") == 0
                  || strcmp (po_lex_charset, "BIG5") == 0
                  || strcmp (po_lex_charset, "BIG5-HKSCS") == 0
                  || strcmp (po_lex_charset, "GBK") == 0
                  || strcmp (po_lex_charset, "GB18030") == 0)
                po_lex_iconv = (iconv_t)(-1);
              else
# endif
              po_lex_iconv = iconv_open ("UTF-8", po_lex_charset);
              /* NOTE(review): when a converter was obtained,
                 po_lex_weird_cjk is not assigned in this branch and keeps
                 its previous value.  */
              if (po_lex_iconv == (iconv_t)(-1))
                {
                  const char *progname;
                  char *warning_message;
                  const char *recommendation;
                  const char *note;
                  char *whole_message;

# if IN_LIBGETTEXTPO
                  progname = "libgettextpo";
# else
                  progname = last_component (program_name);
# endif

                  warning_message =
                    xasprintf (_("\
Charset \"%s\" is not supported. %s relies on iconv(),\n\
and iconv() does not support \"%s\".\n"),
                               po_lex_charset, progname, po_lex_charset);

                  /* Recommending GNU libiconv only makes sense when the
                     build does not already use it.  */
# if !defined _LIBICONV_VERSION
                  recommendation = _("\
Installing GNU libiconv and then reinstalling GNU gettext\n\
would fix this problem.\n");
# else
                  recommendation = "";
# endif

                  /* Test for a charset which has double-byte characters
                     ending in 0x5C.  For these encodings, the string parser
                     is likely to be confused if it can't see the character
                     boundaries.  */
                  po_lex_weird_cjk = po_is_charset_weird_cjk (po_lex_charset);
                  if (po_is_charset_weird (po_lex_charset)
                      && !po_lex_weird_cjk)
                    note = _("Continuing anyway, expect parse errors.");
                  else
                    note = _("Continuing anyway.");

                  whole_message =
                    xasprintf ("%s%s%s\n",
                               warning_message, recommendation, note);

                  po_xerror (PO_SEVERITY_WARNING, NULL,
                             filename, (size_t)(-1), (size_t)(-1), true,
                             whole_message);

                  free (whole_message);
                  free (warning_message);
                }
#else
              /* Test for a charset which has double-byte characters
                 ending in 0x5C.  For these encodings, the string parser
                 is likely to be confused if it can't see the character
                 boundaries.  */
              po_lex_weird_cjk = po_is_charset_weird_cjk (po_lex_charset);
              /* Without iconv a warning is only issued for a weird
                 encoding that lacks the CJK structure.  */
              if (po_is_charset_weird (po_lex_charset) && !po_lex_weird_cjk)
                {
                  const char *progname;
                  char *warning_message;
                  const char *recommendation;
                  const char *note;
                  char *whole_message;

# if IN_LIBGETTEXTPO
                  progname = "libgettextpo";
# else
                  progname = last_component (program_name);
# endif

                  warning_message =
                    xasprintf (_("\
Charset \"%s\" is not supported. %s relies on iconv().\n\
This version was built without iconv().\n"),
                               po_lex_charset, progname);

                  recommendation = _("\
Installing GNU libiconv and then reinstalling GNU gettext\n\
would fix this problem.\n");

                  note = _("Continuing anyway, expect parse errors.");

                  whole_message =
                    xasprintf ("%s%s%s\n",
                               warning_message, recommendation, note);

                  po_xerror (PO_SEVERITY_WARNING, NULL,
                             filename, (size_t)(-1), (size_t)(-1), true,
                             whole_message);

                  free (whole_message);
                  free (warning_message);
                }
#endif
            }
        }
      /* Release the temporary copy of the charset name.  */
      freea (charset);
    }
  else
    {
      /* Don't warn for POT files, because POT files usually contain
         only ASCII msgids.  */
      size_t filenamelen = strlen (filename);

      if (!(filenamelen >= 4
            && memcmp (filename + filenamelen - 4, ".pot", 4) == 0))
        po_xerror (PO_SEVERITY_WARNING,
                   NULL, filename, (size_t)(-1), (size_t)(-1), true,
                   _("\
Charset missing in header.\n\
Message conversion to user's charset will not work.\n"));
    }
}
     691  
     692  void
     693  po_lex_charset_close ()
     694  {
     695    po_lex_charset = NULL;
     696    po_lex_isolate_start = NULL;
     697    po_lex_isolate_end = NULL;
     698  #if HAVE_ICONV
     699    if (po_lex_iconv != (iconv_t)(-1))
     700      {
     701        iconv_close (po_lex_iconv);
     702        po_lex_iconv = (iconv_t)(-1);
     703      }
     704  #endif
     705    po_lex_weird_cjk = false;
     706  }