(root)/
gettext-0.22.4/
gettext-tools/
src/
xg-encoding.c
       1  /* Keeping track of the encoding of strings to be extracted.
       2     Copyright (C) 2001-2023 Free Software Foundation, Inc.
       3  
       4     This program is free software: you can redistribute it and/or modify
       5     it under the terms of the GNU General Public License as published by
       6     the Free Software Foundation; either version 3 of the License, or
       7     (at your option) any later version.
       8  
       9     This program is distributed in the hope that it will be useful,
      10     but WITHOUT ANY WARRANTY; without even the implied warranty of
      11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12     GNU General Public License for more details.
      13  
      14     You should have received a copy of the GNU General Public License
      15     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      16  
      17  #ifdef HAVE_CONFIG_H
      18  # include <config.h>
      19  #endif
      20  
      21  /* Specification.  */
      22  #include "xg-encoding.h"
      23  
      24  #include <stdio.h>
      25  #include <stdlib.h>
      26  
      27  #include "msgl-ascii.h"
      28  #include "msgl-iconv.h"
      29  #include "po-charset.h"
      30  #include "unistr.h"
      31  #include "xalloc.h"
      32  #include "xerror.h"
      33  #include "xvasprintf.h"
      34  
      35  #include "gettext.h"
      36  #define _(str) gettext (str)
      37  
      38  
/* Canonicalized encoding name for all input files.
   It can be NULL when the --from-code option has not been specified.  In this
   case, the default (ASCII or UTF-8) depends on the programming language.  */
const char *xgettext_global_source_encoding;

#if HAVE_ICONV
/* Converter from xgettext_global_source_encoding to UTF-8 (except from
   ASCII or UTF-8, when this conversion is a no-op).
   Only available when the package is built with iconv support.  */
iconv_t xgettext_global_source_iconv;
#endif

/* Canonicalized encoding name for the current input file.
   The conversion functions in this file compare this pointer directly
   (with ==, not by string contents) against po_charset_ascii and
   po_charset_utf8 in order to pick the validation or conversion path.  */
const char *xgettext_current_source_encoding;

#if HAVE_ICONV
/* Converter from xgettext_current_source_encoding to UTF-8 (except from
   ASCII or UTF-8, when this conversion is a no-op).
   The conversion functions in this file use it only when the current source
   encoding is neither po_charset_ascii nor po_charset_utf8.  */
iconv_t xgettext_current_source_iconv;
#endif
      58  
      59  
      60  /* Error message about non-ASCII character in a specific lexical context.  */
      61  char *
      62  non_ascii_error_message (lexical_context_ty lcontext,
      63                           const char *file_name, size_t line_number)
      64  {
      65    char buffer[22];
      66    char *errmsg;
      67  
      68    if (line_number == (size_t)(-1))
      69      buffer[0] = '\0';
      70    else
      71      sprintf (buffer, ":%ld", (long) line_number);
      72  
      73    switch (lcontext)
      74      {
      75      case lc_outside:
      76      case lc_xml_content:
      77        errmsg =
      78          xasprintf (_("Non-ASCII character at %s%s."), file_name, buffer);
      79        break;
      80      case lc_comment:
      81        errmsg =
      82          xasprintf (_("Non-ASCII comment at or before %s%s."),
      83                     file_name, buffer);
      84        break;
      85      case lc_string:
      86        errmsg =
      87          xasprintf (_("Non-ASCII string at %s%s."), file_name, buffer);
      88        break;
      89      case lc_xml_open_tag:
      90      case lc_xml_close_tag:
      91        errmsg =
      92          xasprintf (_("Non-ASCII XML tag at %s%s."), file_name, buffer);
      93        break;
      94      default:
      95        abort ();
      96      }
      97    return errmsg;
      98  }
      99  
     100  /* Error message about non-UTF-8 character in a specific lexical context.  */
     101  static char *
     102  non_utf8_error_message (lexical_context_ty lcontext,
     103                          const char *file_name, size_t line_number)
     104  {
     105    char buffer[22];
     106    char *errmsg;
     107  
     108    if (line_number == (size_t)(-1))
     109      buffer[0] = '\0';
     110    else
     111      sprintf (buffer, ":%ld", (long) line_number);
     112  
     113    switch (lcontext)
     114      {
     115      case lc_outside:
     116      case lc_xml_content:
     117        errmsg =
     118          xasprintf (_("Character at %s%s is not UTF-8 encoded."),
     119                     file_name, buffer);
     120        break;
     121      case lc_comment:
     122        errmsg =
     123          xasprintf (_("Comment at or before %s%s is not UTF-8 encoded."),
     124                     file_name, buffer);
     125        break;
     126      case lc_string:
     127        errmsg =
     128          xasprintf (_("String at %s%s is not UTF-8 encoded."),
     129                     file_name, buffer);
     130        break;
     131      case lc_xml_open_tag:
     132      case lc_xml_close_tag:
     133        errmsg =
     134          xasprintf (_("XML tag at %s%s is not UTF-8 encoded."), file_name, buffer);
     135        break;
     136      default:
     137        abort ();
     138      }
     139    return errmsg;
     140  }
     141  
     142  /* Convert the given string from xgettext_current_source_encoding to
     143     the output file encoding (i.e. ASCII or UTF-8).
     144     The resulting string is either the argument string, or freshly allocated.
     145     The file_name and line_number are only used for error message purposes.  */
     146  char *
     147  from_current_source_encoding (const char *string,
     148                                lexical_context_ty lcontext,
     149                                const char *file_name, size_t line_number)
     150  {
     151    if (xgettext_current_source_encoding == po_charset_ascii)
     152      {
     153        if (!is_ascii_string (string))
     154          {
     155            multiline_error (xstrdup (""),
     156                             xasprintf ("%s\n%s\n",
     157                                        non_ascii_error_message (lcontext,
     158                                                                 file_name,
     159                                                                 line_number),
     160                                        _("Please specify the source encoding through --from-code.")));
     161            exit (EXIT_FAILURE);
     162          }
     163      }
     164    else if (xgettext_current_source_encoding == po_charset_utf8)
     165      {
     166        if (u8_check ((const uint8_t *) string, strlen (string)) != NULL)
     167          {
     168            multiline_error (xstrdup (""),
     169                             xasprintf ("%s\n%s\n",
     170                                        non_utf8_error_message (lcontext,
     171                                                                file_name,
     172                                                                line_number),
     173                                        _("Please specify the source encoding through --from-code.")));
     174            exit (EXIT_FAILURE);
     175          }
     176      }
     177    else
     178      {
     179  #if HAVE_ICONV
     180        struct conversion_context context;
     181  
     182        context.from_code = xgettext_current_source_encoding;
     183        context.to_code = po_charset_utf8;
     184        context.from_filename = file_name;
     185        context.message = NULL;
     186  
     187        string = convert_string_directly (xgettext_current_source_iconv, string,
     188                                          &context);
     189  #else
     190        /* If we don't have iconv(), the only supported values for
     191           xgettext_global_source_encoding and thus also for
     192           xgettext_current_source_encoding are ASCII and UTF-8.
     193           convert_string_directly() should not be called in this case.  */
     194        abort ();
     195  #endif
     196      }
     197  
     198    return (char *) string;
     199  }
     200  
     201  /* Like from_current_source_encoding, for a string that may contain NULs.  */
     202  string_desc_t
     203  string_desc_from_current_source_encoding (string_desc_t string,
     204                                            lexical_context_ty lcontext,
     205                                            const char *file_name,
     206                                            size_t line_number)
     207  {
     208    if (xgettext_current_source_encoding == po_charset_ascii)
     209      {
     210        if (!is_ascii_string_desc (string))
     211          {
     212            multiline_error (xstrdup (""),
     213                             xasprintf ("%s\n%s\n",
     214                                        non_ascii_error_message (lcontext,
     215                                                                 file_name,
     216                                                                 line_number),
     217                                        _("Please specify the source encoding through --from-code.")));
     218            exit (EXIT_FAILURE);
     219          }
     220      }
     221    else if (xgettext_current_source_encoding == po_charset_utf8)
     222      {
     223        if (u8_check ((const uint8_t *) string_desc_data (string),
     224                      string_desc_length (string))
     225            != NULL)
     226          {
     227            multiline_error (xstrdup (""),
     228                             xasprintf ("%s\n%s\n",
     229                                        non_utf8_error_message (lcontext,
     230                                                                file_name,
     231                                                                line_number),
     232                                        _("Please specify the source encoding through --from-code.")));
     233            exit (EXIT_FAILURE);
     234          }
     235      }
     236    else
     237      {
     238  #if HAVE_ICONV
     239        struct conversion_context context;
     240  
     241        context.from_code = xgettext_current_source_encoding;
     242        context.to_code = po_charset_utf8;
     243        context.from_filename = file_name;
     244        context.message = NULL;
     245  
     246        string = convert_string_desc_directly (xgettext_current_source_iconv,
     247                                               string, &context);
     248  #else
     249        /* If we don't have iconv(), the only supported values for
     250           xgettext_global_source_encoding and thus also for
     251           xgettext_current_source_encoding are ASCII and UTF-8.
     252           convert_string_desc_directly() should not be called in this case.  */
     253        abort ();
     254  #endif
     255      }
     256  
     257    return string;
     258  }