(root)/
gettext-0.22.4/
gettext-tools/
src/
recode-sr-latin.c
       1  /* Recode Serbian text from Cyrillic to Latin script.
       2     Copyright (C) 2006-2007, 2010, 2012, 2018-2023 Free Software Foundation, Inc.
       3     Written by Bruno Haible <bruno@clisp.org>, 2006.
       4  
       5     This program is free software: you can redistribute it and/or modify
       6     it under the terms of the GNU General Public License as published by
       7     the Free Software Foundation; either version 3 of the License, or
       8     (at your option) any later version.
       9  
      10     This program is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13     GNU General Public License for more details.
      14  
      15     You should have received a copy of the GNU General Public License
      16     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      17  
      18  #ifdef HAVE_CONFIG_H
      19  # include "config.h"
      20  #endif
      21  
      22  #include <errno.h>
      23  #include <getopt.h>
      24  #include <stdbool.h>
      25  #include <stdio.h>
      26  #include <stdlib.h>
      27  #include <locale.h>
      28  
      29  #if HAVE_ICONV
      30  #include <iconv.h>
      31  #endif
      32  
      33  #include "noreturn.h"
      34  #include "closeout.h"
      35  #include "error.h"
      36  #include "progname.h"
      37  #include "relocatable.h"
      38  #include "basename-lgpl.h"
      39  #include "xalloc.h"
      40  #include "localcharset.h"
      41  #include "c-strcase.h"
      42  #include "xstriconv.h"
      43  #include "filters.h"
      44  #include "propername.h"
      45  #include "gettext.h"
      46  
      47  #define _(str) gettext (str)
      48  
      49  
      50  /* Long options.  */
      51  static const struct option long_options[] =
      52  {
      53    { "help", no_argument, NULL, 'h' },
      54    { "version", no_argument, NULL, 'V' },
      55    { NULL, 0, NULL, 0 }
      56  };
      57  
      58  /* Forward declaration of local functions.  */
      59  _GL_NORETURN_FUNC static void usage (int status);
      60  static void process (FILE *stream);
      61  
      62  int
      63  main (int argc, char *argv[])
      64  {
      65    /* Default values for command line options.  */
      66    bool do_help = false;
      67    bool do_version = false;
      68  
      69    int opt;
      70  
      71    /* Set program name for message texts.  */
      72    set_program_name (argv[0]);
      73  
      74    /* Set locale via LC_ALL.  */
      75    setlocale (LC_ALL, "");
      76  
      77    /* Set the text message domain.  */
      78    bindtextdomain (PACKAGE, relocate (LOCALEDIR));
      79    textdomain (PACKAGE);
      80  
      81    /* Ensure that write errors on stdout are detected.  */
      82    atexit (close_stdout);
      83  
      84    /* Parse command line options.  */
      85    while ((opt = getopt_long (argc, argv, "hV", long_options, NULL)) != EOF)
      86      switch (opt)
      87      {
      88      case '\0':          /* Long option.  */
      89        break;
      90      case 'h':
      91        do_help = true;
      92        break;
      93      case 'V':
      94        do_version = true;
      95        break;
      96      default:
      97        usage (EXIT_FAILURE);
      98      }
      99  
     100    /* Version information is requested.  */
     101    if (do_version)
     102      {
     103        printf ("%s (GNU %s) %s\n", last_component (program_name),
     104                PACKAGE, VERSION);
     105        /* xgettext: no-wrap */
     106        printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
     107  License GPLv3+: GNU GPL version 3 or later <%s>\n\
     108  This is free software: you are free to change and redistribute it.\n\
     109  There is NO WARRANTY, to the extent permitted by law.\n\
     110  "),
     111                "2006-2023", "https://gnu.org/licenses/gpl.html");
     112        printf (_("Written by %s and %s.\n"),
     113                /* TRANSLATORS: This is a proper name. The last name is
     114                   (with Unicode escapes) "\u0160egan" or (with HTML entities)
     115                   "&Scaron;egan".  */
     116                proper_name_utf8 ("Danilo Segan", "Danilo \305\240egan"),
     117                proper_name ("Bruno Haible"));
     118        exit (EXIT_SUCCESS);
     119      }
     120  
     121    /* Help is requested.  */
     122    if (do_help)
     123      usage (EXIT_SUCCESS);
     124  
     125    if (argc - optind > 0)
     126      error (EXIT_FAILURE, 0, _("too many arguments"));
     127  
     128    process (stdin);
     129  
     130    exit (EXIT_SUCCESS);
     131  }
     132  
     133  
     134  /* Display usage information and exit.  */
     135  static void
     136  usage (int status)
     137  {
     138    if (status != EXIT_SUCCESS)
     139      fprintf (stderr, _("Try '%s --help' for more information.\n"),
     140               program_name);
     141    else
     142      {
     143        /* xgettext: no-wrap */
     144        printf (_("\
     145  Usage: %s [OPTION]\n\
     146  "), program_name);
     147        printf ("\n");
     148        /* xgettext: no-wrap */
     149        printf (_("\
     150  Recode Serbian text from Cyrillic to Latin script.\n"));
     151        /* xgettext: no-wrap */
     152        printf (_("\
     153  The input text is read from standard input.  The converted text is output to\n\
     154  standard output.\n"));
     155        printf ("\n");
     156        /* xgettext: no-wrap */
     157        printf (_("\
     158  Informative output:\n"));
     159        /* xgettext: no-wrap */
     160        printf (_("\
     161    -h, --help                  display this help and exit\n"));
     162        /* xgettext: no-wrap */
     163        printf (_("\
     164    -V, --version               output version information and exit\n"));
     165        printf ("\n");
     166        /* TRANSLATORS: The first placeholder is the web address of the Savannah
     167           project of this package.  The second placeholder is the bug-reporting
     168           email address for this package.  Please add _another line_ saying
     169           "Report translation bugs to <...>\n" with the address for translation
     170           bugs (typically your translation team's web or email address).  */
     171        printf(_("\
     172  Report bugs in the bug tracker at <%s>\n\
     173  or by email to <%s>.\n"),
     174               "https://savannah.gnu.org/projects/gettext",
     175               "bug-gettext@gnu.org");
     176      }
     177  
     178    exit (status);
     179  }
     180  
     181  
     182  /* Routines for reading a line.
     183     Don't use routines that drop NUL bytes.  Don't use getline(), because it
     184     doesn't provide a good error message in case of memory allocation failure.
     185     The gnulib module 'linebuffer' is nearly the right thing, except that we
     186     don't want an extra newline at the end of file.  */
     187  
/* A 'struct linebuffer' holds a line of text.
   The text is not NUL terminated: only the first LENGTH bytes of BUFFER
   are meaningful.  */

struct linebuffer
{
  size_t size;                  /* Number of bytes allocated for BUFFER. */
  size_t length;                /* Number of bytes of BUFFER in use. */
  char *buffer;                 /* Line storage; NULL until first allocated. */
};
     196  
     197  /* Initialize linebuffer LINEBUFFER for use. */
     198  static inline void
     199  init_linebuffer (struct linebuffer *lb)
     200  {
     201    lb->size = 0;
     202    lb->length = 0;
     203    lb->buffer = NULL;
     204  }
     205  
     206  /* Read an arbitrarily long line of text from STREAM into linebuffer LB.
     207     Keep the newline.  Do not NUL terminate.
     208     Return LINEBUFFER, except at end of file return NULL.  */
     209  static struct linebuffer *
     210  read_linebuffer (struct linebuffer *lb, FILE *stream)
     211  {
     212    if (feof (stream))
     213      return NULL;
     214    else
     215      {
     216        char *p = lb->buffer;
     217        char *end = lb->buffer + lb->size;
     218  
     219        for (;;)
     220          {
     221            int c = getc (stream);
     222            if (c == EOF)
     223              {
     224                if (p == lb->buffer || ferror (stream))
     225                  return NULL;
     226                break;
     227              }
     228            if (p == end)
     229              {
     230                size_t oldsize = lb->size; /* = p - lb->buffer */
     231                size_t newsize = 2 * oldsize + 40;
     232                lb->buffer = (char *) xrealloc (lb->buffer, newsize);
     233                lb->size = newsize;
     234                p = lb->buffer + oldsize;
     235                end = lb->buffer + newsize;
     236              }
     237            *p++ = c;
     238            if (c == '\n')
     239              break;
     240          }
     241  
     242        lb->length = p - lb->buffer;
     243        return lb;
     244      }
     245  }
     246  
     247  /* Free linebuffer LB and its data, all allocated with malloc. */
     248  static inline void
     249  destroy_linebuffer (struct linebuffer *lb)
     250  {
     251    if (lb->buffer != NULL)
     252      free (lb->buffer);
     253  }
     254  
     255  
     256  /* Process the input and produce the output.  */
     257  static void
     258  process (FILE *stream)
     259  {
     260    struct linebuffer lb;
     261    const char *locale_code = locale_charset ();
     262    bool need_code_conversion = (c_strcasecmp (locale_code, "UTF-8") != 0);
     263  #if HAVE_ICONV
     264    iconv_t conv_to_utf8 = (iconv_t)(-1);
     265    iconv_t conv_from_utf8 = (iconv_t)(-1);
     266    char *last_utf8_line;
     267    size_t last_utf8_line_len;
     268    char *last_backconv_line;
     269    size_t last_backconv_line_len;
     270  #endif
     271  
     272    init_linebuffer (&lb);
     273  
     274    /* Initialize the conversion descriptors.  */
     275    if (need_code_conversion)
     276      {
     277  #if HAVE_ICONV
     278        /* Avoid glibc-2.1 bug with EUC-KR.  */
     279  # if ((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
     280       && !defined _LIBICONV_VERSION
     281        if (strcmp (locale_code, "EUC-KR") != 0)
     282  # endif
     283          {
     284            conv_to_utf8 = iconv_open ("UTF-8", locale_code);
     285            /* TODO:  Maybe append //TRANSLIT here?  */
     286            conv_from_utf8 = iconv_open (locale_code, "UTF-8");
     287          }
     288        if (conv_to_utf8 == (iconv_t)(-1))
     289          error (EXIT_FAILURE, 0,
     290                 _("Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), and iconv() does not support this conversion."),
     291                 locale_code, "UTF-8", last_component (program_name));
     292        if (conv_from_utf8 == (iconv_t)(-1))
     293          error (EXIT_FAILURE, 0,
     294                 _("Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), and iconv() does not support this conversion."),
     295                 "UTF-8", locale_code, last_component (program_name));
     296        last_utf8_line = NULL;
     297        last_utf8_line_len = 0;
     298        last_backconv_line = NULL;
     299        last_backconv_line_len = 0;
     300  #else
     301        error (EXIT_FAILURE, 0,
     302               _("Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). This version was built without iconv()."),
     303               locale_code, "UTF-8", last_component (program_name));
     304  #endif
     305      }
     306  
     307    /* Read the input line by line.
     308       Processing it character by character is not possible, because some
     309       filters need to look at adjacent characters.  Processing the entire file
     310       in a whole chunk would take an excessive amount of memory.  */
     311    for (;;)
     312      {
     313        char *line;
     314        size_t line_len;
     315        char *filtered_line;
     316        size_t filtered_line_len;
     317  
     318        /* Read a line.  */
     319        if (read_linebuffer (&lb, stream) == NULL)
     320          break;
     321        line = lb.buffer;
     322        line_len = lb.length;
     323        /* read_linebuffer always returns a non-void result.  */
     324        if (line_len == 0)
     325          abort ();
     326  
     327  #if HAVE_ICONV
     328        /* Convert it to UTF-8.  */
     329        if (need_code_conversion)
     330          {
     331            char *utf8_line = last_utf8_line;
     332            size_t utf8_line_len = last_utf8_line_len;
     333  
     334            if (xmem_cd_iconv (line, line_len, conv_to_utf8,
     335                               &utf8_line, &utf8_line_len) != 0)
     336              error (EXIT_FAILURE, errno,
     337                     _("input is not valid in \"%s\" encoding"),
     338                     locale_code);
     339            if (utf8_line != last_utf8_line)
     340              {
     341                if (last_utf8_line != NULL)
     342                  free (last_utf8_line);
     343                last_utf8_line = utf8_line;
     344                last_utf8_line_len = utf8_line_len;
     345              }
     346  
     347            line = utf8_line;
     348            line_len = utf8_line_len;
     349          }
     350  #endif
     351  
     352        /* Apply the filter.  */
     353        serbian_to_latin (line, line_len, &filtered_line, &filtered_line_len);
     354  
     355  #if HAVE_ICONV
     356        /* Convert it back to the original encoding.  */
     357        if (need_code_conversion)
     358          {
     359            char *backconv_line = last_backconv_line;
     360            size_t backconv_line_len = last_backconv_line_len;
     361  
     362            if (xmem_cd_iconv (filtered_line, filtered_line_len, conv_from_utf8,
     363                               &backconv_line, &backconv_line_len) != 0)
     364              error (EXIT_FAILURE, errno,
     365                     _("error while converting from \"%s\" encoding to \"%s\" encoding"),
     366                     "UTF-8", locale_code);
     367            if (backconv_line != last_backconv_line)
     368              {
     369                if (last_backconv_line != NULL)
     370                  free (last_backconv_line);
     371                last_backconv_line = backconv_line;
     372                last_backconv_line_len = backconv_line_len;
     373              }
     374  
     375            fwrite (backconv_line, 1, backconv_line_len, stdout);
     376          }
     377        else
     378  #endif
     379          fwrite (filtered_line, 1, filtered_line_len, stdout);
     380  
     381        free (filtered_line);
     382      }
     383  
     384  #if HAVE_ICONV
     385    if (need_code_conversion)
     386      {
     387        iconv_close (conv_from_utf8);
     388        iconv_close (conv_to_utf8);
     389      }
     390  #endif
     391  
     392    destroy_linebuffer (&lb);
     393  }