(root)/
gettext-0.22.4/
gettext-tools/
src/
msguniq.c
       1  /* Remove, select or merge duplicate translations.
       2     Copyright (C) 2001-2007, 2009-2010, 2012, 2014, 2016, 2018-2023 Free Software Foundation, Inc.
       3     Written by Bruno Haible <haible@clisp.cons.org>, 2001.
       4  
       5     This program is free software: you can redistribute it and/or modify
       6     it under the terms of the GNU General Public License as published by
       7     the Free Software Foundation; either version 3 of the License, or
       8     (at your option) any later version.
       9  
      10     This program is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13     GNU General Public License for more details.
      14  
      15     You should have received a copy of the GNU General Public License
      16     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      17  
      18  
      19  #ifdef HAVE_CONFIG_H
      20  # include "config.h"
      21  #endif
      22  
      23  #include <getopt.h>
      24  #include <limits.h>
      25  #include <stdio.h>
      26  #include <stdlib.h>
      27  #include <locale.h>
      28  
      29  #include <textstyle.h>
      30  
      31  #include "noreturn.h"
      32  #include "closeout.h"
      33  #include "dir-list.h"
      34  #include "str-list.h"
      35  #include "error.h"
      36  #include "error-progname.h"
      37  #include "progname.h"
      38  #include "relocatable.h"
      39  #include "basename-lgpl.h"
      40  #include "message.h"
      41  #include "read-catalog.h"
      42  #include "read-po.h"
      43  #include "read-properties.h"
      44  #include "read-stringtable.h"
      45  #include "write-catalog.h"
      46  #include "write-po.h"
      47  #include "write-properties.h"
      48  #include "write-stringtable.h"
      49  #include "msgl-cat.h"
      50  #include "propername.h"
      51  #include "gettext.h"
      52  
/* Shorthand used throughout this file to mark a string literal as
   translatable and to fetch its translation through gettext at run time.  */
#define _(str) gettext (str)
      54  
      55  
/* Force output of PO file even if empty.
   Set to 1 through the flag pointer of the "force-po" entry in
   long_options; handed to msgdomain_list_print in main.  */
static int force_po;

/* Target encoding.
   Assigned from the argument of -t / --to-code; remains NULL when that
   option is not given.  Handed to catenate_msgdomain_list in main.  */
static const char *to_code;
      61  
/* Long options.
   Each entry is { name, argument kind, flag pointer, value }.  With a NULL
   flag pointer, getopt_long returns the value, which main dispatches on in
   its switch.  Values of the form CHAR_MAX + N are codes for options that
   have no single-letter spelling.  The table is terminated by an all-zero
   entry.  */
static const struct option long_options[] =
{
  { "add-location", optional_argument, NULL, 'n' },
  { "color", optional_argument, NULL, CHAR_MAX + 5 },
  { "directory", required_argument, NULL, 'D' },
  { "escape", no_argument, NULL, 'E' },
  /* Non-NULL flag pointer: getopt_long stores 1 into force_po and returns 0,
     which main handles in its '\0' case.  */
  { "force-po", no_argument, &force_po, 1 },
  { "help", no_argument, NULL, 'h' },
  { "indent", no_argument, NULL, 'i' },
  { "no-escape", no_argument, NULL, 'e' },
  { "no-location", no_argument, NULL, CHAR_MAX + 7 },
  { "no-wrap", no_argument, NULL, CHAR_MAX + 2 },
  { "output-file", required_argument, NULL, 'o' },
  { "properties-input", no_argument, NULL, 'P' },
  { "properties-output", no_argument, NULL, 'p' },
  { "repeated", no_argument, NULL, 'd' },
  { "sort-by-file", no_argument, NULL, 'F' },
  { "sort-output", no_argument, NULL, 's' },
  /* 'S' is handled in main's switch but is absent from the short option
     string passed to getopt_long, so only the long spelling is accepted.  */
  { "strict", no_argument, NULL, 'S' },
  { "stringtable-input", no_argument, NULL, CHAR_MAX + 3 },
  { "stringtable-output", no_argument, NULL, CHAR_MAX + 4 },
  { "style", required_argument, NULL, CHAR_MAX + 6 },
  { "to-code", required_argument, NULL, 't' },
  { "unique", no_argument, NULL, 'u' },
  { "use-first", no_argument, NULL, CHAR_MAX + 1 },
  { "version", no_argument, NULL, 'V' },
  { "width", required_argument, NULL, 'w' },
  { NULL, 0, NULL, 0 }
};
      92  
      93  
/* Forward declaration of local functions.  */
/* usage prints either the full help text or a short hint and then calls
   exit (status); it is declared as not returning.  */
_GL_NORETURN_FUNC static void usage (int status);
      96  
      97  
      98  int
      99  main (int argc, char **argv)
     100  {
     101    int optchar;
     102    bool do_help;
     103    bool do_version;
     104    char *output_file;
     105    const char *input_file;
     106    string_list_ty *file_list;
     107    msgdomain_list_ty *result;
     108    catalog_input_format_ty input_syntax = &input_format_po;
     109    catalog_output_format_ty output_syntax = &output_format_po;
     110    bool sort_by_msgid = false;
     111    bool sort_by_filepos = false;
     112  
     113    /* Set program name for messages.  */
     114    set_program_name (argv[0]);
     115    error_print_progname = maybe_print_progname;
     116  
     117    /* Set locale via LC_ALL.  */
     118    setlocale (LC_ALL, "");
     119  
     120    /* Set the text message domain.  */
     121    bindtextdomain (PACKAGE, relocate (LOCALEDIR));
     122    bindtextdomain ("bison-runtime", relocate (BISON_LOCALEDIR));
     123    textdomain (PACKAGE);
     124  
     125    /* Ensure that write errors on stdout are detected.  */
     126    atexit (close_stdout);
     127  
     128    /* Set default values for variables.  */
     129    do_help = false;
     130    do_version = false;
     131    output_file = NULL;
     132    input_file = NULL;
     133    more_than = 0;
     134    less_than = INT_MAX;
     135    use_first = false;
     136  
     137    while ((optchar = getopt_long (argc, argv, "dD:eEFhino:pPst:uVw:",
     138                                   long_options, NULL)) != EOF)
     139      switch (optchar)
     140        {
     141        case '\0':                /* Long option.  */
     142          break;
     143  
     144        case 'd':
     145          more_than = 1;
     146          less_than = INT_MAX;
     147          break;
     148  
     149        case 'D':
     150          dir_list_append (optarg);
     151          break;
     152  
     153        case 'e':
     154          message_print_style_escape (false);
     155          break;
     156  
     157        case 'E':
     158          message_print_style_escape (true);
     159          break;
     160  
     161        case 'F':
     162          sort_by_filepos = true;
     163          break;
     164  
     165        case 'h':
     166          do_help = true;
     167          break;
     168  
     169        case 'i':
     170          message_print_style_indent ();
     171          break;
     172  
     173        case 'n':
     174          if (handle_filepos_comment_option (optarg))
     175            usage (EXIT_FAILURE);
     176          break;
     177  
     178        case 'o':
     179          output_file = optarg;
     180          break;
     181  
     182        case 'p':
     183          output_syntax = &output_format_properties;
     184          break;
     185  
     186        case 'P':
     187          input_syntax = &input_format_properties;
     188          break;
     189  
     190        case 's':
     191          sort_by_msgid = true;
     192          break;
     193  
     194        case 'S':
     195          message_print_style_uniforum ();
     196          break;
     197  
     198        case 't':
     199          to_code = optarg;
     200          break;
     201  
     202        case 'u':
     203          more_than = 0;
     204          less_than = 2;
     205          break;
     206  
     207        case 'V':
     208          do_version = true;
     209          break;
     210  
     211        case 'w':
     212          {
     213            int value;
     214            char *endp;
     215            value = strtol (optarg, &endp, 10);
     216            if (endp != optarg)
     217              message_page_width_set (value);
     218          }
     219          break;
     220  
     221        case CHAR_MAX + 1:
     222          use_first = true;
     223          break;
     224  
     225        case CHAR_MAX + 2: /* --no-wrap */
     226          message_page_width_ignore ();
     227          break;
     228  
     229        case CHAR_MAX + 3: /* --stringtable-input */
     230          input_syntax = &input_format_stringtable;
     231          break;
     232  
     233        case CHAR_MAX + 4: /* --stringtable-output */
     234          output_syntax = &output_format_stringtable;
     235          break;
     236  
     237        case CHAR_MAX + 5: /* --color */
     238          if (handle_color_option (optarg) || color_test_mode)
     239            usage (EXIT_FAILURE);
     240          break;
     241  
     242        case CHAR_MAX + 6: /* --style */
     243          handle_style_option (optarg);
     244          break;
     245  
     246        case CHAR_MAX + 7: /* --no-location */
     247          message_print_style_filepos (filepos_comment_none);
     248          break;
     249  
     250        default:
     251          usage (EXIT_FAILURE);
     252          /* NOTREACHED */
     253        }
     254  
     255    /* Version information requested.  */
     256    if (do_version)
     257      {
     258        printf ("%s (GNU %s) %s\n", last_component (program_name),
     259                PACKAGE, VERSION);
     260        /* xgettext: no-wrap */
     261        printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
     262  License GPLv3+: GNU GPL version 3 or later <%s>\n\
     263  This is free software: you are free to change and redistribute it.\n\
     264  There is NO WARRANTY, to the extent permitted by law.\n\
     265  "),
     266                "2001-2023", "https://gnu.org/licenses/gpl.html");
     267        printf (_("Written by %s.\n"), proper_name ("Bruno Haible"));
     268        exit (EXIT_SUCCESS);
     269      }
     270  
     271    /* Help is requested.  */
     272    if (do_help)
     273      usage (EXIT_SUCCESS);
     274  
     275    /* Test whether we have an .po file name as argument.  */
     276    if (optind == argc)
     277      input_file = "-";
     278    else if (optind + 1 == argc)
     279      input_file = argv[optind];
     280    else
     281      {
     282        error (EXIT_SUCCESS, 0, _("at most one input file allowed"));
     283        usage (EXIT_FAILURE);
     284      }
     285  
     286    /* Verify selected options.  */
     287    if (sort_by_msgid && sort_by_filepos)
     288      error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
     289             "--sort-output", "--sort-by-file");
     290  
     291    /* Determine list of files we have to process: a single file.  */
     292    file_list = string_list_alloc ();
     293    string_list_append (file_list, input_file);
     294  
     295    /* Read input files, then filter, convert and merge messages.  */
     296    allow_duplicates = true;
     297    result = catenate_msgdomain_list (file_list, input_syntax, to_code);
     298  
     299    string_list_free (file_list);
     300  
     301    /* Sorting the list of messages.  */
     302    if (sort_by_filepos)
     303      msgdomain_list_sort_by_filepos (result);
     304    else if (sort_by_msgid)
     305      msgdomain_list_sort_by_msgid (result);
     306  
     307    /* Write the PO file.  */
     308    msgdomain_list_print (result, output_file, output_syntax, force_po, false);
     309  
     310    exit (error_message_count > 0 ? EXIT_FAILURE : EXIT_SUCCESS);
     311  }
     312  
     313  
     314  /* Display usage information and exit.  */
     315  static void
     316  usage (int status)
     317  {
     318    if (status != EXIT_SUCCESS)
     319      fprintf (stderr, _("Try '%s --help' for more information.\n"),
     320               program_name);
     321    else
     322      {
     323        printf (_("\
     324  Usage: %s [OPTION] [INPUTFILE]\n\
     325  "), program_name);
     326        printf ("\n");
     327        /* xgettext: no-wrap */
     328        printf (_("\
     329  Unifies duplicate translations in a translation catalog.\n\
     330  Finds duplicate translations of the same message ID.  Such duplicates are\n\
     331  invalid input for other programs like msgfmt, msgmerge or msgcat.  By\n\
     332  default, duplicates are merged together.  When using the --repeated option,\n\
     333  only duplicates are output, and all other messages are discarded.  Comments\n\
     334  and extracted comments will be cumulated, except that if --use-first is\n\
     335  specified, they will be taken from the first translation.  File positions\n\
     336  will be cumulated.  When using the --unique option, duplicates are discarded.\n\
     337  "));
     338        printf ("\n");
     339        printf (_("\
     340  Mandatory arguments to long options are mandatory for short options too.\n"));
     341        printf ("\n");
     342        printf (_("\
     343  Input file location:\n"));
     344        printf (_("\
     345    INPUTFILE                   input PO file\n"));
     346        printf (_("\
     347    -D, --directory=DIRECTORY   add DIRECTORY to list for input files search\n"));
     348        printf (_("\
     349  If no input file is given or if it is -, standard input is read.\n"));
     350        printf ("\n");
     351        printf (_("\
     352  Output file location:\n"));
     353        printf (_("\
     354    -o, --output-file=FILE      write output to specified file\n"));
     355        printf (_("\
     356  The results are written to standard output if no output file is specified\n\
     357  or if it is -.\n"));
     358        printf ("\n");
     359        printf (_("\
     360  Message selection:\n"));
     361        printf (_("\
     362    -d, --repeated              print only duplicates\n"));
     363        printf (_("\
     364    -u, --unique                print only unique messages, discard duplicates\n"));
     365        printf ("\n");
     366        printf (_("\
     367  Input file syntax:\n"));
     368        printf (_("\
     369    -P, --properties-input      input file is in Java .properties syntax\n"));
     370        printf (_("\
     371        --stringtable-input     input file is in NeXTstep/GNUstep .strings syntax\n"));
     372        printf ("\n");
     373        printf (_("\
     374  Output details:\n"));
     375        printf (_("\
     376    -t, --to-code=NAME          encoding for output\n"));
     377        printf (_("\
     378        --use-first             use first available translation for each\n\
     379                                message, don't merge several translations\n"));
     380        printf (_("\
     381        --color                 use colors and other text attributes always\n\
     382        --color=WHEN            use colors and other text attributes if WHEN.\n\
     383                                WHEN may be 'always', 'never', 'auto', or 'html'.\n"));
     384        printf (_("\
     385        --style=STYLEFILE       specify CSS style rule file for --color\n"));
     386        printf (_("\
     387    -e, --no-escape             do not use C escapes in output (default)\n"));
     388        printf (_("\
     389    -E, --escape                use C escapes in output, no extended chars\n"));
     390        printf (_("\
     391        --force-po              write PO file even if empty\n"));
     392        printf (_("\
     393    -i, --indent                write the .po file using indented style\n"));
     394        printf (_("\
     395        --no-location           do not write '#: filename:line' lines\n"));
     396        printf (_("\
     397    -n, --add-location          generate '#: filename:line' lines (default)\n"));
     398        printf (_("\
     399        --strict                write out strict Uniforum conforming .po file\n"));
     400        printf (_("\
     401    -p, --properties-output     write out a Java .properties file\n"));
     402        printf (_("\
     403        --stringtable-output    write out a NeXTstep/GNUstep .strings file\n"));
     404        printf (_("\
     405    -w, --width=NUMBER          set output page width\n"));
     406        printf (_("\
     407        --no-wrap               do not break long message lines, longer than\n\
     408                                the output page width, into several lines\n"));
     409        printf (_("\
     410    -s, --sort-output           generate sorted output\n"));
     411        printf (_("\
     412    -F, --sort-by-file          sort output by file location\n"));
     413        printf ("\n");
     414        printf (_("\
     415  Informative output:\n"));
     416        printf (_("\
     417    -h, --help                  display this help and exit\n"));
     418        printf (_("\
     419    -V, --version               output version information and exit\n"));
     420        printf ("\n");
     421        /* TRANSLATORS: The first placeholder is the web address of the Savannah
     422           project of this package.  The second placeholder is the bug-reporting
     423           email address for this package.  Please add _another line_ saying
     424           "Report translation bugs to <...>\n" with the address for translation
     425           bugs (typically your translation team's web or email address).  */
     426        printf(_("\
     427  Report bugs in the bug tracker at <%s>\n\
     428  or by email to <%s>.\n"),
     429               "https://savannah.gnu.org/projects/gettext",
     430               "bug-gettext@gnu.org");
     431      }
     432  
     433    exit (status);
     434  }
     435