(root)/
glibc-2.38/
catgets/
gencat.c
       1  /* Copyright (C) 1996-2023 Free Software Foundation, Inc.
       2     This file is part of the GNU C Library.
       3  
       4     This program is free software; you can redistribute it and/or modify
       5     it under the terms of the GNU General Public License as published
       6     by the Free Software Foundation; version 2 of the License, or
       7     (at your option) any later version.
       8  
       9     This program is distributed in the hope that it will be useful,
      10     but WITHOUT ANY WARRANTY; without even the implied warranty of
      11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12     GNU General Public License for more details.
      13  
      14     You should have received a copy of the GNU General Public License
      15     along with this program; if not, see <https://www.gnu.org/licenses/>.  */
      16  
      17  #ifdef HAVE_CONFIG_H
      18  # include "config.h"
      19  #endif
      20  
      21  #include <argp.h>
      22  #include <assert.h>
      23  #include <ctype.h>
      24  #include <endian.h>
      25  #include <errno.h>
      26  #include <error.h>
      27  #include <fcntl.h>
      28  #include <iconv.h>
      29  #include <langinfo.h>
      30  #include <locale.h>
      31  #include <libintl.h>
      32  #include <limits.h>
      33  #include <nl_types.h>
      34  #include <obstack.h>
      35  #include <stdint.h>
      36  #include <stdio.h>
      37  #include <stdlib.h>
      38  #include <string.h>
      39  #include <unistd.h>
      40  #include <unistd_ext.h>
      41  #include <wchar.h>
      42  
      43  #include "version.h"
      44  
      45  #include "catgetsinfo.h"
      46  
      47  
      48  #define SWAPU32(w) \
      49    (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))
      50  
      51  struct message_list
      52  {
      53    int number;
      54    const char *message;
      55  
      56    const char *fname;
      57    size_t line;
      58    const char *symbol;
      59  
      60    struct message_list *next;
      61  };
      62  
      63  
      64  struct set_list
      65  {
      66    int number;
      67    int deleted;
      68    struct message_list *messages;
      69    int last_message;
      70  
      71    const char *fname;
      72    size_t line;
      73    const char *symbol;
      74  
      75    struct set_list *next;
      76  };
      77  
      78  
      79  struct catalog
      80  {
      81    struct set_list *all_sets;
      82    struct set_list *current_set;
      83    size_t total_messages;
      84    wint_t quote_char;
      85    int last_set;
      86  
      87    struct obstack mem_pool;
      88  };
      89  
      90  
      91  /* If non-zero force creation of new file, not using existing one.  */
      92  static int force_new;
      93  
      94  /* Name of output file.  */
      95  static const char *output_name;
      96  
      97  /* Name of generated C header file.  */
      98  static const char *header_name;
      99  
     100  /* Name and version of program.  */
     101  static void print_version (FILE *stream, struct argp_state *state);
     102  void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version;
     103  
     104  #define OPT_NEW 1
     105  
     106  /* Definitions of arguments for argp functions.  */
     107  static const struct argp_option options[] =
     108  {
     109    { "header", 'H', N_("NAME"), 0,
     110      N_("Create C header file NAME containing symbol definitions") },
     111    { "new", OPT_NEW, NULL, 0,
     112      N_("Do not use existing catalog, force new output file") },
     113    { "output", 'o', N_("NAME"), 0, N_("Write output to file NAME") },
     114    { NULL, 0, NULL, 0, NULL }
     115  };
     116  
     117  /* Short description of program.  */
     118  static const char doc[] = N_("Generate message catalog.\
     119  \vIf INPUT-FILE is -, input is read from standard input.  If OUTPUT-FILE\n\
     120  is -, output is written to standard output.\n");
     121  
     122  /* Strings for arguments in help texts.  */
     123  static const char args_doc[] = N_("\
     124  -o OUTPUT-FILE [INPUT-FILE]...\n[OUTPUT-FILE [INPUT-FILE]...]");
     125  
     126  /* Prototype for option handler.  */
     127  static error_t parse_opt (int key, char *arg, struct argp_state *state);
     128  
     129  /* Function to print some extra text in the help message.  */
     130  static char *more_help (int key, const char *text, void *input);
     131  
     132  /* Data structure to communicate with argp functions.  */
     133  static struct argp argp =
     134  {
     135    options, parse_opt, args_doc, doc, NULL, more_help
     136  };
     137  
     138  
     139  /* Wrapper functions with error checking for standard functions.  */
     140  #include <programs/xmalloc.h>
     141  
     142  /* Prototypes for local functions.  */
     143  static void error_print (void);
     144  static struct catalog *read_input_file (struct catalog *current,
     145  					const char *fname);
     146  static void write_out (struct catalog *result, const char *output_name,
     147  		       const char *header_name);
     148  static struct set_list *find_set (struct catalog *current, int number);
     149  static void normalize_line (const char *fname, size_t line, iconv_t cd,
     150  			    wchar_t *string, wchar_t quote_char,
     151  			    wchar_t escape_char);
     152  static void read_old (struct catalog *catalog, const char *file_name);
     153  static int open_conversion (const char *codesetp, iconv_t *cd_towcp,
     154  			    iconv_t *cd_tombp, wchar_t *escape_charp);
     155  
     156  
     157  int
     158  main (int argc, char *argv[])
     159  {
     160    struct catalog *result;
     161    int remaining;
     162  
     163    /* Set program name for messages.  */
     164    error_print_progname = error_print;
     165  
     166    /* Set locale via LC_ALL.  */
     167    setlocale (LC_ALL, "");
     168  
     169    /* Set the text message domain.  */
     170    textdomain (PACKAGE);
     171  
     172    /* Initialize local variables.  */
     173    result = NULL;
     174  
     175    /* Parse and process arguments.  */
     176    argp_parse (&argp, argc, argv, 0, &remaining, NULL);
     177  
     178    /* Determine output file.  */
     179    if (output_name == NULL)
     180      output_name = remaining < argc ? argv[remaining++] : "-";
     181  
     182    /* Process all input files.  */
     183    setlocale (LC_CTYPE, "C");
     184    if (remaining < argc)
     185      do
     186        result = read_input_file (result, argv[remaining]);
     187      while (++remaining < argc);
     188    else
     189      result = read_input_file (NULL, "-");
     190  
     191    /* Write out the result.  */
     192    if (result != NULL)
     193      write_out (result, output_name, header_name);
     194  
     195    return error_message_count != 0;
     196  }
     197  
     198  
     199  /* Handle program arguments.  */
     200  static error_t
     201  parse_opt (int key, char *arg, struct argp_state *state)
     202  {
     203    switch (key)
     204      {
     205      case 'H':
     206        header_name = arg;
     207        break;
     208      case OPT_NEW:
     209        force_new = 1;
     210        break;
     211      case 'o':
     212        output_name = arg;
     213        break;
     214      default:
     215        return ARGP_ERR_UNKNOWN;
     216      }
     217    return 0;
     218  }
     219  
     220  
     221  static char *
     222  more_help (int key, const char *text, void *input)
     223  {
     224    char *tp = NULL;
     225    switch (key)
     226      {
     227      case ARGP_KEY_HELP_EXTRA:
     228        /* We print some extra information.  */
     229        if (asprintf (&tp, gettext ("\
     230  For bug reporting instructions, please see:\n\
     231  %s.\n"), REPORT_BUGS_TO) < 0)
     232  	return NULL;
     233        return tp;
     234      default:
     235        break;
     236      }
     237    return (char *) text;
     238  }
     239  
     240  /* Print the version information.  */
     241  static void
     242  print_version (FILE *stream, struct argp_state *state)
     243  {
     244    fprintf (stream, "gencat %s%s\n", PKGVERSION, VERSION);
     245    fprintf (stream, gettext ("\
     246  Copyright (C) %s Free Software Foundation, Inc.\n\
     247  This is free software; see the source for copying conditions.  There is NO\n\
     248  warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
     249  "), "2023");
     250    fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
     251  }
     252  
     253  
     254  /* The address of this function will be assigned to the hook in the
     255     error functions.  */
     256  static void
     257  error_print (void)
     258  {
     259    /* We don't want the program name to be printed in messages.  Emacs'
     260       compile.el does not like this.  */
     261  }
     262  
     263  
     264  static struct catalog *
     265  read_input_file (struct catalog *current, const char *fname)
     266  {
     267    FILE *fp;
     268    char *buf;
     269    size_t len;
     270    size_t line_number;
     271    wchar_t *wbuf;
     272    size_t wbufsize;
     273    iconv_t cd_towc = (iconv_t) -1;
     274    iconv_t cd_tomb = (iconv_t) -1;
     275    wchar_t escape_char = L'\\';
     276    char *codeset = NULL;
     277  
     278    if (strcmp (fname, "-") == 0 || strcmp (fname, "/dev/stdin") == 0)
     279      {
     280        fp = stdin;
     281        fname = gettext ("*standard input*");
     282      }
     283    else
     284      fp = fopen (fname, "r");
     285    if (fp == NULL)
     286      {
     287        error (0, errno, gettext ("cannot open input file `%s'"), fname);
     288        return current;
     289      }
     290  
     291    /* If we haven't seen anything yet, allocate result structure.  */
     292    if (current == NULL)
     293      {
     294        current = (struct catalog *) xcalloc (1, sizeof (*current));
     295  
     296  #define obstack_chunk_alloc malloc
     297  #define obstack_chunk_free free
     298        obstack_init (&current->mem_pool);
     299  
     300        current->current_set = find_set (current, NL_SETD);
     301      }
     302  
     303    buf = NULL;
     304    len = 0;
     305    line_number = 0;
     306  
     307    wbufsize = 1024;
     308    wbuf = (wchar_t *) xmalloc (wbufsize);
     309  
     310    while (!feof (fp))
     311      {
     312        int continued;
     313        int used;
     314        size_t start_line = line_number + 1;
     315        char *this_line;
     316  
     317        do
     318  	{
     319  	  int act_len;
     320  
     321  	  act_len = getline (&buf, &len, fp);
     322  	  if (act_len <= 0)
     323  	    break;
     324  	  ++line_number;
     325  
     326  	  /* It the line continued?  */
     327  	  continued = 0;
     328  	  if (buf[act_len - 1] == '\n')
     329  	    {
     330  	      --act_len;
     331  
     332  	      /* There might be more than one backslash at the end of
     333  		 the line.  Only if there is an odd number of them is
     334  		 the line continued.  */
     335  	      if (act_len > 0 && buf[act_len - 1] == '\\')
     336  		{
     337  		  int temp_act_len = act_len;
     338  
     339  		  do
     340  		    {
     341  		      --temp_act_len;
     342  		      continued = !continued;
     343  		    }
     344  		  while (temp_act_len > 0 && buf[temp_act_len - 1] == '\\');
     345  
     346  		  if (continued)
     347  		    --act_len;
     348  		}
     349  	    }
     350  
     351  	  /* Append to currently selected line.  */
     352  	  obstack_grow (&current->mem_pool, buf, act_len);
     353  	}
     354        while (continued);
     355  
     356        obstack_1grow (&current->mem_pool, '\0');
     357        this_line = (char *) obstack_finish (&current->mem_pool);
     358  
     359        used = 0;
     360        if (this_line[0] == '$')
     361  	{
     362  	  if (isblank (this_line[1]))
     363  	    {
     364  	      int cnt = 1;
     365  	      while (isblank (this_line[cnt]))
     366  		++cnt;
     367  	      if (strncmp (&this_line[cnt], "codeset=", 8) != 0)
     368  		/* This is a comment line. Do nothing.  */;
     369  	      else if (codeset != NULL)
     370  		/* Ignore multiple codeset. */;
     371  	      else
     372  		{
     373  		  int start = cnt + 8;
     374  		  cnt = start;
     375  		  while (this_line[cnt] != '\0' && !isspace (this_line[cnt]))
     376  		    ++cnt;
     377  		  if (cnt != start)
     378  		    {
     379  		      int len = cnt - start;
     380  		      codeset = xmalloc (len + 1);
     381  		      *((char *) mempcpy (codeset, &this_line[start], len))
     382  			= '\0';
     383  		    }
     384  		}
     385  	    }
     386  	  else if (strncmp (&this_line[1], "set", 3) == 0)
     387  	    {
     388  	      int cnt = sizeof ("set");
     389  	      int set_number;
     390  	      const char *symbol = NULL;
     391  	      while (isspace (this_line[cnt]))
     392  		++cnt;
     393  
     394  	      if (isdigit (this_line[cnt]))
     395  		{
     396  		  set_number = atol (&this_line[cnt]);
     397  
     398  		  /* If the given number for the character set is
     399  		     higher than any we used for symbolic set names
     400  		     avoid clashing by using only higher numbers for
     401  		     the following symbolic definitions.  */
     402  		  if (set_number > current->last_set)
     403  		    current->last_set = set_number;
     404  		}
     405  	      else
     406  		{
     407  		  /* See whether it is a reasonable identifier.  */
     408  		  int start = cnt;
     409  		  while (isalnum (this_line[cnt]) || this_line[cnt] == '_')
     410  		    ++cnt;
     411  
     412  		  if (cnt == start)
     413  		    {
     414  		      /* No correct character found.  */
     415  		      error_at_line (0, 0, fname, start_line,
     416  				     gettext ("illegal set number"));
     417  		      set_number = 0;
     418  		    }
     419  		  else
     420  		    {
     421  		      /* We have found seomthing that looks like a
     422  			 correct identifier.  */
     423  		      struct set_list *runp;
     424  
     425  		      this_line[cnt] = '\0';
     426  		      used = 1;
     427  		      symbol = &this_line[start];
     428  
     429  		      /* Test whether the identifier was already used.  */
     430  		      runp = current->all_sets;
     431  		      while (runp != 0)
     432  			if (runp->symbol != NULL
     433  			    && strcmp (runp->symbol, symbol) == 0)
     434  			  break;
     435  			else
     436  			  runp = runp->next;
     437  
     438  		      if (runp != NULL)
     439  			{
     440  			  /* We cannot allow duplicate identifiers for
     441  			     message sets.  */
     442  			  error_at_line (0, 0, fname, start_line,
     443  					 gettext ("duplicate set definition"));
     444  			  error_at_line (0, 0, runp->fname, runp->line,
     445  					 gettext ("\
     446  this is the first definition"));
     447  			  set_number = 0;
     448  			}
     449  		      else
     450  			/* Allocate next free message set for identifier.  */
     451  			set_number = ++current->last_set;
     452  		    }
     453  		}
     454  
     455  	      if (set_number != 0)
     456  		{
     457  		  /* We found a legal set number.  */
     458  		  current->current_set = find_set (current, set_number);
     459  		  if (symbol != NULL)
     460  		      used = 1;
     461  		  current->current_set->symbol = symbol;
     462  		  current->current_set->fname = fname;
     463  		  current->current_set->line = start_line;
     464  		}
     465  	    }
     466  	  else if (strncmp (&this_line[1], "delset", 6) == 0)
     467  	    {
     468  	      int cnt = sizeof ("delset");
     469  	      while (isspace (this_line[cnt]))
     470  		++cnt;
     471  
     472  	      if (isdigit (this_line[cnt]))
     473  		{
     474  		  size_t set_number = atol (&this_line[cnt]);
     475  		  struct set_list *set;
     476  
     477  		  /* Mark the message set with the given number as
     478  		     deleted.  */
     479  		  set = find_set (current, set_number);
     480  		  set->deleted = 1;
     481  		}
     482  	      else
     483  		{
     484  		  /* See whether it is a reasonable identifier.  */
     485  		  int start = cnt;
     486  		  while (isalnum (this_line[cnt]) || this_line[cnt] == '_')
     487  		    ++cnt;
     488  
     489  		  if (cnt == start)
     490  		    error_at_line (0, 0, fname, start_line,
     491  				   gettext ("illegal set number"));
     492  		  else
     493  		    {
     494  		      const char *symbol;
     495  		      struct set_list *runp;
     496  
     497  		      this_line[cnt] = '\0';
     498  		      used = 1;
     499  		      symbol = &this_line[start];
     500  
     501  		      /* We have a symbolic set name.  This name must
     502  			 appear somewhere else in the catalogs read so
     503  			 far.  */
     504  		      for (runp = current->all_sets; runp != NULL;
     505  			   runp = runp->next)
     506  			{
     507  			  if (strcmp (runp->symbol, symbol) == 0)
     508  			    {
     509  			      runp->deleted = 1;
     510  			      break;
     511  			    }
     512  			}
     513  		      if (runp == NULL)
     514  			/* Name does not exist before.  */
     515  			error_at_line (0, 0, fname, start_line,
     516  				       gettext ("unknown set `%s'"), symbol);
     517  		    }
     518  		}
     519  	    }
     520  	  else if (strncmp (&this_line[1], "quote", 5) == 0)
     521  	    {
     522  	      char buf[2];
     523  	      char *bufptr;
     524  	      size_t buflen;
     525  	      char *wbufptr;
     526  	      size_t wbuflen;
     527  	      int cnt;
     528  
     529  	      cnt = sizeof ("quote");
     530  	      while (isspace (this_line[cnt]))
     531  		++cnt;
     532  
     533  	      /* We need the conversion.  */
     534  	      if (cd_towc == (iconv_t) -1
     535  		  && open_conversion (codeset, &cd_towc, &cd_tomb,
     536  				      &escape_char) != 0)
     537  		/* Something is wrong.  */
     538  		goto out;
     539  
     540  	      /* Yes, the quote char can be '\0'; this means no quote
     541  		 char.  The function using the information works on
     542  		 wide characters so we have to convert it here.  */
     543  	      buf[0] = this_line[cnt];
     544  	      buf[1] = '\0';
     545  	      bufptr = buf;
     546  	      buflen = 2;
     547  
     548  	      wbufptr = (char *) wbuf;
     549  	      wbuflen = wbufsize;
     550  
     551  	      /* Flush the state.  */
     552  	      iconv (cd_towc, NULL, NULL, NULL, NULL);
     553  
     554  	      iconv (cd_towc, &bufptr, &buflen, &wbufptr, &wbuflen);
     555  	      if (buflen != 0 || (wchar_t *) wbufptr != &wbuf[2])
     556  		error_at_line (0, 0, fname, start_line,
     557  			       gettext ("invalid quote character"));
     558  	      else
     559  		/* Use the converted wide character.  */
     560  		current->quote_char = wbuf[0];
     561  	    }
     562  	  else
     563  	    {
     564  	      int cnt;
     565  	      cnt = 2;
     566  	      while (this_line[cnt] != '\0' && !isspace (this_line[cnt]))
     567  		++cnt;
     568  	      this_line[cnt] = '\0';
     569  	      error_at_line (0, 0, fname, start_line,
     570  			     gettext ("unknown directive `%s': line ignored"),
     571  			     &this_line[1]);
     572  	    }
     573  	}
     574        else if (isalnum (this_line[0]) || this_line[0] == '_')
     575  	{
     576  	  const char *ident = this_line;
     577  	  char *line = this_line;
     578  	  int message_number;
     579  
     580  	  do
     581  	    ++line;
     582  	  while (line[0] != '\0' && !isspace (line[0]));
     583  	  if (line[0] != '\0')
     584  	    *line++ = '\0';	/* Terminate the identifier.  */
     585  
     586  	  /* Now we found the beginning of the message itself.  */
     587  
     588  	  if (isdigit (ident[0]))
     589  	    {
     590  	      struct message_list *runp;
     591  	      struct message_list *lastp;
     592  
     593  	      message_number = atoi (ident);
     594  
     595  	      /* Find location to insert the new message.  */
     596  	      runp = current->current_set->messages;
     597  	      lastp = NULL;
     598  	      while (runp != NULL)
     599  		if (runp->number == message_number)
     600  		  break;
     601  		else
     602  		  {
     603  		    lastp = runp;
     604  		    runp = runp->next;
     605  		  }
     606  	      if (runp != NULL)
     607  		{
     608  		  /* Oh, oh.  There is already a message with this
     609  		     number in the message set.  */
     610  		  if (runp->symbol == NULL)
     611  		    {
     612  		      /* The existing message had its number specified
     613  			 by the user.  Fatal collision type uh, oh.  */
     614  		      error_at_line (0, 0, fname, start_line,
     615  				     gettext ("duplicated message number"));
     616  		      error_at_line (0, 0, runp->fname, runp->line,
     617  				     gettext ("this is the first definition"));
     618  		      message_number = 0;
     619  		    }
     620  		  else
     621  		    {
     622  		      /* Collision was with number auto-assigned to a
     623  			 symbolic.  Change existing symbolic number
     624  			 and move to end the list (if not already there).  */
     625  		      runp->number = ++current->current_set->last_message;
     626  
     627  		      if (runp->next != NULL)
     628  			{
     629  			  struct message_list *endp;
     630  
     631  			  if (lastp == NULL)
     632  			    current->current_set->messages=runp->next;
     633  			  else
     634  			    lastp->next=runp->next;
     635  
     636  			  endp = runp->next;
     637  			  while (endp->next != NULL)
     638  			    endp = endp->next;
     639  
     640  			  endp->next = runp;
     641  			  runp->next = NULL;
     642  			}
     643  		    }
     644  		}
     645  	      ident = NULL;	/* We don't have a symbol.  */
     646  
     647  	      if (message_number != 0
     648  		  && message_number > current->current_set->last_message)
     649  		current->current_set->last_message = message_number;
     650  	    }
     651  	  else if (ident[0] != '\0')
     652  	    {
     653  	      struct message_list *runp;
     654  
     655  	      /* Test whether the symbolic name was not used for
     656  		 another message in this message set.  */
     657  	      runp = current->current_set->messages;
     658  	      while (runp != NULL)
     659  		if (runp->symbol != NULL && strcmp (ident, runp->symbol) == 0)
     660  		  break;
     661  		else
     662  		  runp = runp->next;
     663  	      if (runp != NULL)
     664  		{
     665  		  /* The name is already used.  */
     666  		  error_at_line (0, 0, fname, start_line, gettext ("\
     667  duplicated message identifier"));
     668  		  error_at_line (0, 0, runp->fname, runp->line,
     669  				 gettext ("this is the first definition"));
     670  		  message_number = 0;
     671  		}
     672  	      else
     673  		/* Give the message the next unused number.  */
     674  		message_number = ++current->current_set->last_message;
     675  	    }
     676  	  else
     677  	    message_number = 0;
     678  
     679  	  if (message_number != 0)
     680  	    {
     681  	      char *inbuf;
     682  	      size_t inlen;
     683  	      char *outbuf;
     684  	      size_t outlen;
     685  	      struct message_list *newp;
     686  	      size_t line_len = strlen (line) + 1;
     687  	      size_t ident_len = 0;
     688  
     689  	      /* We need the conversion.  */
     690  	      if (cd_towc == (iconv_t) -1
     691  		  && open_conversion (codeset, &cd_towc, &cd_tomb,
     692  				      &escape_char) != 0)
     693  		/* Something is wrong.  */
     694  		goto out;
     695  
     696  	      /* Convert to a wide character string.  We have to
     697  		 interpret escape sequences which will be impossible
     698  		 without doing the conversion if the codeset of the
     699  		 message is stateful.  */
     700  	      while (1)
     701  		{
     702  		  inbuf = line;
     703  		  inlen = line_len;
     704  		  outbuf = (char *) wbuf;
     705  		  outlen = wbufsize;
     706  
     707  		  /* Flush the state.  */
     708  		  iconv (cd_towc, NULL, NULL, NULL, NULL);
     709  
     710  		  iconv (cd_towc, &inbuf, &inlen, &outbuf, &outlen);
     711  		  if (inlen == 0)
     712  		    {
     713  		      /* The string is converted.  */
     714  		      assert (outlen < wbufsize);
     715  		      assert (wbuf[(wbufsize - outlen) / sizeof (wchar_t) - 1]
     716  			      == L'\0');
     717  		      break;
     718  		    }
     719  
     720  		  if (outlen != 0)
     721  		    {
     722  		      /* Something is wrong with this string, we ignore it.  */
     723  		      error_at_line (0, 0, fname, start_line, gettext ("\
     724  invalid character: message ignored"));
     725  		      goto ignore;
     726  		    }
     727  
     728  		  /* The output buffer is too small.  */
     729  		  wbufsize *= 2;
     730  		  wbuf = (wchar_t *) xrealloc (wbuf, wbufsize);
     731  		}
     732  
     733  	      /* Strip quote characters, change escape sequences into
     734  		 correct characters etc.  */
     735  	      normalize_line (fname, start_line, cd_towc, wbuf,
     736  			      current->quote_char, escape_char);
     737  
     738  	      if (ident)
     739  		ident_len = line - this_line;
     740  
     741  	      /* Now the string is free of escape sequences.  Convert it
     742  		 back into a multibyte character string.  First free the
     743  		 memory allocated for the original string.  */
     744  	      obstack_free (&current->mem_pool, this_line);
     745  
     746  	      used = 1;	/* Yes, we use the line.  */
     747  
     748  	      /* Now fill in the new string.  It should never happen that
     749  		 the replaced string is longer than the original.  */
     750  	      inbuf = (char *) wbuf;
     751  	      inlen = (wcslen (wbuf) + 1) * sizeof (wchar_t);
     752  
     753  	      outlen = obstack_room (&current->mem_pool);
     754  	      obstack_blank (&current->mem_pool, outlen);
     755  	      this_line = (char *) obstack_base (&current->mem_pool);
     756  	      outbuf = this_line + ident_len;
     757  	      outlen -= ident_len;
     758  
     759  	      /* Flush the state.  */
     760  	      iconv (cd_tomb, NULL, NULL, NULL, NULL);
     761  
     762  	      iconv (cd_tomb, &inbuf, &inlen, &outbuf, &outlen);
     763  	      if (inlen != 0)
     764  		{
     765  		  error_at_line (0, 0, fname, start_line,
     766  				 gettext ("invalid line"));
     767  		  goto ignore;
     768  		}
     769  	      assert (outbuf[-1] == '\0');
     770  
     771  	      /* Free the memory in the obstack we don't use.  */
     772  	      obstack_blank (&current->mem_pool, -(int) outlen);
     773  	      line = obstack_finish (&current->mem_pool);
     774  
     775  	      newp = (struct message_list *) xmalloc (sizeof (*newp));
     776  	      newp->number = message_number;
     777  	      newp->message = line + ident_len;
     778  	      /* Remember symbolic name; is NULL if no is given.  */
     779  	      newp->symbol = ident ? line : NULL;
     780  	      /* Remember where we found the character.  */
     781  	      newp->fname = fname;
     782  	      newp->line = start_line;
     783  
     784  	      /* Find place to insert to message.  We keep them in a
     785  		 sorted single linked list.  */
     786  	      if (current->current_set->messages == NULL
     787  		  || current->current_set->messages->number > message_number)
     788  		{
     789  		  newp->next = current->current_set->messages;
     790  		  current->current_set->messages = newp;
     791  		}
     792  	      else
     793  		{
     794  		  struct message_list *runp;
     795  		  runp = current->current_set->messages;
     796  		  while (runp->next != NULL)
     797  		    if (runp->next->number > message_number)
     798  		      break;
     799  		    else
     800  		      runp = runp->next;
     801  		  newp->next = runp->next;
     802  		  runp->next = newp;
     803  		}
     804  	    }
     805  	  ++current->total_messages;
     806  	}
     807        else
     808  	{
     809  	  size_t cnt;
     810  
     811  	  cnt = 0;
     812  	  /* See whether we have any non-white space character in this
     813  	     line.  */
     814  	  while (this_line[cnt] != '\0' && isspace (this_line[cnt]))
     815  	    ++cnt;
     816  
     817  	  if (this_line[cnt] != '\0')
     818  	    /* Yes, some unknown characters found.  */
     819  	    error_at_line (0, 0, fname, start_line,
     820  			   gettext ("malformed line ignored"));
     821  	}
     822  
     823      ignore:
     824        /* We can save the memory for the line if it was not used.  */
     825        if (!used)
     826  	obstack_free (&current->mem_pool, this_line);
     827      }
     828  
     829    /* Close the conversion modules.  */
     830    iconv_close (cd_towc);
     831    iconv_close (cd_tomb);
     832    free (codeset);
     833  
     834   out:
     835    free (wbuf);
     836  
     837    if (fp != stdin)
     838      fclose (fp);
     839    return current;
     840  }
     841  
     842  static void
     843  write_out (struct catalog *catalog, const char *output_name,
     844  	   const char *header_name)
     845  {
     846    /* Computing the "optimal" size.  */
     847    struct set_list *set_run;
     848    size_t best_total, best_size, best_depth;
     849    size_t act_size, act_depth;
     850    struct catalog_obj obj;
     851    struct obstack string_pool;
     852    const char *strings;
     853    size_t strings_size;
     854    uint32_t *array1, *array2;
     855    size_t cnt;
     856    int fd;
     857  
     858    /* If not otherwise told try to read file with existing
     859       translations.  */
     860    if (!force_new)
     861      read_old (catalog, output_name);
     862  
     863    /* Initialize best_size with a very high value.  */
     864    best_total = best_size = best_depth = UINT_MAX;
     865  
     866    /* We need some start size for testing.  Let's start with
     867       TOTAL_MESSAGES / 5, which theoretically provides a mean depth of
     868       5.  */
     869    act_size = 1 + catalog->total_messages / 5;
     870  
     871    /* We determine the size of a hash table here.  Because the message
     872       numbers can be chosen arbitrary by the programmer we cannot use
     873       the simple method of accessing the array using the message
     874       number.  The algorithm is based on the trivial hash function
     875       NUMBER % TABLE_SIZE, where collisions are stored in a second
     876       dimension up to TABLE_DEPTH.  We here compute TABLE_SIZE so that
     877       the needed space (= TABLE_SIZE * TABLE_DEPTH) is minimal.  */
     878    while (act_size <= best_total)
     879      {
     880        size_t deep[act_size];
     881  
     882        act_depth = 1;
     883        memset (deep, '\0', act_size * sizeof (size_t));
     884        set_run = catalog->all_sets;
     885        while (set_run != NULL)
     886  	{
     887  	  struct message_list *message_run;
     888  
     889  	  message_run = set_run->messages;
     890  	  while (message_run != NULL)
     891  	    {
     892  	      size_t idx = (message_run->number * set_run->number) % act_size;
     893  
     894  	      ++deep[idx];
     895  	      if (deep[idx] > act_depth)
     896  		{
     897  		  act_depth = deep[idx];
     898  		  if (act_depth * act_size > best_total)
     899  		    break;
     900  		}
     901  	      message_run = message_run->next;
     902  	    }
     903  	  set_run = set_run->next;
     904  	}
     905  
     906        if (act_depth * act_size <= best_total)
     907  	{
     908  	  /* We have found a better solution.  */
     909  	  best_total = act_depth * act_size;
     910  	  best_size = act_size;
     911  	  best_depth = act_depth;
     912  	}
     913  
     914        ++act_size;
     915      }
     916  
     917    /* let's be prepared for an empty message file.  */
     918    if (best_size == UINT_MAX)
     919      {
     920        best_size = 1;
     921        best_depth = 1;
     922      }
     923  
     924    /* OK, now we have the size we will use.  Fill in the header, build
     925       the table and the second one with swapped byte order.  */
     926    obj.magic = CATGETS_MAGIC;
     927    obj.plane_size = best_size;
     928    obj.plane_depth = best_depth;
     929  
     930    uint32_t array_size = best_size * best_depth * sizeof (uint32_t) * 3;
     931    /* Allocate room for all needed arrays.  */
     932    array1 = (uint32_t *) alloca (array_size);
     933    memset (array1, '\0', array_size);
     934    array2 = (uint32_t *) alloca (array_size);
     935    obstack_init (&string_pool);
     936  
     937    set_run = catalog->all_sets;
     938    while (set_run != NULL)
     939      {
     940        struct message_list *message_run;
     941  
     942        message_run = set_run->messages;
     943        while (message_run != NULL)
     944  	{
     945  	  size_t idx = (((message_run->number * set_run->number) % best_size)
     946  			* 3);
     947  	  /* Determine collision depth.  */
     948  	  while (array1[idx] != 0)
     949  	    idx += best_size * 3;
     950  
     951  	  /* Store set number, message number and pointer into string
     952  	     space, relative to the first string.  */
     953  	  array1[idx + 0] = set_run->number;
     954  	  array1[idx + 1] = message_run->number;
     955  	  array1[idx + 2] = obstack_object_size (&string_pool);
     956  
     957  	  /* Add current string to the continuous space containing all
     958  	     strings.  */
     959  	  obstack_grow0 (&string_pool, message_run->message,
     960  			 strlen (message_run->message));
     961  
     962  	  message_run = message_run->next;
     963  	}
     964  
     965        set_run = set_run->next;
     966      }
     967    strings_size = obstack_object_size (&string_pool);
     968    strings = obstack_finish (&string_pool);
     969  
     970    /* Compute ARRAY2 by changing the byte order.  */
     971    for (cnt = 0; cnt < best_size * best_depth * 3; ++cnt)
     972      array2[cnt] = SWAPU32 (array1[cnt]);
     973  
     974    /* Now we can write out the whole data.  */
     975    if (strcmp (output_name, "-") == 0
     976        || strcmp (output_name, "/dev/stdout") == 0)
     977      fd = STDOUT_FILENO;
     978    else
     979      {
     980        fd = creat (output_name, 0666);
     981        if (fd < 0)
     982  	error (EXIT_FAILURE, errno, gettext ("cannot open output file `%s'"),
     983  	       output_name);
     984      }
     985  
     986    /* Write out header.  */
     987    write_all(fd, &obj, sizeof (obj));
     988  
     989    /* We always write out the little endian version of the index
     990       arrays.  */
     991  #if __BYTE_ORDER == __LITTLE_ENDIAN
     992    write_all(fd, array1, array_size);
     993    write_all(fd, array2, array_size);
     994  #elif __BYTE_ORDER == __BIG_ENDIAN
     995    write_all(fd, array2, array_size);
     996    write_all(fd, array1, array_size);
     997  #else
     998  # error Cannot handle __BYTE_ORDER byte order
     999  #endif
    1000  
    1001    /* Finally write the strings.  */
    1002    write_all(fd, strings, strings_size);
    1003  
    1004    if (fd != STDOUT_FILENO)
    1005      close (fd);
    1006  
    1007    /* If requested now write out the header file.  */
    1008    if (header_name != NULL)
    1009      {
    1010        int first = 1;
    1011        FILE *fp;
    1012  
    1013        /* Open output file.  "-" or "/dev/stdout" means write to
    1014  	 standard output.  */
    1015        if (strcmp (header_name, "-") == 0
    1016  	  || strcmp (header_name, "/dev/stdout") == 0)
    1017  	fp = stdout;
    1018        else
    1019  	{
    1020  	  fp = fopen (header_name, "w");
    1021  	  if (fp == NULL)
    1022  	    error (EXIT_FAILURE, errno,
    1023  		   gettext ("cannot open output file `%s'"), header_name);
    1024  	}
    1025  
    1026        /* Iterate over all sets and all messages.  */
    1027        set_run = catalog->all_sets;
    1028        while (set_run != NULL)
    1029  	{
    1030  	  struct message_list *message_run;
    1031  
    1032  	  /* If the current message set has a symbolic name write this
    1033  	     out first.  */
    1034  	  if (set_run->symbol != NULL)
    1035  	    fprintf (fp, "%s#define %sSet %#x\t/* %s:%zu */\n",
    1036  		     first ? "" : "\n", set_run->symbol, set_run->number - 1,
    1037  		     set_run->fname, set_run->line);
    1038  	  first = 0;
    1039  
    1040  	  message_run = set_run->messages;
    1041  	  while (message_run != NULL)
    1042  	    {
    1043  	      /* If the current message has a symbolic name write
    1044  		 #define out.  But we have to take care for the set
    1045  		 not having a symbolic name.  */
    1046  	      if (message_run->symbol != NULL)
    1047  		{
    1048  		  if (set_run->symbol == NULL)
    1049  		    fprintf (fp, "#define AutomaticSet%d%s %#x\t/* %s:%zu */\n",
    1050  			     set_run->number, message_run->symbol,
    1051  			     message_run->number, message_run->fname,
    1052  			     message_run->line);
    1053  		  else
    1054  		    fprintf (fp, "#define %s%s %#x\t/* %s:%zu */\n",
    1055  			     set_run->symbol, message_run->symbol,
    1056  			     message_run->number, message_run->fname,
    1057  			     message_run->line);
    1058  		}
    1059  
    1060  	      message_run = message_run->next;
    1061  	    }
    1062  
    1063  	  set_run = set_run->next;
    1064  	}
    1065  
    1066        if (fp != stdout)
    1067  	fclose (fp);
    1068      }
    1069  }
    1070  
    1071  
    1072  static struct set_list *
    1073  find_set (struct catalog *current, int number)
    1074  {
    1075    struct set_list *result = current->all_sets;
    1076  
    1077    /* We must avoid set number 0 because a set of this number signals
    1078       in the tables that the entry is not occupied.  */
    1079    ++number;
    1080  
    1081    while (result != NULL)
    1082      if (result->number == number)
    1083        return result;
    1084      else
    1085        result = result->next;
    1086  
    1087    /* Prepare new message set.  */
    1088    result = (struct set_list *) xcalloc (1, sizeof (*result));
    1089    result->number = number;
    1090    result->next = current->all_sets;
    1091    current->all_sets = result;
    1092  
    1093    return result;
    1094  }
    1095  
    1096  
    /* Normalize STRING *in*place*.  A leading QUOTE_CHAR (if QUOTE_CHAR is
       not L'\0') is dropped and processing stops at the first quote
       character which is not escaped.  Sequences introduced by ESCAPE_CHAR
       are replaced: n, t, v, b, r and f produce the usual control
       characters, octal digits produce the byte of that value converted
       to a wide character using CD, an escaped quote or escape character
       stands for itself, and in front of anything else the escape
       character is simply dropped.  FNAME and LINE are only used in the
       (non-fatal) diagnostics.  */
    static void
    normalize_line (const char *fname, size_t line, iconv_t cd, wchar_t *string,
                    wchar_t quote_char, wchar_t escape_char)
    {
      const int have_quote = quote_char != L'\0';
      wchar_t *src = string;
      wchar_t *dst = string;
      int is_quoted = 0;

      if (have_quote && *src == quote_char)
        {
          is_quoted = 1;
          ++src;
        }

      /* We simply end the string when we find the first time a
         not-escaped quote character.  */
      while (*src != L'\0' && *src != quote_char)
        {
          wchar_t control;

          if (*src != escape_char)
            {
              /* Ordinary character.  */
              *dst++ = *src++;
              continue;
            }

          /* Skip the escape character and look at what follows.  */
          ++src;

          if (have_quote && *src == quote_char)
            {
              /* This is an extension to XPG.  */
              *dst++ = *src++;
              continue;
            }

          /* Single letter escape sequences.  */
          switch (*src)
            {
            case L'n':
              control = L'\n';
              break;
            case L't':
              control = L'\t';
              break;
            case L'v':
              control = L'\v';
              break;
            case L'b':
              control = L'\b';
              break;
            case L'r':
              control = L'\r';
              break;
            case L'f':
              control = L'\f';
              break;
            default:
              control = L'\0';
              break;
            }

          if (control != L'\0')
            {
              *dst++ = control;
              ++src;
            }
          else if (*src >= L'0' && *src <= L'7')
            {
              /* Octal escape.  Digits are consumed as long as one more
                 digit cannot push the value beyond 255.  */
              int value = *src++ - L'0';
              char mb[2];
              wchar_t wide[2];
              char *inptr = mb;
              size_t inleft = 2;
              char *outptr = (char *) wide;
              size_t outleft = sizeof (wide);

              while (value <= (255 / 8) && *src >= L'0' && *src <= L'7')
                value = value * 8 + (*src++ - L'0');

              mb[0] = (char) value;
              mb[1] = '\0';

              /* Flush the state.  */
              iconv (cd, NULL, NULL, NULL, NULL);

              /* Convert the byte together with the terminating NUL.  Both
                 must have been consumed and produced completely.  */
              iconv (cd, &inptr, &inleft, &outptr, &outleft);
              if (inptr == &mb[2] && (wchar_t *) outptr == &wide[2])
                *dst++ = wide[0];
              else
                error_at_line (0, 0, fname, line,
                               gettext ("invalid escape sequence"));
            }
          else if (*src == escape_char)
            {
              /* A doubled escape character stands for itself.  */
              *dst++ = escape_char;
              ++src;
            }
          /* Otherwise simply ignore the backslash character; what follows
             it is handled in the next round.  */
        }

      /* If we saw a quote character at the beginning we expect another
         one at the end.  */
      if (is_quoted && *src != quote_char)
        error_at_line (0, 0, fname, line, gettext ("unterminated message"));

      /* Terminate string.  */
      *dst = L'\0';
    }
    1215  
    1216  
    1217  static void
    1218  read_old (struct catalog *catalog, const char *file_name)
    1219  {
    1220    struct catalog_info old_cat_obj;
    1221    struct set_list *set = NULL;
    1222    int last_set = -1;
    1223    size_t cnt;
    1224  
    1225    /* Try to open catalog, but don't look through the NLSPATH.  */
    1226    if (__open_catalog (file_name, NULL, NULL, &old_cat_obj) != 0)
    1227      {
    1228        if (errno == ENOENT)
    1229  	/* No problem, the catalog simply does not exist.  */
    1230  	return;
    1231        else
    1232  	error (EXIT_FAILURE, errno,
    1233  	       gettext ("while opening old catalog file"));
    1234      }
    1235  
    1236    /* OK, we have the catalog loaded.  Now read all messages and merge
    1237       them.  When set and message number clash for any message the new
    1238       one is used.  If the new one is empty it indicates that the
    1239       message should be deleted.  */
    1240    for (cnt = 0; cnt < old_cat_obj.plane_size * old_cat_obj.plane_depth; ++cnt)
    1241      {
    1242        struct message_list *message, *last;
    1243  
    1244        if (old_cat_obj.name_ptr[cnt * 3 + 0] == 0)
    1245  	/* No message in this slot.  */
    1246  	continue;
    1247  
    1248        if (old_cat_obj.name_ptr[cnt * 3 + 0] - 1 != (uint32_t) last_set)
    1249  	{
    1250  	  last_set = old_cat_obj.name_ptr[cnt * 3 + 0] - 1;
    1251  	  set = find_set (catalog, old_cat_obj.name_ptr[cnt * 3 + 0] - 1);
    1252  	}
    1253  
    1254        last = NULL;
    1255        message = set->messages;
    1256        while (message != NULL)
    1257  	{
    1258  	  if ((uint32_t) message->number >= old_cat_obj.name_ptr[cnt * 3 + 1])
    1259  	    break;
    1260  	  last = message;
    1261  	  message = message->next;
    1262  	}
    1263  
    1264        if (message == NULL
    1265  	  || (uint32_t) message->number > old_cat_obj.name_ptr[cnt * 3 + 1])
    1266  	{
    1267  	  /* We have found a message which is not yet in the catalog.
    1268  	     Insert it at the right position.  */
    1269  	  struct message_list *newp;
    1270  
    1271  	  newp = (struct message_list *) xmalloc (sizeof (*newp));
    1272  	  newp->number = old_cat_obj.name_ptr[cnt * 3 + 1];
    1273  	  newp->message =
    1274  	    &old_cat_obj.strings[old_cat_obj.name_ptr[cnt * 3 + 2]];
    1275  	  newp->fname = NULL;
    1276  	  newp->line = 0;
    1277  	  newp->symbol = NULL;
    1278  	  newp->next = message;
    1279  
    1280  	  if (last == NULL)
    1281  	    set->messages = newp;
    1282  	  else
    1283  	    last->next = newp;
    1284  
    1285  	  ++catalog->total_messages;
    1286  	}
    1287        else if (*message->message == '\0')
    1288  	{
    1289  	  /* The new empty message has overridden the old one thus
    1290  	     "deleting" it as required.  Now remove the empty remains. */
    1291  	  if (last == NULL)
    1292  	    set->messages = message->next;
    1293  	  else
    1294  	    last->next = message->next;
    1295  	}
    1296      }
    1297  }
    1298  
    1299  
    /* Open the conversion descriptors between CODESET and wide characters.
       *CD_TOWCP converts from CODESET to WCHAR_T, *CD_TOMBP the other way
       round.  If CODESET is NULL the codeset of the locale selected by the
       environment is used.  *ESCAPE_CHARP is set to the wide character the
       byte 0x5c maps to in CODESET (L'\\' if this cannot be determined).
       Returns 0 on success.  If a conversion is not available a message
       is printed, whatever descriptor was opened is closed again and 1 is
       returned.  */
    static int
    open_conversion (const char *codeset, iconv_t *cd_towcp, iconv_t *cd_tombp,
                     wchar_t *escape_charp)
    {
      char buf[2];
      char *bufptr;
      size_t bufsize;
      wchar_t wbuf[2];
      char *wbufptr;
      size_t wbufsize;
      char *locale_codeset = NULL;

      /* If the input file does not specify the codeset use the locale's.  */
      if (codeset == NULL)
        {
          const char *name;
          size_t len;

          setlocale (LC_ALL, "");
          name = nl_langinfo (CODESET);

          /* The string returned by nl_langinfo may be invalidated by the
             next setlocale call, so keep a private copy.  */
          len = strlen (name) + 1;
          locale_codeset = malloc (len);
          if (locale_codeset == NULL)
            error (EXIT_FAILURE, 0, gettext ("memory exhausted"));
          memcpy (locale_codeset, name, len);
          codeset = locale_codeset;

          setlocale (LC_ALL, "C");
        }

      /* Get the conversion modules.  */
      *cd_towcp = iconv_open ("WCHAR_T", codeset);
      *cd_tombp = iconv_open (codeset, "WCHAR_T");
      free (locale_codeset);

      if (*cd_towcp == (iconv_t) -1 || *cd_tombp == (iconv_t) -1)
        {
          error (0, 0, gettext ("conversion modules not available"));

          /* Do not leak the descriptor which could be opened, regardless
             of which of the two directions failed.  */
          if (*cd_towcp != (iconv_t) -1)
            iconv_close (*cd_towcp);
          if (*cd_tombp != (iconv_t) -1)
            iconv_close (*cd_tombp);

          return 1;
        }

      /* One special case for historical reasons is the backslash
         character.  In some codesets the byte value 0x5c is not mapped to
         U005c in Unicode.  These charsets then don't have a backslash
         character at all.  Therefore we have to live with whatever the
         codeset provides and recognize, instead of the U005c, the character
         the byte value 0x5c is mapped to.  */
      buf[0] = '\\';
      buf[1] = '\0';
      bufptr = buf;
      bufsize = 2;

      wbufptr = (char *) wbuf;
      wbufsize = sizeof (wbuf);

      iconv (*cd_towcp, &bufptr, &bufsize, &wbufptr, &wbufsize);
      if (bufsize != 0 || wbufsize != 0)
        {
          /* Something went wrong, we couldn't convert the byte 0x5c.  Go
             on with using U005c.  */
          error (0, 0, gettext ("cannot determine escape character"));
          *escape_charp = L'\\';
        }
      else
        *escape_charp = wbuf[0];

      return 0;
    }