(root)/
gettext-0.22.4/
gettext-tools/
src/
x-po.c
       1  /* xgettext PO, JavaProperties, and NXStringTable backends.
       2     Copyright (C) 1995-1998, 2000-2003, 2005-2006, 2008-2009, 2014, 2018, 2020, 2023 Free Software Foundation, Inc.
       3  
       4     This file was written by Peter Miller <millerp@canb.auug.org.au>
       5  
       6     This program is free software: you can redistribute it and/or modify
       7     it under the terms of the GNU General Public License as published by
       8     the Free Software Foundation; either version 3 of the License, or
       9     (at your option) any later version.
      10  
      11     This program is distributed in the hope that it will be useful,
      12     but WITHOUT ANY WARRANTY; without even the implied warranty of
      13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14     GNU General Public License for more details.
      15  
      16     You should have received a copy of the GNU General Public License
      17     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      18  
      19  #ifdef HAVE_CONFIG_H
      20  # include <config.h>
      21  #endif
      22  
      23  /* Specification.  */
      24  #include "x-po.h"
      25  #include "x-properties.h"
      26  #include "x-stringtable.h"
      27  
      28  #include <stdio.h>
      29  #include <stdlib.h>
      30  #include <stdbool.h>
      31  #include <string.h>
      32  
      33  #include "message.h"
      34  #include "xgettext.h"
      35  #include "xalloc.h"
      36  #include "read-catalog.h"
      37  #include "read-po.h"
      38  #include "read-properties.h"
      39  #include "read-stringtable.h"
      40  #include "msgl-iconv.h"
      41  #include "msgl-ascii.h"
      42  #include "po-charset.h"
      43  #include "po-lex.h"
      44  #include "gettext.h"
      45  
/* A convenience macro.  I don't like writing gettext() every time.
   _(str) expands to gettext (str).  */
#define _(str) gettext (str)


/* The charset found in the header entry.
   extract() resets this to NULL before parsing.  extract_add_message()
   stores here a freshly allocated copy of the value that follows "charset="
   in the msgstr of the input file's header entry.  extract() frees it once
   parsing and header fix-up are done.  */
static char *header_charset;
      52  
      53  /* Define a subclass extract_catalog_reader_ty of default_catalog_reader_ty.  */
      54  
      55  static void
      56  extract_add_message (default_catalog_reader_ty *this,
      57                       char *msgctxt,
      58                       char *msgid,
      59                       lex_pos_ty *msgid_pos,
      60                       char *msgid_plural,
      61                       char *msgstr, size_t msgstr_len,
      62                       lex_pos_ty *msgstr_pos,
      63                       char *prev_msgctxt,
      64                       char *prev_msgid,
      65                       char *prev_msgid_plural,
      66                       bool force_fuzzy, bool obsolete)
      67  {
      68    /* See whether we shall exclude this message.  */
      69    if (exclude != NULL && message_list_search (exclude, msgctxt, msgid) != NULL)
      70      goto discard;
      71  
      72    /* If the msgid is the empty string, it is the old header.  Throw it
      73       away, we have constructed a new one.  Only remember its charset.
      74       But if no new one was constructed, keep the old header.  This is useful
      75       because the old header may contain a charset= directive.  */
      76    if (msgctxt == NULL && *msgid == '\0' && !xgettext_omit_header)
      77      {
      78        {
      79          const char *charsetstr = strstr (msgstr, "charset=");
      80  
      81          if (charsetstr != NULL)
      82            {
      83              size_t len;
      84              char *charset;
      85  
      86              charsetstr += strlen ("charset=");
      87              len = strcspn (charsetstr, " \t\n");
      88              charset = XNMALLOC (len + 1, char);
      89              memcpy (charset, charsetstr, len);
      90              charset[len] = '\0';
      91  
      92              if (header_charset != NULL)
      93                free (header_charset);
      94              header_charset = charset;
      95            }
      96        }
      97  
      98       discard:
      99        if (msgctxt != NULL)
     100          free (msgctxt);
     101        free (msgid);
     102        if (msgid_plural != NULL)
     103          free (msgid_plural);
     104        free (msgstr);
     105        if (prev_msgctxt != NULL)
     106          free (prev_msgctxt);
     107        if (prev_msgid != NULL)
     108          free (prev_msgid);
     109        if (prev_msgid_plural != NULL)
     110          free (prev_msgid_plural);
     111        return;
     112      }
     113  
     114    /* Invoke superclass method.  */
     115    default_add_message (this, msgctxt, msgid, msgid_pos, msgid_plural,
     116                         msgstr, msgstr_len, msgstr_pos,
     117                         prev_msgctxt, prev_msgid, prev_msgid_plural,
     118                         force_fuzzy, obsolete);
     119  }
     120  
     121  
/* So that the one parser can be used for multiple programs, and also
   use good data hiding and encapsulation practices, an object
   oriented approach has been taken.  An object instance is allocated,
   and all actions resulting from the parse will be through
   invocations of method functions of that object.  */

/* Method table of the reader that extract() allocates.  Every slot holds
   the default_* implementation except add_message, which is overridden by
   extract_add_message().  The initializer is positional, so the order of
   the entries has to match the declaration of
   default_catalog_reader_class_ty (declared outside this file).  */
static default_catalog_reader_class_ty extract_methods =
{
  {
    /* Nested part: the instance size followed by the default_* handlers.
       NOTE(review): presumably this is the abstract reader's method
       table -- confirm against the struct declaration.  */
    sizeof (default_catalog_reader_ty),
    default_constructor,
    default_destructor,
    default_parse_brief,
    default_parse_debrief,
    default_directive_domain,
    default_directive_message,
    default_comment,
    default_comment_dot,
    default_comment_filepos,
    default_comment_special
  },
  default_set_domain, /* set_domain */
  extract_add_message, /* add_message */
  NULL /* frob_new_message */
};
     147  
     148  
     149  static void
     150  extract (FILE *fp,
     151           const char *real_filename, const char *logical_filename,
     152           catalog_input_format_ty input_syntax,
     153           msgdomain_list_ty *mdlp)
     154  {
     155    default_catalog_reader_ty *pop;
     156  
     157    header_charset = NULL;
     158  
     159    pop = default_catalog_reader_alloc (&extract_methods);
     160    pop->handle_comments = true;
     161    pop->allow_domain_directives = false;
     162    pop->allow_duplicates = false;
     163    pop->allow_duplicates_if_same_msgstr = true;
     164    pop->file_name = real_filename;
     165    pop->mdlp = NULL;
     166    pop->mlp = mdlp->item[0]->messages;
     167    catalog_reader_parse ((abstract_catalog_reader_ty *) pop, fp, real_filename,
     168                          logical_filename, input_syntax);
     169    catalog_reader_free ((abstract_catalog_reader_ty *) pop);
     170  
     171    if (header_charset != NULL)
     172      {
     173        if (!xgettext_omit_header)
     174          {
     175            /* Put the old charset into the freshly constructed header entry.  */
     176            message_ty *mp =
     177              message_list_search (mdlp->item[0]->messages, NULL, "");
     178  
     179            if (mp != NULL && !mp->obsolete)
     180              {
     181                const char *header = mp->msgstr;
     182  
     183                if (header != NULL)
     184                  {
     185                    const char *charsetstr = strstr (header, "charset=");
     186  
     187                    if (charsetstr != NULL)
     188                      {
     189                        size_t len, len1, len2, len3;
     190                        char *new_header;
     191  
     192                        charsetstr += strlen ("charset=");
     193                        len = strcspn (charsetstr, " \t\n");
     194  
     195                        len1 = charsetstr - header;
     196                        len2 = strlen (header_charset);
     197                        len3 = (header + strlen (header)) - (charsetstr + len);
     198                        new_header = XNMALLOC (len1 + len2 + len3 + 1, char);
     199                        memcpy (new_header, header, len1);
     200                        memcpy (new_header + len1, header_charset, len2);
     201                        memcpy (new_header + len1 + len2, charsetstr + len, len3 + 1);
     202                        mp->msgstr = new_header;
     203                        mp->msgstr_len = len1 + len2 + len3 + 1;
     204                      }
     205                  }
     206              }
     207  
     208            if (!input_syntax->produces_utf8)
     209              {
     210                /* Convert the messages to UTF-8.
     211                   finalize_header() expects this.  */
     212                message_list_ty *mlp = mdlp->item[0]->messages;
     213                iconv_message_list (mlp, NULL, po_charset_utf8, logical_filename);
     214              }
     215          }
     216  
     217        free (header_charset);
     218      }
     219    else
     220      {
     221        if (!xgettext_omit_header && !input_syntax->produces_utf8)
     222          {
     223            /* finalize_header() expects the messages to be in UTF-8 encoding.
     224               We don't know the encoding here; therefore we have to reject the
     225               input if it is not entirely ASCII.  */
     226            if (!is_ascii_msgdomain_list (mdlp))
     227              error (EXIT_FAILURE, 0,
     228                     _("%s: input file doesn't contain a header entry with a charset specification"),
     229                     logical_filename);
     230          }
     231      }
     232  }
     233  
     234  
     235  void
     236  extract_po (FILE *fp,
     237              const char *real_filename, const char *logical_filename,
     238              flag_context_list_table_ty *flag_table,
     239              msgdomain_list_ty *mdlp)
     240  {
     241    extract (fp, real_filename,  logical_filename, &input_format_po, mdlp);
     242  }
     243  
     244  
     245  void
     246  extract_properties (FILE *fp,
     247                      const char *real_filename, const char *logical_filename,
     248                      flag_context_list_table_ty *flag_table,
     249                      msgdomain_list_ty *mdlp)
     250  {
     251    extract (fp, real_filename,  logical_filename, &input_format_properties,
     252             mdlp);
     253  }
     254  
     255  
     256  void
     257  extract_stringtable (FILE *fp,
     258                       const char *real_filename, const char *logical_filename,
     259                       flag_context_list_table_ty *flag_table,
     260                       msgdomain_list_ty *mdlp)
     261  {
     262    extract (fp, real_filename,  logical_filename, &input_format_stringtable,
     263             mdlp);
     264  }