1 /* xgettext PO, JavaProperties, and NXStringTable backends.
2 Copyright (C) 1995-1998, 2000-2003, 2005-2006, 2008-2009, 2014, 2018, 2020, 2023 Free Software Foundation, Inc.
3
4 This file was written by Peter Miller <millerp@canb.auug.org.au>
5
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22
23 /* Specification. */
24 #include "x-po.h"
25 #include "x-properties.h"
26 #include "x-stringtable.h"
27
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <stdbool.h>
31 #include <string.h>
32
33 #include "message.h"
34 #include "xgettext.h"
35 #include "xalloc.h"
36 #include "read-catalog.h"
37 #include "read-po.h"
38 #include "read-properties.h"
39 #include "read-stringtable.h"
40 #include "msgl-iconv.h"
41 #include "msgl-ascii.h"
42 #include "po-charset.h"
43 #include "po-lex.h"
44 #include "gettext.h"
45
/* Convenience macro: _(str) expands to gettext (str), so that messages
   passed through gettext can be written compactly.  */
#define _(str) gettext (str)
48
49
/* The charset found in the header entry of the file being extracted.
   extract() resets it to NULL before parsing; extract_add_message() stores
   a freshly allocated copy of the value following "charset=" (replacing and
   freeing any earlier one); extract() frees it after use.  NULL means that
   no header entry with a charset= directive has been seen.  */
static char *header_charset;
52
53 /* Define a subclass extract_catalog_reader_ty of default_catalog_reader_ty. */
54
55 static void
56 extract_add_message (default_catalog_reader_ty *this,
57 char *msgctxt,
58 char *msgid,
59 lex_pos_ty *msgid_pos,
60 char *msgid_plural,
61 char *msgstr, size_t msgstr_len,
62 lex_pos_ty *msgstr_pos,
63 char *prev_msgctxt,
64 char *prev_msgid,
65 char *prev_msgid_plural,
66 bool force_fuzzy, bool obsolete)
67 {
68 /* See whether we shall exclude this message. */
69 if (exclude != NULL && message_list_search (exclude, msgctxt, msgid) != NULL)
70 goto discard;
71
72 /* If the msgid is the empty string, it is the old header. Throw it
73 away, we have constructed a new one. Only remember its charset.
74 But if no new one was constructed, keep the old header. This is useful
75 because the old header may contain a charset= directive. */
76 if (msgctxt == NULL && *msgid == '\0' && !xgettext_omit_header)
77 {
78 {
79 const char *charsetstr = strstr (msgstr, "charset=");
80
81 if (charsetstr != NULL)
82 {
83 size_t len;
84 char *charset;
85
86 charsetstr += strlen ("charset=");
87 len = strcspn (charsetstr, " \t\n");
88 charset = XNMALLOC (len + 1, char);
89 memcpy (charset, charsetstr, len);
90 charset[len] = '\0';
91
92 if (header_charset != NULL)
93 free (header_charset);
94 header_charset = charset;
95 }
96 }
97
98 discard:
99 if (msgctxt != NULL)
100 free (msgctxt);
101 free (msgid);
102 if (msgid_plural != NULL)
103 free (msgid_plural);
104 free (msgstr);
105 if (prev_msgctxt != NULL)
106 free (prev_msgctxt);
107 if (prev_msgid != NULL)
108 free (prev_msgid);
109 if (prev_msgid_plural != NULL)
110 free (prev_msgid_plural);
111 return;
112 }
113
114 /* Invoke superclass method. */
115 default_add_message (this, msgctxt, msgid, msgid_pos, msgid_plural,
116 msgstr, msgstr_len, msgstr_pos,
117 prev_msgctxt, prev_msgid, prev_msgid_plural,
118 force_fuzzy, obsolete);
119 }
120
121
122 /* So that the one parser can be used for multiple programs, and also
123 use good data hiding and encapsulation practices, an object
124 oriented approach has been taken. An object instance is allocated,
125 and all actions resulting from the parse will be through
126 invocations of method functions of that object. */
127
/* Method table of the reader used by extract().  Every slot is filled with
   the corresponding default_* implementation, except that add_message is
   overridden by extract_add_message and frob_new_message is left NULL.  */
static default_catalog_reader_class_ty extract_methods =
{
  /* Slots of the embedded base method table.  The initializer is positional;
     the field order is declared elsewhere (presumably in read-catalog.h --
     confirm before reordering).  */
  {
    sizeof (default_catalog_reader_ty),
    default_constructor,
    default_destructor,
    default_parse_brief,
    default_parse_debrief,
    default_directive_domain,
    default_directive_message,
    default_comment,
    default_comment_dot,
    default_comment_filepos,
    default_comment_special
  },
  default_set_domain, /* set_domain */
  extract_add_message, /* add_message */
  NULL /* frob_new_message */
};
147
148
149 static void
150 extract (FILE *fp,
151 const char *real_filename, const char *logical_filename,
152 catalog_input_format_ty input_syntax,
153 msgdomain_list_ty *mdlp)
154 {
155 default_catalog_reader_ty *pop;
156
157 header_charset = NULL;
158
159 pop = default_catalog_reader_alloc (&extract_methods);
160 pop->handle_comments = true;
161 pop->allow_domain_directives = false;
162 pop->allow_duplicates = false;
163 pop->allow_duplicates_if_same_msgstr = true;
164 pop->file_name = real_filename;
165 pop->mdlp = NULL;
166 pop->mlp = mdlp->item[0]->messages;
167 catalog_reader_parse ((abstract_catalog_reader_ty *) pop, fp, real_filename,
168 logical_filename, input_syntax);
169 catalog_reader_free ((abstract_catalog_reader_ty *) pop);
170
171 if (header_charset != NULL)
172 {
173 if (!xgettext_omit_header)
174 {
175 /* Put the old charset into the freshly constructed header entry. */
176 message_ty *mp =
177 message_list_search (mdlp->item[0]->messages, NULL, "");
178
179 if (mp != NULL && !mp->obsolete)
180 {
181 const char *header = mp->msgstr;
182
183 if (header != NULL)
184 {
185 const char *charsetstr = strstr (header, "charset=");
186
187 if (charsetstr != NULL)
188 {
189 size_t len, len1, len2, len3;
190 char *new_header;
191
192 charsetstr += strlen ("charset=");
193 len = strcspn (charsetstr, " \t\n");
194
195 len1 = charsetstr - header;
196 len2 = strlen (header_charset);
197 len3 = (header + strlen (header)) - (charsetstr + len);
198 new_header = XNMALLOC (len1 + len2 + len3 + 1, char);
199 memcpy (new_header, header, len1);
200 memcpy (new_header + len1, header_charset, len2);
201 memcpy (new_header + len1 + len2, charsetstr + len, len3 + 1);
202 mp->msgstr = new_header;
203 mp->msgstr_len = len1 + len2 + len3 + 1;
204 }
205 }
206 }
207
208 if (!input_syntax->produces_utf8)
209 {
210 /* Convert the messages to UTF-8.
211 finalize_header() expects this. */
212 message_list_ty *mlp = mdlp->item[0]->messages;
213 iconv_message_list (mlp, NULL, po_charset_utf8, logical_filename);
214 }
215 }
216
217 free (header_charset);
218 }
219 else
220 {
221 if (!xgettext_omit_header && !input_syntax->produces_utf8)
222 {
223 /* finalize_header() expects the messages to be in UTF-8 encoding.
224 We don't know the encoding here; therefore we have to reject the
225 input if it is not entirely ASCII. */
226 if (!is_ascii_msgdomain_list (mdlp))
227 error (EXIT_FAILURE, 0,
228 _("%s: input file doesn't contain a header entry with a charset specification"),
229 logical_filename);
230 }
231 }
232 }
233
234
235 void
236 extract_po (FILE *fp,
237 const char *real_filename, const char *logical_filename,
238 flag_context_list_table_ty *flag_table,
239 msgdomain_list_ty *mdlp)
240 {
241 extract (fp, real_filename, logical_filename, &input_format_po, mdlp);
242 }
243
244
245 void
246 extract_properties (FILE *fp,
247 const char *real_filename, const char *logical_filename,
248 flag_context_list_table_ty *flag_table,
249 msgdomain_list_ty *mdlp)
250 {
251 extract (fp, real_filename, logical_filename, &input_format_properties,
252 mdlp);
253 }
254
255
256 void
257 extract_stringtable (FILE *fp,
258 const char *real_filename, const char *logical_filename,
259 flag_context_list_table_ty *flag_table,
260 msgdomain_list_ty *mdlp)
261 {
262 extract (fp, real_filename, logical_filename, &input_format_stringtable,
263 mdlp);
264 }