1 /* Reading PO files.
2 Copyright (C) 1995-1998, 2000-2003, 2005-2006, 2008-2009, 2014-2016, 2023 Free Software Foundation, Inc.
3 This file was written by Peter Miller <millerp@canb.auug.org.au>
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17
18 #ifdef HAVE_CONFIG_H
19 # include <config.h>
20 #endif
21
22 /* Specification. */
23 #include "read-catalog.h"
24
25 #include <stdbool.h>
26 #include <stdlib.h>
27 #include <string.h>
28
29 #include "open-catalog.h"
30 #include "po-charset.h"
31 #include "po-xerror.h"
32 #include "xalloc.h"
33 #include "gettext.h"
34
35 #define _(str) gettext (str)
36
37
38 /* ========================================================================= */
39 /* Inline functions to invoke the methods. */
40
41 static inline void
42 call_set_domain (struct default_catalog_reader_ty *this, char *name)
43 {
44 default_catalog_reader_class_ty *methods =
45 (default_catalog_reader_class_ty *) this->methods;
46
47 if (methods->set_domain)
48 methods->set_domain (this, name);
49 }
50
51 static inline void
52 call_add_message (struct default_catalog_reader_ty *this,
53 char *msgctxt,
54 char *msgid, lex_pos_ty *msgid_pos, char *msgid_plural,
55 char *msgstr, size_t msgstr_len, lex_pos_ty *msgstr_pos,
56 char *prev_msgctxt, char *prev_msgid, char *prev_msgid_plural,
57 bool force_fuzzy, bool obsolete)
58 {
59 default_catalog_reader_class_ty *methods =
60 (default_catalog_reader_class_ty *) this->methods;
61
62 if (methods->add_message)
63 methods->add_message (this, msgctxt,
64 msgid, msgid_pos, msgid_plural,
65 msgstr, msgstr_len, msgstr_pos,
66 prev_msgctxt, prev_msgid, prev_msgid_plural,
67 force_fuzzy, obsolete);
68 }
69
70 static inline void
71 call_frob_new_message (struct default_catalog_reader_ty *this, message_ty *mp,
72 const lex_pos_ty *msgid_pos,
73 const lex_pos_ty *msgstr_pos)
74 {
75 default_catalog_reader_class_ty *methods =
76 (default_catalog_reader_class_ty *) this->methods;
77
78 if (methods->frob_new_message)
79 methods->frob_new_message (this, mp, msgid_pos, msgstr_pos);
80 }
81
82
83 /* ========================================================================= */
84 /* Implementation of default_catalog_reader_ty's methods. */
85
86
87 /* Implementation of methods declared in the superclass. */
88
89
90 /* Prepare for first message. */
91 void
92 default_constructor (abstract_catalog_reader_ty *that)
93 {
94 default_catalog_reader_ty *this = (default_catalog_reader_ty *) that;
95 size_t i;
96
97 this->domain = MESSAGE_DOMAIN_DEFAULT;
98 this->comment = NULL;
99 this->comment_dot = NULL;
100 this->filepos_count = 0;
101 this->filepos = NULL;
102 this->is_fuzzy = false;
103 for (i = 0; i < NFORMATS; i++)
104 this->is_format[i] = undecided;
105 this->range.min = -1;
106 this->range.max = -1;
107 this->do_wrap = undecided;
108 for (i = 0; i < NSYNTAXCHECKS; i++)
109 this->do_syntax_check[i] = undecided;
110 }
111
112
113 void
114 default_destructor (abstract_catalog_reader_ty *that)
115 {
116 default_catalog_reader_ty *this = (default_catalog_reader_ty *) that;
117 size_t j;
118
119 /* Do not free this->mdlp and this->mlp. */
120 if (this->handle_comments)
121 {
122 if (this->comment != NULL)
123 string_list_free (this->comment);
124 if (this->comment_dot != NULL)
125 string_list_free (this->comment_dot);
126 }
127
128 for (j = 0; j < this->filepos_count; ++j)
129 free ((char *) this->filepos[j].file_name);
130 if (this->filepos != NULL)
131 free (this->filepos);
132 }
133
134
135 void
136 default_parse_brief (abstract_catalog_reader_ty *that)
137 {
138 /* We need to parse comments, because even if this->handle_comments
139 is false, we need to know which messages are fuzzy. */
140 po_lex_pass_comments (true);
141 }
142
143
144 void
145 default_parse_debrief (abstract_catalog_reader_ty *that)
146 {
147 }
148
149
150 /* Add the accumulated comments to the message. */
151 static void
152 default_copy_comment_state (default_catalog_reader_ty *this, message_ty *mp)
153 {
154 size_t j, i;
155
156 if (this->handle_comments)
157 {
158 if (this->comment != NULL)
159 for (j = 0; j < this->comment->nitems; ++j)
160 message_comment_append (mp, this->comment->item[j]);
161 if (this->comment_dot != NULL)
162 for (j = 0; j < this->comment_dot->nitems; ++j)
163 message_comment_dot_append (mp, this->comment_dot->item[j]);
164 }
165 for (j = 0; j < this->filepos_count; ++j)
166 {
167 lex_pos_ty *pp;
168
169 pp = &this->filepos[j];
170 message_comment_filepos (mp, pp->file_name, pp->line_number);
171 }
172 mp->is_fuzzy = this->is_fuzzy;
173 for (i = 0; i < NFORMATS; i++)
174 mp->is_format[i] = this->is_format[i];
175 mp->range = this->range;
176 mp->do_wrap = this->do_wrap;
177 for (i = 0; i < NSYNTAXCHECKS; i++)
178 mp->do_syntax_check[i] = this->do_syntax_check[i];
179 }
180
181
182 static void
183 default_reset_comment_state (default_catalog_reader_ty *this)
184 {
185 size_t j, i;
186
187 if (this->handle_comments)
188 {
189 if (this->comment != NULL)
190 {
191 string_list_free (this->comment);
192 this->comment = NULL;
193 }
194 if (this->comment_dot != NULL)
195 {
196 string_list_free (this->comment_dot);
197 this->comment_dot = NULL;
198 }
199 }
200 for (j = 0; j < this->filepos_count; ++j)
201 free ((char *) this->filepos[j].file_name);
202 if (this->filepos != NULL)
203 free (this->filepos);
204 this->filepos_count = 0;
205 this->filepos = NULL;
206 this->is_fuzzy = false;
207 for (i = 0; i < NFORMATS; i++)
208 this->is_format[i] = undecided;
209 this->range.min = -1;
210 this->range.max = -1;
211 this->do_wrap = undecided;
212 for (i = 0; i < NSYNTAXCHECKS; i++)
213 this->do_syntax_check[i] = undecided;
214 }
215
216
217 /* Process 'domain' directive from .po file. */
218 void
219 default_directive_domain (abstract_catalog_reader_ty *that, char *name)
220 {
221 default_catalog_reader_ty *this = (default_catalog_reader_ty *) that;
222
223 call_set_domain (this, name);
224
225 /* If there are accumulated comments, throw them away, they are
226 probably part of the file header, or about the domain directive,
227 and will be unrelated to the next message. */
228 default_reset_comment_state (this);
229 }
230
231
232 /* Process ['msgctxt'/]'msgid'/'msgstr' pair from .po file. */
233 void
234 default_directive_message (abstract_catalog_reader_ty *that,
235 char *msgctxt,
236 char *msgid,
237 lex_pos_ty *msgid_pos,
238 char *msgid_plural,
239 char *msgstr, size_t msgstr_len,
240 lex_pos_ty *msgstr_pos,
241 char *prev_msgctxt,
242 char *prev_msgid, char *prev_msgid_plural,
243 bool force_fuzzy, bool obsolete)
244 {
245 default_catalog_reader_ty *this = (default_catalog_reader_ty *) that;
246
247 call_add_message (this, msgctxt, msgid, msgid_pos, msgid_plural,
248 msgstr, msgstr_len, msgstr_pos,
249 prev_msgctxt, prev_msgid, prev_msgid_plural,
250 force_fuzzy, obsolete);
251
252 /* Prepare for next message. */
253 default_reset_comment_state (this);
254 }
255
256
257 void
258 default_comment (abstract_catalog_reader_ty *that, const char *s)
259 {
260 default_catalog_reader_ty *this = (default_catalog_reader_ty *) that;
261
262 if (this->handle_comments)
263 {
264 if (this->comment == NULL)
265 this->comment = string_list_alloc ();
266 string_list_append (this->comment, s);
267 }
268 }
269
270
271 void
272 default_comment_dot (abstract_catalog_reader_ty *that, const char *s)
273 {
274 default_catalog_reader_ty *this = (default_catalog_reader_ty *) that;
275
276 if (this->handle_comments)
277 {
278 if (this->comment_dot == NULL)
279 this->comment_dot = string_list_alloc ();
280 string_list_append (this->comment_dot, s);
281 }
282 }
283
284
285 void
286 default_comment_filepos (abstract_catalog_reader_ty *that,
287 const char *name, size_t line)
288 {
289 default_catalog_reader_ty *this = (default_catalog_reader_ty *) that;
290 size_t nbytes;
291 lex_pos_ty *pp;
292
293 nbytes = (this->filepos_count + 1) * sizeof (this->filepos[0]);
294 this->filepos = xrealloc (this->filepos, nbytes);
295 pp = &this->filepos[this->filepos_count++];
296 pp->file_name = xstrdup (name);
297 pp->line_number = line;
298 }
299
300
301 /* Test for '#, fuzzy' comments and warn. */
302 void
303 default_comment_special (abstract_catalog_reader_ty *that, const char *s)
304 {
305 default_catalog_reader_ty *this = (default_catalog_reader_ty *) that;
306
307 po_parse_comment_special (s, &this->is_fuzzy, this->is_format, &this->range,
308 &this->do_wrap, this->do_syntax_check);
309 }
310
311
312 /* Default implementation of methods not inherited from the superclass. */
313
314
315 void
316 default_set_domain (default_catalog_reader_ty *this, char *name)
317 {
318 if (this->allow_domain_directives)
319 /* Override current domain name. Don't free memory. */
320 this->domain = name;
321 else
322 {
323 po_gram_error_at_line (&gram_pos,
324 _("this file may not contain domain directives"));
325
326 /* NAME was allocated in po-gram-gen.y but is not used anywhere. */
327 free (name);
328 }
329 }
330
331 void
332 default_add_message (default_catalog_reader_ty *this,
333 char *msgctxt,
334 char *msgid,
335 lex_pos_ty *msgid_pos,
336 char *msgid_plural,
337 char *msgstr, size_t msgstr_len,
338 lex_pos_ty *msgstr_pos,
339 char *prev_msgctxt,
340 char *prev_msgid,
341 char *prev_msgid_plural,
342 bool force_fuzzy, bool obsolete)
343 {
344 message_ty *mp;
345
346 if (this->mdlp != NULL)
347 /* Select the appropriate sublist of this->mdlp. */
348 this->mlp = msgdomain_list_sublist (this->mdlp, this->domain, true);
349
350 if (this->allow_duplicates && msgid[0] != '\0')
351 /* Doesn't matter if this message ID has been seen before. */
352 mp = NULL;
353 else
354 /* See if this message ID has been seen before. */
355 mp = message_list_search (this->mlp, msgctxt, msgid);
356
357 if (mp)
358 {
359 if (!(this->allow_duplicates_if_same_msgstr
360 && msgstr_len == mp->msgstr_len
361 && memcmp (msgstr, mp->msgstr, msgstr_len) == 0))
362 {
363 /* We give a fatal error about this, regardless whether the
364 translations are equal or different. This is for consistency
365 with msgmerge, msgcat and others. The user can use the
366 msguniq program to get rid of duplicates. */
367 po_xerror2 (PO_SEVERITY_ERROR,
368 NULL, msgid_pos->file_name, msgid_pos->line_number,
369 (size_t)(-1), false, _("duplicate message definition"),
370 mp, NULL, 0, 0, false,
371 _("this is the location of the first definition"));
372 }
373 /* We don't need the just constructed entries' parameter string
374 (allocated in po-gram-gen.y). */
375 free (msgid);
376 if (msgid_plural != NULL)
377 free (msgid_plural);
378 free (msgstr);
379 if (msgctxt != NULL)
380 free (msgctxt);
381 if (prev_msgctxt != NULL)
382 free (prev_msgctxt);
383 if (prev_msgid != NULL)
384 free (prev_msgid);
385 if (prev_msgid_plural != NULL)
386 free (prev_msgid_plural);
387
388 /* Add the accumulated comments to the message. */
389 default_copy_comment_state (this, mp);
390 }
391 else
392 {
393 /* Construct message to add to the list.
394 Obsolete message go into the list at least for duplicate checking.
395 It's the caller's responsibility to ignore obsolete messages when
396 appropriate. */
397 mp = message_alloc (msgctxt, msgid, msgid_plural, msgstr, msgstr_len,
398 msgstr_pos);
399 if (msgid_plural != NULL)
400 free (msgid_plural);
401 mp->prev_msgctxt = prev_msgctxt;
402 mp->prev_msgid = prev_msgid;
403 mp->prev_msgid_plural = prev_msgid_plural;
404 mp->obsolete = obsolete;
405 default_copy_comment_state (this, mp);
406 if (force_fuzzy)
407 mp->is_fuzzy = true;
408
409 call_frob_new_message (this, mp, msgid_pos, msgstr_pos);
410
411 message_list_append (this->mlp, mp);
412 }
413 }
414
415
416 /* So that the one parser can be used for multiple programs, and also
417 use good data hiding and encapsulation practices, an object
418 oriented approach has been taken. An object instance is allocated,
419 and all actions resulting from the parse will be through
420 invocations of method functions of that object. */
421
422 static default_catalog_reader_class_ty default_methods =
423 {
424 {
425 sizeof (default_catalog_reader_ty),
426 default_constructor,
427 default_destructor,
428 default_parse_brief,
429 default_parse_debrief,
430 default_directive_domain,
431 default_directive_message,
432 default_comment,
433 default_comment_dot,
434 default_comment_filepos,
435 default_comment_special
436 },
437 default_set_domain, /* set_domain */
438 default_add_message, /* add_message */
439 NULL /* frob_new_message */
440 };
441
442
443 default_catalog_reader_ty *
444 default_catalog_reader_alloc (default_catalog_reader_class_ty *method_table)
445 {
446 return
447 (default_catalog_reader_ty *) catalog_reader_alloc (&method_table->super);
448 }
449
450
451 /* ========================================================================= */
452 /* Exported functions. */
453
454
/* Controls how duplicate msgids within the same domain and file are
   treated.  false (the default): they generate an error.  true: they are
   accepted, and the caller is expected to handle them appropriately.
   read_catalog_stream copies this value into each reader it creates.  */
bool allow_duplicates = false;
459
460
461 msgdomain_list_ty *
462 read_catalog_stream (FILE *fp, const char *real_filename,
463 const char *logical_filename,
464 catalog_input_format_ty input_syntax)
465 {
466 default_catalog_reader_ty *pop;
467 msgdomain_list_ty *mdlp;
468
469 pop = default_catalog_reader_alloc (&default_methods);
470 pop->handle_comments = true;
471 pop->allow_domain_directives = true;
472 pop->allow_duplicates = allow_duplicates;
473 pop->allow_duplicates_if_same_msgstr = false;
474 pop->file_name = real_filename;
475 pop->mdlp = msgdomain_list_alloc (!pop->allow_duplicates);
476 pop->mlp = msgdomain_list_sublist (pop->mdlp, pop->domain, true);
477 if (input_syntax->produces_utf8)
478 /* We know a priori that input_syntax->parse convert strings to UTF-8. */
479 pop->mdlp->encoding = po_charset_utf8;
480 po_lex_pass_obsolete_entries (true);
481 catalog_reader_parse ((abstract_catalog_reader_ty *) pop, fp, real_filename,
482 logical_filename, input_syntax);
483 mdlp = pop->mdlp;
484 catalog_reader_free ((abstract_catalog_reader_ty *) pop);
485 return mdlp;
486 }
487
488
489 msgdomain_list_ty *
490 read_catalog_file (const char *filename, catalog_input_format_ty input_syntax)
491 {
492 char *real_filename;
493 FILE *fp = open_catalog_file (filename, &real_filename, true);
494 msgdomain_list_ty *result;
495
496 result = read_catalog_stream (fp, real_filename, filename, input_syntax);
497
498 if (fp != stdin)
499 fclose (fp);
500
501 return result;
502 }