1 /* Reading PO files, abstract class.
2 Copyright (C) 1995-1996, 1998, 2000-2009, 2013, 2015, 2021, 2023 Free Software Foundation, Inc.
3
4 This file was written by Peter Miller <millerp@canb.auug.org.au>
5
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18
19
20 #ifdef HAVE_CONFIG_H
21 # include "config.h"
22 #endif
23
24 /* Specification. */
25 #include "read-catalog-abstract.h"
26
27 #include <limits.h>
28 #include <stdbool.h>
29 #include <stdlib.h>
30 #include <string.h>
31
32 #include "po-charset.h"
33 #include "xalloc.h"
34 #include "xvasprintf.h"
35 #include "po-xerror.h"
36 #include "error.h"
37 #include "gettext.h"
38
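/* The reader instance whose methods the po_callback_* functions below
   invoke.  It is set by parse_start and reset to NULL by parse_end.  */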
40 static abstract_catalog_reader_ty *callback_arg;
41
42
43 /* ========================================================================= */
44 /* Allocating and freeing instances of abstract_catalog_reader_ty. */
45
46
47 abstract_catalog_reader_ty *
48 catalog_reader_alloc (abstract_catalog_reader_class_ty *method_table)
49 {
50 abstract_catalog_reader_ty *pop;
51
52 pop = (abstract_catalog_reader_ty *) xmalloc (method_table->size);
53 pop->methods = method_table;
54 if (method_table->constructor)
55 method_table->constructor (pop);
56 return pop;
57 }
58
59
60 void
61 catalog_reader_free (abstract_catalog_reader_ty *pop)
62 {
63 if (pop->methods->destructor)
64 pop->methods->destructor (pop);
65 free (pop);
66 }
67
68
69 /* ========================================================================= */
70 /* Inline functions to invoke the methods. */
71
72
73 static inline void
74 call_parse_brief (abstract_catalog_reader_ty *pop)
75 {
76 if (pop->methods->parse_brief)
77 pop->methods->parse_brief (pop);
78 }
79
80 static inline void
81 call_parse_debrief (abstract_catalog_reader_ty *pop)
82 {
83 if (pop->methods->parse_debrief)
84 pop->methods->parse_debrief (pop);
85 }
86
87 static inline void
88 call_directive_domain (abstract_catalog_reader_ty *pop, char *name)
89 {
90 if (pop->methods->directive_domain)
91 pop->methods->directive_domain (pop, name);
92 }
93
94 static inline void
95 call_directive_message (abstract_catalog_reader_ty *pop,
96 char *msgctxt,
97 char *msgid,
98 lex_pos_ty *msgid_pos,
99 char *msgid_plural,
100 char *msgstr, size_t msgstr_len,
101 lex_pos_ty *msgstr_pos,
102 char *prev_msgctxt,
103 char *prev_msgid,
104 char *prev_msgid_plural,
105 bool force_fuzzy, bool obsolete)
106 {
107 if (pop->methods->directive_message)
108 pop->methods->directive_message (pop, msgctxt,
109 msgid, msgid_pos, msgid_plural,
110 msgstr, msgstr_len, msgstr_pos,
111 prev_msgctxt,
112 prev_msgid,
113 prev_msgid_plural,
114 force_fuzzy, obsolete);
115 }
116
117 static inline void
118 call_comment (abstract_catalog_reader_ty *pop, const char *s)
119 {
120 if (pop->methods->comment != NULL)
121 pop->methods->comment (pop, s);
122 }
123
124 static inline void
125 call_comment_dot (abstract_catalog_reader_ty *pop, const char *s)
126 {
127 if (pop->methods->comment_dot != NULL)
128 pop->methods->comment_dot (pop, s);
129 }
130
131 static inline void
132 call_comment_filepos (abstract_catalog_reader_ty *pop, const char *name,
133 size_t line)
134 {
135 if (pop->methods->comment_filepos)
136 pop->methods->comment_filepos (pop, name, line);
137 }
138
139 static inline void
140 call_comment_special (abstract_catalog_reader_ty *pop, const char *s)
141 {
142 if (pop->methods->comment_special != NULL)
143 pop->methods->comment_special (pop, s);
144 }
145
146
147 /* ========================================================================= */
148 /* Exported functions. */
149
150
151 static inline void
152 parse_start (abstract_catalog_reader_ty *pop)
153 {
154 /* The parse will call the po_callback_... functions (see below)
155 when the various directive are recognised. The callback_arg
156 variable is used to tell these functions which instance is to
157 have the relevant method invoked. */
158 callback_arg = pop;
159
160 call_parse_brief (pop);
161 }
162
163 static inline void
164 parse_end (abstract_catalog_reader_ty *pop)
165 {
166 call_parse_debrief (pop);
167 callback_arg = NULL;
168 }
169
170
171 void
172 catalog_reader_parse (abstract_catalog_reader_ty *pop, FILE *fp,
173 const char *real_filename, const char *logical_filename,
174 catalog_input_format_ty input_syntax)
175 {
176 error_message_count = 0;
177
178 /* Parse the stream's content. */
179 parse_start (pop);
180 input_syntax->parse (pop, fp, real_filename, logical_filename);
181 parse_end (pop);
182
183 if (error_message_count > 0)
184 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL,
185 /*real_filename*/ NULL, (size_t)(-1), (size_t)(-1), false,
186 xasprintf (ngettext ("found %u fatal error",
187 "found %u fatal errors",
188 error_message_count),
189 error_message_count));
190 }
191
192
193 /* ========================================================================= */
194 /* Callbacks used by po-gram.y or po-lex.c, indirectly from
195 catalog_reader_parse. */
196
197
198 /* This function is called by po_gram_lex() whenever a domain directive
199 has been seen. */
200 void
201 po_callback_domain (char *name)
202 {
203 /* assert(callback_arg); */
204 call_directive_domain (callback_arg, name);
205 }
206
207
208 /* This function is called by po_gram_lex() whenever a message has been
209 seen. */
210 void
211 po_callback_message (char *msgctxt,
212 char *msgid, lex_pos_ty *msgid_pos, char *msgid_plural,
213 char *msgstr, size_t msgstr_len, lex_pos_ty *msgstr_pos,
214 char *prev_msgctxt,
215 char *prev_msgid,
216 char *prev_msgid_plural,
217 bool force_fuzzy, bool obsolete)
218 {
219 /* assert(callback_arg); */
220 call_directive_message (callback_arg, msgctxt,
221 msgid, msgid_pos, msgid_plural,
222 msgstr, msgstr_len, msgstr_pos,
223 prev_msgctxt, prev_msgid, prev_msgid_plural,
224 force_fuzzy, obsolete);
225 }
226
227
228 void
229 po_callback_comment (const char *s)
230 {
231 /* assert(callback_arg); */
232 call_comment (callback_arg, s);
233 }
234
235
236 void
237 po_callback_comment_dot (const char *s)
238 {
239 /* assert(callback_arg); */
240 call_comment_dot (callback_arg, s);
241 }
242
243
244 /* This function is called by po_parse_comment_filepos(), once for each
245 filename. */
246 void
247 po_callback_comment_filepos (const char *name, size_t line)
248 {
249 /* assert(callback_arg); */
250 call_comment_filepos (callback_arg, name, line);
251 }
252
253
254 void
255 po_callback_comment_special (const char *s)
256 {
257 /* assert(callback_arg); */
258 call_comment_special (callback_arg, s);
259 }
260
261
262 /* Parse a special comment and put the result in *fuzzyp, formatp, *rangep,
263 *wrapp. */
264 void
265 po_parse_comment_special (const char *s,
266 bool *fuzzyp, enum is_format formatp[NFORMATS],
267 struct argument_range *rangep, enum is_wrap *wrapp,
268 enum is_syntax_check scp[NSYNTAXCHECKS])
269 {
270 size_t i;
271
272 *fuzzyp = false;
273 for (i = 0; i < NFORMATS; i++)
274 formatp[i] = undecided;
275 rangep->min = -1;
276 rangep->max = -1;
277 *wrapp = undecided;
278 for (i = 0; i < NSYNTAXCHECKS; i++)
279 scp[i] = undecided;
280
281 while (*s != '\0')
282 {
283 const char *t;
284
285 /* Skip whitespace. */
286 while (*s != '\0' && strchr ("\n \t\r\f\v,", *s) != NULL)
287 s++;
288
289 /* Collect a token. */
290 t = s;
291 while (*s != '\0' && strchr ("\n \t\r\f\v,", *s) == NULL)
292 s++;
293 if (s != t)
294 {
295 size_t len = s - t;
296
297 /* Accept fuzzy flag. */
298 if (len == 5 && memcmp (t, "fuzzy", 5) == 0)
299 {
300 *fuzzyp = true;
301 continue;
302 }
303
304 /* Accept format description. */
305 if (len >= 7 && memcmp (t + len - 7, "-format", 7) == 0)
306 {
307 const char *p;
308 size_t n;
309 enum is_format value;
310
311 p = t;
312 n = len - 7;
313
314 if (n >= 3 && memcmp (p, "no-", 3) == 0)
315 {
316 p += 3;
317 n -= 3;
318 value = no;
319 }
320 else if (n >= 9 && memcmp (p, "possible-", 9) == 0)
321 {
322 p += 9;
323 n -= 9;
324 value = possible;
325 }
326 else if (n >= 11 && memcmp (p, "impossible-", 11) == 0)
327 {
328 p += 11;
329 n -= 11;
330 value = impossible;
331 }
332 else
333 value = yes;
334
335 for (i = 0; i < NFORMATS; i++)
336 if (strlen (format_language[i]) == n
337 && memcmp (format_language[i], p, n) == 0)
338 {
339 formatp[i] = value;
340 break;
341 }
342 if (i < NFORMATS)
343 continue;
344 }
345
346 /* Accept range description "range: <min>..<max>". */
347 if (len == 6 && memcmp (t, "range:", 6) == 0)
348 {
349 /* Skip whitespace. */
350 while (*s != '\0' && strchr ("\n \t\r\f\v,", *s) != NULL)
351 s++;
352
353 /* Collect a token. */
354 t = s;
355 while (*s != '\0' && strchr ("\n \t\r\f\v,", *s) == NULL)
356 s++;
357 /* Parse it. */
358 if (*t >= '0' && *t <= '9')
359 {
360 unsigned int min = 0;
361
362 for (; *t >= '0' && *t <= '9'; t++)
363 {
364 if (min <= INT_MAX / 10)
365 {
366 min = 10 * min + (*t - '0');
367 if (min > INT_MAX)
368 min = INT_MAX;
369 }
370 else
371 /* Avoid integer overflow. */
372 min = INT_MAX;
373 }
374 if (*t++ == '.')
375 if (*t++ == '.')
376 if (*t >= '0' && *t <= '9')
377 {
378 unsigned int max = 0;
379 for (; *t >= '0' && *t <= '9'; t++)
380 {
381 if (max <= INT_MAX / 10)
382 {
383 max = 10 * max + (*t - '0');
384 if (max > INT_MAX)
385 max = INT_MAX;
386 }
387 else
388 /* Avoid integer overflow. */
389 max = INT_MAX;
390 }
391 if (min <= max)
392 {
393 rangep->min = min;
394 rangep->max = max;
395 continue;
396 }
397 }
398 }
399 }
400
401 /* Accept wrap description. */
402 if (len == 4 && memcmp (t, "wrap", 4) == 0)
403 {
404 *wrapp = yes;
405 continue;
406 }
407 if (len == 7 && memcmp (t, "no-wrap", 7) == 0)
408 {
409 *wrapp = no;
410 continue;
411 }
412
413 /* Accept syntax check description. */
414 if (len >= 6 && memcmp (t + len - 6, "-check", 6) == 0)
415 {
416 const char *p;
417 size_t n;
418 enum is_syntax_check value;
419
420 p = t;
421 n = len - 6;
422
423 if (n >= 3 && memcmp (p, "no-", 3) == 0)
424 {
425 p += 3;
426 n -= 3;
427 value = no;
428 }
429 else
430 value = yes;
431
432 for (i = 0; i < NSYNTAXCHECKS; i++)
433 if (strlen (syntax_check_name[i]) == n
434 && memcmp (syntax_check_name[i], p, n) == 0)
435 {
436 scp[i] = value;
437 break;
438 }
439 if (i < NSYNTAXCHECKS)
440 continue;
441 }
442
443 /* Unknown special comment marker. It may have been generated
444 from a future xgettext version. Ignore it. */
445 }
446 }
447 }
448
449
450 /* Parse a GNU style file comment.
451 Syntax: an arbitrary number of
452 STRING COLON NUMBER
453 or
454 STRING
455 The latter style, without line number, occurs in PO files converted e.g.
456 from Pascal .rst files or from OpenOffice resource files.
457 The STRING is either
458 FILENAME
459 or
460 U+2068 FILENAME U+2069.
461 Call po_callback_comment_filepos for each of them. */
462 static void
463 po_parse_comment_filepos (const char *s)
464 {
465 while (*s != '\0')
466 {
467 while (*s == ' ' || *s == '\t' || *s == '\n')
468 s++;
469 if (*s != '\0')
470 {
471 bool isolated_filename =
472 (po_lex_isolate_start != NULL
473 && strncmp (s, po_lex_isolate_start,
474 strlen (po_lex_isolate_start)) == 0);
475 if (isolated_filename)
476 s += strlen (po_lex_isolate_start);
477
478 const char *filename_start = s;
479 const char *filename_end;
480
481 if (isolated_filename)
482 {
483 for (;; s++)
484 {
485 if (*s == '\0' || *s == '\n')
486 {
487 filename_end = s;
488 break;
489 }
490 if (strncmp (s, po_lex_isolate_end,
491 strlen (po_lex_isolate_end)) == 0)
492 {
493 filename_end = s;
494 s += strlen (po_lex_isolate_end);
495 break;
496 }
497 }
498 }
499 else
500 {
501 do
502 s++;
503 while (!(*s == '\0' || *s == ' ' || *s == '\t' || *s == '\n'));
504 filename_end = s;
505 }
506
507 /* See if there is a COLON and NUMBER after the STRING, separated
508 through optional spaces. */
509 {
510 const char *p = s;
511
512 while (*p == ' ' || *p == '\t' || *p == '\n')
513 p++;
514
515 if (*p == ':')
516 {
517 p++;
518
519 while (*p == ' ' || *p == '\t' || *p == '\n')
520 p++;
521
522 if (*p >= '0' && *p <= '9')
523 {
524 /* Accumulate a number. */
525 size_t n = 0;
526
527 do
528 {
529 n = n * 10 + (*p - '0');
530 p++;
531 }
532 while (*p >= '0' && *p <= '9');
533
534 if (*p == '\0' || *p == ' ' || *p == '\t' || *p == '\n')
535 {
536 /* Parsed a GNU style file comment with spaces. */
537 size_t filename_length = filename_end - filename_start;
538 char *filename = XNMALLOC (filename_length + 1, char);
539
540 memcpy (filename, filename_start, filename_length);
541 filename[filename_length] = '\0';
542
543 po_callback_comment_filepos (filename, n);
544
545 free (filename);
546
547 s = p;
548 continue;
549 }
550 }
551 }
552 }
553
554 /* See if there is a COLON at the end of STRING and a NUMBER after
555 it, separated through optional spaces. */
556 if (s[-1] == ':')
557 {
558 const char *p = s;
559
560 while (*p == ' ' || *p == '\t' || *p == '\n')
561 p++;
562
563 if (*p >= '0' && *p <= '9')
564 {
565 /* Accumulate a number. */
566 size_t n = 0;
567
568 do
569 {
570 n = n * 10 + (*p - '0');
571 p++;
572 }
573 while (*p >= '0' && *p <= '9');
574
575 if (*p == '\0' || *p == ' ' || *p == '\t' || *p == '\n')
576 {
577 /* Parsed a GNU style file comment with spaces. */
578 filename_end = s - 1;
579 size_t filename_length = filename_end - filename_start;
580 char *filename = XNMALLOC (filename_length + 1, char);
581
582 memcpy (filename, filename_start, filename_length);
583 filename[filename_length] = '\0';
584
585 po_callback_comment_filepos (filename, n);
586
587 free (filename);
588
589 s = p;
590 continue;
591 }
592 }
593 }
594
595 /* See if there is a COLON and NUMBER at the end of the STRING,
596 without separating spaces. */
597 {
598 const char *p = s;
599
600 while (p > filename_start)
601 {
602 p--;
603 if (!(*p >= '0' && *p <= '9'))
604 {
605 p++;
606 break;
607 }
608 }
609
610 /* p now points to the beginning of the trailing digits segment
611 at the end of STRING. */
612
613 if (p < s
614 && p > filename_start + 1
615 && p[-1] == ':')
616 {
617 /* Parsed a GNU style file comment without spaces. */
618 const char *string_end = p - 1;
619
620 /* Accumulate a number. */
621 {
622 size_t n = 0;
623
624 do
625 {
626 n = n * 10 + (*p - '0');
627 p++;
628 }
629 while (p < s);
630
631 {
632 filename_end = string_end;
633 size_t filename_length = filename_end - filename_start;
634 char *filename = XNMALLOC (filename_length + 1, char);
635
636 memcpy (filename, filename_start, filename_length);
637 filename[filename_length] = '\0';
638
639 po_callback_comment_filepos (filename, n);
640
641 free (filename);
642
643 continue;
644 }
645 }
646 }
647 }
648
649 /* Parsed a file comment without line number. */
650 {
651 size_t filename_length = filename_end - filename_start;
652 char *filename = XNMALLOC (filename_length + 1, char);
653
654 memcpy (filename, filename_start, filename_length);
655 filename[filename_length] = '\0';
656
657 po_callback_comment_filepos (filename, (size_t)(-1));
658
659 free (filename);
660 }
661 }
662 }
663 }
664
665
666 /* Parse a SunOS or Solaris style file comment.
667 Syntax of SunOS style:
668 FILE_KEYWORD COLON STRING COMMA LINE_KEYWORD COLON NUMBER
669 Syntax of Solaris style:
670 FILE_KEYWORD COLON STRING COMMA LINE_KEYWORD NUMBER_KEYWORD COLON NUMBER
671 where
672 FILE_KEYWORD ::= "file" | "File"
673 COLON ::= ":"
674 COMMA ::= ","
675 LINE_KEYWORD ::= "line"
676 NUMBER_KEYWORD ::= "number"
677 NUMBER ::= [0-9]+
678 Return true if parsed, false if not a comment of this form. */
679 static bool
680 po_parse_comment_solaris_filepos (const char *s)
681 {
682 if (s[0] == ' '
683 && (s[1] == 'F' || s[1] == 'f')
684 && s[2] == 'i' && s[3] == 'l' && s[4] == 'e'
685 && s[5] == ':')
686 {
687 const char *string_start;
688 const char *string_end;
689
690 {
691 const char *p = s + 6;
692
693 while (*p == ' ' || *p == '\t')
694 p++;
695 string_start = p;
696 }
697
698 for (string_end = string_start; *string_end != '\0'; string_end++)
699 {
700 const char *p = string_end;
701
702 while (*p == ' ' || *p == '\t')
703 p++;
704
705 if (*p == ',')
706 {
707 p++;
708
709 while (*p == ' ' || *p == '\t')
710 p++;
711
712 if (p[0] == 'l' && p[1] == 'i' && p[2] == 'n' && p[3] == 'e')
713 {
714 p += 4;
715
716 while (*p == ' ' || *p == '\t')
717 p++;
718
719 if (p[0] == 'n' && p[1] == 'u' && p[2] == 'm'
720 && p[3] == 'b' && p[4] == 'e' && p[5] == 'r')
721 {
722 p += 6;
723 while (*p == ' ' || *p == '\t')
724 p++;
725 }
726
727 if (*p == ':')
728 {
729 p++;
730
731 if (*p >= '0' && *p <= '9')
732 {
733 /* Accumulate a number. */
734 size_t n = 0;
735
736 do
737 {
738 n = n * 10 + (*p - '0');
739 p++;
740 }
741 while (*p >= '0' && *p <= '9');
742
743 while (*p == ' ' || *p == '\t' || *p == '\n')
744 p++;
745
746 if (*p == '\0')
747 {
748 /* Parsed a Sun style file comment. */
749 size_t string_length = string_end - string_start;
750 char *string =
751 XNMALLOC (string_length + 1, char);
752
753 memcpy (string, string_start, string_length);
754 string[string_length] = '\0';
755
756 po_callback_comment_filepos (string, n);
757
758 free (string);
759 return true;
760 }
761 }
762 }
763 }
764 }
765 }
766 }
767
768 return false;
769 }
770
771
/* This function is called by po_gram_lex() whenever a comment is
   seen.  It looks at the first character of the comment S to see what
   sort it is, and then dispatches it:
     '.'         -> po_callback_comment_dot,
     ':'         -> po_parse_comment_filepos, which calls
                    po_callback_comment_filepos for each file position,
     ',' or '!'  -> po_callback_comment_special,
     otherwise   -> po_parse_comment_solaris_filepos or, if the comment
                    does not have that form, po_callback_comment.  */
void
po_callback_comment_dispatcher (const char *s)
{
  switch (*s)
    {
    case '.':
      s++;
      /* There is usually a space before the comment.  People don't
         consider it part of the comment, therefore remove it here.  */
      if (*s == ' ')
        s++;
      po_callback_comment_dot (s);
      break;

    case ':':
      /* Parse the file location string.  The appropriate callback will be
         invoked.  */
      po_parse_comment_filepos (s + 1);
      break;

    case ',':
    case '!':
      /* Get all entries in the special comment line.  */
      po_callback_comment_special (s + 1);
      break;

    default:
      /* It looks like a plain vanilla comment, but Solaris-style file
         position lines do, too.  Try to parse the lot.  If the parse
         succeeds, the appropriate callback has been invoked already and
         nothing is left to do.  */
      if (!po_parse_comment_solaris_filepos (s))
        {
          /* There is usually a space before the comment.  People don't
             consider it part of the comment, therefore remove it here.  */
          if (*s == ' ')
            s++;
          po_callback_comment (s);
        }
      break;
    }
}