1 /* xgettext librep backend.
2 Copyright (C) 2001-2003, 2005-2009, 2018-2023 Free Software Foundation, Inc.
3
4 This file was written by Bruno Haible <haible@clisp.cons.org>, 2001.
5
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18
19 #ifdef HAVE_CONFIG_H
20 # include "config.h"
21 #endif
22
23 /* Specification. */
24 #include "x-librep.h"
25
26 #include <errno.h>
27 #include <stdbool.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31
32 #include "attribute.h"
33 #include "c-ctype.h"
34 #include "message.h"
35 #include "xgettext.h"
36 #include "xg-pos.h"
37 #include "xg-mixed-string.h"
38 #include "xg-arglist-context.h"
39 #include "xg-arglist-callshape.h"
40 #include "xg-arglist-parser.h"
41 #include "xg-message.h"
42 #include "error.h"
43 #include "error-progname.h"
44 #include "xalloc.h"
45 #include "mem-hash-map.h"
46 #include "gettext.h"
47
48 #define _(s) gettext(s)
49
50
51 /* Summary of librep syntax:
52 - ';' starts a comment until end of line.
53 - Block comments start with '#|' and end with '|#'.
54 - Numbers are constituted of an optional prefix (#b, #B for binary,
55 #o, #O for octal, #d, #D for decimal, #x, #X for hexadecimal,
56 #e, #E for exact, #i, #I for inexact), an optional sign (+ or -), and
57 the digits.
58 - Characters are written as '?' followed by the character, possibly
59 with an escape sequence, for examples '?a', '?\n', '?\177'.
60 - Strings are delimited by double quotes. Backslash introduces an escape
61 sequence. The following are understood: '\n', '\r', '\f', '\t', '\a',
62 '\\', '\^C', '\012' (octal), '\x12' (hexadecimal).
63 - Symbols: can contain meta-characters - whitespace or any from ()[]'";|\' -
64 if preceded by backslash or enclosed in |...|.
65 - Keywords: written as #:SYMBOL.
66 - () delimit lists.
67 - [] delimit vectors.
68 The reader is implemented in librep-0.14/src/lisp.c. */
69
70
71 /* ====================== Keyword set customization. ====================== */
72
/* If true extract all strings.  Set by x_librep_extract_all and consulted
   by read_object whenever a string literal has been read.  */
static bool extract_all = false;

/* Table of the keywords registered through x_librep_keyword.  It is
   initialized lazily, on the first registration.  */
static hash_table keywords;
/* True as long as the built-in default keyword still has to be registered
   by init_keywords.  Cleared by x_librep_keyword (NULL) and by
   init_keywords itself.  */
static bool default_keywords = true;
78
79
80 void
81 x_librep_extract_all ()
82 {
83 extract_all = true;
84 }
85
86
87 void
88 x_librep_keyword (const char *name)
89 {
90 if (name == NULL)
91 default_keywords = false;
92 else
93 {
94 const char *end;
95 struct callshape shape;
96 const char *colon;
97
98 if (keywords.table == NULL)
99 hash_init (&keywords, 100);
100
101 split_keywordspec (name, &end, &shape);
102
103 /* The characters between name and end should form a valid Lisp
104 symbol. */
105 colon = strchr (name, ':');
106 if (colon == NULL || colon >= end)
107 insert_keyword_callshape (&keywords, name, end - name, &shape);
108 }
109 }
110
111 /* Finish initializing the keywords hash table.
112 Called after argument processing, before each file is processed. */
113 static void
114 init_keywords ()
115 {
116 if (default_keywords)
117 {
118 /* When adding new keywords here, also update the documentation in
119 xgettext.texi! */
120 x_librep_keyword ("_");
121 default_keywords = false;
122 }
123 }
124
/* Record the flag specifications for the librep backend: argument 1 of
   '_' and argument 2 of 'format'.  */
void
init_flag_table_librep (void)
{
  xgettext_record_flag ("_:1:pass-librep-format");
  xgettext_record_flag ("format:2:librep-format");
}
131
132
133 /* ======================== Reading of characters. ======================== */
134
/* The input file stream.  Set by extract_librep at the start of a scan
   and reset to NULL when the scan is finished.  */
static FILE *fp;
137
138
139 /* Fetch the next character from the input file. */
140 static int
141 do_getc ()
142 {
143 int c = getc (fp);
144
145 if (c == EOF)
146 {
147 if (ferror (fp))
148 error (EXIT_FAILURE, errno,
149 _("error while reading \"%s\""), real_file_name);
150 }
151 else if (c == '\n')
152 line_number++;
153
154 return c;
155 }
156
157 /* Put back the last fetched character, not EOF. */
158 static void
159 do_ungetc (int c)
160 {
161 if (c == '\n')
162 line_number--;
163 ungetc (c, fp);
164 }
165
166
167 /* ========================== Reading of tokens. ========================== */
168
169
/* A token consists of a sequence of characters.
   The characters are not NUL terminated; charcount gives their number.  */
struct token
{
  int allocated;                /* number of allocated chars */
  int charcount;                /* number of used chars */
  char *chars;                  /* the token's constituents */
};
177
178 /* Initialize a 'struct token'. */
179 static inline void
180 init_token (struct token *tp)
181 {
182 tp->allocated = 10;
183 tp->chars = XNMALLOC (tp->allocated, char);
184 tp->charcount = 0;
185 }
186
187 /* Free the memory pointed to by a 'struct token'. */
188 static inline void
189 free_token (struct token *tp)
190 {
191 free (tp->chars);
192 }
193
194 /* Ensure there is enough room in the token for one more character. */
195 static inline void
196 grow_token (struct token *tp)
197 {
198 if (tp->charcount == tp->allocated)
199 {
200 tp->allocated *= 2;
201 tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
202 }
203 }
204
/* Read the next token.  If 'first' is given, it points to the first
   character, which has already been read.  Returns true for a symbol,
   false for a number.
   The token's characters are stored in *TP, which is initialized here;
   the caller releases it with free_token.  The character that terminates
   the token is pushed back onto the input, unless it is EOF.
   While the characters are collected, the function speculatively checks
   whether they still form the syntax of a number.  */
static bool
read_token (struct token *tp, const int *first)
{
  int c;
  /* Variables for speculative number parsing: */
  /* radix: -1 = nothing decided yet, 0 = definitely not a number,
     1 = a leading '0' digit was seen, otherwise the radix (2, 8, 10, 16)
     of the number being recognized.  */
  int radix = -1;
  /* Index in tp->chars of the first character after sign and prefix.  */
  int nfirst = 0;
  bool exact = true;
  bool rational = false;
  bool exponent = false;
  bool had_sign = false;
  /* True right after a '#' at the start, when a radix or exactness
     letter is expected next.  */
  bool expecting_prefix = false;

  init_token (tp);

  if (first)
    c = *first;
  else
    c = do_getc ();

  for (;; c = do_getc ())
    {
      switch (c)
        {
        case EOF:
          goto done;

        /* Whitespace and delimiter characters terminate the token.  */
        case ' ': case '\t': case '\n': case '\f': case '\r':
        case '(': case ')': case '[': case ']':
        case '\'': case '"': case ';': case ',': case '`':
          goto done;

        case '\\':
          /* A backslash makes the following character a constituent,
             whatever it is.  The token can no longer be a number.  */
          radix = 0;
          c = do_getc ();
          if (c == EOF)
            /* Invalid, but be tolerant.  */
            break;
          grow_token (tp);
          tp->chars[tp->charcount++] = c;
          break;

        case '|':
          /* Everything up to the next '|' (or EOF) is taken literally.
             The token can no longer be a number.  */
          radix = 0;
          for (;;)
            {
              c = do_getc ();
              if (c == EOF || c == '|')
                break;
              grow_token (tp);
              tp->chars[tp->charcount++] = c;
            }
          break;

        default:
          if (radix != 0)
            {
              /* The token may still be a number.  */
              if (expecting_prefix)
                {
                  /* The character after a leading '#'.  */
                  switch (c)
                    {
                    case 'B': case 'b':
                      radix = 2;
                      break;
                    case 'O': case 'o':
                      radix = 8;
                      break;
                    case 'D': case 'd':
                      radix = 10;
                      break;
                    case 'X': case 'x':
                      radix = 16;
                      break;
                    case 'E': case 'e':
                    case 'I': case 'i':
                      /* These prefix letters leave the radix as it is.  */
                      break;
                    default:
                      radix = 0;
                      break;
                    }
                  expecting_prefix = false;
                  nfirst = tp->charcount + 1;
                }
              else if (tp->charcount == nfirst
                       && (c == '+' || c == '-' || c == '#'))
                {
                  /* A sign, or the '#' that starts a prefix, in front of
                     the digits.  */
                  if (c == '#')
                    {
                      /* A '#' after a sign rules out a number.  */
                      if (had_sign)
                        radix = 0;
                      else
                        expecting_prefix = true;
                    }
                  else
                    had_sign = true;
                  nfirst = tp->charcount + 1;
                }
              else
                {
                  switch (radix)
                    {
                    case -1:
                      /* First character after sign and prefix, radix not
                         yet known.  */
                      if (c == '.')
                        {
                          radix = 10;
                          exact = false;
                        }
                      else if (!(c >= '0' && c <= '9'))
                        radix = 0;
                      else if (c == '0')
                        radix = 1;
                      else
                        radix = 10;
                      break;

                    case 1:
                      /* Character following a leading '0'.  */
                      switch (c)
                        {
                        case 'X': case 'x':
                          radix = 16;
                          nfirst = tp->charcount + 1;
                          break;
                        case '0': case '1': case '2': case '3': case '4':
                        case '5': case '6': case '7':
                          radix = 8;
                          nfirst = tp->charcount;
                          break;
                        case '.': case 'E': case 'e':
                          radix = 10;
                          exact = false;
                          break;
                        case '/':
                          radix = 10;
                          rational = true;
                          break;
                        default:
                          radix = 0;
                          break;
                        }
                      break;

                    default:
                      /* Radix is known (2, 8, 10 or 16).  */
                      switch (c)
                        {
                        case '.':
                          /* At most one '.', only in radix 10, and not in
                             a rational.  */
                          if (exact && radix == 10 && !rational)
                            exact = false;
                          else
                            radix = 0;
                          break;
                        case '/':
                          /* At most one '/', and not after a '.' or an
                             exponent.  */
                          if (exact && !rational)
                            rational = true;
                          else
                            radix = 0;
                          break;
                        case 'E': case 'e':
                          if (radix == 10)
                            {
                              /* Exponent marker: allowed once, and not in
                                 a rational.  */
                              if (!rational && !exponent)
                                {
                                  exponent = true;
                                  exact = false;
                                }
                              else
                                radix = 0;
                              break;
                            }
                          /* In other radixes, 'E'/'e' is checked as a
                             digit below.  */
                          FALLTHROUGH;
                        default:
                          /* Once an exponent marker was seen, '+' and '-'
                             are accepted.  */
                          if (exponent && (c == '+' || c == '-'))
                            break;
                          /* Any character that is not a digit of the
                             current radix rules out a number.  */
                          if ((radix <= 10
                               && !(c >= '0' && c <= '0' + radix - 1))
                              || (radix == 16 && !c_isxdigit (c)))
                            radix = 0;
                          break;
                        }
                      break;
                    }
                }
            }
          else
            {
              /* The token is known not to be a number; a '#' then
                 terminates it.  */
              if (c == '#')
                goto done;
            }
          grow_token (tp);
          tp->chars[tp->charcount++] = c;
        }
    }
 done:
  if (c != EOF)
    do_ungetc (c);
  /* It is a number only if the number syntax was never violated and at
     least one character follows the sign and prefix.  */
  if (radix > 0 && nfirst < tp->charcount)
    return false; /* number */
  else
    return true; /* symbol */
}
407
408
409 /* ========================= Accumulating comments ========================= */
410
411
/* Accumulator for the text of the comment line being read.  */
static char *buffer;            /* storage, grown with xrealloc */
static size_t bufmax;           /* number of bytes allocated for buffer */
static size_t buflen;           /* number of bytes currently used */
415
416 static inline void
417 comment_start ()
418 {
419 buflen = 0;
420 }
421
422 static inline void
423 comment_add (int c)
424 {
425 if (buflen >= bufmax)
426 {
427 bufmax = 2 * bufmax + 10;
428 buffer = xrealloc (buffer, bufmax);
429 }
430 buffer[buflen++] = c;
431 }
432
433 static inline void
434 comment_line_end (size_t chars_to_remove)
435 {
436 buflen -= chars_to_remove;
437 while (buflen >= 1
438 && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
439 --buflen;
440 if (chars_to_remove == 0 && buflen >= bufmax)
441 {
442 bufmax = 2 * bufmax + 10;
443 buffer = xrealloc (buffer, bufmax);
444 }
445 buffer[buflen] = '\0';
446 savable_comment_add (buffer);
447 }
448
449
/* These are for tracking whether comments count as immediately before
   keyword.
   last_comment_line is set by read_object when it reads a comment,
   last_non_comment_line when it has read any other object.  Both are
   reset to -1 by extract_librep.  */
static int last_comment_line;
static int last_non_comment_line;
454
455
456 /* ========================= Accumulating messages ========================= */
457
458
/* The message list that receives the extracted messages.  Set by
   extract_librep from the first item of the msgdomain list it is given.  */
static message_list_ty *mlp;
460
461
462 /* ============== Reading of objects. See CLHS 2 "Syntax". ============== */
463
464
/* We are only interested in symbols (e.g. the keyword _) and strings.
   Other objects need not be represented precisely.  */
enum object_type
{
  t_symbol,     /* symbol */
  t_string,     /* string */
  t_other,      /* other kind of real object */
  t_dot,        /* '.' pseudo object */
  t_close,      /* ')' or ']' pseudo object */
  t_eof         /* EOF marker */
};
476
/* The result of read_object.  The token is heap-allocated and only
   present for t_symbol and t_string; free_object releases it.  */
struct object
{
  enum object_type type;
  struct token *token;          /* for t_symbol and t_string */
  int line_number_at_start;     /* for t_string */
};
483
484 /* Free the memory pointed to by a 'struct object'. */
485 static inline void
486 free_object (struct object *op)
487 {
488 if (op->type == t_symbol || op->type == t_string)
489 {
490 free_token (op->token);
491 free (op->token);
492 }
493 }
494
495 /* Convert a t_symbol/t_string token to a char*. */
496 static char *
497 string_of_object (const struct object *op)
498 {
499 char *str;
500 int n;
501
502 if (!(op->type == t_symbol || op->type == t_string))
503 abort ();
504 n = op->token->charcount;
505 str = XNMALLOC (n + 1, char);
506 memcpy (str, op->token->chars, n);
507 str[n] = '\0';
508 return str;
509 }
510
511
/* Context lookup table.  Set by extract_librep; read_object looks up in
   it the symbol found in the function position of a list.  */
static flag_context_list_table_ty *flag_context_list_table;
514
515
/* Maximum supported nesting depth.  read_object reports a fatal error
   when nesting_depth exceeds it.  */
#define MAX_NESTING_DEPTH 1000

/* Current nesting depth.  Incremented around each recursive call of
   read_object and reset to 0 by extract_librep.  */
static int nesting_depth;
521
522
/* Returns the character represented by an escape sequence.
   C is the character that follows the backslash; it has already been
   read.  Further characters of the escape sequence are read from the
   input as needed, and a character that does not belong to the sequence
   is pushed back.
   Understood are: \n \r \f \t \v \a, \^C (control character), up to three
   octal digits, and \x followed by any number of hexadecimal digits.
   Every other character stands for itself.
   The value of an octal or hexadecimal sequence is reduced to 8 bits.
   Returns EOF when the input ends right after '\^'.  */
static int
do_getc_escaped (int c)
{
  switch (c)
    {
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 'f':
      return '\f';
    case 't':
      return '\t';
    case 'v':
      return '\v';
    case 'a':
      return '\a';
    case '^':
      c = do_getc ();
      if (c == EOF)
        return EOF;
      return c & 0x1f;
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7':
      {
        int n = c - '0';
        int extra;

        /* At most two more octal digits belong to the sequence.  */
        for (extra = 0; extra < 2; extra++)
          {
            c = do_getc ();
            if (c == EOF)
              break;
            if (!(c >= '0' && c <= '7'))
              {
                do_ungetc (c);
                break;
              }
            n = (n << 3) + (c - '0');
          }
        return (unsigned char) n;
      }
    case 'x':
      {
        int n = 0;

        for (;;)
          {
            int digit;

            c = do_getc ();
            if (c == EOF)
              break;
            if (c >= '0' && c <= '9')
              digit = c - '0';
            else if (c >= 'A' && c <= 'F')
              digit = c - 'A' + 10;
            else if (c >= 'a' && c <= 'f')
              digit = c - 'a' + 10;
            else
              {
                do_ungetc (c);
                break;
              }
            /* Only the low 8 bits are returned.  Reducing to them at each
               step keeps n small, so that the shift cannot overflow no
               matter how many hexadecimal digits the input contains.  */
            n = ((n << 4) | digit) & 0xff;
          }
        return (unsigned char) n;
      }
    default:
      return c;
    }
}
598
/* Read the next object.
   Whitespace and comments in front of the object are skipped; the text of
   the comments is passed on through comment_line_end.
   The kind of object is stored in op->type.  For t_symbol and t_string,
   op->token is allocated and filled; the caller releases it with
   free_object.  For t_string, op->line_number_at_start is set as well.
   Lists and vectors are read recursively and reported as t_other.
   OUTER_CONTEXT is the flag context of the position in which the object
   occurs; it is inherited by the arguments of a list.
   Strings found in the argument positions of a list whose first element
   is a symbol are handed to an arglist parser.  When extract_all is set,
   every string is additionally remembered as a message.  */
static void
read_object (struct object *op, flag_context_ty outer_context)
{
  if (nesting_depth > MAX_NESTING_DEPTH)
    {
      error_with_progname = false;
      error (EXIT_FAILURE, 0, _("%s:%d: error: too deeply nested objects"),
             logical_file_name, line_number);
    }
  for (;;)
    {
      int ch;

      ch = do_getc ();

      switch (ch)
        {
        case EOF:
          op->type = t_eof;
          return;

        case '\n':
          /* Comments assumed to be grouped with a message must immediately
             precede it, with no non-whitespace token on a line between
             both.  */
          if (last_non_comment_line > last_comment_line)
            savable_comment_reset ();
          continue;

        case ' ': case '\t': case '\f': case '\r':
          continue;

        case '(':
          {
            int arg = 0;            /* Current argument number.  */
            flag_context_list_iterator_ty context_iter;
            const struct callshapes *shapes = NULL;
            struct arglist_parser *argparser = NULL;

            for (;; arg++)
              {
                struct object inner;
                flag_context_ty inner_context;

                /* The function position has no context; the argument
                   positions get theirs from context_iter, which is set
                   when the function position is processed below.  */
                if (arg == 0)
                  inner_context = null_context;
                else
                  inner_context =
                    inherited_context (outer_context,
                                       flag_context_list_iterator_advance (
                                         &context_iter));

                ++nesting_depth;
                read_object (&inner, inner_context);
                nesting_depth--;

                /* Recognize end of list.  */
                if (inner.type == t_close)
                  {
                    op->type = t_other;
                    /* Don't bother converting "()" to "NIL".  */
                    last_non_comment_line = line_number;
                    if (argparser != NULL)
                      arglist_parser_done (argparser, arg);
                    return;
                  }

                /* Dots are not allowed in every position.
                   But be tolerant.  */

                /* EOF inside list is illegal.  But be tolerant.  */
                if (inner.type == t_eof)
                  break;

                if (arg == 0)
                  {
                    /* This is the function position.  */
                    if (inner.type == t_symbol)
                      {
                        char *symbol_name = string_of_object (&inner);
                        void *keyword_value;

                        /* If the symbol is a registered keyword, use its
                           call shapes; otherwise shapes stays NULL.  */
                        if (hash_find_entry (&keywords,
                                             symbol_name, strlen (symbol_name),
                                             &keyword_value)
                            == 0)
                          shapes = (const struct callshapes *) keyword_value;

                        argparser = arglist_parser_alloc (mlp, shapes);

                        context_iter =
                          flag_context_list_iterator (
                            flag_context_list_table_lookup (
                              flag_context_list_table,
                              symbol_name, strlen (symbol_name)));

                        free (symbol_name);
                      }
                    else
                      context_iter = null_context_list_iterator;
                  }
                else
                  {
                    /* These are the argument positions.  */
                    if (argparser != NULL && inner.type == t_string)
                      {
                        char *s = string_of_object (&inner);
                        mixed_string_ty *ms =
                          mixed_string_alloc_simple (s, lc_string,
                                                     logical_file_name,
                                                     inner.line_number_at_start);
                        free (s);
                        arglist_parser_remember (argparser, arg, ms,
                                                 inner_context,
                                                 logical_file_name,
                                                 inner.line_number_at_start,
                                                 savable_comment, false);
                      }
                  }

                free_object (&inner);
              }

            /* Reached only through EOF inside the list.  */
            if (argparser != NULL)
              arglist_parser_done (argparser, arg);
          }
          op->type = t_other;
          last_non_comment_line = line_number;
          return;

        case '[':
          {
            /* The elements of a vector are read and discarded.  */
            for (;;)
              {
                struct object inner;

                ++nesting_depth;
                read_object (&inner, null_context);
                nesting_depth--;

                /* Recognize end of vector.  */
                if (inner.type == t_close)
                  {
                    op->type = t_other;
                    last_non_comment_line = line_number;
                    return;
                  }

                /* Dots are not allowed.  But be tolerant.  */

                /* EOF inside vector is illegal.  But be tolerant.  */
                if (inner.type == t_eof)
                  break;

                free_object (&inner);
              }
          }
          op->type = t_other;
          last_non_comment_line = line_number;
          return;

        case ')': case ']':
          /* Tell the caller about the end of list or vector.
             Unmatched closing parenthesis is illegal.  But be tolerant.  */
          op->type = t_close;
          last_non_comment_line = line_number;
          return;

        case ',':
          {
            int c = do_getc ();
            /* The ,@ handling inside lists is wrong anyway, because
               ,@form expands to an unknown number of elements.  */
            if (c != EOF && c != '@')
              do_ungetc (c);
          }
          FALLTHROUGH;
        case '\'':
        case '`':
          {
            /* Read the object that follows, and discard it.  */
            struct object inner;

            ++nesting_depth;
            read_object (&inner, null_context);
            nesting_depth--;

            /* Dots and EOF are not allowed here.  But be tolerant.  */

            free_object (&inner);

            op->type = t_other;
            last_non_comment_line = line_number;
            return;
          }

        case ';':
          {
            /* Comment until end of line.  The leading run of semicolons
               is not part of the comment text.  */
            bool all_semicolons = true;

            last_comment_line = line_number;
            comment_start ();
            for (;;)
              {
                int c = do_getc ();
                if (c == EOF || c == '\n' || c == '\f' || c == '\r')
                  break;
                if (c != ';')
                  all_semicolons = false;
                if (!all_semicolons)
                  {
                    /* We skip all leading white space, but not EOLs.  */
                    if (!(buflen == 0 && (c == ' ' || c == '\t')))
                      comment_add (c);
                  }
              }
            comment_line_end (0);
            continue;
          }

        case '"':
          {
            /* String literal.  Its contents, with the escape sequences
               resolved, are collected in a newly allocated token.  */
            op->token = XMALLOC (struct token);
            init_token (op->token);
            op->line_number_at_start = line_number;
            for (;;)
              {
                int c = do_getc ();
                if (c == EOF)
                  /* Invalid input.  Be tolerant, no error message.  */
                  break;
                if (c == '"')
                  break;
                if (c == '\\')
                  {
                    c = do_getc ();
                    if (c == EOF)
                      /* Invalid input.  Be tolerant, no error message.  */
                      break;
                    if (c == '\n')
                      /* Ignore escaped newline.  */
                      ;
                    else
                      {
                        c = do_getc_escaped (c);
                        if (c == EOF)
                          /* Invalid input.  Be tolerant, no error message.  */
                          break;
                        grow_token (op->token);
                        op->token->chars[op->token->charcount++] = c;
                      }
                  }
                else
                  {
                    grow_token (op->token);
                    op->token->chars[op->token->charcount++] = c;
                  }
              }
            op->type = t_string;

            if (extract_all)
              {
                lex_pos_ty pos;

                pos.file_name = logical_file_name;
                pos.line_number = op->line_number_at_start;
                remember_a_message (mlp, NULL, string_of_object (op), false,
                                    false, null_context, &pos,
                                    NULL, savable_comment, false);
              }
            last_non_comment_line = line_number;
            return;
          }

        case '?':
          {
            /* Character literal.  Its characters are consumed; the value
               is not needed.  */
            int c = do_getc ();
            if (c == EOF)
              /* Invalid input.  Be tolerant, no error message.  */
              ;
            else if (c == '\\')
              {
                c = do_getc ();
                if (c == EOF)
                  /* Invalid input.  Be tolerant, no error message.  */
                  ;
                else
                  {
                    c = do_getc_escaped (c);
                    if (c == EOF)
                      /* Invalid input.  Be tolerant, no error message.  */
                      ;
                  }
              }
            op->type = t_other;
            last_non_comment_line = line_number;
            return;
          }

        case '#':
          /* Dispatch macro handling.  */
          {
            int dmc = do_getc ();
            if (dmc == EOF)
              /* Invalid input.  Be tolerant, no error message.  */
              {
                op->type = t_other;
                return;
              }

            switch (dmc)
              {
              case '!':
                /* A file position of 2 right after reading "#!" means that
                   these were the first two bytes of the stream (assuming
                   the scan started at offset 0).  */
                if (ftell (fp) == 2)
                  /* Skip comment until !# */
                  {
                    int c;

                    c = do_getc ();
                    for (;;)
                      {
                        if (c == EOF)
                          break;
                        if (c == '!')
                          {
                            c = do_getc ();
                            if (c == EOF || c == '#')
                              break;
                          }
                        else
                          c = do_getc ();
                      }
                    if (c == EOF)
                      {
                        /* EOF not allowed here.  But be tolerant.  */
                        op->type = t_eof;
                        return;
                      }
                    continue;
                  }
                /* Elsewhere, "#!" is treated like "#'" and "#:".  */
                FALLTHROUGH;
              case '\'':
              case ':':
                {
                  /* Read the object that follows, and discard it.  */
                  struct object inner;
                  ++nesting_depth;
                  read_object (&inner, null_context);
                  nesting_depth--;
                  /* Dots and EOF are not allowed here.
                     But be tolerant.  */
                  free_object (&inner);
                  op->type = t_other;
                  last_non_comment_line = line_number;
                  return;
                }

              case '[':
              case '(':
                {
                  /* Push the bracket back, so that the recursive call
                     reads a complete vector or list.  */
                  struct object inner;
                  do_ungetc (dmc);
                  ++nesting_depth;
                  read_object (&inner, null_context);
                  nesting_depth--;
                  /* Dots and EOF are not allowed here.
                     But be tolerant.  */
                  free_object (&inner);
                  op->type = t_other;
                  last_non_comment_line = line_number;
                  return;
                }

              case '|':
                {
                  /* Block comment, ending at the matching "|#".  Block
                     comments nest; depth counts the inner ones that are
                     currently open.  */
                  int depth = 0;
                  int c;

                  comment_start ();
                  c = do_getc ();
                  for (;;)
                    {
                      if (c == EOF)
                        break;
                      if (c == '|')
                        {
                          c = do_getc ();
                          if (c == EOF)
                            break;
                          if (c == '#')
                            {
                              if (depth == 0)
                                {
                                  comment_line_end (0);
                                  break;
                                }
                              depth--;
                              comment_add ('|');
                              comment_add ('#');
                              c = do_getc ();
                            }
                          else
                            /* A lone '|'.  The character after it is
                               examined in the next iteration.  */
                            comment_add ('|');
                        }
                      else if (c == '#')
                        {
                          c = do_getc ();
                          if (c == EOF)
                            break;
                          comment_add ('#');
                          if (c == '|')
                            {
                              depth++;
                              comment_add ('|');
                              c = do_getc ();
                            }
                        }
                      else
                        {
                          /* We skip all leading white space.  */
                          if (!(buflen == 0 && (c == ' ' || c == '\t')))
                            comment_add (c);
                          if (c == '\n')
                            {
                              /* Emit the line without the newline that
                                 was just added.  */
                              comment_line_end (1);
                              comment_start ();
                            }
                          c = do_getc ();
                        }
                    }
                  if (c == EOF)
                    {
                      /* EOF not allowed here.  But be tolerant.  */
                      op->type = t_eof;
                      return;
                    }
                  last_comment_line = line_number;
                  continue;
                }

              case '\\':
                {
                  /* Read the rest as a token, starting with the
                     backslash, and discard it.  */
                  struct token token;
                  int first = '\\';
                  read_token (&token, &first);
                  free_token (&token);
                  op->type = t_other;
                  last_non_comment_line = line_number;
                  return;
                }

              case 'T': case 't':
              case 'F': case 'f':
                op->type = t_other;
                last_non_comment_line = line_number;
                return;

              case 'B': case 'b':
              case 'O': case 'o':
              case 'D': case 'd':
              case 'X': case 'x':
              case 'E': case 'e':
              case 'I': case 'i':
                {
                  /* Number with prefix.  Push the prefix letter back and
                     read the token anew, starting with the '#'.  */
                  struct token token;
                  do_ungetc (dmc);
                  {
                    int c;
                    c = '#';
                    read_token (&token, &c);
                    free_token (&token);
                  }
                  op->type = t_other;
                  last_non_comment_line = line_number;
                  return;
                }

              default:
                /* Invalid input.  Be tolerant, no error message.  */
                op->type = t_other;
                last_non_comment_line = line_number;
                return;
              }

            /*NOTREACHED*/
            abort ();
          }

        default:
          /* Read a token.  */
          {
            bool symbol;

            op->token = XMALLOC (struct token);
            symbol = read_token (op->token, &ch);
            /* A token consisting of a single '.' is the dot pseudo
               object.  */
            if (op->token->charcount == 1 && op->token->chars[0] == '.')
              {
                free_token (op->token);
                free (op->token);
                op->type = t_dot;
                last_non_comment_line = line_number;
                return;
              }
            if (!symbol)
              {
                /* A number.  Its token is not needed.  */
                free_token (op->token);
                free (op->token);
                op->type = t_other;
                last_non_comment_line = line_number;
                return;
              }
            /* Distinguish between "foo" and "foo#bar".  */
            {
              int c = do_getc ();
              if (c == '#')
                {
                  /* "foo#bar": read and discard the second part; the
                     whole is not reported as a symbol.  */
                  struct token second_token;

                  free_token (op->token);
                  free (op->token);
                  read_token (&second_token, NULL);
                  free_token (&second_token);
                  op->type = t_other;
                  last_non_comment_line = line_number;
                  return;
                }
              else
                {
                  if (c != EOF)
                    do_ungetc (c);
                  op->type = t_symbol;
                  last_non_comment_line = line_number;
                  return;
                }
            }
          }
        }
    }
}
1137
1138
1139 void
1140 extract_librep (FILE *f,
1141 const char *real_filename, const char *logical_filename,
1142 flag_context_list_table_ty *flag_table,
1143 msgdomain_list_ty *mdlp)
1144 {
1145 mlp = mdlp->item[0]->messages;
1146
1147 fp = f;
1148 real_file_name = real_filename;
1149 logical_file_name = xstrdup (logical_filename);
1150 line_number = 1;
1151
1152 last_comment_line = -1;
1153 last_non_comment_line = -1;
1154
1155 flag_context_list_table = flag_table;
1156 nesting_depth = 0;
1157
1158 init_keywords ();
1159
1160 /* Eat tokens until eof is seen. When read_object returns
1161 due to an unbalanced closing parenthesis, just restart it. */
1162 do
1163 {
1164 struct object toplevel_object;
1165
1166 read_object (&toplevel_object, null_context);
1167
1168 if (toplevel_object.type == t_eof)
1169 break;
1170
1171 free_object (&toplevel_object);
1172 }
1173 while (!feof (fp));
1174
1175 /* Close scanner. */
1176 fp = NULL;
1177 real_file_name = NULL;
1178 logical_file_name = NULL;
1179 line_number = 0;
1180 }