1 /* xgettext Tcl backend.
2 Copyright (C) 2002-2003, 2005-2009, 2013, 2018-2023 Free Software Foundation, Inc.
3
4 This file was written by Bruno Haible <haible@clisp.cons.org>, 2002.
5
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18
19 #ifdef HAVE_CONFIG_H
20 # include "config.h"
21 #endif
22
23 /* Specification. */
24 #include "x-tcl.h"
25
26 #include <assert.h>
27 #include <errno.h>
28 #include <limits.h>
29 #include <stdbool.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33
34 #include "attribute.h"
35 #include "message.h"
36 #include "xgettext.h"
37 #include "xg-pos.h"
38 #include "xg-encoding.h"
39 #include "xg-mixed-string.h"
40 #include "xg-arglist-context.h"
41 #include "xg-arglist-callshape.h"
42 #include "xg-arglist-parser.h"
43 #include "xg-message.h"
44 #include "error.h"
45 #include "error-progname.h"
46 #include "xalloc.h"
47 #include "mem-hash-map.h"
48 #include "c-ctype.h"
49 #include "po-charset.h"
50 #include "unistr.h"
51 #include "gettext.h"
52
53 #define _(s) gettext(s)
54
55 #define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
56
57
58 /* The Tcl syntax is defined in the Tcl.n manual page, see
59 https://www.tcl-lang.org/man/tcl8.6/TclCmd/Tcl.htm .
60 Summary of Tcl syntax:
61 Like sh syntax, except that `...` is replaced with [...]. In detail:
62 - In a preprocessing pass, backslash-newline-anywhitespace is replaced
63 with single space.
64 - Input is broken into words, which are then subject to command
65 substitution [...] , variable substitution $var, backslash substitution
66 \escape.
67 - Strings are enclosed in "..."; command substitution, variable
68 substitution and backslash substitutions are performed here as well.
69 - {...} is a string without substitutions.
70 - The list of resulting words is split into commands by semicolon and
71 newline.
72 - '#' at the beginning of a command introduces a comment until end of line.
73 The parser is implemented in tcl8.6/generic/tclParse.c. */
74
75
76 /* ====================== Keyword set customization. ====================== */
77
78 /* If true extract all strings. */
79 static bool extract_all = false;
80
81 static hash_table keywords;
82 static bool default_keywords = true;
83
84
85 void
86 x_tcl_extract_all ()
87 {
88 extract_all = true;
89 }
90
91
92 void
93 x_tcl_keyword (const char *name)
94 {
95 if (name == NULL)
96 default_keywords = false;
97 else
98 {
99 const char *end;
100 struct callshape shape;
101
102 if (keywords.table == NULL)
103 hash_init (&keywords, 100);
104
105 split_keywordspec (name, &end, &shape);
106
107 /* The characters between name and end should form a valid Tcl
108 function name. A leading "::" is redundant. */
109 if (end - name >= 2 && name[0] == ':' && name[1] == ':')
110 name += 2;
111
112 insert_keyword_callshape (&keywords, name, end - name, &shape);
113 }
114 }
115
116 /* Finish initializing the keywords hash table.
117 Called after argument processing, before each file is processed. */
118 static void
119 init_keywords ()
120 {
121 if (default_keywords)
122 {
123 /* When adding new keywords here, also update the documentation in
124 xgettext.texi! */
125 x_tcl_keyword ("::msgcat::mc");
126 default_keywords = false;
127 }
128 }
129
130 void
131 init_flag_table_tcl ()
132 {
133 xgettext_record_flag ("::msgcat::mc:1:pass-tcl-format");
134 xgettext_record_flag ("format:1:tcl-format");
135 }
136
137
138 /* ======================== Reading of characters. ======================== */
139
140 /* The input file stream. */
141 static FILE *fp;
142
143
144 /* Fetch the next character from the input file. */
145 static int
146 do_getc ()
147 {
148 int c = getc (fp);
149
150 if (c == EOF)
151 {
152 if (ferror (fp))
153 error (EXIT_FAILURE, errno,
154 _("error while reading \"%s\""), real_file_name);
155 }
156 else if (c == '\n')
157 line_number++;
158
159 return c;
160 }
161
162 /* Put back the last fetched character, not EOF. */
163 static void
164 do_ungetc (int c)
165 {
166 if (c == '\n')
167 line_number--;
168 ungetc (c, fp);
169 }
170
171
172 /* Combine backslash followed by newline and additional whitespace to
173 a single space. */
174
175 /* An int that becomes a space when casted to 'unsigned char'. */
176 #define BS_NL (UCHAR_MAX + 1 + ' ')
177
178 static int phase1_pushback[5];
179 static int phase1_pushback_length;
180
181 static int
182 phase1_getc ()
183 {
184 int c;
185
186 if (phase1_pushback_length)
187 {
188 c = phase1_pushback[--phase1_pushback_length];
189 if (c == '\n' || c == BS_NL)
190 ++line_number;
191 return c;
192 }
193 c = do_getc ();
194 if (c != '\\')
195 return c;
196 c = do_getc ();
197 if (c != '\n')
198 {
199 if (c != EOF)
200 do_ungetc (c);
201 return '\\';
202 }
203 for (;;)
204 {
205 c = do_getc ();
206 if (!(c == ' ' || c == '\t'))
207 break;
208 }
209 if (c != EOF)
210 do_ungetc (c);
211 return BS_NL;
212 }
213
214 /* Supports only one pushback character. */
215 static void
216 phase1_ungetc (int c)
217 {
218 switch (c)
219 {
220 case EOF:
221 break;
222
223 case '\n':
224 case BS_NL:
225 --line_number;
226 FALLTHROUGH;
227
228 default:
229 if (phase1_pushback_length == SIZEOF (phase1_pushback))
230 abort ();
231 phase1_pushback[phase1_pushback_length++] = c;
232 break;
233 }
234 }
235
236
237 /* Keep track of brace nesting depth.
238 When a word starts with an opening brace, a character group begins that
239 ends with the corresponding closing brace. In theory these character
240 groups are string literals, but they are used by so many Tcl primitives
241 (proc, if, ...) as representing command lists, that we treat them as
242 command lists. */
243
244 /* An int that becomes a closing brace when casted to 'unsigned char'. */
245 #define CL_BRACE (UCHAR_MAX + 1 + '}')
246
247 static int phase2_pushback[2];
248 static int phase2_pushback_length;
249
250 /* Brace nesting depth inside the current character group. */
251 static int brace_depth;
252
253 static int
254 phase2_push ()
255 {
256 int previous_depth = brace_depth;
257 brace_depth = 1;
258 return previous_depth;
259 }
260
261 static void
262 phase2_pop (int previous_depth)
263 {
264 brace_depth = previous_depth;
265 }
266
267 static int
268 phase2_getc ()
269 {
270 int c;
271
272 if (phase2_pushback_length)
273 {
274 c = phase2_pushback[--phase2_pushback_length];
275 if (c == '\n' || c == BS_NL)
276 ++line_number;
277 else if (c == '{')
278 ++brace_depth;
279 else if (c == '}')
280 --brace_depth;
281 return c;
282 }
283 c = phase1_getc ();
284 if (c == '{')
285 ++brace_depth;
286 else if (c == '}')
287 {
288 if (--brace_depth == 0)
289 c = CL_BRACE;
290 }
291 return c;
292 }
293
294 /* Supports 2 characters of pushback. */
295 static void
296 phase2_ungetc (int c)
297 {
298 if (c != EOF)
299 {
300 switch (c)
301 {
302 case '\n':
303 case BS_NL:
304 --line_number;
305 break;
306
307 case '{':
308 --brace_depth;
309 break;
310
311 case '}':
312 ++brace_depth;
313 break;
314 }
315 if (phase2_pushback_length == SIZEOF (phase2_pushback))
316 abort ();
317 phase2_pushback[phase2_pushback_length++] = c;
318 }
319 }
320
321
322 /* ========================== Reading of tokens. ========================== */
323
324
325 /* A token consists of a sequence of characters. */
326 struct token
327 {
328 int allocated; /* number of allocated 'token_char's */
329 int charcount; /* number of used 'token_char's */
330 char *chars; /* the token's constituents */
331 };
332
333 /* Initialize a 'struct token'. */
334 static inline void
335 init_token (struct token *tp)
336 {
337 tp->allocated = 10;
338 tp->chars = XNMALLOC (tp->allocated, char);
339 tp->charcount = 0;
340 }
341
342 /* Free the memory pointed to by a 'struct token'. */
343 static inline void
344 free_token (struct token *tp)
345 {
346 free (tp->chars);
347 }
348
349 /* Ensure there is enough room in the token for one more character. */
350 static inline void
351 grow_token (struct token *tp)
352 {
353 if (tp->charcount == tp->allocated)
354 {
355 tp->allocated *= 2;
356 tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
357 }
358 }
359
360
361 /* ========================= Accumulating comments ========================= */
362
363
364 static char *buffer;
365 static size_t bufmax;
366 static size_t buflen;
367
368 static inline void
369 comment_start ()
370 {
371 buflen = 0;
372 }
373
374 static inline void
375 comment_add (int c)
376 {
377 if (buflen >= bufmax)
378 {
379 bufmax = 2 * bufmax + 10;
380 buffer = xrealloc (buffer, bufmax);
381 }
382 buffer[buflen++] = c;
383 }
384
385 static inline void
386 comment_line_end ()
387 {
388 while (buflen >= 1
389 && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
390 --buflen;
391 if (buflen >= bufmax)
392 {
393 bufmax = 2 * bufmax + 10;
394 buffer = xrealloc (buffer, bufmax);
395 }
396 buffer[buflen] = '\0';
397 savable_comment_add (buffer);
398 }
399
400
401 /* These are for tracking whether comments count as immediately before
402 keyword. */
403 static int last_comment_line;
404 static int last_non_comment_line;
405
406
407 /* ========================= Accumulating messages ========================= */
408
409
410 static message_list_ty *mlp;
411
412
413 /* ========================== Reading of commands ========================== */
414
415
416 /* We are only interested in constant strings (e.g. "msgcat::mc" or other
417 string literals). Other words need not to be represented precisely. */
418 enum word_type
419 {
420 t_string, /* constant string */
421 t_other, /* other string */
422 t_separator, /* command separator: semicolon or newline */
423 t_bracket, /* ']' pseudo word */
424 t_brace, /* '}' pseudo word */
425 t_eof /* EOF marker */
426 };
427
428 struct word
429 {
430 enum word_type type;
431 struct token *token; /* for t_string */
432 int line_number_at_start; /* for t_string */
433 };
434
435 /* Free the memory pointed to by a 'struct word'. */
436 static inline void
437 free_word (struct word *wp)
438 {
439 if (wp->type == t_string)
440 {
441 free_token (wp->token);
442 free (wp->token);
443 }
444 }
445
446 /* Convert a t_string token to a char*. */
447 static char *
448 string_of_word (const struct word *wp)
449 {
450 char *str;
451 int n;
452
453 if (!(wp->type == t_string))
454 abort ();
455 n = wp->token->charcount;
456 str = XNMALLOC (n + 1, char);
457 memcpy (str, wp->token->chars, n);
458 str[n] = '\0';
459 return str;
460 }
461
462
463 /* Context lookup table. */
464 static flag_context_list_table_ty *flag_context_list_table;
465
466
467 /* Maximum supported nesting depth. */
468 #define MAX_NESTING_DEPTH 1000
469
470 /* Current nesting depths. */
471 static int bracket_nesting_depth;
472 static int brace_nesting_depth;
473
474
475 /* Read an escape sequence. The value is an ISO-8859-1 character (in the
476 range 0x00..0xff) or a Unicode character (in the range 0x0000..0x10FFFF). */
477 static int
478 do_getc_escaped ()
479 {
480 int c;
481
482 c = phase1_getc ();
483 switch (c)
484 {
485 case EOF:
486 return '\\';
487 case 'a':
488 return '\a';
489 case 'b':
490 return '\b';
491 case 'f':
492 return '\f';
493 case 'n':
494 return '\n';
495 case 'r':
496 return '\r';
497 case 't':
498 return '\t';
499 case 'v':
500 return '\v';
501 case 'x':
502 {
503 unsigned int n = 0;
504 unsigned int i;
505
506 for (i = 0; i < 2; i++)
507 {
508 c = phase1_getc ();
509 if (c == EOF || !c_isxdigit ((unsigned char) c))
510 {
511 phase1_ungetc (c);
512 break;
513 }
514
515 if (c >= '0' && c <= '9')
516 n = (n << 4) + (c - '0');
517 else if (c >= 'A' && c <= 'F')
518 n = (n << 4) + (c - 'A' + 10);
519 else if (c >= 'a' && c <= 'f')
520 n = (n << 4) + (c - 'a' + 10);
521 }
522 return (i > 0 ? (unsigned char) n : 'x');
523 }
524 case 'u':
525 {
526 unsigned int n = 0;
527 unsigned int i;
528
529 for (i = 0; i < 4; i++)
530 {
531 c = phase1_getc ();
532 if (c == EOF || !c_isxdigit ((unsigned char) c))
533 {
534 phase1_ungetc (c);
535 break;
536 }
537
538 if (c >= '0' && c <= '9')
539 n = (n << 4) + (c - '0');
540 else if (c >= 'A' && c <= 'F')
541 n = (n << 4) + (c - 'A' + 10);
542 else if (c >= 'a' && c <= 'f')
543 n = (n << 4) + (c - 'a' + 10);
544 }
545 return (i > 0 ? n : 'u');
546 }
547 case 'U':
548 {
549 unsigned int n = 0;
550 unsigned int i;
551
552 for (i = 0; i < 8; i++)
553 {
554 c = phase1_getc ();
555 if (c == EOF || !c_isxdigit ((unsigned char) c) || n >= 0x11000)
556 {
557 phase1_ungetc (c);
558 break;
559 }
560
561 if (c >= '0' && c <= '9')
562 n = (n << 4) + (c - '0');
563 else if (c >= 'A' && c <= 'F')
564 n = (n << 4) + (c - 'A' + 10);
565 else if (c >= 'a' && c <= 'f')
566 n = (n << 4) + (c - 'a' + 10);
567 }
568 return (i > 0 ? n : 'u');
569 }
570 case '0': case '1': case '2': case '3': case '4':
571 case '5': case '6': case '7':
572 {
573 int n = c - '0';
574
575 c = phase1_getc ();
576 if (c != EOF)
577 {
578 if (c >= '0' && c <= '7')
579 {
580 n = (n << 3) + (c - '0');
581 c = phase1_getc ();
582 if (c != EOF)
583 {
584 if (c >= '0' && c <= '7')
585 n = (n << 3) + (c - '0');
586 else
587 phase1_ungetc (c);
588 }
589 }
590 else
591 phase1_ungetc (c);
592 }
593 return (unsigned char) n;
594 }
595 default:
596 /* Note: If c is non-ASCII, Tcl's behaviour is undefined here. */
597 return (unsigned char) c;
598 }
599 }
600
601 /* Read an escape sequence for a low surrogate Unicode character.
602 The value is in the range 0xDC00..0xDFFF.
603 Return -1 when none was seen. */
604 static int
605 do_getc_escaped_low_surrogate ()
606 {
607 int c;
608
609 c = phase1_getc ();
610 switch (c)
611 {
612 case 'u':
613 {
614 unsigned char buf[4];
615 unsigned int n = 0;
616 unsigned int i;
617
618 for (i = 0; i < 4; i++)
619 {
620 c = phase1_getc ();
621 if (c == EOF || !c_isxdigit ((unsigned char) c))
622 {
623 phase1_ungetc (c);
624 while (i > 0)
625 phase1_ungetc (buf[--i]);
626 phase1_ungetc ('u');
627 return -1;
628 }
629 buf[i] = c;
630
631 if (c >= '0' && c <= '9')
632 n = (n << 4) + (c - '0');
633 else if (c >= 'A' && c <= 'F')
634 n = (n << 4) + (c - 'A' + 10);
635 else if (c >= 'a' && c <= 'f')
636 n = (n << 4) + (c - 'a' + 10);
637 }
638 if (n >= 0xdc00 && n <= 0xdfff)
639 return n;
640 else
641 {
642 while (i > 0)
643 phase1_ungetc (buf[--i]);
644 phase1_ungetc ('u');
645 return -1;
646 }
647 }
648 default:
649 phase1_ungetc (c);
650 return -1;
651 }
652 }
653
654
655 enum terminator
656 {
657 te_space_separator, /* looking for space semicolon newline */
658 te_space_separator_bracket, /* looking for space semicolon newline ']' */
659 te_paren, /* looking for ')' */
660 te_quote /* looking for '"' */
661 };
662
663 /* Forward declaration of local functions. */
664 static enum word_type read_command_list (int looking_for,
665 flag_context_ty outer_context);
666
667 /* Accumulate tokens into the given word.
668 'looking_for' denotes a parse terminator combination.
669 Return the first character past the token. */
670 static int
671 accumulate_word (struct word *wp, enum terminator looking_for,
672 flag_context_ty context)
673 {
674 int c;
675
676 for (;;)
677 {
678 c = phase2_getc ();
679
680 if (c == EOF || c == CL_BRACE)
681 return c;
682 if ((looking_for == te_space_separator
683 || looking_for == te_space_separator_bracket)
684 && (c == ' ' || c == BS_NL
685 || c == '\t' || c == '\v' || c == '\f' || c == '\r'
686 || c == ';' || c == '\n'))
687 return c;
688 if (looking_for == te_space_separator_bracket && c == ']')
689 return c;
690 if (looking_for == te_paren && c == ')')
691 return c;
692 if (looking_for == te_quote && c == '"')
693 return c;
694
695 if (c == '$')
696 {
697 /* Distinguish $varname, ${varname} and lone $. */
698 c = phase2_getc ();
699 if (c == '{')
700 {
701 /* ${varname} */
702 do
703 c = phase2_getc ();
704 while (c != EOF && c != '}');
705 wp->type = t_other;
706 }
707 else
708 {
709 bool nonempty = false;
710
711 for (; c != EOF && c != CL_BRACE; c = phase2_getc ())
712 {
713 if (c_isalnum ((unsigned char) c) || (c == '_'))
714 {
715 nonempty = true;
716 continue;
717 }
718 if (c == ':')
719 {
720 c = phase2_getc ();
721 if (c == ':')
722 {
723 do
724 c = phase2_getc ();
725 while (c == ':');
726
727 phase2_ungetc (c);
728 nonempty = true;
729 continue;
730 }
731 phase2_ungetc (c);
732 c = ':';
733 }
734 break;
735 }
736 if (c == '(')
737 {
738 /* $varname(index) */
739 struct word index_word;
740
741 index_word.type = t_other;
742 c = accumulate_word (&index_word, te_paren, null_context);
743 if (c != EOF && c != ')')
744 phase2_ungetc (c);
745 wp->type = t_other;
746 }
747 else
748 {
749 phase2_ungetc (c);
750 if (nonempty)
751 {
752 /* $varname */
753 wp->type = t_other;
754 }
755 else
756 {
757 /* lone $ */
758 if (wp->type == t_string)
759 {
760 grow_token (wp->token);
761 wp->token->chars[wp->token->charcount++] = '$';
762 }
763 }
764 }
765 }
766 }
767 else if (c == '[')
768 {
769 if (++bracket_nesting_depth > MAX_NESTING_DEPTH)
770 {
771 error_with_progname = false;
772 error (EXIT_FAILURE, 0, _("%s:%d: error: too many open brackets"),
773 logical_file_name, line_number);
774 }
775 read_command_list (']', context);
776 bracket_nesting_depth--;
777 wp->type = t_other;
778 }
779 else if (c == '\\')
780 {
781 unsigned int uc = do_getc_escaped ();
782 assert (uc < 0x110000);
783 if (uc >= 0xd800 && uc <= 0xdfff)
784 {
785 if (uc < 0xdc00)
786 {
787 /* Saw a high surrogate Unicode character.
788 Is it followed by a low surrogate Unicode character? */
789 c = phase2_getc ();
790 if (c == '\\')
791 {
792 int uc2 = do_getc_escaped_low_surrogate ();
793 if (uc2 >= 0)
794 {
795 /* Saw a low surrogate Unicode character. */
796 assert (uc2 >= 0xdc00 && uc2 <= 0xdfff);
797 uc = 0x10000 + ((uc - 0xd800) << 10) + (uc2 - 0xdc00);
798 goto saw_unicode_escape;
799 }
800 }
801 phase2_ungetc (c);
802 }
803 error_with_progname = false;
804 error (0, 0, _("%s:%d: warning: invalid Unicode character"),
805 logical_file_name, line_number);
806 error_with_progname = true;
807 goto done_escape;
808 }
809 saw_unicode_escape:
810 {
811 unsigned char utf8buf[6];
812 int count = u8_uctomb (utf8buf, uc, 6);
813 int i;
814 assert (count > 0);
815 if (wp->type == t_string)
816 for (i = 0; i < count; i++)
817 {
818 grow_token (wp->token);
819 wp->token->chars[wp->token->charcount++] = utf8buf[i];
820 }
821 }
822 done_escape: ;
823 }
824 else
825 {
826 if (wp->type == t_string)
827 {
828 grow_token (wp->token);
829 wp->token->chars[wp->token->charcount++] = (unsigned char) c;
830 }
831 }
832 }
833 }
834
835
836 /* Read the next word.
837 'looking_for' denotes a parse terminator, either ']' or '\0'. */
838 static void
839 read_word (struct word *wp, int looking_for, flag_context_ty context)
840 {
841 int c;
842
843 do
844 c = phase2_getc ();
845 while (c == ' ' || c == BS_NL
846 || c == '\t' || c == '\v' || c == '\f' || c == '\r');
847
848 if (c == EOF)
849 {
850 wp->type = t_eof;
851 return;
852 }
853
854 if (c == CL_BRACE)
855 {
856 wp->type = t_brace;
857 last_non_comment_line = line_number;
858 return;
859 }
860
861 if (c == '\n')
862 {
863 /* Comments assumed to be grouped with a message must immediately
864 precede it, with no non-whitespace token on a line between both. */
865 if (last_non_comment_line > last_comment_line)
866 savable_comment_reset ();
867 wp->type = t_separator;
868 return;
869 }
870
871 if (c == ';')
872 {
873 wp->type = t_separator;
874 last_non_comment_line = line_number;
875 return;
876 }
877
878 if (looking_for == ']' && c == ']')
879 {
880 wp->type = t_bracket;
881 last_non_comment_line = line_number;
882 return;
883 }
884
885 if (c == '{')
886 {
887 int previous_depth;
888 enum word_type terminator;
889
890 /* Start a new nested character group, which lasts until the next
891 balanced '}' (ignoring \} things). */
892 previous_depth = phase2_push () - 1;
893
894 /* Interpret it as a command list. */
895 if (++brace_nesting_depth > MAX_NESTING_DEPTH)
896 {
897 error_with_progname = false;
898 error (EXIT_FAILURE, 0, _("%s:%d: error: too many open braces"),
899 logical_file_name, line_number);
900 }
901 terminator = read_command_list ('\0', null_context);
902 brace_nesting_depth--;
903
904 if (terminator == t_brace)
905 phase2_pop (previous_depth);
906
907 wp->type = t_other;
908 last_non_comment_line = line_number;
909 return;
910 }
911
912 wp->type = t_string;
913 wp->token = XMALLOC (struct token);
914 init_token (wp->token);
915 wp->line_number_at_start = line_number;
916
917 if (c == '"')
918 {
919 c = accumulate_word (wp, te_quote, context);
920 if (c != EOF && c != '"')
921 phase2_ungetc (c);
922 }
923 else
924 {
925 phase2_ungetc (c);
926 c = accumulate_word (wp,
927 looking_for == ']'
928 ? te_space_separator_bracket
929 : te_space_separator,
930 context);
931 if (c != EOF)
932 phase2_ungetc (c);
933 }
934
935 if (wp->type != t_string)
936 {
937 free_token (wp->token);
938 free (wp->token);
939 }
940 last_non_comment_line = line_number;
941 }
942
943
944 /* Read the next command.
945 'looking_for' denotes a parse terminator, either ']' or '\0'.
946 Returns the type of the word that terminated the command: t_separator or
947 t_bracket (only if looking_for is ']') or t_brace or t_eof. */
948 static enum word_type
949 read_command (int looking_for, flag_context_ty outer_context)
950 {
951 int c;
952
953 /* Skip whitespace and comments. */
954 for (;;)
955 {
956 c = phase2_getc ();
957
958 if (c == ' ' || c == BS_NL
959 || c == '\t' || c == '\v' || c == '\f' || c == '\r')
960 continue;
961 if (c == '#')
962 {
963 /* Skip a comment up to end of line. */
964 last_comment_line = line_number;
965 comment_start ();
966 for (;;)
967 {
968 c = phase2_getc ();
969 if (c == EOF || c == CL_BRACE || c == '\n')
970 break;
971 /* We skip all leading white space, but not EOLs. */
972 if (!(buflen == 0 && (c == ' ' || c == '\t')))
973 comment_add (c);
974 }
975 comment_line_end ();
976 continue;
977 }
978 break;
979 }
980 phase2_ungetc (c);
981
982 /* Read the words that make up the command. */
983 {
984 int arg = 0; /* Current argument number. */
985 flag_context_list_iterator_ty context_iter;
986 const struct callshapes *shapes = NULL;
987 struct arglist_parser *argparser = NULL;
988
989 for (;; arg++)
990 {
991 struct word inner;
992 flag_context_ty inner_context;
993
994 if (arg == 0)
995 inner_context = null_context;
996 else
997 inner_context =
998 inherited_context (outer_context,
999 flag_context_list_iterator_advance (
1000 &context_iter));
1001
1002 read_word (&inner, looking_for, inner_context);
1003
1004 /* Recognize end of command. */
1005 if (inner.type == t_separator || inner.type == t_bracket
1006 || inner.type == t_brace || inner.type == t_eof)
1007 {
1008 if (argparser != NULL)
1009 arglist_parser_done (argparser, arg);
1010 return inner.type;
1011 }
1012
1013 if (extract_all)
1014 {
1015 if (inner.type == t_string)
1016 {
1017 lex_pos_ty pos;
1018
1019 pos.file_name = logical_file_name;
1020 pos.line_number = inner.line_number_at_start;
1021 remember_a_message (mlp, NULL, string_of_word (&inner), false,
1022 false, inner_context, &pos,
1023 NULL, savable_comment, false);
1024 }
1025 }
1026
1027 if (arg == 0)
1028 {
1029 /* This is the function position. */
1030 if (inner.type == t_string)
1031 {
1032 char *function_name = string_of_word (&inner);
1033 char *stripped_name;
1034 void *keyword_value;
1035
1036 /* A leading "::" is redundant. */
1037 stripped_name = function_name;
1038 if (function_name[0] == ':' && function_name[1] == ':')
1039 stripped_name += 2;
1040
1041 if (hash_find_entry (&keywords,
1042 stripped_name, strlen (stripped_name),
1043 &keyword_value)
1044 == 0)
1045 shapes = (const struct callshapes *) keyword_value;
1046
1047 argparser = arglist_parser_alloc (mlp, shapes);
1048
1049 context_iter =
1050 flag_context_list_iterator (
1051 flag_context_list_table_lookup (
1052 flag_context_list_table,
1053 stripped_name, strlen (stripped_name)));
1054
1055 free (function_name);
1056 }
1057 else
1058 context_iter = null_context_list_iterator;
1059 }
1060 else
1061 {
1062 /* These are the argument positions. */
1063 if (argparser != NULL && inner.type == t_string)
1064 {
1065 char *s = string_of_word (&inner);
1066 mixed_string_ty *ms =
1067 mixed_string_alloc_simple (s, lc_string,
1068 logical_file_name,
1069 inner.line_number_at_start);
1070 free (s);
1071 arglist_parser_remember (argparser, arg, ms,
1072 inner_context,
1073 logical_file_name,
1074 inner.line_number_at_start,
1075 savable_comment, false);
1076 }
1077 }
1078
1079 free_word (&inner);
1080 }
1081 }
1082 }
1083
1084
1085 /* Read a list of commands.
1086 'looking_for' denotes a parse terminator, either ']' or '\0'.
1087 Returns the type of the word that terminated the command list:
1088 t_bracket (only if looking_for is ']') or t_brace or t_eof. */
1089 static enum word_type
1090 read_command_list (int looking_for, flag_context_ty outer_context)
1091 {
1092 for (;;)
1093 {
1094 enum word_type terminator;
1095
1096 terminator = read_command (looking_for, outer_context);
1097 if (terminator != t_separator)
1098 return terminator;
1099 }
1100 }
1101
1102
1103 void
1104 extract_tcl (FILE *f,
1105 const char *real_filename, const char *logical_filename,
1106 flag_context_list_table_ty *flag_table,
1107 msgdomain_list_ty *mdlp)
1108 {
1109 mlp = mdlp->item[0]->messages;
1110
1111 /* We convert our strings to UTF-8 encoding. */
1112 xgettext_current_source_encoding = po_charset_utf8;
1113
1114 fp = f;
1115 real_file_name = real_filename;
1116 logical_file_name = xstrdup (logical_filename);
1117 line_number = 1;
1118
1119 phase1_pushback_length = 0;
1120 phase2_pushback_length = 0;
1121
1122 /* Initially, no brace is open. */
1123 brace_depth = 1000000;
1124
1125 last_comment_line = -1;
1126 last_non_comment_line = -1;
1127
1128 flag_context_list_table = flag_table;
1129 bracket_nesting_depth = 0;
1130 brace_nesting_depth = 0;
1131
1132 init_keywords ();
1133
1134 /* Eat tokens until eof is seen. */
1135 read_command_list ('\0', null_context);
1136
1137 fp = NULL;
1138 real_file_name = NULL;
1139 logical_file_name = NULL;
1140 line_number = 0;
1141 }