1 /* Boost format strings.
2 Copyright (C) 2001-2004, 2006-2007, 2009, 2019-2020, 2023 Free Software Foundation, Inc.
3 Written by Bruno Haible <haible@clisp.cons.org>, 2006.
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17
18 #ifdef HAVE_CONFIG_H
19 # include <config.h>
20 #endif
21
22 #include <stdbool.h>
23 #include <stdlib.h>
24
25 #include "attribute.h"
26 #include "format.h"
27 #include "c-ctype.h"
28 #include "xalloc.h"
29 #include "xvasprintf.h"
30 #include "format-invalid.h"
31 #include "gettext.h"
32
33 #define _(str) gettext (str)
34
35 /* Boost format strings are described in
36 boost_1_33_1/libs/format/doc/format.html
37 and implemented in
38 boost_1_33_1/boost/format/parsing.hpp.
39 A directive (other than '%%')
40 - starts with '%' or '%|'; in the latter case it must end in '|',
41 - is continued either by
42 - 'm%' where m is a positive integer, starting with a nonzero digit;
43 in this case the directive must not have started with '%|'; or
44 - the following:
45 - optional: 'm$' where m is a positive integer, starting with a
46 nonzero digit,
47 - optional: any of the characters '#', '0', '-', ' ', '+', "'",
48 '_', '=', 'h', 'l',
49 - optional: a width specification: '*' (reads an argument) or '*m$'
50 or a nonempty digit sequence,
51 - optional: a '.' and a precision specification: '*' (reads an
52 argument) or '*m$' or a nonempty digit sequence,
53 - optional: any of the characters 'h', 'l', 'L',
54 - if the directive started with '%|':
55 an optional specifier and a final '|',
56 otherwise
57 a mandatory specifier.
58 If no specifier is given, it needs an argument of any type.
59 The possible specifiers are:
60 - 'c', 'C', that need a character argument,
61 - 's', 'S', that need an argument of any type,
62 - 'i', 'd', 'o', 'u', 'x', 'X', that need an integer argument,
63 - 'e', 'E', 'f', 'g', 'G', that need a floating-point argument,
64 - 'p', that needs a 'void *' argument,
65 - 't', that doesn't need an argument,
66 - 'TX', where X is any character, that doesn't need an argument,
67 - 'n', that needs a pointer to integer.
68 The Boost format string interpreter doesn't actually care about
69 the argument types, but we do, because it increases the likelihood
70 of detecting translator mistakes.
71 Numbered ('%m%' or '%m$' or '*m$') and unnumbered argument specifications
72 cannot be used in the same string.
73 */
74
/* Classification of the argument that a directive consumes.  The parser
   records one of these per argument so that msgid and msgstr can be
   compared argument by argument.  */
enum format_arg_type
{
  /* The directive consumes no argument; nothing is recorded for it.  */
  FAT_NONE    = 0,

  /* Basic types */

  /* Integer argument; also used for '*' width and precision arguments.  */
  FAT_INTEGER = 1,
  /* Floating-point argument.  */
  FAT_DOUBLE  = 2,
  /* Character argument.  */
  FAT_CHAR    = 3,
  /* Pointer argument.  */
  FAT_POINTER = 4,
  /* Argument of any type; compatible with every other type when the same
     argument number is used more than once in one string.  */
  FAT_ANY     = 5
};
85
86 struct numbered_arg
87 {
88 unsigned int number;
89 enum format_arg_type type;
90 };
91
/* Result of parsing one format string.  */
struct spec
{
  /* Number of directives seen, '%%' included.  */
  unsigned int directives;
  /* Number of valid entries in NUMBERED.  */
  unsigned int numbered_arg_count;
  /* Heap-allocated array of the arguments the string refers to, in
     ascending order of argument number and without duplicates once
     parsing has finished; NULL when no argument was recorded.  */
  struct numbered_arg *numbered;
};
98
/* Test for an ASCII decimal digit without consulting the locale.
   Unlike the <ctype.h> isdigit, whose argument must be representable as
   'unsigned char', this accepts plain 'char' as well: a value below '0'
   becomes a huge unsigned number after the subtraction and fails the
   comparison.  */
#undef isdigit
#define isdigit(c) ((unsigned int) ((c) - '0') <= 9u)
104
105
106 static int
107 numbered_arg_compare (const void *p1, const void *p2)
108 {
109 unsigned int n1 = ((const struct numbered_arg *) p1)->number;
110 unsigned int n2 = ((const struct numbered_arg *) p2)->number;
111
112 return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
113 }
114
115 static void *
116 format_parse (const char *format, bool translated, char *fdi,
117 char **invalid_reason)
118 {
119 const char *const format_start = format;
120 struct spec spec;
121 unsigned int numbered_allocated;
122 unsigned int unnumbered_arg_count;
123 struct spec *result;
124
125 spec.directives = 0;
126 spec.numbered_arg_count = 0;
127 spec.numbered = NULL;
128 numbered_allocated = 0;
129 unnumbered_arg_count = 0;
130
131 for (; *format != '\0';)
132 /* Invariant: spec.numbered_arg_count == 0 || unnumbered_arg_count == 0. */
133 if (*format++ == '%')
134 {
135 /* A directive. */
136 FDI_SET (format - 1, FMTDIR_START);
137 spec.directives++;
138
139 if (*format == '%')
140 format++;
141 else
142 {
143 bool brackets = false;
144 bool done = false;
145 unsigned int number = 0;
146 enum format_arg_type type = FAT_NONE;
147
148 if (*format == '|')
149 {
150 format++;
151 brackets = true;
152 }
153
154 if (isdigit (*format) && *format != '0')
155 {
156 const char *f = format;
157 unsigned int m = 0;
158
159 do
160 {
161 m = 10 * m + (*f - '0');
162 f++;
163 }
164 while (isdigit (*f));
165
166 if ((!brackets && *f == '%') || *f == '$')
167 {
168 if (m == 0) /* can happen if m overflows */
169 {
170 *invalid_reason = INVALID_ARGNO_0 (spec.directives);
171 FDI_SET (f, FMTDIR_ERROR);
172 goto bad_format;
173 }
174 number = m;
175 if (*f == '%')
176 {
177 type = FAT_ANY;
178 done = true;
179 }
180 format = ++f;
181 }
182 }
183
184 if (!done)
185 {
186 /* Parse flags. */
187 for (;;)
188 {
189 if (*format == ' ' || *format == '+' || *format == '-'
190 || *format == '#' || *format == '0' || *format == '\''
191 || *format == '_' || *format == '=' || *format == 'h'
192 || *format == 'l')
193 format++;
194 else
195 break;
196 }
197
198 /* Parse width. */
199 if (*format == '*')
200 {
201 unsigned int width_number = 0;
202
203 format++;
204
205 if (isdigit (*format))
206 {
207 const char *f = format;
208 unsigned int m = 0;
209
210 do
211 {
212 m = 10 * m + (*f - '0');
213 f++;
214 }
215 while (isdigit (*f));
216
217 if (*f == '$')
218 {
219 if (m == 0)
220 {
221 *invalid_reason =
222 INVALID_WIDTH_ARGNO_0 (spec.directives);
223 FDI_SET (f, FMTDIR_ERROR);
224 goto bad_format;
225 }
226 width_number = m;
227 format = ++f;
228 }
229 }
230
231 if (width_number)
232 {
233 /* Numbered argument. */
234
235 /* Numbered and unnumbered specifications are
236 exclusive. */
237 if (unnumbered_arg_count > 0)
238 {
239 *invalid_reason =
240 INVALID_MIXES_NUMBERED_UNNUMBERED ();
241 FDI_SET (format - 1, FMTDIR_ERROR);
242 goto bad_format;
243 }
244
245 if (numbered_allocated == spec.numbered_arg_count)
246 {
247 numbered_allocated = 2 * numbered_allocated + 1;
248 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
249 }
250 spec.numbered[spec.numbered_arg_count].number = width_number;
251 spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER;
252 spec.numbered_arg_count++;
253 }
254 else
255 {
256 /* Unnumbered argument. */
257
258 /* Numbered and unnumbered specifications are
259 exclusive. */
260 if (spec.numbered_arg_count > 0)
261 {
262 *invalid_reason =
263 INVALID_MIXES_NUMBERED_UNNUMBERED ();
264 FDI_SET (format - 1, FMTDIR_ERROR);
265 goto bad_format;
266 }
267
268 if (numbered_allocated == unnumbered_arg_count)
269 {
270 numbered_allocated = 2 * numbered_allocated + 1;
271 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
272 }
273 spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
274 spec.numbered[unnumbered_arg_count].type = FAT_INTEGER;
275 unnumbered_arg_count++;
276 }
277 }
278 else if (isdigit (*format))
279 {
280 do format++; while (isdigit (*format));
281 }
282
283 /* Parse precision. */
284 if (*format == '.')
285 {
286 format++;
287
288 if (*format == '*')
289 {
290 unsigned int precision_number = 0;
291
292 format++;
293
294 if (isdigit (*format))
295 {
296 const char *f = format;
297 unsigned int m = 0;
298
299 do
300 {
301 m = 10 * m + (*f - '0');
302 f++;
303 }
304 while (isdigit (*f));
305
306 if (*f == '$')
307 {
308 if (m == 0)
309 {
310 *invalid_reason =
311 INVALID_PRECISION_ARGNO_0 (spec.directives);
312 FDI_SET (f, FMTDIR_ERROR);
313 goto bad_format;
314 }
315 precision_number = m;
316 format = ++f;
317 }
318 }
319
320 if (precision_number)
321 {
322 /* Numbered argument. */
323
324 /* Numbered and unnumbered specifications are
325 exclusive. */
326 if (unnumbered_arg_count > 0)
327 {
328 *invalid_reason =
329 INVALID_MIXES_NUMBERED_UNNUMBERED ();
330 FDI_SET (format - 1, FMTDIR_ERROR);
331 goto bad_format;
332 }
333
334 if (numbered_allocated == spec.numbered_arg_count)
335 {
336 numbered_allocated = 2 * numbered_allocated + 1;
337 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
338 }
339 spec.numbered[spec.numbered_arg_count].number = precision_number;
340 spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER;
341 spec.numbered_arg_count++;
342 }
343 else
344 {
345 /* Unnumbered argument. */
346
347 /* Numbered and unnumbered specifications are
348 exclusive. */
349 if (spec.numbered_arg_count > 0)
350 {
351 *invalid_reason =
352 INVALID_MIXES_NUMBERED_UNNUMBERED ();
353 FDI_SET (format - 1, FMTDIR_ERROR);
354 goto bad_format;
355 }
356
357 if (numbered_allocated == unnumbered_arg_count)
358 {
359 numbered_allocated = 2 * numbered_allocated + 1;
360 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
361 }
362 spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
363 spec.numbered[unnumbered_arg_count].type = FAT_INTEGER;
364 unnumbered_arg_count++;
365 }
366 }
367 else if (isdigit (*format))
368 {
369 do format++; while (isdigit (*format));
370 }
371 }
372
373 /* Parse size. */
374 for (;;)
375 {
376 if (*format == 'h' || *format == 'l' || *format == 'L')
377 format++;
378 else
379 break;
380 }
381
382 switch (*format++)
383 {
384 case 'c': case 'C':
385 type = FAT_CHAR;
386 break;
387 case 's': case 'S':
388 type = FAT_ANY;
389 break;
390 case 'i': case 'd': case 'o': case 'u': case 'x': case 'X':
391 type = FAT_INTEGER;
392 break;
393 case 'e': case 'E': case 'f': case 'g': case 'G':
394 type = FAT_DOUBLE;
395 break;
396 case 'p':
397 type = FAT_POINTER;
398 break;
399 case 't':
400 type = FAT_NONE;
401 break;
402 case 'T':
403 if (*format == '\0')
404 {
405 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
406 FDI_SET (format - 1, FMTDIR_ERROR);
407 goto bad_format;
408 }
409 format++;
410 type = FAT_NONE;
411 break;
412 case 'n':
413 type = FAT_NONE;
414 break;
415 case '|':
416 if (brackets)
417 {
418 --format;
419 type = FAT_ANY;
420 break;
421 }
422 FALLTHROUGH;
423 default:
424 --format;
425 if (*format == '\0')
426 {
427 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
428 FDI_SET (format - 1, FMTDIR_ERROR);
429 }
430 else
431 {
432 *invalid_reason =
433 INVALID_CONVERSION_SPECIFIER (spec.directives,
434 *format);
435 FDI_SET (format, FMTDIR_ERROR);
436 }
437 goto bad_format;
438 }
439 if (brackets)
440 {
441 if (*format != '|')
442 {
443 if (*format == '\0')
444 {
445 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
446 FDI_SET (format - 1, FMTDIR_ERROR);
447 }
448 else
449 {
450 *invalid_reason =
451 xasprintf (_("The directive number %u starts with | but does not end with |."),
452 spec.directives);
453 FDI_SET (format, FMTDIR_ERROR);
454 }
455 goto bad_format;
456 }
457 format++;
458 }
459 }
460
461 if (type != FAT_NONE)
462 {
463 if (number)
464 {
465 /* Numbered argument. */
466
467 /* Numbered and unnumbered specifications are exclusive. */
468 if (unnumbered_arg_count > 0)
469 {
470 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
471 FDI_SET (format - 1, FMTDIR_ERROR);
472 goto bad_format;
473 }
474
475 if (numbered_allocated == spec.numbered_arg_count)
476 {
477 numbered_allocated = 2 * numbered_allocated + 1;
478 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
479 }
480 spec.numbered[spec.numbered_arg_count].number = number;
481 spec.numbered[spec.numbered_arg_count].type = type;
482 spec.numbered_arg_count++;
483 }
484 else
485 {
486 /* Unnumbered argument. */
487
488 /* Numbered and unnumbered specifications are exclusive. */
489 if (spec.numbered_arg_count > 0)
490 {
491 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
492 FDI_SET (format - 1, FMTDIR_ERROR);
493 goto bad_format;
494 }
495
496 if (numbered_allocated == unnumbered_arg_count)
497 {
498 numbered_allocated = 2 * numbered_allocated + 1;
499 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
500 }
501 spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
502 spec.numbered[unnumbered_arg_count].type = type;
503 unnumbered_arg_count++;
504 }
505 }
506 }
507
508 FDI_SET (format - 1, FMTDIR_END);
509 }
510
511 /* Convert the unnumbered argument array to numbered arguments. */
512 if (unnumbered_arg_count > 0)
513 spec.numbered_arg_count = unnumbered_arg_count;
514 /* Sort the numbered argument array, and eliminate duplicates. */
515 else if (spec.numbered_arg_count > 1)
516 {
517 unsigned int i, j;
518 bool err;
519
520 qsort (spec.numbered, spec.numbered_arg_count,
521 sizeof (struct numbered_arg), numbered_arg_compare);
522
523 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */
524 err = false;
525 for (i = j = 0; i < spec.numbered_arg_count; i++)
526 if (j > 0 && spec.numbered[i].number == spec.numbered[j-1].number)
527 {
528 enum format_arg_type type1 = spec.numbered[i].type;
529 enum format_arg_type type2 = spec.numbered[j-1].type;
530 enum format_arg_type type_both;
531
532 if (type1 == type2 || type2 == FAT_ANY)
533 type_both = type1;
534 else if (type1 == FAT_ANY)
535 type_both = type2;
536 else
537 {
538 /* Incompatible types. */
539 type_both = FAT_NONE;
540 if (!err)
541 *invalid_reason =
542 INVALID_INCOMPATIBLE_ARG_TYPES (spec.numbered[i].number);
543 err = true;
544 }
545
546 spec.numbered[j-1].type = type_both;
547 }
548 else
549 {
550 if (j < i)
551 {
552 spec.numbered[j].number = spec.numbered[i].number;
553 spec.numbered[j].type = spec.numbered[i].type;
554 }
555 j++;
556 }
557 spec.numbered_arg_count = j;
558 if (err)
559 /* *invalid_reason has already been set above. */
560 goto bad_format;
561 }
562
563 result = XMALLOC (struct spec);
564 *result = spec;
565 return result;
566
567 bad_format:
568 if (spec.numbered != NULL)
569 free (spec.numbered);
570 return NULL;
571 }
572
573 static void
574 format_free (void *descr)
575 {
576 struct spec *spec = (struct spec *) descr;
577
578 if (spec->numbered != NULL)
579 free (spec->numbered);
580 free (spec);
581 }
582
583 static int
584 format_get_number_of_directives (void *descr)
585 {
586 struct spec *spec = (struct spec *) descr;
587
588 return spec->directives;
589 }
590
591 static bool
592 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
593 formatstring_error_logger_t error_logger,
594 const char *pretty_msgid, const char *pretty_msgstr)
595 {
596 struct spec *spec1 = (struct spec *) msgid_descr;
597 struct spec *spec2 = (struct spec *) msgstr_descr;
598 bool err = false;
599
600 if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
601 {
602 unsigned int i, j;
603 unsigned int n1 = spec1->numbered_arg_count;
604 unsigned int n2 = spec2->numbered_arg_count;
605
606 /* Check that the argument numbers are the same.
607 Both arrays are sorted. We search for the first difference. */
608 for (i = 0, j = 0; i < n1 || j < n2; )
609 {
610 int cmp = (i >= n1 ? 1 :
611 j >= n2 ? -1 :
612 spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
613 spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
614 0);
615
616 if (cmp > 0)
617 {
618 if (error_logger)
619 error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in '%s'"),
620 spec2->numbered[j].number, pretty_msgstr,
621 pretty_msgid);
622 err = true;
623 break;
624 }
625 else if (cmp < 0)
626 {
627 if (equality)
628 {
629 if (error_logger)
630 error_logger (_("a format specification for argument %u doesn't exist in '%s'"),
631 spec1->numbered[i].number, pretty_msgstr);
632 err = true;
633 break;
634 }
635 else
636 i++;
637 }
638 else
639 j++, i++;
640 }
641 /* Check the argument types are the same. */
642 if (!err)
643 for (i = 0, j = 0; j < n2; )
644 {
645 if (spec1->numbered[i].number == spec2->numbered[j].number)
646 {
647 if (spec1->numbered[i].type != spec2->numbered[j].type)
648 {
649 if (error_logger)
650 error_logger (_("format specifications in '%s' and '%s' for argument %u are not the same"),
651 pretty_msgid, pretty_msgstr,
652 spec2->numbered[j].number);
653 err = true;
654 break;
655 }
656 j++, i++;
657 }
658 else
659 i++;
660 }
661 }
662
663 return err;
664 }
665
666
667 struct formatstring_parser formatstring_boost =
668 {
669 format_parse,
670 format_free,
671 format_get_number_of_directives,
672 NULL,
673 format_check
674 };
675
676
677 #ifdef TEST
678
679 /* Test program: Print the argument list specification returned by
680 format_parse for strings read from standard input. */
681
682 #include <stdio.h>
683
684 static void
685 format_print (void *descr)
686 {
687 struct spec *spec = (struct spec *) descr;
688 unsigned int last;
689 unsigned int i;
690
691 if (spec == NULL)
692 {
693 printf ("INVALID");
694 return;
695 }
696
697 printf ("(");
698 last = 1;
699 for (i = 0; i < spec->numbered_arg_count; i++)
700 {
701 unsigned int number = spec->numbered[i].number;
702
703 if (i > 0)
704 printf (" ");
705 if (number < last)
706 abort ();
707 for (; last < number; last++)
708 printf ("_ ");
709 switch (spec->numbered[i].type)
710 {
711 case FAT_INTEGER:
712 printf ("i");
713 break;
714 case FAT_DOUBLE:
715 printf ("f");
716 break;
717 case FAT_CHAR:
718 printf ("c");
719 break;
720 case FAT_POINTER:
721 printf ("p");
722 break;
723 case FAT_ANY:
724 printf ("*");
725 break;
726 default:
727 abort ();
728 }
729 last = number + 1;
730 }
731 printf (")");
732 }
733
734 int
735 main ()
736 {
737 for (;;)
738 {
739 char *line = NULL;
740 size_t line_size = 0;
741 int line_len;
742 char *invalid_reason;
743 void *descr;
744
745 line_len = getline (&line, &line_size, stdin);
746 if (line_len < 0)
747 break;
748 if (line_len > 0 && line[line_len - 1] == '\n')
749 line[--line_len] = '\0';
750
751 invalid_reason = NULL;
752 descr = format_parse (line, false, NULL, &invalid_reason);
753
754 format_print (descr);
755 printf ("\n");
756 if (descr == NULL)
757 printf ("%s\n", invalid_reason);
758
759 free (invalid_reason);
760 free (line);
761 }
762
763 return 0;
764 }
765
766 /*
767 * For Emacs M-x compile
768 * Local Variables:
769 * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DHAVE_CONFIG_H -DTEST format-boost.c ../gnulib-lib/libgettextlib.la"
770 * End:
771 */
772
773 #endif /* TEST */
774