1 /* Java printf format strings.
2 Copyright (C) 2001-2004, 2006-2007, 2009-2010, 2018-2020, 2023 Free Software Foundation, Inc.
3 Written by Bruno Haible <haible@clisp.cons.org>, 2001.
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17
18 #ifdef HAVE_CONFIG_H
19 # include <config.h>
20 #endif
21
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
24
25 #include "format.h"
26 #include "c-ctype.h"
27 #include "xalloc.h"
28 #include "xvasprintf.h"
29 #include "format-invalid.h"
30 #include "gettext.h"
31
32 #define _(str) gettext (str)
33
34 /* Java printf format strings are described in java/util/Formatter.html.
35 A directive
36 - starts with '%' or '%<' or '%m$' where m is a positive integer,
37 - is optionally followed by any of the characters '#', '0', '-', ' ', '+',
38 ',', '(',
39 - is optionally followed by a width specification: a nonempty digit sequence,
40 - is optionally followed by '.' and a precision specification: a nonempty
41 digit sequence,
42 - is finished by a specifier
43 - '%', 'n', that need no argument,
44 Restrictions:
45 - For '%': flags other than '-' are invalid, and a precision is
46 invalid.
47 - For 'n': flags, width, and precision are invalid.
48 - 'b', 'B', 'h', 'H', 's', 'S', that need a general argument.
49 Restrictions:
50 Flags other than '#' and '-' are invalid.
51 - 'c', 'C', that need a character argument,
52 Restrictions:
53 Flags other than '-' are invalid.
54 A precision is invalid.
55 - 'd', 'o', 'x', 'X', that need an integer argument,
56 Restrictions:
57 - For 'd': The flag '#' is invalid.
58 - For 'o', 'x', 'X': The flag ',' is invalid.
59 A precision is invalid.
60 - 'e', 'E', 'f', 'g', 'G', 'a', 'A', that need a floating-point argument,
61 Restrictions:
62 - For 'a', 'A': The flags ',', '(' are invalid.
63 - 't', 'T', followed by one of
64 'H', 'I', 'k', 'l', 'M', 'S', 'L', 'N', 'p', 'z', 'Z', 's', 'Q',
65 'B', 'b', 'h', 'A', 'a', 'C', 'Y', 'y', 'j', 'm', 'd', 'e',
66 'R', 'T', 'r', 'D', 'F', 'c'
67 that need a date/time argument.
68 Restrictions:
69 Flags other than '-' are invalid.
70 A precision is invalid.
71 Numbered ('%m$') and unnumbered argument specifications can be mixed in the
72 same string. Numbered argument specifications have no influence on the
73 unnumbered argument counter.
74 */
75
/* Kind of argument consumed by a directive.  The numeric values are spelled
   out explicitly and must stay as they are.  */
enum format_arg_type
{
  FAT_NONE = 0,           /* no argument ('%', 'n'), or conflicting uses */
  /* Basic types */
  FAT_GENERAL = 1,        /* 'b', 'B', 'h', 'H', 's', 'S' */
  FAT_CHARACTER = 2,      /* 'c', 'C' */
  FAT_INTEGER = 3,        /* 'd', 'o', 'x', 'X' */
  FAT_FLOATINGPOINT = 4,  /* 'e', 'E', 'f', 'g', 'G', 'a', 'A' */
  FAT_DATETIME = 5        /* 't', 'T' followed by a conversion suffix */
};
/* Storage type for an enum format_arg_type value.  When compiled as C++ a
   plain int is used instead of the enumeration type.  */
#ifndef __cplusplus
typedef enum format_arg_type format_arg_type_t;
#else
typedef int format_arg_type_t;
#endif
91
/* Bit masks recording which flags, width and precision were seen in a
   directive.  Each mask is a distinct single bit so that they can be or-ed
   together in one unsigned int.  */
enum
{
  /* Flags */
  FAT_ALTERNATE      = 0x001, /* '#' */
  FAT_ZERO_PADDED    = 0x002, /* '0' */
  FAT_LEFT_JUSTIFIED = 0x004, /* '-' */
  FAT_SPACE_SIGN     = 0x008, /* ' ' */
  FAT_SIGN           = 0x010, /* '+' */
  FAT_OBEY_LOCALE    = 0x020, /* ',' */
  FAT_MONETARY       = 0x040, /* '(' */
  /* Width */
  FAT_WIDTH          = 0x080,
  /* Precision */
  FAT_PRECISION      = 0x100,
};
107
/* One argument reference collected from a format string.  */
struct numbered_arg
{
  /* Argument number, counted from 1.  It comes from an explicit '%m$', from
     '%<' (the previous directive's argument), or from the running counter
     of unnumbered directives.  */
  unsigned int number;
  /* Kind of argument the directive expects.  */
  format_arg_type_t type;
};

/* Result of parsing one format string.  */
struct spec
{
  /* Number of directives seen, including those that take no argument.  */
  unsigned int directives;
  /* Number of valid entries in NUMBERED.  */
  unsigned int numbered_arg_count;
  /* Heap-allocated array of argument references, or NULL if there are none.
     After a successful format_parse it is sorted by argument number and
     contains each number at most once.  */
  struct numbered_arg *numbered;
};
120
/* Locale independent test for an ASCII decimal digit.
   Unlike <ctype.h> isdigit, which requires an 'unsigned char' value, this
   accepts a plain 'char' as well as an 'unsigned char'.  The argument is
   evaluated exactly once: subtracting '0' in unsigned arithmetic maps
   everything outside '0'..'9' to a value greater than 9.  */
#undef isdigit
#define isdigit(c) ((unsigned int) (c) - (unsigned int) '0' <= 9u)
126
127
128 static int
129 numbered_arg_compare (const void *p1, const void *p2)
130 {
131 unsigned int n1 = ((const struct numbered_arg *) p1)->number;
132 unsigned int n2 = ((const struct numbered_arg *) p2)->number;
133
134 return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
135 }
136
/* Constructors for the error messages specific to this format.  Each one
   expands to an xasprintf call on a translated message template; DIRNO is
   the number of the offending directive, FLAG the offending flag character
   and CONV the conversion character.  */

/* '%<' used although no previous directive consumed an argument.  */
#define INVALID_LAST_ARG(dirno) \
  xasprintf (_("In the directive number %u, the reference to the argument of the previous directive is invalid."), dirno)

/* '.' not followed by a digit sequence.  */
#define INVALID_PRECISION_MISSING(dirno) \
  xasprintf (_("In the directive number %u, the precision is missing."), dirno)

/* A flag that the conversion does not accept.  */
#define INVALID_FLAG_FOR(dirno,flag,conv) \
  xasprintf (_("In the directive number %u, the flag '%c' is invalid for the conversion '%c'."), dirno, flag, conv)

/* A width given to a conversion that does not accept one.  */
#define INVALID_WIDTH_FOR(dirno,conv) \
  xasprintf (_("In the directive number %u, a width is invalid for the conversion '%c'."), dirno, conv)

/* A precision given to a conversion that does not accept one.  */
#define INVALID_PRECISION_FOR(dirno,conv) \
  xasprintf (_("In the directive number %u, a precision is invalid for the conversion '%c'."), dirno, conv)
151
/* Error message for a 't' or 'T' conversion (CONV_CHAR) followed by a
   character (SUFFIX_CHAR) that is not a valid date/time conversion suffix.
   SUFFIX_CHAR comes straight from the format string and may be unprintable,
   so it is only included in the message when c_isprint accepts it;
   otherwise the wording that omits it is used.  (CONV_CHAR is always 't' or
   'T' at the only call site, so testing it would never select the second
   message.)  */
#define INVALID_DATETIME_CONVERSION_SUFFIX(directive_number,conv_char,suffix_char) \
  (c_isprint (suffix_char) \
   ? xasprintf (_("In the directive number %u, for the conversion '%c', the character '%c' is not a valid conversion suffix."), directive_number, conv_char, suffix_char) \
   : xasprintf (_("The character that terminates the directive number %u, for the conversion '%c', is not a valid conversion suffix."), directive_number, conv_char))
156
157 static void *
158 format_parse (const char *format, bool translated, char *fdi,
159 char **invalid_reason)
160 {
161 const char *const format_start = format;
162 struct spec spec;
163 unsigned int numbered_allocated;
164 struct spec *result;
165 unsigned int unnumbered_arg_count;
166 unsigned int last_arg_number;
167
168 spec.directives = 0;
169 spec.numbered_arg_count = 0;
170 spec.numbered = NULL;
171 numbered_allocated = 0;
172 unnumbered_arg_count = 0;
173 last_arg_number = 0;
174
175 for (; *format != '\0';)
176 if (*format++ == '%')
177 {
178 /* A directive. */
179 unsigned int number = 0;
180 unsigned int flags;
181 format_arg_type_t type;
182 unsigned int invalid_flags;
183
184 FDI_SET (format - 1, FMTDIR_START);
185 spec.directives++;
186
187 if (*format == '<')
188 {
189 if (last_arg_number == 0)
190 {
191 *invalid_reason = INVALID_LAST_ARG (spec.directives);
192 FDI_SET (format, FMTDIR_ERROR);
193 goto bad_format;
194 }
195 number = last_arg_number;
196 format++;
197 }
198 else if (isdigit (*format))
199 {
200 const char *f = format;
201 unsigned int m = 0;
202
203 do
204 {
205 m = 10 * m + (*f - '0');
206 f++;
207 }
208 while (isdigit (*f));
209
210 if (*f == '$')
211 {
212 if (m == 0)
213 {
214 *invalid_reason = INVALID_ARGNO_0 (spec.directives);
215 FDI_SET (f, FMTDIR_ERROR);
216 goto bad_format;
217 }
218 number = m;
219 format = ++f;
220 }
221 }
222
223 flags = 0;
224
225 /* Parse flags. */
226 for (;;)
227 {
228 if (*format == '#')
229 {
230 flags |= FAT_ALTERNATE;
231 format++;
232 }
233 else if (*format == '0')
234 {
235 flags |= FAT_ZERO_PADDED;
236 format++;
237 }
238 else if (*format == '-')
239 {
240 flags |= FAT_LEFT_JUSTIFIED;
241 format++;
242 }
243 else if (*format == ' ')
244 {
245 flags |= FAT_SPACE_SIGN;
246 format++;
247 }
248 else if (*format == '+')
249 {
250 flags |= FAT_SIGN;
251 format++;
252 }
253 else if (*format == ',')
254 {
255 flags |= FAT_OBEY_LOCALE;
256 format++;
257 }
258 else if (*format == '(')
259 {
260 flags |= FAT_MONETARY;
261 format++;
262 }
263 else
264 break;
265 }
266
267 /* Parse width. */
268 if (isdigit (*format))
269 {
270 do format++; while (isdigit (*format));
271 flags |= FAT_WIDTH;
272 }
273
274 /* Parse precision. */
275 if (*format == '.')
276 {
277 format++;
278
279 if (!isdigit (*format))
280 {
281 if (*format == '\0')
282 {
283 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
284 FDI_SET (format - 1, FMTDIR_ERROR);
285 }
286 else
287 {
288 *invalid_reason = INVALID_PRECISION_MISSING (spec.directives);
289 FDI_SET (format, FMTDIR_ERROR);
290 }
291 goto bad_format;
292 }
293
294 do format++; while (isdigit (*format));
295 flags |= FAT_PRECISION;
296 }
297
298 /* Parse conversion. */
299 switch (*format)
300 {
301 case '%':
302 type = FAT_NONE;
303 invalid_flags = (FAT_ALTERNATE | FAT_ZERO_PADDED | FAT_SPACE_SIGN
304 | FAT_SIGN | FAT_OBEY_LOCALE | FAT_MONETARY)
305 | FAT_PRECISION;
306 break;
307 case 'n':
308 type = FAT_NONE;
309 invalid_flags = (FAT_ALTERNATE | FAT_ZERO_PADDED | FAT_LEFT_JUSTIFIED
310 | FAT_SPACE_SIGN | FAT_SIGN | FAT_OBEY_LOCALE
311 | FAT_MONETARY)
312 | FAT_WIDTH | FAT_PRECISION;
313 break;
314 case 'b': case 'B':
315 case 'h': case 'H':
316 case 's': case 'S':
317 type = FAT_GENERAL;
318 invalid_flags = (FAT_ZERO_PADDED | FAT_SPACE_SIGN | FAT_SIGN
319 | FAT_OBEY_LOCALE | FAT_MONETARY);
320 break;
321 case 'c': case 'C':
322 type = FAT_CHARACTER;
323 invalid_flags = (FAT_ALTERNATE | FAT_ZERO_PADDED | FAT_SPACE_SIGN
324 | FAT_SIGN | FAT_OBEY_LOCALE | FAT_MONETARY)
325 | FAT_PRECISION;
326 break;
327 case 'd':
328 type = FAT_INTEGER;
329 invalid_flags = FAT_ALTERNATE | FAT_PRECISION;
330 break;
331 case 'o': case 'x': case 'X':
332 type = FAT_INTEGER;
333 invalid_flags = FAT_OBEY_LOCALE | FAT_PRECISION;
334 break;
335 case 'e': case 'E':
336 case 'f':
337 case 'g': case 'G':
338 type = FAT_FLOATINGPOINT;
339 invalid_flags = 0;
340 break;
341 case 'a': case 'A':
342 type = FAT_FLOATINGPOINT;
343 invalid_flags = FAT_OBEY_LOCALE | FAT_MONETARY;
344 break;
345 case 't': case 'T':
346 type = FAT_DATETIME;
347 invalid_flags = (FAT_ALTERNATE | FAT_ZERO_PADDED | FAT_SPACE_SIGN
348 | FAT_SIGN | FAT_OBEY_LOCALE | FAT_MONETARY)
349 | FAT_PRECISION;
350 break;
351 default:
352 if (*format == '\0')
353 {
354 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
355 FDI_SET (format - 1, FMTDIR_ERROR);
356 }
357 else
358 {
359 *invalid_reason =
360 INVALID_CONVERSION_SPECIFIER (spec.directives, *format);
361 FDI_SET (format, FMTDIR_ERROR);
362 }
363 goto bad_format;
364 }
365
366 /* Report invalid flags, width, precision. */
367 invalid_flags &= flags;
368 if (invalid_flags & FAT_ALTERNATE)
369 {
370 *invalid_reason = INVALID_FLAG_FOR (spec.directives, '#', *format);
371 FDI_SET (format, FMTDIR_ERROR);
372 goto bad_format;
373 }
374 if (invalid_flags & FAT_ZERO_PADDED)
375 {
376 *invalid_reason = INVALID_FLAG_FOR (spec.directives, '0', *format);
377 FDI_SET (format, FMTDIR_ERROR);
378 goto bad_format;
379 }
380 if (invalid_flags & FAT_LEFT_JUSTIFIED)
381 {
382 *invalid_reason = INVALID_FLAG_FOR (spec.directives, '-', *format);
383 FDI_SET (format, FMTDIR_ERROR);
384 goto bad_format;
385 }
386 if (invalid_flags & FAT_SPACE_SIGN)
387 {
388 *invalid_reason = INVALID_FLAG_FOR (spec.directives, ' ', *format);
389 FDI_SET (format, FMTDIR_ERROR);
390 goto bad_format;
391 }
392 if (invalid_flags & FAT_SIGN)
393 {
394 *invalid_reason = INVALID_FLAG_FOR (spec.directives, '+', *format);
395 FDI_SET (format, FMTDIR_ERROR);
396 goto bad_format;
397 }
398 if (invalid_flags & FAT_OBEY_LOCALE)
399 {
400 *invalid_reason = INVALID_FLAG_FOR (spec.directives, ',', *format);
401 FDI_SET (format, FMTDIR_ERROR);
402 goto bad_format;
403 }
404 if (invalid_flags & FAT_MONETARY)
405 {
406 *invalid_reason = INVALID_FLAG_FOR (spec.directives, '(', *format);
407 FDI_SET (format, FMTDIR_ERROR);
408 goto bad_format;
409 }
410 if (invalid_flags & FAT_WIDTH)
411 {
412 *invalid_reason = INVALID_WIDTH_FOR (spec.directives, *format);
413 FDI_SET (format, FMTDIR_ERROR);
414 goto bad_format;
415 }
416 if (invalid_flags & FAT_PRECISION)
417 {
418 *invalid_reason = INVALID_PRECISION_FOR (spec.directives, *format);
419 FDI_SET (format, FMTDIR_ERROR);
420 goto bad_format;
421 }
422
423 if (type == FAT_DATETIME)
424 {
425 format++;
426
427 /* Parse conversion suffix. */
428 switch (*format)
429 {
430 case 'H': case 'I': case 'k': case 'l': case 'M': case 'S':
431 case 'L': case 'N': case 'p': case 'z': case 'Z': case 's':
432 case 'Q':
433 case 'B': case 'b': case 'h': case 'A': case 'a': case 'C':
434 case 'Y': case 'y': case 'j': case 'm': case 'd': case 'e':
435 case 'R': case 'T': case 'r': case 'D': case 'F': case 'c':
436 break;
437 default:
438 if (*format == '\0')
439 {
440 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
441 FDI_SET (format - 1, FMTDIR_ERROR);
442 }
443 else
444 {
445 *invalid_reason =
446 INVALID_DATETIME_CONVERSION_SUFFIX (spec.directives,
447 format[-1], *format);
448 FDI_SET (format, FMTDIR_ERROR);
449 }
450 goto bad_format;
451 }
452 }
453
454 if (type != FAT_NONE)
455 {
456 if (number == 0)
457 number = ++unnumbered_arg_count;
458
459 if (numbered_allocated == spec.numbered_arg_count)
460 {
461 numbered_allocated = 2 * numbered_allocated + 1;
462 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
463 }
464 spec.numbered[spec.numbered_arg_count].number = number;
465 spec.numbered[spec.numbered_arg_count].type = type;
466 spec.numbered_arg_count++;
467
468 last_arg_number = number;
469 }
470
471 FDI_SET (format, FMTDIR_END);
472
473 format++;
474 }
475
476 /* Sort the numbered argument array, and eliminate duplicates. */
477 if (spec.numbered_arg_count > 1)
478 {
479 unsigned int i, j;
480 bool err;
481
482 qsort (spec.numbered, spec.numbered_arg_count,
483 sizeof (struct numbered_arg), numbered_arg_compare);
484
485 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */
486 err = false;
487 for (i = j = 0; i < spec.numbered_arg_count; i++)
488 if (j > 0 && spec.numbered[i].number == spec.numbered[j-1].number)
489 {
490 enum format_arg_type type1 = spec.numbered[i].type;
491 enum format_arg_type type2 = spec.numbered[j-1].type;
492 enum format_arg_type type_both;
493
494 if (type1 == type2)
495 type_both = type1;
496 else
497 {
498 /* Incompatible types. */
499 type_both = FAT_NONE;
500 if (!err)
501 *invalid_reason =
502 INVALID_INCOMPATIBLE_ARG_TYPES (spec.numbered[i].number);
503 err = true;
504 }
505
506 spec.numbered[j-1].type = type_both;
507 }
508 else
509 {
510 if (j < i)
511 {
512 spec.numbered[j].number = spec.numbered[i].number;
513 spec.numbered[j].type = spec.numbered[i].type;
514 }
515 j++;
516 }
517 spec.numbered_arg_count = j;
518 if (err)
519 /* *invalid_reason has already been set above. */
520 goto bad_format;
521 }
522
523 result = XMALLOC (struct spec);
524 *result = spec;
525 return result;
526
527 bad_format:
528 if (spec.numbered != NULL)
529 free (spec.numbered);
530 return NULL;
531 }
532
533 static void
534 format_free (void *descr)
535 {
536 struct spec *spec = (struct spec *) descr;
537
538 if (spec->numbered != NULL)
539 free (spec->numbered);
540 free (spec);
541 }
542
543 static int
544 format_get_number_of_directives (void *descr)
545 {
546 struct spec *spec = (struct spec *) descr;
547
548 return spec->directives;
549 }
550
551 static bool
552 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
553 formatstring_error_logger_t error_logger,
554 const char *pretty_msgid, const char *pretty_msgstr)
555 {
556 struct spec *spec1 = (struct spec *) msgid_descr;
557 struct spec *spec2 = (struct spec *) msgstr_descr;
558 bool err = false;
559
560 if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
561 {
562 unsigned int i, j;
563 unsigned int n1 = spec1->numbered_arg_count;
564 unsigned int n2 = spec2->numbered_arg_count;
565
566 /* Check that the argument numbers are the same.
567 Both arrays are sorted. We search for the first difference. */
568 for (i = 0, j = 0; i < n1 || j < n2; )
569 {
570 int cmp = (i >= n1 ? 1 :
571 j >= n2 ? -1 :
572 spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
573 spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
574 0);
575
576 if (cmp > 0)
577 {
578 if (error_logger)
579 error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in '%s'"),
580 spec2->numbered[j].number, pretty_msgstr,
581 pretty_msgid);
582 err = true;
583 break;
584 }
585 else if (cmp < 0)
586 {
587 if (equality)
588 {
589 if (error_logger)
590 error_logger (_("a format specification for argument %u doesn't exist in '%s'"),
591 spec1->numbered[i].number, pretty_msgstr);
592 err = true;
593 break;
594 }
595 else
596 i++;
597 }
598 else
599 j++, i++;
600 }
601 /* Check the argument types are the same. */
602 if (!err)
603 for (i = 0, j = 0; j < n2; )
604 {
605 if (spec1->numbered[i].number == spec2->numbered[j].number)
606 {
607 if (spec1->numbered[i].type != spec2->numbered[j].type)
608 {
609 if (error_logger)
610 error_logger (_("format specifications in '%s' and '%s' for argument %u are not the same"),
611 pretty_msgid, pretty_msgstr,
612 spec2->numbered[j].number);
613 err = true;
614 break;
615 }
616 j++, i++;
617 }
618 else
619 i++;
620 }
621 }
622
623 return err;
624 }
625
626
/* Parser descriptor for Java printf format strings.  The initializers are
   positional, so their order must match the member order of
   'struct formatstring_parser', which is declared outside this file.  */
struct formatstring_parser formatstring_java_printf =
{
  format_parse,
  format_free,
  format_get_number_of_directives,
  NULL, /* no function supplied for this member; NOTE(review): confirm its
           meaning in the declaration of struct formatstring_parser */
  format_check
};
635
636
637 #ifdef TEST
638
639 /* Test program: Print the argument list specification returned by
640 format_parse for strings read from standard input. */
641
642 #include <stdio.h>
643
644 static void
645 format_print (void *descr)
646 {
647 struct spec *spec = (struct spec *) descr;
648 unsigned int i;
649
650 if (spec == NULL)
651 {
652 printf ("INVALID");
653 return;
654 }
655
656 printf ("(");
657 for (i = 0; i < spec->numbered_arg_count; i++)
658 {
659 if (i > 0)
660 printf (" ");
661 switch (spec->numbered[i].type)
662 {
663 case FAT_GENERAL:
664 printf ("s");
665 break;
666 case FAT_CHARACTER:
667 printf ("c");
668 break;
669 case FAT_INTEGER:
670 printf ("d");
671 break;
672 case FAT_FLOATINGPOINT:
673 printf ("f");
674 break;
675 case FAT_DATETIME:
676 printf ("t");
677 break;
678 default:
679 abort ();
680 }
681 }
682 printf (")");
683 }
684
685 int
686 main ()
687 {
688 for (;;)
689 {
690 char *line = NULL;
691 size_t line_size = 0;
692 int line_len;
693 char *invalid_reason;
694 void *descr;
695
696 line_len = getline (&line, &line_size, stdin);
697 if (line_len < 0)
698 break;
699 if (line_len > 0 && line[line_len - 1] == '\n')
700 line[--line_len] = '\0';
701
702 invalid_reason = NULL;
703 descr = format_parse (line, false, NULL, &invalid_reason);
704
705 format_print (descr);
706 printf ("\n");
707 if (descr == NULL)
708 printf ("%s\n", invalid_reason);
709
710 free (invalid_reason);
711 free (line);
712 }
713
714 return 0;
715 }
716
717 /*
718 * For Emacs M-x compile
719 * Local Variables:
720 * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DHAVE_CONFIG_H -DTEST format-java-printf.c ../gnulib-lib/libgettextlib.la"
721 * End:
722 */
723
724 #endif /* TEST */