1 /* Formatted output to strings.
2 Copyright (C) 1999-2000, 2002-2003, 2006-2023 Free Software Foundation, Inc.
3
4 This file is free software: you can redistribute it and/or modify
5 it under the terms of the GNU Lesser General Public License as
6 published by the Free Software Foundation; either version 2.1 of the
7 License, or (at your option) any later version.
8
9 This file is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
16
17 /* This file can be parametrized with the following macros:
18 CHAR_T The element type of the format string.
19 CHAR_T_ONLY_ASCII Set to 1 to enable verification that all characters
20 in the format string are ASCII.
21 DIRECTIVE Structure denoting a format directive.
22 Depends on CHAR_T.
23 DIRECTIVES Structure denoting the set of format directives of a
24 format string. Depends on CHAR_T.
25 PRINTF_PARSE Function that parses a format string.
26 Depends on CHAR_T.
27 STATIC Set to 'static' to declare the function static.
28 ENABLE_UNISTDIO Set to 1 to enable the unistdio extensions. */
29
30 #ifndef PRINTF_PARSE
31 # include <config.h>
32 #endif
33
34 /* Specification. */
35 #ifndef PRINTF_PARSE
36 # include "printf-parse.h"
37 #endif
38
39 /* Default parameters. */
40 #ifndef PRINTF_PARSE
41 # define PRINTF_PARSE printf_parse
42 # define CHAR_T char
43 # define DIRECTIVE char_directive
44 # define DIRECTIVES char_directives
45 #endif
46
47 /* Get size_t, NULL. */
48 #include <stddef.h>
49
50 /* Get intmax_t. */
51 #include <stdint.h>
52
53 /* malloc(), realloc(), free(). */
54 #include <stdlib.h>
55
56 /* memcpy(). */
57 #include <string.h>
58
59 /* errno. */
60 #include <errno.h>
61
62 /* Checked size_t computations. */
63 #include "xsize.h"
64
65 #if CHAR_T_ONLY_ASCII
66 /* c_isascii(). */
67 # include "c-ctype.h"
68 #endif
69
70 #ifdef STATIC
71 STATIC
72 #endif
73 int
74 PRINTF_PARSE (const CHAR_T *format, DIRECTIVES *d, arguments *a)
75 {
76 const CHAR_T *cp = format; /* pointer into format */
77 size_t arg_posn = 0; /* number of regular arguments consumed */
78 size_t d_allocated; /* allocated elements of d->dir */
79 size_t a_allocated; /* allocated elements of a->arg */
80 size_t max_width_length = 0;
81 size_t max_precision_length = 0;
82
83 d->count = 0;
84 d_allocated = N_DIRECT_ALLOC_DIRECTIVES;
85 d->dir = d->direct_alloc_dir;
86
87 a->count = 0;
88 a_allocated = N_DIRECT_ALLOC_ARGUMENTS;
89 a->arg = a->direct_alloc_arg;
90
91 #define REGISTER_ARG(_index_,_type_) \
92 { \
93 size_t n = (_index_); \
94 if (n >= a_allocated) \
95 { \
96 size_t memory_size; \
97 argument *memory; \
98 \
99 a_allocated = xtimes (a_allocated, 2); \
100 if (a_allocated <= n) \
101 a_allocated = xsum (n, 1); \
102 memory_size = xtimes (a_allocated, sizeof (argument)); \
103 if (size_overflow_p (memory_size)) \
104 /* Overflow, would lead to out of memory. */ \
105 goto out_of_memory; \
106 memory = (argument *) (a->arg != a->direct_alloc_arg \
107 ? realloc (a->arg, memory_size) \
108 : malloc (memory_size)); \
109 if (memory == NULL) \
110 /* Out of memory. */ \
111 goto out_of_memory; \
112 if (a->arg == a->direct_alloc_arg) \
113 memcpy (memory, a->arg, a->count * sizeof (argument)); \
114 a->arg = memory; \
115 } \
116 while (a->count <= n) \
117 a->arg[a->count++].type = TYPE_NONE; \
118 if (a->arg[n].type == TYPE_NONE) \
119 a->arg[n].type = (_type_); \
120 else if (a->arg[n].type != (_type_)) \
121 /* Ambiguous type for positional argument. */ \
122 goto error; \
123 }
124
125 while (*cp != '\0')
126 {
127 CHAR_T c = *cp++;
128 if (c == '%')
129 {
130 size_t arg_index = ARG_NONE;
131 DIRECTIVE *dp = &d->dir[d->count]; /* pointer to next directive */
132
133 /* Initialize the next directive. */
134 dp->dir_start = cp - 1;
135 dp->flags = 0;
136 dp->width_start = NULL;
137 dp->width_end = NULL;
138 dp->width_arg_index = ARG_NONE;
139 dp->precision_start = NULL;
140 dp->precision_end = NULL;
141 dp->precision_arg_index = ARG_NONE;
142 dp->arg_index = ARG_NONE;
143
144 /* Test for positional argument. */
145 if (*cp >= '0' && *cp <= '9')
146 {
147 const CHAR_T *np;
148
149 for (np = cp; *np >= '0' && *np <= '9'; np++)
150 ;
151 if (*np == '$')
152 {
153 size_t n = 0;
154
155 for (np = cp; *np >= '0' && *np <= '9'; np++)
156 n = xsum (xtimes (n, 10), *np - '0');
157 if (n == 0)
158 /* Positional argument 0. */
159 goto error;
160 if (size_overflow_p (n))
161 /* n too large, would lead to out of memory later. */
162 goto error;
163 arg_index = n - 1;
164 cp = np + 1;
165 }
166 }
167
168 /* Read the flags. */
169 for (;;)
170 {
171 if (*cp == '\'')
172 {
173 dp->flags |= FLAG_GROUP;
174 cp++;
175 }
176 else if (*cp == '-')
177 {
178 dp->flags |= FLAG_LEFT;
179 cp++;
180 }
181 else if (*cp == '+')
182 {
183 dp->flags |= FLAG_SHOWSIGN;
184 cp++;
185 }
186 else if (*cp == ' ')
187 {
188 dp->flags |= FLAG_SPACE;
189 cp++;
190 }
191 else if (*cp == '#')
192 {
193 dp->flags |= FLAG_ALT;
194 cp++;
195 }
196 else if (*cp == '0')
197 {
198 dp->flags |= FLAG_ZERO;
199 cp++;
200 }
201 #if __GLIBC__ >= 2 && !defined __UCLIBC__
202 else if (*cp == 'I')
203 {
204 dp->flags |= FLAG_LOCALIZED;
205 cp++;
206 }
207 #endif
208 else
209 break;
210 }
211
212 /* Parse the field width. */
213 if (*cp == '*')
214 {
215 dp->width_start = cp;
216 cp++;
217 dp->width_end = cp;
218 if (max_width_length < 1)
219 max_width_length = 1;
220
221 /* Test for positional argument. */
222 if (*cp >= '0' && *cp <= '9')
223 {
224 const CHAR_T *np;
225
226 for (np = cp; *np >= '0' && *np <= '9'; np++)
227 ;
228 if (*np == '$')
229 {
230 size_t n = 0;
231
232 for (np = cp; *np >= '0' && *np <= '9'; np++)
233 n = xsum (xtimes (n, 10), *np - '0');
234 if (n == 0)
235 /* Positional argument 0. */
236 goto error;
237 if (size_overflow_p (n))
238 /* n too large, would lead to out of memory later. */
239 goto error;
240 dp->width_arg_index = n - 1;
241 cp = np + 1;
242 }
243 }
244 if (dp->width_arg_index == ARG_NONE)
245 {
246 dp->width_arg_index = arg_posn++;
247 if (dp->width_arg_index == ARG_NONE)
248 /* arg_posn wrapped around. */
249 goto error;
250 }
251 REGISTER_ARG (dp->width_arg_index, TYPE_INT);
252 }
253 else if (*cp >= '0' && *cp <= '9')
254 {
255 size_t width_length;
256
257 dp->width_start = cp;
258 for (; *cp >= '0' && *cp <= '9'; cp++)
259 ;
260 dp->width_end = cp;
261 width_length = dp->width_end - dp->width_start;
262 if (max_width_length < width_length)
263 max_width_length = width_length;
264 }
265
266 /* Parse the precision. */
267 if (*cp == '.')
268 {
269 cp++;
270 if (*cp == '*')
271 {
272 dp->precision_start = cp - 1;
273 cp++;
274 dp->precision_end = cp;
275 if (max_precision_length < 2)
276 max_precision_length = 2;
277
278 /* Test for positional argument. */
279 if (*cp >= '0' && *cp <= '9')
280 {
281 const CHAR_T *np;
282
283 for (np = cp; *np >= '0' && *np <= '9'; np++)
284 ;
285 if (*np == '$')
286 {
287 size_t n = 0;
288
289 for (np = cp; *np >= '0' && *np <= '9'; np++)
290 n = xsum (xtimes (n, 10), *np - '0');
291 if (n == 0)
292 /* Positional argument 0. */
293 goto error;
294 if (size_overflow_p (n))
295 /* n too large, would lead to out of memory
296 later. */
297 goto error;
298 dp->precision_arg_index = n - 1;
299 cp = np + 1;
300 }
301 }
302 if (dp->precision_arg_index == ARG_NONE)
303 {
304 dp->precision_arg_index = arg_posn++;
305 if (dp->precision_arg_index == ARG_NONE)
306 /* arg_posn wrapped around. */
307 goto error;
308 }
309 REGISTER_ARG (dp->precision_arg_index, TYPE_INT);
310 }
311 else
312 {
313 size_t precision_length;
314
315 dp->precision_start = cp - 1;
316 for (; *cp >= '0' && *cp <= '9'; cp++)
317 ;
318 dp->precision_end = cp;
319 precision_length = dp->precision_end - dp->precision_start;
320 if (max_precision_length < precision_length)
321 max_precision_length = precision_length;
322 }
323 }
324
325 {
326 arg_type type;
327
328 /* Parse argument type/size specifiers. */
329 /* Relevant for the conversion characters d, i. */
330 arg_type signed_type = TYPE_INT;
331 /* Relevant for the conversion characters b, o, u, x, X. */
332 arg_type unsigned_type = TYPE_UINT;
333 /* Relevant for the conversion characters n. */
334 arg_type pointer_type = TYPE_COUNT_INT_POINTER;
335 /* Relevant for the conversion characters a, A, e, E, f, F, g, G. */
336 arg_type floatingpoint_type = TYPE_DOUBLE;
337
338 if (*cp == 'h')
339 {
340 if (cp[1] == 'h')
341 {
342 signed_type = TYPE_SCHAR;
343 unsigned_type = TYPE_UCHAR;
344 pointer_type = TYPE_COUNT_SCHAR_POINTER;
345 cp += 2;
346 }
347 else
348 {
349 signed_type = TYPE_SHORT;
350 unsigned_type = TYPE_USHORT;
351 pointer_type = TYPE_COUNT_SHORT_POINTER;
352 cp++;
353 }
354 }
355 else if (*cp == 'l')
356 {
357 if (cp[1] == 'l')
358 {
359 signed_type = TYPE_LONGLONGINT;
360 unsigned_type = TYPE_ULONGLONGINT;
361 pointer_type = TYPE_COUNT_LONGLONGINT_POINTER;
362 /* For backward compatibility only. */
363 floatingpoint_type = TYPE_LONGDOUBLE;
364 cp += 2;
365 }
366 else
367 {
368 signed_type = TYPE_LONGINT;
369 unsigned_type = TYPE_ULONGINT;
370 pointer_type = TYPE_COUNT_LONGINT_POINTER;
371 cp++;
372 }
373 }
374 else if (*cp == 'j')
375 {
376 if (sizeof (intmax_t) > sizeof (long))
377 {
378 /* intmax_t = long long */
379 signed_type = TYPE_LONGLONGINT;
380 unsigned_type = TYPE_ULONGLONGINT;
381 pointer_type = TYPE_COUNT_LONGLONGINT_POINTER;
382 /* For backward compatibility only. */
383 floatingpoint_type = TYPE_LONGDOUBLE;
384 }
385 else if (sizeof (intmax_t) > sizeof (int))
386 {
387 /* intmax_t = long */
388 signed_type = TYPE_LONGINT;
389 unsigned_type = TYPE_ULONGINT;
390 pointer_type = TYPE_COUNT_LONGINT_POINTER;
391 }
392 cp++;
393 }
394 else if (*cp == 'z' || *cp == 'Z')
395 {
396 /* 'z' is standardized in ISO C 99, but glibc uses 'Z'
397 because the warning facility in gcc-2.95.2 understands
398 only 'Z' (see gcc-2.95.2/gcc/c-common.c:1784). */
399 if (sizeof (size_t) > sizeof (long))
400 {
401 /* size_t = unsigned long long */
402 signed_type = TYPE_LONGLONGINT;
403 unsigned_type = TYPE_ULONGLONGINT;
404 pointer_type = TYPE_COUNT_LONGLONGINT_POINTER;
405 /* For backward compatibility only. */
406 floatingpoint_type = TYPE_LONGDOUBLE;
407 }
408 else if (sizeof (size_t) > sizeof (int))
409 {
410 /* size_t = unsigned long */
411 signed_type = TYPE_LONGINT;
412 unsigned_type = TYPE_ULONGINT;
413 pointer_type = TYPE_COUNT_LONGINT_POINTER;
414 }
415 cp++;
416 }
417 else if (*cp == 't')
418 {
419 if (sizeof (ptrdiff_t) > sizeof (long))
420 {
421 /* ptrdiff_t = long long */
422 signed_type = TYPE_LONGLONGINT;
423 unsigned_type = TYPE_ULONGLONGINT;
424 pointer_type = TYPE_COUNT_LONGLONGINT_POINTER;
425 /* For backward compatibility only. */
426 floatingpoint_type = TYPE_LONGDOUBLE;
427 }
428 else if (sizeof (ptrdiff_t) > sizeof (int))
429 {
430 /* ptrdiff_t = long */
431 signed_type = TYPE_LONGINT;
432 unsigned_type = TYPE_ULONGINT;
433 pointer_type = TYPE_COUNT_LONGINT_POINTER;
434 }
435 cp++;
436 }
437 else if (*cp == 'w')
438 {
439 /* wN and wfN are standardized in ISO C 23. */
440 if (cp[1] == 'f')
441 {
442 if (cp[2] == '8')
443 {
444 signed_type = TYPE_INT_FAST8_T;
445 unsigned_type = TYPE_UINT_FAST8_T;
446 pointer_type = TYPE_COUNT_INT_FAST8_T_POINTER;
447 cp += 3;
448 }
449 else if (cp[2] == '1' && cp[3] == '6')
450 {
451 signed_type = TYPE_INT_FAST16_T;
452 unsigned_type = TYPE_UINT_FAST16_T;
453 pointer_type = TYPE_COUNT_INT_FAST16_T_POINTER;
454 cp += 4;
455 }
456 else if (cp[2] == '3' && cp[3] == '2')
457 {
458 signed_type = TYPE_INT_FAST32_T;
459 unsigned_type = TYPE_UINT_FAST32_T;
460 pointer_type = TYPE_COUNT_INT_FAST32_T_POINTER;
461 cp += 4;
462 }
463 else if (cp[2] == '6' && cp[3] == '4')
464 {
465 signed_type = TYPE_INT_FAST64_T;
466 unsigned_type = TYPE_UINT_FAST64_T;
467 pointer_type = TYPE_COUNT_INT_FAST64_T_POINTER;
468 cp += 4;
469 }
470 }
471 else
472 {
473 if (cp[1] == '8')
474 {
475 signed_type = TYPE_INT8_T;
476 unsigned_type = TYPE_UINT8_T;
477 pointer_type = TYPE_COUNT_INT8_T_POINTER;
478 cp += 2;
479 }
480 else if (cp[1] == '1' && cp[2] == '6')
481 {
482 signed_type = TYPE_INT16_T;
483 unsigned_type = TYPE_UINT16_T;
484 pointer_type = TYPE_COUNT_INT16_T_POINTER;
485 cp += 3;
486 }
487 else if (cp[1] == '3' && cp[2] == '2')
488 {
489 signed_type = TYPE_INT32_T;
490 unsigned_type = TYPE_UINT32_T;
491 pointer_type = TYPE_COUNT_INT32_T_POINTER;
492 cp += 3;
493 }
494 else if (cp[1] == '6' && cp[2] == '4')
495 {
496 signed_type = TYPE_INT64_T;
497 unsigned_type = TYPE_UINT64_T;
498 pointer_type = TYPE_COUNT_INT64_T_POINTER;
499 cp += 3;
500 }
501 }
502 }
503 else if (*cp == 'L')
504 {
505 signed_type = TYPE_LONGLONGINT;
506 unsigned_type = TYPE_ULONGLONGINT;
507 pointer_type = TYPE_COUNT_LONGLONGINT_POINTER;
508 floatingpoint_type = TYPE_LONGDOUBLE;
509 cp++;
510 }
511 #if defined __APPLE__ && defined __MACH__
512 /* On Mac OS X 10.3, PRIdMAX is defined as "qd".
513 We cannot change it to "lld" because PRIdMAX must also
514 be understood by the system's printf routines. */
515 else if (*cp == 'q')
516 {
517 if (64 / 8 > sizeof (long))
518 {
519 /* int64_t = long long */
520 signed_type = TYPE_LONGLONGINT;
521 unsigned_type = TYPE_ULONGLONGINT;
522 pointer_type = TYPE_COUNT_LONGLONGINT_POINTER;
523 /* For backward compatibility only. */
524 floatingpoint_type = TYPE_LONGDOUBLE;
525 }
526 else
527 {
528 /* int64_t = long */
529 signed_type = TYPE_LONGINT;
530 unsigned_type = TYPE_ULONGINT;
531 pointer_type = TYPE_COUNT_LONGINT_POINTER;
532 }
533 cp++;
534 }
535 #endif
536 #if defined _WIN32 && ! defined __CYGWIN__
537 /* On native Windows, PRIdMAX is defined as "I64d".
538 We cannot change it to "lld" because PRIdMAX must also
539 be understood by the system's printf routines. */
540 else if (*cp == 'I' && cp[1] == '6' && cp[2] == '4')
541 {
542 if (64 / 8 > sizeof (long))
543 {
544 /* __int64_t = long long */
545 signed_type = TYPE_LONGLONGINT;
546 unsigned_type = TYPE_ULONGLONGINT;
547 pointer_type = TYPE_COUNT_LONGLONGINT_POINTER;
548 /* For backward compatibility only. */
549 floatingpoint_type = TYPE_LONGDOUBLE;
550 }
551 else
552 {
553 /* __int64_t = long */
554 signed_type = TYPE_LONGINT;
555 unsigned_type = TYPE_ULONGINT;
556 pointer_type = TYPE_COUNT_LONGINT_POINTER;
557 }
558 cp++;
559 }
560 #endif
561
562 /* Read the conversion character. */
563 c = *cp++;
564 switch (c)
565 {
566 case 'd': case 'i':
567 type = signed_type;
568 break;
569 case 'b': case 'o': case 'u': case 'x': case 'X':
570 #if SUPPORT_GNU_PRINTF_DIRECTIVES \
571 || (__GLIBC__ + (__GLIBC_MINOR__ >= 35) > 2)
572 case 'B':
573 #endif
574 type = unsigned_type;
575 break;
576 case 'f': case 'F': case 'e': case 'E': case 'g': case 'G':
577 case 'a': case 'A':
578 type = floatingpoint_type;
579 break;
580 case 'c':
581 if (signed_type == TYPE_LONGINT
582 /* For backward compatibility only. */
583 || signed_type == TYPE_LONGLONGINT)
584 #if HAVE_WINT_T
585 type = TYPE_WIDE_CHAR;
586 #else
587 goto error;
588 #endif
589 else
590 type = TYPE_CHAR;
591 break;
592 #if HAVE_WINT_T
593 case 'C':
594 type = TYPE_WIDE_CHAR;
595 c = 'c';
596 break;
597 #endif
598 case 's':
599 if (signed_type == TYPE_LONGINT
600 /* For backward compatibility only. */
601 || signed_type == TYPE_LONGLONGINT)
602 #if HAVE_WCHAR_T
603 type = TYPE_WIDE_STRING;
604 #else
605 goto error;
606 #endif
607 else
608 type = TYPE_STRING;
609 break;
610 #if HAVE_WCHAR_T
611 case 'S':
612 type = TYPE_WIDE_STRING;
613 c = 's';
614 break;
615 #endif
616 case 'p':
617 type = TYPE_POINTER;
618 break;
619 case 'n':
620 type = pointer_type;
621 break;
622 #if ENABLE_UNISTDIO
623 /* The unistdio extensions. */
624 case 'U':
625 if (signed_type == TYPE_LONGLONGINT)
626 type = TYPE_U32_STRING;
627 else if (signed_type == TYPE_LONGINT)
628 type = TYPE_U16_STRING;
629 else
630 type = TYPE_U8_STRING;
631 break;
632 #endif
633 case '%':
634 type = TYPE_NONE;
635 break;
636 default:
637 /* Unknown conversion character. */
638 goto error;
639 }
640
641 if (type != TYPE_NONE)
642 {
643 dp->arg_index = arg_index;
644 if (dp->arg_index == ARG_NONE)
645 {
646 dp->arg_index = arg_posn++;
647 if (dp->arg_index == ARG_NONE)
648 /* arg_posn wrapped around. */
649 goto error;
650 }
651 REGISTER_ARG (dp->arg_index, type);
652 }
653 dp->conversion = c;
654 dp->dir_end = cp;
655 }
656
657 d->count++;
658 if (d->count >= d_allocated)
659 {
660 size_t memory_size;
661 DIRECTIVE *memory;
662
663 d_allocated = xtimes (d_allocated, 2);
664 memory_size = xtimes (d_allocated, sizeof (DIRECTIVE));
665 if (size_overflow_p (memory_size))
666 /* Overflow, would lead to out of memory. */
667 goto out_of_memory;
668 memory = (DIRECTIVE *) (d->dir != d->direct_alloc_dir
669 ? realloc (d->dir, memory_size)
670 : malloc (memory_size));
671 if (memory == NULL)
672 /* Out of memory. */
673 goto out_of_memory;
674 if (d->dir == d->direct_alloc_dir)
675 memcpy (memory, d->dir, d->count * sizeof (DIRECTIVE));
676 d->dir = memory;
677 }
678 }
679 #if CHAR_T_ONLY_ASCII
680 else if (!c_isascii (c))
681 {
682 /* Non-ASCII character. Not supported. */
683 goto error;
684 }
685 #endif
686 }
687 d->dir[d->count].dir_start = cp;
688
689 d->max_width_length = max_width_length;
690 d->max_precision_length = max_precision_length;
691 return 0;
692
693 error:
694 if (a->arg != a->direct_alloc_arg)
695 free (a->arg);
696 if (d->dir != d->direct_alloc_dir)
697 free (d->dir);
698 errno = EINVAL;
699 return -1;
700
701 out_of_memory:
702 if (a->arg != a->direct_alloc_arg)
703 free (a->arg);
704 if (d->dir != d->direct_alloc_dir)
705 free (d->dir);
706 errno = ENOMEM;
707 return -1;
708 }
709
710 #undef PRINTF_PARSE
711 #undef DIRECTIVES
712 #undef DIRECTIVE
713 #undef CHAR_T_ONLY_ASCII
714 #undef CHAR_T