1 /* Formatted output to strings.
2 Copyright (C) 1999-2000, 2002-2003, 2006-2019 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU Lesser General Public License as published by
6 the Free Software Foundation; either version 2.1, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public License along
15 with this program; if not, see <https://www.gnu.org/licenses/>. */
16
17 /* This file can be parametrized with the following macros:
18 CHAR_T The element type of the format string.
19 CHAR_T_ONLY_ASCII Set to 1 to enable verification that all characters
20 in the format string are ASCII.
21 DIRECTIVE Structure denoting a format directive.
22 Depends on CHAR_T.
23 DIRECTIVES Structure denoting the set of format directives of a
24 format string. Depends on CHAR_T.
25 PRINTF_PARSE Function that parses a format string.
26 Depends on CHAR_T.
27 STATIC Set to 'static' to declare the function static.
28 ENABLE_UNISTDIO Set to 1 to enable the unistdio extensions. */
29
30 #ifndef PRINTF_PARSE
31 # include <config.h>
32 #endif
33
34 #include "g-gnulib.h"
35
36 /* Specification. */
37 #ifndef PRINTF_PARSE
38 # include "printf-parse.h"
39 #endif
40
41 /* Default parameters.
   Used only when the includer has not defined PRINTF_PARSE: the parser is
   then named printf_parse, works on 'char' format strings, and uses
   char_directive / char_directives as its directive types.  */
42 #ifndef PRINTF_PARSE
43 # define PRINTF_PARSE printf_parse
44 # define CHAR_T char
45 # define DIRECTIVE char_directive
46 # define DIRECTIVES char_directives
47 #endif
48
49 /* Get size_t, NULL. */
50 #include <stddef.h>
51
52 /* Get intmax_t. */
53 #if defined IN_LIBINTL || defined IN_LIBASPRINTF
54 # if HAVE_STDINT_H_WITH_UINTMAX
55 # include <stdint.h>
56 # endif
57 # if HAVE_INTTYPES_H_WITH_UINTMAX
58 # include <inttypes.h>
59 # endif
60 #else
61 # include <stdint.h>
62 #endif
63
64 /* malloc(), realloc(), free(). */
65 #include <stdlib.h>
66
67 /* memcpy(). */
68 #include <string.h>
69
70 /* errno. */
71 #include <errno.h>
72
73 /* Checked size_t computations. */
74 #include "xsize.h"
75
76 #if CHAR_T_ONLY_ASCII
77 /* c_isascii(). */
78 # include "c-ctype.h"
79 #endif
80
/* Parse the NUL-terminated format string FORMAT.

   For every '%' directive one entry is appended to d->dir, recording where
   the directive starts and ends, its flags, the spans of its width and
   precision, the indices of the arguments it consumes and its conversion
   character.  For every argument consumed, a->arg[index].type is set to the
   type that argument must have.

   d->dir and a->arg start out pointing at the direct_alloc_dir and
   direct_alloc_arg arrays inside *d and *a; they are replaced by
   malloc()ed storage once those arrays are too small.

   Returns 0 on success.  In that case d->dir[d->count].dir_start points at
   the terminating NUL of FORMAT, and d->max_width_length and
   d->max_precision_length hold the longest width and precision spans seen.

   Returns -1 on failure, after freeing any heap storage obtained here, with
   errno set to EINVAL for a malformed format string or ENOMEM when an
   allocation fails or a size computation overflows.  */
81 #ifdef STATIC
82 STATIC
83 #endif
84 int
85 PRINTF_PARSE (const CHAR_T *format, DIRECTIVES *d, arguments *a)
86 {
87 const CHAR_T *cp = format; /* pointer into format */
88 size_t arg_posn = 0; /* number of regular arguments consumed */
89 size_t d_allocated; /* allocated elements of d->dir */
90 size_t a_allocated; /* allocated elements of a->arg */
91 size_t max_width_length = 0; /* longest width span seen so far */
92 size_t max_precision_length = 0; /* longest precision span seen so far */
93
/* Begin with the fixed-size arrays embedded in *d and *a.  */
94 d->count = 0;
95 d_allocated = N_DIRECT_ALLOC_DIRECTIVES;
96 d->dir = d->direct_alloc_dir;
97
98 a->count = 0;
99 a_allocated = N_DIRECT_ALLOC_ARGUMENTS;
100 a->arg = a->direct_alloc_arg;
101
/* REGISTER_ARG (INDEX, TYPE) records that the argument with 0-based index
   INDEX must have type TYPE.
   If INDEX does not fit, a->arg is grown to at least INDEX + 1 elements
   (doubling when that is enough); the first growth copies the entries out
   of the embedded array into malloc()ed storage.
   Arguments between the old a->count and INDEX are marked TYPE_NONE.
   Jumps to 'out_of_memory' when growing fails or the size overflows, and to
   'error' when the argument was already registered with a different type.  */
102 #define REGISTER_ARG(_index_,_type_) \
103 { \
104 size_t n = (_index_); \
105 if (n >= a_allocated) \
106 { \
107 size_t memory_size; \
108 argument *memory; \
109 \
110 a_allocated = xtimes (a_allocated, 2); \
111 if (a_allocated <= n) \
112 a_allocated = xsum (n, 1); \
113 memory_size = xtimes (a_allocated, sizeof (argument)); \
114 if (size_overflow_p (memory_size)) \
115 /* Overflow, would lead to out of memory. */ \
116 goto out_of_memory; \
117 memory = (argument *) (a->arg != a->direct_alloc_arg \
118 ? realloc (a->arg, memory_size) \
119 : malloc (memory_size)); \
120 if (memory == NULL) \
121 /* Out of memory. */ \
122 goto out_of_memory; \
123 if (a->arg == a->direct_alloc_arg) \
124 memcpy (memory, a->arg, a->count * sizeof (argument)); \
125 a->arg = memory; \
126 } \
127 while (a->count <= n) \
128 a->arg[a->count++].type = TYPE_NONE; \
129 if (a->arg[n].type == TYPE_NONE) \
130 a->arg[n].type = (_type_); \
131 else if (a->arg[n].type != (_type_)) \
132 /* Ambiguous type for positional argument. */ \
133 goto error; \
134 }
135
/* Walk the format string.  Only '%' opens a directive; any other character
   is skipped, or rejected when CHAR_T_ONLY_ASCII is set and it is not
   ASCII.  */
136 while (*cp != '\0')
137 {
138 CHAR_T c = *cp++;
139 if (c == '%')
140 {
141 size_t arg_index = ARG_NONE;
142 DIRECTIVE *dp = &d->dir[d->count]; /* pointer to next directive */
143
144 /* Initialize the next directive. */
145 dp->dir_start = cp - 1; /* cp is already past the '%' */
146 dp->flags = 0;
147 dp->width_start = NULL;
148 dp->width_end = NULL;
149 dp->width_arg_index = ARG_NONE;
150 dp->precision_start = NULL;
151 dp->precision_end = NULL;
152 dp->precision_arg_index = ARG_NONE;
153 dp->arg_index = ARG_NONE;
154
155 /* Test for positional argument. */
/* Digits count as a positional argument number only when a '$' follows
   them; otherwise cp is left where it was and the digits are handled by
   the flag and width parsing below.  The number is 1-based in the format
   string and stored 0-based.  */
156 if (*cp >= '0' && *cp <= '9')
157 {
158 const CHAR_T *np;
159
160 for (np = cp; *np >= '0' && *np <= '9'; np++)
161 ;
162 if (*np == '$')
163 {
164 size_t n = 0;
165
166 for (np = cp; *np >= '0' && *np <= '9'; np++)
167 n = xsum (xtimes (n, 10), *np - '0');
168 if (n == 0)
169 /* Positional argument 0. */
170 goto error;
171 if (size_overflow_p (n))
172 /* n too large, would lead to out of memory later. */
173 goto error;
174 arg_index = n - 1;
175 cp = np + 1;
176 }
177 }
178
179 /* Read the flags. */
/* Flags may appear in any order and may repeat; each one just sets its
   bit in dp->flags.  */
180 for (;;)
181 {
182 if (*cp == '\'')
183 {
184 dp->flags |= FLAG_GROUP;
185 cp++;
186 }
187 else if (*cp == '-')
188 {
189 dp->flags |= FLAG_LEFT;
190 cp++;
191 }
192 else if (*cp == '+')
193 {
194 dp->flags |= FLAG_SHOWSIGN;
195 cp++;
196 }
197 else if (*cp == ' ')
198 {
199 dp->flags |= FLAG_SPACE;
200 cp++;
201 }
202 else if (*cp == '#')
203 {
204 dp->flags |= FLAG_ALT;
205 cp++;
206 }
207 else if (*cp == '0')
208 {
209 dp->flags |= FLAG_ZERO;
210 cp++;
211 }
212 #if __GLIBC__ >= 2 && !defined __UCLIBC__
213 else if (*cp == 'I')
214 {
215 dp->flags |= FLAG_LOCALIZED;
216 cp++;
217 }
218 #endif
219 else
220 break;
221 }
222
223 /* Parse the field width. */
/* A '*' width takes its value from an int argument: either the positional
   one named right after the '*', or the next regular argument.  For the
   maximum-length bookkeeping a '*' width counts as a span of length 1.  */
224 if (*cp == '*')
225 {
226 dp->width_start = cp;
227 cp++;
228 dp->width_end = cp;
229 if (max_width_length < 1)
230 max_width_length = 1;
231
232 /* Test for positional argument. */
233 if (*cp >= '0' && *cp <= '9')
234 {
235 const CHAR_T *np;
236
237 for (np = cp; *np >= '0' && *np <= '9'; np++)
238 ;
239 if (*np == '$')
240 {
241 size_t n = 0;
242
243 for (np = cp; *np >= '0' && *np <= '9'; np++)
244 n = xsum (xtimes (n, 10), *np - '0');
245 if (n == 0)
246 /* Positional argument 0. */
247 goto error;
248 if (size_overflow_p (n))
249 /* n too large, would lead to out of memory later. */
250 goto error;
251 dp->width_arg_index = n - 1;
252 cp = np + 1;
253 }
254 }
255 if (dp->width_arg_index == ARG_NONE)
256 {
257 dp->width_arg_index = arg_posn++;
258 if (dp->width_arg_index == ARG_NONE)
259 /* arg_posn wrapped around. */
260 goto error;
261 }
262 REGISTER_ARG (dp->width_arg_index, TYPE_INT);
263 }
264 else if (*cp >= '0' && *cp <= '9')
265 {
266 size_t width_length;
267
/* Literal width: only the span of digits is recorded, the value is not
   converted here.  */
268 dp->width_start = cp;
269 for (; *cp >= '0' && *cp <= '9'; cp++)
270 ;
271 dp->width_end = cp;
272 width_length = dp->width_end - dp->width_start;
273 if (max_width_length < width_length)
274 max_width_length = width_length;
275 }
276
277 /* Parse the precision. */
/* The recorded precision span begins at the '.', so a '.' followed by '*'
   has length 2 and a bare '.' has length 1.  */
278 if (*cp == '.')
279 {
280 cp++;
281 if (*cp == '*')
282 {
283 dp->precision_start = cp - 1;
284 cp++;
285 dp->precision_end = cp;
286 if (max_precision_length < 2)
287 max_precision_length = 2;
288
289 /* Test for positional argument. */
290 if (*cp >= '0' && *cp <= '9')
291 {
292 const CHAR_T *np;
293
294 for (np = cp; *np >= '0' && *np <= '9'; np++)
295 ;
296 if (*np == '$')
297 {
298 size_t n = 0;
299
300 for (np = cp; *np >= '0' && *np <= '9'; np++)
301 n = xsum (xtimes (n, 10), *np - '0');
302 if (n == 0)
303 /* Positional argument 0. */
304 goto error;
305 if (size_overflow_p (n))
306 /* n too large, would lead to out of memory
307 later. */
308 goto error;
309 dp->precision_arg_index = n - 1;
310 cp = np + 1;
311 }
312 }
313 if (dp->precision_arg_index == ARG_NONE)
314 {
315 dp->precision_arg_index = arg_posn++;
316 if (dp->precision_arg_index == ARG_NONE)
317 /* arg_posn wrapped around. */
318 goto error;
319 }
320 REGISTER_ARG (dp->precision_arg_index, TYPE_INT);
321 }
322 else
323 {
324 size_t precision_length;
325
326 dp->precision_start = cp - 1;
327 for (; *cp >= '0' && *cp <= '9'; cp++)
328 ;
329 dp->precision_end = cp;
330 precision_length = dp->precision_end - dp->precision_start;
331 if (max_precision_length < precision_length)
332 max_precision_length = precision_length;
333 }
334 }
335
336 {
337 arg_type type;
338
339 /* Parse argument type/size specifiers. */
340 {
/* 'flags' accumulates the size modifiers seen:
     1      one 'h'
     1 | 2  two 'h' (the second 'h' sets bit 1 because bit 0 is already set)
     4      'L'
     8      added for each 'l', so "ll" yields 16
   'j', 'z'/'Z', 't' (and 'q' / "I64" on the platforms that have them) add
   8 or 16 depending on how the corresponding type compares in size with
   'long' and 'int', or add nothing when it is no larger than 'int'.  */
341 int flags = 0;
342
343 for (;;)
344 {
345 if (*cp == 'h')
346 {
347 flags |= (1 << (flags & 1));
348 cp++;
349 }
350 else if (*cp == 'L')
351 {
352 flags |= 4;
353 cp++;
354 }
355 else if (*cp == 'l')
356 {
357 flags += 8;
358 cp++;
359 }
360 else if (*cp == 'j')
361 {
362 if (sizeof (intmax_t) > sizeof (long))
363 {
364 /* intmax_t = long long */
365 flags += 16;
366 }
367 else if (sizeof (intmax_t) > sizeof (int))
368 {
369 /* intmax_t = long */
370 flags += 8;
371 }
372 cp++;
373 }
374 else if (*cp == 'z' || *cp == 'Z')
375 {
376 /* 'z' is standardized in ISO C 99, but glibc uses 'Z'
377 because the warning facility in gcc-2.95.2 understands
378 only 'Z' (see gcc-2.95.2/gcc/c-common.c:1784). */
379 if (sizeof (size_t) > sizeof (long))
380 {
381 /* size_t = long long */
382 flags += 16;
383 }
384 else if (sizeof (size_t) > sizeof (int))
385 {
386 /* size_t = long */
387 flags += 8;
388 }
389 cp++;
390 }
391 else if (*cp == 't')
392 {
393 if (sizeof (ptrdiff_t) > sizeof (long))
394 {
395 /* ptrdiff_t = long long */
396 flags += 16;
397 }
398 else if (sizeof (ptrdiff_t) > sizeof (int))
399 {
400 /* ptrdiff_t = long */
401 flags += 8;
402 }
403 cp++;
404 }
405 #if defined __APPLE__ && defined __MACH__
406 /* On Mac OS X 10.3, PRIdMAX is defined as "qd".
407 We cannot change it to "lld" because PRIdMAX must also
408 be understood by the system's printf routines. */
409 else if (*cp == 'q')
410 {
411 if (64 / 8 > sizeof (long))
412 {
413 /* int64_t = long long */
414 flags += 16;
415 }
416 else
417 {
418 /* int64_t = long */
419 flags += 8;
420 }
421 cp++;
422 }
423 #endif
424 #if defined _WIN32 && ! defined __CYGWIN__
425 /* On native Windows, PRIdMAX is defined as "I64d".
426 We cannot change it to "lld" because PRIdMAX must also
427 be understood by the system's printf routines. */
428 else if (*cp == 'I' && cp[1] == '6' && cp[2] == '4')
429 {
430 if (64 / 8 > sizeof (long))
431 {
432 /* __int64 = long long */
433 flags += 16;
434 }
435 else
436 {
437 /* __int64 = long */
438 flags += 8;
439 }
440 cp += 3;
441 }
442 #endif
443 else
444 break;
445 }
446
447 /* Read the conversion character. */
/* If the format string ends inside a directive, c is '\0' here and the
   'default' case below rejects it.  */
448 c = *cp++;
449 switch (c)
450 {
451 case 'd': case 'i':
452 #if HAVE_LONG_LONG
453 /* If 'long long' exists and is larger than 'long': */
454 if (flags >= 16 || (flags & 4))
455 type = TYPE_LONGLONGINT;
456 else
457 #endif
458 /* If 'long long' exists and is the same as 'long', we parse
459 "lld" into TYPE_LONGINT. */
460 if (flags >= 8)
461 type = TYPE_LONGINT;
462 else if (flags & 2)
463 type = TYPE_SCHAR;
464 else if (flags & 1)
465 type = TYPE_SHORT;
466 else
467 type = TYPE_INT;
468 break;
469 case 'o': case 'u': case 'x': case 'X':
470 #if HAVE_LONG_LONG
471 /* If 'long long' exists and is larger than 'long': */
472 if (flags >= 16 || (flags & 4))
473 type = TYPE_ULONGLONGINT;
474 else
475 #endif
476 /* If 'unsigned long long' exists and is the same as
477 'unsigned long', we parse "llu" into TYPE_ULONGINT. */
478 if (flags >= 8)
479 type = TYPE_ULONGINT;
480 else if (flags & 2)
481 type = TYPE_UCHAR;
482 else if (flags & 1)
483 type = TYPE_USHORT;
484 else
485 type = TYPE_UINT;
486 break;
487 case 'f': case 'F': case 'e': case 'E': case 'g': case 'G':
488 case 'a': case 'A':
489 if (flags >= 16 || (flags & 4))
490 type = TYPE_LONGDOUBLE;
491 else
492 type = TYPE_DOUBLE;
493 break;
494 case 'c':
495 if (flags >= 8)
496 #if HAVE_WINT_T
497 type = TYPE_WIDE_CHAR;
498 #else
499 goto error;
500 #endif
501 else
502 type = TYPE_CHAR;
503 break;
504 #if HAVE_WINT_T
505 case 'C':
506 type = TYPE_WIDE_CHAR;
507 c = 'c'; /* stored in dp->conversion as 'c' */
508 break;
509 #endif
510 case 's':
511 if (flags >= 8)
512 #if HAVE_WCHAR_T
513 type = TYPE_WIDE_STRING;
514 #else
515 goto error;
516 #endif
517 else
518 type = TYPE_STRING;
519 break;
520 #if HAVE_WCHAR_T
521 case 'S':
522 type = TYPE_WIDE_STRING;
523 c = 's'; /* stored in dp->conversion as 's' */
524 break;
525 #endif
526 case 'p':
527 type = TYPE_POINTER;
528 break;
529 case 'n':
530 #if HAVE_LONG_LONG
531 /* If 'long long' exists and is larger than 'long': */
532 if (flags >= 16 || (flags & 4))
533 type = TYPE_COUNT_LONGLONGINT_POINTER;
534 else
535 #endif
536 /* If 'long long' exists and is the same as 'long', we parse
537 "lln" into TYPE_COUNT_LONGINT_POINTER. */
538 if (flags >= 8)
539 type = TYPE_COUNT_LONGINT_POINTER;
540 else if (flags & 2)
541 type = TYPE_COUNT_SCHAR_POINTER;
542 else if (flags & 1)
543 type = TYPE_COUNT_SHORT_POINTER;
544 else
545 type = TYPE_COUNT_INT_POINTER;
546 break;
547 #if ENABLE_UNISTDIO
548 /* The unistdio extensions. */
549 case 'U':
550 if (flags >= 16)
551 type = TYPE_U32_STRING;
552 else if (flags >= 8)
553 type = TYPE_U16_STRING;
554 else
555 type = TYPE_U8_STRING;
556 break;
557 #endif
558 case '%':
559 type = TYPE_NONE;
560 break;
561 default:
562 /* Unknown conversion character. */
563 goto error;
564 }
565 }
566
/* A "%%" directive (TYPE_NONE) consumes no argument of its own; every
   other conversion uses the positional index parsed at the start of the
   directive, or else the next regular argument.  */
567 if (type != TYPE_NONE)
568 {
569 dp->arg_index = arg_index;
570 if (dp->arg_index == ARG_NONE)
571 {
572 dp->arg_index = arg_posn++;
573 if (dp->arg_index == ARG_NONE)
574 /* arg_posn wrapped around. */
575 goto error;
576 }
577 REGISTER_ARG (dp->arg_index, type);
578 }
579 dp->conversion = c;
580 dp->dir_end = cp;
581 }
582
/* Keep at least one unused slot after the last directive: the slot at
   index d->count is written by the next directive and by the final
   dir_start store after the loop.  */
583 d->count++;
584 if (d->count >= d_allocated)
585 {
586 size_t memory_size;
587 DIRECTIVE *memory;
588
589 d_allocated = xtimes (d_allocated, 2);
590 memory_size = xtimes (d_allocated, sizeof (DIRECTIVE));
591 if (size_overflow_p (memory_size))
592 /* Overflow, would lead to out of memory. */
593 goto out_of_memory;
594 memory = (DIRECTIVE *) (d->dir != d->direct_alloc_dir
595 ? realloc (d->dir, memory_size)
596 : malloc (memory_size));
597 if (memory == NULL)
598 /* Out of memory. */
599 goto out_of_memory;
600 if (d->dir == d->direct_alloc_dir)
601 memcpy (memory, d->dir, d->count * sizeof (DIRECTIVE));
602 d->dir = memory;
603 }
604 }
605 #if CHAR_T_ONLY_ASCII
606 else if (!c_isascii (c))
607 {
608 /* Non-ASCII character. Not supported. */
609 goto error;
610 }
611 #endif
612 }
/* cp now points at the terminating NUL; store it in the slot after the
   last directive so the end of the format string is recorded.  */
613 d->dir[d->count].dir_start = cp;
614
615 d->max_width_length = max_width_length;
616 d->max_precision_length = max_precision_length;
617 return 0;
618
/* Malformed format string: free heap storage (never the embedded arrays)
   and report EINVAL.  */
619 error:
620 if (a->arg != a->direct_alloc_arg)
621 free (a->arg);
622 if (d->dir != d->direct_alloc_dir)
623 free (d->dir);
624 errno = EINVAL;
625 return -1;
626
/* Allocation failure or size overflow: same cleanup, but report ENOMEM.  */
627 out_of_memory:
628 if (a->arg != a->direct_alloc_arg)
629 free (a->arg);
630 if (d->dir != d->direct_alloc_dir)
631 free (d->dir);
632 errno = ENOMEM;
633 return -1;
634 }
635
/* Remove the parametrization macros described at the top of this file
   (presumably so that the file can be included again with different
   settings -- confirm against the includers).  STATIC and ENABLE_UNISTDIO
   are left defined, and so is the REGISTER_ARG helper macro.  */
636 #undef PRINTF_PARSE
637 #undef DIRECTIVES
638 #undef DIRECTIVE
639 #undef CHAR_T_ONLY_ASCII
640 #undef CHAR_T