1 /* argcv.c - simple functions for parsing input based on whitespace
2 Copyright (C) 1999-2001, 2007, 2009-2010, 2023 Free Software Foundation,
3 Inc.
4
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 3 of the License, or (at your option) any later version.
9
10 This library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with this library; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
18
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22 #include <ctype.h>
23
24 #include <argcv.h>
25
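/*
 * argcv_get takes a string and splits it into several strings, breaking
 * at whitespace (space, tab or newline) and at delimiter characters.
 * command is the string to split
 * delim lists extra delimiter characters; a delimiter ends the current
 *   word and is returned as a token of its own
 * cmnt, if not NULL, lists characters that start a comment running to
 *   the end of the line
 * the number of strings is placed into argc
 * the split strings are put into argv
 * returns 0 on success, nonzero on failure
 */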
33
/* Nonzero if C is a blank: space, horizontal tab or newline.  */
#define isws(c) ((c) == ' ' || (c) == '\t' || (c) == '\n')

/* Nonzero if C is a double quote or occurs in the string DELIM.  Because
   strchr also matches the terminating null, a NUL character counts as a
   delimiter too.  */
#define isdelim(c,delim) ((c) == '"' || strchr (delim, (c)) != NULL)

/*
 * Locate the next token in COMMAND (LEN characters long), resuming at
 * offset *SAVE.
 *
 * On return *START and *END hold the inclusive offsets of the token and
 * *SAVE the offset at which the next call should resume.  A token is
 * either a quoted string (quotes included), a single delimiter character,
 * or a run of characters up to the next unescaped blank or delimiter.
 * If CMNT is not NULL and the token starts with one of its characters,
 * everything up to the next newline is skipped and scanning restarts.
 *
 * Returns the new *SAVE.  A value greater than LEN means that no token
 * was found.  Note that a quote without a matching closing quote leaves
 * the scan position at the end of the input, so (when LEN is the string
 * length) the rest of the input yields no token.
 */
static int
argcv_scan (int len, const char *command, const char *delim, const char* cmnt,
            int *start, int *end, int *save)
{
  for (;;)
    {
      int pos = *save;
      int quoted = 0;

      if (pos >= len)
        return pos + 1;

      /* Leading blanks never belong to a token.  */
      while (pos < len && isws (command[pos]))
        pos++;
      *start = pos;

      if (command[pos] == '"' || command[pos] == '\'')
        {
          char q = command[pos];

          /* Look for the closing quote; one preceded by a backslash
             does not count.  */
          do
            pos++;
          while (pos < len && (command[pos] != q || command[pos - 1] == '\\'));

          quoted = pos < len;
        }

      /* Anything that is not a properly closed quoted string is either
         a lone delimiter (nothing more to consume) or a plain word.  */
      if (!quoted && !isdelim (command[pos], delim))
        {
          for (pos++; pos < len; pos++)
            {
              if (isdelim (command[pos], delim))
                break;
              /* A blank ends the word unless it is backslash-escaped.  */
              if (isws (command[pos]) && command[pos - 1] != '\\')
                break;
            }
          pos--;
        }

      *end = pos;
      *save = pos + 1;

      /* Done, unless a real token begins with a comment character.  */
      if (*save > len
          || cmnt == NULL
          || strchr (cmnt, command[*start]) == NULL)
        return *save;

      /* Comment: discard the rest of the line and look again.  */
      for (pos = *save; pos < len && command[pos] != '\n'; pos++)
        ;
      *save = pos;
    }
}
98
/* Escape translation table.  It is a sequence of character pairs: the
   letter that follows a backslash in the escaped form, immediately
   followed by the character it stands for.  */
static const char escape_transtab[] = "\\\\a\ab\bf\fn\nr\rt\t";

/* Return the character denoted by the escape letter C (the character
   following a backslash), or C itself if it is not a known escape.  */
int
argcv_unescape_char (int c)
{
  const char *p;

  for (p = escape_transtab; *p; p += 2)
    {
      if (p[0] == c)
        return p[1];
    }
  return c;
}

/* Return the escape letter that represents the character C, or -1 if C
   has no single-letter escape.  */
int
argcv_escape_char (int c)
{
  const char *p;

  /* Walk the pairs forward.  Stepping backward by two from the end, as
     the code formerly did, finishes by forming a pointer that lies
     before the start of the array, which is undefined behavior.  The
     table has no duplicate entries, so the direction does not affect
     the result.  */
  for (p = escape_transtab; *p; p += 2)
    {
      if (p[1] == c)
        return p[0];
    }
  return -1;
}
127
128
/* Convert exactly CNT digits at SRC, written in radix BASE, to a number.

   Returns the value, or -1 if any of the first CNT characters is not a
   valid digit in BASE.  The terminating null is not a digit, so a string
   shorter than CNT is rejected and nothing past its end is ever read.
   Leading blanks, signs and radix prefixes are not accepted: the caller
   skips exactly CNT characters after a successful conversion, so nothing
   but CNT digits may be taken as success.

   The value is accumulated in an int; the callers in this file pass a
   CNT of 2 or 3, for which it cannot overflow.  */
static int
xtonum (const char *src, int base, size_t cnt)
{
  int val = 0;
  size_t i;

  for (i = 0; i < cnt; i++)
    {
      int c = (unsigned char) src[i];
      int digit;

      if (c >= '0' && c <= '9')
        digit = c - '0';
      else if (c >= 'a' && c <= 'z')
        digit = c - 'a' + 10;
      else if (c >= 'A' && c <= 'Z')
        digit = c - 'A' + 10;
      else
        return -1;

      if (digit >= base)
        return -1;

      val = val * base + digit;
    }
  return val;
}
141
/* Compute how many characters the escaped form of STR occupies, not
   counting surrounding quotes or a terminating null.

   A space counts as one character and a double quote as two; both set
   *QUOTE to 1.  Other printable characters count as one, characters that
   have a single-letter escape as two, and everything else as four.
   *QUOTE is never cleared here; the caller initializes it.  */
static size_t
escaped_length (const char *str, int *quote)
{
  size_t total = 0;
  const char *p = str;

  while (*p != '\0')
    {
      char ch = *p++;

      switch (ch)
        {
        case ' ':
          total += 1;
          *quote = 1;
          break;

        case '"':
          total += 2;
          *quote = 1;
          break;

        default:
          if (isprint ((unsigned char) ch))
            total += 1;
          else if (argcv_escape_char (ch) != -1)
            total += 2;
          else
            total += 4;
          break;
        }
    }
  return total;
}
168
/* Copy the N characters at SRC to DST, expanding backslash escapes, and
   null-terminate the result.  DST must have room for N + 1 characters;
   the output is never longer than the input.

   Recognized sequences:
     \xHH or \XHH  the character with hexadecimal code HH (two digits)
     \0OOO         the character with octal code OOO (three digits)
     \C            whatever argcv_unescape_char returns for C

   A \x or \0 that is not followed by the required digits inside the
   N-character window is copied literally (backslash plus letter) and
   the characters after it are then processed normally.  A backslash
   that is the very last character is copied as is.

   All bounds are checked against the end of the window before anything
   is read.  The previous implementation decremented an unsigned counter
   past zero in these cases and then ran off the end of both buffers.  */
static void
unescape_copy (char *dst, const char *src, size_t n)
{
  const char *limit = src + n;

  while (src < limit)
    {
      size_t avail = (size_t) (limit - src);
      char esc;
      int c;

      /* Ordinary character, or a backslash with nothing after it.  */
      if (*src != '\\' || avail < 2)
        {
          *dst++ = *src++;
          continue;
        }

      esc = src[1];
      switch (esc)
        {
        case 'x':
        case 'X':
          /* Needs backslash, letter and two hex digits: 4 characters.  */
          c = (avail >= 4) ? xtonum (src + 2, 16, 2) : -1;
          if (c == -1)
            {
              *dst++ = '\\';
              *dst++ = esc;
              src += 2;
            }
          else
            {
              *dst++ = c;
              src += 4;
            }
          break;

        case '0':
          /* Needs backslash, zero and three octal digits: 5 characters.  */
          c = (avail >= 5) ? xtonum (src + 2, 8, 3) : -1;
          if (c == -1)
            {
              *dst++ = '\\';
              *dst++ = esc;
              src += 2;
            }
          else
            {
              *dst++ = c;
              src += 5;
            }
          break;

        default:
          *dst++ = argcv_unescape_char (esc);
          src += 2;
          break;
        }
    }
  *dst = 0;
}
244
/* Write the escaped form of the null-terminated string SRC to DST.

   A double quote is written as \" ; printable characters other than a
   tab are copied unchanged; characters that have a single-letter escape
   are written as a backslash plus that letter; everything else is
   written as a backslash followed by three octal digits.

   No terminating null is written.  DST must be large enough for the
   escaped text.  */
static void
escape_copy (char *dst, const char *src)
{
  const char *in;
  char *out = dst;

  for (in = src; *in != '\0'; in++)
    {
      int esc;

      if (*in == '"')
        {
          *out++ = '\\';
          *out++ = '"';
          continue;
        }

      if (*in != '\t' && isprint ((unsigned char) *in))
        {
          *out++ = *in;
          continue;
        }

      /* Everything else needs a backslash sequence.  */
      esc = argcv_escape_char (*in);
      *out++ = '\\';
      if (esc != -1)
        *out++ = esc;
      else
        {
          char oct[4];

          snprintf (oct, sizeof oct, "%03o", (unsigned char) *in);
          memcpy (out, oct, 3);
          out += 3;
        }
    }
}
273
/* Split COMMAND into tokens and return them as a NULL-terminated vector.

   DELIM and CMNT are passed to argcv_scan unchanged: DELIM lists the
   delimiter characters and CMNT (which may be NULL) the characters that
   start a comment.  Tokens enclosed in matching single or double quotes
   have the quotes removed; backslash escapes are expanded by
   unescape_copy.

   On success returns 0, with the token count in *ARGC and a newly
   allocated vector in *ARGV.

   Returns 1 if memory cannot be allocated.  If the vector itself cannot
   be allocated, *ARGV is NULL and *ARGC is 0.  If a token cannot be
   allocated, *ARGC keeps the full count and the unfilled slots of *ARGV
   are NULL, so the partial result can still be released with
   argcv_free.  */
int
argcv_get (const char *command, const char *delim, const char* cmnt,
           int *argc, char ***argv)
{
  int len = (int) strlen (command);
  int start, end, save;
  int i;

  *argv = NULL;
  *argc = 0;

  /* First pass: count the tokens.  */
  save = 0;
  while (argcv_scan (len, command, delim, cmnt, &start, &end, &save) <= len)
    (*argc)++;

  *argv = calloc ((size_t) *argc + 1, sizeof **argv);
  if (*argv == NULL)
    {
      /* Leave a consistent "empty" result instead of dereferencing
         a null vector below.  */
      *argc = 0;
      return 1;
    }

  /* Second pass: extract each token.  */
  save = 0;
  for (i = 0; i < *argc; i++)
    {
      int n;

      argcv_scan (len, command, delim, cmnt, &start, &end, &save);

      /* Drop a pair of matching enclosing quotes.  */
      if (end > start
          && (command[start] == '"' || command[start] == '\'')
          && command[end] == command[start])
        {
          start++;
          end--;
        }

      n = end - start + 1;
      (*argv)[i] = calloc ((size_t) n + 1, sizeof (char));
      if ((*argv)[i] == NULL)
        return 1;
      unescape_copy ((*argv)[i], &command[start], n);
      (*argv)[i][n] = 0;
    }
  (*argv)[i] = NULL;
  return 0;
}
316
/*
 * Frees all elements of an argv array, then the array itself.
 * argc is the number of elements
 * argv is the array; it may be NULL, in which case nothing is done
 * Always returns 1.
 */
int
argcv_free (int argc, char **argv)
{
  /* A null vector with a positive count must not be indexed.  */
  if (argv != NULL)
    {
      /* free (NULL) is a no-op, so empty slots need no special case.  */
      while (--argc >= 0)
        free (argv[argc]);
      free (argv);
    }
  return 1;
}
331
/* Take an argv and make a single string of its elements, separated
   by ' '.

   Each element is escaped with escape_copy.  An element that contains a
   space or a double quote is additionally enclosed in double quotes.
   Trailing whitespace is removed from the result.

   On success returns 0 and stores the newly allocated string in
   *PSTRING; the caller releases it with free.  Returns 1 if PSTRING is
   NULL or memory cannot be allocated; nothing is stored in that case.
   A non-positive ARGC yields an empty string.  */

int
argcv_string (int argc, char **argv, char **pstring)
{
  size_t i, j, len;
  char *buffer;

  /* No need.  */
  if (pstring == NULL)
    return 1;

  buffer = malloc (1);
  if (buffer == NULL)
    return 1;
  *buffer = '\0';

  /* The explicit sign test keeps a negative ARGC from being converted
     to a huge unsigned bound.  */
  for (len = i = j = 0; argc > 0 && i < (size_t) argc; i++)
    {
      int quote = 0;
      size_t toklen = escaped_length (argv[i], &quote);
      char *grown;

      /* Room for the token, a separator and the final null, plus the
         two quotes when they are needed.  */
      len += toklen + 2;
      if (quote)
        len += 2;

      /* Keep the old pointer until realloc has succeeded, so that it
         can be released on failure.  */
      grown = realloc (buffer, len);
      if (grown == NULL)
        {
          free (buffer);
          return 1;
        }
      buffer = grown;

      if (i != 0)
        buffer[j++] = ' ';
      if (quote)
        buffer[j++] = '"';
      escape_copy (buffer + j, argv[i]);
      j += toklen;
      if (quote)
        buffer[j++] = '"';
    }

  /* Strip trailing whitespace, e.g. the separator written before an
     empty last element.  */
  for (; j > 0 && isspace ((unsigned char) buffer[j - 1]); j--)
    ;
  buffer[j] = 0;

  *pstring = buffer;
  return 0;
}
381
#if 0
/* Manual test driver, compiled out.  It splits its first command line
   argument (or the built-in sample below when none is given), prints
   the tokens one per line, then prints the string rebuilt from them.  */
const char *command = "set prompt=\"& \a\\\"\" \\x25\\0145\\098\\ta";

int
main (int xargc, char **xargv)
{
  int i, argc;
  char **argv;
  char *s;

  if (argcv_get (xargv[1] ? xargv[1]:command, "=", "#", &argc, &argv))
    {
      argcv_free (argc, argv);
      return 1;
    }
  printf ("%d args:\n", argc);
  for (i = 0; i < argc; i++)
    printf ("%s\n", argv[i]);
  printf ("===\n");
  if (argcv_string (argc, argv, &s) == 0)
    {
      printf ("%s\n", s);
      free (s);
    }
  argcv_free (argc, argv);
  return 0;
}
#endif