1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file args.c
4 /// \brief Argument parsing
5 ///
6 /// \note Filter-specific options parsing is in options.c.
7 //
8 // Author: Lasse Collin
9 //
10 // This file has been put into the public domain.
11 // You can do whatever you want with this file.
12 //
13 ///////////////////////////////////////////////////////////////////////////////
14
15 #include "private.h"
16
17 #include "getopt.h"
18 #include <ctype.h>
19
20
// Global option flags. Each one starts as false and is updated while
// the options are parsed in this file.

// True when -c/--stdout was given or when the program was invoked as
// xzcat or lzcat. args_parse() also forces it to true in test mode.
bool opt_stdout = false;

// True when -f/--force was given.
bool opt_force = false;

// True when -k/--keep was given. args_parse() also forces it to true
// whenever opt_stdout is set or the mode is MODE_TEST.
bool opt_keep_original = false;

// True when --robot was given.
bool opt_robot = false;

// True when --ignore-check was given.
bool opt_ignore_check = false;

// Name stored in args->files_name when --files or --files0 is used
// without an argument, in which case the list is read from stdin.
// We don't modify or free() this.
const char stdin_filename[] = "(stdin)";
30
31
/// Parse a memory usage limit and apply it to compression, decompression,
/// and/or multithreaded decompression.
///
/// The argument is either a plain number or a percentage written as "NN%".
///
/// \param      name             Option name given to str_to_uint64() when
///                              a plain number is parsed
/// \param      name_percentage  Option name given to str_to_uint64() when
///                              a percentage is parsed
/// \param      str              The option argument to parse
/// \param      set_compress     Passed through to hardware_memlimit_set()
/// \param      set_decompress   Passed through to hardware_memlimit_set()
/// \param      set_mtdec        Passed through to hardware_memlimit_set()
static void
parse_memlimit(const char *name, const char *name_percentage, const char *str,
		bool set_compress, bool set_decompress, bool set_mtdec)
{
	const size_t str_len = strlen(str);
	const bool is_percentage = str_len > 0 && str[str_len - 1] == '%';
	uint64_t limit;

	if (!is_percentage) {
		// On 32-bit systems SIZE_MAX would be the natural upper
		// bound, but UINT64_MAX is accepted so that scripts which
		// assume that values above 4 GiB work don't break.
		limit = str_to_uint64(name, str, 0, UINT64_MAX);
	} else {
		// Parse a private copy with the trailing '%' removed.
		//
		// str must not be edited in place: it can point into
		// argv[], and overwriting the percent sign there would make
		// e.g. --memlimit=50% show up as --memlimit=50 in "ps auxf"
		// and similar tools, which was confusing in the past.
		char *digits = xstrdup(str);
		digits[str_len - 1] = '\0';
		limit = str_to_uint64(name_percentage, digits, 1, 100);
		free(digits);
	}

	hardware_memlimit_set(limit, set_compress, set_decompress,
			set_mtdec, is_percentage);
}
66
67
68 static void
69 parse_block_list(const char *str_const)
70 {
71 // We need a modifiable string in the for-loop.
72 char *str_start = xstrdup(str_const);
73 char *str = str_start;
74
75 // It must be non-empty and not begin with a comma.
76 if (str[0] == '\0' || str[0] == ',')
77 message_fatal(_("%s: Invalid argument to --block-list"), str);
78
79 // Count the number of comma-separated strings.
80 size_t count = 1;
81 for (size_t i = 0; str[i] != '\0'; ++i)
82 if (str[i] == ',')
83 ++count;
84
85 // Prevent an unlikely integer overflow.
86 if (count > SIZE_MAX / sizeof(uint64_t) - 1)
87 message_fatal(_("%s: Too many arguments to --block-list"),
88 str);
89
90 // Allocate memory to hold all the sizes specified.
91 // If --block-list was specified already, its value is forgotten.
92 free(opt_block_list);
93 opt_block_list = xmalloc((count + 1) * sizeof(uint64_t));
94
95 for (size_t i = 0; i < count; ++i) {
96 // Locate the next comma and replace it with \0.
97 char *p = strchr(str, ',');
98 if (p != NULL)
99 *p = '\0';
100
101 if (str[0] == '\0') {
102 // There is no string, that is, a comma follows
103 // another comma. Use the previous value.
104 //
105 // NOTE: We checked earlier that the first char
106 // of the whole list cannot be a comma.
107 assert(i > 0);
108 opt_block_list[i] = opt_block_list[i - 1];
109 } else {
110 opt_block_list[i] = str_to_uint64("block-list", str,
111 0, UINT64_MAX);
112
113 // Zero indicates no more new Blocks.
114 if (opt_block_list[i] == 0) {
115 if (i + 1 != count)
116 message_fatal(_("0 can only be used "
117 "as the last element "
118 "in --block-list"));
119
120 opt_block_list[i] = UINT64_MAX;
121 }
122 }
123
124 str = p + 1;
125 }
126
127 // Terminate the array.
128 opt_block_list[count] = 0;
129
130 free(str_start);
131 return;
132 }
133
134
135 static void
136 parse_real(args_info *args, int argc, char **argv)
137 {
138 enum {
139 OPT_X86 = INT_MIN,
140 OPT_POWERPC,
141 OPT_IA64,
142 OPT_ARM,
143 OPT_ARMTHUMB,
144 OPT_ARM64,
145 OPT_SPARC,
146 OPT_DELTA,
147 OPT_LZMA1,
148 OPT_LZMA2,
149
150 OPT_SINGLE_STREAM,
151 OPT_NO_SPARSE,
152 OPT_FILES,
153 OPT_FILES0,
154 OPT_BLOCK_SIZE,
155 OPT_BLOCK_LIST,
156 OPT_MEM_COMPRESS,
157 OPT_MEM_DECOMPRESS,
158 OPT_MEM_MT_DECOMPRESS,
159 OPT_NO_ADJUST,
160 OPT_INFO_MEMORY,
161 OPT_ROBOT,
162 OPT_FLUSH_TIMEOUT,
163 OPT_IGNORE_CHECK,
164 };
165
166 static const char short_opts[]
167 = "cC:defF:hHlkM:qQrS:tT:vVz0123456789";
168
169 static const struct option long_opts[] = {
170 // Operation mode
171 { "compress", no_argument, NULL, 'z' },
172 { "decompress", no_argument, NULL, 'd' },
173 { "uncompress", no_argument, NULL, 'd' },
174 { "test", no_argument, NULL, 't' },
175 { "list", no_argument, NULL, 'l' },
176
177 // Operation modifiers
178 { "keep", no_argument, NULL, 'k' },
179 { "force", no_argument, NULL, 'f' },
180 { "stdout", no_argument, NULL, 'c' },
181 { "to-stdout", no_argument, NULL, 'c' },
182 { "single-stream", no_argument, NULL, OPT_SINGLE_STREAM },
183 { "no-sparse", no_argument, NULL, OPT_NO_SPARSE },
184 { "suffix", required_argument, NULL, 'S' },
185 // { "recursive", no_argument, NULL, 'r' }, // TODO
186 { "files", optional_argument, NULL, OPT_FILES },
187 { "files0", optional_argument, NULL, OPT_FILES0 },
188
189 // Basic compression settings
190 { "format", required_argument, NULL, 'F' },
191 { "check", required_argument, NULL, 'C' },
192 { "ignore-check", no_argument, NULL, OPT_IGNORE_CHECK },
193 { "block-size", required_argument, NULL, OPT_BLOCK_SIZE },
194 { "block-list", required_argument, NULL, OPT_BLOCK_LIST },
195 { "memlimit-compress", required_argument, NULL, OPT_MEM_COMPRESS },
196 { "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS },
197 { "memlimit-mt-decompress", required_argument, NULL, OPT_MEM_MT_DECOMPRESS },
198 { "memlimit", required_argument, NULL, 'M' },
199 { "memory", required_argument, NULL, 'M' }, // Old alias
200 { "no-adjust", no_argument, NULL, OPT_NO_ADJUST },
201 { "threads", required_argument, NULL, 'T' },
202 { "flush-timeout", required_argument, NULL, OPT_FLUSH_TIMEOUT },
203
204 { "extreme", no_argument, NULL, 'e' },
205 { "fast", no_argument, NULL, '0' },
206 { "best", no_argument, NULL, '9' },
207
208 // Filters
209 { "lzma1", optional_argument, NULL, OPT_LZMA1 },
210 { "lzma2", optional_argument, NULL, OPT_LZMA2 },
211 { "x86", optional_argument, NULL, OPT_X86 },
212 { "powerpc", optional_argument, NULL, OPT_POWERPC },
213 { "ia64", optional_argument, NULL, OPT_IA64 },
214 { "arm", optional_argument, NULL, OPT_ARM },
215 { "armthumb", optional_argument, NULL, OPT_ARMTHUMB },
216 { "arm64", optional_argument, NULL, OPT_ARM64 },
217 { "sparc", optional_argument, NULL, OPT_SPARC },
218 { "delta", optional_argument, NULL, OPT_DELTA },
219
220 // Other options
221 { "quiet", no_argument, NULL, 'q' },
222 { "verbose", no_argument, NULL, 'v' },
223 { "no-warn", no_argument, NULL, 'Q' },
224 { "robot", no_argument, NULL, OPT_ROBOT },
225 { "info-memory", no_argument, NULL, OPT_INFO_MEMORY },
226 { "help", no_argument, NULL, 'h' },
227 { "long-help", no_argument, NULL, 'H' },
228 { "version", no_argument, NULL, 'V' },
229
230 { NULL, 0, NULL, 0 }
231 };
232
233 int c;
234
235 while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
236 != -1) {
237 switch (c) {
238 // Compression preset (also for decompression if --format=raw)
239 case '0': case '1': case '2': case '3': case '4':
240 case '5': case '6': case '7': case '8': case '9':
241 coder_set_preset((uint32_t)(c - '0'));
242 break;
243
244 // --memlimit-compress
245 case OPT_MEM_COMPRESS:
246 parse_memlimit("memlimit-compress",
247 "memlimit-compress%", optarg,
248 true, false, false);
249 break;
250
251 // --memlimit-decompress
252 case OPT_MEM_DECOMPRESS:
253 parse_memlimit("memlimit-decompress",
254 "memlimit-decompress%", optarg,
255 false, true, false);
256 break;
257
258 // --memlimit-mt-decompress
259 case OPT_MEM_MT_DECOMPRESS:
260 parse_memlimit("memlimit-mt-decompress",
261 "memlimit-mt-decompress%", optarg,
262 false, false, true);
263 break;
264
265 // --memlimit
266 case 'M':
267 parse_memlimit("memlimit", "memlimit%", optarg,
268 true, true, true);
269 break;
270
271 // --suffix
272 case 'S':
273 suffix_set(optarg);
274 break;
275
276 case 'T': {
277 // Since xz 5.4.0: Ignore leading '+' first.
278 const char *s = optarg;
279 if (optarg[0] == '+')
280 ++s;
281
282 // The max is from src/liblzma/common/common.h.
283 uint32_t t = str_to_uint64("threads", s, 0, 16384);
284
285 // If leading '+' was used then use multi-threaded
286 // mode even if exactly one thread was specified.
287 if (t == 1 && optarg[0] == '+')
288 t = UINT32_MAX;
289
290 hardware_threads_set(t);
291 break;
292 }
293
294 // --version
295 case 'V':
296 // This doesn't return.
297 message_version();
298
299 // --stdout
300 case 'c':
301 opt_stdout = true;
302 break;
303
304 // --decompress
305 case 'd':
306 opt_mode = MODE_DECOMPRESS;
307 break;
308
309 // --extreme
310 case 'e':
311 coder_set_extreme();
312 break;
313
314 // --force
315 case 'f':
316 opt_force = true;
317 break;
318
319 // --info-memory
320 case OPT_INFO_MEMORY:
321 // This doesn't return.
322 hardware_memlimit_show();
323
324 // --help
325 case 'h':
326 // This doesn't return.
327 message_help(false);
328
329 // --long-help
330 case 'H':
331 // This doesn't return.
332 message_help(true);
333
334 // --list
335 case 'l':
336 opt_mode = MODE_LIST;
337 break;
338
339 // --keep
340 case 'k':
341 opt_keep_original = true;
342 break;
343
344 // --quiet
345 case 'q':
346 message_verbosity_decrease();
347 break;
348
349 case 'Q':
350 set_exit_no_warn();
351 break;
352
353 case 't':
354 opt_mode = MODE_TEST;
355 break;
356
357 // --verbose
358 case 'v':
359 message_verbosity_increase();
360 break;
361
362 // --robot
363 case OPT_ROBOT:
364 opt_robot = true;
365
366 // This is to make sure that floating point numbers
367 // always have a dot as decimal separator.
368 setlocale(LC_NUMERIC, "C");
369 break;
370
371 case 'z':
372 opt_mode = MODE_COMPRESS;
373 break;
374
375 // Filter setup
376
377 case OPT_X86:
378 coder_add_filter(LZMA_FILTER_X86,
379 options_bcj(optarg));
380 break;
381
382 case OPT_POWERPC:
383 coder_add_filter(LZMA_FILTER_POWERPC,
384 options_bcj(optarg));
385 break;
386
387 case OPT_IA64:
388 coder_add_filter(LZMA_FILTER_IA64,
389 options_bcj(optarg));
390 break;
391
392 case OPT_ARM:
393 coder_add_filter(LZMA_FILTER_ARM,
394 options_bcj(optarg));
395 break;
396
397 case OPT_ARMTHUMB:
398 coder_add_filter(LZMA_FILTER_ARMTHUMB,
399 options_bcj(optarg));
400 break;
401
402 case OPT_ARM64:
403 coder_add_filter(LZMA_FILTER_ARM64,
404 options_bcj(optarg));
405 break;
406
407 case OPT_SPARC:
408 coder_add_filter(LZMA_FILTER_SPARC,
409 options_bcj(optarg));
410 break;
411
412 case OPT_DELTA:
413 coder_add_filter(LZMA_FILTER_DELTA,
414 options_delta(optarg));
415 break;
416
417 case OPT_LZMA1:
418 coder_add_filter(LZMA_FILTER_LZMA1,
419 options_lzma(optarg));
420 break;
421
422 case OPT_LZMA2:
423 coder_add_filter(LZMA_FILTER_LZMA2,
424 options_lzma(optarg));
425 break;
426
427 // Other
428
429 // --format
430 case 'F': {
431 // Just in case, support both "lzma" and "alone" since
432 // the latter was used for forward compatibility in
433 // LZMA Utils 4.32.x.
434 static const struct {
435 char str[8];
436 enum format_type format;
437 } types[] = {
438 { "auto", FORMAT_AUTO },
439 { "xz", FORMAT_XZ },
440 { "lzma", FORMAT_LZMA },
441 { "alone", FORMAT_LZMA },
442 #ifdef HAVE_LZIP_DECODER
443 { "lzip", FORMAT_LZIP },
444 #endif
445 { "raw", FORMAT_RAW },
446 };
447
448 size_t i = 0;
449 while (strcmp(types[i].str, optarg) != 0)
450 if (++i == ARRAY_SIZE(types))
451 message_fatal(_("%s: Unknown file "
452 "format type"),
453 optarg);
454
455 opt_format = types[i].format;
456 break;
457 }
458
459 // --check
460 case 'C': {
461 static const struct {
462 char str[8];
463 lzma_check check;
464 } types[] = {
465 { "none", LZMA_CHECK_NONE },
466 { "crc32", LZMA_CHECK_CRC32 },
467 { "crc64", LZMA_CHECK_CRC64 },
468 { "sha256", LZMA_CHECK_SHA256 },
469 };
470
471 size_t i = 0;
472 while (strcmp(types[i].str, optarg) != 0) {
473 if (++i == ARRAY_SIZE(types))
474 message_fatal(_("%s: Unsupported "
475 "integrity "
476 "check type"), optarg);
477 }
478
479 // Use a separate check in case we are using different
480 // liblzma than what was used to compile us.
481 if (!lzma_check_is_supported(types[i].check))
482 message_fatal(_("%s: Unsupported integrity "
483 "check type"), optarg);
484
485 coder_set_check(types[i].check);
486 break;
487 }
488
489 case OPT_IGNORE_CHECK:
490 opt_ignore_check = true;
491 break;
492
493 case OPT_BLOCK_SIZE:
494 opt_block_size = str_to_uint64("block-size", optarg,
495 0, LZMA_VLI_MAX);
496 break;
497
498 case OPT_BLOCK_LIST: {
499 parse_block_list(optarg);
500 break;
501 }
502
503 case OPT_SINGLE_STREAM:
504 opt_single_stream = true;
505 break;
506
507 case OPT_NO_SPARSE:
508 io_no_sparse();
509 break;
510
511 case OPT_FILES:
512 args->files_delim = '\n';
513
514 // Fall through
515
516 case OPT_FILES0:
517 if (args->files_name != NULL)
518 message_fatal(_("Only one file can be "
519 "specified with `--files' "
520 "or `--files0'."));
521
522 if (optarg == NULL) {
523 args->files_name = stdin_filename;
524 args->files_file = stdin;
525 } else {
526 args->files_name = optarg;
527 args->files_file = fopen(optarg,
528 c == OPT_FILES ? "r" : "rb");
529 if (args->files_file == NULL)
530 // TRANSLATORS: This is a translatable
531 // string because French needs a space
532 // before the colon ("%s : %s").
533 message_fatal(_("%s: %s"), optarg,
534 strerror(errno));
535 }
536
537 break;
538
539 case OPT_NO_ADJUST:
540 opt_auto_adjust = false;
541 break;
542
543 case OPT_FLUSH_TIMEOUT:
544 opt_flush_timeout = str_to_uint64("flush-timeout",
545 optarg, 0, UINT64_MAX);
546 break;
547
548 default:
549 message_try_help();
550 tuklib_exit(E_ERROR, E_ERROR, false);
551 }
552 }
553
554 return;
555 }
556
557
558 static void
559 parse_environment(args_info *args, char *argv0, const char *varname)
560 {
561 char *env = getenv(varname);
562 if (env == NULL)
563 return;
564
565 // We modify the string, so make a copy of it.
566 env = xstrdup(env);
567
568 // Calculate the number of arguments in env. argc stats at one
569 // to include space for the program name.
570 int argc = 1;
571 bool prev_was_space = true;
572 for (size_t i = 0; env[i] != '\0'; ++i) {
573 // NOTE: Cast to unsigned char is needed so that correct
574 // value gets passed to isspace(), which expects
575 // unsigned char cast to int. Casting to int is done
576 // automatically due to integer promotion, but we need to
577 // force char to unsigned char manually. Otherwise 8-bit
578 // characters would get promoted to wrong value if
579 // char is signed.
580 if (isspace((unsigned char)env[i])) {
581 prev_was_space = true;
582 } else if (prev_was_space) {
583 prev_was_space = false;
584
585 // Keep argc small enough to fit into a signed int
586 // and to keep it usable for memory allocation.
587 if (++argc == my_min(
588 INT_MAX, SIZE_MAX / sizeof(char *)))
589 message_fatal(_("The environment variable "
590 "%s contains too many "
591 "arguments"), varname);
592 }
593 }
594
595 // Allocate memory to hold pointers to the arguments. Add one to get
596 // space for the terminating NULL (if some systems happen to need it).
597 char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *));
598 argv[0] = argv0;
599 argv[argc] = NULL;
600
601 // Go through the string again. Split the arguments using '\0'
602 // characters and add pointers to the resulting strings to argv.
603 argc = 1;
604 prev_was_space = true;
605 for (size_t i = 0; env[i] != '\0'; ++i) {
606 if (isspace((unsigned char)env[i])) {
607 prev_was_space = true;
608 env[i] = '\0';
609 } else if (prev_was_space) {
610 prev_was_space = false;
611 argv[argc++] = env + i;
612 }
613 }
614
615 // Parse the argument list we got from the environment. All non-option
616 // arguments i.e. filenames are ignored.
617 parse_real(args, argc, argv);
618
619 // Reset the state of the getopt_long() so that we can parse the
620 // command line options too. There are two incompatible ways to
621 // do it.
622 #ifdef HAVE_OPTRESET
623 // BSD
624 optind = 1;
625 optreset = 1;
626 #else
627 // GNU, Solaris
628 optind = 0;
629 #endif
630
631 // We don't need the argument list from environment anymore.
632 free(argv);
633 free(env);
634
635 return;
636 }
637
638
639 extern void
640 args_parse(args_info *args, int argc, char **argv)
641 {
642 // Initialize those parts of *args that we need later.
643 args->files_name = NULL;
644 args->files_file = NULL;
645 args->files_delim = '\0';
646
647 // Check how we were called.
648 {
649 // Remove the leading path name, if any.
650 const char *name = strrchr(argv[0], '/');
651 if (name == NULL)
652 name = argv[0];
653 else
654 ++name;
655
656 // NOTE: It's possible that name[0] is now '\0' if argv[0]
657 // is weird, but it doesn't matter here.
658
659 // Look for full command names instead of substrings like
660 // "un", "cat", and "lz" to reduce possibility of false
661 // positives when the programs have been renamed.
662 if (strstr(name, "xzcat") != NULL) {
663 opt_mode = MODE_DECOMPRESS;
664 opt_stdout = true;
665 } else if (strstr(name, "unxz") != NULL) {
666 opt_mode = MODE_DECOMPRESS;
667 } else if (strstr(name, "lzcat") != NULL) {
668 opt_format = FORMAT_LZMA;
669 opt_mode = MODE_DECOMPRESS;
670 opt_stdout = true;
671 } else if (strstr(name, "unlzma") != NULL) {
672 opt_format = FORMAT_LZMA;
673 opt_mode = MODE_DECOMPRESS;
674 } else if (strstr(name, "lzma") != NULL) {
675 opt_format = FORMAT_LZMA;
676 }
677 }
678
679 // First the flags from the environment
680 parse_environment(args, argv[0], "XZ_DEFAULTS");
681 parse_environment(args, argv[0], "XZ_OPT");
682
683 // Then from the command line
684 parse_real(args, argc, argv);
685
686 // If encoder or decoder support was omitted at build time,
687 // show an error now so that the rest of the code can rely on
688 // that whatever is in opt_mode is also supported.
689 #ifndef HAVE_ENCODERS
690 if (opt_mode == MODE_COMPRESS)
691 message_fatal(_("Compression support was disabled "
692 "at build time"));
693 #endif
694 #ifndef HAVE_DECODERS
695 // Even MODE_LIST cannot work without decoder support so MODE_COMPRESS
696 // is the only valid choice.
697 if (opt_mode != MODE_COMPRESS)
698 message_fatal(_("Decompression support was disabled "
699 "at build time"));
700 #endif
701
702 #ifdef HAVE_LZIP_DECODER
703 if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_LZIP)
704 message_fatal(_("Compression of lzip files (.lz) "
705 "is not supported"));
706 #endif
707
708 // Never remove the source file when the destination is not on disk.
709 // In test mode the data is written nowhere, but setting opt_stdout
710 // will make the rest of the code behave well.
711 if (opt_stdout || opt_mode == MODE_TEST) {
712 opt_keep_original = true;
713 opt_stdout = true;
714 }
715
716 // When compressing, if no --format flag was used, or it
717 // was --format=auto, we compress to the .xz format.
718 if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO)
719 opt_format = FORMAT_XZ;
720
721 // Compression settings need to be validated (options themselves and
722 // their memory usage) when compressing to any file format. It has to
723 // be done also when uncompressing raw data, since for raw decoding
724 // the options given on the command line are used to know what kind
725 // of raw data we are supposed to decode.
726 if (opt_mode == MODE_COMPRESS || (opt_format == FORMAT_RAW
727 && opt_mode != MODE_LIST))
728 coder_set_compression_settings();
729
730 // If raw format is used and a custom suffix is not provided,
731 // then only stdout mode can be used when compressing or decompressing.
732 if (opt_format == FORMAT_RAW && !suffix_is_set() && !opt_stdout
733 && (opt_mode == MODE_COMPRESS
734 || opt_mode == MODE_DECOMPRESS))
735 message_fatal(_("With --format=raw, --suffix=.SUF is "
736 "required unless writing to stdout"));
737
738 // If no filenames are given, use stdin.
739 if (argv[optind] == NULL && args->files_name == NULL) {
740 // We don't modify or free() the "-" constant. The caller
741 // modifies this so don't make the struct itself const.
742 static char *names_stdin[2] = { (char *)"-", NULL };
743 args->arg_names = names_stdin;
744 args->arg_count = 1;
745 } else {
746 // We got at least one filename from the command line, or
747 // --files or --files0 was specified.
748 args->arg_names = argv + optind;
749 args->arg_count = (unsigned int)(argc - optind);
750 }
751
752 return;
753 }
754
755
756 #ifndef NDEBUG
757 extern void
758 args_free(void)
759 {
760 free(opt_block_list);
761 return;
762 }
763 #endif