1 /* Functions for dealing with sparse files
2
3 Copyright 2003-2023 Free Software Foundation, Inc.
4
5 This program is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 3, or (at your option) any later
8 version.
9
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
13 Public License for more details.
14
15 You should have received a copy of the GNU General Public License along
16 with this program. If not, see <http://www.gnu.org/licenses/>. */
17
18 #include <system.h>
19 #include <inttostr.h>
20 #include <quotearg.h>
21 #include "common.h"
22
/* Forward declarations: the sparse file descriptor and the routine
   that chooses the operation table for it.  */
struct tar_sparse_file;

static bool sparse_select_optab (struct tar_sparse_file *file);
25
/* Phase indicator handed to the scan_block operation while the raw
   scanner walks through a file.  */
enum sparse_scan_state
  {
    scan_begin,         /* before the first block is read */
    scan_block,         /* while blocks are being examined */
    scan_end            /* after the last block has been read */
  };
32
/* Table of format-specific operations.  Any member may be null; the
   tar_sparse_* wrappers in this file then return the fixed default
   noted next to each member instead of calling it.  */
struct tar_sparse_optab
{
  /* Default when null: true.  */
  bool (*init) (struct tar_sparse_file *);
  /* Default when null: true.  */
  bool (*done) (struct tar_sparse_file *);
  /* Default when null: false.  */
  bool (*sparse_member_p) (struct tar_sparse_file *);
  /* Default when null: false.  */
  bool (*dump_header) (struct tar_sparse_file *);
  /* Default when null: true.  */
  bool (*fixup_header) (struct tar_sparse_file *);
  /* Default when null: true.  */
  bool (*decode_header) (struct tar_sparse_file *);
  /* Default when null: true.  */
  bool (*scan_block) (struct tar_sparse_file *, enum sparse_scan_state,
                      void *);
  /* Default when null: false.  The argument is a sparse map index.  */
  bool (*dump_region) (struct tar_sparse_file *, size_t);
  /* Default when null: false.  The argument is a sparse map index.  */
  bool (*extract_region) (struct tar_sparse_file *, size_t);
};
46
/* State shared by all operations performed on one sparse file.  */
struct tar_sparse_file
{
  /* File descriptor.  */
  int fd;
  /* Is fd seekable?  */
  bool seekable;
  /* Current offset in fd if seekable == false.  Otherwise unused.  */
  off_t offset;
  /* Number of bytes actually written to the archive.  */
  off_t dumped_size;
  /* Information about the file.  */
  struct tar_stat_info *stat_info;
  /* Operation table.  */
  struct tar_sparse_optab const *optab;
  /* Any additional data optab calls might require.  */
  void *closure;
};
60
61 /* Dump zeros to file->fd until offset is reached. It is used instead of
62 lseek if the output file is not seekable */
63 static bool
64 dump_zeros (struct tar_sparse_file *file, off_t offset)
65 {
66 static char const zero_buf[BLOCKSIZE];
67
68 if (offset < file->offset)
69 {
70 errno = EINVAL;
71 return false;
72 }
73
74 while (file->offset < offset)
75 {
76 size_t size = (BLOCKSIZE < offset - file->offset
77 ? BLOCKSIZE
78 : offset - file->offset);
79 ssize_t wrbytes;
80
81 wrbytes = write (file->fd, zero_buf, size);
82 if (wrbytes <= 0)
83 {
84 if (wrbytes == 0)
85 errno = EINVAL;
86 return false;
87 }
88 file->offset += wrbytes;
89 }
90
91 return true;
92 }
93
94 static bool
95 tar_sparse_member_p (struct tar_sparse_file *file)
96 {
97 if (file->optab->sparse_member_p)
98 return file->optab->sparse_member_p (file);
99 return false;
100 }
101
102 static bool
103 tar_sparse_init (struct tar_sparse_file *file)
104 {
105 memset (file, 0, sizeof *file);
106
107 if (!sparse_select_optab (file))
108 return false;
109
110 if (file->optab->init)
111 return file->optab->init (file);
112
113 return true;
114 }
115
116 static bool
117 tar_sparse_done (struct tar_sparse_file *file)
118 {
119 if (file->optab->done)
120 return file->optab->done (file);
121 return true;
122 }
123
124 static bool
125 tar_sparse_scan (struct tar_sparse_file *file, enum sparse_scan_state state,
126 void *block)
127 {
128 if (file->optab->scan_block)
129 return file->optab->scan_block (file, state, block);
130 return true;
131 }
132
133 static bool
134 tar_sparse_dump_region (struct tar_sparse_file *file, size_t i)
135 {
136 if (file->optab->dump_region)
137 return file->optab->dump_region (file, i);
138 return false;
139 }
140
141 static bool
142 tar_sparse_extract_region (struct tar_sparse_file *file, size_t i)
143 {
144 if (file->optab->extract_region)
145 return file->optab->extract_region (file, i);
146 return false;
147 }
148
149 static bool
150 tar_sparse_dump_header (struct tar_sparse_file *file)
151 {
152 if (file->optab->dump_header)
153 return file->optab->dump_header (file);
154 return false;
155 }
156
157 static bool
158 tar_sparse_decode_header (struct tar_sparse_file *file)
159 {
160 if (file->optab->decode_header)
161 return file->optab->decode_header (file);
162 return true;
163 }
164
165 static bool
166 tar_sparse_fixup_header (struct tar_sparse_file *file)
167 {
168 if (file->optab->fixup_header)
169 return file->optab->fixup_header (file);
170 return true;
171 }
172
173
174 static bool
175 lseek_or_error (struct tar_sparse_file *file, off_t offset)
176 {
177 if (file->seekable
178 ? lseek (file->fd, offset, SEEK_SET) < 0
179 : ! dump_zeros (file, offset))
180 {
181 seek_diag_details (file->stat_info->orig_file_name, offset);
182 return false;
183 }
184 return true;
185 }
186
/* Return true if the SIZE bytes starting at BUFFER are all zero, and
   false as soon as a nonzero byte (i.e., useful data) is found.  An
   empty range counts as all zeros.  */
static bool
zero_block_p (char const *buffer, size_t size)
{
  size_t pos;

  for (pos = 0; pos < size; pos++)
    {
      if (buffer[pos] != 0)
        return false;
    }

  return true;
}
198
199 static void
200 sparse_add_map (struct tar_stat_info *st, struct sp_array const *sp)
201 {
202 struct sp_array *sparse_map = st->sparse_map;
203 size_t avail = st->sparse_map_avail;
204 if (avail == st->sparse_map_size)
205 st->sparse_map = sparse_map =
206 x2nrealloc (sparse_map, &st->sparse_map_size, sizeof *sparse_map);
207 sparse_map[avail] = *sp;
208 st->sparse_map_avail = avail + 1;
209 }
210
211 /* Scan the sparse file byte-by-byte and create its map. */
212 static bool
213 sparse_scan_file_raw (struct tar_sparse_file *file)
214 {
215 struct tar_stat_info *st = file->stat_info;
216 int fd = file->fd;
217 char buffer[BLOCKSIZE];
218 size_t count = 0;
219 off_t offset = 0;
220 struct sp_array sp = {0, 0};
221
222 st->archive_file_size = 0;
223
224 if (!tar_sparse_scan (file, scan_begin, NULL))
225 return false;
226
227 while ((count = blocking_read (fd, buffer, sizeof buffer)) != 0
228 && count != SAFE_READ_ERROR)
229 {
230 /* Analyze the block. */
231 if (zero_block_p (buffer, count))
232 {
233 if (sp.numbytes)
234 {
235 sparse_add_map (st, &sp);
236 sp.numbytes = 0;
237 if (!tar_sparse_scan (file, scan_block, NULL))
238 return false;
239 }
240 }
241 else
242 {
243 if (sp.numbytes == 0)
244 sp.offset = offset;
245 sp.numbytes += count;
246 st->archive_file_size += count;
247 if (!tar_sparse_scan (file, scan_block, buffer))
248 return false;
249 }
250
251 offset += count;
252 }
253
254 /* save one more sparse segment of length 0 to indicate that
255 the file ends with a hole */
256 if (sp.numbytes == 0)
257 sp.offset = offset;
258
259 sparse_add_map (st, &sp);
260 st->archive_file_size += count;
261 return tar_sparse_scan (file, scan_end, NULL);
262 }
263
264 static bool
265 sparse_scan_file_wholesparse (struct tar_sparse_file *file)
266 {
267 struct tar_stat_info *st = file->stat_info;
268 struct sp_array sp = {0, 0};
269
270 /* Note that this function is called only for truly sparse files of size >= 1
271 block size (checked via ST_IS_SPARSE before). See the thread
272 http://www.mail-archive.com/bug-tar@gnu.org/msg04209.html for more info */
273 if (ST_NBLOCKS (st->stat) == 0)
274 {
275 st->archive_file_size = 0;
276 sp.offset = st->stat.st_size;
277 sparse_add_map (st, &sp);
278 return true;
279 }
280
281 return false;
282 }
283
284 #ifdef SEEK_HOLE
285 /* Try to engage SEEK_HOLE/SEEK_DATA feature. */
286 static bool
287 sparse_scan_file_seek (struct tar_sparse_file *file)
288 {
289 struct tar_stat_info *st = file->stat_info;
290 int fd = file->fd;
291 struct sp_array sp = {0, 0};
292 off_t offset = 0;
293 off_t data_offset;
294 off_t hole_offset;
295
296 st->archive_file_size = 0;
297
298 for (;;)
299 {
300 /* locate first chunk of data */
301 data_offset = lseek (fd, offset, SEEK_DATA);
302
303 if (data_offset == (off_t)-1)
304 /* ENXIO == EOF; error otherwise */
305 {
306 if (errno == ENXIO)
307 {
308 /* file ends with hole, add one more empty chunk of data */
309 sp.numbytes = 0;
310 sp.offset = st->stat.st_size;
311 sparse_add_map (st, &sp);
312 return true;
313 }
314 return false;
315 }
316
317 hole_offset = lseek (fd, data_offset, SEEK_HOLE);
318
319 /* according to specs, if FS does not fully support
320 SEEK_DATA/SEEK_HOLE it may just implement kind of "wrapper" around
321 classic lseek() call. We must detect it here and try to use other
322 hole-detection methods. */
323 if (offset == 0 /* first loop */
324 && data_offset == 0
325 && hole_offset == st->stat.st_size)
326 {
327 lseek (fd, 0, SEEK_SET);
328 return false;
329 }
330
331 sp.offset = data_offset;
332 sp.numbytes = hole_offset - data_offset;
333 sparse_add_map (st, &sp);
334
335 st->archive_file_size += sp.numbytes;
336 offset = hole_offset;
337 }
338 }
339 #endif
340
341 static bool
342 sparse_scan_file (struct tar_sparse_file *file)
343 {
344 /* always check for completely sparse files */
345 if (sparse_scan_file_wholesparse (file))
346 return true;
347
348 switch (hole_detection)
349 {
350 case HOLE_DETECTION_DEFAULT:
351 case HOLE_DETECTION_SEEK:
352 #ifdef SEEK_HOLE
353 if (sparse_scan_file_seek (file))
354 return true;
355 #else
356 if (hole_detection == HOLE_DETECTION_SEEK)
357 WARN((0, 0,
358 _("\"seek\" hole detection is not supported, using \"raw\".")));
359 /* fall back to "raw" for this and all other files */
360 hole_detection = HOLE_DETECTION_RAW;
361 #endif
362 FALLTHROUGH;
363 case HOLE_DETECTION_RAW:
364 if (sparse_scan_file_raw (file))
365 return true;
366 }
367
368 return false;
369 }
370
371 static struct tar_sparse_optab const oldgnu_optab;
372 static struct tar_sparse_optab const star_optab;
373 static struct tar_sparse_optab const pax_optab;
374
375 static bool
376 sparse_select_optab (struct tar_sparse_file *file)
377 {
378 switch (current_format == DEFAULT_FORMAT ? archive_format : current_format)
379 {
380 case V7_FORMAT:
381 case USTAR_FORMAT:
382 return false;
383
384 case OLDGNU_FORMAT:
385 case GNU_FORMAT: /*FIXME: This one should disappear? */
386 file->optab = &oldgnu_optab;
387 break;
388
389 case POSIX_FORMAT:
390 file->optab = &pax_optab;
391 break;
392
393 case STAR_FORMAT:
394 file->optab = &star_optab;
395 break;
396
397 default:
398 return false;
399 }
400 return true;
401 }
402
403 static bool
404 sparse_dump_region (struct tar_sparse_file *file, size_t i)
405 {
406 union block *blk;
407 off_t bytes_left = file->stat_info->sparse_map[i].numbytes;
408
409 if (!lseek_or_error (file, file->stat_info->sparse_map[i].offset))
410 return false;
411
412 while (bytes_left > 0)
413 {
414 size_t bufsize = (bytes_left > BLOCKSIZE) ? BLOCKSIZE : bytes_left;
415 size_t bytes_read;
416
417 blk = find_next_block ();
418 bytes_read = full_read (file->fd, blk->buffer, bufsize);
419 if (bytes_read == SAFE_READ_ERROR)
420 {
421 read_diag_details (file->stat_info->orig_file_name,
422 (file->stat_info->sparse_map[i].offset
423 + file->stat_info->sparse_map[i].numbytes
424 - bytes_left),
425 bufsize);
426 return false;
427 }
428 else if (bytes_read == 0)
429 {
430 if (errno != 0)
431 {
432 read_diag_details (file->stat_info->orig_file_name,
433 (file->stat_info->sparse_map[i].offset
434 + file->stat_info->sparse_map[i].numbytes
435 - bytes_left),
436 bufsize);
437 return false;
438 }
439 else
440 {
441 char buf[UINTMAX_STRSIZE_BOUND];
442 struct stat st;
443 size_t n;
444 if (fstat (file->fd, &st) == 0)
445 n = file->stat_info->stat.st_size - st.st_size;
446 else
447 n = file->stat_info->stat.st_size
448 - (file->stat_info->sparse_map[i].offset
449 + file->stat_info->sparse_map[i].numbytes
450 - bytes_left);
451
452 WARNOPT (WARN_FILE_SHRANK,
453 (0, 0,
454 ngettext ("%s: File shrank by %s byte; padding with zeros",
455 "%s: File shrank by %s bytes; padding with zeros",
456 n),
457 quotearg_colon (file->stat_info->orig_file_name),
458 STRINGIFY_BIGINT (n, buf)));
459 if (! ignore_failed_read_option)
460 set_exit_status (TAREXIT_DIFFERS);
461 return false;
462 }
463 }
464
465 memset (blk->buffer + bytes_read, 0, BLOCKSIZE - bytes_read);
466 bytes_left -= bytes_read;
467 file->dumped_size += bytes_read;
468 set_next_block_after (blk);
469 }
470
471 return true;
472 }
473
474 static bool
475 sparse_extract_region (struct tar_sparse_file *file, size_t i)
476 {
477 off_t write_size;
478
479 if (!lseek_or_error (file, file->stat_info->sparse_map[i].offset))
480 return false;
481
482 write_size = file->stat_info->sparse_map[i].numbytes;
483
484 if (write_size == 0)
485 {
486 /* Last block of the file is a hole */
487 if (file->seekable && sys_truncate (file->fd))
488 truncate_warn (file->stat_info->orig_file_name);
489 }
490 else while (write_size > 0)
491 {
492 size_t count;
493 size_t wrbytes = (write_size > BLOCKSIZE) ? BLOCKSIZE : write_size;
494 union block *blk = find_next_block ();
495 if (!blk)
496 {
497 ERROR ((0, 0, _("Unexpected EOF in archive")));
498 return false;
499 }
500 set_next_block_after (blk);
501 file->dumped_size += BLOCKSIZE;
502 count = blocking_write (file->fd, blk->buffer, wrbytes);
503 write_size -= count;
504 mv_size_left (file->stat_info->archive_file_size - file->dumped_size);
505 file->offset += count;
506 if (count != wrbytes)
507 {
508 write_error_details (file->stat_info->orig_file_name,
509 count, wrbytes);
510 return false;
511 }
512 }
513 return true;
514 }
515
516
517
518 /* Interface functions */
519 enum dump_status
520 sparse_dump_file (int fd, struct tar_stat_info *st)
521 {
522 bool rc;
523 struct tar_sparse_file file;
524
525 if (!tar_sparse_init (&file))
526 return dump_status_not_implemented;
527
528 file.stat_info = st;
529 file.fd = fd;
530 file.seekable = true; /* File *must* be seekable for dump to work */
531
532 rc = sparse_scan_file (&file);
533 if (rc && file.optab->dump_region)
534 {
535 tar_sparse_dump_header (&file);
536
537 if (fd >= 0)
538 {
539 size_t i;
540
541 mv_begin_write (file.stat_info->file_name,
542 file.stat_info->stat.st_size,
543 file.stat_info->archive_file_size - file.dumped_size);
544 for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++)
545 rc = tar_sparse_dump_region (&file, i);
546 }
547 }
548
549 pad_archive (file.stat_info->archive_file_size - file.dumped_size);
550 return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short;
551 }
552
553 bool
554 sparse_member_p (struct tar_stat_info *st)
555 {
556 struct tar_sparse_file file;
557
558 if (!tar_sparse_init (&file))
559 return false;
560 file.stat_info = st;
561 return tar_sparse_member_p (&file);
562 }
563
564 bool
565 sparse_fixup_header (struct tar_stat_info *st)
566 {
567 struct tar_sparse_file file;
568
569 if (!tar_sparse_init (&file))
570 return false;
571 file.stat_info = st;
572 return tar_sparse_fixup_header (&file);
573 }
574
575 enum dump_status
576 sparse_extract_file (int fd, struct tar_stat_info *st, off_t *size)
577 {
578 bool rc = true;
579 struct tar_sparse_file file;
580 size_t i;
581
582 if (!tar_sparse_init (&file))
583 {
584 *size = st->stat.st_size;
585 return dump_status_not_implemented;
586 }
587
588 file.stat_info = st;
589 file.fd = fd;
590 file.seekable = lseek (fd, 0, SEEK_SET) == 0;
591 file.offset = 0;
592
593 rc = tar_sparse_decode_header (&file);
594 for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++)
595 rc = tar_sparse_extract_region (&file, i);
596 *size = file.stat_info->archive_file_size - file.dumped_size;
597 return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short;
598 }
599
600 enum dump_status
601 sparse_skim_file (struct tar_stat_info *st, bool must_copy)
602 {
603 bool rc = true;
604 struct tar_sparse_file file;
605
606 if (!tar_sparse_init (&file))
607 return dump_status_not_implemented;
608
609 file.stat_info = st;
610 file.fd = -1;
611
612 rc = tar_sparse_decode_header (&file);
613 skim_file (file.stat_info->archive_file_size - file.dumped_size, must_copy);
614 return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short;
615 }
616
617
618 static bool
619 check_sparse_region (struct tar_sparse_file *file, off_t beg, off_t end)
620 {
621 if (!lseek_or_error (file, beg))
622 return false;
623
624 while (beg < end)
625 {
626 size_t bytes_read;
627 size_t rdsize = BLOCKSIZE < end - beg ? BLOCKSIZE : end - beg;
628 char diff_buffer[BLOCKSIZE];
629
630 bytes_read = full_read (file->fd, diff_buffer, rdsize);
631 if (bytes_read == SAFE_READ_ERROR)
632 {
633 read_diag_details (file->stat_info->orig_file_name,
634 beg,
635 rdsize);
636 return false;
637 }
638 else if (bytes_read == 0)
639 {
640 if (errno != 0)
641 read_diag_details (file->stat_info->orig_file_name,
642 beg,
643 rdsize);
644 else
645 report_difference (file->stat_info, _("Size differs"));
646 return false;
647 }
648
649 if (!zero_block_p (diff_buffer, bytes_read))
650 {
651 char begbuf[INT_BUFSIZE_BOUND (off_t)];
652 report_difference (file->stat_info,
653 _("File fragment at %s is not a hole"),
654 offtostr (beg, begbuf));
655 return false;
656 }
657
658 beg += bytes_read;
659 }
660
661 return true;
662 }
663
664 static bool
665 check_data_region (struct tar_sparse_file *file, size_t i)
666 {
667 off_t size_left;
668
669 if (!lseek_or_error (file, file->stat_info->sparse_map[i].offset))
670 return false;
671 size_left = file->stat_info->sparse_map[i].numbytes;
672 mv_size_left (file->stat_info->archive_file_size - file->dumped_size);
673
674 while (size_left > 0)
675 {
676 size_t bytes_read;
677 size_t rdsize = (size_left > BLOCKSIZE) ? BLOCKSIZE : size_left;
678 char diff_buffer[BLOCKSIZE];
679
680 union block *blk = find_next_block ();
681 if (!blk)
682 {
683 ERROR ((0, 0, _("Unexpected EOF in archive")));
684 return false;
685 }
686 set_next_block_after (blk);
687 file->dumped_size += BLOCKSIZE;
688 bytes_read = full_read (file->fd, diff_buffer, rdsize);
689 if (bytes_read == SAFE_READ_ERROR)
690 {
691 read_diag_details (file->stat_info->orig_file_name,
692 (file->stat_info->sparse_map[i].offset
693 + file->stat_info->sparse_map[i].numbytes
694 - size_left),
695 rdsize);
696 return false;
697 }
698 else if (bytes_read == 0)
699 {
700 if (errno != 0)
701 read_diag_details (file->stat_info->orig_file_name,
702 (file->stat_info->sparse_map[i].offset
703 + file->stat_info->sparse_map[i].numbytes
704 - size_left),
705 rdsize);
706 else
707 report_difference (¤t_stat_info, _("Size differs"));
708 return false;
709 }
710 size_left -= bytes_read;
711 mv_size_left (file->stat_info->archive_file_size - file->dumped_size);
712 if (memcmp (blk->buffer, diff_buffer, bytes_read))
713 {
714 report_difference (file->stat_info, _("Contents differ"));
715 return false;
716 }
717 }
718 return true;
719 }
720
721 bool
722 sparse_diff_file (int fd, struct tar_stat_info *st)
723 {
724 bool rc = true;
725 struct tar_sparse_file file;
726 size_t i;
727 off_t offset = 0;
728
729 if (!tar_sparse_init (&file))
730 return false;
731
732 file.stat_info = st;
733 file.fd = fd;
734 file.seekable = true; /* File *must* be seekable for compare to work */
735
736 rc = tar_sparse_decode_header (&file);
737 mv_begin_read (st);
738 for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++)
739 {
740 rc = check_sparse_region (&file,
741 offset, file.stat_info->sparse_map[i].offset)
742 && check_data_region (&file, i);
743 offset = file.stat_info->sparse_map[i].offset
744 + file.stat_info->sparse_map[i].numbytes;
745 }
746
747 if (!rc)
748 skim_file (file.stat_info->archive_file_size - file.dumped_size, false);
749 mv_end ();
750
751 tar_sparse_done (&file);
752 return rc;
753 }
754
755
756 /* Old GNU Format. The sparse file information is stored in the
757 oldgnu_header in the following manner:
758
759 The header is marked with type 'S'. Its 'size' field contains
760 the cumulative size of all non-empty blocks of the file. The
761 actual file size is stored in 'realsize' member of oldgnu_header.
762
763 The map of the file is stored in a list of 'struct sparse'.
764 Each struct contains offset to the block of data and its
765 size (both as octal numbers). The first file header contains
766 at most 4 such structs (SPARSES_IN_OLDGNU_HEADER). If the map
767 contains more structs, then the field 'isextended' of the main
768 header is set to 1 (binary) and the 'struct sparse_header'
769 header follows, containing at most 21 following structs
770 (SPARSES_IN_SPARSE_HEADER). If more structs follow, 'isextended'
771 field of the extended header is set and the next extension header
772 follows, etc... */
773
/* Outcome of adding one on-disk sparse descriptor to the map.  */
enum oldgnu_add_status
  {
    add_ok,             /* descriptor was valid and has been added */
    add_finish,         /* descriptor is empty: end of the list */
    add_fail            /* descriptor is invalid */
  };
780
781 static bool
782 oldgnu_sparse_member_p (MAYBE_UNUSED struct tar_sparse_file *file)
783 {
784 return current_header->header.typeflag == GNUTYPE_SPARSE;
785 }
786
787 /* Add a sparse item to the sparse file and its obstack */
788 static enum oldgnu_add_status
789 oldgnu_add_sparse (struct tar_sparse_file *file, struct sparse *s)
790 {
791 struct sp_array sp;
792
793 if (s->numbytes[0] == '\0')
794 return add_finish;
795 sp.offset = OFF_FROM_HEADER (s->offset);
796 sp.numbytes = OFF_FROM_HEADER (s->numbytes);
797 if (sp.offset < 0 || sp.numbytes < 0
798 || INT_ADD_OVERFLOW (sp.offset, sp.numbytes)
799 || file->stat_info->stat.st_size < sp.offset + sp.numbytes
800 || file->stat_info->archive_file_size < 0)
801 return add_fail;
802
803 sparse_add_map (file->stat_info, &sp);
804 return add_ok;
805 }
806
807 static bool
808 oldgnu_fixup_header (struct tar_sparse_file *file)
809 {
810 /* NOTE! st_size was initialized from the header
811 which actually contains archived size. The following fixes it */
812 off_t realsize = OFF_FROM_HEADER (current_header->oldgnu_header.realsize);
813 file->stat_info->archive_file_size = file->stat_info->stat.st_size;
814 file->stat_info->stat.st_size = max (0, realsize);
815 return 0 <= realsize;
816 }
817
818 /* Convert old GNU format sparse data to internal representation */
819 static bool
820 oldgnu_get_sparse_info (struct tar_sparse_file *file)
821 {
822 size_t i;
823 union block *h = current_header;
824 int ext_p;
825 enum oldgnu_add_status rc;
826
827 file->stat_info->sparse_map_avail = 0;
828 for (i = 0; i < SPARSES_IN_OLDGNU_HEADER; i++)
829 {
830 rc = oldgnu_add_sparse (file, &h->oldgnu_header.sp[i]);
831 if (rc != add_ok)
832 break;
833 }
834
835 for (ext_p = h->oldgnu_header.isextended;
836 rc == add_ok && ext_p; ext_p = h->sparse_header.isextended)
837 {
838 h = find_next_block ();
839 if (!h)
840 {
841 ERROR ((0, 0, _("Unexpected EOF in archive")));
842 return false;
843 }
844 set_next_block_after (h);
845 for (i = 0; i < SPARSES_IN_SPARSE_HEADER && rc == add_ok; i++)
846 rc = oldgnu_add_sparse (file, &h->sparse_header.sp[i]);
847 }
848
849 if (rc == add_fail)
850 {
851 ERROR ((0, 0, _("%s: invalid sparse archive member"),
852 file->stat_info->orig_file_name));
853 return false;
854 }
855 return true;
856 }
857
858 static void
859 oldgnu_store_sparse_info (struct tar_sparse_file *file, size_t *pindex,
860 struct sparse *sp, size_t sparse_size)
861 {
862 for (; *pindex < file->stat_info->sparse_map_avail
863 && sparse_size > 0; sparse_size--, sp++, ++*pindex)
864 {
865 OFF_TO_CHARS (file->stat_info->sparse_map[*pindex].offset,
866 sp->offset);
867 OFF_TO_CHARS (file->stat_info->sparse_map[*pindex].numbytes,
868 sp->numbytes);
869 }
870 }
871
872 static bool
873 oldgnu_dump_header (struct tar_sparse_file *file)
874 {
875 off_t block_ordinal = current_block_ordinal ();
876 union block *blk;
877 size_t i;
878
879 blk = start_header (file->stat_info);
880 blk->header.typeflag = GNUTYPE_SPARSE;
881 if (file->stat_info->sparse_map_avail > SPARSES_IN_OLDGNU_HEADER)
882 blk->oldgnu_header.isextended = 1;
883
884 /* Store the real file size */
885 OFF_TO_CHARS (file->stat_info->stat.st_size, blk->oldgnu_header.realsize);
886 /* Store the effective (shrunken) file size */
887 OFF_TO_CHARS (file->stat_info->archive_file_size, blk->header.size);
888
889 i = 0;
890 oldgnu_store_sparse_info (file, &i,
891 blk->oldgnu_header.sp,
892 SPARSES_IN_OLDGNU_HEADER);
893 blk->oldgnu_header.isextended = i < file->stat_info->sparse_map_avail;
894 finish_header (file->stat_info, blk, block_ordinal);
895
896 while (i < file->stat_info->sparse_map_avail)
897 {
898 blk = find_next_block ();
899 memset (blk->buffer, 0, BLOCKSIZE);
900 oldgnu_store_sparse_info (file, &i,
901 blk->sparse_header.sp,
902 SPARSES_IN_SPARSE_HEADER);
903 if (i < file->stat_info->sparse_map_avail)
904 blk->sparse_header.isextended = 1;
905 set_next_block_after (blk);
906 }
907 return true;
908 }
909
910 static struct tar_sparse_optab const oldgnu_optab = {
911 NULL, /* No init function */
912 NULL, /* No done function */
913 oldgnu_sparse_member_p,
914 oldgnu_dump_header,
915 oldgnu_fixup_header,
916 oldgnu_get_sparse_info,
917 NULL, /* No scan_block function */
918 sparse_dump_region,
919 sparse_extract_region,
920 };
921
922
923 /* Star */
924
925 static bool
926 star_sparse_member_p (MAYBE_UNUSED struct tar_sparse_file *file)
927 {
928 return current_header->header.typeflag == GNUTYPE_SPARSE;
929 }
930
931 static bool
932 star_fixup_header (struct tar_sparse_file *file)
933 {
934 /* NOTE! st_size was initialized from the header
935 which actually contains archived size. The following fixes it */
936 off_t realsize = OFF_FROM_HEADER (current_header->star_in_header.realsize);
937 file->stat_info->archive_file_size = file->stat_info->stat.st_size;
938 file->stat_info->stat.st_size = max (0, realsize);
939 return 0 <= realsize;
940 }
941
942 /* Convert STAR format sparse data to internal representation */
943 static bool
944 star_get_sparse_info (struct tar_sparse_file *file)
945 {
946 size_t i;
947 union block *h = current_header;
948 int ext_p;
949 enum oldgnu_add_status rc = add_ok;
950
951 file->stat_info->sparse_map_avail = 0;
952
953 if (h->star_in_header.prefix[0] == '\0'
954 && h->star_in_header.sp[0].offset[10] != '\0')
955 {
956 /* Old star format */
957 for (i = 0; i < SPARSES_IN_STAR_HEADER; i++)
958 {
959 rc = oldgnu_add_sparse (file, &h->star_in_header.sp[i]);
960 if (rc != add_ok)
961 break;
962 }
963 ext_p = h->star_in_header.isextended;
964 }
965 else
966 ext_p = 1;
967
968 for (; rc == add_ok && ext_p; ext_p = h->star_ext_header.isextended)
969 {
970 h = find_next_block ();
971 if (!h)
972 {
973 ERROR ((0, 0, _("Unexpected EOF in archive")));
974 return false;
975 }
976 set_next_block_after (h);
977 for (i = 0; i < SPARSES_IN_STAR_EXT_HEADER && rc == add_ok; i++)
978 rc = oldgnu_add_sparse (file, &h->star_ext_header.sp[i]);
979 file->dumped_size += BLOCKSIZE;
980 }
981
982 if (rc == add_fail)
983 {
984 ERROR ((0, 0, _("%s: invalid sparse archive member"),
985 file->stat_info->orig_file_name));
986 return false;
987 }
988 return true;
989 }
990
991
992 static struct tar_sparse_optab const star_optab = {
993 NULL, /* No init function */
994 NULL, /* No done function */
995 star_sparse_member_p,
996 NULL,
997 star_fixup_header,
998 star_get_sparse_info,
999 NULL, /* No scan_block function */
1000 NULL, /* No dump region function */
1001 sparse_extract_region,
1002 };
1003
1004
1005 /* GNU PAX sparse file format. There are several versions:
1006
1007 * 0.0
1008
1009 The initial version of sparse format used by tar 1.14-1.15.1.
1010 The sparse file map is stored in x header:
1011
1012 GNU.sparse.size Real size of the stored file
1013 GNU.sparse.numblocks Number of blocks in the sparse map
1014 repeat numblocks time
1015 GNU.sparse.offset Offset of the next data block
1016 GNU.sparse.numbytes Size of the next data block
1017 end repeat
1018
1019 This has been reported as conflicting with the POSIX specs. The reason is
1020 that offsets and sizes of non-zero data blocks were stored in multiple
1021 instances of GNU.sparse.offset/GNU.sparse.numbytes variables, whereas
1022 POSIX requires the latest occurrence of the variable to override all
1023 previous occurrences.
1024
1025 To avoid this incompatibility two following versions were introduced.
1026
1027 * 0.1
1028
1029 Used by tar 1.15.2 -- 1.15.91 (alpha releases).
1030
1031 The sparse file map is stored in
1032 x header:
1033
1034 GNU.sparse.size Real size of the stored file
1035 GNU.sparse.numblocks Number of blocks in the sparse map
1036 GNU.sparse.map Map of non-null data chunks. A string consisting
1037 of comma-separated values "offset,size[,offset,size]..."
1038
1039 The resulting GNU.sparse.map string can be *very* long. While POSIX does not
1040 impose any limit on the length of a x header variable, this can confuse some
1041 tars.
1042
1043 * 1.0
1044
1045 Starting from this version, the exact sparse format version is specified
1046 explicitly in the header using the following variables:
1047
1048 GNU.sparse.major Major version
1049 GNU.sparse.minor Minor version
1050
1051 X header keeps the following variables:
1052
1053 GNU.sparse.name Real file name of the sparse file
1054 GNU.sparse.realsize Real size of the stored file (corresponds to the old
1055 GNU.sparse.size variable)
1056
1057 The name field of the ustar header is constructed using the pattern
1058 "%d/GNUSparseFile.%p/%f".
1059
1060 The sparse map itself is stored in the file data block, preceding the actual
1061 file data. It consists of a series of octal numbers of arbitrary length,
1062 delimited by newlines. The map is padded with nulls to the nearest block
1063 boundary.
1064
1065 The first number gives the number of entries in the map. Following are map
1066 entries, each one consisting of two numbers giving the offset and size of
1067 the data block it describes.
1068
1069 The format is designed in such a way that non-posix aware tars and tars not
1070 supporting GNU.sparse.* keywords will extract each sparse file in its
1071 condensed form with the file map attached and will place it into a separate
1072 directory. Then, using a simple program it would be possible to expand the
1073 file to its original form even without GNU tar.
1074
1075 By default, v.1.0 archives are created. To use other formats,
1076 --sparse-version option is provided. Additionally, v.0.0 can be obtained
1077 by deleting GNU.sparse.map from 0.1 format: --sparse-version 0.1
1078 --pax-option delete=GNU.sparse.map
1079 */
1080
1081 static bool
1082 pax_sparse_member_p (struct tar_sparse_file *file)
1083 {
1084 return file->stat_info->sparse_map_avail > 0
1085 || file->stat_info->sparse_major > 0;
1086 }
1087
1088 /* Start a header that uses the effective (shrunken) file size. */
1089 static union block *
1090 pax_start_header (struct tar_stat_info *st)
1091 {
1092 off_t realsize = st->stat.st_size;
1093 union block *blk;
1094 st->stat.st_size = st->archive_file_size;
1095 blk = start_header (st);
1096 st->stat.st_size = realsize;
1097 return blk;
1098 }
1099
1100 static bool
1101 pax_dump_header_0 (struct tar_sparse_file *file)
1102 {
1103 off_t block_ordinal = current_block_ordinal ();
1104 union block *blk;
1105 size_t i;
1106 char nbuf[UINTMAX_STRSIZE_BOUND];
1107 struct sp_array *map = file->stat_info->sparse_map;
1108 char *save_file_name = NULL;
1109
1110 /* Store the real file size */
1111 xheader_store ("GNU.sparse.size", file->stat_info, NULL);
1112 xheader_store ("GNU.sparse.numblocks", file->stat_info, NULL);
1113
1114 if (xheader_keyword_deleted_p ("GNU.sparse.map")
1115 || tar_sparse_minor == 0)
1116 {
1117 for (i = 0; i < file->stat_info->sparse_map_avail; i++)
1118 {
1119 xheader_store ("GNU.sparse.offset", file->stat_info, &i);
1120 xheader_store ("GNU.sparse.numbytes", file->stat_info, &i);
1121 }
1122 }
1123 else
1124 {
1125 xheader_store ("GNU.sparse.name", file->stat_info, NULL);
1126 save_file_name = file->stat_info->file_name;
1127 file->stat_info->file_name = xheader_format_name (file->stat_info,
1128 "%d/GNUSparseFile.%p/%f", 0);
1129
1130 xheader_string_begin (&file->stat_info->xhdr);
1131 for (i = 0; i < file->stat_info->sparse_map_avail; i++)
1132 {
1133 if (i)
1134 xheader_string_add (&file->stat_info->xhdr, ",");
1135 xheader_string_add (&file->stat_info->xhdr,
1136 umaxtostr (map[i].offset, nbuf));
1137 xheader_string_add (&file->stat_info->xhdr, ",");
1138 xheader_string_add (&file->stat_info->xhdr,
1139 umaxtostr (map[i].numbytes, nbuf));
1140 }
1141 if (!xheader_string_end (&file->stat_info->xhdr,
1142 "GNU.sparse.map"))
1143 {
1144 free (file->stat_info->file_name);
1145 file->stat_info->file_name = save_file_name;
1146 return false;
1147 }
1148 }
1149 blk = pax_start_header (file->stat_info);
1150 finish_header (file->stat_info, blk, block_ordinal);
1151 if (save_file_name)
1152 {
1153 free (file->stat_info->file_name);
1154 file->stat_info->file_name = save_file_name;
1155 }
1156 return true;
1157 }
1158
1159 static bool
1160 pax_dump_header_1 (struct tar_sparse_file *file)
1161 {
1162 off_t block_ordinal = current_block_ordinal ();
1163 union block *blk;
1164 char *p, *q;
1165 size_t i;
1166 char nbuf[UINTMAX_STRSIZE_BOUND];
1167 off_t size = 0;
1168 struct sp_array *map = file->stat_info->sparse_map;
1169 char *save_file_name = file->stat_info->file_name;
1170
1171 #define COPY_STRING(b,dst,src) do \
1172 { \
1173 char *endp = b->buffer + BLOCKSIZE; \
1174 char const *srcp = src; \
1175 while (*srcp) \
1176 { \
1177 if (dst == endp) \
1178 { \
1179 set_next_block_after (b); \
1180 b = find_next_block (); \
1181 dst = b->buffer; \
1182 endp = b->buffer + BLOCKSIZE; \
1183 } \
1184 *dst++ = *srcp++; \
1185 } \
1186 } while (0)
1187
1188 /* Compute stored file size */
1189 p = umaxtostr (file->stat_info->sparse_map_avail, nbuf);
1190 size += strlen (p) + 1;
1191 for (i = 0; i < file->stat_info->sparse_map_avail; i++)
1192 {
1193 p = umaxtostr (map[i].offset, nbuf);
1194 size += strlen (p) + 1;
1195 p = umaxtostr (map[i].numbytes, nbuf);
1196 size += strlen (p) + 1;
1197 }
1198 size = (size + BLOCKSIZE - 1) / BLOCKSIZE;
1199 file->stat_info->archive_file_size += size * BLOCKSIZE;
1200 file->dumped_size += size * BLOCKSIZE;
1201
1202 /* Store sparse file identification */
1203 xheader_store ("GNU.sparse.major", file->stat_info, NULL);
1204 xheader_store ("GNU.sparse.minor", file->stat_info, NULL);
1205 xheader_store ("GNU.sparse.name", file->stat_info, NULL);
1206 xheader_store ("GNU.sparse.realsize", file->stat_info, NULL);
1207
1208 file->stat_info->file_name =
1209 xheader_format_name (file->stat_info, "%d/GNUSparseFile.%p/%f", 0);
1210 /* Make sure the created header name is shorter than NAME_FIELD_SIZE: */
1211 if (strlen (file->stat_info->file_name) > NAME_FIELD_SIZE)
1212 file->stat_info->file_name[NAME_FIELD_SIZE] = 0;
1213
1214 blk = pax_start_header (file->stat_info);
1215 finish_header (file->stat_info, blk, block_ordinal);
1216 free (file->stat_info->file_name);
1217 file->stat_info->file_name = save_file_name;
1218
1219 blk = find_next_block ();
1220 q = blk->buffer;
1221 p = umaxtostr (file->stat_info->sparse_map_avail, nbuf);
1222 COPY_STRING (blk, q, p);
1223 COPY_STRING (blk, q, "\n");
1224 for (i = 0; i < file->stat_info->sparse_map_avail; i++)
1225 {
1226 p = umaxtostr (map[i].offset, nbuf);
1227 COPY_STRING (blk, q, p);
1228 COPY_STRING (blk, q, "\n");
1229 p = umaxtostr (map[i].numbytes, nbuf);
1230 COPY_STRING (blk, q, p);
1231 COPY_STRING (blk, q, "\n");
1232 }
1233 memset (q, 0, BLOCKSIZE - (q - blk->buffer));
1234 set_next_block_after (blk);
1235 return true;
1236 }
1237
1238 static bool
1239 pax_dump_header (struct tar_sparse_file *file)
1240 {
1241 file->stat_info->sparse_major = tar_sparse_major;
1242 file->stat_info->sparse_minor = tar_sparse_minor;
1243
1244 return (file->stat_info->sparse_major == 0) ?
1245 pax_dump_header_0 (file) : pax_dump_header_1 (file);
1246 }
1247
/* Parse ARG as an unsigned decimal number.

   ARG must start with a decimal digit and consist of nothing but the
   number; the value must fit in uintmax_t and must not exceed MAXVAL.
   On success store the value in *NUM and return true.  On failure
   return false and leave *NUM untouched.  errno is modified.  */
static bool
decode_num (uintmax_t *num, char const *arg, uintmax_t maxval)
{
  char *end;
  uintmax_t value;

  /* strtoumax itself would skip leading white space and accept a
     sign, so require the very first character to be a digit.  */
  if (*arg < '0' || '9' < *arg)
    return false;

  errno = 0;
  value = strtoumax (arg, &end, 10);

  /* Reject overflow, values above the limit, and trailing garbage.  */
  if (errno == ERANGE || maxval < value || *end != '\0')
    return false;

  *num = value;
  return true;
}
1266
1267 static bool
1268 pax_decode_header (struct tar_sparse_file *file)
1269 {
1270 if (file->stat_info->sparse_major > 0)
1271 {
1272 uintmax_t u;
1273 char nbuf[UINTMAX_STRSIZE_BOUND];
1274 union block *blk;
1275 char *p;
1276 size_t i;
1277 off_t start;
1278
1279 #define COPY_BUF(b,buf,src) do \
1280 { \
1281 char *endp = b->buffer + BLOCKSIZE; \
1282 char *dst = buf; \
1283 do \
1284 { \
1285 if (dst == buf + UINTMAX_STRSIZE_BOUND -1) \
1286 { \
1287 ERROR ((0, 0, _("%s: numeric overflow in sparse archive member"), \
1288 file->stat_info->orig_file_name)); \
1289 return false; \
1290 } \
1291 if (src == endp) \
1292 { \
1293 set_next_block_after (b); \
1294 b = find_next_block (); \
1295 if (!b) \
1296 FATAL_ERROR ((0, 0, _("Unexpected EOF in archive"))); \
1297 src = b->buffer; \
1298 endp = b->buffer + BLOCKSIZE; \
1299 } \
1300 *dst = *src++; \
1301 } \
1302 while (*dst++ != '\n'); \
1303 dst[-1] = 0; \
1304 } while (0)
1305
1306 start = current_block_ordinal ();
1307 set_next_block_after (current_header);
1308 blk = find_next_block ();
1309 if (!blk)
1310 FATAL_ERROR ((0, 0, _("Unexpected EOF in archive")));
1311 p = blk->buffer;
1312 COPY_BUF (blk,nbuf,p);
1313 if (!decode_num (&u, nbuf, TYPE_MAXIMUM (size_t)))
1314 {
1315 ERROR ((0, 0, _("%s: malformed sparse archive member"),
1316 file->stat_info->orig_file_name));
1317 return false;
1318 }
1319 file->stat_info->sparse_map_size = u;
1320 file->stat_info->sparse_map = xcalloc (file->stat_info->sparse_map_size,
1321 sizeof (*file->stat_info->sparse_map));
1322 file->stat_info->sparse_map_avail = 0;
1323 for (i = 0; i < file->stat_info->sparse_map_size; i++)
1324 {
1325 struct sp_array sp;
1326
1327 COPY_BUF (blk,nbuf,p);
1328 if (!decode_num (&u, nbuf, TYPE_MAXIMUM (off_t)))
1329 {
1330 ERROR ((0, 0, _("%s: malformed sparse archive member"),
1331 file->stat_info->orig_file_name));
1332 return false;
1333 }
1334 sp.offset = u;
1335 COPY_BUF (blk,nbuf,p);
1336 if (!decode_num (&u, nbuf, TYPE_MAXIMUM (off_t))
1337 || INT_ADD_OVERFLOW (sp.offset, u)
1338 || file->stat_info->stat.st_size < sp.offset + u)
1339 {
1340 ERROR ((0, 0, _("%s: malformed sparse archive member"),
1341 file->stat_info->orig_file_name));
1342 return false;
1343 }
1344 sp.numbytes = u;
1345 sparse_add_map (file->stat_info, &sp);
1346 }
1347 set_next_block_after (blk);
1348
1349 file->dumped_size += BLOCKSIZE * (current_block_ordinal () - start);
1350 }
1351
1352 return true;
1353 }
1354
1355 static struct tar_sparse_optab const pax_optab = {
1356 NULL, /* No init function */
1357 NULL, /* No done function */
1358 pax_sparse_member_p,
1359 pax_dump_header,
1360 NULL,
1361 pax_decode_header,
1362 NULL, /* No scan_block function */
1363 sparse_dump_region,
1364 sparse_extract_region,
1365 };