python (3.12.0)
1 #!/usr/bin/env python3
2 #-------------------------------------------------------------------
3 # tarfile.py
4 #-------------------------------------------------------------------
5 # Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
6 # All rights reserved.
7 #
8 # Permission is hereby granted, free of charge, to any person
9 # obtaining a copy of this software and associated documentation
10 # files (the "Software"), to deal in the Software without
11 # restriction, including without limitation the rights to use,
12 # copy, modify, merge, publish, distribute, sublicense, and/or sell
13 # copies of the Software, and to permit persons to whom the
14 # Software is furnished to do so, subject to the following
15 # conditions:
16 #
17 # The above copyright notice and this permission notice shall be
18 # included in all copies or substantial portions of the Software.
19 #
20 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22 # OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24 # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27 # OTHER DEALINGS IN THE SOFTWARE.
28 #
29 """Read from and write to tar format archives.
30 """
31
32 version = "0.9.0"
33 __author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)"
34 __credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
35
36 #---------
37 # Imports
38 #---------
39 from builtins import open as bltn_open
40 import sys
41 import os
42 import io
43 import shutil
44 import stat
45 import time
46 import struct
47 import copy
48 import re
49 import warnings
50
51 try:
52 import pwd
53 except ImportError:
54 pwd = None
55 try:
56 import grp
57 except ImportError:
58 grp = None
59
60 # os.symlink on Windows prior to 6.0 raises NotImplementedError
61 # OSError (winerror=1314) will be raised if the caller does not hold the
62 # SeCreateSymbolicLinkPrivilege privilege
63 symlink_exception = (AttributeError, NotImplementedError, OSError)
64
65 # from tarfile import *
66 __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError",
67 "CompressionError", "StreamError", "ExtractError", "HeaderError",
68 "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT",
69 "DEFAULT_FORMAT", "open","fully_trusted_filter", "data_filter",
70 "tar_filter", "FilterError", "AbsoluteLinkError",
71 "OutsideDestinationError", "SpecialFileError", "AbsolutePathError",
72 "LinkOutsideDestinationError"]
73
74
75 #---------------------------------------------------------
76 # tar constants
77 #---------------------------------------------------------
78 NUL = b"\0" # the null character
79 BLOCKSIZE = 512 # length of processing blocks
80 RECORDSIZE = BLOCKSIZE * 20 # length of records
81 GNU_MAGIC = b"ustar \0" # magic gnu tar string
82 POSIX_MAGIC = b"ustar\x0000" # magic posix tar string
83
84 LENGTH_NAME = 100 # maximum length of a filename
85 LENGTH_LINK = 100 # maximum length of a linkname
86 LENGTH_PREFIX = 155 # maximum length of the prefix field
87
88 REGTYPE = b"0" # regular file
89 AREGTYPE = b"\0" # regular file
90 LNKTYPE = b"1" # link (inside tarfile)
91 SYMTYPE = b"2" # symbolic link
92 CHRTYPE = b"3" # character special device
93 BLKTYPE = b"4" # block special device
94 DIRTYPE = b"5" # directory
95 FIFOTYPE = b"6" # fifo special device
96 CONTTYPE = b"7" # contiguous file
97
98 GNUTYPE_LONGNAME = b"L" # GNU tar longname
99 GNUTYPE_LONGLINK = b"K" # GNU tar longlink
100 GNUTYPE_SPARSE = b"S" # GNU tar sparse file
101
102 XHDTYPE = b"x" # POSIX.1-2001 extended header
103 XGLTYPE = b"g" # POSIX.1-2001 global header
104 SOLARIS_XHDTYPE = b"X" # Solaris extended header
105
106 USTAR_FORMAT = 0 # POSIX.1-1988 (ustar) format
107 GNU_FORMAT = 1 # GNU tar format
108 PAX_FORMAT = 2 # POSIX.1-2001 (pax) format
109 DEFAULT_FORMAT = PAX_FORMAT
110
111 #---------------------------------------------------------
112 # tarfile constants
113 #---------------------------------------------------------
114 # File types that tarfile supports:
115 SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
116 SYMTYPE, DIRTYPE, FIFOTYPE,
117 CONTTYPE, CHRTYPE, BLKTYPE,
118 GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
119 GNUTYPE_SPARSE)
120
121 # File types that will be treated as a regular file.
122 REGULAR_TYPES = (REGTYPE, AREGTYPE,
123 CONTTYPE, GNUTYPE_SPARSE)
124
125 # File types that are part of the GNU tar format.
126 GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
127 GNUTYPE_SPARSE)
128
129 # Fields from a pax header that override a TarInfo attribute.
130 PAX_FIELDS = ("path", "linkpath", "size", "mtime",
131 "uid", "gid", "uname", "gname")
132
133 # Fields from a pax header that are affected by hdrcharset.
134 PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}
135
136 # Fields in a pax header that are numbers, all other fields
137 # are treated as strings.
138 PAX_NUMBER_FIELDS = {
139 "atime": float,
140 "ctime": float,
141 "mtime": float,
142 "uid": int,
143 "gid": int,
144 "size": int
145 }
146
147 #---------------------------------------------------------
148 # initialization
149 #---------------------------------------------------------
150 if os.name == "nt":
151 ENCODING = "utf-8"
152 else:
153 ENCODING = sys.getfilesystemencoding()
154
155 #---------------------------------------------------------
156 # Some useful functions
157 #---------------------------------------------------------
158
159 def stn(s, length, encoding, errors):
160 """Convert a string to a null-terminated bytes object.
161 """
162 if s is None:
163 raise ValueError("metadata cannot contain None")
164 s = s.encode(encoding, errors)
165 return s[:length] + (length - len(s)) * NUL
166
167 def nts(s, encoding, errors):
168 """Convert a null-terminated bytes object to a string.
169 """
170 p = s.find(b"\0")
171 if p != -1:
172 s = s[:p]
173 return s.decode(encoding, errors)
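
# Example (illustrative sketch): stn() pads or truncates to the exact field
# length, and nts() undoes it by stopping at the first NUL byte:
#     >>> stn("foo", 8, "utf-8", "strict")
#     b'foo\x00\x00\x00\x00\x00'
#     >>> nts(b'foo\x00\x00\x00\x00\x00', "utf-8", "strict")
#     'foo'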
174
175 def nti(s):
176 """Convert a number field to a python number.
177 """
178 # There are two possible encodings for a number field, see
179 # itn() below.
180 if s[0] in (0o200, 0o377):
181 n = 0
182 for i in range(len(s) - 1):
183 n <<= 8
184 n += s[i + 1]
185 if s[0] == 0o377:
186 n = -(256 ** (len(s) - 1) - n)
187 else:
188 try:
189 s = nts(s, "ascii", "strict")
190 n = int(s.strip() or "0", 8)
191 except ValueError:
192 raise InvalidHeaderError("invalid header")
193 return n
194
195 def itn(n, digits=8, format=DEFAULT_FORMAT):
196 """Convert a python number to a number field.
197 """
198 # POSIX 1003.1-1988 requires numbers to be encoded as a string of
199 # octal digits followed by a null-byte, this allows values up to
200 # (8**(digits-1))-1. GNU tar allows storing numbers greater than
201     # that if necessary. A leading 0o200 or 0o377 byte indicates this
202 # particular encoding, the following digits-1 bytes are a big-endian
203 # base-256 representation. This allows values up to (256**(digits-1))-1.
204 # A 0o200 byte indicates a positive number, a 0o377 byte a negative
205 # number.
206 original_n = n
207 n = int(n)
208 if 0 <= n < 8 ** (digits - 1):
209 s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
210 elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
211 if n >= 0:
212 s = bytearray([0o200])
213 else:
214 s = bytearray([0o377])
215 n = 256 ** digits + n
216
217 for i in range(digits - 1):
218 s.insert(1, n & 0o377)
219 n >>= 8
220 else:
221 raise ValueError("overflow in number field")
222
223 return s
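
# Example (illustrative sketch): small values are written in the POSIX octal
# form, larger ones fall back to GNU base-256 (leading 0o200 byte), and nti()
# reverses both encodings:
#     >>> itn(0o755)
#     b'0000755\x00'
#     >>> nti(b'0000755\x00')
#     493
#     >>> nti(itn(8 ** 7, digits=8, format=GNU_FORMAT)) == 8 ** 7
#     True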
224
225 def calc_chksums(buf):
226 """Calculate the checksum for a member's header by summing up all
227 characters except for the chksum field which is treated as if
228 it was filled with spaces. According to the GNU tar sources,
229 some tars (Sun and NeXT) calculate chksum with signed char,
230 which will be different if there are chars in the buffer with
231 the high bit set. So we calculate two checksums, unsigned and
232 signed.
233 """
234 unsigned_chksum = 256 + sum(struct.unpack_from("148B8x356B", buf))
235 signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
236 return unsigned_chksum, signed_chksum
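
# Example (illustrative sketch): frombuf() further down accepts a header only
# if the value stored in the chksum field (bytes 148..156) matches one of the
# two sums, e.g.:
#     >>> buf = TarInfo("foo.txt").tobuf()
#     >>> nti(buf[148:156]) in calc_chksums(buf)
#     True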
237
238 def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
239 """Copy length bytes from fileobj src to fileobj dst.
240 If length is None, copy the entire content.
241 """
242 bufsize = bufsize or 16 * 1024
243 if length == 0:
244 return
245 if length is None:
246 shutil.copyfileobj(src, dst, bufsize)
247 return
248
249 blocks, remainder = divmod(length, bufsize)
250 for b in range(blocks):
251 buf = src.read(bufsize)
252 if len(buf) < bufsize:
253 raise exception("unexpected end of data")
254 dst.write(buf)
255
256 if remainder != 0:
257 buf = src.read(remainder)
258 if len(buf) < remainder:
259 raise exception("unexpected end of data")
260 dst.write(buf)
261 return
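
# Example (illustrative sketch, file names are hypothetical): copy exactly
# 1024 bytes between two file objects, raising OSError on a short read:
#     with open("member.bin", "rb") as src, open("member.out", "wb") as dst:
#         copyfileobj(src, dst, length=1024)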
262
263 def _safe_print(s):
264 encoding = getattr(sys.stdout, 'encoding', None)
265 if encoding is not None:
266 s = s.encode(encoding, 'backslashreplace').decode(encoding)
267 print(s, end=' ')
268
269
270 class TarError(Exception):
271 """Base exception."""
272 pass
273 class ExtractError(TarError):
274 """General exception for extract errors."""
275 pass
276 class ReadError(TarError):
277 """Exception for unreadable tar archives."""
278 pass
279 class CompressionError(TarError):
280 """Exception for unavailable compression methods."""
281 pass
282 class StreamError(TarError):
283 """Exception for unsupported operations on stream-like TarFiles."""
284 pass
285 class HeaderError(TarError):
286 """Base exception for header errors."""
287 pass
288 class EmptyHeaderError(HeaderError):
289 """Exception for empty headers."""
290 pass
291 class TruncatedHeaderError(HeaderError):
292 """Exception for truncated headers."""
293 pass
294 class EOFHeaderError(HeaderError):
295 """Exception for end of file headers."""
296 pass
297 class InvalidHeaderError(HeaderError):
298 """Exception for invalid headers."""
299 pass
300 class SubsequentHeaderError(HeaderError):
301 """Exception for missing and invalid extended headers."""
302 pass
303
304 #---------------------------
305 # internal stream interface
306 #---------------------------
307 class _LowLevelFile:
308 """Low-level file object. Supports reading and writing.
309 It is used instead of a regular file object for streaming
310 access.
311 """
312
313 def __init__(self, name, mode):
314 mode = {
315 "r": os.O_RDONLY,
316 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
317 }[mode]
318 if hasattr(os, "O_BINARY"):
319 mode |= os.O_BINARY
320 self.fd = os.open(name, mode, 0o666)
321
322 def close(self):
323 os.close(self.fd)
324
325 def read(self, size):
326 return os.read(self.fd, size)
327
328 def write(self, s):
329 os.write(self.fd, s)
330
331 class _Stream:
332 """Class that serves as an adapter between TarFile and
333 a stream-like object. The stream-like object only
334 needs to have a read() or write() method and is accessed
335 blockwise. Use of gzip or bzip2 compression is possible.
336 A stream-like object could be for example: sys.stdin,
337 sys.stdout, a socket, a tape device etc.
338
339 _Stream is intended to be used only internally.
340 """
341
342 def __init__(self, name, mode, comptype, fileobj, bufsize,
343 compresslevel):
344 """Construct a _Stream object.
345 """
346 self._extfileobj = True
347 if fileobj is None:
348 fileobj = _LowLevelFile(name, mode)
349 self._extfileobj = False
350
351 if comptype == '*':
352 # Enable transparent compression detection for the
353 # stream interface
354 fileobj = _StreamProxy(fileobj)
355 comptype = fileobj.getcomptype()
356
357 self.name = name or ""
358 self.mode = mode
359 self.comptype = comptype
360 self.fileobj = fileobj
361 self.bufsize = bufsize
362 self.buf = b""
363 self.pos = 0
364 self.closed = False
365
366 try:
367 if comptype == "gz":
368 try:
369 import zlib
370 except ImportError:
371 raise CompressionError("zlib module is not available") from None
372 self.zlib = zlib
373 self.crc = zlib.crc32(b"")
374 if mode == "r":
375 self.exception = zlib.error
376 self._init_read_gz()
377 else:
378 self._init_write_gz(compresslevel)
379
380 elif comptype == "bz2":
381 try:
382 import bz2
383 except ImportError:
384 raise CompressionError("bz2 module is not available") from None
385 if mode == "r":
386 self.dbuf = b""
387 self.cmp = bz2.BZ2Decompressor()
388 self.exception = OSError
389 else:
390 self.cmp = bz2.BZ2Compressor(compresslevel)
391
392 elif comptype == "xz":
393 try:
394 import lzma
395 except ImportError:
396 raise CompressionError("lzma module is not available") from None
397 if mode == "r":
398 self.dbuf = b""
399 self.cmp = lzma.LZMADecompressor()
400 self.exception = lzma.LZMAError
401 else:
402 self.cmp = lzma.LZMACompressor()
403
404 elif comptype != "tar":
405 raise CompressionError("unknown compression type %r" % comptype)
406
407 except:
408 if not self._extfileobj:
409 self.fileobj.close()
410 self.closed = True
411 raise
412
413 def __del__(self):
414 if hasattr(self, "closed") and not self.closed:
415 self.close()
416
417 def _init_write_gz(self, compresslevel):
418 """Initialize for writing with gzip compression.
419 """
420 self.cmp = self.zlib.compressobj(compresslevel,
421 self.zlib.DEFLATED,
422 -self.zlib.MAX_WBITS,
423 self.zlib.DEF_MEM_LEVEL,
424 0)
425 timestamp = struct.pack("<L", int(time.time()))
426 self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
427 if self.name.endswith(".gz"):
428 self.name = self.name[:-3]
429 # Honor "directory components removed" from RFC1952
430 self.name = os.path.basename(self.name)
431 # RFC1952 says we must use ISO-8859-1 for the FNAME field.
432 self.__write(self.name.encode("iso-8859-1", "replace") + NUL)
433
434 def write(self, s):
435 """Write string s to the stream.
436 """
437 if self.comptype == "gz":
438 self.crc = self.zlib.crc32(s, self.crc)
439 self.pos += len(s)
440 if self.comptype != "tar":
441 s = self.cmp.compress(s)
442 self.__write(s)
443
444 def __write(self, s):
445 """Write string s to the stream if a whole new block
446 is ready to be written.
447 """
448 self.buf += s
449 while len(self.buf) > self.bufsize:
450 self.fileobj.write(self.buf[:self.bufsize])
451 self.buf = self.buf[self.bufsize:]
452
453 def close(self):
454 """Close the _Stream object. No operation should be
455 done on it afterwards.
456 """
457 if self.closed:
458 return
459
460 self.closed = True
461 try:
462 if self.mode == "w" and self.comptype != "tar":
463 self.buf += self.cmp.flush()
464
465 if self.mode == "w" and self.buf:
466 self.fileobj.write(self.buf)
467 self.buf = b""
468 if self.comptype == "gz":
469 self.fileobj.write(struct.pack("<L", self.crc))
470 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
471 finally:
472 if not self._extfileobj:
473 self.fileobj.close()
474
475 def _init_read_gz(self):
476 """Initialize for reading a gzip compressed fileobj.
477 """
478 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
479 self.dbuf = b""
480
481 # taken from gzip.GzipFile with some alterations
482 if self.__read(2) != b"\037\213":
483 raise ReadError("not a gzip file")
484 if self.__read(1) != b"\010":
485 raise CompressionError("unsupported compression method")
486
487 flag = ord(self.__read(1))
488 self.__read(6)
489
490 if flag & 4:
491 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
492 self.read(xlen)
493 if flag & 8:
494 while True:
495 s = self.__read(1)
496 if not s or s == NUL:
497 break
498 if flag & 16:
499 while True:
500 s = self.__read(1)
501 if not s or s == NUL:
502 break
503 if flag & 2:
504 self.__read(2)
505
506 def tell(self):
507 """Return the stream's file pointer position.
508 """
509 return self.pos
510
511 def seek(self, pos=0):
512 """Set the stream's file pointer to pos. Negative seeking
513 is forbidden.
514 """
515 if pos - self.pos >= 0:
516 blocks, remainder = divmod(pos - self.pos, self.bufsize)
517 for i in range(blocks):
518 self.read(self.bufsize)
519 self.read(remainder)
520 else:
521 raise StreamError("seeking backwards is not allowed")
522 return self.pos
523
524 def read(self, size):
525 """Return the next size number of bytes from the stream."""
526 assert size is not None
527 buf = self._read(size)
528 self.pos += len(buf)
529 return buf
530
531 def _read(self, size):
532 """Return size bytes from the stream.
533 """
534 if self.comptype == "tar":
535 return self.__read(size)
536
537 c = len(self.dbuf)
538 t = [self.dbuf]
539 while c < size:
540 # Skip underlying buffer to avoid unaligned double buffering.
541 if self.buf:
542 buf = self.buf
543 self.buf = b""
544 else:
545 buf = self.fileobj.read(self.bufsize)
546 if not buf:
547 break
548 try:
549 buf = self.cmp.decompress(buf)
550 except self.exception as e:
551 raise ReadError("invalid compressed data") from e
552 t.append(buf)
553 c += len(buf)
554 t = b"".join(t)
555 self.dbuf = t[size:]
556 return t[:size]
557
558 def __read(self, size):
559 """Return size bytes from stream. If internal buffer is empty,
560 read another block from the stream.
561 """
562 c = len(self.buf)
563 t = [self.buf]
564 while c < size:
565 buf = self.fileobj.read(self.bufsize)
566 if not buf:
567 break
568 t.append(buf)
569 c += len(buf)
570 t = b"".join(t)
571 self.buf = t[size:]
572 return t[:size]
573 # class _Stream
574
575 class _StreamProxy(object):
576 """Small proxy class that enables transparent compression
577 detection for the Stream interface (mode 'r|*').
578 """
579
580 def __init__(self, fileobj):
581 self.fileobj = fileobj
582 self.buf = self.fileobj.read(BLOCKSIZE)
583
584 def read(self, size):
585 self.read = self.fileobj.read
586 return self.buf
587
588 def getcomptype(self):
589 if self.buf.startswith(b"\x1f\x8b\x08"):
590 return "gz"
591 elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
592 return "bz2"
593 elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
594 return "xz"
595 else:
596 return "tar"
597
598 def close(self):
599 self.fileobj.close()
600 # class StreamProxy
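
# Example (illustrative sketch): the magic bytes checked above are what make
# mode "r|*" work on non-seekable input, e.g. reading an archive of unknown
# compression from a pipe-like file object ("some_pipe" is hypothetical):
#     with TarFile.open(fileobj=some_pipe, mode="r|*") as tf:
#         for member in tf:
#             print(member.name)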
601
602 #------------------------
603 # Extraction file object
604 #------------------------
605 class _FileInFile(object):
606 """A thin wrapper around an existing file object that
607 provides a part of its data as an individual file
608 object.
609 """
610
611 def __init__(self, fileobj, offset, size, name, blockinfo=None):
612 self.fileobj = fileobj
613 self.offset = offset
614 self.size = size
615 self.position = 0
616 self.name = name
617 self.closed = False
618
619 if blockinfo is None:
620 blockinfo = [(0, size)]
621
622 # Construct a map with data and zero blocks.
623 self.map_index = 0
624 self.map = []
625 lastpos = 0
626 realpos = self.offset
627 for offset, size in blockinfo:
628 if offset > lastpos:
629 self.map.append((False, lastpos, offset, None))
630 self.map.append((True, offset, offset + size, realpos))
631 realpos += size
632 lastpos = offset + size
633 if lastpos < self.size:
634 self.map.append((False, lastpos, self.size, None))
635
636 def flush(self):
637 pass
638
639 def readable(self):
640 return True
641
642 def writable(self):
643 return False
644
645 def seekable(self):
646 return self.fileobj.seekable()
647
648 def tell(self):
649 """Return the current file position.
650 """
651 return self.position
652
653 def seek(self, position, whence=io.SEEK_SET):
654 """Seek to a position in the file.
655 """
656 if whence == io.SEEK_SET:
657 self.position = min(max(position, 0), self.size)
658 elif whence == io.SEEK_CUR:
659 if position < 0:
660 self.position = max(self.position + position, 0)
661 else:
662 self.position = min(self.position + position, self.size)
663 elif whence == io.SEEK_END:
664 self.position = max(min(self.size + position, self.size), 0)
665 else:
666 raise ValueError("Invalid argument")
667 return self.position
668
669 def read(self, size=None):
670 """Read data from the file.
671 """
672 if size is None:
673 size = self.size - self.position
674 else:
675 size = min(size, self.size - self.position)
676
677 buf = b""
678 while size > 0:
679 while True:
680 data, start, stop, offset = self.map[self.map_index]
681 if start <= self.position < stop:
682 break
683 else:
684 self.map_index += 1
685 if self.map_index == len(self.map):
686 self.map_index = 0
687 length = min(size, stop - self.position)
688 if data:
689 self.fileobj.seek(offset + (self.position - start))
690 b = self.fileobj.read(length)
691 if len(b) != length:
692 raise ReadError("unexpected end of data")
693 buf += b
694 else:
695 buf += NUL * length
696 size -= length
697 self.position += length
698 return buf
699
700 def readinto(self, b):
701 buf = self.read(len(b))
702 b[:len(buf)] = buf
703 return len(buf)
704
705 def close(self):
706 self.closed = True
707 #class _FileInFile
708
709 class ExFileObject(io.BufferedReader):
710
711 def __init__(self, tarfile, tarinfo):
712 fileobj = _FileInFile(tarfile.fileobj, tarinfo.offset_data,
713 tarinfo.size, tarinfo.name, tarinfo.sparse)
714 super().__init__(fileobj)
715 #class ExFileObject
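
# Example (illustrative sketch, member name is hypothetical): TarFile's
# extractfile() wraps a regular member in this read-only file object, so its
# data can be read without extracting anything to disk:
#     with TarFile.open("archive.tar") as tf:
#         with tf.extractfile("docs/readme.txt") as f:
#             text = f.read().decode("utf-8")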
716
717
718 #-----------------------------
719 # extraction filters (PEP 706)
720 #-----------------------------
721
722 class FilterError(TarError):
723 pass
724
725 class AbsolutePathError(FilterError):
726 def __init__(self, tarinfo):
727 self.tarinfo = tarinfo
728 super().__init__(f'member {tarinfo.name!r} has an absolute path')
729
730 class OutsideDestinationError(FilterError):
731 def __init__(self, tarinfo, path):
732 self.tarinfo = tarinfo
733 self._path = path
734 super().__init__(f'{tarinfo.name!r} would be extracted to {path!r}, '
735 + 'which is outside the destination')
736
737 class SpecialFileError(FilterError):
738 def __init__(self, tarinfo):
739 self.tarinfo = tarinfo
740 super().__init__(f'{tarinfo.name!r} is a special file')
741
742 class AbsoluteLinkError(FilterError):
743 def __init__(self, tarinfo):
744 self.tarinfo = tarinfo
745 super().__init__(f'{tarinfo.name!r} is a link to an absolute path')
746
747 class LinkOutsideDestinationError(FilterError):
748 def __init__(self, tarinfo, path):
749 self.tarinfo = tarinfo
750 self._path = path
751 super().__init__(f'{tarinfo.name!r} would link to {path!r}, '
752 + 'which is outside the destination')
753
754 def _get_filtered_attrs(member, dest_path, for_data=True):
755 new_attrs = {}
756 name = member.name
757 dest_path = os.path.realpath(dest_path)
758 # Strip leading / (tar's directory separator) from filenames.
759 # Include os.sep (target OS directory separator) as well.
760 if name.startswith(('/', os.sep)):
761 name = new_attrs['name'] = member.path.lstrip('/' + os.sep)
762 if os.path.isabs(name):
763 # Path is absolute even after stripping.
764 # For example, 'C:/foo' on Windows.
765 raise AbsolutePathError(member)
766 # Ensure we stay in the destination
767 target_path = os.path.realpath(os.path.join(dest_path, name))
768 if os.path.commonpath([target_path, dest_path]) != dest_path:
769 raise OutsideDestinationError(member, target_path)
770 # Limit permissions (no high bits, and go-w)
771 mode = member.mode
772 if mode is not None:
773 # Strip high bits & group/other write bits
774 mode = mode & 0o755
775 if for_data:
776 # For data, handle permissions & file types
777 if member.isreg() or member.islnk():
778 if not mode & 0o100:
779 # Clear executable bits if not executable by user
780 mode &= ~0o111
781 # Ensure owner can read & write
782 mode |= 0o600
783 elif member.isdir() or member.issym():
784 # Ignore mode for directories & symlinks
785 mode = None
786 else:
787 # Reject special files
788 raise SpecialFileError(member)
789 if mode != member.mode:
790 new_attrs['mode'] = mode
791 if for_data:
792 # Ignore ownership for 'data'
793 if member.uid is not None:
794 new_attrs['uid'] = None
795 if member.gid is not None:
796 new_attrs['gid'] = None
797 if member.uname is not None:
798 new_attrs['uname'] = None
799 if member.gname is not None:
800 new_attrs['gname'] = None
801 # Check link destination for 'data'
802 if member.islnk() or member.issym():
803 if os.path.isabs(member.linkname):
804 raise AbsoluteLinkError(member)
805 if member.issym():
806 target_path = os.path.join(dest_path,
807 os.path.dirname(name),
808 member.linkname)
809 else:
810 target_path = os.path.join(dest_path,
811 member.linkname)
812 target_path = os.path.realpath(target_path)
813 if os.path.commonpath([target_path, dest_path]) != dest_path:
814 raise LinkOutsideDestinationError(member, target_path)
815 return new_attrs
816
817 def fully_trusted_filter(member, dest_path):
818 return member
819
820 def tar_filter(member, dest_path):
821 new_attrs = _get_filtered_attrs(member, dest_path, False)
822 if new_attrs:
823 return member.replace(**new_attrs, deep=False)
824 return member
825
826 def data_filter(member, dest_path):
827 new_attrs = _get_filtered_attrs(member, dest_path, True)
828 if new_attrs:
829 return member.replace(**new_attrs, deep=False)
830 return member
831
832 _NAMED_FILTERS = {
833 "fully_trusted": fully_trusted_filter,
834 "tar": tar_filter,
835 "data": data_filter,
836 }
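
# Example (illustrative sketch, archive name is hypothetical): the named
# filters above are selected through the `filter' argument of
# TarFile.extract()/extractall() introduced by PEP 706:
#     with TarFile.open("archive.tar") as tf:
#         tf.extractall(path="dest", filter="data")   # or filter=data_filter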
837
838 #------------------
839 # Exported Classes
840 #------------------
841
842 # Sentinel for replace() defaults, meaning "don't change the attribute"
843 _KEEP = object()
844
845 class TarInfo(object):
846 """Informational class which holds the details about an
847 archive member given by a tar header block.
848 TarInfo objects are returned by TarFile.getmember(),
849 TarFile.getmembers() and TarFile.gettarinfo() and are
850 usually created internally.
851 """
852
853 __slots__ = dict(
854 name = 'Name of the archive member.',
855 mode = 'Permission bits.',
856 uid = 'User ID of the user who originally stored this member.',
857 gid = 'Group ID of the user who originally stored this member.',
858 size = 'Size in bytes.',
859 mtime = 'Time of last modification.',
860 chksum = 'Header checksum.',
861 type = ('File type. type is usually one of these constants: '
862 'REGTYPE, AREGTYPE, LNKTYPE, SYMTYPE, DIRTYPE, FIFOTYPE, '
863 'CONTTYPE, CHRTYPE, BLKTYPE, GNUTYPE_SPARSE.'),
864         linkname = ('Name of the link target, which is only present '
865 'in TarInfo objects of type LNKTYPE and SYMTYPE.'),
866 uname = 'User name.',
867 gname = 'Group name.',
868 devmajor = 'Device major number.',
869 devminor = 'Device minor number.',
870 offset = 'The tar header starts here.',
871 offset_data = "The file's data starts here.",
872 pax_headers = ('A dictionary containing key-value pairs of an '
873 'associated pax extended header.'),
874 sparse = 'Sparse member information.',
875 tarfile = None,
876 _sparse_structs = None,
877 _link_target = None,
878 )
879
880 def __init__(self, name=""):
881 """Construct a TarInfo object. name is the optional name
882 of the member.
883 """
884 self.name = name # member name
885 self.mode = 0o644 # file permissions
886 self.uid = 0 # user id
887 self.gid = 0 # group id
888 self.size = 0 # file size
889 self.mtime = 0 # modification time
890 self.chksum = 0 # header checksum
891 self.type = REGTYPE # member type
892 self.linkname = "" # link name
893 self.uname = "" # user name
894 self.gname = "" # group name
895 self.devmajor = 0 # device major number
896 self.devminor = 0 # device minor number
897
898 self.offset = 0 # the tar header starts here
899 self.offset_data = 0 # the file's data starts here
900
901 self.sparse = None # sparse member information
902 self.pax_headers = {} # pax header information
903
904 @property
905 def path(self):
906 'In pax headers, "name" is called "path".'
907 return self.name
908
909 @path.setter
910 def path(self, name):
911 self.name = name
912
913 @property
914 def linkpath(self):
915 'In pax headers, "linkname" is called "linkpath".'
916 return self.linkname
917
918 @linkpath.setter
919 def linkpath(self, linkname):
920 self.linkname = linkname
921
922 def __repr__(self):
923 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
924
925 def replace(self, *,
926 name=_KEEP, mtime=_KEEP, mode=_KEEP, linkname=_KEEP,
927 uid=_KEEP, gid=_KEEP, uname=_KEEP, gname=_KEEP,
928 deep=True, _KEEP=_KEEP):
929 """Return a deep copy of self with the given attributes replaced.
930 """
931 if deep:
932 result = copy.deepcopy(self)
933 else:
934 result = copy.copy(self)
935 if name is not _KEEP:
936 result.name = name
937 if mtime is not _KEEP:
938 result.mtime = mtime
939 if mode is not _KEEP:
940 result.mode = mode
941 if linkname is not _KEEP:
942 result.linkname = linkname
943 if uid is not _KEEP:
944 result.uid = uid
945 if gid is not _KEEP:
946 result.gid = gid
947 if uname is not _KEEP:
948 result.uname = uname
949 if gname is not _KEEP:
950 result.gname = gname
951 return result
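
# Example (illustrative sketch): replace() is also useful in custom extraction
# filters, which should return a modified copy rather than mutate the member:
#     def anonymize(member, dest_path):   # hypothetical filter
#         return member.replace(uid=0, gid=0, uname="root", gname="root",
#                               deep=False)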
952
953 def get_info(self):
954 """Return the TarInfo's attributes as a dictionary.
955 """
956 if self.mode is None:
957 mode = None
958 else:
959 mode = self.mode & 0o7777
960 info = {
961 "name": self.name,
962 "mode": mode,
963 "uid": self.uid,
964 "gid": self.gid,
965 "size": self.size,
966 "mtime": self.mtime,
967 "chksum": self.chksum,
968 "type": self.type,
969 "linkname": self.linkname,
970 "uname": self.uname,
971 "gname": self.gname,
972 "devmajor": self.devmajor,
973 "devminor": self.devminor
974 }
975
976 if info["type"] == DIRTYPE and not info["name"].endswith("/"):
977 info["name"] += "/"
978
979 return info
980
981 def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
982 """Return a tar header as a string of 512 byte blocks.
983 """
984 info = self.get_info()
985 for name, value in info.items():
986 if value is None:
987 raise ValueError("%s may not be None" % name)
988
989 if format == USTAR_FORMAT:
990 return self.create_ustar_header(info, encoding, errors)
991 elif format == GNU_FORMAT:
992 return self.create_gnu_header(info, encoding, errors)
993 elif format == PAX_FORMAT:
994 return self.create_pax_header(info, encoding)
995 else:
996 raise ValueError("invalid format")
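
# Example (illustrative sketch): with default metadata a member whose name
# fits the ustar limits serializes to a single 512 byte block:
#     >>> len(TarInfo("hello.txt").tobuf())
#     512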
997
998 def create_ustar_header(self, info, encoding, errors):
999 """Return the object as a ustar header block.
1000 """
1001 info["magic"] = POSIX_MAGIC
1002
1003 if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
1004 raise ValueError("linkname is too long")
1005
1006 if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
1007 info["prefix"], info["name"] = self._posix_split_name(info["name"], encoding, errors)
1008
1009 return self._create_header(info, USTAR_FORMAT, encoding, errors)
1010
1011 def create_gnu_header(self, info, encoding, errors):
1012 """Return the object as a GNU header block sequence.
1013 """
1014 info["magic"] = GNU_MAGIC
1015
1016 buf = b""
1017 if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
1018 buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)
1019
1020 if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
1021 buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)
1022
1023 return buf + self._create_header(info, GNU_FORMAT, encoding, errors)
1024
1025 def create_pax_header(self, info, encoding):
1026 """Return the object as a ustar header block. If it cannot be
1027 represented this way, prepend a pax extended header sequence
1028            with supplemental information.
1029 """
1030 info["magic"] = POSIX_MAGIC
1031 pax_headers = self.pax_headers.copy()
1032
1033 # Test string fields for values that exceed the field length or cannot
1034 # be represented in ASCII encoding.
1035 for name, hname, length in (
1036 ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
1037 ("uname", "uname", 32), ("gname", "gname", 32)):
1038
1039 if hname in pax_headers:
1040 # The pax header has priority.
1041 continue
1042
1043 # Try to encode the string as ASCII.
1044 try:
1045 info[name].encode("ascii", "strict")
1046 except UnicodeEncodeError:
1047 pax_headers[hname] = info[name]
1048 continue
1049
1050 if len(info[name]) > length:
1051 pax_headers[hname] = info[name]
1052
1053 # Test number fields for values that exceed the field limit or values
1054         # that need to be stored as float.
1055 for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
1056 needs_pax = False
1057
1058 val = info[name]
1059 val_is_float = isinstance(val, float)
1060 val_int = round(val) if val_is_float else val
1061 if not 0 <= val_int < 8 ** (digits - 1):
1062 # Avoid overflow.
1063 info[name] = 0
1064 needs_pax = True
1065 elif val_is_float:
1066 # Put rounded value in ustar header, and full
1067 # precision value in pax header.
1068 info[name] = val_int
1069 needs_pax = True
1070
1071 # The existing pax header has priority.
1072 if needs_pax and name not in pax_headers:
1073 pax_headers[name] = str(val)
1074
1075 # Create a pax extended header if necessary.
1076 if pax_headers:
1077 buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
1078 else:
1079 buf = b""
1080
1081 return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
1082
1083 @classmethod
1084 def create_pax_global_header(cls, pax_headers):
1085 """Return the object as a pax global header block sequence.
1086 """
1087 return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")
1088
1089 def _posix_split_name(self, name, encoding, errors):
1090 """Split a name longer than 100 chars into a prefix
1091 and a name part.
1092 """
1093 components = name.split("/")
1094 for i in range(1, len(components)):
1095 prefix = "/".join(components[:i])
1096 name = "/".join(components[i:])
1097 if len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX and \
1098 len(name.encode(encoding, errors)) <= LENGTH_NAME:
1099 break
1100 else:
1101 raise ValueError("name is too long")
1102
1103 return prefix, name
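
# Example (illustrative sketch): a 111 character path such as "a" * 90 + "/" +
# "b" * 20 does not fit the 100 byte name field, so it is split at a "/" into
# prefix "a" * 90 (<= 155 bytes) and name "b" * 20 (<= 100 bytes); frombuf()
# rejoins the two parts when the header is read back.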
1104
1105 @staticmethod
1106 def _create_header(info, format, encoding, errors):
1107 """Return a header block. info is a dictionary with file
1108 information, format must be one of the *_FORMAT constants.
1109 """
1110 has_device_fields = info.get("type") in (CHRTYPE, BLKTYPE)
1111 if has_device_fields:
1112 devmajor = itn(info.get("devmajor", 0), 8, format)
1113 devminor = itn(info.get("devminor", 0), 8, format)
1114 else:
1115 devmajor = stn("", 8, encoding, errors)
1116 devminor = stn("", 8, encoding, errors)
1117
1118 # None values in metadata should cause ValueError.
1119 # itn()/stn() do this for all fields except type.
1120 filetype = info.get("type", REGTYPE)
1121 if filetype is None:
1122 raise ValueError("TarInfo.type must not be None")
1123
1124 parts = [
1125 stn(info.get("name", ""), 100, encoding, errors),
1126 itn(info.get("mode", 0) & 0o7777, 8, format),
1127 itn(info.get("uid", 0), 8, format),
1128 itn(info.get("gid", 0), 8, format),
1129 itn(info.get("size", 0), 12, format),
1130 itn(info.get("mtime", 0), 12, format),
1131 b" ", # checksum field
1132 filetype,
1133 stn(info.get("linkname", ""), 100, encoding, errors),
1134 info.get("magic", POSIX_MAGIC),
1135 stn(info.get("uname", ""), 32, encoding, errors),
1136 stn(info.get("gname", ""), 32, encoding, errors),
1137 devmajor,
1138 devminor,
1139 stn(info.get("prefix", ""), 155, encoding, errors)
1140 ]
1141
1142 buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
1143 chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
1144 buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
1145 return buf
1146
1147 @staticmethod
1148 def _create_payload(payload):
1149 """Return the string payload filled with zero bytes
1150 up to the next 512 byte border.
1151 """
1152 blocks, remainder = divmod(len(payload), BLOCKSIZE)
1153 if remainder > 0:
1154 payload += (BLOCKSIZE - remainder) * NUL
1155 return payload
1156
1157 @classmethod
1158 def _create_gnu_long_header(cls, name, type, encoding, errors):
1159 """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
1160 for name.
1161 """
1162 name = name.encode(encoding, errors) + NUL
1163
1164 info = {}
1165 info["name"] = "././@LongLink"
1166 info["type"] = type
1167 info["size"] = len(name)
1168 info["magic"] = GNU_MAGIC
1169
1170 # create extended header + name blocks.
1171 return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
1172 cls._create_payload(name)
1173
1174 @classmethod
1175 def _create_pax_generic_header(cls, pax_headers, type, encoding):
1176 """Return a POSIX.1-2008 extended or global header sequence
1177 that contains a list of keyword, value pairs. The values
1178 must be strings.
1179 """
1180 # Check if one of the fields contains surrogate characters and thereby
1181 # forces hdrcharset=BINARY, see _proc_pax() for more information.
1182 binary = False
1183 for keyword, value in pax_headers.items():
1184 try:
1185 value.encode("utf-8", "strict")
1186 except UnicodeEncodeError:
1187 binary = True
1188 break
1189
1190 records = b""
1191 if binary:
1192 # Put the hdrcharset field at the beginning of the header.
1193 records += b"21 hdrcharset=BINARY\n"
1194
1195 for keyword, value in pax_headers.items():
1196 keyword = keyword.encode("utf-8")
1197 if binary:
1198 # Try to restore the original byte representation of `value'.
1199                # Needless to say, the encoding must match the string.
1200 value = value.encode(encoding, "surrogateescape")
1201 else:
1202 value = value.encode("utf-8")
1203
1204 l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n'
1205 n = p = 0
1206 while True:
1207 n = l + len(str(p))
1208 if n == p:
1209 break
1210 p = n
1211 records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"
1212
1213 # We use a hardcoded "././@PaxHeader" name like star does
1214 # instead of the one that POSIX recommends.
1215 info = {}
1216 info["name"] = "././@PaxHeader"
1217 info["type"] = type
1218 info["size"] = len(records)
1219 info["magic"] = POSIX_MAGIC
1220
1221 # Create pax header + record blocks.
1222 return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
1223 cls._create_payload(records)
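
# Example (illustrative sketch): the self-referencing record length above is
# found by iterating until it stops changing. For keyword "size" and value
# "100" the record without its length field is 10 bytes (" size=100\n"); a
# one digit length would give 11 bytes, a two digit length gives 12, and "12"
# is itself two digits, so the record is written as b"12 size=100\n".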
1224
1225 @classmethod
1226 def frombuf(cls, buf, encoding, errors):
1227 """Construct a TarInfo object from a 512 byte bytes object.
1228 """
1229 if len(buf) == 0:
1230 raise EmptyHeaderError("empty header")
1231 if len(buf) != BLOCKSIZE:
1232 raise TruncatedHeaderError("truncated header")
1233 if buf.count(NUL) == BLOCKSIZE:
1234 raise EOFHeaderError("end of file header")
1235
1236 chksum = nti(buf[148:156])
1237 if chksum not in calc_chksums(buf):
1238 raise InvalidHeaderError("bad checksum")
1239
1240 obj = cls()
1241 obj.name = nts(buf[0:100], encoding, errors)
1242 obj.mode = nti(buf[100:108])
1243 obj.uid = nti(buf[108:116])
1244 obj.gid = nti(buf[116:124])
1245 obj.size = nti(buf[124:136])
1246 obj.mtime = nti(buf[136:148])
1247 obj.chksum = chksum
1248 obj.type = buf[156:157]
1249 obj.linkname = nts(buf[157:257], encoding, errors)
1250 obj.uname = nts(buf[265:297], encoding, errors)
1251 obj.gname = nts(buf[297:329], encoding, errors)
1252 obj.devmajor = nti(buf[329:337])
1253 obj.devminor = nti(buf[337:345])
1254 prefix = nts(buf[345:500], encoding, errors)
1255
1256 # Old V7 tar format represents a directory as a regular
1257 # file with a trailing slash.
1258 if obj.type == AREGTYPE and obj.name.endswith("/"):
1259 obj.type = DIRTYPE
1260
1261 # The old GNU sparse format occupies some of the unused
1262 # space in the buffer for up to 4 sparse structures.
1263 # Save them for later processing in _proc_sparse().
1264 if obj.type == GNUTYPE_SPARSE:
1265 pos = 386
1266 structs = []
1267 for i in range(4):
1268 try:
1269 offset = nti(buf[pos:pos + 12])
1270 numbytes = nti(buf[pos + 12:pos + 24])
1271 except ValueError:
1272 break
1273 structs.append((offset, numbytes))
1274 pos += 24
1275 isextended = bool(buf[482])
1276 origsize = nti(buf[483:495])
1277 obj._sparse_structs = (structs, isextended, origsize)
1278
1279 # Remove redundant slashes from directories.
1280 if obj.isdir():
1281 obj.name = obj.name.rstrip("/")
1282
1283 # Reconstruct a ustar longname.
1284 if prefix and obj.type not in GNU_TYPES:
1285 obj.name = prefix + "/" + obj.name
1286 return obj
1287
1288 @classmethod
1289 def fromtarfile(cls, tarfile):
1290 """Return the next TarInfo object from TarFile object
1291 tarfile.
1292 """
1293 buf = tarfile.fileobj.read(BLOCKSIZE)
1294 obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
1295 obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1296 return obj._proc_member(tarfile)
1297
1298 #--------------------------------------------------------------------------
1299 # The following are methods that are called depending on the type of a
1300 # member. The entry point is _proc_member() which can be overridden in a
1301 # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1302 # implement the following
1303 # operations:
1304 # 1. Set self.offset_data to the position where the data blocks begin,
1305 # if there is data that follows.
1306 # 2. Set tarfile.offset to the position where the next member's header will
1307 # begin.
1308 # 3. Return self or another valid TarInfo object.
1309 def _proc_member(self, tarfile):
1310 """Choose the right processing method depending on
1311 the type and call it.
1312 """
1313 if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
1314 return self._proc_gnulong(tarfile)
1315 elif self.type == GNUTYPE_SPARSE:
1316 return self._proc_sparse(tarfile)
1317 elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
1318 return self._proc_pax(tarfile)
1319 else:
1320 return self._proc_builtin(tarfile)
1321
1322 def _proc_builtin(self, tarfile):
1323 """Process a builtin type or an unknown type which
1324 will be treated as a regular file.
1325 """
1326 self.offset_data = tarfile.fileobj.tell()
1327 offset = self.offset_data
1328 if self.isreg() or self.type not in SUPPORTED_TYPES:
1329 # Skip the following data blocks.
1330 offset += self._block(self.size)
1331 tarfile.offset = offset
1332
1333 # Patch the TarInfo object with saved global
1334 # header information.
1335 self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
1336
1337 # Remove redundant slashes from directories. This is to be consistent
1338 # with frombuf().
1339 if self.isdir():
1340 self.name = self.name.rstrip("/")
1341
1342 return self
1343
1344 def _proc_gnulong(self, tarfile):
1345 """Process the blocks that hold a GNU longname
1346 or longlink member.
1347 """
1348 buf = tarfile.fileobj.read(self._block(self.size))
1349
1350 # Fetch the next header and process it.
1351 try:
1352 next = self.fromtarfile(tarfile)
1353 except HeaderError as e:
1354 raise SubsequentHeaderError(str(e)) from None
1355
1356 # Patch the TarInfo object from the next header with
1357 # the longname information.
1358 next.offset = self.offset
1359 if self.type == GNUTYPE_LONGNAME:
1360 next.name = nts(buf, tarfile.encoding, tarfile.errors)
1361 elif self.type == GNUTYPE_LONGLINK:
1362 next.linkname = nts(buf, tarfile.encoding, tarfile.errors)
1363
1364 # Remove redundant slashes from directories. This is to be consistent
1365 # with frombuf().
1366 if next.isdir():
1367 next.name = next.name.removesuffix("/")
1368
1369 return next
1370
1371 def _proc_sparse(self, tarfile):
1372 """Process a GNU sparse header plus extra headers.
1373 """
1374 # We already collected some sparse structures in frombuf().
1375 structs, isextended, origsize = self._sparse_structs
1376 del self._sparse_structs
1377
1378 # Collect sparse structures from extended header blocks.
1379 while isextended:
1380 buf = tarfile.fileobj.read(BLOCKSIZE)
1381 pos = 0
1382 for i in range(21):
1383 try:
1384 offset = nti(buf[pos:pos + 12])
1385 numbytes = nti(buf[pos + 12:pos + 24])
1386 except ValueError:
1387 break
1388 if offset and numbytes:
1389 structs.append((offset, numbytes))
1390 pos += 24
1391 isextended = bool(buf[504])
1392 self.sparse = structs
1393
1394 self.offset_data = tarfile.fileobj.tell()
1395 tarfile.offset = self.offset_data + self._block(self.size)
1396 self.size = origsize
1397 return self
1398
1399 def _proc_pax(self, tarfile):
1400 """Process an extended or global header as described in
1401 POSIX.1-2008.
1402 """
1403 # Read the header information.
1404 buf = tarfile.fileobj.read(self._block(self.size))
1405
1406 # A pax header stores supplemental information for either
1407 # the following file (extended) or all following files
1408 # (global).
1409 if self.type == XGLTYPE:
1410 pax_headers = tarfile.pax_headers
1411 else:
1412 pax_headers = tarfile.pax_headers.copy()
1413
1414 # Check if the pax header contains a hdrcharset field. This tells us
1415 # the encoding of the path, linkpath, uname and gname fields. Normally,
1416        # these fields are UTF-8 encoded but, since POSIX.1-2008, tar
1417 # implementations are allowed to store them as raw binary strings if
1418 # the translation to UTF-8 fails.
1419 match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
1420 if match is not None:
1421 pax_headers["hdrcharset"] = match.group(1).decode("utf-8")
1422
1423 # For the time being, we don't care about anything other than "BINARY".
1424 # The only other value that is currently allowed by the standard is
1425 # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
1426 hdrcharset = pax_headers.get("hdrcharset")
1427 if hdrcharset == "BINARY":
1428 encoding = tarfile.encoding
1429 else:
1430 encoding = "utf-8"
1431
1432 # Parse pax header information. A record looks like that:
1433 # "%d %s=%s\n" % (length, keyword, value). length is the size
1434 # of the complete record including the length field itself and
1435 # the newline. keyword and value are both UTF-8 encoded strings.
1436 regex = re.compile(br"(\d+) ([^=]+)=")
1437 pos = 0
1438 while match := regex.match(buf, pos):
1439 length, keyword = match.groups()
1440 length = int(length)
1441 if length == 0:
1442 raise InvalidHeaderError("invalid header")
1443 value = buf[match.end(2) + 1:match.start(1) + length - 1]
1444
1445 # Normally, we could just use "utf-8" as the encoding and "strict"
1446 # as the error handler, but we better not take the risk. For
1447 # example, GNU tar <= 1.23 is known to store filenames it cannot
1448 # translate to UTF-8 as raw strings (unfortunately without a
1449 # hdrcharset=BINARY header).
1450 # We first try the strict standard encoding, and if that fails we
1451 # fall back on the user's encoding and error handler.
1452 keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
1453 tarfile.errors)
1454 if keyword in PAX_NAME_FIELDS:
1455 value = self._decode_pax_field(value, encoding, tarfile.encoding,
1456 tarfile.errors)
1457 else:
1458 value = self._decode_pax_field(value, "utf-8", "utf-8",
1459 tarfile.errors)
1460
1461 pax_headers[keyword] = value
1462 pos += length
1463
1464 # Fetch the next header.
1465 try:
1466 next = self.fromtarfile(tarfile)
1467 except HeaderError as e:
1468 raise SubsequentHeaderError(str(e)) from None
1469
1470 # Process GNU sparse information.
1471 if "GNU.sparse.map" in pax_headers:
1472 # GNU extended sparse format version 0.1.
1473 self._proc_gnusparse_01(next, pax_headers)
1474
1475 elif "GNU.sparse.size" in pax_headers:
1476 # GNU extended sparse format version 0.0.
1477 self._proc_gnusparse_00(next, pax_headers, buf)
1478
1479 elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
1480 # GNU extended sparse format version 1.0.
1481 self._proc_gnusparse_10(next, pax_headers, tarfile)
1482
1483 if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
1484 # Patch the TarInfo object with the extended header info.
1485 next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
1486 next.offset = self.offset
1487
1488 if "size" in pax_headers:
1489 # If the extended header replaces the size field,
1490 # we need to recalculate the offset where the next
1491 # header starts.
1492 offset = next.offset_data
1493 if next.isreg() or next.type not in SUPPORTED_TYPES:
1494 offset += next._block(next.size)
1495 tarfile.offset = offset
1496
1497 return next
1498
1499 def _proc_gnusparse_00(self, next, pax_headers, buf):
1500 """Process a GNU tar extended sparse header, version 0.0.
1501 """
1502 offsets = []
1503 for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
1504 offsets.append(int(match.group(1)))
1505 numbytes = []
1506 for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
1507 numbytes.append(int(match.group(1)))
1508 next.sparse = list(zip(offsets, numbytes))
1509
1510 def _proc_gnusparse_01(self, next, pax_headers):
1511 """Process a GNU tar extended sparse header, version 0.1.
1512 """
1513 sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
1514 next.sparse = list(zip(sparse[::2], sparse[1::2]))
1515
1516 def _proc_gnusparse_10(self, next, pax_headers, tarfile):
1517 """Process a GNU tar extended sparse header, version 1.0.
1518 """
1519 fields = None
1520 sparse = []
1521 buf = tarfile.fileobj.read(BLOCKSIZE)
1522 fields, buf = buf.split(b"\n", 1)
1523 fields = int(fields)
1524 while len(sparse) < fields * 2:
1525 if b"\n" not in buf:
1526 buf += tarfile.fileobj.read(BLOCKSIZE)
1527 number, buf = buf.split(b"\n", 1)
1528 sparse.append(int(number))
1529 next.offset_data = tarfile.fileobj.tell()
1530 next.sparse = list(zip(sparse[::2], sparse[1::2]))
1531
1532 def _apply_pax_info(self, pax_headers, encoding, errors):
1533 """Replace fields with supplemental information from a previous
1534 pax extended or global header.
1535 """
1536 for keyword, value in pax_headers.items():
1537 if keyword == "GNU.sparse.name":
1538 setattr(self, "path", value)
1539 elif keyword == "GNU.sparse.size":
1540 setattr(self, "size", int(value))
1541 elif keyword == "GNU.sparse.realsize":
1542 setattr(self, "size", int(value))
1543 elif keyword in PAX_FIELDS:
1544 if keyword in PAX_NUMBER_FIELDS:
1545 try:
1546 value = PAX_NUMBER_FIELDS[keyword](value)
1547 except ValueError:
1548 value = 0
1549 if keyword == "path":
1550 value = value.rstrip("/")
1551 setattr(self, keyword, value)
1552
1553 self.pax_headers = pax_headers.copy()
1554
1555 def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
1556 """Decode a single field from a pax record.
1557 """
1558 try:
1559 return value.decode(encoding, "strict")
1560 except UnicodeDecodeError:
1561 return value.decode(fallback_encoding, fallback_errors)
1562
1563 def _block(self, count):
1564 """Round up a byte count by BLOCKSIZE and return it,
1565 e.g. _block(834) => 1024.
1566 """
1567 blocks, remainder = divmod(count, BLOCKSIZE)
1568 if remainder:
1569 blocks += 1
1570 return blocks * BLOCKSIZE
1571
1572 def isreg(self):
1573 'Return True if the Tarinfo object is a regular file.'
1574 return self.type in REGULAR_TYPES
1575
1576 def isfile(self):
1577 'Return True if the Tarinfo object is a regular file.'
1578 return self.isreg()
1579
1580 def isdir(self):
1581 'Return True if it is a directory.'
1582 return self.type == DIRTYPE
1583
1584 def issym(self):
1585 'Return True if it is a symbolic link.'
1586 return self.type == SYMTYPE
1587
1588 def islnk(self):
1589 'Return True if it is a hard link.'
1590 return self.type == LNKTYPE
1591
1592 def ischr(self):
1593 'Return True if it is a character device.'
1594 return self.type == CHRTYPE
1595
1596 def isblk(self):
1597 'Return True if it is a block device.'
1598 return self.type == BLKTYPE
1599
1600 def isfifo(self):
1601 'Return True if it is a FIFO.'
1602 return self.type == FIFOTYPE
1603
1604 def issparse(self):
1605 return self.sparse is not None
1606
1607 def isdev(self):
1608 'Return True if it is one of character device, block device or FIFO.'
1609 return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1610 # class TarInfo
1611
1612 class TarFile(object):
1613 """The TarFile Class provides an interface to tar archives.
1614 """
1615
1616 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
1617
1618 dereference = False # If true, add content of linked file to the
1619 # tar file, else the link.
1620
1621 ignore_zeros = False # If true, skips empty or invalid blocks and
1622 # continues processing.
1623
1624 errorlevel = 1 # If 0, fatal errors only appear in debug
1625 # messages (if debug >= 0). If > 0, errors
1626 # are passed to the caller as exceptions.
1627
1628 format = DEFAULT_FORMAT # The format to use when creating an archive.
1629
1630 encoding = ENCODING # Encoding for 8-bit character strings.
1631
1632 errors = None # Error handler for unicode conversion.
1633
1634 tarinfo = TarInfo # The default TarInfo class to use.
1635
1636 fileobject = ExFileObject # The file-object for extractfile().
1637
1638 extraction_filter = None # The default filter for extraction.
1639
1640 def __init__(self, name=None, mode="r", fileobj=None, format=None,
1641 tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
1642 errors="surrogateescape", pax_headers=None, debug=None,
1643 errorlevel=None, copybufsize=None):
1644 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1645 read from an existing archive, 'a' to append data to an existing
1646        file, 'w' to create a new file overwriting an existing one, or 'x' to
1647        create a new file exclusively. `mode' defaults to 'r'.
1648 If `fileobj' is given, it is used for reading or writing data. If it
1649 can be determined, `mode' is overridden by `fileobj's mode.
1650        `fileobj' is not closed when TarFile is closed.
1651 """
1652 modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}
1653 if mode not in modes:
1654 raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
1655 self.mode = mode
1656 self._mode = modes[mode]
1657
1658 if not fileobj:
1659 if self.mode == "a" and not os.path.exists(name):
1660 # Create nonexistent files in append mode.
1661 self.mode = "w"
1662 self._mode = "wb"
1663 fileobj = bltn_open(name, self._mode)
1664 self._extfileobj = False
1665 else:
1666 if (name is None and hasattr(fileobj, "name") and
1667 isinstance(fileobj.name, (str, bytes))):
1668 name = fileobj.name
1669 if hasattr(fileobj, "mode"):
1670 self._mode = fileobj.mode
1671 self._extfileobj = True
1672 self.name = os.path.abspath(name) if name else None
1673 self.fileobj = fileobj
1674
1675 # Init attributes.
1676 if format is not None:
1677 self.format = format
1678 if tarinfo is not None:
1679 self.tarinfo = tarinfo
1680 if dereference is not None:
1681 self.dereference = dereference
1682 if ignore_zeros is not None:
1683 self.ignore_zeros = ignore_zeros
1684 if encoding is not None:
1685 self.encoding = encoding
1686 self.errors = errors
1687
1688 if pax_headers is not None and self.format == PAX_FORMAT:
1689 self.pax_headers = pax_headers
1690 else:
1691 self.pax_headers = {}
1692
1693 if debug is not None:
1694 self.debug = debug
1695 if errorlevel is not None:
1696 self.errorlevel = errorlevel
1697
1698 # Init datastructures.
1699 self.copybufsize = copybufsize
1700 self.closed = False
1701 self.members = [] # list of members as TarInfo objects
1702 self._loaded = False # flag if all members have been read
1703 self.offset = self.fileobj.tell()
1704 # current position in the archive file
1705 self.inodes = {} # dictionary caching the inodes of
1706 # archive members already added
1707
1708 try:
1709 if self.mode == "r":
1710 self.firstmember = None
1711 self.firstmember = self.next()
1712
1713 if self.mode == "a":
1714 # Move to the end of the archive,
1715 # before the first empty block.
1716 while True:
1717 self.fileobj.seek(self.offset)
1718 try:
1719 tarinfo = self.tarinfo.fromtarfile(self)
1720 self.members.append(tarinfo)
1721 except EOFHeaderError:
1722 self.fileobj.seek(self.offset)
1723 break
1724 except HeaderError as e:
1725 raise ReadError(str(e)) from None
1726
1727 if self.mode in ("a", "w", "x"):
1728 self._loaded = True
1729
1730 if self.pax_headers:
1731 buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
1732 self.fileobj.write(buf)
1733 self.offset += len(buf)
1734 except:
1735 if not self._extfileobj:
1736 self.fileobj.close()
1737 self.closed = True
1738 raise
1739
1740 #--------------------------------------------------------------------------
1741 # Below are the classmethods which act as alternate constructors to the
1742 # TarFile class. The open() method is the only one that is needed for
1743 # public use; it is the "super"-constructor and is able to select an
1744 # adequate "sub"-constructor for a particular compression using the mapping
1745 # from OPEN_METH.
1746 #
1747 # This concept allows one to subclass TarFile without losing the comfort of
1748 # the super-constructor. A sub-constructor is registered and made available
1749 # by adding it to the mapping in OPEN_METH.
1750
1751 @classmethod
1752 def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
1753 """Open a tar archive for reading, writing or appending. Return
1754 an appropriate TarFile class.
1755
1756 mode:
1757 'r' or 'r:*' open for reading with transparent compression
1758 'r:' open for reading exclusively uncompressed
1759 'r:gz' open for reading with gzip compression
1760 'r:bz2' open for reading with bzip2 compression
1761 'r:xz' open for reading with lzma compression
1762 'a' or 'a:' open for appending, creating the file if necessary
1763 'w' or 'w:' open for writing without compression
1764 'w:gz' open for writing with gzip compression
1765 'w:bz2' open for writing with bzip2 compression
1766 'w:xz' open for writing with lzma compression
1767
1768 'x' or 'x:' create a tarfile exclusively without compression, raise
1769 an exception if the file already exists
1770 'x:gz' create a gzip compressed tarfile, raise an exception
1771 if the file already exists
1772 'x:bz2' create a bzip2 compressed tarfile, raise an exception
1773 if the file already exists
1774 'x:xz' create an lzma compressed tarfile, raise an exception
1775 if the file already exists
1776
1777 'r|*' open a stream of tar blocks with transparent compression
1778 'r|' open an uncompressed stream of tar blocks for reading
1779 'r|gz' open a gzip compressed stream of tar blocks
1780 'r|bz2' open a bzip2 compressed stream of tar blocks
1781 'r|xz' open an lzma compressed stream of tar blocks
1782 'w|' open an uncompressed stream for writing
1783 'w|gz' open a gzip compressed stream for writing
1784 'w|bz2' open a bzip2 compressed stream for writing
1785 'w|xz' open an lzma compressed stream for writing
1786 """
1787
1788 if not name and not fileobj:
1789 raise ValueError("nothing to open")
1790
1791 if mode in ("r", "r:*"):
1792 # Find out which *open() is appropriate for opening the file.
1793 def not_compressed(comptype):
1794 return cls.OPEN_METH[comptype] == 'taropen'
1795 error_msgs = []
1796 for comptype in sorted(cls.OPEN_METH, key=not_compressed):
1797 func = getattr(cls, cls.OPEN_METH[comptype])
1798 if fileobj is not None:
1799 saved_pos = fileobj.tell()
1800 try:
1801 return func(name, "r", fileobj, **kwargs)
1802 except (ReadError, CompressionError) as e:
1803 error_msgs.append(f'- method {comptype}: {e!r}')
1804 if fileobj is not None:
1805 fileobj.seek(saved_pos)
1806 continue
1807 error_msgs_summary = '\n'.join(error_msgs)
1808 raise ReadError(f"file could not be opened successfully:\n{error_msgs_summary}")
1809
1810 elif ":" in mode:
1811 filemode, comptype = mode.split(":", 1)
1812 filemode = filemode or "r"
1813 comptype = comptype or "tar"
1814
1815 # Select the *open() function according to
1816 # given compression.
1817 if comptype in cls.OPEN_METH:
1818 func = getattr(cls, cls.OPEN_METH[comptype])
1819 else:
1820 raise CompressionError("unknown compression type %r" % comptype)
1821 return func(name, filemode, fileobj, **kwargs)
1822
1823 elif "|" in mode:
1824 filemode, comptype = mode.split("|", 1)
1825 filemode = filemode or "r"
1826 comptype = comptype or "tar"
1827
1828 if filemode not in ("r", "w"):
1829 raise ValueError("mode must be 'r' or 'w'")
1830
1831 compresslevel = kwargs.pop("compresslevel", 9)
1832 stream = _Stream(name, filemode, comptype, fileobj, bufsize,
1833 compresslevel)
1834 try:
1835 t = cls(name, filemode, stream, **kwargs)
1836 except:
1837 stream.close()
1838 raise
1839 t._extfileobj = False
1840 return t
1841
1842 elif mode in ("a", "w", "x"):
1843 return cls.taropen(name, mode, fileobj, **kwargs)
1844
1845 raise ValueError("undiscernible mode")
1846
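# A usage sketch for open() (illustrative only; "backup.tar.gz" and the
# "data" directory are hypothetical names):
#
#     import sys
#     import tarfile
#
#     # Read with transparent compression detection (mode "r" == "r:*").
#     with tarfile.open("backup.tar.gz") as tf:
#         print(tf.getnames())
#
#     # Create a new gzip-compressed archive.
#     with tarfile.open("backup.tar.gz", "w:gz") as tf:
#         tf.add("data")
#
#     # Read a non-seekable stream of tar blocks, e.g. from stdin.
#     with tarfile.open(fileobj=sys.stdin.buffer, mode="r|*") as tf:
#         for member in tf:
#             print(member.name)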
1847 @classmethod
1848 def taropen(cls, name, mode="r", fileobj=None, **kwargs):
1849 """Open uncompressed tar archive name for reading or writing.
1850 """
1851 if mode not in ("r", "a", "w", "x"):
1852 raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
1853 return cls(name, mode, fileobj, **kwargs)
1854
1855 @classmethod
1856 def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1857 """Open gzip compressed tar archive name for reading or writing.
1858 Appending is not allowed.
1859 """
1860 if mode not in ("r", "w", "x"):
1861 raise ValueError("mode must be 'r', 'w' or 'x'")
1862
1863 try:
1864 from gzip import GzipFile
1865 except ImportError:
1866 raise CompressionError("gzip module is not available") from None
1867
1868 try:
1869 fileobj = GzipFile(name, mode + "b", compresslevel, fileobj)
1870 except OSError as e:
1871 if fileobj is not None and mode == 'r':
1872 raise ReadError("not a gzip file") from e
1873 raise
1874
1875 try:
1876 t = cls.taropen(name, mode, fileobj, **kwargs)
1877 except OSError as e:
1878 fileobj.close()
1879 if mode == 'r':
1880 raise ReadError("not a gzip file") from e
1881 raise
1882 except:
1883 fileobj.close()
1884 raise
1885 t._extfileobj = False
1886 return t
1887
1888 @classmethod
1889 def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1890 """Open bzip2 compressed tar archive name for reading or writing.
1891 Appending is not allowed.
1892 """
1893 if mode not in ("r", "w", "x"):
1894 raise ValueError("mode must be 'r', 'w' or 'x'")
1895
1896 try:
1897 from bz2 import BZ2File
1898 except ImportError:
1899 raise CompressionError("bz2 module is not available") from None
1900
1901 fileobj = BZ2File(fileobj or name, mode, compresslevel=compresslevel)
1902
1903 try:
1904 t = cls.taropen(name, mode, fileobj, **kwargs)
1905 except (OSError, EOFError) as e:
1906 fileobj.close()
1907 if mode == 'r':
1908 raise ReadError("not a bzip2 file") from e
1909 raise
1910 except:
1911 fileobj.close()
1912 raise
1913 t._extfileobj = False
1914 return t
1915
1916 @classmethod
1917 def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
1918 """Open lzma compressed tar archive name for reading or writing.
1919 Appending is not allowed.
1920 """
1921 if mode not in ("r", "w", "x"):
1922 raise ValueError("mode must be 'r', 'w' or 'x'")
1923
1924 try:
1925 from lzma import LZMAFile, LZMAError
1926 except ImportError:
1927 raise CompressionError("lzma module is not available") from None
1928
1929 fileobj = LZMAFile(fileobj or name, mode, preset=preset)
1930
1931 try:
1932 t = cls.taropen(name, mode, fileobj, **kwargs)
1933 except (LZMAError, EOFError) as e:
1934 fileobj.close()
1935 if mode == 'r':
1936 raise ReadError("not an lzma file") from e
1937 raise
1938 except:
1939 fileobj.close()
1940 raise
1941 t._extfileobj = False
1942 return t
1943
1944 # All *open() methods are registered here.
1945 OPEN_METH = {
1946 "tar": "taropen", # uncompressed tar
1947 "gz": "gzopen", # gzip compressed tar
1948 "bz2": "bz2open", # bzip2 compressed tar
1949 "xz": "xzopen" # lzma compressed tar
1950 }
1951
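# Sketch of registering an extra compression method in a subclass, as
# described above for OPEN_METH. The `zstandard` module is a hypothetical
# third-party dependency and its gzip-like open() API is an assumption:
#
#     class ZstTarFile(TarFile):
#         @classmethod
#         def zstopen(cls, name, mode="r", fileobj=None, **kwargs):
#             if mode not in ("r", "w", "x"):
#                 raise ValueError("mode must be 'r', 'w' or 'x'")
#             import zstandard                 # assumed dependency
#             fileobj = zstandard.open(fileobj or name, mode + "b")
#             try:
#                 t = cls.taropen(name, mode, fileobj, **kwargs)
#             except:
#                 fileobj.close()
#                 raise
#             t._extfileobj = False
#             return t
#
#         OPEN_METH = {**TarFile.OPEN_METH, "zst": "zstopen"}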
1952 #--------------------------------------------------------------------------
1953 # The public methods which TarFile provides:
1954
1955 def close(self):
1956 """Close the TarFile. In write-mode, two finishing zero blocks are
1957 appended to the archive.
1958 """
1959 if self.closed:
1960 return
1961
1962 self.closed = True
1963 try:
1964 if self.mode in ("a", "w", "x"):
1965 self.fileobj.write(NUL * (BLOCKSIZE * 2))
1966 self.offset += (BLOCKSIZE * 2)
1967 # fill up the end with zero-blocks
1968 # (like option -b20 for tar does)
1969 blocks, remainder = divmod(self.offset, RECORDSIZE)
1970 if remainder > 0:
1971 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1972 finally:
1973 if not self._extfileobj:
1974 self.fileobj.close()
1975
1976 def getmember(self, name):
1977 """Return a TarInfo object for member `name'. If `name' cannot be
1978 found in the archive, KeyError is raised. If a member occurs more
1979 than once in the archive, its last occurrence is assumed to be the
1980 most up-to-date version.
1981 """
1982 tarinfo = self._getmember(name.rstrip('/'))
1983 if tarinfo is None:
1984 raise KeyError("filename %r not found" % name)
1985 return tarinfo
1986
1987 def getmembers(self):
1988 """Return the members of the archive as a list of TarInfo objects. The
1989 list has the same order as the members in the archive.
1990 """
1991 self._check()
1992 if not self._loaded: # if we want to obtain a list of
1993 self._load() # all members, we first have to
1994 # scan the whole archive.
1995 return self.members
1996
1997 def getnames(self):
1998 """Return the members of the archive as a list of their names. It has
1999 the same order as the list returned by getmembers().
2000 """
2001 return [tarinfo.name for tarinfo in self.getmembers()]
2002
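# Usage sketch for member lookup (archive and member names are illustrative):
#
#     with tarfile.open("backup.tar") as tf:
#         info = tf.getmember("data/report.txt")   # KeyError if not present
#         print(info.size, info.mtime)
#         for info in tf.getmembers():
#             print(info.name)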
2003 def gettarinfo(self, name=None, arcname=None, fileobj=None):
2004 """Create a TarInfo object from the result of os.stat or equivalent
2005 on an existing file. The file is either named by `name', or
2006 specified as a file object `fileobj' with a file descriptor. If
2007 given, `arcname' specifies an alternative name for the file in the
2008 archive, otherwise, the name is taken from the 'name' attribute of
2009 'fileobj', or the 'name' argument. The name should be a text
2010 string.
2011 """
2012 self._check("awx")
2013
2014 # When fileobj is given, replace name by
2015 # fileobj's real name.
2016 if fileobj is not None:
2017 name = fileobj.name
2018
2019 # Building the name of the member in the archive.
2020 # Backward slashes are converted to forward slashes,
2021 # absolute paths are turned into relative paths.
2022 if arcname is None:
2023 arcname = name
2024 drv, arcname = os.path.splitdrive(arcname)
2025 arcname = arcname.replace(os.sep, "/")
2026 arcname = arcname.lstrip("/")
2027
2028 # Now, fill the TarInfo object with
2029 # information specific for the file.
2030 tarinfo = self.tarinfo()
2031 tarinfo.tarfile = self # Not needed
2032
2033 # Use os.stat or os.lstat, depending on whether symlinks shall be resolved.
2034 if fileobj is None:
2035 if not self.dereference:
2036 statres = os.lstat(name)
2037 else:
2038 statres = os.stat(name)
2039 else:
2040 statres = os.fstat(fileobj.fileno())
2041 linkname = ""
2042
2043 stmd = statres.st_mode
2044 if stat.S_ISREG(stmd):
2045 inode = (statres.st_ino, statres.st_dev)
2046 if not self.dereference and statres.st_nlink > 1 and \
2047 inode in self.inodes and arcname != self.inodes[inode]:
2048 # Is it a hardlink to an already
2049 # archived file?
2050 type = LNKTYPE
2051 linkname = self.inodes[inode]
2052 else:
2053 # The inode is added only if it's valid.
2054 # For win32 it is always 0.
2055 type = REGTYPE
2056 if inode[0]:
2057 self.inodes[inode] = arcname
2058 elif stat.S_ISDIR(stmd):
2059 type = DIRTYPE
2060 elif stat.S_ISFIFO(stmd):
2061 type = FIFOTYPE
2062 elif stat.S_ISLNK(stmd):
2063 type = SYMTYPE
2064 linkname = os.readlink(name)
2065 elif stat.S_ISCHR(stmd):
2066 type = CHRTYPE
2067 elif stat.S_ISBLK(stmd):
2068 type = BLKTYPE
2069 else:
2070 return None
2071
2072 # Fill the TarInfo object with all
2073 # information we can get.
2074 tarinfo.name = arcname
2075 tarinfo.mode = stmd
2076 tarinfo.uid = statres.st_uid
2077 tarinfo.gid = statres.st_gid
2078 if type == REGTYPE:
2079 tarinfo.size = statres.st_size
2080 else:
2081 tarinfo.size = 0
2082 tarinfo.mtime = statres.st_mtime
2083 tarinfo.type = type
2084 tarinfo.linkname = linkname
2085 if pwd:
2086 try:
2087 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
2088 except KeyError:
2089 pass
2090 if grp:
2091 try:
2092 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
2093 except KeyError:
2094 pass
2095
2096 if type in (CHRTYPE, BLKTYPE):
2097 if hasattr(os, "major") and hasattr(os, "minor"):
2098 tarinfo.devmajor = os.major(statres.st_rdev)
2099 tarinfo.devminor = os.minor(statres.st_rdev)
2100 return tarinfo
2101
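# Sketch of building a header with gettarinfo() and adjusting it before it is
# written; "report.txt" is an illustrative name and this is user-level code
# (where open() is the builtin):
#
#     with tarfile.open("backup.tar", "w") as tf:
#         with open("report.txt", "rb") as f:
#             info = tf.gettarinfo(arcname="report.txt", fileobj=f)
#             info.uname = info.gname = "nobody"    # tweak metadata
#             tf.addfile(info, f)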
2102 def list(self, verbose=True, *, members=None):
2103 """Print a table of contents to sys.stdout. If `verbose' is False, only
2104 the names of the members are printed. If it is True, an `ls -l'-like
2105 output is produced. `members' is optional and must be a subset of the
2106 list returned by getmembers().
2107 """
2108 self._check()
2109
2110 if members is None:
2111 members = self
2112 for tarinfo in members:
2113 if verbose:
2114 if tarinfo.mode is None:
2115 _safe_print("??????????")
2116 else:
2117 _safe_print(stat.filemode(tarinfo.mode))
2118 _safe_print("%s/%s" % (tarinfo.uname or tarinfo.uid,
2119 tarinfo.gname or tarinfo.gid))
2120 if tarinfo.ischr() or tarinfo.isblk():
2121 _safe_print("%10s" %
2122 ("%d,%d" % (tarinfo.devmajor, tarinfo.devminor)))
2123 else:
2124 _safe_print("%10d" % tarinfo.size)
2125 if tarinfo.mtime is None:
2126 _safe_print("????-??-?? ??:??:??")
2127 else:
2128 _safe_print("%d-%02d-%02d %02d:%02d:%02d" \
2129 % time.localtime(tarinfo.mtime)[:6])
2130
2131 _safe_print(tarinfo.name + ("/" if tarinfo.isdir() else ""))
2132
2133 if verbose:
2134 if tarinfo.issym():
2135 _safe_print("-> " + tarinfo.linkname)
2136 if tarinfo.islnk():
2137 _safe_print("link to " + tarinfo.linkname)
2138 print()
2139
2140 def add(self, name, arcname=None, recursive=True, *, filter=None):
2141 """Add the file `name' to the archive. `name' may be any type of file
2142 (directory, fifo, symbolic link, etc.). If given, `arcname'
2143 specifies an alternative name for the file in the archive.
2144 Directories are added recursively by default. This can be avoided by
2145 setting `recursive' to False. `filter' is a function
2146 that expects a TarInfo object argument and returns the changed
2147 TarInfo object; if it returns None, the TarInfo object will be
2148 excluded from the archive.
2149 """
2150 self._check("awx")
2151
2152 if arcname is None:
2153 arcname = name
2154
2155 # Skip if somebody tries to archive the archive...
2156 if self.name is not None and os.path.abspath(name) == self.name:
2157 self._dbg(2, "tarfile: Skipped %r" % name)
2158 return
2159
2160 self._dbg(1, name)
2161
2162 # Create a TarInfo object from the file.
2163 tarinfo = self.gettarinfo(name, arcname)
2164
2165 if tarinfo is None:
2166 self._dbg(1, "tarfile: Unsupported type %r" % name)
2167 return
2168
2169 # Change or exclude the TarInfo object.
2170 if filter is not None:
2171 tarinfo = filter(tarinfo)
2172 if tarinfo is None:
2173 self._dbg(2, "tarfile: Excluded %r" % name)
2174 return
2175
2176 # Append the tar header and data to the archive.
2177 if tarinfo.isreg():
2178 with bltn_open(name, "rb") as f:
2179 self.addfile(tarinfo, f)
2180
2181 elif tarinfo.isdir():
2182 self.addfile(tarinfo)
2183 if recursive:
2184 for f in sorted(os.listdir(name)):
2185 self.add(os.path.join(name, f), os.path.join(arcname, f),
2186 recursive, filter=filter)
2187
2188 else:
2189 self.addfile(tarinfo)
2190
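# Usage sketch for add() with a filter callable (names are illustrative);
# returning None excludes a member, returning the TarInfo keeps it:
#
#     def skip_caches(tarinfo):
#         if "__pycache__" in tarinfo.name or tarinfo.name.endswith(".pyc"):
#             return None
#         tarinfo.uid = tarinfo.gid = 0
#         return tarinfo
#
#     with tarfile.open("project.tar.gz", "w:gz") as tf:
#         tf.add("project", filter=skip_caches)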
2191 def addfile(self, tarinfo, fileobj=None):
2192 """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
2193 given, it should be a binary file, and tarinfo.size bytes are read
2194 from it and added to the archive. You can create TarInfo objects
2195 directly, or by using gettarinfo().
2196 """
2197 self._check("awx")
2198
2199 tarinfo = copy.copy(tarinfo)
2200
2201 buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
2202 self.fileobj.write(buf)
2203 self.offset += len(buf)
2204 bufsize = self.copybufsize
2205 # If there's data to follow, append it.
2206 if fileobj is not None:
2207 copyfileobj(fileobj, self.fileobj, tarinfo.size, bufsize=bufsize)
2208 blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
2209 if remainder > 0:
2210 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
2211 blocks += 1
2212 self.offset += blocks * BLOCKSIZE
2213
2214 self.members.append(tarinfo)
2215
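# Sketch of adding in-memory data with a hand-built TarInfo; no file on disk
# is involved and the member name is illustrative:
#
#     import io, time, tarfile
#
#     payload = b"hello world\n"
#     info = tarfile.TarInfo(name="greeting.txt")
#     info.size = len(payload)          # size must match the data supplied
#     info.mtime = time.time()
#     with tarfile.open("backup.tar", "w") as tf:
#         tf.addfile(info, io.BytesIO(payload))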
2216 def _get_filter_function(self, filter):
2217 if filter is None:
2218 filter = self.extraction_filter
2219 if filter is None:
2220 warnings.warn(
2221 'Python 3.14 will, by default, filter extracted tar '
2222 + 'archives and reject files or modify their metadata. '
2223 + 'Use the filter argument to control this behavior.',
2224 DeprecationWarning)
2225 return fully_trusted_filter
2226 if isinstance(filter, str):
2227 raise TypeError(
2228 'String names are not supported for '
2229 + 'TarFile.extraction_filter. Use a function such as '
2230 + 'tarfile.data_filter directly.')
2231 return filter
2232 if callable(filter):
2233 return filter
2234 try:
2235 return _NAMED_FILTERS[filter]
2236 except KeyError:
2237 raise ValueError(f"filter {filter!r} not found") from None
2238
2239 def extractall(self, path=".", members=None, *, numeric_owner=False,
2240 filter=None):
2241 """Extract all members from the archive to the current working
2242 directory and set owner, modification time and permissions on
2243 directories afterwards. `path' specifies a different directory
2244 to extract to. `members' is optional and must be a subset of the
2245 list returned by getmembers(). If `numeric_owner` is True, only
2246 the numbers for user/group names are used and not the names.
2247
2248 The `filter` function will be called on each member just
2249 before extraction.
2250 It can return a changed TarInfo or None to skip the member.
2251 String names of common filters are accepted.
2252 """
2253 directories = []
2254
2255 filter_function = self._get_filter_function(filter)
2256 if members is None:
2257 members = self
2258
2259 for member in members:
2260 tarinfo = self._get_extract_tarinfo(member, filter_function, path)
2261 if tarinfo is None:
2262 continue
2263 if tarinfo.isdir():
2264 # For directories, delay setting attributes until later,
2265 # since permissions can interfere with extraction and
2266 # extracting contents can reset mtime.
2267 directories.append(tarinfo)
2268 self._extract_one(tarinfo, path, set_attrs=not tarinfo.isdir(),
2269 numeric_owner=numeric_owner)
2270
2271 # Reverse sort directories.
2272 directories.sort(key=lambda a: a.name, reverse=True)
2273
2274 # Set correct owner, mtime and filemode on directories.
2275 for tarinfo in directories:
2276 dirpath = os.path.join(path, tarinfo.name)
2277 try:
2278 self.chown(tarinfo, dirpath, numeric_owner=numeric_owner)
2279 self.utime(tarinfo, dirpath)
2280 self.chmod(tarinfo, dirpath)
2281 except ExtractError as e:
2282 self._handle_nonfatal_error(e)
2283
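# Usage sketch for extractall() with an extraction filter (archive name and
# destination directory are illustrative):
#
#     with tarfile.open("backup.tar.gz") as tf:
#         # The 'data' filter rejects absolute paths, members that would end
#         # up outside the destination, and special files.
#         tf.extractall(path="restore", filter="data")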
2284 def extract(self, member, path="", set_attrs=True, *, numeric_owner=False,
2285 filter=None):
2286 """Extract a member from the archive to the current working directory,
2287 using its full name. Its file information is extracted as accurately
2288 as possible. `member' may be a filename or a TarInfo object. You can
2289 specify a different directory using `path'. File attributes (owner,
2290 mtime, mode) are set unless `set_attrs' is False. If `numeric_owner`
2291 is True, only the numbers for user/group names are used and not
2292 the names.
2293
2294 The `filter` function will be called before extraction.
2295 It can return a changed TarInfo or None to skip the member.
2296 String names of common filters are accepted.
2297 """
2298 filter_function = self._get_filter_function(filter)
2299 tarinfo = self._get_extract_tarinfo(member, filter_function, path)
2300 if tarinfo is not None:
2301 self._extract_one(tarinfo, path, set_attrs, numeric_owner)
2302
2303 def _get_extract_tarinfo(self, member, filter_function, path):
2304 """Get filtered TarInfo (or None) from member, which might be a str"""
2305 if isinstance(member, str):
2306 tarinfo = self.getmember(member)
2307 else:
2308 tarinfo = member
2309
2310 unfiltered = tarinfo
2311 try:
2312 tarinfo = filter_function(tarinfo, path)
2313 except (OSError, FilterError) as e:
2314 self._handle_fatal_error(e)
2315 except ExtractError as e:
2316 self._handle_nonfatal_error(e)
2317 if tarinfo is None:
2318 self._dbg(2, "tarfile: Excluded %r" % unfiltered.name)
2319 return None
2320 # Prepare the link target for makelink().
2321 if tarinfo.islnk():
2322 tarinfo = copy.copy(tarinfo)
2323 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
2324 return tarinfo
2325
2326 def _extract_one(self, tarinfo, path, set_attrs, numeric_owner):
2327 """Extract from filtered tarinfo to disk"""
2328 self._check("r")
2329
2330 try:
2331 self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
2332 set_attrs=set_attrs,
2333 numeric_owner=numeric_owner)
2334 except OSError as e:
2335 self._handle_fatal_error(e)
2336 except ExtractError as e:
2337 self._handle_nonfatal_error(e)
2338
2339 def _handle_nonfatal_error(self, e):
2340 """Handle non-fatal error (ExtractError) according to errorlevel"""
2341 if self.errorlevel > 1:
2342 raise
2343 else:
2344 self._dbg(1, "tarfile: %s" % e)
2345
2346 def _handle_fatal_error(self, e):
2347 """Handle "fatal" error according to self.errorlevel"""
2348 if self.errorlevel > 0:
2349 raise
2350 elif isinstance(e, OSError):
2351 if e.filename is None:
2352 self._dbg(1, "tarfile: %s" % e.strerror)
2353 else:
2354 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
2355 else:
2356 self._dbg(1, "tarfile: %s %s" % (type(e).__name__, e))
2357
2358 def extractfile(self, member):
2359 """Extract a member from the archive as a file object. `member' may be
2360 a filename or a TarInfo object. If `member' is a regular file or
2361 a link, an io.BufferedReader object is returned. For all other
2362 existing members, None is returned. If `member' does not appear
2363 in the archive, KeyError is raised.
2364 """
2365 self._check("r")
2366
2367 if isinstance(member, str):
2368 tarinfo = self.getmember(member)
2369 else:
2370 tarinfo = member
2371
2372 if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
2373 # Members with unknown types are treated as regular files.
2374 return self.fileobject(self, tarinfo)
2375
2376 elif tarinfo.islnk() or tarinfo.issym():
2377 if isinstance(self.fileobj, _Stream):
2378 # A small but ugly workaround for the case that someone tries
2379 # to extract a (sym)link as a file-object from a non-seekable
2380 # stream of tar blocks.
2381 raise StreamError("cannot extract (sym)link as file object")
2382 else:
2383 # A (sym)link's file object is its target's file object.
2384 return self.extractfile(self._find_link_target(tarinfo))
2385 else:
2386 # If there's no data associated with the member (directory, chrdev,
2387 # blkdev, etc.), return None instead of a file object.
2388 return None
2389
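# Usage sketch for extractfile() (member name is illustrative):
#
#     with tarfile.open("backup.tar") as tf:
#         f = tf.extractfile("data/report.txt")
#         if f is not None:              # None for directories, devices, ...
#             content = f.read()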
2390 def _extract_member(self, tarinfo, targetpath, set_attrs=True,
2391 numeric_owner=False):
2392 """Extract the TarInfo object tarinfo to a physical
2393 file called targetpath.
2394 """
2395 # Fetch the TarInfo object for the given name
2396 # and build the destination pathname, replacing
2397 # forward slashes with platform-specific separators.
2398 targetpath = targetpath.rstrip("/")
2399 targetpath = targetpath.replace("/", os.sep)
2400
2401 # Create all upper directories.
2402 upperdirs = os.path.dirname(targetpath)
2403 if upperdirs and not os.path.exists(upperdirs):
2404 # Create directories that are not part of the archive with
2405 # default permissions.
2406 os.makedirs(upperdirs)
2407
2408 if tarinfo.islnk() or tarinfo.issym():
2409 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2410 else:
2411 self._dbg(1, tarinfo.name)
2412
2413 if tarinfo.isreg():
2414 self.makefile(tarinfo, targetpath)
2415 elif tarinfo.isdir():
2416 self.makedir(tarinfo, targetpath)
2417 elif tarinfo.isfifo():
2418 self.makefifo(tarinfo, targetpath)
2419 elif tarinfo.ischr() or tarinfo.isblk():
2420 self.makedev(tarinfo, targetpath)
2421 elif tarinfo.islnk() or tarinfo.issym():
2422 self.makelink(tarinfo, targetpath)
2423 elif tarinfo.type not in SUPPORTED_TYPES:
2424 self.makeunknown(tarinfo, targetpath)
2425 else:
2426 self.makefile(tarinfo, targetpath)
2427
2428 if set_attrs:
2429 self.chown(tarinfo, targetpath, numeric_owner)
2430 if not tarinfo.issym():
2431 self.chmod(tarinfo, targetpath)
2432 self.utime(tarinfo, targetpath)
2433
2434 #--------------------------------------------------------------------------
2435 # Below are the different file methods. They are called via
2436 # _extract_member() when extract() is called. They can be replaced in a
2437 # subclass to implement other functionality.
2438
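# Sketch of such a subclass (illustrative; it only logs each regular file and
# then defers to the default implementation):
#
#     class LoggingTarFile(TarFile):
#         def makefile(self, tarinfo, targetpath):
#             print("writing", tarinfo.name, "->", targetpath)
#             super().makefile(tarinfo, targetpath)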
2439 def makedir(self, tarinfo, targetpath):
2440 """Make a directory called targetpath.
2441 """
2442 try:
2443 if tarinfo.mode is None:
2444 # Use the system's default mode
2445 os.mkdir(targetpath)
2446 else:
2447 # Use a safe mode for the directory, the real mode is set
2448 # later in _extract_member().
2449 os.mkdir(targetpath, 0o700)
2450 except FileExistsError:
2451 pass
2452
2453 def makefile(self, tarinfo, targetpath):
2454 """Make a file called targetpath.
2455 """
2456 source = self.fileobj
2457 source.seek(tarinfo.offset_data)
2458 bufsize = self.copybufsize
2459 with bltn_open(targetpath, "wb") as target:
2460 if tarinfo.sparse is not None:
2461 for offset, size in tarinfo.sparse:
2462 target.seek(offset)
2463 copyfileobj(source, target, size, ReadError, bufsize)
2464 target.seek(tarinfo.size)
2465 target.truncate()
2466 else:
2467 copyfileobj(source, target, tarinfo.size, ReadError, bufsize)
2468
2469 def makeunknown(self, tarinfo, targetpath):
2470 """Make a file from a TarInfo object with an unknown type
2471 at targetpath.
2472 """
2473 self.makefile(tarinfo, targetpath)
2474 self._dbg(1, "tarfile: Unknown file type %r, " \
2475 "extracted as regular file." % tarinfo.type)
2476
2477 def makefifo(self, tarinfo, targetpath):
2478 """Make a fifo called targetpath.
2479 """
2480 if hasattr(os, "mkfifo"):
2481 os.mkfifo(targetpath)
2482 else:
2483 raise ExtractError("fifo not supported by system")
2484
2485 def makedev(self, tarinfo, targetpath):
2486 """Make a character or block device called targetpath.
2487 """
2488 if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
2489 raise ExtractError("special devices not supported by system")
2490
2491 mode = tarinfo.mode
2492 if mode is None:
2493 # Use mknod's default
2494 mode = 0o600
2495 if tarinfo.isblk():
2496 mode |= stat.S_IFBLK
2497 else:
2498 mode |= stat.S_IFCHR
2499
2500 os.mknod(targetpath, mode,
2501 os.makedev(tarinfo.devmajor, tarinfo.devminor))
2502
2503 def makelink(self, tarinfo, targetpath):
2504 """Make a (symbolic) link called targetpath. If it cannot be created
2505 (platform limitation), we try to make a copy of the referenced file
2506 instead of a link.
2507 """
2508 try:
2509 # For systems that support symbolic and hard links.
2510 if tarinfo.issym():
2511 if os.path.lexists(targetpath):
2512 # Avoid FileExistsError on following os.symlink.
2513 os.unlink(targetpath)
2514 os.symlink(tarinfo.linkname, targetpath)
2515 else:
2516 if os.path.exists(tarinfo._link_target):
2517 os.link(tarinfo._link_target, targetpath)
2518 else:
2519 self._extract_member(self._find_link_target(tarinfo),
2520 targetpath)
2521 except symlink_exception:
2522 try:
2523 self._extract_member(self._find_link_target(tarinfo),
2524 targetpath)
2525 except KeyError:
2526 raise ExtractError("unable to resolve link inside archive") from None
2527
2528 def chown(self, tarinfo, targetpath, numeric_owner):
2529 """Set owner of targetpath according to tarinfo. If numeric_owner
2530 is True, use .gid/.uid instead of .gname/.uname. If numeric_owner
2531 is False, fall back to .gid/.uid when the search based on name
2532 fails.
2533 """
2534 if hasattr(os, "geteuid") and os.geteuid() == 0:
2535 # We have to be root to do so.
2536 g = tarinfo.gid
2537 u = tarinfo.uid
2538 if not numeric_owner:
2539 try:
2540 if grp and tarinfo.gname:
2541 g = grp.getgrnam(tarinfo.gname)[2]
2542 except KeyError:
2543 pass
2544 try:
2545 if pwd and tarinfo.uname:
2546 u = pwd.getpwnam(tarinfo.uname)[2]
2547 except KeyError:
2548 pass
2549 if g is None:
2550 g = -1
2551 if u is None:
2552 u = -1
2553 try:
2554 if tarinfo.issym() and hasattr(os, "lchown"):
2555 os.lchown(targetpath, u, g)
2556 else:
2557 os.chown(targetpath, u, g)
2558 except OSError as e:
2559 raise ExtractError("could not change owner") from e
2560
2561 def chmod(self, tarinfo, targetpath):
2562 """Set file permissions of targetpath according to tarinfo.
2563 """
2564 if tarinfo.mode is None:
2565 return
2566 try:
2567 os.chmod(targetpath, tarinfo.mode)
2568 except OSError as e:
2569 raise ExtractError("could not change mode") from e
2570
2571 def utime(self, tarinfo, targetpath):
2572 """Set modification time of targetpath according to tarinfo.
2573 """
2574 mtime = tarinfo.mtime
2575 if mtime is None:
2576 return
2577 if not hasattr(os, 'utime'):
2578 return
2579 try:
2580 os.utime(targetpath, (mtime, mtime))
2581 except OSError as e:
2582 raise ExtractError("could not change modification time") from e
2583
2584 #--------------------------------------------------------------------------
2585 def next(self):
2586 """Return the next member of the archive as a TarInfo object, when
2587 TarFile is opened for reading. Return None if there are no more
2588 members available.
2589 """
2590 self._check("ra")
2591 if self.firstmember is not None:
2592 m = self.firstmember
2593 self.firstmember = None
2594 return m
2595
2596 # Advance the file pointer.
2597 if self.offset != self.fileobj.tell():
2598 if self.offset == 0:
2599 return None
2600 self.fileobj.seek(self.offset - 1)
2601 if not self.fileobj.read(1):
2602 raise ReadError("unexpected end of data")
2603
2604 # Read the next block.
2605 tarinfo = None
2606 while True:
2607 try:
2608 tarinfo = self.tarinfo.fromtarfile(self)
2609 except EOFHeaderError as e:
2610 if self.ignore_zeros:
2611 self._dbg(2, "0x%X: %s" % (self.offset, e))
2612 self.offset += BLOCKSIZE
2613 continue
2614 except InvalidHeaderError as e:
2615 if self.ignore_zeros:
2616 self._dbg(2, "0x%X: %s" % (self.offset, e))
2617 self.offset += BLOCKSIZE
2618 continue
2619 elif self.offset == 0:
2620 raise ReadError(str(e)) from None
2621 except EmptyHeaderError:
2622 if self.offset == 0:
2623 raise ReadError("empty file") from None
2624 except TruncatedHeaderError as e:
2625 if self.offset == 0:
2626 raise ReadError(str(e)) from None
2627 except SubsequentHeaderError as e:
2628 raise ReadError(str(e)) from None
2629 except Exception as e:
2630 try:
2631 import zlib
2632 if isinstance(e, zlib.error):
2633 raise ReadError(f'zlib error: {e}') from None
2634 else:
2635 raise e
2636 except ImportError:
2637 raise e
2638 break
2639
2640 if tarinfo is not None:
2641 self.members.append(tarinfo)
2642 else:
2643 self._loaded = True
2644
2645 return tarinfo
2646
2647 #--------------------------------------------------------------------------
2648 # Little helper methods:
2649
2650 def _getmember(self, name, tarinfo=None, normalize=False):
2651 """Find an archive member by name from bottom to top.
2652 If tarinfo is given, it is used as the starting point.
2653 """
2654 # Ensure that all members have been loaded.
2655 members = self.getmembers()
2656
2657 # Limit the member search list up to tarinfo.
2658 skipping = False
2659 if tarinfo is not None:
2660 try:
2661 index = members.index(tarinfo)
2662 except ValueError:
2663 # The given starting point might be a (modified) copy.
2664 # We'll later skip members until we find an equivalent.
2665 skipping = True
2666 else:
2667 # Happy fast path
2668 members = members[:index]
2669
2670 if normalize:
2671 name = os.path.normpath(name)
2672
2673 for member in reversed(members):
2674 if skipping:
2675 if tarinfo.offset == member.offset:
2676 skipping = False
2677 continue
2678 if normalize:
2679 member_name = os.path.normpath(member.name)
2680 else:
2681 member_name = member.name
2682
2683 if name == member_name:
2684 return member
2685
2686 if skipping:
2687 # Starting point was not found
2688 raise ValueError(tarinfo)
2689
2690 def _load(self):
2691 """Read through the entire archive file and look for readable
2692 members.
2693 """
2694 while self.next() is not None:
2695 pass
2696 self._loaded = True
2697
2698 def _check(self, mode=None):
2699 """Check if TarFile is still open, and if the operation's mode
2700 corresponds to TarFile's mode.
2701 """
2702 if self.closed:
2703 raise OSError("%s is closed" % self.__class__.__name__)
2704 if mode is not None and self.mode not in mode:
2705 raise OSError("bad operation for mode %r" % self.mode)
2706
2707 def _find_link_target(self, tarinfo):
2708 """Find the target member of a symlink or hardlink member in the
2709 archive.
2710 """
2711 if tarinfo.issym():
2712 # Always search the entire archive.
2713 linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname)))
2714 limit = None
2715 else:
2716 # Search the archive before the link, because a hard link is
2717 # just a reference to an already archived file.
2718 linkname = tarinfo.linkname
2719 limit = tarinfo
2720
2721 member = self._getmember(linkname, tarinfo=limit, normalize=True)
2722 if member is None:
2723 raise KeyError("linkname %r not found" % linkname)
2724 return member
2725
2726 def __iter__(self):
2727 """Provide an iterator object.
2728 """
2729 if self._loaded:
2730 yield from self.members
2731 return
2732
2733 # Yield items using TarFile's next() method.
2734 # When all members have been read, set TarFile as _loaded.
2735 index = 0
2736 # Fix for SF #1100429: Under rare circumstances it can
2737 # happen that getmembers() is called during iteration,
2738 # which will have already exhausted the next() method.
2739 if self.firstmember is not None:
2740 tarinfo = self.next()
2741 index += 1
2742 yield tarinfo
2743
2744 while True:
2745 if index < len(self.members):
2746 tarinfo = self.members[index]
2747 elif not self._loaded:
2748 tarinfo = self.next()
2749 if not tarinfo:
2750 self._loaded = True
2751 return
2752 else:
2753 return
2754 index += 1
2755 yield tarinfo
2756
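# Iteration sketch: members are produced lazily, which also works for the
# non-seekable stream modes as long as data is read in order (file name is
# illustrative):
#
#     with tarfile.open("backup.tar.gz", "r|gz") as tf:
#         for member in tf:
#             if member.isreg():
#                 data = tf.extractfile(member).read()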
2757 def _dbg(self, level, msg):
2758 """Write debugging output to sys.stderr.
2759 """
2760 if level <= self.debug:
2761 print(msg, file=sys.stderr)
2762
2763 def __enter__(self):
2764 self._check()
2765 return self
2766
2767 def __exit__(self, type, value, traceback):
2768 if type is None:
2769 self.close()
2770 else:
2771 # An exception occurred. We must not call close() because
2772 # it would try to write end-of-archive blocks and padding.
2773 if not self._extfileobj:
2774 self.fileobj.close()
2775 self.closed = True
2776
2777 #--------------------
2778 # exported functions
2779 #--------------------
2780
2781 def is_tarfile(name):
2782 """Return True if name points to a tar archive that we
2783 are able to handle, else return False.
2784
2785 'name' should be a string, file, or file-like object.
2786 """
2787 try:
2788 if hasattr(name, "read"):
2789 pos = name.tell()
2790 t = open(fileobj=name)
2791 name.seek(pos)
2792 else:
2793 t = open(name)
2794 t.close()
2795 return True
2796 except TarError:
2797 return False
2798
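# Usage sketch (paths are illustrative; file objects are accepted as well):
#
#     tarfile.is_tarfile("backup.tar.gz")      # True for a readable archive
#     tarfile.is_tarfile("notes.txt")          # False otherwise
#     with open("backup.tar.gz", "rb") as f:   # user code, builtin open()
#         tarfile.is_tarfile(f)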
2799 open = TarFile.open
2800
2801
2802 def main():
2803 import argparse
2804
2805 description = 'A simple command-line interface for tarfile module.'
2806 parser = argparse.ArgumentParser(description=description)
2807 parser.add_argument('-v', '--verbose', action='store_true', default=False,
2808 help='Verbose output')
2809 parser.add_argument('--filter', metavar='<filtername>',
2810 choices=_NAMED_FILTERS,
2811 help='Filter for extraction')
2812
2813 group = parser.add_mutually_exclusive_group(required=True)
2814 group.add_argument('-l', '--list', metavar='<tarfile>',
2815 help='Show listing of a tarfile')
2816 group.add_argument('-e', '--extract', nargs='+',
2817 metavar=('<tarfile>', '<output_dir>'),
2818 help='Extract tarfile into target dir')
2819 group.add_argument('-c', '--create', nargs='+',
2820 metavar=('<name>', '<file>'),
2821 help='Create tarfile from sources')
2822 group.add_argument('-t', '--test', metavar='<tarfile>',
2823 help='Test if a tarfile is valid')
2824
2825 args = parser.parse_args()
2826
2827 if args.filter and args.extract is None:
2828 parser.exit(1, '--filter is only valid for extraction\n')
2829
2830 if args.test is not None:
2831 src = args.test
2832 if is_tarfile(src):
2833 with open(src, 'r') as tar:
2834 tar.getmembers()
2835 print(tar.getmembers(), file=sys.stderr)
2836 if args.verbose:
2837 print('{!r} is a tar archive.'.format(src))
2838 else:
2839 parser.exit(1, '{!r} is not a tar archive.\n'.format(src))
2840
2841 elif args.list is not None:
2842 src = args.list
2843 if is_tarfile(src):
2844 with TarFile.open(src, 'r:*') as tf:
2845 tf.list(verbose=args.verbose)
2846 else:
2847 parser.exit(1, '{!r} is not a tar archive.\n'.format(src))
2848
2849 elif args.extract is not None:
2850 if len(args.extract) == 1:
2851 src = args.extract[0]
2852 curdir = os.curdir
2853 elif len(args.extract) == 2:
2854 src, curdir = args.extract
2855 else:
2856 parser.exit(1, parser.format_help())
2857
2858 if is_tarfile(src):
2859 with TarFile.open(src, 'r:*') as tf:
2860 tf.extractall(path=curdir, filter=args.filter)
2861 if args.verbose:
2862 if curdir == '.':
2863 msg = '{!r} file is extracted.'.format(src)
2864 else:
2865 msg = ('{!r} file is extracted '
2866 'into {!r} directory.').format(src, curdir)
2867 print(msg)
2868 else:
2869 parser.exit(1, '{!r} is not a tar archive.\n'.format(src))
2870
2871 elif args.create is not None:
2872 tar_name = args.create.pop(0)
2873 _, ext = os.path.splitext(tar_name)
2874 compressions = {
2875 # gz
2876 '.gz': 'gz',
2877 '.tgz': 'gz',
2878 # xz
2879 '.xz': 'xz',
2880 '.txz': 'xz',
2881 # bz2
2882 '.bz2': 'bz2',
2883 '.tbz': 'bz2',
2884 '.tbz2': 'bz2',
2885 '.tb2': 'bz2',
2886 }
2887 tar_mode = 'w:' + compressions[ext] if ext in compressions else 'w'
2888 tar_files = args.create
2889
2890 with TarFile.open(tar_name, tar_mode) as tf:
2891 for file_name in tar_files:
2892 tf.add(file_name)
2893
2894 if args.verbose:
2895 print('{!r} file created.'.format(tar_name))
2896
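# Example invocations of this command-line interface (file names are
# illustrative):
#
#     python -m tarfile -l backup.tar                    # list contents
#     python -m tarfile -c backup.tar.gz data/           # create, gzip chosen by extension
#     python -m tarfile -e backup.tar.gz restore/ --filter data
#     python -m tarfile -t backup.tar                    # test validity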
2897 if __name__ == '__main__':
2898 main()