python (3.12.0)

(root)/
lib/
python3.12/
tarfile.py
       1  #!/usr/bin/env python3
       2  #-------------------------------------------------------------------
       3  # tarfile.py
       4  #-------------------------------------------------------------------
       5  # Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
       6  # All rights reserved.
       7  #
       8  # Permission  is  hereby granted,  free  of charge,  to  any person
       9  # obtaining a  copy of  this software  and associated documentation
      10  # files  (the  "Software"),  to   deal  in  the  Software   without
      11  # restriction,  including  without limitation  the  rights to  use,
      12  # copy, modify, merge, publish, distribute, sublicense, and/or sell
      13  # copies  of  the  Software,  and to  permit  persons  to  whom the
      14  # Software  is  furnished  to  do  so,  subject  to  the  following
      15  # conditions:
      16  #
      17  # The above copyright  notice and this  permission notice shall  be
      18  # included in all copies or substantial portions of the Software.
      19  #
      20  # THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,
      21  # EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES
      22  # OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND
      23  # NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT
      24  # HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,
      25  # WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING
      26  # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
      27  # OTHER DEALINGS IN THE SOFTWARE.
      28  #
      29  """Read from and write to tar format archives.
      30  """
      31  
      32  version     = "0.9.0"
      33  __author__  = "Lars Gust\u00e4bel (lars@gustaebel.de)"
      34  __credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
      35  
      36  #---------
      37  # Imports
      38  #---------
      39  from builtins import open as bltn_open
      40  import sys
      41  import os
      42  import io
      43  import shutil
      44  import stat
      45  import time
      46  import struct
      47  import copy
      48  import re
      49  import warnings
      50  
      51  try:
      52      import pwd
      53  except ImportError:
      54      pwd = None
      55  try:
      56      import grp
      57  except ImportError:
      58      grp = None
      59  
      60  # os.symlink on Windows prior to 6.0 raises NotImplementedError
      61  # OSError (winerror=1314) will be raised if the caller does not hold the
      62  # SeCreateSymbolicLinkPrivilege privilege
      63  symlink_exception = (AttributeError, NotImplementedError, OSError)
      64  
      65  # from tarfile import *
      66  __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError",
      67             "CompressionError", "StreamError", "ExtractError", "HeaderError",
      68             "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT",
      69             "DEFAULT_FORMAT", "open","fully_trusted_filter", "data_filter",
      70             "tar_filter", "FilterError", "AbsoluteLinkError",
      71             "OutsideDestinationError", "SpecialFileError", "AbsolutePathError",
      72             "LinkOutsideDestinationError"]
      73  
      74  
      75  #---------------------------------------------------------
      76  # tar constants
      77  #---------------------------------------------------------
      78  NUL = b"\0"                     # the null character
      79  BLOCKSIZE = 512                 # length of processing blocks
      80  RECORDSIZE = BLOCKSIZE * 20     # length of records
      81  GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
      82  POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string
      83  
      84  LENGTH_NAME = 100               # maximum length of a filename
      85  LENGTH_LINK = 100               # maximum length of a linkname
      86  LENGTH_PREFIX = 155             # maximum length of the prefix field
      87  
      88  REGTYPE = b"0"                  # regular file
      89  AREGTYPE = b"\0"                # regular file
      90  LNKTYPE = b"1"                  # link (inside tarfile)
      91  SYMTYPE = b"2"                  # symbolic link
      92  CHRTYPE = b"3"                  # character special device
      93  BLKTYPE = b"4"                  # block special device
      94  DIRTYPE = b"5"                  # directory
      95  FIFOTYPE = b"6"                 # fifo special device
      96  CONTTYPE = b"7"                 # contiguous file
      97  
      98  GNUTYPE_LONGNAME = b"L"         # GNU tar longname
      99  GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
     100  GNUTYPE_SPARSE = b"S"           # GNU tar sparse file
     101  
     102  XHDTYPE = b"x"                  # POSIX.1-2001 extended header
     103  XGLTYPE = b"g"                  # POSIX.1-2001 global header
     104  SOLARIS_XHDTYPE = b"X"          # Solaris extended header
     105  
     106  USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
     107  GNU_FORMAT = 1                  # GNU tar format
     108  PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
     109  DEFAULT_FORMAT = PAX_FORMAT
     110  
     111  #---------------------------------------------------------
     112  # tarfile constants
     113  #---------------------------------------------------------
     114  # File types that tarfile supports:
     115  SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
     116                     SYMTYPE, DIRTYPE, FIFOTYPE,
     117                     CONTTYPE, CHRTYPE, BLKTYPE,
     118                     GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
     119                     GNUTYPE_SPARSE)
     120  
     121  # File types that will be treated as a regular file.
     122  REGULAR_TYPES = (REGTYPE, AREGTYPE,
     123                   CONTTYPE, GNUTYPE_SPARSE)
     124  
     125  # File types that are part of the GNU tar format.
     126  GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
     127               GNUTYPE_SPARSE)
     128  
     129  # Fields from a pax header that override a TarInfo attribute.
     130  PAX_FIELDS = ("path", "linkpath", "size", "mtime",
     131                "uid", "gid", "uname", "gname")
     132  
     133  # Fields from a pax header that are affected by hdrcharset.
     134  PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}
     135  
     136  # Fields in a pax header that are numbers, all other fields
     137  # are treated as strings.
     138  PAX_NUMBER_FIELDS = {
     139      "atime": float,
     140      "ctime": float,
     141      "mtime": float,
     142      "uid": int,
     143      "gid": int,
     144      "size": int
     145  }
     146  
     147  #---------------------------------------------------------
     148  # initialization
     149  #---------------------------------------------------------
     150  if os.name == "nt":
     151      ENCODING = "utf-8"
     152  else:
     153      ENCODING = sys.getfilesystemencoding()
     154  
     155  #---------------------------------------------------------
     156  # Some useful functions
     157  #---------------------------------------------------------
     158  
     159  def stn(s, length, encoding, errors):
     160      """Convert a string to a null-terminated bytes object.
     161      """
     162      if s is None:
     163          raise ValueError("metadata cannot contain None")
     164      s = s.encode(encoding, errors)
     165      return s[:length] + (length - len(s)) * NUL
     166  
     167  def nts(s, encoding, errors):
     168      """Convert a null-terminated bytes object to a string.
     169      """
     170      p = s.find(b"\0")
     171      if p != -1:
     172          s = s[:p]
     173      return s.decode(encoding, errors)
     174  
     175  def nti(s):
     176      """Convert a number field to a python number.
     177      """
     178      # There are two possible encodings for a number field, see
     179      # itn() below.
     180      if s[0] in (0o200, 0o377):
     181          n = 0
     182          for i in range(len(s) - 1):
     183              n <<= 8
     184              n += s[i + 1]
     185          if s[0] == 0o377:
     186              n = -(256 ** (len(s) - 1) - n)
     187      else:
     188          try:
     189              s = nts(s, "ascii", "strict")
     190              n = int(s.strip() or "0", 8)
     191          except ValueError:
     192              raise InvalidHeaderError("invalid header")
     193      return n
     194  
     195  def itn(n, digits=8, format=DEFAULT_FORMAT):
     196      """Convert a python number to a number field.
     197      """
     198      # POSIX 1003.1-1988 requires numbers to be encoded as a string of
     199      # octal digits followed by a null-byte, this allows values up to
     200      # (8**(digits-1))-1. GNU tar allows storing numbers greater than
     201      # that if necessary. A leading 0o200 or 0o377 byte indicate this
     202      # particular encoding, the following digits-1 bytes are a big-endian
     203      # base-256 representation. This allows values up to (256**(digits-1))-1.
     204      # A 0o200 byte indicates a positive number, a 0o377 byte a negative
     205      # number.
     206      original_n = n
     207      n = int(n)
     208      if 0 <= n < 8 ** (digits - 1):
     209          s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
     210      elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
     211          if n >= 0:
     212              s = bytearray([0o200])
     213          else:
     214              s = bytearray([0o377])
     215              n = 256 ** digits + n
     216  
     217          for i in range(digits - 1):
     218              s.insert(1, n & 0o377)
     219              n >>= 8
     220      else:
     221          raise ValueError("overflow in number field")
     222  
     223      return s
     224  
     225  def calc_chksums(buf):
     226      """Calculate the checksum for a member's header by summing up all
     227         characters except for the chksum field which is treated as if
     228         it was filled with spaces. According to the GNU tar sources,
     229         some tars (Sun and NeXT) calculate chksum with signed char,
     230         which will be different if there are chars in the buffer with
     231         the high bit set. So we calculate two checksums, unsigned and
     232         signed.
     233      """
     234      unsigned_chksum = 256 + sum(struct.unpack_from("148B8x356B", buf))
     235      signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
     236      return unsigned_chksum, signed_chksum
     237  
     238  def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
     239      """Copy length bytes from fileobj src to fileobj dst.
     240         If length is None, copy the entire content.
     241      """
     242      bufsize = bufsize or 16 * 1024
     243      if length == 0:
     244          return
     245      if length is None:
     246          shutil.copyfileobj(src, dst, bufsize)
     247          return
     248  
     249      blocks, remainder = divmod(length, bufsize)
     250      for b in range(blocks):
     251          buf = src.read(bufsize)
     252          if len(buf) < bufsize:
     253              raise exception("unexpected end of data")
     254          dst.write(buf)
     255  
     256      if remainder != 0:
     257          buf = src.read(remainder)
     258          if len(buf) < remainder:
     259              raise exception("unexpected end of data")
     260          dst.write(buf)
     261      return
     262  
     263  def _safe_print(s):
     264      encoding = getattr(sys.stdout, 'encoding', None)
     265      if encoding is not None:
     266          s = s.encode(encoding, 'backslashreplace').decode(encoding)
     267      print(s, end=' ')
     268  
     269  
     270  class ESC[4;38;5;81mTarError(ESC[4;38;5;149mException):
     271      """Base exception."""
     272      pass
     273  class ESC[4;38;5;81mExtractError(ESC[4;38;5;149mTarError):
     274      """General exception for extract errors."""
     275      pass
     276  class ESC[4;38;5;81mReadError(ESC[4;38;5;149mTarError):
     277      """Exception for unreadable tar archives."""
     278      pass
     279  class ESC[4;38;5;81mCompressionError(ESC[4;38;5;149mTarError):
     280      """Exception for unavailable compression methods."""
     281      pass
     282  class ESC[4;38;5;81mStreamError(ESC[4;38;5;149mTarError):
     283      """Exception for unsupported operations on stream-like TarFiles."""
     284      pass
     285  class ESC[4;38;5;81mHeaderError(ESC[4;38;5;149mTarError):
     286      """Base exception for header errors."""
     287      pass
     288  class ESC[4;38;5;81mEmptyHeaderError(ESC[4;38;5;149mHeaderError):
     289      """Exception for empty headers."""
     290      pass
     291  class ESC[4;38;5;81mTruncatedHeaderError(ESC[4;38;5;149mHeaderError):
     292      """Exception for truncated headers."""
     293      pass
     294  class ESC[4;38;5;81mEOFHeaderError(ESC[4;38;5;149mHeaderError):
     295      """Exception for end of file headers."""
     296      pass
     297  class ESC[4;38;5;81mInvalidHeaderError(ESC[4;38;5;149mHeaderError):
     298      """Exception for invalid headers."""
     299      pass
     300  class ESC[4;38;5;81mSubsequentHeaderError(ESC[4;38;5;149mHeaderError):
     301      """Exception for missing and invalid extended headers."""
     302      pass
     303  
     304  #---------------------------
     305  # internal stream interface
     306  #---------------------------
     307  class ESC[4;38;5;81m_LowLevelFile:
     308      """Low-level file object. Supports reading and writing.
     309         It is used instead of a regular file object for streaming
     310         access.
     311      """
     312  
     313      def __init__(self, name, mode):
     314          mode = {
     315              "r": os.O_RDONLY,
     316              "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
     317          }[mode]
     318          if hasattr(os, "O_BINARY"):
     319              mode |= os.O_BINARY
     320          self.fd = os.open(name, mode, 0o666)
     321  
     322      def close(self):
     323          os.close(self.fd)
     324  
     325      def read(self, size):
     326          return os.read(self.fd, size)
     327  
     328      def write(self, s):
     329          os.write(self.fd, s)
     330  
     331  class ESC[4;38;5;81m_Stream:
     332      """Class that serves as an adapter between TarFile and
     333         a stream-like object.  The stream-like object only
     334         needs to have a read() or write() method and is accessed
     335         blockwise.  Use of gzip or bzip2 compression is possible.
     336         A stream-like object could be for example: sys.stdin,
     337         sys.stdout, a socket, a tape device etc.
     338  
     339         _Stream is intended to be used only internally.
     340      """
     341  
     342      def __init__(self, name, mode, comptype, fileobj, bufsize,
     343                   compresslevel):
     344          """Construct a _Stream object.
     345          """
     346          self._extfileobj = True
     347          if fileobj is None:
     348              fileobj = _LowLevelFile(name, mode)
     349              self._extfileobj = False
     350  
     351          if comptype == '*':
     352              # Enable transparent compression detection for the
     353              # stream interface
     354              fileobj = _StreamProxy(fileobj)
     355              comptype = fileobj.getcomptype()
     356  
     357          self.name     = name or ""
     358          self.mode     = mode
     359          self.comptype = comptype
     360          self.fileobj  = fileobj
     361          self.bufsize  = bufsize
     362          self.buf      = b""
     363          self.pos      = 0
     364          self.closed   = False
     365  
     366          try:
     367              if comptype == "gz":
     368                  try:
     369                      import zlib
     370                  except ImportError:
     371                      raise CompressionError("zlib module is not available") from None
     372                  self.zlib = zlib
     373                  self.crc = zlib.crc32(b"")
     374                  if mode == "r":
     375                      self.exception = zlib.error
     376                      self._init_read_gz()
     377                  else:
     378                      self._init_write_gz(compresslevel)
     379  
     380              elif comptype == "bz2":
     381                  try:
     382                      import bz2
     383                  except ImportError:
     384                      raise CompressionError("bz2 module is not available") from None
     385                  if mode == "r":
     386                      self.dbuf = b""
     387                      self.cmp = bz2.BZ2Decompressor()
     388                      self.exception = OSError
     389                  else:
     390                      self.cmp = bz2.BZ2Compressor(compresslevel)
     391  
     392              elif comptype == "xz":
     393                  try:
     394                      import lzma
     395                  except ImportError:
     396                      raise CompressionError("lzma module is not available") from None
     397                  if mode == "r":
     398                      self.dbuf = b""
     399                      self.cmp = lzma.LZMADecompressor()
     400                      self.exception = lzma.LZMAError
     401                  else:
     402                      self.cmp = lzma.LZMACompressor()
     403  
     404              elif comptype != "tar":
     405                  raise CompressionError("unknown compression type %r" % comptype)
     406  
     407          except:
     408              if not self._extfileobj:
     409                  self.fileobj.close()
     410              self.closed = True
     411              raise
     412  
     413      def __del__(self):
     414          if hasattr(self, "closed") and not self.closed:
     415              self.close()
     416  
     417      def _init_write_gz(self, compresslevel):
     418          """Initialize for writing with gzip compression.
     419          """
     420          self.cmp = self.zlib.compressobj(compresslevel,
     421                                           self.zlib.DEFLATED,
     422                                           -self.zlib.MAX_WBITS,
     423                                           self.zlib.DEF_MEM_LEVEL,
     424                                           0)
     425          timestamp = struct.pack("<L", int(time.time()))
     426          self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
     427          if self.name.endswith(".gz"):
     428              self.name = self.name[:-3]
     429          # Honor "directory components removed" from RFC1952
     430          self.name = os.path.basename(self.name)
     431          # RFC1952 says we must use ISO-8859-1 for the FNAME field.
     432          self.__write(self.name.encode("iso-8859-1", "replace") + NUL)
     433  
     434      def write(self, s):
     435          """Write string s to the stream.
     436          """
     437          if self.comptype == "gz":
     438              self.crc = self.zlib.crc32(s, self.crc)
     439          self.pos += len(s)
     440          if self.comptype != "tar":
     441              s = self.cmp.compress(s)
     442          self.__write(s)
     443  
     444      def __write(self, s):
     445          """Write string s to the stream if a whole new block
     446             is ready to be written.
     447          """
     448          self.buf += s
     449          while len(self.buf) > self.bufsize:
     450              self.fileobj.write(self.buf[:self.bufsize])
     451              self.buf = self.buf[self.bufsize:]
     452  
     453      def close(self):
     454          """Close the _Stream object. No operation should be
     455             done on it afterwards.
     456          """
     457          if self.closed:
     458              return
     459  
     460          self.closed = True
     461          try:
     462              if self.mode == "w" and self.comptype != "tar":
     463                  self.buf += self.cmp.flush()
     464  
     465              if self.mode == "w" and self.buf:
     466                  self.fileobj.write(self.buf)
     467                  self.buf = b""
     468                  if self.comptype == "gz":
     469                      self.fileobj.write(struct.pack("<L", self.crc))
     470                      self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
     471          finally:
     472              if not self._extfileobj:
     473                  self.fileobj.close()
     474  
     475      def _init_read_gz(self):
     476          """Initialize for reading a gzip compressed fileobj.
     477          """
     478          self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
     479          self.dbuf = b""
     480  
     481          # taken from gzip.GzipFile with some alterations
     482          if self.__read(2) != b"\037\213":
     483              raise ReadError("not a gzip file")
     484          if self.__read(1) != b"\010":
     485              raise CompressionError("unsupported compression method")
     486  
     487          flag = ord(self.__read(1))
     488          self.__read(6)
     489  
     490          if flag & 4:
     491              xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
     492              self.read(xlen)
     493          if flag & 8:
     494              while True:
     495                  s = self.__read(1)
     496                  if not s or s == NUL:
     497                      break
     498          if flag & 16:
     499              while True:
     500                  s = self.__read(1)
     501                  if not s or s == NUL:
     502                      break
     503          if flag & 2:
     504              self.__read(2)
     505  
     506      def tell(self):
     507          """Return the stream's file pointer position.
     508          """
     509          return self.pos
     510  
     511      def seek(self, pos=0):
     512          """Set the stream's file pointer to pos. Negative seeking
     513             is forbidden.
     514          """
     515          if pos - self.pos >= 0:
     516              blocks, remainder = divmod(pos - self.pos, self.bufsize)
     517              for i in range(blocks):
     518                  self.read(self.bufsize)
     519              self.read(remainder)
     520          else:
     521              raise StreamError("seeking backwards is not allowed")
     522          return self.pos
     523  
     524      def read(self, size):
     525          """Return the next size number of bytes from the stream."""
     526          assert size is not None
     527          buf = self._read(size)
     528          self.pos += len(buf)
     529          return buf
     530  
     531      def _read(self, size):
     532          """Return size bytes from the stream.
     533          """
     534          if self.comptype == "tar":
     535              return self.__read(size)
     536  
     537          c = len(self.dbuf)
     538          t = [self.dbuf]
     539          while c < size:
     540              # Skip underlying buffer to avoid unaligned double buffering.
     541              if self.buf:
     542                  buf = self.buf
     543                  self.buf = b""
     544              else:
     545                  buf = self.fileobj.read(self.bufsize)
     546                  if not buf:
     547                      break
     548              try:
     549                  buf = self.cmp.decompress(buf)
     550              except self.exception as e:
     551                  raise ReadError("invalid compressed data") from e
     552              t.append(buf)
     553              c += len(buf)
     554          t = b"".join(t)
     555          self.dbuf = t[size:]
     556          return t[:size]
     557  
     558      def __read(self, size):
     559          """Return size bytes from stream. If internal buffer is empty,
     560             read another block from the stream.
     561          """
     562          c = len(self.buf)
     563          t = [self.buf]
     564          while c < size:
     565              buf = self.fileobj.read(self.bufsize)
     566              if not buf:
     567                  break
     568              t.append(buf)
     569              c += len(buf)
     570          t = b"".join(t)
     571          self.buf = t[size:]
     572          return t[:size]
     573  # class _Stream
     574  
     575  class ESC[4;38;5;81m_StreamProxy(ESC[4;38;5;149mobject):
     576      """Small proxy class that enables transparent compression
     577         detection for the Stream interface (mode 'r|*').
     578      """
     579  
     580      def __init__(self, fileobj):
     581          self.fileobj = fileobj
     582          self.buf = self.fileobj.read(BLOCKSIZE)
     583  
     584      def read(self, size):
     585          self.read = self.fileobj.read
     586          return self.buf
     587  
     588      def getcomptype(self):
     589          if self.buf.startswith(b"\x1f\x8b\x08"):
     590              return "gz"
     591          elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
     592              return "bz2"
     593          elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
     594              return "xz"
     595          else:
     596              return "tar"
     597  
     598      def close(self):
     599          self.fileobj.close()
     600  # class StreamProxy
     601  
     602  #------------------------
     603  # Extraction file object
     604  #------------------------
     605  class ESC[4;38;5;81m_FileInFile(ESC[4;38;5;149mobject):
     606      """A thin wrapper around an existing file object that
     607         provides a part of its data as an individual file
     608         object.
     609      """
     610  
     611      def __init__(self, fileobj, offset, size, name, blockinfo=None):
     612          self.fileobj = fileobj
     613          self.offset = offset
     614          self.size = size
     615          self.position = 0
     616          self.name = name
     617          self.closed = False
     618  
     619          if blockinfo is None:
     620              blockinfo = [(0, size)]
     621  
     622          # Construct a map with data and zero blocks.
     623          self.map_index = 0
     624          self.map = []
     625          lastpos = 0
     626          realpos = self.offset
     627          for offset, size in blockinfo:
     628              if offset > lastpos:
     629                  self.map.append((False, lastpos, offset, None))
     630              self.map.append((True, offset, offset + size, realpos))
     631              realpos += size
     632              lastpos = offset + size
     633          if lastpos < self.size:
     634              self.map.append((False, lastpos, self.size, None))
     635  
     636      def flush(self):
     637          pass
     638  
     639      def readable(self):
     640          return True
     641  
     642      def writable(self):
     643          return False
     644  
     645      def seekable(self):
     646          return self.fileobj.seekable()
     647  
     648      def tell(self):
     649          """Return the current file position.
     650          """
     651          return self.position
     652  
     653      def seek(self, position, whence=io.SEEK_SET):
     654          """Seek to a position in the file.
     655          """
     656          if whence == io.SEEK_SET:
     657              self.position = min(max(position, 0), self.size)
     658          elif whence == io.SEEK_CUR:
     659              if position < 0:
     660                  self.position = max(self.position + position, 0)
     661              else:
     662                  self.position = min(self.position + position, self.size)
     663          elif whence == io.SEEK_END:
     664              self.position = max(min(self.size + position, self.size), 0)
     665          else:
     666              raise ValueError("Invalid argument")
     667          return self.position
     668  
     669      def read(self, size=None):
     670          """Read data from the file.
     671          """
     672          if size is None:
     673              size = self.size - self.position
     674          else:
     675              size = min(size, self.size - self.position)
     676  
     677          buf = b""
     678          while size > 0:
     679              while True:
     680                  data, start, stop, offset = self.map[self.map_index]
     681                  if start <= self.position < stop:
     682                      break
     683                  else:
     684                      self.map_index += 1
     685                      if self.map_index == len(self.map):
     686                          self.map_index = 0
     687              length = min(size, stop - self.position)
     688              if data:
     689                  self.fileobj.seek(offset + (self.position - start))
     690                  b = self.fileobj.read(length)
     691                  if len(b) != length:
     692                      raise ReadError("unexpected end of data")
     693                  buf += b
     694              else:
     695                  buf += NUL * length
     696              size -= length
     697              self.position += length
     698          return buf
     699  
     700      def readinto(self, b):
     701          buf = self.read(len(b))
     702          b[:len(buf)] = buf
     703          return len(buf)
     704  
     705      def close(self):
     706          self.closed = True
     707  #class _FileInFile
     708  
     709  class ESC[4;38;5;81mExFileObject(ESC[4;38;5;149mioESC[4;38;5;149m.ESC[4;38;5;149mBufferedReader):
     710  
     711      def __init__(self, tarfile, tarinfo):
     712          fileobj = _FileInFile(tarfile.fileobj, tarinfo.offset_data,
     713                  tarinfo.size, tarinfo.name, tarinfo.sparse)
     714          super().__init__(fileobj)
     715  #class ExFileObject
     716  
     717  
     718  #-----------------------------
     719  # extraction filters (PEP 706)
     720  #-----------------------------
     721  
     722  class ESC[4;38;5;81mFilterError(ESC[4;38;5;149mTarError):
     723      pass
     724  
     725  class ESC[4;38;5;81mAbsolutePathError(ESC[4;38;5;149mFilterError):
     726      def __init__(self, tarinfo):
     727          self.tarinfo = tarinfo
     728          super().__init__(f'member {tarinfo.name!r} has an absolute path')
     729  
     730  class ESC[4;38;5;81mOutsideDestinationError(ESC[4;38;5;149mFilterError):
     731      def __init__(self, tarinfo, path):
     732          self.tarinfo = tarinfo
     733          self._path = path
     734          super().__init__(f'{tarinfo.name!r} would be extracted to {path!r}, '
     735                           + 'which is outside the destination')
     736  
     737  class ESC[4;38;5;81mSpecialFileError(ESC[4;38;5;149mFilterError):
     738      def __init__(self, tarinfo):
     739          self.tarinfo = tarinfo
     740          super().__init__(f'{tarinfo.name!r} is a special file')
     741  
     742  class ESC[4;38;5;81mAbsoluteLinkError(ESC[4;38;5;149mFilterError):
     743      def __init__(self, tarinfo):
     744          self.tarinfo = tarinfo
     745          super().__init__(f'{tarinfo.name!r} is a link to an absolute path')
     746  
     747  class ESC[4;38;5;81mLinkOutsideDestinationError(ESC[4;38;5;149mFilterError):
     748      def __init__(self, tarinfo, path):
     749          self.tarinfo = tarinfo
     750          self._path = path
     751          super().__init__(f'{tarinfo.name!r} would link to {path!r}, '
     752                           + 'which is outside the destination')
     753  
     754  def _get_filtered_attrs(member, dest_path, for_data=True):
     755      new_attrs = {}
     756      name = member.name
     757      dest_path = os.path.realpath(dest_path)
     758      # Strip leading / (tar's directory separator) from filenames.
     759      # Include os.sep (target OS directory separator) as well.
     760      if name.startswith(('/', os.sep)):
     761          name = new_attrs['name'] = member.path.lstrip('/' + os.sep)
     762      if os.path.isabs(name):
     763          # Path is absolute even after stripping.
     764          # For example, 'C:/foo' on Windows.
     765          raise AbsolutePathError(member)
     766      # Ensure we stay in the destination
     767      target_path = os.path.realpath(os.path.join(dest_path, name))
     768      if os.path.commonpath([target_path, dest_path]) != dest_path:
     769          raise OutsideDestinationError(member, target_path)
     770      # Limit permissions (no high bits, and go-w)
     771      mode = member.mode
     772      if mode is not None:
     773          # Strip high bits & group/other write bits
     774          mode = mode & 0o755
     775          if for_data:
     776              # For data, handle permissions & file types
     777              if member.isreg() or member.islnk():
     778                  if not mode & 0o100:
     779                      # Clear executable bits if not executable by user
     780                      mode &= ~0o111
     781                  # Ensure owner can read & write
     782                  mode |= 0o600
     783              elif member.isdir() or member.issym():
     784                  # Ignore mode for directories & symlinks
     785                  mode = None
     786              else:
     787                  # Reject special files
     788                  raise SpecialFileError(member)
     789          if mode != member.mode:
     790              new_attrs['mode'] = mode
     791      if for_data:
     792          # Ignore ownership for 'data'
     793          if member.uid is not None:
     794              new_attrs['uid'] = None
     795          if member.gid is not None:
     796              new_attrs['gid'] = None
     797          if member.uname is not None:
     798              new_attrs['uname'] = None
     799          if member.gname is not None:
     800              new_attrs['gname'] = None
     801          # Check link destination for 'data'
     802          if member.islnk() or member.issym():
     803              if os.path.isabs(member.linkname):
     804                  raise AbsoluteLinkError(member)
     805              if member.issym():
     806                  target_path = os.path.join(dest_path,
     807                                             os.path.dirname(name),
     808                                             member.linkname)
     809              else:
     810                  target_path = os.path.join(dest_path,
     811                                             member.linkname)
     812              target_path = os.path.realpath(target_path)
     813              if os.path.commonpath([target_path, dest_path]) != dest_path:
     814                  raise LinkOutsideDestinationError(member, target_path)
     815      return new_attrs
     816  
     817  def fully_trusted_filter(member, dest_path):
     818      return member
     819  
     820  def tar_filter(member, dest_path):
     821      new_attrs = _get_filtered_attrs(member, dest_path, False)
     822      if new_attrs:
     823          return member.replace(**new_attrs, deep=False)
     824      return member
     825  
     826  def data_filter(member, dest_path):
     827      new_attrs = _get_filtered_attrs(member, dest_path, True)
     828      if new_attrs:
     829          return member.replace(**new_attrs, deep=False)
     830      return member
     831  
# Maps the name of each predefined extraction filter to its callable.
# NOTE(review): presumably used to resolve a filter that is given by name;
# the lookup happens outside this section -- confirm.
_NAMED_FILTERS = {
    "fully_trusted": fully_trusted_filter,
    "tar": tar_filter,
    "data": data_filter,
}
     837  
     838  #------------------
     839  # Exported Classes
     840  #------------------
     841  
# Sentinel for replace() defaults, meaning "don't change the attribute".
# A unique object is needed because None is a real replacement value:
# the extraction filters pass None for e.g. 'mode' and 'uid'.
_KEEP = object()
     844  
     845  class ESC[4;38;5;81mTarInfo(ESC[4;38;5;149mobject):
     846      """Informational class which holds the details about an
     847         archive member given by a tar header block.
     848         TarInfo objects are returned by TarFile.getmember(),
     849         TarFile.getmembers() and TarFile.gettarinfo() and are
     850         usually created internally.
     851      """
     852  
    # __slots__ is given as a mapping: the keys are the attribute (slot)
    # names and the values serve as per-attribute documentation strings.
    # The last three entries are internal and carry no documentation.
    __slots__ = dict(
        name = 'Name of the archive member.',
        mode = 'Permission bits.',
        uid = 'User ID of the user who originally stored this member.',
        gid = 'Group ID of the user who originally stored this member.',
        size = 'Size in bytes.',
        mtime = 'Time of last modification.',
        chksum = 'Header checksum.',
        type = ('File type. type is usually one of these constants: '
                'REGTYPE, AREGTYPE, LNKTYPE, SYMTYPE, DIRTYPE, FIFOTYPE, '
                'CONTTYPE, CHRTYPE, BLKTYPE, GNUTYPE_SPARSE.'),
        linkname = ('Name of the target file name, which is only present '
                    'in TarInfo objects of type LNKTYPE and SYMTYPE.'),
        uname = 'User name.',
        gname = 'Group name.',
        devmajor = 'Device major number.',
        devminor = 'Device minor number.',
        offset = 'The tar header starts here.',
        offset_data = "The file's data starts here.",
        pax_headers = ('A dictionary containing key-value pairs of an '
                       'associated pax extended header.'),
        sparse = 'Sparse member information.',
        tarfile = None,
        _sparse_structs = None,
        _link_target = None,
        )
     879  
     880      def __init__(self, name=""):
     881          """Construct a TarInfo object. name is the optional name
     882             of the member.
     883          """
     884          self.name = name        # member name
     885          self.mode = 0o644       # file permissions
     886          self.uid = 0            # user id
     887          self.gid = 0            # group id
     888          self.size = 0           # file size
     889          self.mtime = 0          # modification time
     890          self.chksum = 0         # header checksum
     891          self.type = REGTYPE     # member type
     892          self.linkname = ""      # link name
     893          self.uname = ""         # user name
     894          self.gname = ""         # group name
     895          self.devmajor = 0       # device major number
     896          self.devminor = 0       # device minor number
     897  
     898          self.offset = 0         # the tar header starts here
     899          self.offset_data = 0    # the file's data starts here
     900  
     901          self.sparse = None      # sparse member information
     902          self.pax_headers = {}   # pax header information
     903  
     904      @property
     905      def path(self):
     906          'In pax headers, "name" is called "path".'
     907          return self.name
     908  
     909      @path.setter
     910      def path(self, name):
     911          self.name = name
     912  
     913      @property
     914      def linkpath(self):
     915          'In pax headers, "linkname" is called "linkpath".'
     916          return self.linkname
     917  
     918      @linkpath.setter
     919      def linkpath(self, linkname):
     920          self.linkname = linkname
     921  
     922      def __repr__(self):
     923          return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
     924  
     925      def replace(self, *,
     926                  name=_KEEP, mtime=_KEEP, mode=_KEEP, linkname=_KEEP,
     927                  uid=_KEEP, gid=_KEEP, uname=_KEEP, gname=_KEEP,
     928                  deep=True, _KEEP=_KEEP):
     929          """Return a deep copy of self with the given attributes replaced.
     930          """
     931          if deep:
     932              result = copy.deepcopy(self)
     933          else:
     934              result = copy.copy(self)
     935          if name is not _KEEP:
     936              result.name = name
     937          if mtime is not _KEEP:
     938              result.mtime = mtime
     939          if mode is not _KEEP:
     940              result.mode = mode
     941          if linkname is not _KEEP:
     942              result.linkname = linkname
     943          if uid is not _KEEP:
     944              result.uid = uid
     945          if gid is not _KEEP:
     946              result.gid = gid
     947          if uname is not _KEEP:
     948              result.uname = uname
     949          if gname is not _KEEP:
     950              result.gname = gname
     951          return result
     952  
     953      def get_info(self):
     954          """Return the TarInfo's attributes as a dictionary.
     955          """
     956          if self.mode is None:
     957              mode = None
     958          else:
     959              mode = self.mode & 0o7777
     960          info = {
     961              "name":     self.name,
     962              "mode":     mode,
     963              "uid":      self.uid,
     964              "gid":      self.gid,
     965              "size":     self.size,
     966              "mtime":    self.mtime,
     967              "chksum":   self.chksum,
     968              "type":     self.type,
     969              "linkname": self.linkname,
     970              "uname":    self.uname,
     971              "gname":    self.gname,
     972              "devmajor": self.devmajor,
     973              "devminor": self.devminor
     974          }
     975  
     976          if info["type"] == DIRTYPE and not info["name"].endswith("/"):
     977              info["name"] += "/"
     978  
     979          return info
     980  
     981      def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
     982          """Return a tar header as a string of 512 byte blocks.
     983          """
     984          info = self.get_info()
     985          for name, value in info.items():
     986              if value is None:
     987                  raise ValueError("%s may not be None" % name)
     988  
     989          if format == USTAR_FORMAT:
     990              return self.create_ustar_header(info, encoding, errors)
     991          elif format == GNU_FORMAT:
     992              return self.create_gnu_header(info, encoding, errors)
     993          elif format == PAX_FORMAT:
     994              return self.create_pax_header(info, encoding)
     995          else:
     996              raise ValueError("invalid format")
     997  
     998      def create_ustar_header(self, info, encoding, errors):
     999          """Return the object as a ustar header block.
    1000          """
    1001          info["magic"] = POSIX_MAGIC
    1002  
    1003          if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
    1004              raise ValueError("linkname is too long")
    1005  
    1006          if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
    1007              info["prefix"], info["name"] = self._posix_split_name(info["name"], encoding, errors)
    1008  
    1009          return self._create_header(info, USTAR_FORMAT, encoding, errors)
    1010  
    def create_gnu_header(self, info, encoding, errors):
        """Return a GNU header block sequence built from the info
           dictionary.

           A linkname or name that is too long for its header field gets
           its own long-link / long-name header in front of the actual
           header block.
        """
        info["magic"] = GNU_MAGIC

        long_fields = (
            ("linkname", GNUTYPE_LONGLINK, LENGTH_LINK),
            ("name", GNUTYPE_LONGNAME, LENGTH_NAME),
        )
        blocks = []
        for field, longtype, limit in long_fields:
            if len(info[field].encode(encoding, errors)) > limit:
                blocks.append(self._create_gnu_long_header(info[field], longtype, encoding, errors))

        blocks.append(self._create_header(info, GNU_FORMAT, encoding, errors))
        return b"".join(blocks)
    1024  
    def create_pax_header(self, info, encoding):
        """Return a ustar header block built from the info dictionary,
           preceded by a pax extended header sequence if some values
           cannot be represented in the ustar header.
        """
        info["magic"] = POSIX_MAGIC
        pax_headers = self.pax_headers.copy()

        # String fields go into the pax header if they are not pure ASCII
        # or if they are too long for their header field.
        string_fields = (
            ("name", "path", LENGTH_NAME),
            ("linkname", "linkpath", LENGTH_LINK),
            ("uname", "uname", 32),
            ("gname", "gname", 32),
        )
        for field, keyword, limit in string_fields:
            if keyword in pax_headers:
                # An existing pax header entry has priority.
                continue

            text = info[field]
            try:
                text.encode("ascii", "strict")
            except UnicodeEncodeError:
                pax_headers[keyword] = text
            else:
                if len(text) > limit:
                    pax_headers[keyword] = text

        # Number fields go into the pax header if they are out of range
        # for their header field or if they are floats.
        number_fields = (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12))
        for field, digits in number_fields:
            value = info[field]
            is_float = isinstance(value, float)
            as_int = round(value) if is_float else value

            if not 0 <= as_int < 8 ** (digits - 1):
                # Out of range: the header field is set to 0.
                info[field] = 0
            elif is_float:
                # The header field gets the rounded value, the pax header
                # keeps the full precision.
                info[field] = as_int
            else:
                continue

            # An existing pax header entry has priority.
            if field not in pax_headers:
                pax_headers[field] = str(value)

        # The extended header is only written if there is something in it.
        if pax_headers:
            extended = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
        else:
            extended = b""

        return extended + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
    1082  
    @classmethod
    def create_pax_global_header(cls, pax_headers):
        """Return a pax global header block sequence that stores the
           keyword/value pairs of pax_headers, encoded as UTF-8.
        """
        return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")
    1088  
    def _posix_split_name(self, name, encoding, errors):
        """Split a name that is too long for the name field into a
           (prefix, name) pair at one of its slashes.

           The first split position at which both parts fit into their
           header fields is used.  Raises ValueError if there is none.
        """
        parts = name.split("/")
        for cut in range(1, len(parts)):
            prefix = "/".join(parts[:cut])
            rest = "/".join(parts[cut:])
            prefix_fits = len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX
            if prefix_fits and len(rest.encode(encoding, errors)) <= LENGTH_NAME:
                return prefix, rest

        raise ValueError("name is too long")
    1104  
    @staticmethod
    def _create_header(info, format, encoding, errors):
        """Return one header block as bytes.

           info is a dictionary with the header fields (missing keys get
           defaults), format must be one of the *_FORMAT constants.
           Raises ValueError if the type is None.
        """
        # The device fields are only filled for character and block
        # devices and stay empty for all other types.
        if info.get("type") in (CHRTYPE, BLKTYPE):
            devmajor = itn(info.get("devmajor", 0), 8, format)
            devminor = itn(info.get("devminor", 0), 8, format)
        else:
            devmajor = stn("", 8, encoding, errors)
            devminor = stn("", 8, encoding, errors)

        # None values in metadata should cause ValueError.
        # itn()/stn() do this for all fields except type.
        filetype = info.get("type", REGTYPE)
        if filetype is None:
            raise ValueError("TarInfo.type must not be None")

        fields = [
            stn(info.get("name", ""), 100, encoding, errors),
            itn(info.get("mode", 0) & 0o7777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            b"        ",    # placeholder for the checksum
            filetype,
            stn(info.get("linkname", ""), 100, encoding, errors),
            info.get("magic", POSIX_MAGIC),
            stn(info.get("uname", ""), 32, encoding, errors),
            stn(info.get("gname", ""), 32, encoding, errors),
            devmajor,
            devminor,
            stn(info.get("prefix", ""), 155, encoding, errors),
        ]

        # Pad the joined fields to a full block, then compute the checksum
        # while the checksum field still holds the placeholder spaces.
        block = struct.pack("%ds" % BLOCKSIZE, b"".join(fields))
        chksum = calc_chksums(block[-BLOCKSIZE:])[0]

        # Overwrite the first seven bytes of the eight byte checksum field
        # (six octal digits and a NUL); the last space is kept.
        chksum_field = bytes("%06o\0" % chksum, "ascii")
        return block[:-364] + chksum_field + block[-357:]
    1146  
    @staticmethod
    def _create_payload(payload):
        """Return payload padded with NUL bytes up to the next multiple
           of BLOCKSIZE; a payload that already fits is returned as is.
        """
        remainder = len(payload) % BLOCKSIZE
        if remainder > 0:
            payload += (BLOCKSIZE - remainder) * NUL
        return payload
    1156  
    @classmethod
    def _create_gnu_long_header(cls, name, type, encoding, errors):
        """Return the header block plus data blocks that store name as
           a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK member, depending on
           type.
        """
        # The stored value is the encoded name followed by a NUL byte.
        payload = name.encode(encoding, errors) + NUL

        info = {
            "name": "././@LongLink",
            "type": type,
            "size": len(payload),
            "magic": GNU_MAGIC,
        }

        # Header block first, then the padded name blocks.
        header = cls._create_header(info, USTAR_FORMAT, encoding, errors)
        return header + cls._create_payload(payload)
    1173  
    @classmethod
    def _create_pax_generic_header(cls, pax_headers, type, encoding):
        """Return a pax extended or global header sequence (selected by
           type) that stores the keyword/value pairs of pax_headers.
           The values must be strings.
        """
        # A value that cannot be encoded as strict UTF-8 (it contains
        # surrogate characters) forces hdrcharset=BINARY for the whole
        # header, see _proc_pax() for more information.
        binary = False
        for value in pax_headers.values():
            try:
                value.encode("utf-8", "strict")
            except UnicodeEncodeError:
                binary = True
                break

        records = []
        if binary:
            # The hdrcharset record comes first.
            records.append(b"21 hdrcharset=BINARY\n")

        for keyword, value in pax_headers.items():
            keyword = keyword.encode("utf-8")
            if binary:
                # Try to restore the original bytes of the value; this
                # requires that encoding matches the string.
                value = value.encode(encoding, "surrogateescape")
            else:
                value = value.encode("utf-8")

            # A record is "<length> <keyword>=<value>\n" where length
            # counts its own digits, so iterate until it is stable.
            base = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
            total = 0
            while True:
                candidate = base + len(str(total))
                if candidate == total:
                    break
                total = candidate
            records.append(bytes(str(total), "ascii") + b" " + keyword + b"=" + value + b"\n")

        payload = b"".join(records)

        # We use a hardcoded "././@PaxHeader" name like star does
        # instead of the one that POSIX recommends.
        info = {
            "name": "././@PaxHeader",
            "type": type,
            "size": len(payload),
            "magic": POSIX_MAGIC,
        }

        # Header block first, then the padded record blocks.
        header = cls._create_header(info, USTAR_FORMAT, "ascii", "replace")
        return header + cls._create_payload(payload)
    1224  
    @classmethod
    def frombuf(cls, buf, encoding, errors):
        """Build a TarInfo object from one header block given as bytes.

           Raises EmptyHeaderError for an empty buffer,
           TruncatedHeaderError for a buffer of the wrong length,
           EOFHeaderError for a block of NUL bytes only and
           InvalidHeaderError for a checksum mismatch.
        """
        size = len(buf)
        if size == 0:
            raise EmptyHeaderError("empty header")
        if size != BLOCKSIZE:
            raise TruncatedHeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise EOFHeaderError("end of file header")

        chksum = nti(buf[148:156])
        if chksum not in calc_chksums(buf):
            raise InvalidHeaderError("bad checksum")

        # Decode the fixed-position header fields.
        tarinfo = cls()
        tarinfo.name = nts(buf[0:100], encoding, errors)
        tarinfo.mode = nti(buf[100:108])
        tarinfo.uid = nti(buf[108:116])
        tarinfo.gid = nti(buf[116:124])
        tarinfo.size = nti(buf[124:136])
        tarinfo.mtime = nti(buf[136:148])
        tarinfo.chksum = chksum
        tarinfo.type = buf[156:157]
        tarinfo.linkname = nts(buf[157:257], encoding, errors)
        tarinfo.uname = nts(buf[265:297], encoding, errors)
        tarinfo.gname = nts(buf[297:329], encoding, errors)
        tarinfo.devmajor = nti(buf[329:337])
        tarinfo.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500], encoding, errors)

        # The old V7 tar format stores a directory as a regular file
        # whose name ends with a slash.
        if tarinfo.type == AREGTYPE and tarinfo.name.endswith("/"):
            tarinfo.type = DIRTYPE

        # The old GNU sparse format keeps up to 4 sparse structures of
        # 24 bytes each in otherwise unused header space, starting at
        # offset 386.  They are saved for _proc_sparse().
        if tarinfo.type == GNUTYPE_SPARSE:
            structs = []
            for pos in range(386, 386 + 4 * 24, 24):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                structs.append((offset, numbytes))
            isextended = bool(buf[482])
            origsize = nti(buf[483:495])
            tarinfo._sparse_structs = (structs, isextended, origsize)

        # Directory names are stored without trailing slashes.
        if tarinfo.isdir():
            tarinfo.name = tarinfo.name.rstrip("/")

        # A ustar long name is put together from prefix and name.
        if prefix and tarinfo.type not in GNU_TYPES:
            tarinfo.name = prefix + "/" + tarinfo.name
        return tarinfo
    1287  
    @classmethod
    def fromtarfile(cls, tarfile):
        """Read the next header block from the file object of the
           TarFile object tarfile and return the TarInfo object that
           results from processing it.
        """
        header = tarfile.fileobj.read(BLOCKSIZE)
        member = cls.frombuf(header, tarfile.encoding, tarfile.errors)
        # The header block that was just read ends at the current position.
        member.offset = tarfile.fileobj.tell() - BLOCKSIZE
        return member._proc_member(tarfile)
    1297  
    1298      #--------------------------------------------------------------------------
    1299      # The following are methods that are called depending on the type of a
    1300      # member. The entry point is _proc_member() which can be overridden in a
    1301      # subclass to add custom _proc_*() methods. A _proc_*() method MUST
    1302      # implement the following
    1303      # operations:
    1304      # 1. Set self.offset_data to the position where the data blocks begin,
    1305      #    if there is data that follows.
    1306      # 2. Set tarfile.offset to the position where the next member's header will
    1307      #    begin.
    1308      # 3. Return self or another valid TarInfo object.
    def _proc_member(self, tarfile):
        """Pick the _proc_*() method that matches the member type,
           call it and return its result.
        """
        kind = self.type
        if kind in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
            handler = self._proc_gnulong
        elif kind == GNUTYPE_SPARSE:
            handler = self._proc_sparse
        elif kind in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
            handler = self._proc_pax
        else:
            handler = self._proc_builtin
        return handler(tarfile)
    1321  
    def _proc_builtin(self, tarfile):
        """Process a member of a builtin type, or of an unknown type
           which is treated like a regular file.
        """
        data_start = tarfile.fileobj.tell()
        self.offset_data = data_start

        # Only regular files and unsupported types are followed by data
        # blocks that have to be skipped.
        if self.isreg() or self.type not in SUPPORTED_TYPES:
            tarfile.offset = data_start + self._block(self.size)
        else:
            tarfile.offset = data_start

        # Apply the global pax header information saved in tarfile.
        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

        # Directory names are stored without trailing slashes, like
        # frombuf() does it.
        if self.isdir():
            self.name = self.name.rstrip("/")

        return self
    1343  
    def _proc_gnulong(self, tarfile):
        """Process the blocks that hold a GNU longname
           or longlink member.

           The data blocks of this member contain the long name.  The
           header that follows describes the actual member; it is read,
           patched with the long name (or link name) and returned in
           place of self.  Raises SubsequentHeaderError if the following
           header cannot be read.
        """
        buf = tarfile.fileobj.read(self._block(self.size))

        # Fetch the next header and process it.
        try:
            member = self.fromtarfile(tarfile)
        except HeaderError as e:
            raise SubsequentHeaderError(str(e)) from None

        # Patch the TarInfo object from the next header with
        # the longname information.
        member.offset = self.offset
        if self.type == GNUTYPE_LONGNAME:
            member.name = nts(buf, tarfile.encoding, tarfile.errors)
        elif self.type == GNUTYPE_LONGLINK:
            member.linkname = nts(buf, tarfile.encoding, tarfile.errors)

        # Remove redundant slashes from directories. This is to be consistent
        # with frombuf() and _proc_builtin(), which strip all trailing
        # slashes and not just a single one.
        if member.isdir():
            member.name = member.name.rstrip("/")

        return member
    1370  
    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.

           The sparse structures found by frombuf() are completed with
           those from the extended header blocks that follow.  Each of
           these blocks holds up to 21 (offset, numbytes) pairs and a
           flag at index 504 that tells whether another block follows.

           Sets self.sparse, self.offset_data and tarfile.offset, replaces
           self.size by the original size and returns self.  Raises
           TruncatedHeaderError if an extended header block is incomplete.
        """
        # We already collected some sparse structures in frombuf().
        structs, isextended, origsize = self._sparse_structs
        del self._sparse_structs

        # Collect sparse structures from extended header blocks.
        while isextended:
            buf = tarfile.fileobj.read(BLOCKSIZE)
            if len(buf) != BLOCKSIZE:
                # Without this check a short read fails further down
                # with a non-tar error (e.g. IndexError from buf[504]).
                raise TruncatedHeaderError("truncated sparse header")
            pos = 0
            for i in range(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                # Entries with a zero offset or a zero length are skipped.
                if offset and numbytes:
                    structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[504])
        self.sparse = structs

        # The next header is located from the stored size; only then is
        # self.size replaced by the original size.
        self.offset_data = tarfile.fileobj.tell()
        tarfile.offset = self.offset_data + self._block(self.size)
        self.size = origsize
        return self
    1398  
    def _proc_pax(self, tarfile):
        """Process a pax extended or global header (POSIX.1-2008).

        The header records are read from tarfile.fileobj and merged into
        a pax header dictionary. The header that follows is then read with
        fromtarfile(), patched with the collected information where
        applicable, and returned.

        Raises InvalidHeaderError for a record with length 0 and
        SubsequentHeaderError if the following header cannot be read.
        """
        # The records take up self.size bytes, rounded up to full blocks.
        raw = tarfile.fileobj.read(self._block(self.size))

        # A global header updates the archive-wide dictionary in place,
        # every other pax header works on a private copy of it.
        is_global = self.type == XGLTYPE
        pax_headers = (tarfile.pax_headers if is_global
                       else tarfile.pax_headers.copy())

        # A hdrcharset record tells us how the path, linkpath, uname and
        # gname fields are encoded. They are normally UTF-8, but since
        # POSIX.1-2008 implementations may store them as raw binary
        # strings if the translation to UTF-8 fails.
        charset = re.search(br"\d+ hdrcharset=([^\n]+)\n", raw)
        if charset is not None:
            pax_headers["hdrcharset"] = charset.group(1).decode("utf-8")

        # Only "BINARY" is treated specially. The only other value the
        # standard currently allows is "ISO-IR 10646 2000 UTF-8", i.e.
        # UTF-8.
        if pax_headers.get("hdrcharset") == "BINARY":
            name_encoding = tarfile.encoding
        else:
            name_encoding = "utf-8"

        # A record has the form "%d %s=%s\n" % (length, keyword, value),
        # where length covers the whole record including the length field
        # itself and the trailing newline.
        record_re = re.compile(br"(\d+) ([^=]+)=")
        pos = 0
        while True:
            record = record_re.match(raw, pos)
            if record is None:
                break
            length = int(record.group(1))
            if length == 0:
                raise InvalidHeaderError("invalid header")
            raw_keyword = record.group(2)
            # The value runs from just behind the "=" up to, but not
            # including, the last byte of the record.
            raw_value = raw[record.end():pos + length - 1]

            # Strict UTF-8 would be the standard way to decode, but e.g.
            # GNU tar <= 1.23 is known to store filenames it cannot
            # translate to UTF-8 as raw strings (without a
            # hdrcharset=BINARY header). So the strict encoding is tried
            # first, with the user's encoding and error handler as the
            # fallback for name fields.
            keyword = self._decode_pax_field(raw_keyword, "utf-8", "utf-8",
                    tarfile.errors)
            if keyword in PAX_NAME_FIELDS:
                primary, fallback = name_encoding, tarfile.encoding
            else:
                primary = fallback = "utf-8"
            pax_headers[keyword] = self._decode_pax_field(raw_value, primary,
                    fallback, tarfile.errors)
            pos += length

        # Read the header this pax header belongs to.
        try:
            member = self.fromtarfile(tarfile)
        except HeaderError as exc:
            raise SubsequentHeaderError(str(exc)) from None

        # Hand over to the matching GNU sparse format handler, if any.
        if "GNU.sparse.map" in pax_headers:
            # GNU extended sparse format version 0.1.
            self._proc_gnusparse_01(member, pax_headers)
        elif "GNU.sparse.size" in pax_headers:
            # GNU extended sparse format version 0.0.
            self._proc_gnusparse_00(member, pax_headers, raw)
        elif (pax_headers.get("GNU.sparse.major") == "1"
                and pax_headers.get("GNU.sparse.minor") == "0"):
            # GNU extended sparse format version 1.0.
            self._proc_gnusparse_10(member, pax_headers, tarfile)

        if self.type not in (XHDTYPE, SOLARIS_XHDTYPE):
            return member

        # Patch the TarInfo object with the extended header info.
        member._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
        member.offset = self.offset

        if "size" in pax_headers:
            # The extended header replaced the size field, so the offset
            # of the next header has to be recalculated.
            next_header = member.offset_data
            if member.isreg() or member.type not in SUPPORTED_TYPES:
                next_header += member._block(member.size)
            tarfile.offset = next_header

        return member
    1498  
    def _proc_gnusparse_00(self, next, pax_headers, buf):
        """Process a GNU tar extended sparse header, version 0.0.

        `buf' is the raw pax header data. Every "GNU.sparse.offset" and
        "GNU.sparse.numbytes" record found in it is collected, and the
        values are paired up in order of appearance and stored as a list
        of (offset, numbytes) tuples in next.sparse. `pax_headers' is not
        used here; it is part of the signature shared with the other
        sparse handlers.
        """
        # The dots of the keywords are escaped so that only the literal
        # keywords match; an unescaped "." would accept any character in
        # that position and could pick up unrelated records.
        offsets = [int(match.group(1)) for match in
                   re.finditer(br"\d+ GNU\.sparse\.offset=(\d+)\n", buf)]
        numbytes = [int(match.group(1)) for match in
                    re.finditer(br"\d+ GNU\.sparse\.numbytes=(\d+)\n", buf)]
        next.sparse = list(zip(offsets, numbytes))
    1509  
    def _proc_gnusparse_01(self, next, pax_headers):
        """Process a GNU tar extended sparse header, version 0.1.

        The "GNU.sparse.map" pax header is a comma-separated list of
        integers. Consecutive numbers are paired up and stored as a list
        of 2-tuples in next.sparse; a trailing unpaired number is dropped.
        """
        numbers = iter([int(item)
                        for item in pax_headers["GNU.sparse.map"].split(",")])
        # Zipping the iterator with itself yields consecutive pairs.
        next.sparse = list(zip(numbers, numbers))
    1515  
    def _proc_gnusparse_10(self, next, pax_headers, tarfile):
        """Process a GNU tar extended sparse header, version 1.0.

        The sparse map is read from tarfile.fileobj as newline-terminated
        decimal numbers: first the number of entries, then two numbers per
        entry. The pairs are stored in next.sparse and next.offset_data is
        set to the file position reached after reading.
        """
        source = tarfile.fileobj

        # The first line holds the number of map entries.
        head, pending = source.read(BLOCKSIZE).split(b"\n", 1)
        wanted = int(head) * 2

        numbers = []
        while len(numbers) < wanted:
            # Fetch another block when no complete line is buffered.
            if b"\n" not in pending:
                pending += source.read(BLOCKSIZE)
            line, pending = pending.split(b"\n", 1)
            numbers.append(int(line))

        next.offset_data = source.tell()
        next.sparse = list(zip(numbers[0::2], numbers[1::2]))
    1531  
    def _apply_pax_info(self, pax_headers, encoding, errors):
        """Replace fields with supplemental information from a previous
           pax extended or global header.

           A copy of `pax_headers' is stored in self.pax_headers.
           `encoding' and `errors' are accepted but not used here.
        """
        for keyword, value in pax_headers.items():
            if keyword == "GNU.sparse.name":
                self.path = value
            elif keyword in ("GNU.sparse.size", "GNU.sparse.realsize"):
                self.size = int(value)
            elif keyword in PAX_FIELDS:
                if keyword in PAX_NUMBER_FIELDS:
                    # Numeric fields that cannot be converted become 0.
                    convert = PAX_NUMBER_FIELDS[keyword]
                    try:
                        value = convert(value)
                    except ValueError:
                        value = 0
                if keyword == "path":
                    # Drop trailing slashes from the path.
                    value = value.rstrip("/")
                setattr(self, keyword, value)

        self.pax_headers = pax_headers.copy()
    1554  
    def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
        """Decode a single field from a pax record.

        `value' is decoded strictly with `encoding' first. If that raises
        UnicodeDecodeError, it is decoded with `fallback_encoding' using
        the `fallback_errors' error handler instead.
        """
        try:
            text = value.decode(encoding, "strict")
        except UnicodeDecodeError:
            text = value.decode(fallback_encoding, fallback_errors)
        return text
    1562  
    def _block(self, count):
        """Round a byte count up to the next multiple of BLOCKSIZE and
           return it, e.g. _block(834) => 1024.
        """
        whole, rest = divmod(count, BLOCKSIZE)
        # A partially filled block still occupies a full block.
        return (whole + 1 if rest else whole) * BLOCKSIZE
    1571  
    def isreg(self):
        """Return True if the TarInfo object is a regular file."""
        kind = self.type
        return kind in REGULAR_TYPES
    1575  
    def isfile(self):
        """Return True if the TarInfo object is a regular file.

        This returns the result of isreg().
        """
        return self.isreg()
    1579  
    def isdir(self):
        """Return True if it is a directory."""
        kind = self.type
        return kind == DIRTYPE
    1583  
    def issym(self):
        """Return True if it is a symbolic link."""
        kind = self.type
        return kind == SYMTYPE
    1587  
    def islnk(self):
        """Return True if it is a hard link."""
        kind = self.type
        return kind == LNKTYPE
    1591  
    def ischr(self):
        """Return True if it is a character device."""
        kind = self.type
        return kind == CHRTYPE
    1595  
    def isblk(self):
        """Return True if it is a block device."""
        kind = self.type
        return kind == BLKTYPE
    1599  
    def isfifo(self):
        """Return True if it is a FIFO."""
        kind = self.type
        return kind == FIFOTYPE
    1603  
    def issparse(self):
        """Return True if self.sparse is set, i.e. is not None."""
        sparse_map = self.sparse
        return sparse_map is not None
    1606  
    def isdev(self):
        """Return True if it is one of character device, block device or
           FIFO.
        """
        device_types = (CHRTYPE, BLKTYPE, FIFOTYPE)
        return self.type in device_types
    1610  # class TarInfo
    1611  
    1612  class TarFile(object):
    1613      """The TarFile Class provides an interface to tar archives.
    1614      """
    1615  
    # Class-level defaults. __init__() replaces several of them on the
    # instance when the matching argument is given.

    # May be set from 0 (no msgs) to 3 (all msgs).
    debug = 0

    # If true, add content of linked file to the tar file, else the link.
    dereference = False

    # If true, skips empty or invalid blocks and continues processing.
    ignore_zeros = False

    # If 0, fatal errors only appear in debug messages (if debug >= 0).
    # If > 0, errors are passed to the caller as exceptions.
    errorlevel = 1

    # The format to use when creating an archive.
    format = DEFAULT_FORMAT

    # Encoding for 8-bit character strings.
    encoding = ENCODING

    # Error handler for unicode conversion.
    errors = None

    # The default TarInfo class to use.
    tarinfo = TarInfo

    # The file-object for extractfile().
    fileobject = ExFileObject

    # The default filter for extraction.
    extraction_filter = None
    1639  
    def __init__(self, name=None, mode="r", fileobj=None, format=None,
            tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
            errors="surrogateescape", pax_headers=None, debug=None,
            errorlevel=None, copybufsize=None):
        """Open an (uncompressed) tar archive `name'.

           `mode' is 'r' to read from an existing archive, 'a' to append
           data to an existing file, 'w' to create a new file overwriting
           an existing one, or 'x' to create a new file opened with the
           "xb" file mode. It defaults to 'r'; any other value raises
           ValueError.

           If `fileobj' is given, it is used for reading or writing data
           and the low-level file mode is taken from its `mode' attribute
           when it has one. Such a `fileobj' is not closed when the
           TarFile is closed.

           The remaining arguments override the class-level defaults of
           the same name when they are not None. `pax_headers' is only
           honoured if the resulting format is PAX_FORMAT.
        """
        modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}
        if mode not in modes:
            raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
        self.mode, self._mode = mode, modes[mode]

        if fileobj:
            # Work on the caller's file object; borrow its name and mode
            # where they are available.
            if (name is None and hasattr(fileobj, "name") and
                isinstance(fileobj.name, (str, bytes))):
                name = fileobj.name
            if hasattr(fileobj, "mode"):
                self._mode = fileobj.mode
            self._extfileobj = True
        else:
            if self.mode == "a" and not os.path.exists(name):
                # Create nonexistent files in append mode.
                self.mode, self._mode = "w", "wb"
            fileobj = bltn_open(name, self._mode)
            self._extfileobj = False
        self.name = os.path.abspath(name) if name else None
        self.fileobj = fileobj

        # Per-instance overrides of the class-level defaults.
        overrides = (
            ("format", format),
            ("tarinfo", tarinfo),
            ("dereference", dereference),
            ("ignore_zeros", ignore_zeros),
            ("encoding", encoding),
        )
        for attribute, setting in overrides:
            if setting is not None:
                setattr(self, attribute, setting)
        self.errors = errors

        use_pax_headers = pax_headers is not None and self.format == PAX_FORMAT
        self.pax_headers = pax_headers if use_pax_headers else {}

        if debug is not None:
            self.debug = debug
        if errorlevel is not None:
            self.errorlevel = errorlevel

        # Bookkeeping state.
        self.copybufsize = copybufsize
        self.closed = False
        self.members = []       # list of members as TarInfo objects
        self._loaded = False    # flag if all members have been read
        self.offset = self.fileobj.tell()
                                # current position in the archive file
        self.inodes = {}        # dictionary caching the inodes of
                                # archive members already added

        try:
            if self.mode == "r":
                self.firstmember = None
                self.firstmember = self.next()
            else:
                # self.mode is one of "a", "w" or "x" here.
                if self.mode == "a":
                    # Move to the end of the archive,
                    # before the first empty block.
                    while True:
                        self.fileobj.seek(self.offset)
                        try:
                            member = self.tarinfo.fromtarfile(self)
                            self.members.append(member)
                        except EOFHeaderError:
                            self.fileobj.seek(self.offset)
                            break
                        except HeaderError as exc:
                            raise ReadError(str(exc)) from None

                self._loaded = True

                if self.pax_headers:
                    header = self.tarinfo.create_pax_global_header(
                            self.pax_headers.copy())
                    self.fileobj.write(header)
                    self.offset += len(header)
        except BaseException:
            # Whatever went wrong, do not leak a file we opened ourselves.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise
    1739  
    1740      #--------------------------------------------------------------------------
    1741      # Below are the classmethods which act as alternate constructors to the
    1742      # TarFile class. The open() method is the only one that is needed for
    1743      # public use; it is the "super"-constructor and is able to select an
    1744      # adequate "sub"-constructor for a particular compression using the mapping
    1745      # from OPEN_METH.
    1746      #
    1747      # This concept allows one to subclass TarFile without losing the comfort of
    1748      # the super-constructor. A sub-constructor is registered and made available
    1749      # by adding it to the mapping in OPEN_METH.
    1750  
    @classmethod
    def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
        """Open a tar archive for reading, writing or appending. Return
           an appropriate TarFile class.

           mode:
           'r' or 'r:*' open for reading with transparent compression
           'r:'         open for reading exclusively uncompressed
           'r:gz'       open for reading with gzip compression
           'r:bz2'      open for reading with bzip2 compression
           'r:xz'       open for reading with lzma compression
           'a' or 'a:'  open for appending, creating the file if necessary
           'w' or 'w:'  open for writing without compression
           'w:gz'       open for writing with gzip compression
           'w:bz2'      open for writing with bzip2 compression
           'w:xz'       open for writing with lzma compression

           'x' or 'x:'  create a tarfile exclusively without compression, raise
                        an exception if the file is already created
           'x:gz'       create a gzip compressed tarfile, raise an exception
                        if the file is already created
           'x:bz2'      create a bzip2 compressed tarfile, raise an exception
                        if the file is already created
           'x:xz'       create an lzma compressed tarfile, raise an exception
                        if the file is already created

           'r|*'        open a stream of tar blocks with transparent compression
           'r|'         open an uncompressed stream of tar blocks for reading
           'r|gz'       open a gzip compressed stream of tar blocks
           'r|bz2'      open a bzip2 compressed stream of tar blocks
           'r|xz'       open an lzma compressed stream of tar blocks
           'w|'         open an uncompressed stream for writing
           'w|gz'       open a gzip compressed stream for writing
           'w|bz2'      open a bzip2 compressed stream for writing
           'w|xz'       open an lzma compressed stream for writing

           Raises ValueError if neither `name' nor `fileobj' is given or
           if `mode' cannot be interpreted, CompressionError for an
           unknown compression type and ReadError if no opener succeeds
           in transparent read mode.
        """

        if not (name or fileobj):
            raise ValueError("nothing to open")

        if mode in ("r", "r:*"):
            # Try every registered opener in turn, the plain tar opener
            # last, and return the first one that succeeds.
            candidates = sorted(
                cls.OPEN_METH,
                key=lambda kind: cls.OPEN_METH[kind] == 'taropen')
            failures = []
            for comptype in candidates:
                opener = getattr(cls, cls.OPEN_METH[comptype])
                # Remember where to rewind to for the next attempt.
                rewind_to = fileobj.tell() if fileobj is not None else None
                try:
                    return opener(name, "r", fileobj, **kwargs)
                except (ReadError, CompressionError) as exc:
                    failures.append(f'- method {comptype}: {exc!r}')
                    if fileobj is not None:
                        fileobj.seek(rewind_to)
            summary = '\n'.join(failures)
            raise ReadError(f"file could not be opened successfully:\n{summary}")

        if ":" in mode:
            filemode, _, comptype = mode.partition(":")
            filemode = filemode or "r"
            comptype = comptype or "tar"

            # Select the *open() function according to
            # given compression.
            if comptype not in cls.OPEN_METH:
                raise CompressionError("unknown compression type %r" % comptype)
            opener = getattr(cls, cls.OPEN_METH[comptype])
            return opener(name, filemode, fileobj, **kwargs)

        if "|" in mode:
            filemode, _, comptype = mode.partition("|")
            filemode = filemode or "r"
            comptype = comptype or "tar"

            if filemode not in ("r", "w"):
                raise ValueError("mode must be 'r' or 'w'")

            compresslevel = kwargs.pop("compresslevel", 9)
            stream = _Stream(name, filemode, comptype, fileobj, bufsize,
                             compresslevel)
            try:
                archive = cls(name, filemode, stream, **kwargs)
            except BaseException:
                stream.close()
                raise
            # The stream was created here, so closing the archive
            # closes it as well.
            archive._extfileobj = False
            return archive

        if mode in ("a", "w", "x"):
            return cls.taropen(name, mode, fileobj, **kwargs)

        raise ValueError("undiscernible mode")
    1846  
    @classmethod
    def taropen(cls, name, mode="r", fileobj=None, **kwargs):
        """Open uncompressed tar archive name for reading or writing.

           `mode' must be 'r', 'a', 'w' or 'x', otherwise ValueError is
           raised. All arguments are passed on to the class constructor.
        """
        if mode in ("r", "a", "w", "x"):
            return cls(name, mode, fileobj, **kwargs)
        raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
    1854  
    @classmethod
    def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
        """Open gzip compressed tar archive name for reading or writing.
           Appending is not allowed.

           Raises ValueError for an unsupported `mode', CompressionError
           if the gzip module cannot be imported and, in read mode,
           ReadError when an OSError occurs while opening the archive.
        """
        if mode not in ("r", "w", "x"):
            raise ValueError("mode must be 'r', 'w' or 'x'")

        try:
            from gzip import GzipFile
        except ImportError:
            raise CompressionError("gzip module is not available") from None

        reading = mode == 'r'

        try:
            gzfile = GzipFile(name, mode + "b", compresslevel, fileobj)
        except OSError as exc:
            if reading and fileobj is not None:
                raise ReadError("not a gzip file") from exc
            raise

        try:
            archive = cls.taropen(name, mode, gzfile, **kwargs)
        except BaseException as exc:
            # Never leak the GzipFile, whatever went wrong.
            gzfile.close()
            if reading and isinstance(exc, OSError):
                raise ReadError("not a gzip file") from exc
            raise
        # The GzipFile was created here, so the archive owns it.
        archive._extfileobj = False
        return archive
    1887  
    @classmethod
    def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
        """Open bzip2 compressed tar archive name for reading or writing.
           Appending is not allowed.

           Raises ValueError for an unsupported `mode', CompressionError
           if the bz2 module cannot be imported and, in read mode,
           ReadError when an OSError or EOFError occurs while opening the
           archive.
        """
        if mode not in ("r", "w", "x"):
            raise ValueError("mode must be 'r', 'w' or 'x'")

        try:
            from bz2 import BZ2File
        except ImportError:
            raise CompressionError("bz2 module is not available") from None

        bzfile = BZ2File(fileobj or name, mode, compresslevel=compresslevel)

        try:
            archive = cls.taropen(name, mode, bzfile, **kwargs)
        except BaseException as exc:
            # Never leak the BZ2File, whatever went wrong.
            bzfile.close()
            if mode == 'r' and isinstance(exc, (OSError, EOFError)):
                raise ReadError("not a bzip2 file") from exc
            raise
        # The BZ2File was created here, so the archive owns it.
        archive._extfileobj = False
        return archive
    1915  
    @classmethod
    def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
        """Open lzma compressed tar archive name for reading or writing.
           Appending is not allowed.

           Raises ValueError for an unsupported `mode', CompressionError
           if the lzma module cannot be imported and, in read mode,
           ReadError when an LZMAError or EOFError occurs while opening
           the archive.
        """
        if mode not in ("r", "w", "x"):
            raise ValueError("mode must be 'r', 'w' or 'x'")

        try:
            from lzma import LZMAFile, LZMAError
        except ImportError:
            raise CompressionError("lzma module is not available") from None

        xzfile = LZMAFile(fileobj or name, mode, preset=preset)

        try:
            archive = cls.taropen(name, mode, xzfile, **kwargs)
        except BaseException as exc:
            # Never leak the LZMAFile, whatever went wrong.
            xzfile.close()
            if mode == 'r' and isinstance(exc, (LZMAError, EOFError)):
                raise ReadError("not an lzma file") from exc
            raise
        # The LZMAFile was created here, so the archive owns it.
        archive._extfileobj = False
        return archive
    1943  
    # Registry of all *open() methods: maps a compression type to the
    # name of the classmethod that opens it.
    OPEN_METH = dict(
        tar="taropen",      # uncompressed tar
        gz="gzopen",        # gzip compressed tar
        bz2="bz2open",      # bzip2 compressed tar
        xz="xzopen",        # lzma compressed tar
    )
    1951  
    1952      #--------------------------------------------------------------------------
    1953      # The public methods which TarFile provides:
    1954  
    def close(self):
        """Close the TarFile. In write-mode, two finishing zero blocks are
           appended to the archive.

           Calling close() on an already closed TarFile does nothing. The
           underlying file object is only closed if self._extfileobj is
           false.
        """
        if self.closed:
            return

        self.closed = True
        writing = self.mode in ("a", "w", "x")
        try:
            if writing:
                self.fileobj.write(NUL * (BLOCKSIZE * 2))
                self.offset += (BLOCKSIZE * 2)
                # fill up the end with zero-blocks
                # (like option -b20 for tar does)
                leftover = self.offset % RECORDSIZE
                if leftover > 0:
                    self.fileobj.write(NUL * (RECORDSIZE - leftover))
        finally:
            if not self._extfileobj:
                self.fileobj.close()
    1975  
    def getmember(self, name):
        """Return a TarInfo object for member `name'. If `name' can not be
           found in the archive, KeyError is raised. If a member occurs more
           than once in the archive, its last occurrence is assumed to be the
           most up-to-date version.

           Trailing slashes are stripped from `name' before the lookup.
        """
        found = self._getmember(name.rstrip('/'))
        if found is not None:
            return found
        raise KeyError("filename %r not found" % name)
    1986  
    def getmembers(self):
        """Return the members of the archive as a list of TarInfo objects. The
           list has the same order as the members in the archive.
        """
        self._check()
        if self._loaded:
            return self.members
        # A complete list of all members requires scanning the whole
        # archive first.
        self._load()
        return self.members
    1996  
    def getnames(self):
        """Return the names of the archive members as a list, in the same
           order as the list returned by getmembers().
        """
        members = self.getmembers()
        return [member.name for member in members]
    2002  
    2003      def gettarinfo(self, name=None, arcname=None, fileobj=None):
    2004          """Create a TarInfo object from the result of os.stat or equivalent
    2005             on an existing file. The file is either named by `name', or
    2006             specified as a file object `fileobj' with a file descriptor. If
    2007             given, `arcname' specifies an alternative name for the file in the
    2008             archive, otherwise, the name is taken from the 'name' attribute of
    2009             'fileobj', or the 'name' argument. The name should be a text
    2010             string.
    2011          """
    2012          self._check("awx")
    2013  
    2014          # When fileobj is given, replace name by
    2015          # fileobj's real name.
    2016          if fileobj is not None:
    2017              name = fileobj.name
    2018  
    2019          # Building the name of the member in the archive.
    2020          # Backward slashes are converted to forward slashes,
    2021          # Absolute paths are turned to relative paths.
    2022          if arcname is None:
    2023              arcname = name
    2024          drv, arcname = os.path.splitdrive(arcname)
    2025          arcname = arcname.replace(os.sep, "/")
    2026          arcname = arcname.lstrip("/")
    2027  
    2028          # Now, fill the TarInfo object with
    2029          # information specific for the file.
    2030          tarinfo = self.tarinfo()
    2031          tarinfo.tarfile = self  # Not needed
    2032  
    2033          # Use os.stat or os.lstat, depending on if symlinks shall be resolved.
    2034          if fileobj is None:
    2035              if not self.dereference:
    2036                  statres = os.lstat(name)
    2037              else:
    2038                  statres = os.stat(name)
    2039          else:
    2040              statres = os.fstat(fileobj.fileno())
    2041          linkname = ""
    2042  
    2043          stmd = statres.st_mode
    2044          if stat.S_ISREG(stmd):
    2045              inode = (statres.st_ino, statres.st_dev)
    2046              if not self.dereference and statres.st_nlink > 1 and \
    2047                      inode in self.inodes and arcname != self.inodes[inode]:
    2048                  # Is it a hardlink to an already
    2049                  # archived file?
    2050                  type = LNKTYPE
    2051                  linkname = self.inodes[inode]
    2052              else:
    2053                  # The inode is added only if its valid.
    2054                  # For win32 it is always 0.
    2055                  type = REGTYPE
    2056                  if inode[0]:
    2057                      self.inodes[inode] = arcname
    2058          elif stat.S_ISDIR(stmd):
    2059              type = DIRTYPE
    2060          elif stat.S_ISFIFO(stmd):
    2061              type = FIFOTYPE
    2062          elif stat.S_ISLNK(stmd):
    2063              type = SYMTYPE
    2064              linkname = os.readlink(name)
    2065          elif stat.S_ISCHR(stmd):
    2066              type = CHRTYPE
    2067          elif stat.S_ISBLK(stmd):
    2068              type = BLKTYPE
    2069          else:
    2070              return None
    2071  
    2072          # Fill the TarInfo object with all
    2073          # information we can get.
    2074          tarinfo.name = arcname
    2075          tarinfo.mode = stmd
    2076          tarinfo.uid = statres.st_uid
    2077          tarinfo.gid = statres.st_gid
    2078          if type == REGTYPE:
    2079              tarinfo.size = statres.st_size
    2080          else:
    2081              tarinfo.size = 0
    2082          tarinfo.mtime = statres.st_mtime
    2083          tarinfo.type = type
    2084          tarinfo.linkname = linkname
    2085          if pwd:
    2086              try:
    2087                  tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
    2088              except KeyError:
    2089                  pass
    2090          if grp:
    2091              try:
    2092                  tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
    2093              except KeyError:
    2094                  pass
    2095  
    2096          if type in (CHRTYPE, BLKTYPE):
    2097              if hasattr(os, "major") and hasattr(os, "minor"):
    2098                  tarinfo.devmajor = os.major(statres.st_rdev)
    2099                  tarinfo.devminor = os.minor(statres.st_rdev)
    2100          return tarinfo
    2101  
    def list(self, verbose=True, *, members=None):
        """Write a listing of the archive to sys.stdout, one member per line.

           With `verbose' set to False only the member names are shown;
           otherwise every line resembles `ls -l' output (mode, owner/group,
           size or device numbers, modification time, name, link target).
           `members' is optional and must be a subset of the list returned
           by getmembers(); by default all members are listed.
        """
        self._check()

        entries = self if members is None else members
        for entry in entries:
            if verbose:
                # Permission column; unknown modes get a placeholder.
                if entry.mode is not None:
                    _safe_print(stat.filemode(entry.mode))
                else:
                    _safe_print("??????????")

                # Prefer symbolic owner/group names over numeric ids.
                owner = entry.uname or entry.uid
                group = entry.gname or entry.gid
                _safe_print("%s/%s" % (owner, group))

                # Device nodes show "major,minor" in place of a size.
                if entry.ischr() or entry.isblk():
                    device = "%d,%d" % (entry.devmajor, entry.devminor)
                    _safe_print("%10s" % device)
                else:
                    _safe_print("%10d" % entry.size)

                if entry.mtime is not None:
                    stamp = time.localtime(entry.mtime)[:6]
                    _safe_print("%d-%02d-%02d %02d:%02d:%02d" % stamp)
                else:
                    _safe_print("????-??-?? ??:??:??")

            # Directories are marked with a trailing slash.
            label = entry.name
            if entry.isdir():
                label = label + "/"
            _safe_print(label)

            if verbose:
                if entry.issym():
                    _safe_print("-> " + entry.linkname)
                if entry.islnk():
                    _safe_print("link to " + entry.linkname)
            print()
    2139  
    def add(self, name, arcname=None, recursive=True, *, filter=None):
        """Add the file system object `name' to the archive.

           `name' may refer to any kind of file (regular file, directory,
           fifo, symbolic link, ...). `arcname', if given, is the name stored
           in the archive instead of `name'. Directories are descended into
           unless `recursive' is False. `filter' is a callable that receives
           each TarInfo object and returns it (possibly changed), or returns
           None to leave that member out of the archive.
        """
        self._check("awx")

        if arcname is None:
            arcname = name

        # Refuse to put the archive into itself.
        if self.name is not None and os.path.abspath(name) == self.name:
            self._dbg(2, "tarfile: Skipped %r" % name)
            return

        self._dbg(1, name)

        # Describe the file; None means its type cannot be archived.
        member = self.gettarinfo(name, arcname)
        if member is None:
            self._dbg(1, "tarfile: Unsupported type %r" % name)
            return

        # Give the caller's filter the chance to alter or drop the member.
        if filter is not None:
            member = filter(member)
            if member is None:
                self._dbg(2, "tarfile: Excluded %r" % name)
                return

        # Regular files are the only members that carry data.
        if member.isreg():
            with bltn_open(name, "rb") as source:
                self.addfile(member, source)
            return

        descend = member.isdir() and recursive
        self.addfile(member)
        if descend:
            # Sorted so that the member order is reproducible.
            for child in sorted(os.listdir(name)):
                self.add(os.path.join(name, child),
                         os.path.join(arcname, child),
                         recursive, filter=filter)
    2190  
    def addfile(self, tarinfo, fileobj=None):
        """Append the member described by `tarinfo' to the archive.

           When `fileobj' is given it should be a binary file; tarinfo.size
           bytes are copied from it after the header and the data is padded
           with NULs to a whole number of blocks. TarInfo objects can be
           created directly or obtained from gettarinfo().
        """
        self._check("awx")

        # Work on a copy so the caller's object is not the one we keep.
        member = copy.copy(tarinfo)

        header = member.tobuf(self.format, self.encoding, self.errors)
        self.fileobj.write(header)
        self.offset += len(header)
        chunk_size = self.copybufsize

        if fileobj is not None:
            # Copy the payload, then pad the last block if it is partial.
            copyfileobj(fileobj, self.fileobj, member.size, bufsize=chunk_size)
            full_blocks, tail = divmod(member.size, BLOCKSIZE)
            if tail > 0:
                self.fileobj.write(NUL * (BLOCKSIZE - tail))
                full_blocks += 1
            self.offset += full_blocks * BLOCKSIZE

        self.members.append(member)
    2215  
    2216      def _get_filter_function(self, filter):
    2217          if filter is None:
    2218              filter = self.extraction_filter
    2219              if filter is None:
    2220                  warnings.warn(
    2221                      'Python 3.14 will, by default, filter extracted tar '
    2222                      + 'archives and reject files or modify their metadata. '
    2223                      + 'Use the filter argument to control this behavior.',
    2224                      DeprecationWarning)
    2225                  return fully_trusted_filter
    2226              if isinstance(filter, str):
    2227                  raise TypeError(
    2228                      'String names are not supported for '
    2229                      + 'TarFile.extraction_filter. Use a function such as '
    2230                      + 'tarfile.data_filter directly.')
    2231              return filter
    2232          if callable(filter):
    2233              return filter
    2234          try:
    2235              return _NAMED_FILTERS[filter]
    2236          except KeyError:
    2237              raise ValueError(f"filter {filter!r} not found") from None
    2238  
    def extractall(self, path=".", members=None, *, numeric_owner=False,
                   filter=None):
        """Extract every member of the archive below `path'.

           `path' defaults to the current working directory. `members' is
           optional and must be a subset of the list returned by
           getmembers(). If `numeric_owner' is True, only the numeric
           user/group ids are used and not the names.

           The `filter' function is called on each member just before it is
           extracted; it may return a changed TarInfo or None to skip the
           member. String names of common filters are accepted.

           Owner, modification time and permissions of directories are
           applied after all members have been extracted.
        """
        filter_function = self._get_filter_function(filter)
        if members is None:
            members = self

        postponed = []
        for member in members:
            tarinfo = self._get_extract_tarinfo(member, filter_function, path)
            if tarinfo is None:
                continue
            is_directory = tarinfo.isdir()
            if is_directory:
                # Directory attributes are applied last: restrictive
                # permissions can interfere with extraction and extracting
                # contents can reset mtime.
                postponed.append(tarinfo)
            self._extract_one(tarinfo, path, set_attrs=not is_directory,
                              numeric_owner=numeric_owner)

        # Visit directories in reverse name order and set their owner,
        # mtime and mode.
        for tarinfo in sorted(postponed, key=lambda a: a.name, reverse=True):
            dirpath = os.path.join(path, tarinfo.name)
            try:
                self.chown(tarinfo, dirpath, numeric_owner=numeric_owner)
                self.utime(tarinfo, dirpath)
                self.chmod(tarinfo, dirpath)
            except ExtractError as e:
                self._handle_nonfatal_error(e)
    2283  
    def extract(self, member, path="", set_attrs=True, *, numeric_owner=False,
                filter=None):
        """Extract a single member below `path', using its full name.

           `member' may be a filename or a TarInfo object and `path'
           defaults to the current working directory. The file information
           is restored as accurately as possible; owner, mtime and mode are
           set unless `set_attrs' is False. If `numeric_owner' is True, only
           the numeric user/group ids are used and not the names.

           The `filter' function is called before extraction; it may return
           a changed TarInfo or None to skip the member. String names of
           common filters are accepted.
        """
        chosen_filter = self._get_filter_function(filter)
        tarinfo = self._get_extract_tarinfo(member, chosen_filter, path)
        if tarinfo is None:
            # The filter rejected the member; nothing to extract.
            return
        self._extract_one(tarinfo, path, set_attrs, numeric_owner)
    2302  
    2303      def _get_extract_tarinfo(self, member, filter_function, path):
    2304          """Get filtered TarInfo (or None) from member, which might be a str"""
    2305          if isinstance(member, str):
    2306              tarinfo = self.getmember(member)
    2307          else:
    2308              tarinfo = member
    2309  
    2310          unfiltered = tarinfo
    2311          try:
    2312              tarinfo = filter_function(tarinfo, path)
    2313          except (OSError, FilterError) as e:
    2314              self._handle_fatal_error(e)
    2315          except ExtractError as e:
    2316              self._handle_nonfatal_error(e)
    2317          if tarinfo is None:
    2318              self._dbg(2, "tarfile: Excluded %r" % unfiltered.name)
    2319              return None
    2320          # Prepare the link target for makelink().
    2321          if tarinfo.islnk():
    2322              tarinfo = copy.copy(tarinfo)
    2323              tarinfo._link_target = os.path.join(path, tarinfo.linkname)
    2324          return tarinfo
    2325  
    2326      def _extract_one(self, tarinfo, path, set_attrs, numeric_owner):
    2327          """Extract from filtered tarinfo to disk"""
    2328          self._check("r")
    2329  
    2330          try:
    2331              self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
    2332                                   set_attrs=set_attrs,
    2333                                   numeric_owner=numeric_owner)
    2334          except OSError as e:
    2335              self._handle_fatal_error(e)
    2336          except ExtractError as e:
    2337              self._handle_nonfatal_error(e)
    2338  
    2339      def _handle_nonfatal_error(self, e):
    2340          """Handle non-fatal error (ExtractError) according to errorlevel"""
    2341          if self.errorlevel > 1:
    2342              raise
    2343          else:
    2344              self._dbg(1, "tarfile: %s" % e)
    2345  
    2346      def _handle_fatal_error(self, e):
    2347          """Handle "fatal" error according to self.errorlevel"""
    2348          if self.errorlevel > 0:
    2349              raise
    2350          elif isinstance(e, OSError):
    2351              if e.filename is None:
    2352                  self._dbg(1, "tarfile: %s" % e.strerror)
    2353              else:
    2354                  self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
    2355          else:
    2356              self._dbg(1, "tarfile: %s %s" % (type(e).__name__, e))
    2357  
    def extractfile(self, member):
        """Return a file object for reading the data of `member'.

           `member' may be a filename or a TarInfo object. For a regular
           file or a link an io.BufferedReader object is returned, for all
           other existing members None. KeyError is raised if `member' does
           not appear in the archive.
        """
        self._check("r")

        tarinfo = self.getmember(member) if isinstance(member, str) else member

        # Members with unknown types are treated as regular files.
        if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
            return self.fileobject(self, tarinfo)

        if not (tarinfo.islnk() or tarinfo.issym()):
            # Directories, devices etc. have no data associated with them.
            return None

        if isinstance(self.fileobj, _Stream):
            # A small but ugly workaround for the case that someone tries
            # to extract a (sym)link as a file-object from a non-seekable
            # stream of tar blocks.
            raise StreamError("cannot extract (sym)link as file object")

        # A (sym)link's file object is its target's file object.
        return self.extractfile(self._find_link_target(tarinfo))
    2389  
    2390      def _extract_member(self, tarinfo, targetpath, set_attrs=True,
    2391                          numeric_owner=False):
    2392          """Extract the TarInfo object tarinfo to a physical
    2393             file called targetpath.
    2394          """
    2395          # Fetch the TarInfo object for the given name
    2396          # and build the destination pathname, replacing
    2397          # forward slashes to platform specific separators.
    2398          targetpath = targetpath.rstrip("/")
    2399          targetpath = targetpath.replace("/", os.sep)
    2400  
    2401          # Create all upper directories.
    2402          upperdirs = os.path.dirname(targetpath)
    2403          if upperdirs and not os.path.exists(upperdirs):
    2404              # Create directories that are not part of the archive with
    2405              # default permissions.
    2406              os.makedirs(upperdirs)
    2407  
    2408          if tarinfo.islnk() or tarinfo.issym():
    2409              self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
    2410          else:
    2411              self._dbg(1, tarinfo.name)
    2412  
    2413          if tarinfo.isreg():
    2414              self.makefile(tarinfo, targetpath)
    2415          elif tarinfo.isdir():
    2416              self.makedir(tarinfo, targetpath)
    2417          elif tarinfo.isfifo():
    2418              self.makefifo(tarinfo, targetpath)
    2419          elif tarinfo.ischr() or tarinfo.isblk():
    2420              self.makedev(tarinfo, targetpath)
    2421          elif tarinfo.islnk() or tarinfo.issym():
    2422              self.makelink(tarinfo, targetpath)
    2423          elif tarinfo.type not in SUPPORTED_TYPES:
    2424              self.makeunknown(tarinfo, targetpath)
    2425          else:
    2426              self.makefile(tarinfo, targetpath)
    2427  
    2428          if set_attrs:
    2429              self.chown(tarinfo, targetpath, numeric_owner)
    2430              if not tarinfo.issym():
    2431                  self.chmod(tarinfo, targetpath)
    2432                  self.utime(tarinfo, targetpath)
    2433  
    2434      #--------------------------------------------------------------------------
    2435      # Below are the different file methods. They are called via
    2436      # _extract_member() when extract() is called. They can be replaced in a
    2437      # subclass to implement other functionality.
    2438  
    def makedir(self, tarinfo, targetpath):
        """Create the directory `targetpath'; an existing one is left alone.
        """
        try:
            if tarinfo.mode is not None:
                # Start with a safe mode, the real mode is set later in
                # _extract_member().
                os.mkdir(targetpath, 0o700)
            else:
                # No mode recorded: use the system's default.
                os.mkdir(targetpath)
        except FileExistsError:
            pass
    2452  
    def makefile(self, tarinfo, targetpath):
        """Write the data of `tarinfo' to a new file called targetpath.
        """
        archive = self.fileobj
        archive.seek(tarinfo.offset_data)
        chunk_size = self.copybufsize
        with bltn_open(targetpath, "wb") as out:
            if tarinfo.sparse is None:
                copyfileobj(archive, out, tarinfo.size, ReadError, chunk_size)
            else:
                # Sparse member: write each stored segment at its offset,
                # then give the file its full size.
                for start, length in tarinfo.sparse:
                    out.seek(start)
                    copyfileobj(archive, out, length, ReadError, chunk_size)
                out.seek(tarinfo.size)
                out.truncate()
    2468  
    def makeunknown(self, tarinfo, targetpath):
        """Extract a member of unknown type to targetpath as a regular
           file and report that in a debug message.
        """
        self.makefile(tarinfo, targetpath)
        notice = ("tarfile: Unknown file type %r, "
                  "extracted as regular file." % tarinfo.type)
        self._dbg(1, notice)
    2476  
    def makefifo(self, tarinfo, targetpath):
        """Create a fifo called targetpath.

           Raises ExtractError if the platform has no os.mkfifo().
        """
        if not hasattr(os, "mkfifo"):
            raise ExtractError("fifo not supported by system")
        os.mkfifo(targetpath)
    2484  
    def makedev(self, tarinfo, targetpath):
        """Create a character or block device called targetpath.

           Raises ExtractError if the platform lacks os.mknod() or
           os.makedev().
        """
        if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
            raise ExtractError("special devices not supported by system")

        # Fall back to mknod's default permissions if no mode is recorded.
        mode = 0o600 if tarinfo.mode is None else tarinfo.mode
        mode |= stat.S_IFBLK if tarinfo.isblk() else stat.S_IFCHR

        device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
        os.mknod(targetpath, mode, device)
    2502  
    def makelink(self, tarinfo, targetpath):
        """Create a symbolic or hard link called targetpath.

           If the link cannot be created (platform limitation), the member
           the link refers to is extracted to targetpath instead.
           ExtractError is raised when that member is not in the archive.
        """
        try:
            # For systems that support symbolic and hard links.
            if not tarinfo.issym():
                if os.path.exists(tarinfo._link_target):
                    os.link(tarinfo._link_target, targetpath)
                else:
                    # The link target is not on disk: extract the
                    # referenced member in place of the link.
                    self._extract_member(self._find_link_target(tarinfo),
                                         targetpath)
            else:
                if os.path.lexists(targetpath):
                    # Avoid FileExistsError on following os.symlink.
                    os.unlink(targetpath)
                os.symlink(tarinfo.linkname, targetpath)
        except symlink_exception:
            try:
                referenced = self._find_link_target(tarinfo)
                self._extract_member(referenced, targetpath)
            except KeyError:
                raise ExtractError("unable to resolve link inside archive") from None
    2527  
    def chown(self, tarinfo, targetpath, numeric_owner):
        """Set the owner of targetpath according to tarinfo.

           If numeric_owner is True, .gid/.uid are used instead of
           .gname/.uname. If numeric_owner is False, .gid/.uid are the
           fallback when the lookup by name fails. Nothing is done unless
           the effective user id is 0.
        """
        # We have to be root to change ownership.
        if not (hasattr(os, "geteuid") and os.geteuid() == 0):
            return

        group_id = tarinfo.gid
        user_id = tarinfo.uid
        if not numeric_owner:
            try:
                if grp and tarinfo.gname:
                    group_id = grp.getgrnam(tarinfo.gname)[2]
            except KeyError:
                pass
            try:
                if pwd and tarinfo.uname:
                    user_id = pwd.getpwnam(tarinfo.uname)[2]
            except KeyError:
                pass

        # A missing id is passed as -1.
        if group_id is None:
            group_id = -1
        if user_id is None:
            user_id = -1

        try:
            if tarinfo.issym() and hasattr(os, "lchown"):
                os.lchown(targetpath, user_id, group_id)
            else:
                os.chown(targetpath, user_id, group_id)
        except OSError as exc:
            raise ExtractError("could not change owner") from exc
    2560  
    def chmod(self, tarinfo, targetpath):
        """Apply the permission bits of tarinfo to targetpath.

           Does nothing if tarinfo.mode is None. A failing os.chmod() is
           reported as ExtractError.
        """
        mode = tarinfo.mode
        if mode is None:
            return
        try:
            os.chmod(targetpath, mode)
        except OSError as exc:
            raise ExtractError("could not change mode") from exc
    2570  
    def utime(self, tarinfo, targetpath):
        """Apply the modification time of tarinfo to targetpath.

           Does nothing if tarinfo.mtime is None or os.utime() is not
           available. A failing os.utime() is reported as ExtractError.
        """
        stamp = tarinfo.mtime
        if stamp is None or not hasattr(os, 'utime'):
            return
        try:
            # Access and modification time both become mtime.
            os.utime(targetpath, (stamp, stamp))
        except OSError as exc:
            raise ExtractError("could not change modification time") from exc
    2583  
    2584      #--------------------------------------------------------------------------
    def next(self):
        """Return the next member of the archive as a TarInfo object, when
           TarFile is opened for reading. Return None if there is no more
           available.

           A member stored in self.firstmember is handed out first. Every
           other member read here is appended to self.members; when no
           member could be read, self._loaded is set to True. Header
           problems are raised as ReadError in the cases handled below.
        """
        self._check("ra")
        if self.firstmember is not None:
            # Hand out the pending first member exactly once.
            m = self.firstmember
            self.firstmember = None
            return m

        # Advance the file pointer.
        if self.offset != self.fileobj.tell():
            if self.offset == 0:
                return None
            # Seek to the byte just before the expected header and read it:
            # an empty read means the data ends before self.offset.
            self.fileobj.seek(self.offset - 1)
            if not self.fileobj.read(1):
                raise ReadError("unexpected end of data")

        # Read the next block.
        # Each handler below either raises, retries with `continue', or
        # falls through to the `break' with tarinfo still None, which ends
        # the iteration.
        tarinfo = None
        while True:
            try:
                tarinfo = self.tarinfo.fromtarfile(self)
            except EOFHeaderError as e:
                if self.ignore_zeros:
                    # Skip this block and try the next one.
                    self._dbg(2, "0x%X: %s" % (self.offset, e))
                    self.offset += BLOCKSIZE
                    continue
            except InvalidHeaderError as e:
                if self.ignore_zeros:
                    # Skip this block and try the next one.
                    self._dbg(2, "0x%X: %s" % (self.offset, e))
                    self.offset += BLOCKSIZE
                    continue
                elif self.offset == 0:
                    # An invalid header at offset 0 is reported as an error.
                    raise ReadError(str(e)) from None
            except EmptyHeaderError:
                if self.offset == 0:
                    raise ReadError("empty file") from None
            except TruncatedHeaderError as e:
                if self.offset == 0:
                    raise ReadError(str(e)) from None
            except SubsequentHeaderError as e:
                raise ReadError(str(e)) from None
            except Exception as e:
                # Report zlib errors as ReadError; every other exception
                # (also when zlib cannot be imported) is re-raised as is.
                try:
                    import zlib
                    if isinstance(e, zlib.error):
                        raise ReadError(f'zlib error: {e}') from None
                    else:
                        raise e
                except ImportError:
                    raise e
            break

        if tarinfo is not None:
            self.members.append(tarinfo)
        else:
            # Nothing more could be read: the member list is complete.
            self._loaded = True

        return tarinfo
    2646  
    2647      #--------------------------------------------------------------------------
    2648      # Little helper methods:
    2649  
    2650      def _getmember(self, name, tarinfo=None, normalize=False):
    2651          """Find an archive member by name from bottom to top.
    2652             If tarinfo is given, it is used as the starting point.
    2653          """
    2654          # Ensure that all members have been loaded.
    2655          members = self.getmembers()
    2656  
    2657          # Limit the member search list up to tarinfo.
    2658          skipping = False
    2659          if tarinfo is not None:
    2660              try:
    2661                  index = members.index(tarinfo)
    2662              except ValueError:
    2663                  # The given starting point might be a (modified) copy.
    2664                  # We'll later skip members until we find an equivalent.
    2665                  skipping = True
    2666              else:
    2667                  # Happy fast path
    2668                  members = members[:index]
    2669  
    2670          if normalize:
    2671              name = os.path.normpath(name)
    2672  
    2673          for member in reversed(members):
    2674              if skipping:
    2675                  if tarinfo.offset == member.offset:
    2676                      skipping = False
    2677                  continue
    2678              if normalize:
    2679                  member_name = os.path.normpath(member.name)
    2680              else:
    2681                  member_name = member.name
    2682  
    2683              if name == member_name:
    2684                  return member
    2685  
    2686          if skipping:
    2687              # Starting point was not found
    2688              raise ValueError(tarinfo)
    2689  
    2690      def _load(self):
    2691          """Read through the entire archive file and look for readable
    2692             members.
    2693          """
    2694          while self.next() is not None:
    2695              pass
    2696          self._loaded = True
    2697  
    2698      def _check(self, mode=None):
    2699          """Check if TarFile is still open, and if the operation's mode
    2700             corresponds to TarFile's mode.
    2701          """
    2702          if self.closed:
    2703              raise OSError("%s is closed" % self.__class__.__name__)
    2704          if mode is not None and self.mode not in mode:
    2705              raise OSError("bad operation for mode %r" % self.mode)
    2706  
    2707      def _find_link_target(self, tarinfo):
    2708          """Find the target member of a symlink or hardlink member in the
    2709             archive.
    2710          """
    2711          if tarinfo.issym():
    2712              # Always search the entire archive.
    2713              linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname)))
    2714              limit = None
    2715          else:
    2716              # Search the archive before the link, because a hard link is
    2717              # just a reference to an already archived file.
    2718              linkname = tarinfo.linkname
    2719              limit = tarinfo
    2720  
    2721          member = self._getmember(linkname, tarinfo=limit, normalize=True)
    2722          if member is None:
    2723              raise KeyError("linkname %r not found" % linkname)
    2724          return member
    2725  
    2726      def __iter__(self):
    2727          """Provide an iterator object.
    2728          """
    2729          if self._loaded:
    2730              yield from self.members
    2731              return
    2732  
    2733          # Yield items using TarFile's next() method.
    2734          # When all members have been read, set TarFile as _loaded.
    2735          index = 0
    2736          # Fix for SF #1100429: Under rare circumstances it can
    2737          # happen that getmembers() is called during iteration,
    2738          # which will have already exhausted the next() method.
    2739          if self.firstmember is not None:
    2740              tarinfo = self.next()
    2741              index += 1
    2742              yield tarinfo
    2743  
    2744          while True:
    2745              if index < len(self.members):
    2746                  tarinfo = self.members[index]
    2747              elif not self._loaded:
    2748                  tarinfo = self.next()
    2749                  if not tarinfo:
    2750                      self._loaded = True
    2751                      return
    2752              else:
    2753                  return
    2754              index += 1
    2755              yield tarinfo
    2756  
    2757      def _dbg(self, level, msg):
    2758          """Write debugging output to sys.stderr.
    2759          """
    2760          if level <= self.debug:
    2761              print(msg, file=sys.stderr)
    2762  
    def __enter__(self):
        """Enter the runtime context: make sure the archive is still
           usable and return the TarFile itself.
        """
        # _check() raises OSError if the TarFile has already been closed.
        self._check()
        return self
    2766  
    2767      def __exit__(self, type, value, traceback):
    2768          if type is None:
    2769              self.close()
    2770          else:
    2771              # An exception occurred. We must not call close() because
    2772              # it would try to write end-of-archive blocks and padding.
    2773              if not self._extfileobj:
    2774                  self.fileobj.close()
    2775              self.closed = True
    2776  
    2777  #--------------------
    2778  # exported functions
    2779  #--------------------
    2780  
    2781  def is_tarfile(name):
    2782      """Return True if name points to a tar archive that we
    2783         are able to handle, else return False.
    2784  
    2785         'name' should be a string, file, or file-like object.
    2786      """
    2787      try:
    2788          if hasattr(name, "read"):
    2789              pos = name.tell()
    2790              t = open(fileobj=name)
    2791              name.seek(pos)
    2792          else:
    2793              t = open(name)
    2794          t.close()
    2795          return True
    2796      except TarError:
    2797          return False
    2798  
# Expose TarFile.open as the module-level function open(). Within this
# module the name `open` therefore refers to TarFile.open rather than
# the builtin, which is what is_tarfile() and main() rely on.
open = TarFile.open
    2800  
    2801  
    2802  def main():
    2803      import argparse
    2804  
    2805      description = 'A simple command-line interface for tarfile module.'
    2806      parser = argparse.ArgumentParser(description=description)
    2807      parser.add_argument('-v', '--verbose', action='store_true', default=False,
    2808                          help='Verbose output')
    2809      parser.add_argument('--filter', metavar='<filtername>',
    2810                          choices=_NAMED_FILTERS,
    2811                          help='Filter for extraction')
    2812  
    2813      group = parser.add_mutually_exclusive_group(required=True)
    2814      group.add_argument('-l', '--list', metavar='<tarfile>',
    2815                         help='Show listing of a tarfile')
    2816      group.add_argument('-e', '--extract', nargs='+',
    2817                         metavar=('<tarfile>', '<output_dir>'),
    2818                         help='Extract tarfile into target dir')
    2819      group.add_argument('-c', '--create', nargs='+',
    2820                         metavar=('<name>', '<file>'),
    2821                         help='Create tarfile from sources')
    2822      group.add_argument('-t', '--test', metavar='<tarfile>',
    2823                         help='Test if a tarfile is valid')
    2824  
    2825      args = parser.parse_args()
    2826  
    2827      if args.filter and args.extract is None:
    2828          parser.exit(1, '--filter is only valid for extraction\n')
    2829  
    2830      if args.test is not None:
    2831          src = args.test
    2832          if is_tarfile(src):
    2833              with open(src, 'r') as tar:
    2834                  tar.getmembers()
    2835                  print(tar.getmembers(), file=sys.stderr)
    2836              if args.verbose:
    2837                  print('{!r} is a tar archive.'.format(src))
    2838          else:
    2839              parser.exit(1, '{!r} is not a tar archive.\n'.format(src))
    2840  
    2841      elif args.list is not None:
    2842          src = args.list
    2843          if is_tarfile(src):
    2844              with TarFile.open(src, 'r:*') as tf:
    2845                  tf.list(verbose=args.verbose)
    2846          else:
    2847              parser.exit(1, '{!r} is not a tar archive.\n'.format(src))
    2848  
    2849      elif args.extract is not None:
    2850          if len(args.extract) == 1:
    2851              src = args.extract[0]
    2852              curdir = os.curdir
    2853          elif len(args.extract) == 2:
    2854              src, curdir = args.extract
    2855          else:
    2856              parser.exit(1, parser.format_help())
    2857  
    2858          if is_tarfile(src):
    2859              with TarFile.open(src, 'r:*') as tf:
    2860                  tf.extractall(path=curdir, filter=args.filter)
    2861              if args.verbose:
    2862                  if curdir == '.':
    2863                      msg = '{!r} file is extracted.'.format(src)
    2864                  else:
    2865                      msg = ('{!r} file is extracted '
    2866                             'into {!r} directory.').format(src, curdir)
    2867                  print(msg)
    2868          else:
    2869              parser.exit(1, '{!r} is not a tar archive.\n'.format(src))
    2870  
    2871      elif args.create is not None:
    2872          tar_name = args.create.pop(0)
    2873          _, ext = os.path.splitext(tar_name)
    2874          compressions = {
    2875              # gz
    2876              '.gz': 'gz',
    2877              '.tgz': 'gz',
    2878              # xz
    2879              '.xz': 'xz',
    2880              '.txz': 'xz',
    2881              # bz2
    2882              '.bz2': 'bz2',
    2883              '.tbz': 'bz2',
    2884              '.tbz2': 'bz2',
    2885              '.tb2': 'bz2',
    2886          }
    2887          tar_mode = 'w:' + compressions[ext] if ext in compressions else 'w'
    2888          tar_files = args.create
    2889  
    2890          with TarFile.open(tar_name, tar_mode) as tf:
    2891              for file_name in tar_files:
    2892                  tf.add(file_name)
    2893  
    2894          if args.verbose:
    2895              print('{!r} file created.'.format(tar_name))
    2896  
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()