1  """
       2  Read and write ZIP files.
       3  
       4  XXX references to utf-8 need further investigation.
       5  """
       6  import binascii
       7  import importlib.util
       8  import io
       9  import os
      10  import shutil
      11  import stat
      12  import struct
      13  import sys
      14  import threading
      15  import time
      16  
      17  try:
      18      import zlib # We may need its compression method
      19      crc32 = zlib.crc32
      20  except ImportError:
      21      zlib = None
      22      crc32 = binascii.crc32
      23  
      24  try:
      25      import bz2 # We may need its compression method
      26  except ImportError:
      27      bz2 = None
      28  
      29  try:
      30      import lzma # We may need its compression method
      31  except ImportError:
      32      lzma = None
      33  
      34  __all__ = ["BadZipFile", "BadZipfile", "error",
      35             "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
      36             "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile",
      37             "Path"]
      38  
      39  class ESC[4;38;5;81mBadZipFile(ESC[4;38;5;149mException):
      40      pass
      41  
      42  
      43  class ESC[4;38;5;81mLargeZipFile(ESC[4;38;5;149mException):
      44      """
      45      Raised when writing a zipfile, the zipfile requires ZIP64 extensions
      46      and those extensions are disabled.
      47      """
      48  
      49  error = BadZipfile = BadZipFile      # Pre-3.2 compatibility names
      50  
      51  
      52  ZIP64_LIMIT = (1 << 31) - 1
      53  ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
      54  ZIP_MAX_COMMENT = (1 << 16) - 1
      55  
      56  # constants for Zip file compression methods
      57  ZIP_STORED = 0
      58  ZIP_DEFLATED = 8
      59  ZIP_BZIP2 = 12
      60  ZIP_LZMA = 14
      61  # Other ZIP compression methods not supported
      62  
      63  DEFAULT_VERSION = 20
      64  ZIP64_VERSION = 45
      65  BZIP2_VERSION = 46
      66  LZMA_VERSION = 63
      67  # we recognize (but not necessarily support) all features up to that version
      68  MAX_EXTRACT_VERSION = 63
      69  
      70  # Below are some formats and associated data for reading/writing headers using
      71  # the struct module.  The names and structures of headers/records are those used
      72  # in the PKWARE description of the ZIP file format:
      73  #     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
      74  # (URL valid as of January 2008)
      75  
      76  # The "end of central directory" structure, magic number, size, and indices
      77  # (section V.I in the format document)
      78  structEndArchive = b"<4s4H2LH"
      79  stringEndArchive = b"PK\005\006"
      80  sizeEndCentDir = struct.calcsize(structEndArchive)
      81  
      82  _ECD_SIGNATURE = 0
      83  _ECD_DISK_NUMBER = 1
      84  _ECD_DISK_START = 2
      85  _ECD_ENTRIES_THIS_DISK = 3
      86  _ECD_ENTRIES_TOTAL = 4
      87  _ECD_SIZE = 5
      88  _ECD_OFFSET = 6
      89  _ECD_COMMENT_SIZE = 7
      90  # These last two indices are not part of the structure as defined in the
      91  # spec, but they are used internally by this module as a convenience
      92  _ECD_COMMENT = 8
      93  _ECD_LOCATION = 9
      94  
      95  # The "central directory" structure, magic number, size, and indices
      96  # of entries in the structure (section V.F in the format document)
      97  structCentralDir = "<4s4B4HL2L5H2L"
      98  stringCentralDir = b"PK\001\002"
      99  sizeCentralDir = struct.calcsize(structCentralDir)
     100  
     101  # indexes of entries in the central directory structure
     102  _CD_SIGNATURE = 0
     103  _CD_CREATE_VERSION = 1
     104  _CD_CREATE_SYSTEM = 2
     105  _CD_EXTRACT_VERSION = 3
     106  _CD_EXTRACT_SYSTEM = 4
     107  _CD_FLAG_BITS = 5
     108  _CD_COMPRESS_TYPE = 6
     109  _CD_TIME = 7
     110  _CD_DATE = 8
     111  _CD_CRC = 9
     112  _CD_COMPRESSED_SIZE = 10
     113  _CD_UNCOMPRESSED_SIZE = 11
     114  _CD_FILENAME_LENGTH = 12
     115  _CD_EXTRA_FIELD_LENGTH = 13
     116  _CD_COMMENT_LENGTH = 14
     117  _CD_DISK_NUMBER_START = 15
     118  _CD_INTERNAL_FILE_ATTRIBUTES = 16
     119  _CD_EXTERNAL_FILE_ATTRIBUTES = 17
     120  _CD_LOCAL_HEADER_OFFSET = 18
     121  
     122  # General purpose bit flags
     123  # Zip Appnote: 4.4.4 general purpose bit flag: (2 bytes)
     124  _MASK_ENCRYPTED = 1 << 0
     125  # Bits 1 and 2 have different meanings depending on the compression used.
     126  _MASK_COMPRESS_OPTION_1 = 1 << 1
     127  # _MASK_COMPRESS_OPTION_2 = 1 << 2
     128  # _MASK_USE_DATA_DESCRIPTOR: If set, crc-32, compressed size and uncompressed
     129  # size are zero in the local header and the real values are written in the data
     130  # descriptor immediately following the compressed data.
     131  _MASK_USE_DATA_DESCRIPTOR = 1 << 3
     132  # Bit 4: Reserved for use with compression method 8, for enhanced deflating.
     133  # _MASK_RESERVED_BIT_4 = 1 << 4
     134  _MASK_COMPRESSED_PATCH = 1 << 5
     135  _MASK_STRONG_ENCRYPTION = 1 << 6
     136  # _MASK_UNUSED_BIT_7 = 1 << 7
     137  # _MASK_UNUSED_BIT_8 = 1 << 8
     138  # _MASK_UNUSED_BIT_9 = 1 << 9
     139  # _MASK_UNUSED_BIT_10 = 1 << 10
     140  _MASK_UTF_FILENAME = 1 << 11
     141  # Bit 12: Reserved by PKWARE for enhanced compression.
     142  # _MASK_RESERVED_BIT_12 = 1 << 12
     143  # _MASK_ENCRYPTED_CENTRAL_DIR = 1 << 13
     144  # Bit 14, 15: Reserved by PKWARE
     145  # _MASK_RESERVED_BIT_14 = 1 << 14
     146  # _MASK_RESERVED_BIT_15 = 1 << 15
     147  
     148  # The "local file header" structure, magic number, size, and indices
     149  # (section V.A in the format document)
     150  structFileHeader = "<4s2B4HL2L2H"
     151  stringFileHeader = b"PK\003\004"
     152  sizeFileHeader = struct.calcsize(structFileHeader)
     153  
     154  _FH_SIGNATURE = 0
     155  _FH_EXTRACT_VERSION = 1
     156  _FH_EXTRACT_SYSTEM = 2
     157  _FH_GENERAL_PURPOSE_FLAG_BITS = 3
     158  _FH_COMPRESSION_METHOD = 4
     159  _FH_LAST_MOD_TIME = 5
     160  _FH_LAST_MOD_DATE = 6
     161  _FH_CRC = 7
     162  _FH_COMPRESSED_SIZE = 8
     163  _FH_UNCOMPRESSED_SIZE = 9
     164  _FH_FILENAME_LENGTH = 10
     165  _FH_EXTRA_FIELD_LENGTH = 11
     166  
     167  # The "Zip64 end of central directory locator" structure, magic number, and size
     168  structEndArchive64Locator = "<4sLQL"
     169  stringEndArchive64Locator = b"PK\x06\x07"
     170  sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
     171  
     172  # The "Zip64 end of central directory" record, magic number, size, and indices
     173  # (section V.G in the format document)
     174  structEndArchive64 = "<4sQ2H2L4Q"
     175  stringEndArchive64 = b"PK\x06\x06"
     176  sizeEndCentDir64 = struct.calcsize(structEndArchive64)
     177  
     178  _CD64_SIGNATURE = 0
     179  _CD64_DIRECTORY_RECSIZE = 1
     180  _CD64_CREATE_VERSION = 2
     181  _CD64_EXTRACT_VERSION = 3
     182  _CD64_DISK_NUMBER = 4
     183  _CD64_DISK_NUMBER_START = 5
     184  _CD64_NUMBER_ENTRIES_THIS_DISK = 6
     185  _CD64_NUMBER_ENTRIES_TOTAL = 7
     186  _CD64_DIRECTORY_SIZE = 8
     187  _CD64_OFFSET_START_CENTDIR = 9
     188  
     189  _DD_SIGNATURE = 0x08074b50
     190  
     191  _EXTRA_FIELD_STRUCT = struct.Struct('<HH')
     192  
     193  def _strip_extra(extra, xids):
     194      # Remove Extra Fields with specified IDs.
     195      unpack = _EXTRA_FIELD_STRUCT.unpack
     196      modified = False
     197      buffer = []
     198      start = i = 0
     199      while i + 4 <= len(extra):
     200          xid, xlen = unpack(extra[i : i + 4])
     201          j = i + 4 + xlen
     202          if xid in xids:
     203              if i != start:
     204                  buffer.append(extra[start : i])
     205              start = j
     206              modified = True
     207          i = j
     208      if not modified:
     209          return extra
     210      if start != len(extra):
     211          buffer.append(extra[start:])
     212      return b''.join(buffer)
     213  
     214  def _check_zipfile(fp):
     215      try:
     216          if _EndRecData(fp):
     217              return True         # file has correct magic number
     218      except OSError:
     219          pass
     220      return False
     221  
     222  def is_zipfile(filename):
     223      """Quickly see if a file is a ZIP file by checking the magic number.
     224  
     225      The filename argument may be a file or file-like object too.
     226      """
     227      result = False
     228      try:
     229          if hasattr(filename, "read"):
     230              result = _check_zipfile(fp=filename)
     231          else:
     232              with open(filename, "rb") as fp:
     233                  result = _check_zipfile(fp)
     234      except OSError:
     235          pass
     236      return result
     237  
     238  def _EndRecData64(fpin, offset, endrec):
     239      """
     240      Read the ZIP64 end-of-archive records and use that to update endrec
     241      """
     242      try:
     243          fpin.seek(offset - sizeEndCentDir64Locator, 2)
     244      except OSError:
     245          # If the seek fails, the file is not large enough to contain a ZIP64
     246          # end-of-archive record, so just return the end record we were given.
     247          return endrec
     248  
     249      data = fpin.read(sizeEndCentDir64Locator)
     250      if len(data) != sizeEndCentDir64Locator:
     251          return endrec
     252      sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
     253      if sig != stringEndArchive64Locator:
     254          return endrec
     255  
     256      if diskno != 0 or disks > 1:
     257          raise BadZipFile("zipfiles that span multiple disks are not supported")
     258  
     259      # Assume no 'zip64 extensible data'
     260      fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
     261      data = fpin.read(sizeEndCentDir64)
     262      if len(data) != sizeEndCentDir64:
     263          return endrec
     264      sig, sz, create_version, read_version, disk_num, disk_dir, \
     265          dircount, dircount2, dirsize, diroffset = \
     266          struct.unpack(structEndArchive64, data)
     267      if sig != stringEndArchive64:
     268          return endrec
     269  
     270      # Update the original endrec using data from the ZIP64 record
     271      endrec[_ECD_SIGNATURE] = sig
     272      endrec[_ECD_DISK_NUMBER] = disk_num
     273      endrec[_ECD_DISK_START] = disk_dir
     274      endrec[_ECD_ENTRIES_THIS_DISK] = dircount
     275      endrec[_ECD_ENTRIES_TOTAL] = dircount2
     276      endrec[_ECD_SIZE] = dirsize
     277      endrec[_ECD_OFFSET] = diroffset
     278      return endrec
     279  
     280  
     281  def _EndRecData(fpin):
     282      """Return data from the "End of Central Directory" record, or None.
     283  
     284      The data is a list of the nine items in the ZIP "End of central dir"
     285      record followed by a tenth item, the file seek offset of this record."""
     286  
     287      # Determine file size
     288      fpin.seek(0, 2)
     289      filesize = fpin.tell()
     290  
     291      # Check to see if this is ZIP file with no archive comment (the
     292      # "end of central directory" structure should be the last item in the
     293      # file if this is the case).
     294      try:
     295          fpin.seek(-sizeEndCentDir, 2)
     296      except OSError:
     297          return None
     298      data = fpin.read()
     299      if (len(data) == sizeEndCentDir and
     300          data[0:4] == stringEndArchive and
     301          data[-2:] == b"\000\000"):
     302          # the signature is correct and there's no comment, unpack structure
     303          endrec = struct.unpack(structEndArchive, data)
     304          endrec=list(endrec)
     305  
     306          # Append a blank comment and record start offset
     307          endrec.append(b"")
     308          endrec.append(filesize - sizeEndCentDir)
     309  
     310          # Try to read the "Zip64 end of central directory" structure
     311          return _EndRecData64(fpin, -sizeEndCentDir, endrec)
     312  
     313      # Either this is not a ZIP file, or it is a ZIP file with an archive
     314      # comment.  Search the end of the file for the "end of central directory"
     315      # record signature. The comment is the last item in the ZIP file and may be
     316      # up to 64K long.  It is assumed that the "end of central directory" magic
     317      # number does not appear in the comment.
     318      maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
     319      fpin.seek(maxCommentStart, 0)
     320      data = fpin.read()
     321      start = data.rfind(stringEndArchive)
     322      if start >= 0:
     323          # found the magic number; attempt to unpack and interpret
     324          recData = data[start:start+sizeEndCentDir]
     325          if len(recData) != sizeEndCentDir:
     326              # Zip file is corrupted.
     327              return None
     328          endrec = list(struct.unpack(structEndArchive, recData))
     329          commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
     330          comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
     331          endrec.append(comment)
     332          endrec.append(maxCommentStart + start)
     333  
     334          # Try to read the "Zip64 end of central directory" structure
     335          return _EndRecData64(fpin, maxCommentStart + start - filesize,
     336                               endrec)
     337  
     338      # Unable to find a valid end of central directory structure
     339      return None
     340  
     341  def _sanitize_filename(filename):
     342      """Terminate the file name at the first null byte and
     343      ensure paths always use forward slashes as the directory separator."""
     344  
     345      # Terminate the file name at the first null byte.  Null bytes in file
     346      # names are used as tricks by viruses in archives.
     347      null_byte = filename.find(chr(0))
     348      if null_byte >= 0:
     349          filename = filename[0:null_byte]
     350      # This is used to ensure paths in generated ZIP files always use
     351      # forward slashes as the directory separator, as required by the
     352      # ZIP format specification.
     353      if os.sep != "/" and os.sep in filename:
     354          filename = filename.replace(os.sep, "/")
     355      if os.altsep and os.altsep != "/" and os.altsep in filename:
     356          filename = filename.replace(os.altsep, "/")
     357      return filename
     358  
     359  
     360  class ESC[4;38;5;81mZipInfo (ESC[4;38;5;149mobject):
     361      """Class with attributes describing each file in the ZIP archive."""
     362  
     363      __slots__ = (
     364          'orig_filename',
     365          'filename',
     366          'date_time',
     367          'compress_type',
     368          '_compresslevel',
     369          'comment',
     370          'extra',
     371          'create_system',
     372          'create_version',
     373          'extract_version',
     374          'reserved',
     375          'flag_bits',
     376          'volume',
     377          'internal_attr',
     378          'external_attr',
     379          'header_offset',
     380          'CRC',
     381          'compress_size',
     382          'file_size',
     383          '_raw_time',
     384      )
     385  
     386      def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
     387          self.orig_filename = filename   # Original file name in archive
     388  
     389          # Terminate the file name at the first null byte and
     390          # ensure paths always use forward slashes as the directory separator.
     391          filename = _sanitize_filename(filename)
     392  
     393          self.filename = filename        # Normalized file name
     394          self.date_time = date_time      # year, month, day, hour, min, sec
     395  
     396          if date_time[0] < 1980:
     397              raise ValueError('ZIP does not support timestamps before 1980')
     398  
     399          # Standard values:
     400          self.compress_type = ZIP_STORED # Type of compression for the file
     401          self._compresslevel = None      # Level for the compressor
     402          self.comment = b""              # Comment for each file
     403          self.extra = b""                # ZIP extra data
     404          if sys.platform == 'win32':
     405              self.create_system = 0          # System which created ZIP archive
     406          else:
     407              # Assume everything else is unix-y
     408              self.create_system = 3          # System which created ZIP archive
     409          self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
     410          self.extract_version = DEFAULT_VERSION # Version needed to extract archive
     411          self.reserved = 0               # Must be zero
     412          self.flag_bits = 0              # ZIP flag bits
     413          self.volume = 0                 # Volume number of file header
     414          self.internal_attr = 0          # Internal attributes
     415          self.external_attr = 0          # External file attributes
     416          self.compress_size = 0          # Size of the compressed file
     417          self.file_size = 0              # Size of the uncompressed file
     418          # Other attributes are set by class ZipFile:
     419          # header_offset         Byte offset to the file header
     420          # CRC                   CRC-32 of the uncompressed file
     421  
     422      def __repr__(self):
     423          result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
     424          if self.compress_type != ZIP_STORED:
     425              result.append(' compress_type=%s' %
     426                            compressor_names.get(self.compress_type,
     427                                                 self.compress_type))
     428          hi = self.external_attr >> 16
     429          lo = self.external_attr & 0xFFFF
     430          if hi:
     431              result.append(' filemode=%r' % stat.filemode(hi))
     432          if lo:
     433              result.append(' external_attr=%#x' % lo)
     434          isdir = self.is_dir()
     435          if not isdir or self.file_size:
     436              result.append(' file_size=%r' % self.file_size)
     437          if ((not isdir or self.compress_size) and
     438              (self.compress_type != ZIP_STORED or
     439               self.file_size != self.compress_size)):
     440              result.append(' compress_size=%r' % self.compress_size)
     441          result.append('>')
     442          return ''.join(result)
     443  
     444      def FileHeader(self, zip64=None):
     445          """Return the per-file header as a bytes object.
     446  
     447          When the optional zip64 arg is None rather than a bool, we will
     448          decide based upon the file_size and compress_size, if known,
     449          False otherwise.
     450          """
     451          dt = self.date_time
     452          dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
     453          dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
     454          if self.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
     455              # Set these to zero because we write them after the file data
     456              CRC = compress_size = file_size = 0
     457          else:
     458              CRC = self.CRC
     459              compress_size = self.compress_size
     460              file_size = self.file_size
     461  
     462          extra = self.extra
     463  
     464          min_version = 0
     465          if zip64 is None:
     466              # We always explicitly pass zip64 within this module.... This
     467              # remains for anyone using ZipInfo.FileHeader as a public API.
     468              zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
     469          if zip64:
     470              fmt = '<HHQQ'
     471              extra = extra + struct.pack(fmt,
     472                                          1, struct.calcsize(fmt)-4, file_size, compress_size)
     473              file_size = 0xffffffff
     474              compress_size = 0xffffffff
     475              min_version = ZIP64_VERSION
     476  
     477          if self.compress_type == ZIP_BZIP2:
     478              min_version = max(BZIP2_VERSION, min_version)
     479          elif self.compress_type == ZIP_LZMA:
     480              min_version = max(LZMA_VERSION, min_version)
     481  
     482          self.extract_version = max(min_version, self.extract_version)
     483          self.create_version = max(min_version, self.create_version)
     484          filename, flag_bits = self._encodeFilenameFlags()
     485          header = struct.pack(structFileHeader, stringFileHeader,
     486                               self.extract_version, self.reserved, flag_bits,
     487                               self.compress_type, dostime, dosdate, CRC,
     488                               compress_size, file_size,
     489                               len(filename), len(extra))
     490          return header + filename + extra
     491  
     492      def _encodeFilenameFlags(self):
     493          try:
     494              return self.filename.encode('ascii'), self.flag_bits
     495          except UnicodeEncodeError:
     496              return self.filename.encode('utf-8'), self.flag_bits | _MASK_UTF_FILENAME
     497  
     498      def _decodeExtra(self, filename_crc):
     499          # Try to decode the extra field.
     500          extra = self.extra
     501          unpack = struct.unpack
     502          while len(extra) >= 4:
     503              tp, ln = unpack('<HH', extra[:4])
     504              if ln+4 > len(extra):
     505                  raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
     506              if tp == 0x0001:
     507                  data = extra[4:ln+4]
     508                  # ZIP64 extension (large files and/or large archives)
     509                  try:
     510                      if self.file_size in (0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF):
     511                          field = "File size"
     512                          self.file_size, = unpack('<Q', data[:8])
     513                          data = data[8:]
     514                      if self.compress_size == 0xFFFF_FFFF:
     515                          field = "Compress size"
     516                          self.compress_size, = unpack('<Q', data[:8])
     517                          data = data[8:]
     518                      if self.header_offset == 0xFFFF_FFFF:
     519                          field = "Header offset"
     520                          self.header_offset, = unpack('<Q', data[:8])
     521                  except struct.error:
     522                      raise BadZipFile(f"Corrupt zip64 extra field. "
     523                                       f"{field} not found.") from None
     524              elif tp == 0x7075:
     525                  data = extra[4:ln+4]
     526                  # Unicode Path Extra Field
     527                  try:
     528                      up_version, up_name_crc = unpack('<BL', data[:5])
     529                      if up_version == 1 and up_name_crc == filename_crc:
     530                          up_unicode_name = data[5:].decode('utf-8')
     531                          if up_unicode_name:
     532                              self.filename = _sanitize_filename(up_unicode_name)
     533                          else:
     534                              warnings.warn("Empty unicode path extra field (0x7075)", stacklevel=2)
     535                  except struct.error as e:
     536                      raise BadZipFile("Corrupt unicode path extra field (0x7075)") from e
     537                  except UnicodeDecodeError as e:
     538                      raise BadZipFile('Corrupt unicode path extra field (0x7075): invalid utf-8 bytes') from e
     539  
     540              extra = extra[ln+4:]
     541  
     542      @classmethod
     543      def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
     544          """Construct an appropriate ZipInfo for a file on the filesystem.
     545  
     546          filename should be the path to a file or directory on the filesystem.
     547  
     548          arcname is the name which it will have within the archive (by default,
     549          this will be the same as filename, but without a drive letter and with
     550          leading path separators removed).
     551          """
     552          if isinstance(filename, os.PathLike):
     553              filename = os.fspath(filename)
     554          st = os.stat(filename)
     555          isdir = stat.S_ISDIR(st.st_mode)
     556          mtime = time.localtime(st.st_mtime)
     557          date_time = mtime[0:6]
     558          if not strict_timestamps and date_time[0] < 1980:
     559              date_time = (1980, 1, 1, 0, 0, 0)
     560          elif not strict_timestamps and date_time[0] > 2107:
     561              date_time = (2107, 12, 31, 23, 59, 59)
     562          # Create ZipInfo instance to store file information
     563          if arcname is None:
     564              arcname = filename
     565          arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
     566          while arcname[0] in (os.sep, os.altsep):
     567              arcname = arcname[1:]
     568          if isdir:
     569              arcname += '/'
     570          zinfo = cls(arcname, date_time)
     571          zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
     572          if isdir:
     573              zinfo.file_size = 0
     574              zinfo.external_attr |= 0x10  # MS-DOS directory flag
     575          else:
     576              zinfo.file_size = st.st_size
     577  
     578          return zinfo
     579  
     580      def is_dir(self):
     581          """Return True if this archive member is a directory."""
     582          return self.filename.endswith('/')
     583  
     584  
     585  # ZIP encryption uses the CRC32 one-byte primitive for scrambling some
     586  # internal keys. We noticed that a direct implementation is faster than
     587  # relying on binascii.crc32().
     588  
     589  _crctable = None
     590  def _gen_crc(crc):
     591      for j in range(8):
     592          if crc & 1:
     593              crc = (crc >> 1) ^ 0xEDB88320
     594          else:
     595              crc >>= 1
     596      return crc
     597  
     598  # ZIP supports a password-based form of encryption. Even though known
     599  # plaintext attacks have been found against it, it is still useful
     600  # to be able to get data out of such a file.
     601  #
     602  # Usage:
     603  #     zd = _ZipDecrypter(mypwd)
     604  #     plain_bytes = zd(cypher_bytes)
     605  
     606  def _ZipDecrypter(pwd):
     607      key0 = 305419896
     608      key1 = 591751049
     609      key2 = 878082192
     610  
     611      global _crctable
     612      if _crctable is None:
     613          _crctable = list(map(_gen_crc, range(256)))
     614      crctable = _crctable
     615  
     616      def crc32(ch, crc):
     617          """Compute the CRC32 primitive on one byte."""
     618          return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF]
     619  
     620      def update_keys(c):
     621          nonlocal key0, key1, key2
     622          key0 = crc32(c, key0)
     623          key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
     624          key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
     625          key2 = crc32(key1 >> 24, key2)
     626  
     627      for p in pwd:
     628          update_keys(p)
     629  
     630      def decrypter(data):
     631          """Decrypt a bytes object."""
     632          result = bytearray()
     633          append = result.append
     634          for c in data:
     635              k = key2 | 2
     636              c ^= ((k * (k^1)) >> 8) & 0xFF
     637              update_keys(c)
     638              append(c)
     639          return bytes(result)
     640  
     641      return decrypter
     642  
     643  
     644  class ESC[4;38;5;81mLZMACompressor:
     645  
     646      def __init__(self):
     647          self._comp = None
     648  
     649      def _init(self):
     650          props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
     651          self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[
     652              lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
     653          ])
     654          return struct.pack('<BBH', 9, 4, len(props)) + props
     655  
     656      def compress(self, data):
     657          if self._comp is None:
     658              return self._init() + self._comp.compress(data)
     659          return self._comp.compress(data)
     660  
     661      def flush(self):
     662          if self._comp is None:
     663              return self._init() + self._comp.flush()
     664          return self._comp.flush()
     665  
     666  
     667  class ESC[4;38;5;81mLZMADecompressor:
     668  
     669      def __init__(self):
     670          self._decomp = None
     671          self._unconsumed = b''
     672          self.eof = False
     673  
     674      def decompress(self, data):
     675          if self._decomp is None:
     676              self._unconsumed += data
     677              if len(self._unconsumed) <= 4:
     678                  return b''
     679              psize, = struct.unpack('<H', self._unconsumed[2:4])
     680              if len(self._unconsumed) <= 4 + psize:
     681                  return b''
     682  
     683              self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[
     684                  lzma._decode_filter_properties(lzma.FILTER_LZMA1,
     685                                                 self._unconsumed[4:4 + psize])
     686              ])
     687              data = self._unconsumed[4 + psize:]
     688              del self._unconsumed
     689  
     690          result = self._decomp.decompress(data)
     691          self.eof = self._decomp.eof
     692          return result
     693  
     694  
     695  compressor_names = {
     696      0: 'store',
     697      1: 'shrink',
     698      2: 'reduce',
     699      3: 'reduce',
     700      4: 'reduce',
     701      5: 'reduce',
     702      6: 'implode',
     703      7: 'tokenize',
     704      8: 'deflate',
     705      9: 'deflate64',
     706      10: 'implode',
     707      12: 'bzip2',
     708      14: 'lzma',
     709      18: 'terse',
     710      19: 'lz77',
     711      97: 'wavpack',
     712      98: 'ppmd',
     713  }
     714  
     715  def _check_compression(compression):
     716      if compression == ZIP_STORED:
     717          pass
     718      elif compression == ZIP_DEFLATED:
     719          if not zlib:
     720              raise RuntimeError(
     721                  "Compression requires the (missing) zlib module")
     722      elif compression == ZIP_BZIP2:
     723          if not bz2:
     724              raise RuntimeError(
     725                  "Compression requires the (missing) bz2 module")
     726      elif compression == ZIP_LZMA:
     727          if not lzma:
     728              raise RuntimeError(
     729                  "Compression requires the (missing) lzma module")
     730      else:
     731          raise NotImplementedError("That compression method is not supported")
     732  
     733  
     734  def _get_compressor(compress_type, compresslevel=None):
     735      if compress_type == ZIP_DEFLATED:
     736          if compresslevel is not None:
     737              return zlib.compressobj(compresslevel, zlib.DEFLATED, -15)
     738          return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15)
     739      elif compress_type == ZIP_BZIP2:
     740          if compresslevel is not None:
     741              return bz2.BZ2Compressor(compresslevel)
     742          return bz2.BZ2Compressor()
     743      # compresslevel is ignored for ZIP_LZMA
     744      elif compress_type == ZIP_LZMA:
     745          return LZMACompressor()
     746      else:
     747          return None
     748  
     749  
     750  def _get_decompressor(compress_type):
     751      _check_compression(compress_type)
     752      if compress_type == ZIP_STORED:
     753          return None
     754      elif compress_type == ZIP_DEFLATED:
     755          return zlib.decompressobj(-15)
     756      elif compress_type == ZIP_BZIP2:
     757          return bz2.BZ2Decompressor()
     758      elif compress_type == ZIP_LZMA:
     759          return LZMADecompressor()
     760      else:
     761          descr = compressor_names.get(compress_type)
     762          if descr:
     763              raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
     764          else:
     765              raise NotImplementedError("compression type %d" % (compress_type,))
     766  
     767  
     768  class ESC[4;38;5;81m_SharedFile:
     769      def __init__(self, file, pos, close, lock, writing):
     770          self._file = file
     771          self._pos = pos
     772          self._close = close
     773          self._lock = lock
     774          self._writing = writing
     775          self.seekable = file.seekable
     776  
     777      def tell(self):
     778          return self._pos
     779  
     780      def seek(self, offset, whence=0):
     781          with self._lock:
     782              if self._writing():
     783                  raise ValueError("Can't reposition in the ZIP file while "
     784                          "there is an open writing handle on it. "
     785                          "Close the writing handle before trying to read.")
     786              self._file.seek(offset, whence)
     787              self._pos = self._file.tell()
     788              return self._pos
     789  
     790      def read(self, n=-1):
     791          with self._lock:
     792              if self._writing():
     793                  raise ValueError("Can't read from the ZIP file while there "
     794                          "is an open writing handle on it. "
     795                          "Close the writing handle before trying to read.")
     796              self._file.seek(self._pos)
     797              data = self._file.read(n)
     798              self._pos = self._file.tell()
     799              return data
     800  
     801      def close(self):
     802          if self._file is not None:
     803              fileobj = self._file
     804              self._file = None
     805              self._close(fileobj)
     806  
# Provide a tell() method for unseekable streams
     808  class ESC[4;38;5;81m_Tellable:
     809      def __init__(self, fp):
     810          self.fp = fp
     811          self.offset = 0
     812  
     813      def write(self, data):
     814          n = self.fp.write(data)
     815          self.offset += n
     816          return n
     817  
     818      def tell(self):
     819          return self.offset
     820  
     821      def flush(self):
     822          self.fp.flush()
     823  
     824      def close(self):
     825          self.fp.close()
     826  
     827  
     828  class ESC[4;38;5;81mZipExtFile(ESC[4;38;5;149mioESC[4;38;5;149m.ESC[4;38;5;149mBufferedIOBase):
     829      """File-like object for reading an archive member.
     830         Is returned by ZipFile.open().
     831      """
     832  
     833      # Max size supported by decompressor.
     834      MAX_N = 1 << 31 - 1
     835  
     836      # Read from compressed files in 4k blocks.
     837      MIN_READ_SIZE = 4096
     838  
     839      # Chunk size to read during seek
     840      MAX_SEEK_READ = 1 << 24
     841  
     842      def __init__(self, fileobj, mode, zipinfo, pwd=None,
     843                   close_fileobj=False):
     844          self._fileobj = fileobj
     845          self._pwd = pwd
     846          self._close_fileobj = close_fileobj
     847  
     848          self._compress_type = zipinfo.compress_type
     849          self._compress_left = zipinfo.compress_size
     850          self._left = zipinfo.file_size
     851  
     852          self._decompressor = _get_decompressor(self._compress_type)
     853  
     854          self._eof = False
     855          self._readbuffer = b''
     856          self._offset = 0
     857  
     858          self.newlines = None
     859  
     860          self.mode = mode
     861          self.name = zipinfo.filename
     862  
     863          if hasattr(zipinfo, 'CRC'):
     864              self._expected_crc = zipinfo.CRC
     865              self._running_crc = crc32(b'')
     866          else:
     867              self._expected_crc = None
     868  
     869          self._seekable = False
     870          try:
     871              if fileobj.seekable():
     872                  self._orig_compress_start = fileobj.tell()
     873                  self._orig_compress_size = zipinfo.compress_size
     874                  self._orig_file_size = zipinfo.file_size
     875                  self._orig_start_crc = self._running_crc
     876                  self._orig_crc = self._expected_crc
     877                  self._seekable = True
     878          except AttributeError:
     879              pass
     880  
     881          self._decrypter = None
     882          if pwd:
     883              if zipinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
     884                  # compare against the file type from extended local headers
     885                  check_byte = (zipinfo._raw_time >> 8) & 0xff
     886              else:
     887                  # compare against the CRC otherwise
     888                  check_byte = (zipinfo.CRC >> 24) & 0xff
     889              h = self._init_decrypter()
     890              if h != check_byte:
     891                  raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename)
     892  
     893  
     894      def _init_decrypter(self):
     895          self._decrypter = _ZipDecrypter(self._pwd)
     896          # The first 12 bytes in the cypher stream is an encryption header
     897          #  used to strengthen the algorithm. The first 11 bytes are
     898          #  completely random, while the 12th contains the MSB of the CRC,
     899          #  or the MSB of the file time depending on the header type
     900          #  and is used to check the correctness of the password.
     901          header = self._fileobj.read(12)
     902          self._compress_left -= 12
     903          return self._decrypter(header)[11]
     904  
     905      def __repr__(self):
     906          result = ['<%s.%s' % (self.__class__.__module__,
     907                                self.__class__.__qualname__)]
     908          if not self.closed:
     909              result.append(' name=%r mode=%r' % (self.name, self.mode))
     910              if self._compress_type != ZIP_STORED:
     911                  result.append(' compress_type=%s' %
     912                                compressor_names.get(self._compress_type,
     913                                                     self._compress_type))
     914          else:
     915              result.append(' [closed]')
     916          result.append('>')
     917          return ''.join(result)
     918  
     919      def readline(self, limit=-1):
     920          """Read and return a line from the stream.
     921  
     922          If limit is specified, at most limit bytes will be read.
     923          """
     924  
     925          if limit < 0:
     926              # Shortcut common case - newline found in buffer.
     927              i = self._readbuffer.find(b'\n', self._offset) + 1
     928              if i > 0:
     929                  line = self._readbuffer[self._offset: i]
     930                  self._offset = i
     931                  return line
     932  
     933          return io.BufferedIOBase.readline(self, limit)
     934  
     935      def peek(self, n=1):
     936          """Returns buffered bytes without advancing the position."""
     937          if n > len(self._readbuffer) - self._offset:
     938              chunk = self.read(n)
     939              if len(chunk) > self._offset:
     940                  self._readbuffer = chunk + self._readbuffer[self._offset:]
     941                  self._offset = 0
     942              else:
     943                  self._offset -= len(chunk)
     944  
     945          # Return up to 512 bytes to reduce allocation overhead for tight loops.
     946          return self._readbuffer[self._offset: self._offset + 512]
     947  
     948      def readable(self):
     949          if self.closed:
     950              raise ValueError("I/O operation on closed file.")
     951          return True
     952  
     953      def read(self, n=-1):
     954          """Read and return up to n bytes.
     955          If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
     956          """
     957          if self.closed:
     958              raise ValueError("read from closed file.")
     959          if n is None or n < 0:
     960              buf = self._readbuffer[self._offset:]
     961              self._readbuffer = b''
     962              self._offset = 0
     963              while not self._eof:
     964                  buf += self._read1(self.MAX_N)
     965              return buf
     966  
     967          end = n + self._offset
     968          if end < len(self._readbuffer):
     969              buf = self._readbuffer[self._offset:end]
     970              self._offset = end
     971              return buf
     972  
     973          n = end - len(self._readbuffer)
     974          buf = self._readbuffer[self._offset:]
     975          self._readbuffer = b''
     976          self._offset = 0
     977          while n > 0 and not self._eof:
     978              data = self._read1(n)
     979              if n < len(data):
     980                  self._readbuffer = data
     981                  self._offset = n
     982                  buf += data[:n]
     983                  break
     984              buf += data
     985              n -= len(data)
     986          return buf
     987  
     988      def _update_crc(self, newdata):
     989          # Update the CRC using the given data.
     990          if self._expected_crc is None:
     991              # No need to compute the CRC if we don't have a reference value
     992              return
     993          self._running_crc = crc32(newdata, self._running_crc)
     994          # Check the CRC if we're at the end of the file
     995          if self._eof and self._running_crc != self._expected_crc:
     996              raise BadZipFile("Bad CRC-32 for file %r" % self.name)
     997  
     998      def read1(self, n):
     999          """Read up to n bytes with at most one read() system call."""
    1000  
    1001          if n is None or n < 0:
    1002              buf = self._readbuffer[self._offset:]
    1003              self._readbuffer = b''
    1004              self._offset = 0
    1005              while not self._eof:
    1006                  data = self._read1(self.MAX_N)
    1007                  if data:
    1008                      buf += data
    1009                      break
    1010              return buf
    1011  
    1012          end = n + self._offset
    1013          if end < len(self._readbuffer):
    1014              buf = self._readbuffer[self._offset:end]
    1015              self._offset = end
    1016              return buf
    1017  
    1018          n = end - len(self._readbuffer)
    1019          buf = self._readbuffer[self._offset:]
    1020          self._readbuffer = b''
    1021          self._offset = 0
    1022          if n > 0:
    1023              while not self._eof:
    1024                  data = self._read1(n)
    1025                  if n < len(data):
    1026                      self._readbuffer = data
    1027                      self._offset = n
    1028                      buf += data[:n]
    1029                      break
    1030                  if data:
    1031                      buf += data
    1032                      break
    1033          return buf
    1034  
    1035      def _read1(self, n):
    1036          # Read up to n compressed bytes with at most one read() system call,
    1037          # decrypt and decompress them.
    1038          if self._eof or n <= 0:
    1039              return b''
    1040  
    1041          # Read from file.
    1042          if self._compress_type == ZIP_DEFLATED:
    1043              ## Handle unconsumed data.
    1044              data = self._decompressor.unconsumed_tail
    1045              if n > len(data):
    1046                  data += self._read2(n - len(data))
    1047          else:
    1048              data = self._read2(n)
    1049  
    1050          if self._compress_type == ZIP_STORED:
    1051              self._eof = self._compress_left <= 0
    1052          elif self._compress_type == ZIP_DEFLATED:
    1053              n = max(n, self.MIN_READ_SIZE)
    1054              data = self._decompressor.decompress(data, n)
    1055              self._eof = (self._decompressor.eof or
    1056                           self._compress_left <= 0 and
    1057                           not self._decompressor.unconsumed_tail)
    1058              if self._eof:
    1059                  data += self._decompressor.flush()
    1060          else:
    1061              data = self._decompressor.decompress(data)
    1062              self._eof = self._decompressor.eof or self._compress_left <= 0
    1063  
    1064          data = data[:self._left]
    1065          self._left -= len(data)
    1066          if self._left <= 0:
    1067              self._eof = True
    1068          self._update_crc(data)
    1069          return data
    1070  
    1071      def _read2(self, n):
    1072          if self._compress_left <= 0:
    1073              return b''
    1074  
    1075          n = max(n, self.MIN_READ_SIZE)
    1076          n = min(n, self._compress_left)
    1077  
    1078          data = self._fileobj.read(n)
    1079          self._compress_left -= len(data)
    1080          if not data:
    1081              raise EOFError
    1082  
    1083          if self._decrypter is not None:
    1084              data = self._decrypter(data)
    1085          return data
    1086  
    1087      def close(self):
    1088          try:
    1089              if self._close_fileobj:
    1090                  self._fileobj.close()
    1091          finally:
    1092              super().close()
    1093  
    1094      def seekable(self):
    1095          if self.closed:
    1096              raise ValueError("I/O operation on closed file.")
    1097          return self._seekable
    1098  
    1099      def seek(self, offset, whence=os.SEEK_SET):
    1100          if self.closed:
    1101              raise ValueError("seek on closed file.")
    1102          if not self._seekable:
    1103              raise io.UnsupportedOperation("underlying stream is not seekable")
    1104          curr_pos = self.tell()
    1105          if whence == os.SEEK_SET:
    1106              new_pos = offset
    1107          elif whence == os.SEEK_CUR:
    1108              new_pos = curr_pos + offset
    1109          elif whence == os.SEEK_END:
    1110              new_pos = self._orig_file_size + offset
    1111          else:
    1112              raise ValueError("whence must be os.SEEK_SET (0), "
    1113                               "os.SEEK_CUR (1), or os.SEEK_END (2)")
    1114  
    1115          if new_pos > self._orig_file_size:
    1116              new_pos = self._orig_file_size
    1117  
    1118          if new_pos < 0:
    1119              new_pos = 0
    1120  
    1121          read_offset = new_pos - curr_pos
    1122          buff_offset = read_offset + self._offset
    1123  
    1124          # Fast seek uncompressed unencrypted file
    1125          if self._compress_type == ZIP_STORED and self._decrypter is None and read_offset > 0:
    1126              # disable CRC checking after first seeking - it would be invalid
    1127              self._expected_crc = None
    1128              # seek actual file taking already buffered data into account
    1129              read_offset -= len(self._readbuffer) - self._offset
    1130              self._fileobj.seek(read_offset, os.SEEK_CUR)
    1131              self._left -= read_offset
    1132              read_offset = 0
    1133              # flush read buffer
    1134              self._readbuffer = b''
    1135              self._offset = 0
    1136          elif buff_offset >= 0 and buff_offset < len(self._readbuffer):
    1137              # Just move the _offset index if the new position is in the _readbuffer
    1138              self._offset = buff_offset
    1139              read_offset = 0
    1140          elif read_offset < 0:
    1141              # Position is before the current position. Reset the ZipExtFile
    1142              self._fileobj.seek(self._orig_compress_start)
    1143              self._running_crc = self._orig_start_crc
    1144              self._expected_crc = self._orig_crc
    1145              self._compress_left = self._orig_compress_size
    1146              self._left = self._orig_file_size
    1147              self._readbuffer = b''
    1148              self._offset = 0
    1149              self._decompressor = _get_decompressor(self._compress_type)
    1150              self._eof = False
    1151              read_offset = new_pos
    1152              if self._decrypter is not None:
    1153                  self._init_decrypter()
    1154  
    1155          while read_offset > 0:
    1156              read_len = min(self.MAX_SEEK_READ, read_offset)
    1157              self.read(read_len)
    1158              read_offset -= read_len
    1159  
    1160          return self.tell()
    1161  
    1162      def tell(self):
    1163          if self.closed:
    1164              raise ValueError("tell on closed file.")
    1165          if not self._seekable:
    1166              raise io.UnsupportedOperation("underlying stream is not seekable")
    1167          filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
    1168          return filepos
    1169  
    1170  
    1171  class ESC[4;38;5;81m_ZipWriteFile(ESC[4;38;5;149mioESC[4;38;5;149m.ESC[4;38;5;149mBufferedIOBase):
    1172      def __init__(self, zf, zinfo, zip64):
    1173          self._zinfo = zinfo
    1174          self._zip64 = zip64
    1175          self._zipfile = zf
    1176          self._compressor = _get_compressor(zinfo.compress_type,
    1177                                             zinfo._compresslevel)
    1178          self._file_size = 0
    1179          self._compress_size = 0
    1180          self._crc = 0
    1181  
    1182      @property
    1183      def _fileobj(self):
    1184          return self._zipfile.fp
    1185  
    1186      def writable(self):
    1187          return True
    1188  
    1189      def write(self, data):
    1190          if self.closed:
    1191              raise ValueError('I/O operation on closed file.')
    1192  
    1193          # Accept any data that supports the buffer protocol
    1194          if isinstance(data, (bytes, bytearray)):
    1195              nbytes = len(data)
    1196          else:
    1197              data = memoryview(data)
    1198              nbytes = data.nbytes
    1199          self._file_size += nbytes
    1200  
    1201          self._crc = crc32(data, self._crc)
    1202          if self._compressor:
    1203              data = self._compressor.compress(data)
    1204              self._compress_size += len(data)
    1205          self._fileobj.write(data)
    1206          return nbytes
    1207  
    1208      def close(self):
    1209          if self.closed:
    1210              return
    1211          try:
    1212              super().close()
    1213              # Flush any data from the compressor, and update header info
    1214              if self._compressor:
    1215                  buf = self._compressor.flush()
    1216                  self._compress_size += len(buf)
    1217                  self._fileobj.write(buf)
    1218                  self._zinfo.compress_size = self._compress_size
    1219              else:
    1220                  self._zinfo.compress_size = self._file_size
    1221              self._zinfo.CRC = self._crc
    1222              self._zinfo.file_size = self._file_size
    1223  
    1224              if not self._zip64:
    1225                  if self._file_size > ZIP64_LIMIT:
    1226                      raise RuntimeError("File size too large, try using force_zip64")
    1227                  if self._compress_size > ZIP64_LIMIT:
    1228                      raise RuntimeError("Compressed size too large, try using force_zip64")
    1229  
    1230              # Write updated header info
    1231              if self._zinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
    1232                  # Write CRC and file sizes after the file data
    1233                  fmt = '<LLQQ' if self._zip64 else '<LLLL'
    1234                  self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
    1235                      self._zinfo.compress_size, self._zinfo.file_size))
    1236                  self._zipfile.start_dir = self._fileobj.tell()
    1237              else:
    1238                  # Seek backwards and write file header (which will now include
    1239                  # correct CRC and file sizes)
    1240  
    1241                  # Preserve current position in file
    1242                  self._zipfile.start_dir = self._fileobj.tell()
    1243                  self._fileobj.seek(self._zinfo.header_offset)
    1244                  self._fileobj.write(self._zinfo.FileHeader(self._zip64))
    1245                  self._fileobj.seek(self._zipfile.start_dir)
    1246  
    1247              # Successfully written: Add file to our caches
    1248              self._zipfile.filelist.append(self._zinfo)
    1249              self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
    1250          finally:
    1251              self._zipfile._writing = False
    1252  
    1253  
    1254  
    1255  class ESC[4;38;5;81mZipFile:
    1256      """ Class with methods to open, read, write, close, list zip files.
    1257  
    1258      z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
    1259                  compresslevel=None)
    1260  
    1261      file: Either the path to the file, or a file-like object.
    1262            If it is a path, the file will be opened and closed by ZipFile.
    1263      mode: The mode can be either read 'r', write 'w', exclusive create 'x',
    1264            or append 'a'.
    1265      compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
    1266                   ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
    1267      allowZip64: if True ZipFile will create files with ZIP64 extensions when
    1268                  needed, otherwise it will raise an exception when this would
    1269                  be necessary.
    1270      compresslevel: None (default for the given compression type) or an integer
    1271                     specifying the level to pass to the compressor.
    1272                     When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
    1273                     When using ZIP_DEFLATED integers 0 through 9 are accepted.
    1274                     When using ZIP_BZIP2 integers 1 through 9 are accepted.
    1275  
    1276      """
    1277  
    1278      fp = None                   # Set here since __del__ checks it
    1279      _windows_illegal_name_trans_table = None
    1280  
    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
                 compresslevel=None, *, strict_timestamps=True, metadata_encoding=None):
        """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
        or append 'a'."""
        if mode not in ('r', 'w', 'x', 'a'):
            raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

        _check_compression(compression)

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.compresslevel = compresslevel
        self.mode = mode
        self.pwd = None
        self._comment = b''
        self._strict_timestamps = strict_timestamps
        self.metadata_encoding = metadata_encoding

        # Check that we don't try to write with nonconforming codecs
        if self.metadata_encoding and mode != 'r':
            raise ValueError(
                "metadata_encoding is only supported for reading files")

        # Path-like objects are reduced to their file system representation.
        if isinstance(file, os.PathLike):
            file = os.fspath(file)

        if not isinstance(file, str):
            # A file-like object was passed in; use it as is.
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)
        else:
            # A filename: open it, falling back to less demanding modes.
            self._filePassed = 0
            self.filename = file
            # Maps the ZipFile mode to the first open() mode to try, and
            # each open() mode to the one to try after an OSError.
            next_mode = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                         'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
            filemode = next_mode[mode]
            while True:
                try:
                    self.fp = io.open(file, filemode)
                except OSError:
                    if filemode not in next_mode:
                        raise
                    filemode = next_mode[filemode]
                else:
                    break

        self._fileRefCnt = 1
        self._lock = threading.RLock()
        self._seekable = True
        self._writing = False

        try:
            if mode == 'r':
                self._RealGetContents()
            elif mode in ('w', 'x'):
                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                try:
                    self.start_dir = self.fp.tell()
                except (AttributeError, OSError):
                    # No usable tell(): count the written bytes ourselves.
                    self.fp = _Tellable(self.fp)
                    self.start_dir = 0
                    self._seekable = False
                else:
                    # Some file-like objects can provide tell() but not seek()
                    try:
                        self.fp.seek(self.start_dir)
                    except (AttributeError, OSError):
                        self._seekable = False
            elif mode == 'a':
                try:
                    # See if file is a zip file
                    self._RealGetContents()
                except BadZipFile:
                    # file is not a zip file, just append
                    self.fp.seek(0, 2)

                    # set the modified flag so central directory gets written
                    # even if no files are added to the archive
                    self._didModify = True
                    self.start_dir = self.fp.tell()
                else:
                    # seek to start of directory and overwrite
                    self.fp.seek(self.start_dir)
            else:
                raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
        except BaseException:
            # Release the file before propagating any failure.
            fp = self.fp
            self.fp = None
            self._fpclose(fp)
            raise
    1376  
    1377      def __enter__(self):
    1378          return self
    1379  
    1380      def __exit__(self, type, value, traceback):
    1381          self.close()
    1382  
    1383      def __repr__(self):
    1384          result = ['<%s.%s' % (self.__class__.__module__,
    1385                                self.__class__.__qualname__)]
    1386          if self.fp is not None:
    1387              if self._filePassed:
    1388                  result.append(' file=%r' % self.fp)
    1389              elif self.filename is not None:
    1390                  result.append(' filename=%r' % self.filename)
    1391              result.append(' mode=%r' % self.mode)
    1392          else:
    1393              result.append(' [closed]')
    1394          result.append('>')
    1395          return ''.join(result)
    1396  
    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Locates the end-of-archive record with _EndRecData(), reads the
        central directory into memory and builds one ZipInfo per entry.
        Each ZipInfo is appended to self.filelist and stored in
        self.NameToInfo under its filename.  Also sets self._comment and
        self.start_dir.

        Raises BadZipFile when no end record is found (or looking for it
        fails with OSError), when the computed central directory position
        is negative, or when an entry is truncated or carries the wrong
        signature.  Raises NotImplementedError when an entry's
        extract_version exceeds MAX_EXTRACT_VERSION.
        """
        fp = self.fp
        try:
            endrec = _EndRecData(fp)
        except OSError:
            raise BadZipFile("File is not a zip file")
        if not endrec:
            raise BadZipFile("File is not a zip file")
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
        self._comment = endrec[_ECD_COMMENT]    # archive comment

        # "concat" is zero, unless zip was concatenated to another file
        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
            # If Zip64 extension structures are present, account for them
            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

        if self.debug > 2:
            inferred = concat + offset_cd
            print("given, inferred, offset", offset_cd, inferred, concat)
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        if self.start_dir < 0:
            raise BadZipFile("Bad offset for central directory")
        fp.seek(self.start_dir, 0)
        data = fp.read(size_cd)
        # From here on "fp" is an in-memory copy of the central directory,
        # not the archive file itself.
        fp = io.BytesIO(data)
        total = 0
        while total < size_cd:
            # Fixed-size part of the next central directory record
            centdir = fp.read(sizeCentralDir)
            if len(centdir) != sizeCentralDir:
                raise BadZipFile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if centdir[_CD_SIGNATURE] != stringCentralDir:
                raise BadZipFile("Bad magic number for central directory")
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # CRC of the raw (still undecoded) name; passed to
            # _decodeExtra() below.
            orig_filename_crc = crc32(filename)
            flags = centdir[_CD_FLAG_BITS]
            if flags & _MASK_UTF_FILENAME:
                # UTF-8 file names extension
                filename = filename.decode('utf-8')
            else:
                # Historical ZIP filename encoding
                filename = filename.decode(self.metadata_encoding or 'cp437')
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            # The variable-length extra field and comment follow the name
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            # Fields 1..11 of the unpacked record; t and d are the raw
            # time and date codes decoded further down.
            (x.create_version, x.create_system, x.extract_version, x.reserved,
             x.flag_bits, x.compress_type, t, d,
             x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            if x.extract_version > MAX_EXTRACT_VERSION:
                raise NotImplementedError("zip file version %.1f" %
                                          (x.extract_version / 10))
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x._raw_time = t
            # Years are stored relative to 1980 and seconds in units of two
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
            x._decodeExtra(orig_filename_crc)
            # Shift the recorded offset by the size of any data that
            # precedes the archive (see "concat" above).
            x.header_offset = x.header_offset + concat
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x

            # update total bytes read from central directory
            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])

            if self.debug > 2:
                print("total", total)
    1475  
    1476  
    def namelist(self):
        """Return the names of all archive members as a new list, in the
        order of self.filelist."""
        names = []
        for entry in self.filelist:
            names.append(entry.filename)
        return names
    1480  
    def infolist(self):
        """Return the list of ZipInfo instances, one per archive member.

        The object returned is the archive's own list, not a copy.
        """
        members = self.filelist
        return members
    1485  
    def printdir(self, file=None):
        """Print a table of contents for the zip file.

        A header line is printed first, then one line per member giving
        its name, modification time and uncompressed size.  `file` is
        handed to print() unchanged.
        """
        heading = "%-46s %19s %12s" % ("File Name", "Modified    ", "Size")
        print(heading, file=file)
        for entry in self.filelist:
            stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time[:6]
            row = "%-46s %s %12d" % (entry.filename, stamp, entry.file_size)
            print(row, file=file)
    1494  
    def testzip(self):
        """Read every member in full so that its CRC-32 gets checked.

        Return the name of the first member whose reading fails with
        BadZipFile, or None if all members could be read successfully.
        """
        chunk_size = 2 ** 20
        for zinfo in self.filelist:
            try:
                # Consume the member piece by piece instead of in one call,
                # to avoid an OverflowError or a MemoryError with very
                # large embedded files.
                with self.open(zinfo.filename, "r") as member:
                    piece = member.read(chunk_size)     # Check CRC-32
                    while piece:
                        piece = member.read(chunk_size)
            except BadZipFile:
                return zinfo.filename
        return None
    1510  
    def getinfo(self, name):
        """Return the ZipInfo instance stored for member 'name'.

        Raises KeyError if the archive has no member of that name.
        """
        info = self.NameToInfo.get(name)
        if info is not None:
            return info
        raise KeyError(
            'There is no item named %r in the archive' % name)
    1519  
    def setpassword(self, pwd):
        """Set the default password used for encrypted files.

        A true `pwd` must be a bytes object, otherwise TypeError is raised.
        Any false value (such as None or b'') resets the default password
        to None.
        """
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        self.pwd = pwd if pwd else None
    1528  
    @property
    def comment(self):
        """The archive-level comment of the ZIP file, as bytes."""
        return self._comment

    @comment.setter
    def comment(self, comment):
        # Only bytes are accepted as an archive comment.
        if not isinstance(comment, bytes):
            raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
        # A comment longer than ZIP_MAX_COMMENT is cut down to that length,
        # and the caller is warned about it.
        too_long = len(comment) > ZIP_MAX_COMMENT
        if too_long:
            import warnings
            warnings.warn('Archive comment is too long; truncating to %d bytes'
                          % ZIP_MAX_COMMENT, stacklevel=2)
            comment = comment[:ZIP_MAX_COMMENT]
        self._comment = comment
        # Mark the archive as modified so that close() rewrites the
        # ending records.
        self._didModify = True
    1546  
    def read(self, name, pwd=None):
        """Return the complete contents of member 'name'.

        `name` and `pwd` are passed to open() in mode "r"; the handle is
        closed again before returning.
        """
        with self.open(name, "r", pwd) as member:
            payload = member.read()
        return payload
    1551  
    def open(self, name, mode="r", pwd=None, *, force_zip64=False):
        """Return file-like object for 'name'.

        name is a string for the file name within the ZIP file, or a ZipInfo
        object.

        mode should be 'r' to read a file already in the ZIP file, or 'w' to
        write to a file newly added to the archive.

        pwd is the password to decrypt files (only used for reading).

        When writing, if the file size is not known in advance but may exceed
        2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
        files.  If the size is known in advance, it is best to pass a ZipInfo
        instance for name, with zinfo.file_size set.

        Raises ValueError for any other mode, for pwd combined with mode
        'w', when the archive is already closed, or when reading while a
        writing handle is open.  When reading, raises KeyError (through
        getinfo()) for an unknown name, BadZipFile when the local file
        header is truncated, has a bad signature or names a different file
        than the central directory, NotImplementedError for compressed
        patched data or strong encryption, TypeError when the password for
        an encrypted member is not bytes, and RuntimeError when an
        encrypted member has no password at all.
        """
        if mode not in {"r", "w"}:
            raise ValueError('open() requires mode "r" or "w"')
        if pwd and (mode == "w"):
            raise ValueError("pwd is only supported for reading files")
        if not self.fp:
            raise ValueError(
                "Attempt to use ZIP archive that was already closed")

        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        elif mode == 'w':
            # New member: inherit the archive-wide compression settings
            zinfo = ZipInfo(name)
            zinfo.compress_type = self.compression
            zinfo._compresslevel = self.compresslevel
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        if mode == 'w':
            return self._open_to_write(zinfo, force_zip64=force_zip64)

        if self._writing:
            raise ValueError("Can't read from the ZIP file while there "
                    "is an open writing handle on it. "
                    "Close the writing handle before trying to read.")

        # Open for reading:
        # Count the new reader first; self._fpclose is handed to the
        # _SharedFile (presumably called when that reader is closed --
        # confirm in _SharedFile).
        self._fileRefCnt += 1
        zef_file = _SharedFile(self.fp, zinfo.header_offset,
                               self._fpclose, self._lock, lambda: self._writing)
        try:
            # Skip the file header:
            fheader = zef_file.read(sizeFileHeader)
            if len(fheader) != sizeFileHeader:
                raise BadZipFile("Truncated file header")
            fheader = struct.unpack(structFileHeader, fheader)
            if fheader[_FH_SIGNATURE] != stringFileHeader:
                raise BadZipFile("Bad magic number for file header")

            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
            # The extra field is not needed here; just step over it
            if fheader[_FH_EXTRA_FIELD_LENGTH]:
                zef_file.seek(fheader[_FH_EXTRA_FIELD_LENGTH], whence=1)

            if zinfo.flag_bits & _MASK_COMPRESSED_PATCH:
                # Zip 2.7: compressed patched data
                raise NotImplementedError("compressed patched data (flag bit 5)")

            if zinfo.flag_bits & _MASK_STRONG_ENCRYPTION:
                # strong encryption
                raise NotImplementedError("strong encryption (flag bit 6)")

            # The local header's own flag bits decide how its name is decoded
            if fheader[_FH_GENERAL_PURPOSE_FLAG_BITS] & _MASK_UTF_FILENAME:
                # UTF-8 filename
                fname_str = fname.decode("utf-8")
            else:
                fname_str = fname.decode(self.metadata_encoding or "cp437")

            # The name in the local header must match the central directory
            if fname_str != zinfo.orig_filename:
                raise BadZipFile(
                    'File name in directory %r and header %r differ.'
                    % (zinfo.orig_filename, fname))

            # check for encrypted flag & handle password
            is_encrypted = zinfo.flag_bits & _MASK_ENCRYPTED
            if is_encrypted:
                # Fall back to the archive's default password
                if not pwd:
                    pwd = self.pwd
                if pwd and not isinstance(pwd, bytes):
                    raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
                if not pwd:
                    raise RuntimeError("File %r is encrypted, password "
                                       "required for extraction" % name)
            else:
                # A password given for an unencrypted member is ignored
                pwd = None

            return ZipExtFile(zef_file, mode, zinfo, pwd, True)
        except:
            # Whatever went wrong, close the reader again before re-raising
            zef_file.close()
            raise
    1649  
    def _open_to_write(self, zinfo, force_zip64=False):
        """Write the local file header for `zinfo` and return a writing
        handle (_ZipWriteFile) for its data.

        Raises ValueError if force_zip64 is requested although ZIP64 was
        disallowed for this archive, or if another writing handle is still
        open.  Raises LargeZipFile if the member needs ZIP64 but ZIP64 is
        disallowed.  _writecheck() may raise as well.
        """
        if force_zip64 and not self._allowZip64:
            raise ValueError(
                "force_zip64 is True, but allowZip64 was False when opening "
                "the ZIP file."
            )
        if self._writing:
            raise ValueError("Can't write to the ZIP file while there is "
                             "another write handle open on it. "
                             "Close the first handle before opening another.")

        # Placeholders; size and CRC are overwritten with correct data
        # after processing the file
        zinfo.compress_size = 0
        zinfo.CRC = 0

        zinfo.flag_bits = 0x00
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= _MASK_COMPRESS_OPTION_1
        if not self._seekable:
            zinfo.flag_bits |= _MASK_USE_DATA_DESCRIPTOR

        # Default permissions when none were given: ?rw-------
        if not zinfo.external_attr:
            zinfo.external_attr = 0o600 << 16

        # The 5% margin is there because the compressed size can be larger
        # than the uncompressed size
        needs_zip64 = force_zip64 or (zinfo.file_size * 1.05 > ZIP64_LIMIT)
        if needs_zip64 and not self._allowZip64:
            raise LargeZipFile("Filesize would require ZIP64 extensions")

        # The new member starts where the central directory would start
        if self._seekable:
            self.fp.seek(self.start_dir)
        zinfo.header_offset = self.fp.tell()

        self._writecheck(zinfo)
        self._didModify = True

        header = zinfo.FileHeader(needs_zip64)
        self.fp.write(header)

        self._writing = True
        return _ZipWriteFile(self, zinfo, needs_zip64)
    1691  
    def extract(self, member, path=None, pwd=None):
        """Extract a member from the archive to the current working directory,
           using its full name. Its file information is extracted as accurately
           as possible. `member' may be a filename or a ZipInfo object. You can
           specify a different directory using `path'.

           Returns whatever _extract_member() returns for the member.
        """
        destination = os.getcwd() if path is None else os.fspath(path)
        return self._extract_member(member, destination, pwd)
    1704  
    def extractall(self, path=None, members=None, pwd=None):
        """Extract all members from the archive to the current working
           directory. `path' specifies a different directory to extract to.
           `members' is optional and must be a subset of the list returned
           by namelist().  `pwd' is passed on for every member.
        """
        if members is None:
            members = self.namelist()

        destination = os.getcwd() if path is None else os.fspath(path)

        for entry in members:
            self._extract_member(entry, destination, pwd)
    1721  
    1722      @classmethod
    1723      def _sanitize_windows_name(cls, arcname, pathsep):
    1724          """Replace bad characters and remove trailing dots from parts."""
    1725          table = cls._windows_illegal_name_trans_table
    1726          if not table:
    1727              illegal = ':<>|"?*'
    1728              table = str.maketrans(illegal, '_' * len(illegal))
    1729              cls._windows_illegal_name_trans_table = table
    1730          arcname = arcname.translate(table)
    1731          # remove trailing dots and spaces
    1732          arcname = (x.rstrip(' .') for x in arcname.split(pathsep))
    1733          # rejoin, removing empty parts.
    1734          arcname = pathsep.join(x for x in arcname if x)
    1735          return arcname
    1736  
    def _extract_member(self, member, targetpath, pwd):
        """Extract the ZipInfo object 'member' to a physical
           file on the path targetpath.

           'member' may also be a member name, which is looked up with
           getinfo().  The member's name is made relative first: drive
           letter or UNC path, redundant separators and "." / ".."
           components are removed.  Missing parent directories are created.

           Returns the normalized path of the extracted file or directory.
           Raises ValueError if nothing is left of the member's name.
        """
        if not isinstance(member, ZipInfo):
            member = self.getinfo(member)

        # build the destination pathname, replacing
        # forward slashes to platform specific separators.
        arcname = member.filename.replace('/', os.path.sep)

        if os.path.altsep:
            arcname = arcname.replace(os.path.altsep, os.path.sep)
        # interpret absolute pathname as relative, remove drive letter or
        # UNC path, redundant separators, "." and ".." components.
        arcname = os.path.splitdrive(arcname)[1]
        invalid_path_parts = ('', os.path.curdir, os.path.pardir)
        arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
                                   if x not in invalid_path_parts)
        if os.path.sep == '\\':
            # filter illegal characters on Windows
            arcname = self._sanitize_windows_name(arcname, os.path.sep)

        if not arcname:
            raise ValueError("Empty filename.")

        targetpath = os.path.join(targetpath, arcname)
        targetpath = os.path.normpath(targetpath)

        # Create all upper directories if necessary.
        upperdirs = os.path.dirname(targetpath)
        if upperdirs and not os.path.exists(upperdirs):
            # exist_ok: another thread or process may create the directory
            # between the check above and this call.
            os.makedirs(upperdirs, exist_ok=True)

        if member.is_dir():
            if not os.path.isdir(targetpath):
                try:
                    os.mkdir(targetpath)
                except FileExistsError:
                    # Losing a race against a concurrent creator is fine,
                    # as long as what exists now really is a directory.
                    if not os.path.isdir(targetpath):
                        raise
            return targetpath

        with self.open(member, pwd=pwd) as source, \
             open(targetpath, "wb") as target:
            shutil.copyfileobj(source, target)

        return targetpath
    1781  
    1782      def _writecheck(self, zinfo):
    1783          """Check for errors before writing a file to the archive."""
    1784          if zinfo.filename in self.NameToInfo:
    1785              import warnings
    1786              warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
    1787          if self.mode not in ('w', 'x', 'a'):
    1788              raise ValueError("write() requires mode 'w', 'x', or 'a'")
    1789          if not self.fp:
    1790              raise ValueError(
    1791                  "Attempt to write ZIP archive that was already closed")
    1792          _check_compression(zinfo.compress_type)
    1793          if not self._allowZip64:
    1794              requires_zip64 = None
    1795              if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
    1796                  requires_zip64 = "Files count"
    1797              elif zinfo.file_size > ZIP64_LIMIT:
    1798                  requires_zip64 = "Filesize"
    1799              elif zinfo.header_offset > ZIP64_LIMIT:
    1800                  requires_zip64 = "Zipfile size"
    1801              if requires_zip64:
    1802                  raise LargeZipFile(requires_zip64 +
    1803                                     " would require ZIP64 extensions")
    1804  
    def write(self, filename, arcname=None,
              compress_type=None, compresslevel=None):
        """Put the bytes from filename into the archive under the name
        arcname.

        compress_type and compresslevel override the archive-wide settings
        for this member when given.  A directory is added through mkdir().
        Raises ValueError if the archive is closed or a writing handle is
        open on it.
        """
        if not self.fp:
            raise ValueError(
                "Attempt to write to ZIP archive that was already closed")
        if self._writing:
            raise ValueError(
                "Can't write to ZIP archive while an open writing handle exists"
            )

        zinfo = ZipInfo.from_file(filename, arcname,
                                  strict_timestamps=self._strict_timestamps)

        if zinfo.is_dir():
            # A directory has no data of its own
            zinfo.compress_size = 0
            zinfo.CRC = 0
            self.mkdir(zinfo)
            return

        zinfo.compress_type = (self.compression if compress_type is None
                               else compress_type)
        zinfo._compresslevel = (self.compresslevel if compresslevel is None
                                else compresslevel)

        with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
            shutil.copyfileobj(src, dest, 1024*8)
    1837  
    def writestr(self, zinfo_or_arcname, data,
                 compress_type=None, compresslevel=None):
        """Write a file into the archive.  The contents is 'data', which
        may be either a 'str' or a 'bytes' instance; if it is a 'str',
        it is encoded as UTF-8 first.
        'zinfo_or_arcname' is either a ZipInfo instance or
        the name of the file in the archive.
        compress_type and compresslevel, when given, override the values
        set on the ZipInfo.  Raises ValueError if the archive is closed or
        a writing handle is open on it."""
        if isinstance(data, str):
            data = data.encode("utf-8")

        if isinstance(zinfo_or_arcname, ZipInfo):
            zinfo = zinfo_or_arcname
        else:
            # Only a name was given: describe the member with the current
            # local time and the archive-wide compression settings.
            zinfo = ZipInfo(filename=zinfo_or_arcname,
                            date_time=time.localtime(time.time())[:6])
            zinfo.compress_type = self.compression
            zinfo._compresslevel = self.compresslevel
            if zinfo.filename.endswith('/'):
                # drwxrwxr-x plus the MS-DOS directory flag
                zinfo.external_attr = (0o40775 << 16) | 0x10
            else:
                zinfo.external_attr = 0o600 << 16     # ?rw-------

        if not self.fp:
            raise ValueError(
                "Attempt to write to ZIP archive that was already closed")
        if self._writing:
            raise ValueError(
                "Can't write to ZIP archive while an open writing handle exists."
            )

        if compress_type is not None:
            zinfo.compress_type = compress_type
        if compresslevel is not None:
            zinfo._compresslevel = compresslevel

        zinfo.file_size = len(data)            # Uncompressed size
        with self._lock, self.open(zinfo, mode='w') as dest:
            dest.write(data)
    1878  
    def mkdir(self, zinfo_or_directory_name, mode=511):
        """Creates a directory inside the zip archive.

        `zinfo_or_directory_name` is either a ZipInfo describing a
        directory or a str name; a trailing "/" is appended to a str name
        that lacks one.  `mode` gives the permission bits stored for a
        directory given by name (default 511, i.e. 0o777); it is not used
        when a ZipInfo is passed.

        Raises ValueError if the archive is closed, if a writing handle is
        still open on it, or if the given ZipInfo is not a directory, and
        TypeError for any other argument type.  _writecheck() may raise as
        well.
        """
        # Refuse early, like write() and writestr() do: seeking on a closed
        # archive would fail with an AttributeError, and writing a header
        # while a member is still being written would overwrite that
        # member's data.
        if not self.fp:
            raise ValueError(
                "Attempt to write to ZIP archive that was already closed")
        if self._writing:
            raise ValueError(
                "Can't write to ZIP archive while an open writing handle exists"
            )

        if isinstance(zinfo_or_directory_name, ZipInfo):
            zinfo = zinfo_or_directory_name
            if not zinfo.is_dir():
                raise ValueError("The given ZipInfo does not describe a directory")
        elif isinstance(zinfo_or_directory_name, str):
            directory_name = zinfo_or_directory_name
            if not directory_name.endswith("/"):
                directory_name += "/"
            zinfo = ZipInfo(directory_name)
            zinfo.compress_size = 0
            zinfo.CRC = 0
            # Directory type bit plus the requested permission bits
            zinfo.external_attr = ((0o40000 | mode) & 0xFFFF) << 16
            zinfo.file_size = 0
            # MS-DOS directory flag
            zinfo.external_attr |= 0x10
        else:
            raise TypeError("Expected type str or ZipInfo")

        with self._lock:
            # The entry goes where the central directory would start
            if self._seekable:
                self.fp.seek(self.start_dir)
            zinfo.header_offset = self.fp.tell()  # Start of header bytes
            if zinfo.compress_type == ZIP_LZMA:
                # Compressed data includes an end-of-stream (EOS) marker
                zinfo.flag_bits |= _MASK_COMPRESS_OPTION_1

            self._writecheck(zinfo)
            self._didModify = True

            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            self.fp.write(zinfo.FileHeader(False))
            # The central directory now starts after the new header
            self.start_dir = self.fp.tell()
    1913  
    1914      def __del__(self):
    1915          """Call the "close()" method in case the user forgot."""
    1916          self.close()
    1917  
    def close(self):
        """Close the file, and for mode 'w', 'x' and 'a' write the ending
        records.

        Closing an already closed archive does nothing.  Raises ValueError
        while a writing handle is still open.  The file is released even
        if writing the ending records fails.
        """
        if self.fp is None:
            return

        if self._writing:
            raise ValueError("Can't close the ZIP file while there is "
                             "an open writing handle on it. "
                             "Close the writing handle before closing the zip.")

        try:
            # The ending records are only written for an archive opened
            # for writing that was actually modified.
            needs_end_record = self.mode in ('w', 'x', 'a') and self._didModify
            if needs_end_record:
                with self._lock:
                    if self._seekable:
                        self.fp.seek(self.start_dir)
                    self._write_end_record()
        finally:
            handle, self.fp = self.fp, None
            self._fpclose(handle)
    1939  
    def _write_end_record(self):
        """Write the central directory and the end-of-archive records.

        One central directory record is written to self.fp for every entry
        in self.filelist, starting at the current file position.  Entries
        whose sizes or header offset exceed ZIP64_LIMIT get a ZIP64 extra
        field.  If the entry count, the central directory offset or its
        size exceed the classic limits, ZIP64 end records are written as
        well; LargeZipFile is raised instead when self._allowZip64 is
        false.  The end-of-archive record and the archive comment come
        last; in mode "a" the file is truncated after them.
        """
        for zinfo in self.filelist:         # write central directory
            # Pack date_time into the DOS date and time codes: years count
            # from 1980 and seconds are stored halved.
            dt = zinfo.date_time
            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
            # Values too large for the fixed fields are collected here and
            # written into a ZIP64 extra field; the fixed fields then hold
            # the placeholder 0xffffffff.
            extra = []
            if zinfo.file_size > ZIP64_LIMIT \
               or zinfo.compress_size > ZIP64_LIMIT:
                extra.append(zinfo.file_size)
                extra.append(zinfo.compress_size)
                file_size = 0xffffffff
                compress_size = 0xffffffff
            else:
                file_size = zinfo.file_size
                compress_size = zinfo.compress_size

            if zinfo.header_offset > ZIP64_LIMIT:
                extra.append(zinfo.header_offset)
                header_offset = 0xffffffff
            else:
                header_offset = zinfo.header_offset

            extra_data = zinfo.extra
            min_version = 0
            if extra:
                # Append a ZIP64 field to the extra's
                # NOTE(review): _strip_extra() presumably removes an
                # existing field with header id 1 first -- confirm there.
                extra_data = _strip_extra(extra_data, (1,))
                extra_data = struct.pack(
                    '<HH' + 'Q'*len(extra),
                    1, 8*len(extra), *extra) + extra_data

                min_version = ZIP64_VERSION

            # The compression method can raise the minimum version too
            if zinfo.compress_type == ZIP_BZIP2:
                min_version = max(BZIP2_VERSION, min_version)
            elif zinfo.compress_type == ZIP_LZMA:
                min_version = max(LZMA_VERSION, min_version)

            # Never record a lower version than the entry already carries
            extract_version = max(min_version, zinfo.extract_version)
            create_version = max(min_version, zinfo.create_version)
            filename, flag_bits = zinfo._encodeFilenameFlags()
            centdir = struct.pack(structCentralDir,
                                  stringCentralDir, create_version,
                                  zinfo.create_system, extract_version, zinfo.reserved,
                                  flag_bits, zinfo.compress_type, dostime, dosdate,
                                  zinfo.CRC, compress_size, file_size,
                                  len(filename), len(extra_data), len(zinfo.comment),
                                  0, zinfo.internal_attr, zinfo.external_attr,
                                  header_offset)
            # Fixed-size record first, then name, extra field and comment
            self.fp.write(centdir)
            self.fp.write(filename)
            self.fp.write(extra_data)
            self.fp.write(zinfo.comment)

        # pos2: file position just after the central directory
        pos2 = self.fp.tell()
        # Write end-of-zip-archive record
        centDirCount = len(self.filelist)
        centDirSize = pos2 - self.start_dir
        centDirOffset = self.start_dir
        requires_zip64 = None
        if centDirCount > ZIP_FILECOUNT_LIMIT:
            requires_zip64 = "Files count"
        elif centDirOffset > ZIP64_LIMIT:
            requires_zip64 = "Central directory offset"
        elif centDirSize > ZIP64_LIMIT:
            requires_zip64 = "Central directory size"
        if requires_zip64:
            # Need to write the ZIP64 end-of-archive records
            if not self._allowZip64:
                raise LargeZipFile(requires_zip64 +
                                   " would require ZIP64 extensions")
            zip64endrec = struct.pack(
                structEndArchive64, stringEndArchive64,
                44, 45, 45, 0, 0, centDirCount, centDirCount,
                centDirSize, centDirOffset)
            self.fp.write(zip64endrec)

            zip64locrec = struct.pack(
                structEndArchive64Locator,
                stringEndArchive64Locator, 0, pos2, 1)
            self.fp.write(zip64locrec)
            # Clamp the values so that they fit the classic end record
            centDirCount = min(centDirCount, 0xFFFF)
            centDirSize = min(centDirSize, 0xFFFFFFFF)
            centDirOffset = min(centDirOffset, 0xFFFFFFFF)

        endrec = struct.pack(structEndArchive, stringEndArchive,
                             0, 0, centDirCount, centDirCount,
                             centDirSize, centDirOffset, len(self._comment))
        self.fp.write(endrec)
        self.fp.write(self._comment)
        # When appending, cut off whatever the file still holds beyond the
        # records just written.
        if self.mode == "a":
            self.fp.truncate()
        self.fp.flush()
    2033  
    2034      def _fpclose(self, fp):
    2035          assert self._fileRefCnt > 0
    2036          self._fileRefCnt -= 1
    2037          if not self._fileRefCnt and not self._filePassed:
    2038              fp.close()
    2039  
    2040  
    class PyZipFile(ZipFile):
        """Class to create ZIP archives with Python library files and packages."""

        def __init__(self, file, mode="r", compression=ZIP_STORED,
                     allowZip64=True, optimize=-1):
            """Open the archive and remember the requested optimization level.

            file, mode, compression and allowZip64 are forwarded unchanged to
            ZipFile.__init__().  optimize selects which byte-compiled file
            writepy() stores: -1 (the default) uses whatever up-to-date
            compiled file is already present, while 0, 1 or 2 select that
            optimization level explicitly.  Any other value makes writepy()
            raise ValueError when a module is added.
            """
            ZipFile.__init__(self, file, mode=mode, compression=compression,
                             allowZip64=allowZip64)
            self._optimize = optimize

        def writepy(self, pathname, basename="", filterfunc=None):
            """Add all files from "pathname" to the ZIP archive.

            If pathname is a package directory, search the directory and
            all package subdirectories recursively for all *.py and enter
            the modules into the archive.  If pathname is a plain
            directory, listdir *.py and enter all modules.  Else, pathname
            must be a Python *.py file and the module will be put into the
            archive.  Added modules are always module.pyc.
            This method will compile the module.py into module.pyc if
            necessary.
            If filterfunc is given, it is called with pathname itself and
            with the path of every *.py file and package subdirectory found
            below it (a package's own __init__.py is not passed separately).
            When it returns a false value, the file or directory is skipped.

            Raises RuntimeError if pathname is a file not ending in ".py".
            """
            pathname = os.fspath(pathname)
            if filterfunc and not filterfunc(pathname):
                if self.debug:
                    label = 'path' if os.path.isdir(pathname) else 'file'
                    print('%s %r skipped by filterfunc' % (label, pathname))
                return

            if not os.path.isdir(pathname):
                # A single module.
                if pathname[-3:] != ".py":
                    raise RuntimeError(
                        'Files added with writepy() must end with ".py"')
                self._writepy_module(pathname, basename, "Adding file")
                return

            initname = os.path.join(pathname, "__init__.py")
            if not os.path.isfile(initname):
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory", pathname)
                for filename in sorted(os.listdir(pathname)):
                    if os.path.splitext(filename)[1] == ".py":
                        self._writepy_filtered(
                            os.path.join(pathname, filename),
                            basename, filterfunc)
                return

            # This is a package directory, add it
            name = os.path.basename(pathname)
            basename = "%s/%s" % (basename, name) if basename else name
            if self.debug:
                print("Adding package in", pathname, "as", basename)
            # The package's __init__ is always stored first.
            self._writepy_module(initname, basename)
            dirlist = sorted(os.listdir(pathname))
            dirlist.remove("__init__.py")
            # Add all *.py files and package subdirectories
            for filename in dirlist:
                path = os.path.join(pathname, filename)
                if os.path.isdir(path):
                    if os.path.isfile(os.path.join(path, "__init__.py")):
                        # This is a package directory, add it
                        self.writepy(path, basename,
                                     filterfunc=filterfunc)  # Recursive call
                elif os.path.splitext(filename)[1] == ".py":
                    self._writepy_filtered(path, basename, filterfunc)

        def _writepy_filtered(self, path, basename, filterfunc):
            """Add the module at *path* unless *filterfunc* rejects it."""
            if filterfunc and not filterfunc(path):
                if self.debug:
                    print('file %r skipped by filterfunc' % path)
                return
            self._writepy_module(path, basename)

        def _writepy_module(self, path, basename, label="Adding"):
            """Resolve the compiled file for the *.py *path* and write it.

            *label* is the prefix of the message printed when self.debug is
            set.
            """
            # Strip the ".py" suffix: _get_codename() works on the bare
            # module path.
            fname, arcname = self._get_codename(path[0:-3], basename)
            if self.debug:
                print(label, arcname)
            self.write(fname, arcname)

        def _get_codename(self, pathname, basename):
            """Return (filename, archivename) for the path.

            Given a module name path, return the correct file path and
            archive name, compiling if necessary.  For example, given
            /python/lib/string, return the path of an up-to-date compiled
            file (either /python/lib/string.pyc or the matching file below
            /python/lib/__pycache__) together with the archive name
            string.pyc.  If compilation fails, the source file and the
            archive name string.py are returned instead.  A non-empty
            basename is prepended to the archive name as "basename/".

            Raises ValueError if the optimize value given to the constructor
            is not -1, 0, 1 or 2.
            """
            def _compile(file, optimize=-1):
                # Byte-compile *file*; report failure by printing the error
                # message and returning False instead of raising.
                import py_compile
                if self.debug:
                    print("Compiling", file)
                try:
                    py_compile.compile(file, doraise=True, optimize=optimize)
                except py_compile.PyCompileError as err:
                    print(err.msg)
                    return False
                return True

            file_py  = pathname + ".py"
            file_pyc = pathname + ".pyc"

            def _is_fresh(candidate):
                # A compiled file is usable when it exists and is at least as
                # new as the source file.
                return (os.path.isfile(candidate) and
                        os.stat(candidate).st_mtime >=
                        os.stat(file_py).st_mtime)

            pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
            pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
            pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
            # Whatever file is read, it is stored under the legacy pyc file
            # name in the archive (overridden below if compilation fails).
            arcname = file_pyc
            if self._optimize == -1:
                # legacy mode: use whatever file is present, preferring the
                # legacy .pyc next to the source, then the __pycache__ files
                # in order of increasing optimization level.
                for candidate in (file_pyc, pycache_opt0,
                                  pycache_opt1, pycache_opt2):
                    if _is_fresh(candidate):
                        fname = candidate
                        break
                else:
                    # Compile py into PEP 3147 pyc file.
                    if _compile(file_py):
                        # The compiled file's name depends on the
                        # interpreter's own optimization level.
                        if sys.flags.optimize == 0:
                            fname = pycache_opt0
                        elif sys.flags.optimize == 1:
                            fname = pycache_opt1
                        else:
                            fname = pycache_opt2
                    else:
                        fname = arcname = file_py
            else:
                # new mode: use given optimization level
                if self._optimize == 0:
                    fname = pycache_opt0
                elif self._optimize == 1:
                    fname = pycache_opt1
                elif self._optimize == 2:
                    fname = pycache_opt2
                else:
                    msg = "invalid value for 'optimize': {!r}".format(self._optimize)
                    raise ValueError(msg)
                if not _is_fresh(fname):
                    if not _compile(file_py, optimize=self._optimize):
                        fname = arcname = file_py
            archivename = os.path.split(arcname)[1]
            if basename:
                archivename = "%s/%s" % (basename, archivename)
            return (fname, archivename)
    2213  
    2214  
    2215  from ._path import (  # noqa: E402
    2216      Path,
    2217  
    2218      # used privately for tests
    2219      CompleteDirs,  # noqa: F401
    2220  )
    2221  
    2222  # used privately for tests
    2223  from .__main__ import main  # noqa: F401, E402