1 """
2 Read and write ZIP files.
3
4 XXX references to utf-8 need further investigation.
5 """
6 import binascii
7 import importlib.util
8 import io
9 import os
10 import shutil
11 import stat
12 import struct
13 import sys
14 import threading
15 import time
16
17 try:
18 import zlib # We may need its compression method
19 crc32 = zlib.crc32
20 except ImportError:
21 zlib = None
22 crc32 = binascii.crc32
23
24 try:
25 import bz2 # We may need its compression method
26 except ImportError:
27 bz2 = None
28
29 try:
30 import lzma # We may need its compression method
31 except ImportError:
32 lzma = None
33
34 __all__ = ["BadZipFile", "BadZipfile", "error",
35 "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
36 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile",
37 "Path"]
38
class BadZipFile(Exception):
    """Error raised by this module for malformed or unsupported ZIP data.

    Examples in this file: corrupt extra fields, multi-disk archives and
    CRC-32 mismatches.
    """
41
42
class LargeZipFile(Exception):
    """Error for archives that need ZIP64 while ZIP64 is switched off.

    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """
48
# Both aliases are bound to the very same class object as BadZipFile.
error = BadZipfile = BadZipFile      # Pre-3.2 compatibility names


# 2**31 - 1.  ZipInfo.FileHeader() switches to the ZIP64 extra field when a
# file size or compressed size exceeds this value.
ZIP64_LIMIT = (1 << 31) - 1
# 2**16 - 1 == 65535 for both of the following limits.
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

# Version numbers written to the header.  ZipInfo.FileHeader() raises both
# extract_version and create_version to at least the value matching the
# feature in use (ZIP64, bzip2 or LZMA).
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63
69
# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
# Format: little-endian; 4-byte signature, four unsigned shorts, two unsigned
# longs and one unsigned short -- eight fields, matching indices 0..7 below.
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
# (_EndRecData() appends the comment bytes and the record's file offset).
_ECD_COMMENT = 8
_ECD_LOCATION = 9
94
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
# Format: little-endian; 4-byte signature followed by 18 integer fields
# (4B, 4H, L, 2L, 5H, 2L), matching indices 0..18 below.
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
121
# General purpose bit flags
# Zip Appnote: 4.4.4 general purpose bit flag: (2 bytes)
# Only the uncommented masks are defined; the commented-out names document
# the remaining bit positions.
_MASK_ENCRYPTED = 1 << 0
# Bits 1 and 2 have different meanings depending on the compression used.
_MASK_COMPRESS_OPTION_1 = 1 << 1
# _MASK_COMPRESS_OPTION_2 = 1 << 2
# _MASK_USE_DATA_DESCRIPTOR: If set, crc-32, compressed size and uncompressed
# size are zero in the local header and the real values are written in the data
# descriptor immediately following the compressed data.
_MASK_USE_DATA_DESCRIPTOR = 1 << 3
# Bit 4: Reserved for use with compression method 8, for enhanced deflating.
# _MASK_RESERVED_BIT_4 = 1 << 4
_MASK_COMPRESSED_PATCH = 1 << 5
_MASK_STRONG_ENCRYPTION = 1 << 6
# _MASK_UNUSED_BIT_7 = 1 << 7
# _MASK_UNUSED_BIT_8 = 1 << 8
# _MASK_UNUSED_BIT_9 = 1 << 9
# _MASK_UNUSED_BIT_10 = 1 << 10
# Set by ZipInfo._encodeFilenameFlags() when the name is not pure ASCII and
# is therefore written as UTF-8.
_MASK_UTF_FILENAME = 1 << 11
# Bit 12: Reserved by PKWARE for enhanced compression.
# _MASK_RESERVED_BIT_12 = 1 << 12
# _MASK_ENCRYPTED_CENTRAL_DIR = 1 << 13
# Bit 14, 15: Reserved by PKWARE
# _MASK_RESERVED_BIT_14 = 1 << 14
# _MASK_RESERVED_BIT_15 = 1 << 15
147
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
# Format: little-endian; 4-byte signature followed by 11 integer fields
# (2B, 4H, L, 2L, 2H), matching indices 0..11 below.  This is the layout
# packed by ZipInfo.FileHeader().
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
166
# The "Zip64 end of central directory locator" structure, magic number, and size
# Fields, as unpacked by _EndRecData64(): signature, disk number, relative
# offset, number of disks.
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
# Format: little-endian; 4-byte signature followed by 9 integer fields
# (Q, 2H, 2L, 4Q), matching indices 0..9 below.
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9

# Data descriptor signature value (not referenced in the part of the file
# reviewed here).
_DD_SIGNATURE = 0x08074b50

# Pre-compiled header of one extra-field record: two little-endian unsigned
# shorts, read by _strip_extra() as (field ID, payload length).
_EXTRA_FIELD_STRUCT = struct.Struct('<HH')
192
def _strip_extra(extra, xids):
    """Return *extra* without the extra-field records whose ID is in *xids*.

    *extra* is a sequence of records, each a 4-byte (ID, length) header
    followed by ``length`` payload bytes.  When no record matches, the
    original *extra* object is returned unchanged.
    """
    header_size = _EXTRA_FIELD_STRUCT.size
    total = len(extra)
    kept = []
    keep_from = 0        # start of the current run of records to keep
    pos = 0
    removed_any = False
    while pos + header_size <= total:
        field_id, field_len = _EXTRA_FIELD_STRUCT.unpack_from(extra, pos)
        next_pos = pos + header_size + field_len
        if field_id in xids:
            # Flush the run of kept records that precedes this one.
            if pos != keep_from:
                kept.append(extra[keep_from:pos])
            keep_from = next_pos
            removed_any = True
        pos = next_pos
    if not removed_any:
        return extra
    if keep_from != total:
        kept.append(extra[keep_from:])
    return b''.join(kept)
213
def _check_zipfile(fp):
    """Return True if _EndRecData() finds an end-of-archive record in *fp*.

    An OSError raised while probing the file is treated as "not a ZIP file".
    """
    try:
        # A truthy result means the file has the correct magic number.
        return bool(_EndRecData(fp))
    except OSError:
        return False
221
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too: anything
    with a ``read`` attribute is probed directly, anything else is opened
    in binary mode.  OSError is swallowed and reported as False.
    """
    outcome = False
    try:
        if not hasattr(filename, "read"):
            with open(filename, "rb") as stream:
                outcome = _check_zipfile(stream)
        else:
            outcome = _check_zipfile(fp=filename)
    except OSError:
        pass
    return outcome
237
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    *offset* is the (negative) position of the classic end record relative to
    the end of the file.  *endrec* is returned unchanged whenever the ZIP64
    locator or record is missing, short, or carries the wrong signature.
    Raises BadZipFile for archives that span multiple disks.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    raw = fpin.read(sizeEndCentDir64Locator)
    if len(raw) != sizeEndCentDir64Locator:
        return endrec
    magic, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, raw)
    if magic != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks > 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data': the record sits right before the
    # locator.
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    raw = fpin.read(sizeEndCentDir64)
    if len(raw) != sizeEndCentDir64:
        return endrec
    fields = struct.unpack(structEndArchive64, raw)
    if fields[_CD64_SIGNATURE] != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = fields[_CD64_SIGNATURE]
    endrec[_ECD_DISK_NUMBER] = fields[_CD64_DISK_NUMBER]
    endrec[_ECD_DISK_START] = fields[_CD64_DISK_NUMBER_START]
    endrec[_ECD_ENTRIES_THIS_DISK] = fields[_CD64_NUMBER_ENTRIES_THIS_DISK]
    endrec[_ECD_ENTRIES_TOTAL] = fields[_CD64_NUMBER_ENTRIES_TOTAL]
    endrec[_ECD_SIZE] = fields[_CD64_DIRECTORY_SIZE]
    endrec[_ECD_OFFSET] = fields[_CD64_OFFSET_START_CENTDIR]
    return endrec
279
280
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the fields of the ZIP "End of central dir" record,
    followed by the archive comment and, as the last item, the file seek
    offset of this record.  The list is passed through _EndRecData64() so
    ZIP64 values override the classic ones when present."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # First guess: no archive comment, so the record is the very last thing
    # in the file.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    tail = fpin.read()
    record_at_end = (
        len(tail) == sizeEndCentDir
        and tail[0:4] == stringEndArchive
        and tail[-2:] == b"\000\000"
    )
    if record_at_end:
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, tail))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    search_start = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(search_start, 0)
    window = fpin.read()
    hit = window.rfind(stringEndArchive)
    if hit < 0:
        # Unable to find a valid end of central directory structure
        return None

    # found the magic number; attempt to unpack and interpret
    record = window[hit:hit + sizeEndCentDir]
    if len(record) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, record))
    comment_len = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
    comment_start = hit + sizeEndCentDir
    endrec.append(window[comment_start:comment_start + comment_len])
    endrec.append(search_start + hit)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, search_start + hit - filesize, endrec)
340
341 def _sanitize_filename(filename):
342 """Terminate the file name at the first null byte and
343 ensure paths always use forward slashes as the directory separator."""
344
345 # Terminate the file name at the first null byte. Null bytes in file
346 # names are used as tricks by viruses in archives.
347 null_byte = filename.find(chr(0))
348 if null_byte >= 0:
349 filename = filename[0:null_byte]
350 # This is used to ensure paths in generated ZIP files always use
351 # forward slashes as the directory separator, as required by the
352 # ZIP format specification.
353 if os.sep != "/" and os.sep in filename:
354 filename = filename.replace(os.sep, "/")
355 if os.altsep and os.altsep != "/" and os.altsep in filename:
356 filename = filename.replace(os.altsep, "/")
357 return filename
358
359
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        '_compresslevel',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Describe one archive member with default header values.

        filename is kept verbatim as ``orig_filename`` and, after
        _sanitize_filename(), as ``filename``.  date_time is a
        (year, month, day, hour, min, sec) tuple.

        Raises ValueError when the year is earlier than 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte and
        # ensure paths always use forward slashes as the directory separator.
        filename = _sanitize_filename(filename)

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self._compresslevel = None      # Level for the compressor
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        self.compress_size = 0          # Size of the compressed file
        self.file_size = 0              # Size of the uncompressed file
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file

    def __repr__(self):
        """Return a compact description; fields at their defaults are omitted."""
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # The high 16 bits are shown as a Unix file mode, the low 16 bits as
        # a raw hexadecimal value.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object.

        When the optional zip64 arg is None rather than a bool, we will
        decide based upon the file_size and compress_size, if known,
        False otherwise.

        Side effect: extract_version and create_version are raised to the
        minimum version required by ZIP64 / bzip2 / LZMA when those are used.
        """
        dt = self.date_time
        # Pack the date and time into the two 16-bit MS-DOS fields (the
        # seconds are stored halved).
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            # We always explicitly pass zip64 within this module.... This
            # remains for anyone using ZipInfo.FileHeader as a public API.
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            # Real sizes go into a ZIP64 extra record (ID 1); the fixed
            # header fields are filled with the 0xffffffff placeholder.
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        ASCII is used when possible; otherwise the name is encoded as UTF-8
        and _MASK_UTF_FILENAME is added to the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | _MASK_UTF_FILENAME

    def _decodeExtra(self, filename_crc):
        """Apply the ZIP64 and Unicode-path records found in self.extra.

        filename_crc is compared with the CRC stored in a Unicode Path
        record (0x7075); the record is only honoured when both match.

        Raises BadZipFile for truncated or otherwise corrupt records.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                data = extra[4:ln+4]
                # ZIP64 extension (large files and/or large archives)
                # Only fields holding the placeholder value are present in
                # the record, in this fixed order.
                try:
                    if self.file_size in (0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF):
                        field = "File size"
                        self.file_size, = unpack('<Q', data[:8])
                        data = data[8:]
                    if self.compress_size == 0xFFFF_FFFF:
                        field = "Compress size"
                        self.compress_size, = unpack('<Q', data[:8])
                        data = data[8:]
                    if self.header_offset == 0xFFFF_FFFF:
                        field = "Header offset"
                        self.header_offset, = unpack('<Q', data[:8])
                except struct.error:
                    raise BadZipFile(f"Corrupt zip64 extra field. "
                                     f"{field} not found.") from None
            elif tp == 0x7075:
                data = extra[4:ln+4]
                # Unicode Path Extra Field
                try:
                    up_version, up_name_crc = unpack('<BL', data[:5])
                    if up_version == 1 and up_name_crc == filename_crc:
                        up_unicode_name = data[5:].decode('utf-8')
                        if up_unicode_name:
                            self.filename = _sanitize_filename(up_unicode_name)
                        else:
                            # Imported here because no module-level
                            # ``import warnings`` is visible; without it this
                            # branch would raise NameError instead of warning.
                            import warnings
                            warnings.warn("Empty unicode path extra field (0x7075)", stacklevel=2)
                except struct.error as e:
                    raise BadZipFile("Corrupt unicode path extra field (0x7075)") from e
                except UnicodeDecodeError as e:
                    raise BadZipFile('Corrupt unicode path extra field (0x7075): invalid utf-8 bytes') from e

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).

        With strict_timestamps=False, modification years before 1980 or after
        2107 are clamped to the nearest supported timestamp.
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        if not strict_timestamps and date_time[0] < 1980:
            date_time = (1980, 1, 1, 0, 0, 0)
        elif not strict_timestamps and date_time[0] > 2107:
            date_time = (2107, 12, 31, 23, 59, 59)
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory."""
        return self.filename.endswith('/')
583
584
585 # ZIP encryption uses the CRC32 one-byte primitive for scrambling some
586 # internal keys. We noticed that a direct implementation is faster than
587 # relying on binascii.crc32().
588
589 _crctable = None
590 def _gen_crc(crc):
591 for j in range(8):
592 if crc & 1:
593 crc = (crc >> 1) ^ 0xEDB88320
594 else:
595 crc >>= 1
596 return crc
597
598 # ZIP supports a password-based form of encryption. Even though known
599 # plaintext attacks have been found against it, it is still useful
600 # to be able to get data out of such a file.
601 #
602 # Usage:
603 # zd = _ZipDecrypter(mypwd)
604 # plain_bytes = zd(cypher_bytes)
605
def _ZipDecrypter(pwd):
    """Return a function that decrypts successive chunks of a cipher stream.

    *pwd* is iterated byte by byte to prime three 32-bit keys.  The returned
    callable takes a bytes-like object, returns the decrypted bytes and keeps
    the key state between calls.
    """
    global _crctable
    if _crctable is None:
        _crctable = [_gen_crc(value) for value in range(256)]
    table = _crctable

    k0, k1, k2 = 305419896, 591751049, 878082192

    def crc_step(byte, crc):
        """Compute the CRC32 primitive on one byte."""
        return (crc >> 8) ^ table[(crc ^ byte) & 0xFF]

    def mix(byte):
        """Advance the three keys with one plaintext byte."""
        nonlocal k0, k1, k2
        k0 = crc_step(byte, k0)
        k1 = ((k1 + (k0 & 0xFF)) * 134775813 + 1) & 0xFFFFFFFF
        k2 = crc_step(k1 >> 24, k2)

    for byte in pwd:
        mix(byte)

    def decrypter(data):
        """Decrypt a bytes object."""
        plain = bytearray()
        for byte in data:
            t = k2 | 2
            byte ^= ((t * (t ^ 1)) >> 8) & 0xFF
            mix(byte)
            plain.append(byte)
        return bytes(plain)

    return decrypter
642
643
class LZMACompressor:
    """Raw LZMA1 compressor that prefixes its output with a properties header.

    The underlying lzma compressor is created on the first compress() or
    flush() call; that call's output starts with the bytes 9 and 4, a 2-byte
    little-endian properties length and the encoded filter properties.
    """

    def __init__(self):
        self._comp = None

    def _init(self):
        """Create the raw compressor and return the stream header bytes."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filter_spec = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[filter_spec])
        # NOTE(review): 9 and 4 are presumably the LZMA SDK version bytes
        # required by the ZIP appnote -- confirm.
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        """Compress *data*; the first call also emits the header."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.compress(data)

    def flush(self):
        """Finish the stream; emits the header first if nothing was written."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.flush()
665
666
class LZMADecompressor:
    """Raw LZMA1 decompressor for streams that start with a properties header.

    Input is buffered until the 4-byte header, the filter properties and at
    least one payload byte are available; until then decompress() returns
    b''.  ``eof`` mirrors the underlying decompressor's ``eof`` flag.
    """

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        """Feed *data* and return whatever decompressed bytes are available."""
        if self._decomp is None:
            self._unconsumed += data
            pending = self._unconsumed
            if len(pending) <= 4:
                return b''
            # Bytes 2-3 hold the length of the filter properties.
            (props_len,) = struct.unpack('<H', pending[2:4])
            payload_start = 4 + props_len
            if len(pending) <= payload_start:
                return b''

            filter_spec = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, pending[4:payload_start])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[filter_spec])
            data = pending[payload_start:]
            # The header buffer is no longer needed once the decoder exists.
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result
693
694
# Human-readable names keyed by ZIP compression method number.  Used for
# repr() output and error messages only; most of these methods cannot
# actually be decompressed by this module (see _check_compression()).
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
714
def _check_compression(compression):
    """Validate that *compression* is a method this module can handle.

    Returns None on success.  Raises RuntimeError when the method is known
    but its support module failed to import, and NotImplementedError for any
    other method number.
    """
    if compression == ZIP_STORED:
        return
    if compression == ZIP_DEFLATED:
        if zlib:
            return
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if compression == ZIP_BZIP2:
        if bz2:
            return
        raise RuntimeError(
            "Compression requires the (missing) bz2 module")
    if compression == ZIP_LZMA:
        if lzma:
            return
        raise RuntimeError(
            "Compression requires the (missing) lzma module")
    raise NotImplementedError("That compression method is not supported")
732
733
def _get_compressor(compress_type, compresslevel=None):
    """Return a new compressor object for *compress_type*, or None.

    compresslevel, when given, is passed to the zlib or bz2 compressor; it
    is ignored for ZIP_LZMA.  Any other method (including ZIP_STORED) yields
    None.
    """
    if compress_type == ZIP_DEFLATED:
        level = (zlib.Z_DEFAULT_COMPRESSION if compresslevel is None
                 else compresslevel)
        # wbits=-15 is what the original call passes; it is kept unchanged.
        return zlib.compressobj(level, zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        if compresslevel is None:
            return bz2.BZ2Compressor()
        return bz2.BZ2Compressor(compresslevel)
    if compress_type == ZIP_LZMA:
        # compresslevel is ignored for ZIP_LZMA
        return LZMACompressor()
    return None
748
749
def _get_decompressor(compress_type):
    """Return a new decompressor object for *compress_type*.

    Returns None for ZIP_STORED, otherwise a zlib, bz2 or LZMADecompressor
    object.

    Raises NotImplementedError for unsupported methods, naming the method
    when compressor_names knows it, and RuntimeError (via
    _check_compression()) when the required module is missing.
    """
    if compress_type not in (ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2, ZIP_LZMA):
        # Report unsupported methods *before* calling _check_compression():
        # its generic NotImplementedError used to pre-empt this descriptive
        # message, leaving the branch unreachable.
        descr = compressor_names.get(compress_type)
        if descr:
            raise NotImplementedError("compression type %s (%s)" % (compress_type, descr))
        raise NotImplementedError("compression type %s" % (compress_type,))

    _check_compression(compress_type)
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    return LZMADecompressor()
766
767
768 class ESC[4;38;5;81m_SharedFile:
769 def __init__(self, file, pos, close, lock, writing):
770 self._file = file
771 self._pos = pos
772 self._close = close
773 self._lock = lock
774 self._writing = writing
775 self.seekable = file.seekable
776
777 def tell(self):
778 return self._pos
779
780 def seek(self, offset, whence=0):
781 with self._lock:
782 if self._writing():
783 raise ValueError("Can't reposition in the ZIP file while "
784 "there is an open writing handle on it. "
785 "Close the writing handle before trying to read.")
786 self._file.seek(offset, whence)
787 self._pos = self._file.tell()
788 return self._pos
789
790 def read(self, n=-1):
791 with self._lock:
792 if self._writing():
793 raise ValueError("Can't read from the ZIP file while there "
794 "is an open writing handle on it. "
795 "Close the writing handle before trying to read.")
796 self._file.seek(self._pos)
797 data = self._file.read(n)
798 self._pos = self._file.tell()
799 return data
800
801 def close(self):
802 if self._file is not None:
803 fileobj = self._file
804 self._file = None
805 self._close(fileobj)
806
807 # Provide the tell method for unseekable stream
808 class ESC[4;38;5;81m_Tellable:
809 def __init__(self, fp):
810 self.fp = fp
811 self.offset = 0
812
813 def write(self, data):
814 n = self.fp.write(data)
815 self.offset += n
816 return n
817
818 def tell(self):
819 return self.offset
820
821 def flush(self):
822 self.fp.flush()
823
824 def close(self):
825 self.fp.close()
826
827
828 class ESC[4;38;5;81mZipExtFile(ESC[4;38;5;149mioESC[4;38;5;149m.ESC[4;38;5;149mBufferedIOBase):
829 """File-like object for reading an archive member.
830 Is returned by ZipFile.open().
831 """
832
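# Max size supported by decompressor.
# NOTE(review): `1 << 31 - 1` evaluates as `1 << (31 - 1)`, i.e. 2**30,
# because `-` binds tighter than `<<`; confirm whether (1 << 31) - 1 was meant.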
834 MAX_N = 1 << 31 - 1
835
836 # Read from compressed files in 4k blocks.
837 MIN_READ_SIZE = 4096
838
839 # Chunk size to read during seek
840 MAX_SEEK_READ = 1 << 24
841
def __init__(self, fileobj, mode, zipinfo, pwd=None,
             close_fileobj=False):
    """Prepare to read one archive member from *fileobj*.

    fileobj is expected to be positioned at the start of the member's
    (possibly encrypted) data.  zipinfo supplies sizes, compression type
    and, when present, the expected CRC.  If pwd is given, the encryption
    header is consumed and RuntimeError is raised on a password mismatch.
    """
    self._fileobj = fileobj
    self._pwd = pwd
    self._close_fileobj = close_fileobj

    self._compress_type = zipinfo.compress_type
    self._compress_left = zipinfo.compress_size
    self._left = zipinfo.file_size

    self._decompressor = _get_decompressor(self._compress_type)

    self._eof = False
    self._readbuffer = b''
    self._offset = 0

    self.newlines = None

    self.mode = mode
    self.name = zipinfo.filename

    if not hasattr(zipinfo, 'CRC'):
        self._expected_crc = None
    else:
        self._expected_crc = zipinfo.CRC
        self._running_crc = crc32(b'')

    # Remember the starting state so the member can be re-read after a seek.
    # AttributeError covers a fileobj without seekable() as well as the
    # missing _running_crc when zipinfo carries no CRC; in both cases the
    # object simply stays non-seekable.
    self._seekable = False
    try:
        if fileobj.seekable():
            self._orig_compress_start = fileobj.tell()
            self._orig_compress_size = zipinfo.compress_size
            self._orig_file_size = zipinfo.file_size
            self._orig_start_crc = self._running_crc
            self._orig_crc = self._expected_crc
            self._seekable = True
    except AttributeError:
        pass

    self._decrypter = None
    if pwd:
        if zipinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
            # compare against the file type from extended local headers
            reference = zipinfo._raw_time >> 8
        else:
            # compare against the CRC otherwise
            reference = zipinfo.CRC >> 24
        check_byte = reference & 0xff
        if self._init_decrypter() != check_byte:
            raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename)
892
893
def _init_decrypter(self):
    """Set up decryption and return the decrypted password-check byte.

    Consumes the 12-byte encryption header from the underlying file and
    deducts it from the remaining compressed size.
    """
    decrypt = self._decrypter = _ZipDecrypter(self._pwd)
    # The first 12 bytes in the cypher stream is an encryption header
    #  used to strengthen the algorithm. The first 11 bytes are
    #  completely random, while the 12th contains the MSB of the CRC,
    #  or the MSB of the file time depending on the header type
    #  and is used to check the correctness of the password.
    encrypted_header = self._fileobj.read(12)
    self._compress_left -= 12
    return decrypt(encrypted_header)[11]
904
def __repr__(self):
    """Return '<module.Class name=... mode=...>' or '<module.Class [closed]>'."""
    cls = self.__class__
    parts = ['<%s.%s' % (cls.__module__, cls.__qualname__)]
    if self.closed:
        parts.append(' [closed]')
    else:
        parts.append(' name=%r mode=%r' % (self.name, self.mode))
        if self._compress_type != ZIP_STORED:
            # Fall back to the raw method number when it has no known name.
            parts.append(' compress_type=%s' %
                         compressor_names.get(self._compress_type,
                                              self._compress_type))
    parts.append('>')
    return ''.join(parts)
918
919 def readline(self, limit=-1):
920 """Read and return a line from the stream.
921
922 If limit is specified, at most limit bytes will be read.
923 """
924
925 if limit < 0:
926 # Shortcut common case - newline found in buffer.
927 i = self._readbuffer.find(b'\n', self._offset) + 1
928 if i > 0:
929 line = self._readbuffer[self._offset: i]
930 self._offset = i
931 return line
932
933 return io.BufferedIOBase.readline(self, limit)
934
935 def peek(self, n=1):
936 """Returns buffered bytes without advancing the position."""
937 if n > len(self._readbuffer) - self._offset:
938 chunk = self.read(n)
939 if len(chunk) > self._offset:
940 self._readbuffer = chunk + self._readbuffer[self._offset:]
941 self._offset = 0
942 else:
943 self._offset -= len(chunk)
944
945 # Return up to 512 bytes to reduce allocation overhead for tight loops.
946 return self._readbuffer[self._offset: self._offset + 512]
947
948 def readable(self):
949 if self.closed:
950 raise ValueError("I/O operation on closed file.")
951 return True
952
953 def read(self, n=-1):
954 """Read and return up to n bytes.
955 If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
956 """
957 if self.closed:
958 raise ValueError("read from closed file.")
959 if n is None or n < 0:
960 buf = self._readbuffer[self._offset:]
961 self._readbuffer = b''
962 self._offset = 0
963 while not self._eof:
964 buf += self._read1(self.MAX_N)
965 return buf
966
967 end = n + self._offset
968 if end < len(self._readbuffer):
969 buf = self._readbuffer[self._offset:end]
970 self._offset = end
971 return buf
972
973 n = end - len(self._readbuffer)
974 buf = self._readbuffer[self._offset:]
975 self._readbuffer = b''
976 self._offset = 0
977 while n > 0 and not self._eof:
978 data = self._read1(n)
979 if n < len(data):
980 self._readbuffer = data
981 self._offset = n
982 buf += data[:n]
983 break
984 buf += data
985 n -= len(data)
986 return buf
987
988 def _update_crc(self, newdata):
989 # Update the CRC using the given data.
990 if self._expected_crc is None:
991 # No need to compute the CRC if we don't have a reference value
992 return
993 self._running_crc = crc32(newdata, self._running_crc)
994 # Check the CRC if we're at the end of the file
995 if self._eof and self._running_crc != self._expected_crc:
996 raise BadZipFile("Bad CRC-32 for file %r" % self.name)
997
def read1(self, n=-1):
    """Read up to n bytes with at most one read() system call.

    *n* defaults to -1, matching io.BufferedIOBase.read1(); when it is
    omitted, None or negative, the buffered data is returned together with
    the first non-empty chunk produced by the decoder (or just the buffered
    data if the end of the member is reached first).

    Otherwise at most *n* bytes are returned: from the buffer when it holds
    enough, else the buffer remainder plus the first non-empty decoded
    chunk, with any surplus kept buffered for the next call.
    """

    if n is None or n < 0:
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        # Skip over empty chunks so that b'' is only returned at EOF.
        while not self._eof:
            data = self._read1(self.MAX_N)
            if data:
                buf += data
                break
        return buf

    end = n + self._offset
    if end < len(self._readbuffer):
        # Fully served from the buffer.
        buf = self._readbuffer[self._offset:end]
        self._offset = end
        return buf

    # Number of bytes still needed once the buffer has been drained.
    n = end - len(self._readbuffer)
    buf = self._readbuffer[self._offset:]
    self._readbuffer = b''
    self._offset = 0
    if n > 0:
        while not self._eof:
            data = self._read1(n)
            if n < len(data):
                # Decoder produced more than requested: keep the surplus.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            if data:
                buf += data
                break
    return buf
1034
def _read1(self, n):
    """Fetch, decrypt and decompress one chunk of member data.

    Reads up to n compressed bytes with at most one read() on the
    underlying file, updates the end-of-member flag, the count of
    uncompressed bytes still expected and the running CRC, and returns
    the decoded bytes (b'' at EOF or when n <= 0).
    """
    if self._eof or n <= 0:
        return b''

    method = self._compress_type
    if method == ZIP_DEFLATED:
        # Input left over from the previous call is fed in first.
        pending = self._decompressor.unconsumed_tail
        if n > len(pending):
            pending += self._read2(n - len(pending))
        n = max(n, self.MIN_READ_SIZE)
        data = self._decompressor.decompress(pending, n)
        self._eof = (self._decompressor.eof or
                     self._compress_left <= 0 and
                     not self._decompressor.unconsumed_tail)
        if self._eof:
            data += self._decompressor.flush()
    elif method == ZIP_STORED:
        data = self._read2(n)
        self._eof = self._compress_left <= 0
    else:
        data = self._decompressor.decompress(self._read2(n))
        self._eof = self._decompressor.eof or self._compress_left <= 0

    # Never hand out more than the declared uncompressed size.
    data = data[:self._left]
    self._left -= len(data)
    if self._left <= 0:
        self._eof = True
    self._update_crc(data)
    return data
1070
def _read2(self, n):
    """Read raw (still compressed) bytes from the underlying file.

    The request is raised to at least MIN_READ_SIZE and capped at the
    number of compressed bytes left. Returns b'' when nothing is left,
    raises EOFError if the file yields no data, and passes the data
    through the decrypter when one is configured.
    """
    remaining = self._compress_left
    if remaining <= 0:
        return b''

    want = min(max(n, self.MIN_READ_SIZE), remaining)
    chunk = self._fileobj.read(want)
    self._compress_left -= len(chunk)
    if not chunk:
        raise EOFError

    decrypt = self._decrypter
    return chunk if decrypt is None else decrypt(chunk)
1086
def close(self):
    """Close this member stream.

    The underlying file object is closed first when this stream is flagged
    as responsible for it; the base-class close() runs afterwards in every
    case, even if closing the underlying file raised.
    """
    try:
        if not self._close_fileobj:
            return
        self._fileobj.close()
    finally:
        super().close()
1093
def seekable(self):
    """Return whether seek() and tell() are supported on this stream.

    Raises ValueError if the stream has already been closed.
    """
    if not self.closed:
        return self._seekable
    raise ValueError("I/O operation on closed file.")
1098
def seek(self, offset, whence=os.SEEK_SET):
    """Move the read position within the uncompressed member data.

    *whence* is os.SEEK_SET, os.SEEK_CUR or os.SEEK_END. The target is
    clamped to the range [0, uncompressed size]. Returns the new position.

    Raises ValueError if the stream is closed or *whence* is invalid, and
    io.UnsupportedOperation if the underlying stream is not seekable.
    """
    if self.closed:
        raise ValueError("seek on closed file.")
    if not self._seekable:
        raise io.UnsupportedOperation("underlying stream is not seekable")
    curr_pos = self.tell()
    if whence == os.SEEK_SET:
        new_pos = offset
    elif whence == os.SEEK_CUR:
        new_pos = curr_pos + offset
    elif whence == os.SEEK_END:
        new_pos = self._orig_file_size + offset
    else:
        raise ValueError("whence must be os.SEEK_SET (0), "
                         "os.SEEK_CUR (1), or os.SEEK_END (2)")

    if new_pos > self._orig_file_size:
        new_pos = self._orig_file_size

    if new_pos < 0:
        new_pos = 0

    read_offset = new_pos - curr_pos
    buff_offset = read_offset + self._offset

    # Fast seek uncompressed unencrypted file
    if self._compress_type == ZIP_STORED and self._decrypter is None and read_offset > 0:
        # disable CRC checking after first seeking - it would be invalid
        self._expected_crc = None
        # seek actual file taking already buffered data into account
        read_offset -= len(self._readbuffer) - self._offset
        self._fileobj.seek(read_offset, os.SEEK_CUR)
        self._left -= read_offset
        # For a stored member the raw and decoded byte counts move
        # together, so the raw budget must follow the file position too;
        # otherwise later reads are sized from a stale value.
        self._compress_left -= read_offset
        # The buffer is dropped below, so bytes that were buffered (and had
        # possibly set the EOF flag) become unread again: recompute the
        # flag instead of leaving it stale.
        self._eof = self._left <= 0
        read_offset = 0
        # flush read buffer
        self._readbuffer = b''
        self._offset = 0
    elif buff_offset >= 0 and buff_offset < len(self._readbuffer):
        # Just move the _offset index if the new position is in the _readbuffer
        self._offset = buff_offset
        read_offset = 0
    elif read_offset < 0:
        # Position is before the current position. Reset the ZipExtFile
        self._fileobj.seek(self._orig_compress_start)
        self._running_crc = self._orig_start_crc
        self._expected_crc = self._orig_crc
        self._compress_left = self._orig_compress_size
        self._left = self._orig_file_size
        self._readbuffer = b''
        self._offset = 0
        self._decompressor = _get_decompressor(self._compress_type)
        self._eof = False
        read_offset = new_pos
        if self._decrypter is not None:
            self._init_decrypter()

    # Whatever distance remains is covered by reading and discarding.
    while read_offset > 0:
        read_len = min(self.MAX_SEEK_READ, read_offset)
        self.read(read_len)
        read_offset -= read_len

    return self.tell()
1161
def tell(self):
    """Return the current position within the uncompressed member data.

    Raises ValueError if the stream is closed and
    io.UnsupportedOperation if the underlying stream is not seekable.
    """
    if self.closed:
        raise ValueError("tell on closed file.")
    if not self._seekable:
        raise io.UnsupportedOperation("underlying stream is not seekable")
    # Bytes decoded so far, minus the part still waiting in the buffer.
    decoded = self._orig_file_size - self._left
    unread_buffered = len(self._readbuffer) - self._offset
    return decoded - unread_buffered
1169
1170
class _ZipWriteFile(io.BufferedIOBase):
    """Write-only stream for one member being added to an archive.

    Data passed to write() is CRC'd, optionally compressed and written to
    the archive's file object; close() finalises the member's header
    information and registers it with the owning archive object.
    """

    def __init__(self, zf, zinfo, zip64):
        """Bind the stream to archive *zf* and member description *zinfo*.

        *zip64* selects the 64-bit layout when close() writes the sizes.
        """
        self._zinfo = zinfo
        self._zip64 = zip64
        self._zipfile = zf
        self._compressor = _get_compressor(zinfo.compress_type,
                                           zinfo._compresslevel)
        self._file_size = 0        # uncompressed bytes accepted so far
        self._compress_size = 0    # bytes actually written to the archive
        self._crc = 0              # running CRC-32 of the uncompressed data

    @property
    def _fileobj(self):
        """The archive's current file object (looked up on every access)."""
        return self._zipfile.fp

    def writable(self):
        """Return True: this stream accepts writes."""
        return True

    def write(self, data):
        """Add *data* to the member and return the number of bytes accepted.

        Raises ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError('I/O operation on closed file.')

        # Accept any data that supports the buffer protocol
        if isinstance(data, (bytes, bytearray)):
            nbytes = len(data)
        else:
            data = memoryview(data)
            nbytes = data.nbytes
        self._file_size += nbytes

        self._crc = crc32(data, self._crc)
        if self._compressor:
            data = self._compressor.compress(data)
            self._compress_size += len(data)
        self._fileobj.write(data)
        return nbytes

    def close(self):
        """Flush the compressor and finalise the member's header data.

        Closing an already closed stream is a no-op. Raises RuntimeError
        when a size exceeds the non-ZIP64 limit and ZIP64 was not selected.
        The archive's "writing" flag is cleared in every case.
        """
        if self.closed:
            return
        try:
            super().close()
            # Flush any data from the compressor, and update header info
            if self._compressor:
                buf = self._compressor.flush()
                self._compress_size += len(buf)
                self._fileobj.write(buf)
                self._zinfo.compress_size = self._compress_size
            else:
                self._zinfo.compress_size = self._file_size
            self._zinfo.CRC = self._crc
            self._zinfo.file_size = self._file_size

            if not self._zip64:
                if self._file_size > ZIP64_LIMIT:
                    raise RuntimeError("File size too large, try using force_zip64")
                if self._compress_size > ZIP64_LIMIT:
                    raise RuntimeError("Compressed size too large, try using force_zip64")

            # Write updated header info
            if self._zinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
                # Write CRC and file sizes after the file data
                fmt = '<LLQQ' if self._zip64 else '<LLLL'
                self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
                    self._zinfo.compress_size, self._zinfo.file_size))
                self._zipfile.start_dir = self._fileobj.tell()
            else:
                # Seek backwards and write file header (which will now include
                # correct CRC and file sizes)

                # Preserve current position in file
                self._zipfile.start_dir = self._fileobj.tell()
                self._fileobj.seek(self._zinfo.header_offset)
                self._fileobj.write(self._zinfo.FileHeader(self._zip64))
                self._fileobj.seek(self._zipfile.start_dir)

            # Successfully written: Add file to our caches
            self._zipfile.filelist.append(self._zinfo)
            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
        finally:
            self._zipfile._writing = False
1252
1253
1254
1255 class ESC[4;38;5;81mZipFile:
1256 """ Class with methods to open, read, write, close, list zip files.
1257
1258 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
1259 compresslevel=None)
1260
1261 file: Either the path to the file, or a file-like object.
1262 If it is a path, the file will be opened and closed by ZipFile.
1263 mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1264 or append 'a'.
1265 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1266 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
1267 allowZip64: if True ZipFile will create files with ZIP64 extensions when
1268 needed, otherwise it will raise an exception when this would
1269 be necessary.
1270 compresslevel: None (default for the given compression type) or an integer
1271 specifying the level to pass to the compressor.
1272 When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
1273 When using ZIP_DEFLATED integers 0 through 9 are accepted.
1274 When using ZIP_BZIP2 integers 1 through 9 are accepted.
1275
1276 """
1277
1278 fp = None # Set here since __del__ checks it
1279 _windows_illegal_name_trans_table = None
1280
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
             compresslevel=None, *, strict_timestamps=True, metadata_encoding=None):
    """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
    or append 'a'.

    *file* is either a path (str or os.PathLike), opened here, or any other
    object, which is used directly as the archive's file object.
    """
    if mode not in ('r', 'w', 'x', 'a'):
        raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

    _check_compression(compression)

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.compresslevel = compresslevel
    self.mode = mode
    self.pwd = None
    self._comment = b''
    self._strict_timestamps = strict_timestamps
    self.metadata_encoding = metadata_encoding

    # Check that we don't try to write with nonconforming codecs
    if self.metadata_encoding and mode != 'r':
        raise ValueError(
            "metadata_encoding is only supported for reading files")

    # Check if we were passed a file-like object
    if isinstance(file, os.PathLike):
        file = os.fspath(file)
    if not isinstance(file, str):
        # Anything that is not a str is taken to be an open file object.
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)
    else:
        # It is a filename: open it, falling back to the next mode listed
        # in the table each time the open fails with OSError.
        self._filePassed = 0
        self.filename = file
        fallback_modes = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                          'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
        filemode = fallback_modes[mode]
        while True:
            try:
                self.fp = io.open(file, filemode)
                break
            except OSError:
                if filemode not in fallback_modes:
                    raise
                filemode = fallback_modes[filemode]
    self._fileRefCnt = 1
    self._lock = threading.RLock()
    self._seekable = True
    self._writing = False

    try:
        if mode == 'r':
            self._RealGetContents()
        elif mode in ('w', 'x'):
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
            try:
                self.start_dir = self.fp.tell()
            except (AttributeError, OSError):
                # No usable tell(): wrap the stream and treat it as
                # non-seekable, starting the archive at offset 0.
                self.fp = _Tellable(self.fp)
                self.start_dir = 0
                self._seekable = False
            else:
                # Some file-like objects can provide tell() but not seek()
                try:
                    self.fp.seek(self.start_dir)
                except (AttributeError, OSError):
                    self._seekable = False
        elif mode == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir)
            except BadZipFile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                self.start_dir = self.fp.tell()
        else:
            raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
    except BaseException:
        # Release the file before propagating any failure.
        opened = self.fp
        self.fp = None
        self._fpclose(opened)
        raise
1376
def __enter__(self):
    """Enter the ``with`` block; the archive object itself is the target."""
    return self
1379
def __exit__(self, type, value, traceback):
    """Leave the ``with`` block by closing the archive.

    Returns None, so an exception raised inside the block propagates.
    """
    self.close()
1382
def __repr__(self):
    """Return a debug representation: class path plus file/mode or [closed]."""
    cls = self.__class__
    parts = ['<%s.%s' % (cls.__module__, cls.__qualname__)]
    if self.fp is None:
        parts.append(' [closed]')
    else:
        # Show the file object when one was passed in, else the path.
        if self._filePassed:
            parts.append(' file=%r' % self.fp)
        elif self.filename is not None:
            parts.append(' filename=%r' % self.filename)
        parts.append(' mode=%r' % self.mode)
    parts.append('>')
    return ''.join(parts)
1396
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Locates the end-of-central-directory record, sets ``start_dir`` and the
    archive comment, then parses every central directory entry into a
    ZipInfo that is added to ``filelist`` and ``NameToInfo``.
    Raises BadZipFile for a missing, truncated or malformed directory.
    """
    archive = self.fp
    try:
        endrec = _EndRecData(archive)
    except OSError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self._comment = endrec[_ECD_COMMENT]    # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    if self.start_dir < 0:
        raise BadZipFile("Bad offset for central directory")
    archive.seek(self.start_dir, 0)
    # Parse the directory from an in-memory copy.
    directory = io.BytesIO(archive.read(size_cd))
    consumed = 0
    while consumed < size_cd:
        raw = directory.read(sizeCentralDir)
        if len(raw) != sizeCentralDir:
            raise BadZipFile("Truncated central directory")
        record = struct.unpack(structCentralDir, raw)
        if record[_CD_SIGNATURE] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        if self.debug > 2:
            print(record)
        name_len = record[_CD_FILENAME_LENGTH]
        extra_len = record[_CD_EXTRA_FIELD_LENGTH]
        comment_len = record[_CD_COMMENT_LENGTH]

        raw_name = directory.read(name_len)
        name_crc = crc32(raw_name)
        if record[_CD_FLAG_BITS] & _MASK_UTF_FILENAME:
            # UTF-8 file names extension
            encoding = 'utf-8'
        else:
            # Historical ZIP filename encoding
            encoding = self.metadata_encoding or 'cp437'
        # Create ZipInfo instance to store file information
        info = ZipInfo(raw_name.decode(encoding))
        info.extra = directory.read(extra_len)
        info.comment = directory.read(comment_len)
        info.header_offset = record[_CD_LOCAL_HEADER_OFFSET]
        (info.create_version, info.create_system, info.extract_version,
         info.reserved, info.flag_bits, info.compress_type,
         dos_time, dos_date,
         info.CRC, info.compress_size, info.file_size) = record[1:12]
        if info.extract_version > MAX_EXTRACT_VERSION:
            raise NotImplementedError("zip file version %.1f" %
                                      (info.extract_version / 10))
        info.volume, info.internal_attr, info.external_attr = record[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        info._raw_time = dos_time
        info.date_time = ((dos_date >> 9) + 1980,
                          (dos_date >> 5) & 0xF,
                          dos_date & 0x1F,
                          dos_time >> 11,
                          (dos_time >> 5) & 0x3F,
                          (dos_time & 0x1F) * 2)
        info._decodeExtra(name_crc)
        # Applied after _decodeExtra, which runs on the recorded offset.
        info.header_offset = info.header_offset + concat
        self.filelist.append(info)
        self.NameToInfo[info.filename] = info

        # update total bytes read from central directory
        consumed = (consumed + sizeCentralDir + name_len
                    + extra_len + comment_len)

        if self.debug > 2:
            print("total", consumed)
1475
1476
def namelist(self):
    """Return a new list with the name of every member, in archive order."""
    return [info.filename for info in self.filelist]
1480
def infolist(self):
    """Return the list of ZipInfo instances for the archive members.

    This is the archive's own ``filelist`` object, not a copy.
    """
    return self.filelist
1485
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One header line is written, then one line per member with its name,
    modification time and uncompressed size. *file* is forwarded to
    print(), so None means standard output.
    """
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header, file=file)
    for zinfo in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
        row = "%-46s %s %12d" % (zinfo.filename, stamp, zinfo.file_size)
        print(row, file=file)
1494
1495 def testzip(self):
1496 """Read all the files and check the CRC.
1497
1498 Return None if all files could be read successfully, or the name
1499 of the offending file otherwise."""
1500 chunk_size = 2 ** 20
1501 for zinfo in self.filelist:
1502 try:
1503 # Read by chunks, to avoid an OverflowError or a
1504 # MemoryError with very large embedded files.
1505 with self.open(zinfo.filename, "r") as f:
1506 while f.read(chunk_size): # Check CRC-32
1507 pass
1508 except BadZipFile:
1509 return zinfo.filename
1510
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Raises KeyError if the archive has no member with that name.
    """
    info = self.NameToInfo.get(name)
    if info is not None:
        return info
    raise KeyError(
        'There is no item named %r in the archive' % name)
1519
def setpassword(self, pwd):
    """Set default password for encrypted files.

    Any falsy value clears the default. Raises TypeError for a truthy
    value that is not a bytes object.
    """
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    self.pwd = pwd if pwd else None
1528
@property
def comment(self):
    """The comment text associated with the ZIP file."""
    return self._comment

@comment.setter
def comment(self, comment):
    # Only bytes are accepted; an over-long comment is cut down to
    # ZIP_MAX_COMMENT bytes after a warning. Assigning a comment marks the
    # archive as modified.
    if not isinstance(comment, bytes):
        raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
    # check for valid comment length
    too_long = len(comment) > ZIP_MAX_COMMENT
    if too_long:
        import warnings
        warnings.warn('Archive comment is too long; truncating to %d bytes'
                      % ZIP_MAX_COMMENT, stacklevel=2)
        comment = comment[:ZIP_MAX_COMMENT]
    self._comment = comment
    self._didModify = True
1546
def read(self, name, pwd=None):
    """Return file bytes for name.

    The member is opened for reading (with *pwd* passed along), read to
    the end and closed again.
    """
    with self.open(name, "r", pwd) as member:
        return member.read()
1551
def open(self, name, mode="r", pwd=None, *, force_zip64=False):
    """Return file-like object for 'name'.

    name is a string for the file name within the ZIP file, or a ZipInfo
    object.

    mode should be 'r' to read a file already in the ZIP file, or 'w' to
    write to a file newly added to the archive.

    pwd is the password to decrypt files (only used for reading).

    When writing, if the file size is not known in advance but may exceed
    2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
    files.  If the size is known in advance, it is best to pass a ZipInfo
    instance for name, with zinfo.file_size set.
    """
    if mode not in {"r", "w"}:
        raise ValueError('open() requires mode "r" or "w"')
    writing = mode == "w"
    if pwd and writing:
        raise ValueError("pwd is only supported for reading files")
    if not self.fp:
        raise ValueError(
            "Attempt to use ZIP archive that was already closed")

    # Make sure we have an info object
    if isinstance(name, ZipInfo):
        # 'name' is already an info object
        zinfo = name
    elif writing:
        zinfo = ZipInfo(name)
        zinfo.compress_type = self.compression
        zinfo._compresslevel = self.compresslevel
    else:
        # Get info object for name
        zinfo = self.getinfo(name)

    if writing:
        return self._open_to_write(zinfo, force_zip64=force_zip64)

    if self._writing:
        raise ValueError("Can't read from the ZIP file while there "
                "is an open writing handle on it. "
                "Close the writing handle before trying to read.")

    # Open for reading:
    self._fileRefCnt += 1
    shared = _SharedFile(self.fp, zinfo.header_offset,
                         self._fpclose, self._lock, lambda: self._writing)
    try:
        # Skip the file header:
        raw_header = shared.read(sizeFileHeader)
        if len(raw_header) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        header = struct.unpack(structFileHeader, raw_header)
        if header[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        fname = shared.read(header[_FH_FILENAME_LENGTH])
        extra_len = header[_FH_EXTRA_FIELD_LENGTH]
        if extra_len:
            shared.seek(extra_len, whence=1)

        if zinfo.flag_bits & _MASK_COMPRESSED_PATCH:
            # Zip 2.7: compressed patched data
            raise NotImplementedError("compressed patched data (flag bit 5)")

        if zinfo.flag_bits & _MASK_STRONG_ENCRYPTION:
            # strong encryption
            raise NotImplementedError("strong encryption (flag bit 6)")

        if header[_FH_GENERAL_PURPOSE_FLAG_BITS] & _MASK_UTF_FILENAME:
            # UTF-8 filename
            name_encoding = "utf-8"
        else:
            name_encoding = self.metadata_encoding or "cp437"
        fname_str = fname.decode(name_encoding)

        # The local header must name the same member as the directory.
        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        if zinfo.flag_bits & _MASK_ENCRYPTED:
            # Fall back to the archive-wide default password.
            pwd = pwd or self.pwd
            if pwd and not isinstance(pwd, bytes):
                raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
            if not pwd:
                raise RuntimeError("File %r is encrypted, password "
                                   "required for extraction" % name)
        else:
            pwd = None

        return ZipExtFile(shared, mode, zinfo, pwd, True)
    except BaseException:
        # Do not leak the shared handle when header validation fails.
        shared.close()
        raise
1649
def _open_to_write(self, zinfo, force_zip64=False):
    """Start a new member described by *zinfo* and return a write handle.

    Resets the size/CRC/flag fields of *zinfo*, decides whether the ZIP64
    layout is used, writes the provisional local file header and marks the
    archive as having an open writer.
    Raises ValueError for conflicting options or a second concurrent
    writer, and LargeZipFile when ZIP64 would be needed but is disallowed.
    """
    if force_zip64 and not self._allowZip64:
        raise ValueError(
            "force_zip64 is True, but allowZip64 was False when opening "
            "the ZIP file."
        )
    if self._writing:
        raise ValueError("Can't write to the ZIP file while there is "
                         "another write handle open on it. "
                         "Close the first handle before opening another.")

    # Size and CRC are overwritten with correct data after processing the file
    zinfo.compress_size = 0
    zinfo.CRC = 0

    flags = 0x00
    if zinfo.compress_type == ZIP_LZMA:
        # Compressed data includes an end-of-stream (EOS) marker
        flags |= _MASK_COMPRESS_OPTION_1
    if not self._seekable:
        # The header cannot be rewritten later, so sizes follow the data.
        flags |= _MASK_USE_DATA_DESCRIPTOR
    zinfo.flag_bits = flags

    if not zinfo.external_attr:
        zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------

    # Compressed size can be larger than uncompressed size
    zip64 = force_zip64 or (zinfo.file_size * 1.05 > ZIP64_LIMIT)
    if zip64 and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")

    if self._seekable:
        self.fp.seek(self.start_dir)
    zinfo.header_offset = self.fp.tell()

    self._writecheck(zinfo)
    self._didModify = True

    self.fp.write(zinfo.FileHeader(zip64))

    self._writing = True
    return _ZipWriteFile(self, zinfo, zip64)
1691
def extract(self, member, path=None, pwd=None):
    """Extract a member from the archive to the current working directory,
    using its full name. Its file information is extracted as accurately
    as possible. `member' may be a filename or a ZipInfo object. You can
    specify a different directory using `path'.
    """
    destination = os.getcwd() if path is None else os.fspath(path)
    return self._extract_member(member, destination, pwd)
1704
def extractall(self, path=None, members=None, pwd=None):
    """Extract all members from the archive to the current working
    directory. `path' specifies a different directory to extract to.
    `members' is optional and must be a subset of the list returned
    by namelist().
    """
    if members is None:
        members = self.namelist()

    destination = os.getcwd() if path is None else os.fspath(path)

    for entry in members:
        self._extract_member(entry, destination, pwd)
1721
@classmethod
def _sanitize_windows_name(cls, arcname, pathsep):
    """Replace bad characters and remove trailing dots from parts."""
    table = cls._windows_illegal_name_trans_table
    if not table:
        # Build the translation table once and cache it on the class.
        illegal = ':<>|"?*'
        table = str.maketrans(illegal, '_' * len(illegal))
        cls._windows_illegal_name_trans_table = table
    # Strip trailing dots and spaces from every path component, then
    # rejoin while dropping the components that ended up empty.
    trimmed = [part.rstrip(' .')
               for part in arcname.translate(table).split(pathsep)]
    return pathsep.join(part for part in trimmed if part)
1736
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
       file on the path targetpath.

    *member* may also be a member name, which is looked up first. The
    archive name is turned into a relative path below *targetpath*:
    drive letters, empty, "." and ".." components are dropped. Returns the
    path that was created. Raises ValueError if nothing is left of the name.
    """
    if not isinstance(member, ZipInfo):
        member = self.getinfo(member)

    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    arcname = member.filename.replace('/', os.path.sep)

    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # interpret absolute pathname as relative, remove drive letter or
    # UNC path, redundant separators, "." and ".." components.
    arcname = os.path.splitdrive(arcname)[1]
    invalid_path_parts = ('', os.path.curdir, os.path.pardir)
    arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
                               if x not in invalid_path_parts)
    if os.path.sep == '\\':
        # filter illegal characters on Windows
        arcname = self._sanitize_windows_name(arcname, os.path.sep)

    if not arcname:
        raise ValueError("Empty filename.")

    targetpath = os.path.join(targetpath, arcname)
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        # exist_ok: another extractor may create the directory between the
        # existence check above and this call.
        os.makedirs(upperdirs, exist_ok=True)

    if member.is_dir():
        if not os.path.isdir(targetpath):
            try:
                os.mkdir(targetpath)
            except FileExistsError:
                # Lost a race with a concurrent creator: fine as long as
                # what now exists is a directory.
                if not os.path.isdir(targetpath):
                    raise
        return targetpath

    with self.open(member, pwd=pwd) as source, \
         open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1781
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    Warns about a duplicate member name, and raises ValueError for a
    read-only or closed archive, or LargeZipFile when ZIP64 is disallowed
    but the member count, file size or header offset would require it.
    """
    if zinfo.filename in self.NameToInfo:
        import warnings
        warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
    if self.mode not in ('w', 'x', 'a'):
        raise ValueError("write() requires mode 'w', 'x', or 'a'")
    if not self.fp:
        raise ValueError(
            "Attempt to write ZIP archive that was already closed")
    _check_compression(zinfo.compress_type)

    if self._allowZip64:
        return
    # Without ZIP64 every limit below is a hard error.
    if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
        reason = "Files count"
    elif zinfo.file_size > ZIP64_LIMIT:
        reason = "Filesize"
    elif zinfo.header_offset > ZIP64_LIMIT:
        reason = "Zipfile size"
    else:
        return
    raise LargeZipFile(reason +
                       " would require ZIP64 extensions")
1804
def write(self, filename, arcname=None,
          compress_type=None, compresslevel=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    A directory is added as a directory entry. For a regular file,
    *compress_type* and *compresslevel* override the archive defaults
    when given.
    """
    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists"
        )

    zinfo = ZipInfo.from_file(filename, arcname,
                              strict_timestamps=self._strict_timestamps)

    if zinfo.is_dir():
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.mkdir(zinfo)
        return

    zinfo.compress_type = (self.compression if compress_type is None
                           else compress_type)
    zinfo._compresslevel = (self.compresslevel if compresslevel is None
                            else compresslevel)

    with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
        shutil.copyfileobj(src, dest, 1024*8)
1837
def writestr(self, zinfo_or_arcname, data,
             compress_type=None, compresslevel=None):
    """Write a file into the archive.  The contents is 'data', which
    may be either a 'str' or a 'bytes' instance; if it is a 'str',
    it is encoded as UTF-8 first.
    'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive."""
    if isinstance(data, str):
        data = data.encode("utf-8")

    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        # Build a fresh entry stamped with the current local time and the
        # archive's default compression settings.
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo._compresslevel = self.compresslevel
        if zinfo.filename.endswith('/'):
            zinfo.external_attr = 0o40775 << 16  # drwxrwxr-x
            zinfo.external_attr |= 0x10          # MS-DOS directory flag
        else:
            zinfo.external_attr = 0o600 << 16    # ?rw-------

    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists."
        )

    # Explicit arguments take precedence over what the entry carries.
    if compress_type is not None:
        zinfo.compress_type = compress_type

    if compresslevel is not None:
        zinfo._compresslevel = compresslevel

    zinfo.file_size = len(data)  # Uncompressed size
    with self._lock:
        with self.open(zinfo, mode='w') as dest:
            dest.write(data)
1878
def mkdir(self, zinfo_or_directory_name, mode=511):
    """Creates a directory inside the zip archive.

    Accepts either a ZipInfo that describes a directory or a directory
    name (a trailing "/" is appended when missing); *mode* supplies the
    permission bits for an entry built from a name.
    Raises ValueError for a ZipInfo that is not a directory and TypeError
    for any other argument type.
    """
    if isinstance(zinfo_or_directory_name, ZipInfo):
        zinfo = zinfo_or_directory_name
        if not zinfo.is_dir():
            raise ValueError("The given ZipInfo does not describe a directory")
    elif isinstance(zinfo_or_directory_name, str):
        directory_name = zinfo_or_directory_name
        if not directory_name.endswith("/"):
            directory_name += "/"
        zinfo = ZipInfo(directory_name)
        zinfo.compress_size = 0
        zinfo.CRC = 0
        zinfo.file_size = 0
        # Directory type + permission bits in the high word, plus the 0x10
        # bit in the low word.
        zinfo.external_attr = (((0o40000 | mode) & 0xFFFF) << 16) | 0x10
    else:
        raise TypeError("Expected type str or ZipInfo")

    with self._lock:
        if self._seekable:
            self.fp.seek(self.start_dir)
        zinfo.header_offset = self.fp.tell()  # Start of header bytes
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= _MASK_COMPRESS_OPTION_1

        self._writecheck(zinfo)
        self._didModify = True

        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        # The central directory now starts after this header.
        self.start_dir = self.fp.tell()
1913
def __del__(self):
    """Call the "close()" method in case the user forgot."""
    # close() returns immediately when the archive is already closed.
    self.close()
1917
def close(self):
    """Close the file, and for mode 'w', 'x' and 'a' write the ending
    records.

    Closing an already closed archive does nothing. Raises ValueError
    while a write handle is still open. The file object is released even
    if writing the ending records fails.
    """
    if self.fp is None:
        return

    if self._writing:
        raise ValueError("Can't close the ZIP file while there is "
                         "an open writing handle on it. "
                         "Close the writing handle before closing the zip.")

    try:
        # write ending records
        if self.mode in ('w', 'x', 'a') and self._didModify:
            with self._lock:
                if self._seekable:
                    self.fp.seek(self.start_dir)
                self._write_end_record()
    finally:
        handle = self.fp
        self.fp = None
        self._fpclose(handle)
1939
def _write_end_record(self):
    """Write the central directory and the end-of-archive records.

    One central-directory header (followed by the encoded file name, the
    extra data and the entry comment) is written to self.fp for every
    entry in self.filelist.  When the entry count, the central directory
    offset or the central directory size exceeds its limit, the ZIP64
    end-of-archive record and locator are written as well.  The regular
    end-of-archive record and the archive comment always come last.  In
    mode "a" the file is truncated after the comment; the stream is
    flushed in every mode.

    Raises LargeZipFile if ZIP64 records are needed while
    self._allowZip64 is false.
    """
    fp = self.fp

    for entry in self.filelist:         # write central directory
        # Fold the timestamp into the packed DOS date and time values
        # (seconds are stored halved).
        stamp = entry.date_time
        dos_date = ((stamp[0] - 1980) << 9) | (stamp[1] << 5) | stamp[2]
        dos_time = (stamp[3] << 11) | (stamp[4] << 5) | (stamp[5] // 2)

        # Values too large for the fixed-width header fields are moved
        # into a ZIP64 extra field; the header then carries 0xffffffff.
        zip64_values = []
        if (entry.file_size > ZIP64_LIMIT
                or entry.compress_size > ZIP64_LIMIT):
            zip64_values += [entry.file_size, entry.compress_size]
            packed_file_size = packed_compress_size = 0xffffffff
        else:
            packed_file_size = entry.file_size
            packed_compress_size = entry.compress_size

        if entry.header_offset > ZIP64_LIMIT:
            zip64_values.append(entry.header_offset)
            packed_offset = 0xffffffff
        else:
            packed_offset = entry.header_offset

        if zip64_values:
            # Drop any existing field with header id 1, then put a freshly
            # built ZIP64 field in front of the remaining extra data.
            remaining_extra = _strip_extra(entry.extra, (1,))
            value_count = len(zip64_values)
            extra_bytes = struct.pack(
                '<HH' + 'Q' * value_count,
                1, 8 * value_count, *zip64_values) + remaining_extra
            needed_version = ZIP64_VERSION
        else:
            extra_bytes = entry.extra
            needed_version = 0

        if entry.compress_type == ZIP_BZIP2:
            needed_version = max(BZIP2_VERSION, needed_version)
        elif entry.compress_type == ZIP_LZMA:
            needed_version = max(LZMA_VERSION, needed_version)

        extract_version = max(needed_version, entry.extract_version)
        create_version = max(needed_version, entry.create_version)
        encoded_name, flag_bits = entry._encodeFilenameFlags()
        header = struct.pack(
            structCentralDir,
            stringCentralDir, create_version,
            entry.create_system, extract_version, entry.reserved,
            flag_bits, entry.compress_type, dos_time, dos_date,
            entry.CRC, packed_compress_size, packed_file_size,
            len(encoded_name), len(extra_bytes), len(entry.comment),
            0, entry.internal_attr, entry.external_attr,
            packed_offset)
        for chunk in (header, encoded_name, extra_bytes, entry.comment):
            fp.write(chunk)

    # Write end-of-zip-archive record
    dir_end = fp.tell()
    entry_count = len(self.filelist)
    dir_size = dir_end - self.start_dir
    dir_offset = self.start_dir

    if entry_count > ZIP_FILECOUNT_LIMIT:
        zip64_reason = "Files count"
    elif dir_offset > ZIP64_LIMIT:
        zip64_reason = "Central directory offset"
    elif dir_size > ZIP64_LIMIT:
        zip64_reason = "Central directory size"
    else:
        zip64_reason = None

    if zip64_reason:
        # Need to write the ZIP64 end-of-archive records
        if not self._allowZip64:
            raise LargeZipFile(zip64_reason +
                               " would require ZIP64 extensions")
        fp.write(struct.pack(
            structEndArchive64, stringEndArchive64,
            44, 45, 45, 0, 0, entry_count, entry_count,
            dir_size, dir_offset))
        fp.write(struct.pack(
            structEndArchive64Locator,
            stringEndArchive64Locator, 0, dir_end, 1))
        # The regular end record below can only hold clamped values.
        entry_count = min(entry_count, 0xFFFF)
        dir_size = min(dir_size, 0xFFFFFFFF)
        dir_offset = min(dir_offset, 0xFFFFFFFF)

    fp.write(struct.pack(structEndArchive, stringEndArchive,
                         0, 0, entry_count, entry_count,
                         dir_size, dir_offset, len(self._comment)))
    fp.write(self._comment)
    if self.mode == "a":
        fp.truncate()
    fp.flush()
2033
def _fpclose(self, fp):
    """Drop one reference to *fp* and close it when none remain.

    The reference counter self._fileRefCnt is decremented; *fp* is closed
    only once the counter reaches zero and self._filePassed is false
    (presumably: the file object was opened by this archive rather than
    handed in by the caller -- confirm against the constructor).
    """
    assert self._fileRefCnt > 0
    self._fileRefCnt -= 1
    if self._fileRefCnt or self._filePassed:
        return
    fp.close()
2039
2040
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        """Open the archive like ZipFile and remember *optimize*.

        optimize selects which compiled file writepy() stores: -1 reuses
        whichever up-to-date compiled file is already present (compiling
        with the running interpreter's level otherwise); 0, 1 or 2 force
        that optimization level.  Any other value makes writepy() raise
        ValueError.
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename="", filterfunc=None):
        """Add Python modules found at "pathname" to the ZIP archive.

        If pathname is a package directory (it contains __init__.py), the
        package is added recursively: its __init__ module, every *.py
        file directly inside it and every sub-directory that is itself a
        package.  If pathname is a plain directory, every *.py file
        directly inside it is added.  Otherwise pathname must be a
        Python *.py file and that single module is added.

        Modules are stored as module.pyc, compiled first if necessary;
        when compilation fails the *.py source is stored instead.
        basename, if given, is the archive path prefix the modules are
        stored under; package names are appended to it.

        filterfunc, if given, is called with pathname itself, with every
        *.py file found in a directory (except a package's own
        __init__.py) and, through the recursive call, with every
        sub-package directory.  When it returns a false value that file
        or directory is skipped.

        Raises RuntimeError if pathname is a file not ending in ".py".
        """
        pathname = os.fspath(pathname)
        if filterfunc and not filterfunc(pathname):
            if self.debug:
                label = 'path' if os.path.isdir(pathname) else 'file'
                print('%s %r skipped by filterfunc' % (label, pathname))
            return

        if not os.path.isdir(pathname):
            # A single module.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            self._write_module(pathname, basename, "Adding file")
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in sorted(os.listdir(pathname)):
                if os.path.splitext(filename)[1] == ".py":
                    self._write_filtered_module(
                        os.path.join(pathname, filename), basename,
                        filterfunc)
            return

        # This is a package directory, add it
        name = os.path.split(pathname)[1]
        basename = "%s/%s" % (basename, name) if basename else name
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        # The package's own __init__ is added without consulting
        # filterfunc; the directory itself was already accepted above.
        self._write_module(initname, basename)
        dirlist = sorted(os.listdir(pathname))
        dirlist.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for filename in dirlist:
            path = os.path.join(pathname, filename)
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename,
                                 filterfunc=filterfunc)  # Recursive call
            elif os.path.splitext(filename)[1] == ".py":
                self._write_filtered_module(path, basename, filterfunc)

    def _write_module(self, source_path, basename, label="Adding"):
        """Store the module whose source file is *source_path* (a *.py path).

        The file to store and its archive name come from _get_codename();
        *label* is the prefix of the debug message.
        """
        fname, arcname = self._get_codename(source_path[0:-3], basename)
        if self.debug:
            print(label, arcname)
        self.write(fname, arcname)

    def _write_filtered_module(self, source_path, basename, filterfunc):
        """Store the module at *source_path* unless filterfunc rejects it."""
        if filterfunc and not filterfunc(source_path):
            if self.debug:
                print('file %r skipped by filterfunc' % source_path)
            return
        self._write_module(source_path, basename)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path (a source path without its ".py"
        suffix), return the path of the file to store and the name to
        store it under, compiling if necessary.  For example, given
        /python/lib/string and an empty basename, the archive name is
        string.pyc and the file name is an up-to-date compiled file for
        /python/lib/string.py (the legacy string.pyc next to the source
        or one under __pycache__).  If compilation fails, the source file
        itself is returned and the archive name is string.py.  A
        non-empty basename is prepended to the archive name with "/".

        Raises ValueError if the optimize value given to the constructor
        is not -1, 0, 1 or 2.
        """
        def _compile(file, optimize=-1):
            # Byte-compile *file*; report a compile error on stdout and
            # return False instead of propagating it.
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
        pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
        pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)

        def _is_current(compiled):
            # True when *compiled* exists and is not older than the source.
            return (os.path.isfile(compiled) and
                    os.stat(compiled).st_mtime >= os.stat(file_py).st_mtime)

        if self._optimize == -1:
            # legacy mode: use whatever file is present.  The legacy .pyc
            # next to the source wins, then the __pycache__ files in
            # order of optimization level; all are written to the legacy
            # pyc file name in the archive.
            for candidate in (file_pyc, pycache_opt0,
                              pycache_opt1, pycache_opt2):
                if _is_current(candidate):
                    fname, arcname = candidate, file_pyc
                    break
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    if sys.flags.optimize == 0:
                        fname = pycache_opt0
                    elif sys.flags.optimize == 1:
                        fname = pycache_opt1
                    else:
                        fname = pycache_opt2
                    arcname = file_pyc
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            if self._optimize == 0:
                fname = pycache_opt0
            elif self._optimize == 1:
                fname = pycache_opt1
            elif self._optimize == 2:
                fname = pycache_opt2
            else:
                msg = "invalid value for 'optimize': {!r}".format(self._optimize)
                raise ValueError(msg)
            arcname = file_pyc
            if not _is_current(fname):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py

        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
2213
2214
2215 from ._path import ( # noqa: E402
2216 Path,
2217
2218 # used privately for tests
2219 CompleteDirs, # noqa: F401
2220 )
2221
2222 # used privately for tests
2223 from .__main__ import main # noqa: F401, E402