1 import io
2 import posixpath
3 import zipfile
4 import itertools
5 import contextlib
6 import pathlib
7 import re
8
9 from .glob import translate
10
11
# Public API of this module: only ``Path`` is exported by a star-import.
__all__ = ['Path']
13
14
def _parents(path):
    """
    Yield every proper ancestor of ``path``, nearest first.

    ``path`` is a string whose elements are separated by
    ``posixpath.sep``. The path itself is not part of the result;
    only the directories above it are.

    >>> list(_parents('b/d'))
    ['b']
    >>> list(_parents('/b/d/'))
    ['/b']
    >>> list(_parents('b/d/f/'))
    ['b/d', 'b']
    >>> list(_parents('b'))
    []
    >>> list(_parents(''))
    []
    """
    # The first item of the ancestry is the path itself; skip it.
    lineage = _ancestry(path)
    return itertools.islice(lineage, 1, None)
32
33
34 def _ancestry(path):
35 """
36 Given a path with elements separated by
37 posixpath.sep, generate all elements of that path
38
39 >>> list(_ancestry('b/d'))
40 ['b/d', 'b']
41 >>> list(_ancestry('/b/d/'))
42 ['/b/d', '/b']
43 >>> list(_ancestry('b/d/f/'))
44 ['b/d/f', 'b/d', 'b']
45 >>> list(_ancestry('b'))
46 ['b']
47 >>> list(_ancestry(''))
48 []
49 """
50 path = path.rstrip(posixpath.sep)
51 while path and path != posixpath.sep:
52 yield path
53 path, tail = posixpath.split(path)
54
55
# Alias for ``dict.fromkeys``: the keys of the resulting dict are the
# distinct items of the iterable, kept in first-seen order. Note that the
# result is a dict (iterate it to get the items), not a list.
_dedupe = dict.fromkeys
"""Deduplicate an iterable in original order"""
58
59
60 def _difference(minuend, subtrahend):
61 """
62 Return items in minuend not in subtrahend, retaining order
63 with O(1) lookup.
64 """
65 return itertools.filterfalse(set(subtrahend).__contains__, minuend)
66
67
class InitializedState:
    """
    Mix-in to save the initialization state for pickling.

    The positional and keyword arguments given to ``__init__`` are
    remembered and used as the pickled state. Restoring that state
    re-runs the next initializer in the MRO with the same arguments.
    """

    def __init__(self, *args, **kwargs):
        self.__args = args
        self.__kwargs = kwargs
        super().__init__(*args, **kwargs)

    def __getstate__(self):
        """Return the ``(args, kwargs)`` pair the object was built from."""
        return self.__args, self.__kwargs

    def __setstate__(self, state):
        """Rebuild the object from an ``(args, kwargs)`` pair."""
        args, kwargs = state
        # Remember the arguments on the restored object as well; without
        # this, __getstate__ raises AttributeError and a restored object
        # cannot be pickled a second time.
        self.__args = args
        self.__kwargs = kwargs
        super().__init__(*args, **kwargs)
84
85
class CompleteDirs(InitializedState, zipfile.ZipFile):
    """
    A ZipFile subclass whose namelist also reports directories that
    are only implied by the names of the archive's members.

    >>> list(CompleteDirs._implied_dirs(['foo/bar.txt', 'foo/bar/baz.txt']))
    ['foo/', 'foo/bar/']
    >>> list(CompleteDirs._implied_dirs(['foo/bar.txt', 'foo/bar/baz.txt', 'foo/bar/']))
    ['foo/']
    """

    @staticmethod
    def _implied_dirs(names):
        """
        Return the directory names (with trailing separator) that are
        ancestors of some entry in ``names`` but are not themselves
        listed in ``names``, without duplicates and in first-seen order.
        """
        ancestor_dirs = (
            ancestor + posixpath.sep
            for name in names
            for ancestor in _parents(name)
        )
        missing = _difference(ancestor_dirs, names)
        return _dedupe(missing)

    def namelist(self):
        """Return the archive's names followed by its implied directories."""
        explicit = super().namelist()
        implied = list(self._implied_dirs(explicit))
        return explicit + implied

    def _name_set(self):
        """Return the names from ``namelist()`` as a set."""
        return set(self.namelist())

    def resolve_dir(self, name):
        """
        Return ``name`` with a trailing slash when it is only known as a
        directory; otherwise return ``name`` unchanged.
        """
        known = self._name_set()
        as_dir = name + '/'
        # An exact match wins; the slash is only added when the plain
        # name is unknown and the directory form is known.
        if name in known or as_dir not in known:
            return name
        return as_dir

    def getinfo(self, name):
        """
        Return the ZipInfo for ``name``, supplying a fresh ZipInfo for
        implied directories that have no entry of their own.

        The original KeyError is re-raised when ``name`` is neither a
        member nor a known directory name.
        """
        try:
            return super().getinfo(name)
        except KeyError:
            is_implied_dir = name.endswith('/') and name in self._name_set()
            if is_implied_dir:
                return zipfile.ZipInfo(filename=name)
            raise

    @classmethod
    def make(cls, source):
        """
        Given a source (filename or zipfile), return an instance of
        CompleteDirs or of a subclass.

        An object that is already a CompleteDirs is returned as is.
        Anything that is not a ZipFile is passed to the ``cls``
        constructor. A plain ZipFile has its ``__class__`` reassigned
        in place and is returned.
        """
        if isinstance(source, CompleteDirs):
            return source

        if not isinstance(source, zipfile.ZipFile):
            return cls(source)

        # Only a zipfile opened for reading may take the requested class
        # (e.g. FastLookup); any other mode falls back to CompleteDirs.
        target = cls if 'r' in source.mode else CompleteDirs
        source.__class__ = target
        return source
149
150
class FastLookup(CompleteDirs):
    """
    ZipFile subclass to ensure implicit dirs exist and are resolved
    rapidly.

    The name list and the name set are computed on first use, stored
    on the instance, and returned as-is on every later call.
    """

    def namelist(self):
        """Return the name list, computing and storing it on first use."""
        try:
            return self.__names
        except AttributeError:
            # Nothing stored yet; compute it below.
            pass
        self.__names = super().namelist()
        return self.__names

    def _name_set(self):
        """Return the name set, computing and storing it on first use."""
        try:
            return self.__lookup
        except AttributeError:
            # Nothing stored yet; compute it below.
            pass
        self.__lookup = super()._name_set()
        return self.__lookup
168
169
170 def _extract_text_encoding(encoding=None, *args, **kwargs):
171 # stacklevel=3 so that the caller of the caller see any warning.
172 return io.text_encoding(encoding, 3), args, kwargs
173
174
class Path:
    """
    A pathlib-compatible interface for zip files.

    Consider a zip file with this structure::

        .
        ├── a.txt
        └── b
            ├── c.txt
            └── d
                └── e.txt

    >>> data = io.BytesIO()
    >>> zf = zipfile.ZipFile(data, 'w')
    >>> zf.writestr('a.txt', 'content of a')
    >>> zf.writestr('b/c.txt', 'content of c')
    >>> zf.writestr('b/d/e.txt', 'content of e')
    >>> zf.filename = 'mem/abcde.zip'

    Path accepts the zipfile object itself or a filename

    >>> root = Path(zf)

    From there, several path operations are available.

    Directory iteration (including the zip file itself):

    >>> a, b = root.iterdir()
    >>> a
    Path('mem/abcde.zip', 'a.txt')
    >>> b
    Path('mem/abcde.zip', 'b/')

    name property:

    >>> b.name
    'b'

    join with divide operator:

    >>> c = b / 'c.txt'
    >>> c
    Path('mem/abcde.zip', 'b/c.txt')
    >>> c.name
    'c.txt'

    Read text:

    >>> c.read_text(encoding='utf-8')
    'content of c'

    existence:

    >>> c.exists()
    True
    >>> (b / 'missing.txt').exists()
    False

    Coercion to string:

    >>> import os
    >>> str(c).replace(os.sep, posixpath.sep)
    'mem/abcde.zip/b/c.txt'

    At the root, ``name``, ``filename``, and ``parent``
    resolve to the zipfile. Note these attributes are not
    valid and will raise a ``ValueError`` if the zipfile
    has no filename.

    >>> root.name
    'abcde.zip'
    >>> str(root.filename).replace(os.sep, posixpath.sep)
    'mem/abcde.zip'
    >>> str(root.parent)
    'mem'
    """

    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"

    def __init__(self, root, at=""):
        """
        Construct a Path from a ZipFile or filename.

        ``at`` is the location inside the archive; the empty string
        stands for the archive root.

        Note: When the source is an existing ZipFile object,
        its type (__class__) will be mutated to a
        specialized type. If the caller wishes to retain the
        original type, the caller should either create a
        separate ZipFile object or pass a filename.
        """
        self.root = FastLookup.make(root)
        self.at = at

    def __eq__(self, other):
        """
        Two paths are equal when they have the same class, the same
        root object and the same location.

        >>> Path(zipfile.ZipFile(io.BytesIO(), 'w')) == 'foo'
        False
        """
        if self.__class__ is other.__class__:
            return (self.root, self.at) == (other.root, other.at)
        return NotImplemented

    def __hash__(self):
        identity = (self.root, self.at)
        return hash(identity)

    def open(self, mode='r', *args, pwd=None, **kwargs):
        """
        Open this entry as text or binary following the semantics
        of ``pathlib.Path.open()`` by passing arguments through
        to io.TextIOWrapper().

        Raises IsADirectoryError for a directory, FileNotFoundError
        when reading an entry that does not exist, and ValueError when
        extra arguments are supplied together with a binary mode.
        """
        if self.is_dir():
            raise IsADirectoryError(self)
        # Only the first character ('r' or 'w') is handed to ZipFile.open.
        zip_mode = mode[0]
        if not self.exists() and zip_mode == 'r':
            raise FileNotFoundError(self)
        stream = self.root.open(self.at, zip_mode, pwd=pwd)
        wants_text = 'b' not in mode
        if wants_text:
            encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
            return io.TextIOWrapper(stream, encoding, *args, **kwargs)
        if args or kwargs:
            raise ValueError("encoding args invalid for binary operation")
        return stream

    def _base(self):
        """
        Return a PurePosixPath for this location, or for the archive's
        filename when this is the root.
        """
        target = self.at or self.root.filename
        return pathlib.PurePosixPath(target)

    @property
    def name(self):
        """Final component of the location (or of the archive filename)."""
        base = self._base()
        return base.name

    @property
    def suffix(self):
        """Last suffix of the final component."""
        base = self._base()
        return base.suffix

    @property
    def suffixes(self):
        """All suffixes of the final component, as a list."""
        base = self._base()
        return base.suffixes

    @property
    def stem(self):
        """Final component without its last suffix."""
        base = self._base()
        return base.stem

    @property
    def filename(self):
        """The archive's filename joined with this location, as a pathlib.Path."""
        archive = pathlib.Path(self.root.filename)
        return archive.joinpath(self.at)

    def read_text(self, *args, **kwargs):
        """Return the decoded contents of this entry as a string."""
        encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
        with self.open('r', encoding, *args, **kwargs) as text_stream:
            return text_stream.read()

    def read_bytes(self):
        """Return the raw contents of this entry as bytes."""
        with self.open('rb') as byte_stream:
            return byte_stream.read()

    def _is_child(self, path):
        """Return whether ``path`` sits directly inside this location."""
        own_location = self.at.rstrip("/")
        containing_dir = posixpath.dirname(path.at.rstrip("/"))
        return containing_dir == own_location

    def _next(self, at):
        """Return a path of the same class and root at another location."""
        return self.__class__(self.root, at)

    def is_dir(self):
        """The root and any location ending in a slash count as directories."""
        if not self.at:
            return True
        return self.at.endswith("/")

    def is_file(self):
        """Return whether this path exists and is not a directory."""
        if not self.exists():
            return False
        return not self.is_dir()

    def exists(self):
        """Return whether this location is among the archive's names."""
        known = self.root._name_set()
        return self.at in known

    def iterdir(self):
        """
        Return an iterator over the direct children of this directory.

        Raises ValueError when this path is not a directory.
        """
        if not self.is_dir():
            raise ValueError("Can't listdir a file")
        candidates = (self._next(name) for name in self.root.namelist())
        return filter(self._is_child, candidates)

    def match(self, path_pattern):
        """Match this location against a pattern, PurePosixPath-style."""
        location = pathlib.PurePosixPath(self.at)
        return location.match(path_pattern)

    def is_symlink(self):
        """
        Return whether this path is a symlink. Always false (python/cpython#82102).
        """
        return False

    def glob(self, pattern):
        """
        Return an iterator of paths under this location whose names
        fully match ``pattern``.

        Raises ValueError for an empty pattern.
        """
        if not pattern:
            raise ValueError(f"Unacceptable pattern: {pattern!r}")

        # Anchor the translated pattern at this location, taken literally.
        anchored = re.escape(self.at) + translate(pattern)
        matches = re.compile(anchored).fullmatch
        selected = filter(matches, self.root.namelist())
        return map(self._next, selected)

    def rglob(self, pattern):
        """Glob for ``pattern`` at any depth below this location."""
        return self.glob(f'**/{pattern}')

    def relative_to(self, other, *extra):
        """
        Return, as a string, this path relative to ``other`` joined
        with ``extra``.
        """
        start = other.joinpath(*extra)
        return posixpath.relpath(str(self), str(start))

    def __str__(self):
        return posixpath.join(self.root.filename, self.at)

    def __repr__(self):
        return self.__repr.format(self=self)

    def joinpath(self, *other):
        """
        Return the path reached by appending ``other`` to this
        location, with a trailing slash when the result is known to
        the archive only as a directory.
        """
        joined = posixpath.join(self.at, *other)
        resolved = self.root.resolve_dir(joined)
        return self._next(resolved)

    __truediv__ = joinpath

    @property
    def parent(self):
        """
        The containing directory.

        At the archive root this is the parent of the archive's
        filename; otherwise it is another path within the same archive.
        """
        if not self.at:
            return self.filename.parent
        enclosing = posixpath.dirname(self.at.rstrip('/'))
        # A non-root directory location carries a trailing slash; the
        # root stays the empty string.
        parent_at = enclosing + '/' if enclosing else enclosing
        return self._next(parent_at)