1 """Filename globbing utility."""
2
3 import contextlib
4 import os
5 import re
6 import fnmatch
7 import itertools
8 import stat
9 import sys
10
# Public API of this module (the names exported by a star-import).  The other
# module-level helpers, including glob0(), glob1() and has_magic(), are not
# listed here.
__all__ = ["glob", "iglob", "escape"]
12
def glob(pathname, *, root_dir=None, dir_fd=None, recursive=False,
         include_hidden=False):
    """Return a list of paths matching a pathname pattern.

    This is the eager form of iglob(): all arguments are forwarded to it
    unchanged and the resulting iterator is collected into a list.

    The pattern may contain simple shell-style wildcards a la
    fnmatch. Unlike fnmatch, filenames starting with a
    dot are special cases that are not matched by '*' and '?'
    patterns by default.

    If `root_dir` is not None, the pattern is looked up relative to that
    directory; the returned paths do not include `root_dir`.

    If `dir_fd` is not None, it is passed as the `dir_fd` argument of the
    underlying os calls, so lookups are made relative to that directory
    descriptor.

    If `include_hidden` is true, the patterns '*', '?', '**' will match hidden
    directories.

    If `recursive` is true, the pattern '**' will match any files and
    zero or more directories and subdirectories.
    """
    matches = iglob(pathname, root_dir=root_dir, dir_fd=dir_fd,
                    recursive=recursive, include_hidden=include_hidden)
    return list(matches)
30
def iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False,
          include_hidden=False):
    """Return an iterator which yields the paths matching a pathname pattern.

    The pattern may contain simple shell-style wildcards a la
    fnmatch. However, unlike fnmatch, filenames starting with a
    dot are special cases that are not matched by '*' and '?'
    patterns unless `include_hidden` is true.

    If `root_dir` is not None, it is converted with os.fspath() and the
    pattern is looked up relative to it.  `dir_fd` is handed to the
    underlying os calls as their `dir_fd` argument.

    If recursive is true, the pattern '**' will match any files and
    zero or more directories and subdirectories.

    The "glob.glob" and "glob.glob/2" audit events are raised immediately,
    before any result is produced.
    """
    sys.audit("glob.glob", pathname, recursive)
    sys.audit("glob.glob/2", pathname, recursive, root_dir, dir_fd)

    # An empty slice of the pattern gives an empty str or bytes of the same
    # type as the pattern, which serves as the "no root" value.
    root_dir = pathname[:0] if root_dir is None else os.fspath(root_dir)

    matches = _iglob(pathname, root_dir, dir_fd, recursive, False,
                     include_hidden=include_hidden)

    if pathname and not (recursive and _isrecursive(pathname[:2])):
        return matches

    # For an empty pattern, or a recursive pattern starting with '**', the
    # first result may be an empty string: drop it, but put back anything
    # else that was consumed while peeking.
    head = next(matches, None)
    if head:
        matches = itertools.chain((head,), matches)
    return matches
59
def _iglob(pathname, root_dir, dir_fd, recursive, dironly,
           include_hidden=False):
    """Yield the paths matching *pathname*, expanding one component at a time.

    The pattern is split into a directory part and a final component.  The
    directory part is expanded first (recursively, if it contains magic
    characters) and the final component is then matched inside every
    directory found.  *root_dir* is prepended only for filesystem lookups;
    it is not part of the yielded paths.  *dironly* is forwarded to the
    listing helpers so that only directory names are considered.
    """
    dirname, basename = os.path.split(pathname)

    if not has_magic(pathname):
        # A literal path: it either exists or it does not.
        assert not dironly
        if basename:
            found = _lexists(_join(root_dir, pathname), dir_fd)
        else:
            # Patterns ending with a slash should match only directories
            found = _isdir(_join(root_dir, dirname), dir_fd)
        if found:
            yield pathname
        return

    deep = recursive and _isrecursive(basename)

    if not dirname:
        # No directory part: match directly inside the root.
        matcher = _glob2 if deep else _glob1
        yield from matcher(root_dir, basename, dir_fd, dironly,
                           include_hidden=include_hidden)
        return

    # `os.path.split()` returns the argument itself as a dirname if it is a
    # drive or UNC path.  Prevent an infinite recursion if a drive or UNC path
    # contains magic characters (i.e. r'\\?\C:').
    if dirname != pathname and has_magic(dirname):
        parents = _iglob(dirname, root_dir, dir_fd, recursive, True,
                         include_hidden=include_hidden)
    else:
        parents = [dirname]

    if not has_magic(basename):
        glob_in_dir = _glob0
    elif deep:
        glob_in_dir = _glob2
    else:
        glob_in_dir = _glob1

    for parent in parents:
        found_names = glob_in_dir(_join(root_dir, parent), basename, dir_fd,
                                  dironly, include_hidden=include_hidden)
        for name in found_names:
            yield os.path.join(parent, name)
100
101 # These 2 helper functions non-recursively glob inside a literal directory.
102 # They return a list of basenames. _glob1 accepts a pattern while _glob0
103 # takes a literal basename (so it only has to check for its existence).
104
def _glob1(dirname, pattern, dir_fd, dironly, include_hidden=False):
    """Return the entries of *dirname* whose names match *pattern*.

    Names starting with a dot are discarded before matching, unless
    *include_hidden* is true or *pattern* itself starts with a dot.
    """
    entries = _listdir(dirname, dir_fd, dironly)
    if not include_hidden and not _ishidden(pattern):
        entries = [entry for entry in entries if not _ishidden(entry)]
    return fnmatch.filter(entries, pattern)
110
def _glob0(dirname, basename, dir_fd, dironly, include_hidden=False):
    """Return ``[basename]`` if the literal *basename* exists in *dirname*.

    Returns an empty list otherwise.  *dironly* and *include_hidden* are
    accepted so the signature matches the other glob helpers; they are not
    used here.
    """
    if basename:
        present = _lexists(_join(dirname, basename), dir_fd)
    else:
        # `os.path.split()` returns an empty basename for paths ending with a
        # directory separator.  'q*x/' should match only directories.
        present = _isdir(dirname, dir_fd)
    return [basename] if present else []
121
# The following functions are not public, but third-party code may use them.
123
def glob0(dirname, pattern):
    """Return ``[pattern]`` if the literal name exists in *dirname*, else ``[]``.

    Thin wrapper around _glob0() with no directory descriptor and without
    the directories-only restriction.
    """
    return _glob0(dirname, pattern, dir_fd=None, dironly=False)
126
def glob1(dirname, pattern):
    """Return the names in *dirname* that match the wildcard *pattern*.

    Thin wrapper around _glob1() with no directory descriptor and without
    the directories-only restriction.
    """
    return _glob1(dirname, pattern, dir_fd=None, dironly=False)
129
130 # This helper function recursively yields relative pathnames inside a literal
131 # directory.
132
def _glob2(dirname, pattern, dir_fd, dironly, include_hidden=False):
    """Yield the relative paths matched by a recursive '**' inside *dirname*.

    The first result, when present, is an empty string (or empty bytes,
    matching the type of *pattern*): it stands for "zero directories", i.e.
    *dirname* itself.  It is followed by every path produced by
    _rlistdir().

    The empty match is only reported when *dirname* is empty (the search
    root itself) or is an existing directory, so that a pattern such as
    'missing/**' does not report the non-existent 'missing/'.
    """
    assert _isrecursive(pattern)
    if not dirname or _isdir(dirname, dir_fd):
        yield pattern[:0]
    yield from _rlistdir(dirname, dir_fd, dironly,
                         include_hidden=include_hidden)
138
139 # If dironly is false, yields all file names inside a directory.
140 # If dironly is true, yields only directory names.
def _iterdir(dirname, dir_fd, dironly):
    """Yield the entry names found in *dirname*.

    With *dironly* true, only entries for which ``is_dir()`` is true are
    yielded.  An empty *dirname* means the current directory, or *dir_fd*
    itself when one is given.  When *dir_fd* is given and *dirname* is
    bytes, names are converted with os.fsencode() before being yielded.

    Any OSError (directory cannot be opened or scanned, entry cannot be
    inspected) is swallowed: the affected entry is skipped or the iteration
    simply ends.
    """
    try:
        opened_fd = None
        encode = None
        if dir_fd is None:
            if dirname:
                target = dirname
            elif isinstance(dirname, bytes):
                target = bytes(os.curdir, 'ASCII')
            else:
                target = os.curdir
        else:
            if dirname:
                # Opened here, so it must also be closed here (see finally).
                opened_fd = target = os.open(dirname, _dir_open_flags,
                                             dir_fd=dir_fd)
            else:
                target = dir_fd
            if isinstance(dirname, bytes):
                encode = os.fsencode
        try:
            with os.scandir(target) as entries:
                for entry in entries:
                    try:
                        if dironly and not entry.is_dir():
                            continue
                        name = entry.name
                        yield name if encode is None else encode(name)
                    except OSError:
                        pass
        finally:
            if opened_fd is not None:
                os.close(opened_fd)
    except OSError:
        return
174
def _listdir(dirname, dir_fd, dironly):
    """Return the names produced by _iterdir() as a list.

    The underlying generator is always closed before returning, so the
    resources it holds are released even if building the list fails.
    """
    entries = _iterdir(dirname, dir_fd, dironly)
    try:
        return list(entries)
    finally:
        entries.close()
178
179 # Recursively yields relative pathnames inside a literal directory.
def _rlistdir(dirname, dir_fd, dironly, include_hidden=False):
    """Yield, depth-first, the relative paths of everything below *dirname*.

    Each entry is yielded before its own contents.  Entries whose name
    starts with a dot are neither yielded nor descended into unless
    *include_hidden* is true.  Every yielded entry is descended into;
    entries that cannot be listed contribute nothing further, because the
    listing helper yields no names on OSError.
    """
    for entry in _listdir(dirname, dir_fd, dironly):
        if not include_hidden and _ishidden(entry):
            continue
        yield entry
        if dirname:
            subdir = _join(dirname, entry)
        else:
            subdir = entry
        for nested in _rlistdir(subdir, dir_fd, dironly,
                                include_hidden=include_hidden):
            yield _join(entry, nested)
189
190
def _lexists(pathname, dir_fd):
    """Return True if *pathname* exists, without following a final symlink.

    Same as os.path.lexists(), but with dir_fd: when *dir_fd* is not None
    the check is an os.lstat() relative to that descriptor, and an OSError
    or ValueError from it is reported as False.
    """
    if dir_fd is None:
        return os.path.lexists(pathname)
    try:
        os.lstat(pathname, dir_fd=dir_fd)
    except (OSError, ValueError):
        return False
    return True
201
def _isdir(pathname, dir_fd):
    """Return True if *pathname* is a directory.

    Same as os.path.isdir(), but with dir_fd: when *dir_fd* is not None the
    check is an os.stat() relative to that descriptor, and an OSError or
    ValueError from it is reported as False.
    """
    if dir_fd is None:
        return os.path.isdir(pathname)
    try:
        mode = os.stat(pathname, dir_fd=dir_fd).st_mode
    except (OSError, ValueError):
        return False
    return stat.S_ISDIR(mode)
212
def _join(dirname, basename):
    """Join two path components, short-circuiting when either is empty.

    If one component is empty the other is returned as is (an empty value
    when both are empty); otherwise this is os.path.join().
    """
    if dirname and basename:
        return os.path.join(dirname, basename)
    # It is common for dirname or basename to be empty.
    return dirname or basename
218
# Matches (and captures) a single glob metacharacter: '*', '?' or '['.
# One pattern for str input, one for bytes input.
magic_check = re.compile('([*?[])')
magic_check_bytes = re.compile(b'([*?[])')


def has_magic(s):
    """Return True if *s* (str or bytes) contains any of '*', '?' or '['."""
    checker = magic_check_bytes if isinstance(s, bytes) else magic_check
    return checker.search(s) is not None
228
def _ishidden(path):
    """Return True if *path* (str or bytes) starts with a dot.

    An empty *path* is reported as not hidden rather than raising
    IndexError.
    """
    # Indexing bytes yields an int, so the tuple holds both the str form
    # and the int form of a dot.
    return bool(path) and path[0] in ('.', b'.'[0])
231
def _isrecursive(pattern):
    """Return True if *pattern* is exactly the recursive wildcard '**'.

    The comparison is made against a literal of the same type (str or
    bytes) as *pattern*.
    """
    wildcard = b'**' if isinstance(pattern, bytes) else '**'
    return pattern == wildcard
237
def escape(pathname):
    """Escape all special characters.

    Each of '*', '?' and '[' is wrapped in square brackets so that it is
    matched literally.  The drive part, as split off by
    os.path.splitdrive(), is returned unchanged.  Works on str and bytes.
    """
    # Metacharacters do not work in the drive part and shouldn't be escaped.
    drive, tail = os.path.splitdrive(pathname)
    if isinstance(tail, bytes):
        checker, wrapped = magic_check_bytes, br'[\1]'
    else:
        checker, wrapped = magic_check, r'[\1]'
    return drive + checker.sub(wrapped, tail)
249
250
# Flags passed to os.open() when _iterdir() opens a directory relative to a
# directory descriptor.  O_DIRECTORY is OR-ed in only where the os module
# defines it; elsewhere it contributes 0.
_dir_open_flags = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0)