1 """Filename matching with shell patterns.
2
3 fnmatch(FILENAME, PATTERN) matches according to the local convention.
4 fnmatchcase(FILENAME, PATTERN) always takes case in account.
5
6 The functions operate by translating the pattern into a regular
7 expression. They cache the compiled regular expressions for speed.
8
9 The function translate(PATTERN) returns a regular expression
10 corresponding to PATTERN. (It does not compile it.)
11 """
12 import os
13 import posixpath
14 import re
15 import functools
16
17 __all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]
18
19 def fnmatch(name, pat):
20 """Test whether FILENAME matches PATTERN.
21
22 Patterns are Unix shell style:
23
24 * matches everything
25 ? matches any single character
26 [seq] matches any character in seq
27 [!seq] matches any char not in seq
28
29 An initial period in FILENAME is not special.
30 Both FILENAME and PATTERN are first case-normalized
31 if the operating system requires it.
32 If you don't want this, use fnmatchcase(FILENAME, PATTERN).
33 """
34 name = os.path.normcase(name)
35 pat = os.path.normcase(pat)
36 return fnmatchcase(name, pat)
37
38 @functools.lru_cache(maxsize=32768, typed=True)
39 def _compile_pattern(pat):
40 if isinstance(pat, bytes):
41 pat_str = str(pat, 'ISO-8859-1')
42 res_str = translate(pat_str)
43 res = bytes(res_str, 'ISO-8859-1')
44 else:
45 res = translate(pat)
46 return re.compile(res).match
47
48 def filter(names, pat):
49 """Construct a list from those elements of the iterable NAMES that match PAT."""
50 result = []
51 pat = os.path.normcase(pat)
52 match = _compile_pattern(pat)
53 if os.path is posixpath:
54 # normcase on posix is NOP. Optimize it away from the loop.
55 for name in names:
56 if match(name):
57 result.append(name)
58 else:
59 for name in names:
60 if match(os.path.normcase(name)):
61 result.append(name)
62 return result
63
64 def fnmatchcase(name, pat):
65 """Test whether FILENAME matches PATTERN, including case.
66
67 This is a version of fnmatch() which doesn't case-normalize
68 its arguments.
69 """
70 match = _compile_pattern(pat)
71 return match(name) is not None
72
73
74 def translate(pat):
75 """Translate a shell PATTERN to a regular expression.
76
77 There is no way to quote meta-characters.
78 """
79
80 STAR = object()
81 res = []
82 add = res.append
83 i, n = 0, len(pat)
84 while i < n:
85 c = pat[i]
86 i = i+1
87 if c == '*':
88 # compress consecutive `*` into one
89 if (not res) or res[-1] is not STAR:
90 add(STAR)
91 elif c == '?':
92 add('.')
93 elif c == '[':
94 j = i
95 if j < n and pat[j] == '!':
96 j = j+1
97 if j < n and pat[j] == ']':
98 j = j+1
99 while j < n and pat[j] != ']':
100 j = j+1
101 if j >= n:
102 add('\\[')
103 else:
104 stuff = pat[i:j]
105 if '-' not in stuff:
106 stuff = stuff.replace('\\', r'\\')
107 else:
108 chunks = []
109 k = i+2 if pat[i] == '!' else i+1
110 while True:
111 k = pat.find('-', k, j)
112 if k < 0:
113 break
114 chunks.append(pat[i:k])
115 i = k+1
116 k = k+3
117 chunk = pat[i:j]
118 if chunk:
119 chunks.append(chunk)
120 else:
121 chunks[-1] += '-'
122 # Remove empty ranges -- invalid in RE.
123 for k in range(len(chunks)-1, 0, -1):
124 if chunks[k-1][-1] > chunks[k][0]:
125 chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
126 del chunks[k]
127 # Escape backslashes and hyphens for set difference (--).
128 # Hyphens that create ranges shouldn't be escaped.
129 stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
130 for s in chunks)
131 # Escape set operations (&&, ~~ and ||).
132 stuff = re.sub(r'([&~|])', r'\\\1', stuff)
133 i = j+1
134 if not stuff:
135 # Empty range: never match.
136 add('(?!)')
137 elif stuff == '!':
138 # Negated empty range: match any character.
139 add('.')
140 else:
141 if stuff[0] == '!':
142 stuff = '^' + stuff[1:]
143 elif stuff[0] in ('^', '['):
144 stuff = '\\' + stuff
145 add(f'[{stuff}]')
146 else:
147 add(re.escape(c))
148 assert i == n
149
150 # Deal with STARs.
151 inp = res
152 res = []
153 add = res.append
154 i, n = 0, len(inp)
155 # Fixed pieces at the start?
156 while i < n and inp[i] is not STAR:
157 add(inp[i])
158 i += 1
159 # Now deal with STAR fixed STAR fixed ...
160 # For an interior `STAR fixed` pairing, we want to do a minimal
161 # .*? match followed by `fixed`, with no possibility of backtracking.
162 # Atomic groups ("(?>...)") allow us to spell that directly.
163 # Note: people rely on the undocumented ability to join multiple
164 # translate() results together via "|" to build large regexps matching
165 # "one of many" shell patterns.
166 while i < n:
167 assert inp[i] is STAR
168 i += 1
169 if i == n:
170 add(".*")
171 break
172 assert inp[i] is not STAR
173 fixed = []
174 while i < n and inp[i] is not STAR:
175 fixed.append(inp[i])
176 i += 1
177 fixed = "".join(fixed)
178 if i == n:
179 add(".*")
180 add(fixed)
181 else:
182 add(f"(?>.*?{fixed})")
183 assert i == n
184 res = "".join(res)
185 return fr'(?s:{res})\Z'