python (3.12.0)
1 import io
2 import itertools
3 import shlex
4 import string
5 import unittest
6
7
8 # The original test data set was from shellwords, by Hartmut Goebel.
9
# Expected tokenisation table for the non-POSIX (compatibility) lexer.
#
# One test case per line, fields separated by "|":
#     <input>|<token 1>|<token 2>|...|
# The trailing "|" is required: setUp() splits each line on "|" and drops
# the last (empty) field.  A literal backslash + "n" in the <input> field is
# replaced by a real newline before the input is lexed.
#
# NOTE(review): a few rows differ only by a trailing space; runs of spaces
# may have been collapsed when this listing was recovered -- confirm the
# exact spacing against the upstream table.
data = r"""x|x|
foo bar|foo|bar|
foo bar|foo|bar|
foo bar |foo|bar|
foo bar bla fasel|foo|bar|bla|fasel|
x y z xxxx|x|y|z|xxxx|
\x bar|\|x|bar|
\ x bar|\|x|bar|
\ bar|\|bar|
foo \x bar|foo|\|x|bar|
foo \ x bar|foo|\|x|bar|
foo \ bar|foo|\|bar|
foo "bar" bla|foo|"bar"|bla|
"foo" "bar" "bla"|"foo"|"bar"|"bla"|
"foo" bar "bla"|"foo"|bar|"bla"|
"foo" bar bla|"foo"|bar|bla|
foo 'bar' bla|foo|'bar'|bla|
'foo' 'bar' 'bla'|'foo'|'bar'|'bla'|
'foo' bar 'bla'|'foo'|bar|'bla'|
'foo' bar bla|'foo'|bar|bla|
blurb foo"bar"bar"fasel" baz|blurb|foo"bar"bar"fasel"|baz|
blurb foo'bar'bar'fasel' baz|blurb|foo'bar'bar'fasel'|baz|
""|""|
''|''|
foo "" bar|foo|""|bar|
foo '' bar|foo|''|bar|
foo "" "" "" bar|foo|""|""|""|bar|
foo '' '' '' bar|foo|''|''|''|bar|
\""|\|""|
"\"|"\"|
"foo\ bar"|"foo\ bar"|
"foo\\ bar"|"foo\\ bar"|
"foo\\ bar\"|"foo\\ bar\"|
"foo\\" bar\""|"foo\\"|bar|\|""|
"foo\\ bar\" dfadf"|"foo\\ bar\"|dfadf"|
"foo\\\ bar\" dfadf"|"foo\\\ bar\"|dfadf"|
"foo\\\x bar\" dfadf"|"foo\\\x bar\"|dfadf"|
"foo\x bar\" dfadf"|"foo\x bar\"|dfadf"|
\''|\|''|
'foo\ bar'|'foo\ bar'|
'foo\\ bar'|'foo\\ bar'|
"foo\\\x bar\" df'a\ 'df'|"foo\\\x bar\"|df'a|\|'df'|
\"foo"|\|"foo"|
\"foo"\x|\|"foo"|\|x|
"foo\x"|"foo\x"|
"foo\ "|"foo\ "|
foo\ xx|foo|\|xx|
foo\ x\x|foo|\|x|\|x|
foo\ x\x\""|foo|\|x|\|x|\|""|
"foo\ x\x"|"foo\ x\x"|
"foo\ x\x\\"|"foo\ x\x\\"|
"foo\ x\x\\""foobar"|"foo\ x\x\\"|"foobar"|
"foo\ x\x\\"\''"foobar"|"foo\ x\x\\"|\|''|"foobar"|
"foo\ x\x\\"\'"fo'obar"|"foo\ x\x\\"|\|'"fo'|obar"|
"foo\ x\x\\"\'"fo'obar" 'don'\''t'|"foo\ x\x\\"|\|'"fo'|obar"|'don'|\|''|t'|
'foo\ bar'|'foo\ bar'|
'foo\\ bar'|'foo\\ bar'|
foo\ bar|foo|\|bar|
foo#bar\nbaz|foobaz|
:-) ;-)|:|-|)|;|-|)|
áéíóú|á|é|í|ó|ú|
"""
72
# Expected tokenisation table for the POSIX lexer (shlex.split).
#
# One test case per line, fields separated by "|":
#     <input>|<token 1>|<token 2>|...|
# The trailing "|" is required: setUp() splits each line on "|" and drops
# the last (empty) field, so an empty field between two "|" stands for an
# expected empty-string token.  A literal backslash + "n" in the <input>
# field is replaced by a real newline before the input is lexed.
#
# NOTE(review): a few rows differ only by a trailing space; runs of spaces
# may have been collapsed when this listing was recovered -- confirm the
# exact spacing against the upstream table.
posix_data = r"""x|x|
foo bar|foo|bar|
foo bar|foo|bar|
foo bar |foo|bar|
foo bar bla fasel|foo|bar|bla|fasel|
x y z xxxx|x|y|z|xxxx|
\x bar|x|bar|
\ x bar| x|bar|
\ bar| bar|
foo \x bar|foo|x|bar|
foo \ x bar|foo| x|bar|
foo \ bar|foo| bar|
foo "bar" bla|foo|bar|bla|
"foo" "bar" "bla"|foo|bar|bla|
"foo" bar "bla"|foo|bar|bla|
"foo" bar bla|foo|bar|bla|
foo 'bar' bla|foo|bar|bla|
'foo' 'bar' 'bla'|foo|bar|bla|
'foo' bar 'bla'|foo|bar|bla|
'foo' bar bla|foo|bar|bla|
blurb foo"bar"bar"fasel" baz|blurb|foobarbarfasel|baz|
blurb foo'bar'bar'fasel' baz|blurb|foobarbarfasel|baz|
""||
''||
foo "" bar|foo||bar|
foo '' bar|foo||bar|
foo "" "" "" bar|foo||||bar|
foo '' '' '' bar|foo||||bar|
\"|"|
"\""|"|
"foo\ bar"|foo\ bar|
"foo\\ bar"|foo\ bar|
"foo\\ bar\""|foo\ bar"|
"foo\\" bar\"|foo\|bar"|
"foo\\ bar\" dfadf"|foo\ bar" dfadf|
"foo\\\ bar\" dfadf"|foo\\ bar" dfadf|
"foo\\\x bar\" dfadf"|foo\\x bar" dfadf|
"foo\x bar\" dfadf"|foo\x bar" dfadf|
\'|'|
'foo\ bar'|foo\ bar|
'foo\\ bar'|foo\\ bar|
"foo\\\x bar\" df'a\ 'df"|foo\\x bar" df'a\ 'df|
\"foo|"foo|
\"foo\x|"foox|
"foo\x"|foo\x|
"foo\ "|foo\ |
foo\ xx|foo xx|
foo\ x\x|foo xx|
foo\ x\x\"|foo xx"|
"foo\ x\x"|foo\ x\x|
"foo\ x\x\\"|foo\ x\x\|
"foo\ x\x\\""foobar"|foo\ x\x\foobar|
"foo\ x\x\\"\'"foobar"|foo\ x\x\'foobar|
"foo\ x\x\\"\'"fo'obar"|foo\ x\x\'fo'obar|
"foo\ x\x\\"\'"fo'obar" 'don'\''t'|foo\ x\x\'fo'obar|don't|
"foo\ x\x\\"\'"fo'obar" 'don'\''t' \\|foo\ x\x\'fo'obar|don't|\|
'foo\ bar'|foo\ bar|
'foo\\ bar'|foo\\ bar|
foo\ bar|foo bar|
foo#bar\nbaz|foo|baz|
:-) ;-)|:-)|;-)|
áéíóú|áéíóú|
"""
136
class ShlexTest(unittest.TestCase):
    """Tests for the shlex lexer, ``shlex.split``, ``quote`` and ``join``.

    ``setUp`` parses the module-level ``data`` and ``posix_data`` tables into
    ``self.data`` and ``self.posix_data``: lists of ``[input, token, ...]``
    rows that the table-driven tests iterate over.
    """

    @staticmethod
    def _load_table(table):
        """Parse a "|"-separated table into ``[input, token, ...]`` rows.

        Every line must end with "|"; the empty field after that final
        separator is dropped.
        """
        rows = [line.split("|")[:-1] for line in table.splitlines()]
        for row in rows:
            # The tables spell an embedded newline as backslash + "n";
            # turn it into a real newline in the input field only.
            row[0] = row[0].replace(r"\n", "\n")
        return rows

    def _check_punctuation_split(self, sources, expected):
        """Assert each source lexes to *expected* with punctuation_chars=True.

        Every source is lexed twice, with ``whitespace_split`` off and on,
        so all syntax-splitting tests exercise both modes the same way.
        """
        for source, ws in itertools.product(sources, (False, True)):
            lexer = shlex.shlex(source, punctuation_chars=True)
            lexer.whitespace_split = ws
            self.assertEqual(expected, list(lexer),
                             "While splitting '%s' [ws=%s]" % (source, ws))

    def setUp(self):
        self.data = self._load_table(data)
        self.posix_data = self._load_table(posix_data)

    def splitTest(self, data, comments):
        """Check ``shlex.split`` against every ``[input, token, ...]`` row."""
        for source, *expected in data:
            with self.subTest(source=source):
                observed = shlex.split(source, comments=comments)
                self.assertEqual(observed, expected,
                                 "%s: %s != %s" %
                                 (source, observed, expected))

    def oldSplit(self, s):
        """Tokenise *s* by calling ``get_token()`` until it returns a falsy value."""
        lexer = shlex.shlex(io.StringIO(s))
        tokens = []
        while token := lexer.get_token():
            tokens.append(token)
        return tokens

    def testSplitNone(self):
        with self.assertRaises(ValueError):
            shlex.split(None)

    def testSplitPosix(self):
        """Test data splitting with posix parser"""
        self.splitTest(self.posix_data, comments=True)

    def testCompat(self):
        """Test compatibility interface"""
        for source, *expected in self.data:
            with self.subTest(source=source):
                observed = self.oldSplit(source)
                self.assertEqual(observed, expected,
                                 "%s: %s != %s" %
                                 (source, observed, expected))

    def testSyntaxSplitAmpersandAndPipe(self):
        """Test handling of syntax splitting of &, |"""
        # Could take these forms: &&, &, |&, ;&, ;;&
        # of course, the same applies to | and ||
        # these should all parse to the same output
        for delimiter in ('&&', '&', '|&', ';&', ';;&',
                          '||', '|', '&|', ';|', ';;|'):
            self._check_punctuation_split(
                ['echo hi %s echo bye' % delimiter,
                 'echo hi%secho bye' % delimiter],
                ['echo', 'hi', delimiter, 'echo', 'bye'])

    def testSyntaxSplitSemicolon(self):
        """Test handling of syntax splitting of ;"""
        # Could take these forms: ;, ;;, ;&, ;;&
        # these should all parse to the same output
        for delimiter in (';', ';;', ';&', ';;&'):
            self._check_punctuation_split(
                ['echo hi %s echo bye' % delimiter,
                 'echo hi%s echo bye' % delimiter,
                 'echo hi%secho bye' % delimiter],
                ['echo', 'hi', delimiter, 'echo', 'bye'])

    def testSyntaxSplitRedirect(self):
        """Test handling of syntax splitting of >"""
        # of course, the same applies to <, |
        # these should all parse to the same output
        # ('>' is listed explicitly, and whitespace_split is now really
        # toggled through the shared helper instead of being ignored.)
        for delimiter in ('>', '<', '|'):
            self._check_punctuation_split(
                ['echo hi %s out' % delimiter,
                 'echo hi%s out' % delimiter,
                 'echo hi%sout' % delimiter],
                ['echo', 'hi', delimiter, 'out'])

    def testSyntaxSplitParen(self):
        """Test handling of syntax splitting of ()"""
        # these should all parse to the same output
        self._check_punctuation_split(
            ['( echo hi )',
             '(echo hi)'],
            ['(', 'echo', 'hi', ')'])

    def testSyntaxSplitCustom(self):
        """Test handling of syntax splitting with custom chars"""
        ss = "~/a&&b-c --color=auto||d *.py?"
        ref = ['~/a', '&', '&', 'b-c', '--color=auto', '||', 'd', '*.py?']
        s = shlex.shlex(ss, punctuation_chars="|")
        result = list(s)
        self.assertEqual(ref, result, "While splitting '%s' [ws=False]" % ss)
        ref = ['~/a&&b-c', '--color=auto', '||', 'd', '*.py?']
        s = shlex.shlex(ss, punctuation_chars="|")
        s.whitespace_split = True
        result = list(s)
        self.assertEqual(ref, result, "While splitting '%s' [ws=True]" % ss)

    def testTokenTypes(self):
        """Test that tokens are split with types as expected."""
        for source, expected in (
                ('a && b || c',
                 [('a', 'a'), ('&&', 'c'), ('b', 'a'),
                  ('||', 'c'), ('c', 'a')]),
        ):
            s = shlex.shlex(source, punctuation_chars=True)
            # Classify each token by its first character: 'c' when it is a
            # punctuation character, 'a' otherwise.
            observed = [
                (t, 'c' if t[0] in s.punctuation_chars else 'a')
                for t in iter(s.get_token, s.eof)
            ]
            self.assertEqual(observed, expected)

    def testPunctuationInWordChars(self):
        """Test that any punctuation chars are removed from wordchars"""
        s = shlex.shlex('a_b__c', punctuation_chars='_')
        self.assertNotIn('_', s.wordchars)
        self.assertEqual(list(s), ['a', '_', 'b', '__', 'c'])

    def testPunctuationWithWhitespaceSplit(self):
        """Test that with whitespace_split, behaviour is as expected"""
        s = shlex.shlex('a  && b  ||  c', punctuation_chars='&')
        # whitespace_split is False, so splitting will be based on
        # punctuation_chars
        self.assertEqual(list(s), ['a', '&&', 'b', '|', '|', 'c'])
        s = shlex.shlex('a  && b  ||  c', punctuation_chars='&')
        s.whitespace_split = True
        # whitespace_split is True, so splitting will be based on
        # white space
        self.assertEqual(list(s), ['a', '&&', 'b', '||', 'c'])

    def testPunctuationWithPosix(self):
        """Test that punctuation_chars and posix behave correctly together."""
        # see Issue #29132
        s = shlex.shlex('f >"abc"', posix=True, punctuation_chars=True)
        self.assertEqual(list(s), ['f', '>', 'abc'])
        s = shlex.shlex('f >\\"abc\\"', posix=True, punctuation_chars=True)
        self.assertEqual(list(s), ['f', '>', '"abc"'])

    def testEmptyStringHandling(self):
        """Test that parsing of empty strings is correctly handled."""
        # see Issue #21999
        expected = ['', ')', 'abc']
        for punct in (False, True):
            s = shlex.shlex("'')abc", posix=True, punctuation_chars=punct)
            slist = list(s)
            self.assertEqual(slist, expected)
        expected = ["''", ')', 'abc']
        s = shlex.shlex("'')abc", punctuation_chars=True)
        self.assertEqual(list(s), expected)

    def testUnicodeHandling(self):
        """Test punctuation_chars and whitespace_split handle unicode."""
        ss = "\u2119\u01b4\u2602\u210c\u00f8\u1f24"
        # Should be parsed as one complete token (whitespace_split=True).
        ref = ['\u2119\u01b4\u2602\u210c\u00f8\u1f24']
        s = shlex.shlex(ss, punctuation_chars=True)
        s.whitespace_split = True
        self.assertEqual(list(s), ref)
        # Without whitespace_split, uses wordchars and splits on all.
        ref = ['\u2119', '\u01b4', '\u2602', '\u210c', '\u00f8', '\u1f24']
        s = shlex.shlex(ss, punctuation_chars=True)
        self.assertEqual(list(s), ref)

    def testQuote(self):
        """Test that quote() leaves safe text alone and single-quotes the rest."""
        safeunquoted = string.ascii_letters + string.digits + '@%_-+=:,./'
        unicode_sample = '\xe9\xe0\xdf'  # e + acute accent, a + grave, sharp s
        unsafe = '"`$\\!' + unicode_sample

        self.assertEqual(shlex.quote(''), "''")
        self.assertEqual(shlex.quote(safeunquoted), safeunquoted)
        self.assertEqual(shlex.quote('test file name'), "'test file name'")
        for u in unsafe:
            self.assertEqual(shlex.quote('test%sname' % u),
                             "'test%sname'" % u)
        for u in unsafe:
            self.assertEqual(shlex.quote("test%s'name'" % u),
                             "'test%s'\"'\"'name'\"'\"''" % u)

    def testJoin(self):
        """Test that join() quotes exactly the arguments that need it."""
        for split_command, command in [
            (['a ', 'b'], "'a ' b"),
            (['a', ' b'], "a ' b'"),
            (['a', ' ', 'b'], "a ' ' b"),
            (['"a', 'b"'], '\'"a\' \'b"\''),
        ]:
            with self.subTest(command=command):
                joined = shlex.join(split_command)
                self.assertEqual(joined, command)

    def testJoinRoundtrip(self):
        """Test that split(join(tokens)) gives back the original tokens."""
        all_data = self.data + self.posix_data
        for command, *split_command in all_data:
            with self.subTest(command=command):
                joined = shlex.join(split_command)
                resplit = shlex.split(joined)
                self.assertEqual(split_command, resplit)

    def testPunctuationCharsReadOnly(self):
        """Test that punctuation_chars cannot be reassigned after creation."""
        punctuation_chars = "/|$%^"
        shlex_instance = shlex.shlex(punctuation_chars=punctuation_chars)
        self.assertEqual(shlex_instance.punctuation_chars, punctuation_chars)
        with self.assertRaises(AttributeError):
            shlex_instance.punctuation_chars = False
365
366
# Allow this test to be used with old shlex.py
if not getattr(shlex, "split", None):
    # This shlex has no usable split(): keep only the compatibility test
    # and strip every other test method from the class.
    for name in [attr for attr in dir(ShlexTest)
                 if attr.startswith("test") and attr != "testCompat"]:
        delattr(ShlexTest, name)

if __name__ == "__main__":
    unittest.main()