1 import sys
2 import unicodedata
3 import unittest
4 import urllib.parse
5
6 RFC1808_BASE = "http://a/b/c/d;p?q#f"
7 RFC2396_BASE = "http://a/b/c/d;p?q"
8 RFC3986_BASE = 'http://a/b/c/d;p?q'
9 SIMPLE_BASE = 'http://a/b/c/d'
10
11 # Each parse_qsl testcase is a two-tuple that contains
12 # a string with the query and a list with the expected result.
13
14 parse_qsl_test_cases = [
15 ("", []),
16 ("&", []),
17 ("&&", []),
18 ("=", [('', '')]),
19 ("=a", [('', 'a')]),
20 ("a", [('a', '')]),
21 ("a=", [('a', '')]),
22 ("&a=b", [('a', 'b')]),
23 ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
24 ("a=1&a=2", [('a', '1'), ('a', '2')]),
25 (b"", []),
26 (b"&", []),
27 (b"&&", []),
28 (b"=", [(b'', b'')]),
29 (b"=a", [(b'', b'a')]),
30 (b"a", [(b'a', b'')]),
31 (b"a=", [(b'a', b'')]),
32 (b"&a=b", [(b'a', b'b')]),
33 (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
34 (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),
35 (";a=b", [(';a', 'b')]),
36 ("a=a+b;b=b+c", [('a', 'a b;b=b c')]),
37 (b";a=b", [(b';a', b'b')]),
38 (b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]),
39 ]
40
41 # Each parse_qs testcase is a two-tuple that contains
42 # a string with the query and a dictionary with the expected result.
43
44 parse_qs_test_cases = [
45 ("", {}),
46 ("&", {}),
47 ("&&", {}),
48 ("=", {'': ['']}),
49 ("=a", {'': ['a']}),
50 ("a", {'a': ['']}),
51 ("a=", {'a': ['']}),
52 ("&a=b", {'a': ['b']}),
53 ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
54 ("a=1&a=2", {'a': ['1', '2']}),
55 (b"", {}),
56 (b"&", {}),
57 (b"&&", {}),
58 (b"=", {b'': [b'']}),
59 (b"=a", {b'': [b'a']}),
60 (b"a", {b'a': [b'']}),
61 (b"a=", {b'a': [b'']}),
62 (b"&a=b", {b'a': [b'b']}),
63 (b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
64 (b"a=1&a=2", {b'a': [b'1', b'2']}),
65 (";a=b", {';a': ['b']}),
66 ("a=a+b;b=b+c", {'a': ['a b;b=b c']}),
67 (b";a=b", {b';a': [b'b']}),
68 (b"a=a+b;b=b+c", {b'a':[ b'a b;b=b c']}),
69 ]
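
# Illustrative sketch (not part of the original suite; values follow directly
# from the cases above): with the default keep_blank_values=False, repeated
# keys accumulate and order within each key is preserved:
#
#   >>> urllib.parse.parse_qs("a=1&a=2")
#   {'a': ['1', '2']}
#   >>> urllib.parse.parse_qsl("a=1&a=2")
#   [('a', '1'), ('a', '2')]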
70
71 class UrlParseTestCase(unittest.TestCase):
72
73 def checkRoundtrips(self, url, parsed, split):
74 result = urllib.parse.urlparse(url)
75 self.assertSequenceEqual(result, parsed)
76 t = (result.scheme, result.netloc, result.path,
77 result.params, result.query, result.fragment)
78 self.assertSequenceEqual(t, parsed)
79 # put it back together and it should be the same
80 result2 = urllib.parse.urlunparse(result)
81 self.assertSequenceEqual(result2, url)
82 self.assertSequenceEqual(result2, result.geturl())
83
84 # the result of geturl() is a fixpoint; we can always parse it
85 # again to get the same result:
86 result3 = urllib.parse.urlparse(result.geturl())
87 self.assertEqual(result3.geturl(), result.geturl())
88 self.assertSequenceEqual(result3, result)
89 self.assertEqual(result3.scheme, result.scheme)
90 self.assertEqual(result3.netloc, result.netloc)
91 self.assertEqual(result3.path, result.path)
92 self.assertEqual(result3.params, result.params)
93 self.assertEqual(result3.query, result.query)
94 self.assertEqual(result3.fragment, result.fragment)
95 self.assertEqual(result3.username, result.username)
96 self.assertEqual(result3.password, result.password)
97 self.assertEqual(result3.hostname, result.hostname)
98 self.assertEqual(result3.port, result.port)
99
100 # check the roundtrip using urlsplit() as well
101 result = urllib.parse.urlsplit(url)
102 self.assertSequenceEqual(result, split)
103 t = (result.scheme, result.netloc, result.path,
104 result.query, result.fragment)
105 self.assertSequenceEqual(t, split)
106 result2 = urllib.parse.urlunsplit(result)
107 self.assertSequenceEqual(result2, url)
108 self.assertSequenceEqual(result2, result.geturl())
109
110 # check the fixpoint property of re-parsing the result of geturl()
111 result3 = urllib.parse.urlsplit(result.geturl())
112 self.assertEqual(result3.geturl(), result.geturl())
113 self.assertSequenceEqual(result3, result)
114 self.assertEqual(result3.scheme, result.scheme)
115 self.assertEqual(result3.netloc, result.netloc)
116 self.assertEqual(result3.path, result.path)
117 self.assertEqual(result3.query, result.query)
118 self.assertEqual(result3.fragment, result.fragment)
119 self.assertEqual(result3.username, result.username)
120 self.assertEqual(result3.password, result.password)
121 self.assertEqual(result3.hostname, result.hostname)
122 self.assertEqual(result3.port, result.port)
123
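    # Illustrative sketch (mirrors the roundtrip cases below): urlparse()
    # returns a 6-tuple that splits the path parameters out, while urlsplit()
    # returns a 5-tuple that leaves them in the path:
    #
    #   >>> tuple(urllib.parse.urlparse('http://a/b/c/d;p?q#f'))
    #   ('http', 'a', '/b/c/d', 'p', 'q', 'f')
    #   >>> tuple(urllib.parse.urlsplit('http://a/b/c/d;p?q#f'))
    #   ('http', 'a', '/b/c/d;p', 'q', 'f')
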
124 def test_qsl(self):
125 for orig, expect in parse_qsl_test_cases:
126 result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
127 self.assertEqual(result, expect, "Error parsing %r" % orig)
128 expect_without_blanks = [v for v in expect if len(v[1])]
129 result = urllib.parse.parse_qsl(orig, keep_blank_values=False)
130 self.assertEqual(result, expect_without_blanks,
131 "Error parsing %r" % orig)
132
133 def test_qs(self):
134 for orig, expect in parse_qs_test_cases:
135 result = urllib.parse.parse_qs(orig, keep_blank_values=True)
136 self.assertEqual(result, expect, "Error parsing %r" % orig)
137 expect_without_blanks = {v: expect[v]
138 for v in expect if len(expect[v][0])}
139 result = urllib.parse.parse_qs(orig, keep_blank_values=False)
140 self.assertEqual(result, expect_without_blanks,
141 "Error parsing %r" % orig)
142
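    # Illustrative sketch of the keep_blank_values switch exercised above
    # (assumption: the combined query below behaves like the individual
    # entries in parse_qsl_test_cases):
    #
    #   >>> urllib.parse.parse_qsl("a=&b=c", keep_blank_values=True)
    #   [('a', ''), ('b', 'c')]
    #   >>> urllib.parse.parse_qsl("a=&b=c", keep_blank_values=False)
    #   [('b', 'c')]
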
143 def test_roundtrips(self):
144 str_cases = [
145 ('file:///tmp/junk.txt',
146 ('file', '', '/tmp/junk.txt', '', '', ''),
147 ('file', '', '/tmp/junk.txt', '', '')),
148 ('imap://mail.python.org/mbox1',
149 ('imap', 'mail.python.org', '/mbox1', '', '', ''),
150 ('imap', 'mail.python.org', '/mbox1', '', '')),
151 ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
152 ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
153 '', '', ''),
154 ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
155 '', '')),
156 ('nfs://server/path/to/file.txt',
157 ('nfs', 'server', '/path/to/file.txt', '', '', ''),
158 ('nfs', 'server', '/path/to/file.txt', '', '')),
159 ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
160 ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
161 '', '', ''),
162 ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
163 '', '')),
164 ('git+ssh://git@github.com/user/project.git',
165 ('git+ssh', 'git@github.com','/user/project.git',
166 '','',''),
167 ('git+ssh', 'git@github.com','/user/project.git',
168 '', '')),
169 ('itms-services://?action=download-manifest&url=https://example.com/app',
170 ('itms-services', '', '', '',
171 'action=download-manifest&url=https://example.com/app', ''),
172 ('itms-services', '', '',
173 'action=download-manifest&url=https://example.com/app', '')),
174 ]
175 def _encode(t):
176 return (t[0].encode('ascii'),
177 tuple(x.encode('ascii') for x in t[1]),
178 tuple(x.encode('ascii') for x in t[2]))
179 bytes_cases = [_encode(x) for x in str_cases]
180 for url, parsed, split in str_cases + bytes_cases:
181 self.checkRoundtrips(url, parsed, split)
182
183 def test_http_roundtrips(self):
184 # urllib.parse.urlsplit treats 'http:' as an optimized special case,
185 # so we test both 'http:' and 'https:' in all the following.
186 # Three cheers for white box knowledge!
187 str_cases = [
188 ('://www.python.org',
189 ('www.python.org', '', '', '', ''),
190 ('www.python.org', '', '', '')),
191 ('://www.python.org#abc',
192 ('www.python.org', '', '', '', 'abc'),
193 ('www.python.org', '', '', 'abc')),
194 ('://www.python.org?q=abc',
195 ('www.python.org', '', '', 'q=abc', ''),
196 ('www.python.org', '', 'q=abc', '')),
197 ('://www.python.org/#abc',
198 ('www.python.org', '/', '', '', 'abc'),
199 ('www.python.org', '/', '', 'abc')),
200 ('://a/b/c/d;p?q#f',
201 ('a', '/b/c/d', 'p', 'q', 'f'),
202 ('a', '/b/c/d;p', 'q', 'f')),
203 ]
204 def _encode(t):
205 return (t[0].encode('ascii'),
206 tuple(x.encode('ascii') for x in t[1]),
207 tuple(x.encode('ascii') for x in t[2]))
208 bytes_cases = [_encode(x) for x in str_cases]
209 str_schemes = ('http', 'https')
210 bytes_schemes = (b'http', b'https')
211 str_tests = str_schemes, str_cases
212 bytes_tests = bytes_schemes, bytes_cases
213 for schemes, test_cases in (str_tests, bytes_tests):
214 for scheme in schemes:
215 for url, parsed, split in test_cases:
216 url = scheme + url
217 parsed = (scheme,) + parsed
218 split = (scheme,) + split
219 self.checkRoundtrips(url, parsed, split)
220
221 def checkJoin(self, base, relurl, expected):
222 str_components = (base, relurl, expected)
223 self.assertEqual(urllib.parse.urljoin(base, relurl), expected)
224 bytes_components = baseb, relurlb, expectedb = [
225 x.encode('ascii') for x in str_components]
226 self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)
227
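    # Illustrative sketch of what checkJoin() verifies (values taken from the
    # RFC sections below; the bytes variant is the ASCII-encoded mirror of the
    # str case):
    #
    #   >>> urllib.parse.urljoin('http://a/b/c/d;p?q', '../g')
    #   'http://a/b/g'
    #   >>> urllib.parse.urljoin(b'http://a/b/c/d;p?q', b'../g')
    #   b'http://a/b/g'
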
228 def test_unparse_parse(self):
229 str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]
230 bytes_cases = [x.encode('ascii') for x in str_cases]
231 for u in str_cases + bytes_cases:
232 self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)
233 self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)
234
235 def test_RFC1808(self):
236 # "normal" cases from RFC 1808:
237 self.checkJoin(RFC1808_BASE, 'g:h', 'g:h')
238 self.checkJoin(RFC1808_BASE, 'g', 'http://a/b/c/g')
239 self.checkJoin(RFC1808_BASE, './g', 'http://a/b/c/g')
240 self.checkJoin(RFC1808_BASE, 'g/', 'http://a/b/c/g/')
241 self.checkJoin(RFC1808_BASE, '/g', 'http://a/g')
242 self.checkJoin(RFC1808_BASE, '//g', 'http://g')
243 self.checkJoin(RFC1808_BASE, 'g?y', 'http://a/b/c/g?y')
244 self.checkJoin(RFC1808_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
245 self.checkJoin(RFC1808_BASE, '#s', 'http://a/b/c/d;p?q#s')
246 self.checkJoin(RFC1808_BASE, 'g#s', 'http://a/b/c/g#s')
247 self.checkJoin(RFC1808_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
248 self.checkJoin(RFC1808_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
249 self.checkJoin(RFC1808_BASE, 'g;x', 'http://a/b/c/g;x')
250 self.checkJoin(RFC1808_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
251 self.checkJoin(RFC1808_BASE, '.', 'http://a/b/c/')
252 self.checkJoin(RFC1808_BASE, './', 'http://a/b/c/')
253 self.checkJoin(RFC1808_BASE, '..', 'http://a/b/')
254 self.checkJoin(RFC1808_BASE, '../', 'http://a/b/')
255 self.checkJoin(RFC1808_BASE, '../g', 'http://a/b/g')
256 self.checkJoin(RFC1808_BASE, '../..', 'http://a/')
257 self.checkJoin(RFC1808_BASE, '../../', 'http://a/')
258 self.checkJoin(RFC1808_BASE, '../../g', 'http://a/g')
259
260 # "abnormal" cases from RFC 1808:
261 self.checkJoin(RFC1808_BASE, '', 'http://a/b/c/d;p?q#f')
262 self.checkJoin(RFC1808_BASE, 'g.', 'http://a/b/c/g.')
263 self.checkJoin(RFC1808_BASE, '.g', 'http://a/b/c/.g')
264 self.checkJoin(RFC1808_BASE, 'g..', 'http://a/b/c/g..')
265 self.checkJoin(RFC1808_BASE, '..g', 'http://a/b/c/..g')
266 self.checkJoin(RFC1808_BASE, './../g', 'http://a/b/g')
267 self.checkJoin(RFC1808_BASE, './g/.', 'http://a/b/c/g/')
268 self.checkJoin(RFC1808_BASE, 'g/./h', 'http://a/b/c/g/h')
269 self.checkJoin(RFC1808_BASE, 'g/../h', 'http://a/b/c/h')
270
271 # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
272 # so we'll not actually run these tests (which expect 1808 behavior).
273 #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g')
274 #self.checkJoin(RFC1808_BASE, 'http:', 'http:')
275
276 # XXX: The following tests are no longer compatible with RFC3986
277 # self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g')
278 # self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g')
279 # self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g')
280 # self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g')
281
282
283 def test_RFC2368(self):
284 # Issue 11467: path that starts with a number is not parsed correctly
285 self.assertEqual(urllib.parse.urlparse('mailto:1337@example.org'),
286 ('mailto', '', '1337@example.org', '', '', ''))
287
288 def test_RFC2396(self):
289 # cases from RFC 2396
290
291 self.checkJoin(RFC2396_BASE, 'g:h', 'g:h')
292 self.checkJoin(RFC2396_BASE, 'g', 'http://a/b/c/g')
293 self.checkJoin(RFC2396_BASE, './g', 'http://a/b/c/g')
294 self.checkJoin(RFC2396_BASE, 'g/', 'http://a/b/c/g/')
295 self.checkJoin(RFC2396_BASE, '/g', 'http://a/g')
296 self.checkJoin(RFC2396_BASE, '//g', 'http://g')
297 self.checkJoin(RFC2396_BASE, 'g?y', 'http://a/b/c/g?y')
298 self.checkJoin(RFC2396_BASE, '#s', 'http://a/b/c/d;p?q#s')
299 self.checkJoin(RFC2396_BASE, 'g#s', 'http://a/b/c/g#s')
300 self.checkJoin(RFC2396_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
301 self.checkJoin(RFC2396_BASE, 'g;x', 'http://a/b/c/g;x')
302 self.checkJoin(RFC2396_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
303 self.checkJoin(RFC2396_BASE, '.', 'http://a/b/c/')
304 self.checkJoin(RFC2396_BASE, './', 'http://a/b/c/')
305 self.checkJoin(RFC2396_BASE, '..', 'http://a/b/')
306 self.checkJoin(RFC2396_BASE, '../', 'http://a/b/')
307 self.checkJoin(RFC2396_BASE, '../g', 'http://a/b/g')
308 self.checkJoin(RFC2396_BASE, '../..', 'http://a/')
309 self.checkJoin(RFC2396_BASE, '../../', 'http://a/')
310 self.checkJoin(RFC2396_BASE, '../../g', 'http://a/g')
311 self.checkJoin(RFC2396_BASE, '', RFC2396_BASE)
312 self.checkJoin(RFC2396_BASE, 'g.', 'http://a/b/c/g.')
313 self.checkJoin(RFC2396_BASE, '.g', 'http://a/b/c/.g')
314 self.checkJoin(RFC2396_BASE, 'g..', 'http://a/b/c/g..')
315 self.checkJoin(RFC2396_BASE, '..g', 'http://a/b/c/..g')
316 self.checkJoin(RFC2396_BASE, './../g', 'http://a/b/g')
317 self.checkJoin(RFC2396_BASE, './g/.', 'http://a/b/c/g/')
318 self.checkJoin(RFC2396_BASE, 'g/./h', 'http://a/b/c/g/h')
319 self.checkJoin(RFC2396_BASE, 'g/../h', 'http://a/b/c/h')
320 self.checkJoin(RFC2396_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y')
321 self.checkJoin(RFC2396_BASE, 'g;x=1/../y', 'http://a/b/c/y')
322 self.checkJoin(RFC2396_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
323 self.checkJoin(RFC2396_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x')
324 self.checkJoin(RFC2396_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
325 self.checkJoin(RFC2396_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x')
326
327 # XXX: The following tests are no longer compatible with RFC3986
328 # self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g')
329 # self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g')
330 # self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g')
331 # self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g')
332
333 def test_RFC3986(self):
334 self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y')
335 self.checkJoin(RFC3986_BASE, ';x', 'http://a/b/c/;x')
336 self.checkJoin(RFC3986_BASE, 'g:h','g:h')
337 self.checkJoin(RFC3986_BASE, 'g','http://a/b/c/g')
338 self.checkJoin(RFC3986_BASE, './g','http://a/b/c/g')
339 self.checkJoin(RFC3986_BASE, 'g/','http://a/b/c/g/')
340 self.checkJoin(RFC3986_BASE, '/g','http://a/g')
341 self.checkJoin(RFC3986_BASE, '//g','http://g')
342 self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y')
343 self.checkJoin(RFC3986_BASE, 'g?y','http://a/b/c/g?y')
344 self.checkJoin(RFC3986_BASE, '#s','http://a/b/c/d;p?q#s')
345 self.checkJoin(RFC3986_BASE, 'g#s','http://a/b/c/g#s')
346 self.checkJoin(RFC3986_BASE, 'g?y#s','http://a/b/c/g?y#s')
347 self.checkJoin(RFC3986_BASE, ';x','http://a/b/c/;x')
348 self.checkJoin(RFC3986_BASE, 'g;x','http://a/b/c/g;x')
349 self.checkJoin(RFC3986_BASE, 'g;x?y#s','http://a/b/c/g;x?y#s')
350 self.checkJoin(RFC3986_BASE, '','http://a/b/c/d;p?q')
351 self.checkJoin(RFC3986_BASE, '.','http://a/b/c/')
352 self.checkJoin(RFC3986_BASE, './','http://a/b/c/')
353 self.checkJoin(RFC3986_BASE, '..','http://a/b/')
354 self.checkJoin(RFC3986_BASE, '../','http://a/b/')
355 self.checkJoin(RFC3986_BASE, '../g','http://a/b/g')
356 self.checkJoin(RFC3986_BASE, '../..','http://a/')
357 self.checkJoin(RFC3986_BASE, '../../','http://a/')
358 self.checkJoin(RFC3986_BASE, '../../g','http://a/g')
359 self.checkJoin(RFC3986_BASE, '../../../g', 'http://a/g')
360
361 # Abnormal Examples
362
363 # The 'abnormal scenarios' are incompatible with RFC3986 parsing
364 # Tests are here for reference.
365
366 self.checkJoin(RFC3986_BASE, '../../../g','http://a/g')
367 self.checkJoin(RFC3986_BASE, '../../../../g','http://a/g')
368 self.checkJoin(RFC3986_BASE, '/./g','http://a/g')
369 self.checkJoin(RFC3986_BASE, '/../g','http://a/g')
370 self.checkJoin(RFC3986_BASE, 'g.','http://a/b/c/g.')
371 self.checkJoin(RFC3986_BASE, '.g','http://a/b/c/.g')
372 self.checkJoin(RFC3986_BASE, 'g..','http://a/b/c/g..')
373 self.checkJoin(RFC3986_BASE, '..g','http://a/b/c/..g')
374 self.checkJoin(RFC3986_BASE, './../g','http://a/b/g')
375 self.checkJoin(RFC3986_BASE, './g/.','http://a/b/c/g/')
376 self.checkJoin(RFC3986_BASE, 'g/./h','http://a/b/c/g/h')
377 self.checkJoin(RFC3986_BASE, 'g/../h','http://a/b/c/h')
378 self.checkJoin(RFC3986_BASE, 'g;x=1/./y','http://a/b/c/g;x=1/y')
379 self.checkJoin(RFC3986_BASE, 'g;x=1/../y','http://a/b/c/y')
380 self.checkJoin(RFC3986_BASE, 'g?y/./x','http://a/b/c/g?y/./x')
381 self.checkJoin(RFC3986_BASE, 'g?y/../x','http://a/b/c/g?y/../x')
382 self.checkJoin(RFC3986_BASE, 'g#s/./x','http://a/b/c/g#s/./x')
383 self.checkJoin(RFC3986_BASE, 'g#s/../x','http://a/b/c/g#s/../x')
384 #self.checkJoin(RFC3986_BASE, 'http:g','http:g') # strict parser
385 self.checkJoin(RFC3986_BASE, 'http:g','http://a/b/c/g') #relaxed parser
386
387 # Test for issue9721
388 self.checkJoin('http://a/b/c/de', ';x','http://a/b/c/;x')
389
390 def test_urljoins(self):
391 self.checkJoin(SIMPLE_BASE, 'g:h','g:h')
392 self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g')
393 self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d')
394 self.checkJoin(SIMPLE_BASE, 'g','http://a/b/c/g')
395 self.checkJoin(SIMPLE_BASE, './g','http://a/b/c/g')
396 self.checkJoin(SIMPLE_BASE, 'g/','http://a/b/c/g/')
397 self.checkJoin(SIMPLE_BASE, '/g','http://a/g')
398 self.checkJoin(SIMPLE_BASE, '//g','http://g')
399 self.checkJoin(SIMPLE_BASE, '?y','http://a/b/c/d?y')
400 self.checkJoin(SIMPLE_BASE, 'g?y','http://a/b/c/g?y')
401 self.checkJoin(SIMPLE_BASE, 'g?y/./x','http://a/b/c/g?y/./x')
402 self.checkJoin(SIMPLE_BASE, '.','http://a/b/c/')
403 self.checkJoin(SIMPLE_BASE, './','http://a/b/c/')
404 self.checkJoin(SIMPLE_BASE, '..','http://a/b/')
405 self.checkJoin(SIMPLE_BASE, '../','http://a/b/')
406 self.checkJoin(SIMPLE_BASE, '../g','http://a/b/g')
407 self.checkJoin(SIMPLE_BASE, '../..','http://a/')
408 self.checkJoin(SIMPLE_BASE, '../../g','http://a/g')
409 self.checkJoin(SIMPLE_BASE, './../g','http://a/b/g')
410 self.checkJoin(SIMPLE_BASE, './g/.','http://a/b/c/g/')
411 self.checkJoin(SIMPLE_BASE, 'g/./h','http://a/b/c/g/h')
412 self.checkJoin(SIMPLE_BASE, 'g/../h','http://a/b/c/h')
413 self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g')
414 self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d')
415 self.checkJoin(SIMPLE_BASE, 'http:?y','http://a/b/c/d?y')
416 self.checkJoin(SIMPLE_BASE, 'http:g?y','http://a/b/c/g?y')
417 self.checkJoin(SIMPLE_BASE, 'http:g?y/./x','http://a/b/c/g?y/./x')
418 self.checkJoin('http:///', '..','http:///')
419 self.checkJoin('', 'http://a/b/c/g?y/./x','http://a/b/c/g?y/./x')
420 self.checkJoin('', 'http://a/./g', 'http://a/./g')
421 self.checkJoin('svn://pathtorepo/dir1', 'dir2', 'svn://pathtorepo/dir2')
422 self.checkJoin('svn+ssh://pathtorepo/dir1', 'dir2', 'svn+ssh://pathtorepo/dir2')
423 self.checkJoin('ws://a/b','g','ws://a/g')
424 self.checkJoin('wss://a/b','g','wss://a/g')
425
426 # XXX: The following tests are no longer compatible with RFC3986
427 # self.checkJoin(SIMPLE_BASE, '../../../g','http://a/../g')
428 # self.checkJoin(SIMPLE_BASE, '/./g','http://a/./g')
429
430 # test for issue22118 duplicate slashes
431 self.checkJoin(SIMPLE_BASE + '/', 'foo', SIMPLE_BASE + '/foo')
432
433 # Non-RFC-defined tests, covering variations of base and trailing
434 # slashes
435 self.checkJoin('http://a/b/c/d/e/', '../../f/g/', 'http://a/b/c/f/g/')
436 self.checkJoin('http://a/b/c/d/e', '../../f/g/', 'http://a/b/f/g/')
437 self.checkJoin('http://a/b/c/d/e/', '/../../f/g/', 'http://a/f/g/')
438 self.checkJoin('http://a/b/c/d/e', '/../../f/g/', 'http://a/f/g/')
439 self.checkJoin('http://a/b/c/d/e/', '../../f/g', 'http://a/b/c/f/g')
440 self.checkJoin('http://a/b/', '../../f/g/', 'http://a/f/g/')
441
442 # issue 23703: don't duplicate filename
443 self.checkJoin('a', 'b', 'b')
444
445 def test_RFC2732(self):
446 str_cases = [
447 ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
448 ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
449 ('http://[::1]:5432/foo/', '::1', 5432),
450 ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
451 ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
452 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
453 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
454 ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
455 ('http://[::ffff:12.34.56.78]:5432/foo/',
456 '::ffff:12.34.56.78', 5432),
457 ('http://Test.python.org/foo/', 'test.python.org', None),
458 ('http://12.34.56.78/foo/', '12.34.56.78', None),
459 ('http://[::1]/foo/', '::1', None),
460 ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
461 ('http://[dead:beef::]/foo/', 'dead:beef::', None),
462 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
463 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
464 ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
465 ('http://[::ffff:12.34.56.78]/foo/',
466 '::ffff:12.34.56.78', None),
467 ('http://Test.python.org:/foo/', 'test.python.org', None),
468 ('http://12.34.56.78:/foo/', '12.34.56.78', None),
469 ('http://[::1]:/foo/', '::1', None),
470 ('http://[dead:beef::1]:/foo/', 'dead:beef::1', None),
471 ('http://[dead:beef::]:/foo/', 'dead:beef::', None),
472 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:/foo/',
473 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
474 ('http://[::12.34.56.78]:/foo/', '::12.34.56.78', None),
475 ('http://[::ffff:12.34.56.78]:/foo/',
476 '::ffff:12.34.56.78', None),
477 ]
478 def _encode(t):
479 return t[0].encode('ascii'), t[1].encode('ascii'), t[2]
480 bytes_cases = [_encode(x) for x in str_cases]
481 for url, hostname, port in str_cases + bytes_cases:
482 urlparsed = urllib.parse.urlparse(url)
483 self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port))
484
485 str_cases = [
486 'http://::12.34.56.78]/',
487 'http://[::1/foo/',
488 'ftp://[::1/foo/bad]/bad',
489 'http://[::1/foo/bad]/bad',
490 'http://[::ffff:12.34.56.78']
491 bytes_cases = [x.encode('ascii') for x in str_cases]
492 for invalid_url in str_cases + bytes_cases:
493 self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url)
494
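    # Illustrative sketch of the bracketed-host handling checked above (the
    # hostname is case-normalized, the brackets are stripped, and unbalanced
    # brackets raise ValueError):
    #
    #   >>> p = urllib.parse.urlsplit('http://[::1]:5432/foo/')
    #   >>> p.hostname, p.port
    #   ('::1', 5432)
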
495 def test_urldefrag(self):
496 str_cases = [
497 ('http://python.org#frag', 'http://python.org', 'frag'),
498 ('http://python.org', 'http://python.org', ''),
499 ('http://python.org/#frag', 'http://python.org/', 'frag'),
500 ('http://python.org/', 'http://python.org/', ''),
501 ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
502 ('http://python.org/?q', 'http://python.org/?q', ''),
503 ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
504 ('http://python.org/p?q', 'http://python.org/p?q', ''),
505 (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
506 (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
507 ]
508 def _encode(t):
509 return type(t)(x.encode('ascii') for x in t)
510 bytes_cases = [_encode(x) for x in str_cases]
511 for url, defrag, frag in str_cases + bytes_cases:
512 result = urllib.parse.urldefrag(url)
513 self.assertEqual(result.geturl(), url)
514 self.assertEqual(result, (defrag, frag))
515 self.assertEqual(result.url, defrag)
516 self.assertEqual(result.fragment, frag)
517
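    # Illustrative sketch: urldefrag() returns a DefragResult that compares
    # equal to a (url, fragment) 2-tuple and also exposes the .url and
    # .fragment attributes asserted above:
    #
    #   >>> r = urllib.parse.urldefrag('http://python.org/p#frag')
    #   >>> r.url, r.fragment
    #   ('http://python.org/p', 'frag')
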
518 def test_urlsplit_scoped_IPv6(self):
519 p = urllib.parse.urlsplit('http://[FE80::822a:a8ff:fe49:470c%tESt]:1234')
520 self.assertEqual(p.hostname, "fe80::822a:a8ff:fe49:470c%tESt")
521 self.assertEqual(p.netloc, '[FE80::822a:a8ff:fe49:470c%tESt]:1234')
522
523 p = urllib.parse.urlsplit(b'http://[FE80::822a:a8ff:fe49:470c%tESt]:1234')
524 self.assertEqual(p.hostname, b"fe80::822a:a8ff:fe49:470c%tESt")
525 self.assertEqual(p.netloc, b'[FE80::822a:a8ff:fe49:470c%tESt]:1234')
526
527 def test_urlsplit_attributes(self):
528 url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
529 p = urllib.parse.urlsplit(url)
530 self.assertEqual(p.scheme, "http")
531 self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
532 self.assertEqual(p.path, "/doc/")
533 self.assertEqual(p.query, "")
534 self.assertEqual(p.fragment, "frag")
535 self.assertEqual(p.username, None)
536 self.assertEqual(p.password, None)
537 self.assertEqual(p.hostname, "www.python.org")
538 self.assertEqual(p.port, None)
539 # geturl() won't return exactly the original URL in this case
540 # since the scheme is always case-normalized
541 # We handle this by ignoring the first 4 characters of the URL
542 self.assertEqual(p.geturl()[4:], url[4:])
543
544 url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
545 p = urllib.parse.urlsplit(url)
546 self.assertEqual(p.scheme, "http")
547 self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
548 self.assertEqual(p.path, "/doc/")
549 self.assertEqual(p.query, "query=yes")
550 self.assertEqual(p.fragment, "frag")
551 self.assertEqual(p.username, "User")
552 self.assertEqual(p.password, "Pass")
553 self.assertEqual(p.hostname, "www.python.org")
554 self.assertEqual(p.port, 80)
555 self.assertEqual(p.geturl(), url)
556
557 # Addressing issue1698, which suggests usernames can contain
558 # "@" characters. Though not RFC compliant, many ftp sites allow
559 # and request email addresses as usernames.
560
561 url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
562 p = urllib.parse.urlsplit(url)
563 self.assertEqual(p.scheme, "http")
564 self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
565 self.assertEqual(p.path, "/doc/")
566 self.assertEqual(p.query, "query=yes")
567 self.assertEqual(p.fragment, "frag")
568 self.assertEqual(p.username, "User@example.com")
569 self.assertEqual(p.password, "Pass")
570 self.assertEqual(p.hostname, "www.python.org")
571 self.assertEqual(p.port, 80)
572 self.assertEqual(p.geturl(), url)
573
574 # And check them all again, only with bytes this time
575 url = b"HTTP://WWW.PYTHON.ORG/doc/#frag"
576 p = urllib.parse.urlsplit(url)
577 self.assertEqual(p.scheme, b"http")
578 self.assertEqual(p.netloc, b"WWW.PYTHON.ORG")
579 self.assertEqual(p.path, b"/doc/")
580 self.assertEqual(p.query, b"")
581 self.assertEqual(p.fragment, b"frag")
582 self.assertEqual(p.username, None)
583 self.assertEqual(p.password, None)
584 self.assertEqual(p.hostname, b"www.python.org")
585 self.assertEqual(p.port, None)
586 self.assertEqual(p.geturl()[4:], url[4:])
587
588 url = b"http://User:Pass@www.python.org:080/doc/?query=yes#frag"
589 p = urllib.parse.urlsplit(url)
590 self.assertEqual(p.scheme, b"http")
591 self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")
592 self.assertEqual(p.path, b"/doc/")
593 self.assertEqual(p.query, b"query=yes")
594 self.assertEqual(p.fragment, b"frag")
595 self.assertEqual(p.username, b"User")
596 self.assertEqual(p.password, b"Pass")
597 self.assertEqual(p.hostname, b"www.python.org")
598 self.assertEqual(p.port, 80)
599 self.assertEqual(p.geturl(), url)
600
601 url = b"http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
602 p = urllib.parse.urlsplit(url)
603 self.assertEqual(p.scheme, b"http")
604 self.assertEqual(p.netloc, b"User@example.com:Pass@www.python.org:080")
605 self.assertEqual(p.path, b"/doc/")
606 self.assertEqual(p.query, b"query=yes")
607 self.assertEqual(p.fragment, b"frag")
608 self.assertEqual(p.username, b"User@example.com")
609 self.assertEqual(p.password, b"Pass")
610 self.assertEqual(p.hostname, b"www.python.org")
611 self.assertEqual(p.port, 80)
612 self.assertEqual(p.geturl(), url)
613
614 # Verify an illegal port raises ValueError
615 url = b"HTTP://WWW.PYTHON.ORG:65536/doc/#frag"
616 p = urllib.parse.urlsplit(url)
617 with self.assertRaisesRegex(ValueError, "out of range"):
618 p.port
619
620 def test_urlsplit_remove_unsafe_bytes(self):
621 # Remove ASCII tabs and newlines from input
622 url = "http\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
623 p = urllib.parse.urlsplit(url)
624 self.assertEqual(p.scheme, "http")
625 self.assertEqual(p.netloc, "www.python.org")
626 self.assertEqual(p.path, "/javascript:alert('msg')/")
627 self.assertEqual(p.query, "query=something")
628 self.assertEqual(p.fragment, "fragment")
629 self.assertEqual(p.username, None)
630 self.assertEqual(p.password, None)
631 self.assertEqual(p.hostname, "www.python.org")
632 self.assertEqual(p.port, None)
633 self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment")
634
635 # Remove ASCII tabs and newlines from input as bytes.
636 url = b"http\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
637 p = urllib.parse.urlsplit(url)
638 self.assertEqual(p.scheme, b"http")
639 self.assertEqual(p.netloc, b"www.python.org")
640 self.assertEqual(p.path, b"/javascript:alert('msg')/")
641 self.assertEqual(p.query, b"query=something")
642 self.assertEqual(p.fragment, b"fragment")
643 self.assertEqual(p.username, None)
644 self.assertEqual(p.password, None)
645 self.assertEqual(p.hostname, b"www.python.org")
646 self.assertEqual(p.port, None)
647 self.assertEqual(p.geturl(), b"http://www.python.org/javascript:alert('msg')/?query=something#fragment")
648
649 # with scheme as cache-key
650 url = "http://www.python.org/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
651 scheme = "ht\ntp"
652 for _ in range(2):
653 p = urllib.parse.urlsplit(url, scheme=scheme)
654 self.assertEqual(p.scheme, "http")
655 self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment")
656
657 def test_urlsplit_strip_url(self):
658 noise = bytes(range(0, 0x20 + 1))
659 base_url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
660
661 url = noise.decode("utf-8") + base_url
662 p = urllib.parse.urlsplit(url)
663 self.assertEqual(p.scheme, "http")
664 self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
665 self.assertEqual(p.path, "/doc/")
666 self.assertEqual(p.query, "query=yes")
667 self.assertEqual(p.fragment, "frag")
668 self.assertEqual(p.username, "User")
669 self.assertEqual(p.password, "Pass")
670 self.assertEqual(p.hostname, "www.python.org")
671 self.assertEqual(p.port, 80)
672 self.assertEqual(p.geturl(), base_url)
673
674 url = noise + base_url.encode("utf-8")
675 p = urllib.parse.urlsplit(url)
676 self.assertEqual(p.scheme, b"http")
677 self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")
678 self.assertEqual(p.path, b"/doc/")
679 self.assertEqual(p.query, b"query=yes")
680 self.assertEqual(p.fragment, b"frag")
681 self.assertEqual(p.username, b"User")
682 self.assertEqual(p.password, b"Pass")
683 self.assertEqual(p.hostname, b"www.python.org")
684 self.assertEqual(p.port, 80)
685 self.assertEqual(p.geturl(), base_url.encode("utf-8"))
686
687 # Test that trailing space is preserved as some applications rely on
688 # this within query strings.
689 query_spaces_url = "https://www.python.org:88/doc/?query= "
690 p = urllib.parse.urlsplit(noise.decode("utf-8") + query_spaces_url)
691 self.assertEqual(p.scheme, "https")
692 self.assertEqual(p.netloc, "www.python.org:88")
693 self.assertEqual(p.path, "/doc/")
694 self.assertEqual(p.query, "query= ")
695 self.assertEqual(p.port, 88)
696 self.assertEqual(p.geturl(), query_spaces_url)
697
698 p = urllib.parse.urlsplit("www.pypi.org ")
699 # That "hostname" gets considered a "path" due to the
700 # trailing space and our existing logic... YUCK...
701 # and re-assembles via geturl (aka urlunsplit) into the original.
702 # django.core.validators.URLValidator (at least through v3.2) relies on
703 # this, for better or worse, to catch it in a ValidationError via its
704 # regular expressions.
705 # Here we test the basic round trip concept of such a trailing space.
706 self.assertEqual(urllib.parse.urlunsplit(p), "www.pypi.org ")
707
708 # with scheme as cache-key
709 url = "//www.python.org/"
710 scheme = noise.decode("utf-8") + "https" + noise.decode("utf-8")
711 for _ in range(2):
712 p = urllib.parse.urlsplit(url, scheme=scheme)
713 self.assertEqual(p.scheme, "https")
714 self.assertEqual(p.geturl(), "https://www.python.org/")
715
716 def test_attributes_bad_port(self):
717 """Check handling of invalid ports."""
718 for bytes in (False, True):
719 for parse in (urllib.parse.urlsplit, urllib.parse.urlparse):
720 for port in ("foo", "1.5", "-1", "0x10", "-0", "1_1", " 1", "1 ", "६"):
721 with self.subTest(bytes=bytes, parse=parse, port=port):
722 netloc = "www.example.net:" + port
723 url = "http://" + netloc + "/"
724 if bytes:
725 if netloc.isascii() and port.isascii():
726 netloc = netloc.encode("ascii")
727 url = url.encode("ascii")
728 else:
729 continue
730 p = parse(url)
731 self.assertEqual(p.netloc, netloc)
732 with self.assertRaises(ValueError):
733 p.port
734
735 def test_attributes_bad_scheme(self):
736 """Check handling of invalid schemes."""
737 for bytes in (False, True):
738 for parse in (urllib.parse.urlsplit, urllib.parse.urlparse):
739 for scheme in (".", "+", "-", "0", "http&", "६http"):
740 with self.subTest(bytes=bytes, parse=parse, scheme=scheme):
741 url = scheme + "://www.example.net"
742 if bytes:
743 if url.isascii():
744 url = url.encode("ascii")
745 else:
746 continue
747 p = parse(url)
748 if bytes:
749 self.assertEqual(p.scheme, b"")
750 else:
751 self.assertEqual(p.scheme, "")
752
753 def test_attributes_without_netloc(self):
754 # This example is straight from RFC 3261. It looks like it
755 # should allow the username, hostname, and port to be filled
756 # in, but doesn't. Since it's a URI and doesn't use the
757 # scheme://netloc syntax, the netloc and related attributes
758 # should be left empty.
759 uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
760 p = urllib.parse.urlsplit(uri)
761 self.assertEqual(p.netloc, "")
762 self.assertEqual(p.username, None)
763 self.assertEqual(p.password, None)
764 self.assertEqual(p.hostname, None)
765 self.assertEqual(p.port, None)
766 self.assertEqual(p.geturl(), uri)
767
768 p = urllib.parse.urlparse(uri)
769 self.assertEqual(p.netloc, "")
770 self.assertEqual(p.username, None)
771 self.assertEqual(p.password, None)
772 self.assertEqual(p.hostname, None)
773 self.assertEqual(p.port, None)
774 self.assertEqual(p.geturl(), uri)
775
776 # You guessed it, repeating the test with bytes input
777 uri = b"sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
778 p = urllib.parse.urlsplit(uri)
779 self.assertEqual(p.netloc, b"")
780 self.assertEqual(p.username, None)
781 self.assertEqual(p.password, None)
782 self.assertEqual(p.hostname, None)
783 self.assertEqual(p.port, None)
784 self.assertEqual(p.geturl(), uri)
785
786 p = urllib.parse.urlparse(uri)
787 self.assertEqual(p.netloc, b"")
788 self.assertEqual(p.username, None)
789 self.assertEqual(p.password, None)
790 self.assertEqual(p.hostname, None)
791 self.assertEqual(p.port, None)
792 self.assertEqual(p.geturl(), uri)
793
794 def test_noslash(self):
795 # Issue 1637: http://foo.com?query is legal
796 self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
797 ('http', 'example.com', '', '', 'blahblah=/foo', ''))
798 self.assertEqual(urllib.parse.urlparse(b"http://example.com?blahblah=/foo"),
799 (b'http', b'example.com', b'', b'', b'blahblah=/foo', b''))
800
801 def test_withoutscheme(self):
802 # Test urlparse without scheme
803 # Issue 754016: urlparse goes wrong with IP:port without scheme
804 # RFC 1808 specifies that netloc should start with //, urlparse expects
805 # the same; otherwise it classifies that portion of the URL as the path.
806 self.assertEqual(urllib.parse.urlparse("path"),
807 ('','','path','','',''))
808 self.assertEqual(urllib.parse.urlparse("//www.python.org:80"),
809 ('','www.python.org:80','','','',''))
810 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
811 ('http','www.python.org:80','','','',''))
812 # Repeat for bytes input
813 self.assertEqual(urllib.parse.urlparse(b"path"),
814 (b'',b'',b'path',b'',b'',b''))
815 self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80"),
816 (b'',b'www.python.org:80',b'',b'',b'',b''))
817 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
818 (b'http',b'www.python.org:80',b'',b'',b'',b''))
819
820 def test_portseparator(self):
821 # Issue 754016: distinguish the port separator ':' from the scheme separator
822 self.assertEqual(urllib.parse.urlparse("http:80"), ('http','','80','','',''))
823 self.assertEqual(urllib.parse.urlparse("https:80"), ('https','','80','','',''))
824 self.assertEqual(urllib.parse.urlparse("path:80"), ('path','','80','','',''))
825 self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
826 self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
827 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
828 ('http','www.python.org:80','','','',''))
829 # As usual, need to check bytes input as well
830 self.assertEqual(urllib.parse.urlparse(b"http:80"), (b'http',b'',b'80',b'',b'',b''))
831 self.assertEqual(urllib.parse.urlparse(b"https:80"), (b'https',b'',b'80',b'',b'',b''))
832 self.assertEqual(urllib.parse.urlparse(b"path:80"), (b'path',b'',b'80',b'',b'',b''))
833 self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b''))
834 self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b''))
835 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
836 (b'http',b'www.python.org:80',b'',b'',b'',b''))
837
838 def test_usingsys(self):
839 # Issue 3314: sys module is used in the error
840 self.assertRaises(TypeError, urllib.parse.urlencode, "foo")
841
842 def test_anyscheme(self):
843 # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
844 self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"),
845 ('s3', 'foo.com', '/stuff', '', '', ''))
846 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),
847 ('x-newscheme', 'foo.com', '/stuff', '', '', ''))
848 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),
849 ('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment'))
850 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query"),
851 ('x-newscheme', 'foo.com', '/stuff', '', 'query', ''))
852
853 # And for bytes...
854 self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"),
855 (b's3', b'foo.com', b'/stuff', b'', b'', b''))
856 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"),
857 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b''))
858 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment"),
859 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'fragment'))
860 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"),
861 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b''))
862
863 def test_default_scheme(self):
864 # Exercise the scheme parameter of urlparse() and urlsplit()
865 for func in (urllib.parse.urlparse, urllib.parse.urlsplit):
866 with self.subTest(function=func):
867 result = func("http://example.net/", "ftp")
868 self.assertEqual(result.scheme, "http")
869 result = func(b"http://example.net/", b"ftp")
870 self.assertEqual(result.scheme, b"http")
871 self.assertEqual(func("path", "ftp").scheme, "ftp")
872 self.assertEqual(func("path", scheme="ftp").scheme, "ftp")
873 self.assertEqual(func(b"path", scheme=b"ftp").scheme, b"ftp")
874 self.assertEqual(func("path").scheme, "")
875 self.assertEqual(func(b"path").scheme, b"")
876 self.assertEqual(func(b"path", "").scheme, b"")
877
878 def test_parse_fragments(self):
879 # Exercise the allow_fragments parameter of urlparse() and urlsplit()
880 tests = (
881 ("http:#frag", "path", "frag"),
882 ("//example.net#frag", "path", "frag"),
883 ("index.html#frag", "path", "frag"),
884 (";a=b#frag", "params", "frag"),
885 ("?a=b#frag", "query", "frag"),
886 ("#frag", "path", "frag"),
887 ("abc#@frag", "path", "@frag"),
888 ("//abc#@frag", "path", "@frag"),
889 ("//abc:80#@frag", "path", "@frag"),
890 ("//abc#@frag:80", "path", "@frag:80"),
891 )
892 for url, attr, expected_frag in tests:
893 for func in (urllib.parse.urlparse, urllib.parse.urlsplit):
894 if attr == "params" and func is urllib.parse.urlsplit:
895 attr = "path"
896 with self.subTest(url=url, function=func):
897 result = func(url, allow_fragments=False)
898 self.assertEqual(result.fragment, "")
899 self.assertTrue(
900 getattr(result, attr).endswith("#" + expected_frag))
901 self.assertEqual(func(url, "", False).fragment, "")
902
903 result = func(url, allow_fragments=True)
904 self.assertEqual(result.fragment, expected_frag)
905 self.assertFalse(
906 getattr(result, attr).endswith(expected_frag))
907 self.assertEqual(func(url, "", True).fragment,
908 expected_frag)
909 self.assertEqual(func(url).fragment, expected_frag)
910
911 def test_mixed_types_rejected(self):
912 # Several functions that process either strings or ASCII encoded bytes
913 # accept multiple arguments. Check they reject mixed type input
914 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
915 urllib.parse.urlparse("www.python.org", b"http")
916 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
917 urllib.parse.urlparse(b"www.python.org", "http")
918 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
919 urllib.parse.urlsplit("www.python.org", b"http")
920 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
921 urllib.parse.urlsplit(b"www.python.org", "http")
922 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
923 urllib.parse.urlunparse(( b"http", "www.python.org","","","",""))
924 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
925 urllib.parse.urlunparse(("http", b"www.python.org","","","",""))
926 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
927 urllib.parse.urlunsplit((b"http", "www.python.org","","",""))
928 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
929 urllib.parse.urlunsplit(("http", b"www.python.org","","",""))
930 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
931 urllib.parse.urljoin("http://python.org", b"http://python.org")
932 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
933 urllib.parse.urljoin(b"http://python.org", "http://python.org")
934
935 def _check_result_type(self, str_type):
936 num_args = len(str_type._fields)
937 bytes_type = str_type._encoded_counterpart
938 self.assertIs(bytes_type._decoded_counterpart, str_type)
939 str_args = ('',) * num_args
940 bytes_args = (b'',) * num_args
941 str_result = str_type(*str_args)
942 bytes_result = bytes_type(*bytes_args)
943 encoding = 'ascii'
944 errors = 'strict'
945 self.assertEqual(str_result, str_args)
946 self.assertEqual(bytes_result.decode(), str_args)
947 self.assertEqual(bytes_result.decode(), str_result)
948 self.assertEqual(bytes_result.decode(encoding), str_args)
949 self.assertEqual(bytes_result.decode(encoding), str_result)
950 self.assertEqual(bytes_result.decode(encoding, errors), str_args)
951 self.assertEqual(bytes_result.decode(encoding, errors), str_result)
952 self.assertEqual(bytes_result, bytes_args)
953 self.assertEqual(str_result.encode(), bytes_args)
954 self.assertEqual(str_result.encode(), bytes_result)
955 self.assertEqual(str_result.encode(encoding), bytes_args)
956 self.assertEqual(str_result.encode(encoding), bytes_result)
957 self.assertEqual(str_result.encode(encoding, errors), bytes_args)
958 self.assertEqual(str_result.encode(encoding, errors), bytes_result)
959
960 def test_result_pairs(self):
961 # Check encoding and decoding between result pairs
962 result_types = [
963 urllib.parse.DefragResult,
964 urllib.parse.SplitResult,
965 urllib.parse.ParseResult,
966 ]
967 for result_type in result_types:
968 self._check_result_type(result_type)
969
970 def test_parse_qs_encoding(self):
971 result = urllib.parse.parse_qs("key=\u0141%E9", encoding="latin-1")
972 self.assertEqual(result, {'key': ['\u0141\xE9']})
973 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="utf-8")
974 self.assertEqual(result, {'key': ['\u0141\xE9']})
975 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="ascii")
976 self.assertEqual(result, {'key': ['\u0141\ufffd\ufffd']})
977 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii")
978 self.assertEqual(result, {'key': ['\u0141\ufffd-']})
979 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii",
980 errors="ignore")
981 self.assertEqual(result, {'key': ['\u0141-']})
982
983 def test_parse_qsl_encoding(self):
984 result = urllib.parse.parse_qsl("key=\u0141%E9", encoding="latin-1")
985 self.assertEqual(result, [('key', '\u0141\xE9')])
986 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="utf-8")
987 self.assertEqual(result, [('key', '\u0141\xE9')])
988 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="ascii")
989 self.assertEqual(result, [('key', '\u0141\ufffd\ufffd')])
990 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii")
991 self.assertEqual(result, [('key', '\u0141\ufffd-')])
992 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii",
993 errors="ignore")
994 self.assertEqual(result, [('key', '\u0141-')])
995
996 def test_parse_qsl_max_num_fields(self):
997 with self.assertRaises(ValueError):
998 urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10)
999 urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10)
1000
1001 def test_parse_qs_separator(self):
1002 parse_qs_semicolon_cases = [
1003 (";", {}),
1004 (";;", {}),
1005 (";a=b", {'a': ['b']}),
1006 ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
1007 ("a=1;a=2", {'a': ['1', '2']}),
1008 (b";", {}),
1009 (b";;", {}),
1010 (b";a=b", {b'a': [b'b']}),
1011 (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
1012 (b"a=1;a=2", {b'a': [b'1', b'2']}),
1013 ]
1014 for orig, expect in parse_qs_semicolon_cases:
1015 with self.subTest(f"Original: {orig!r}, Expected: {expect!r}"):
1016 result = urllib.parse.parse_qs(orig, separator=';')
1017 self.assertEqual(result, expect, "Error parsing %r" % orig)
1018 result_bytes = urllib.parse.parse_qs(orig, separator=b';')
1019 self.assertEqual(result_bytes, expect, "Error parsing %r" % orig)
1020
1021
1022 def test_parse_qsl_separator(self):
1023 parse_qsl_semicolon_cases = [
1024 (";", []),
1025 (";;", []),
1026 (";a=b", [('a', 'b')]),
1027 ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
1028 ("a=1;a=2", [('a', '1'), ('a', '2')]),
1029 (b";", []),
1030 (b";;", []),
1031 (b";a=b", [(b'a', b'b')]),
1032 (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
1033 (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
1034 ]
1035 for orig, expect in parse_qsl_semicolon_cases:
1036 with self.subTest(f"Original: {orig!r}, Expected: {expect!r}"):
1037 result = urllib.parse.parse_qsl(orig, separator=';')
1038 self.assertEqual(result, expect, "Error parsing %r" % orig)
1039 result_bytes = urllib.parse.parse_qsl(orig, separator=b';')
1040 self.assertEqual(result_bytes, expect, "Error parsing %r" % orig)
1041
1042
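    # Illustrative sketch of the separator argument exercised above: with the
    # default '&' separator a ';' is plain data, while separator=';' splits
    # on it (values follow from the test cases):
    #
    #   >>> urllib.parse.parse_qs("a=1;a=2")
    #   {'a': ['1;a=2']}
    #   >>> urllib.parse.parse_qs("a=1;a=2", separator=';')
    #   {'a': ['1', '2']}
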
1043 def test_urlencode_sequences(self):
1044 # Other tests incidentally urlencode things; test non-covered cases:
1045 # Sequence and object values.
1046 result = urllib.parse.urlencode({'a': [1, 2], 'b': (3, 4, 5)}, True)
1047 # we cannot rely on ordering here
1048 assert set(result.split('&')) == {'a=1', 'a=2', 'b=3', 'b=4', 'b=5'}
1049
1050 class Trivial:
1051 def __str__(self):
1052 return 'trivial'
1053
1054 result = urllib.parse.urlencode({'a': Trivial()}, True)
1055 self.assertEqual(result, 'a=trivial')
1056
1057 def test_urlencode_quote_via(self):
1058 result = urllib.parse.urlencode({'a': 'some value'})
1059 self.assertEqual(result, "a=some+value")
1060 result = urllib.parse.urlencode({'a': 'some value/another'},
1061 quote_via=urllib.parse.quote)
1062 self.assertEqual(result, "a=some%20value%2Fanother")
1063 result = urllib.parse.urlencode({'a': 'some value/another'},
1064 safe='/', quote_via=urllib.parse.quote)
1065 self.assertEqual(result, "a=some%20value/another")
1066
1067 def test_quote_from_bytes(self):
1068 self.assertRaises(TypeError, urllib.parse.quote_from_bytes, 'foo')
1069 result = urllib.parse.quote_from_bytes(b'archaeological arcana')
1070 self.assertEqual(result, 'archaeological%20arcana')
1071 result = urllib.parse.quote_from_bytes(b'')
1072 self.assertEqual(result, '')
1073 result = urllib.parse.quote_from_bytes(b'A'*10_000)
1074 self.assertEqual(result, 'A'*10_000)
1075 result = urllib.parse.quote_from_bytes(b'z\x01/ '*253_183)
1076 self.assertEqual(result, 'z%01/%20'*253_183)
1077
1078 def test_unquote_to_bytes(self):
1079 result = urllib.parse.unquote_to_bytes('abc%20def')
1080 self.assertEqual(result, b'abc def')
1081 result = urllib.parse.unquote_to_bytes('')
1082 self.assertEqual(result, b'')
1083
1084 def test_quote_errors(self):
1085 self.assertRaises(TypeError, urllib.parse.quote, b'foo',
1086 encoding='utf-8')
1087 self.assertRaises(TypeError, urllib.parse.quote, b'foo', errors='strict')
1088
1089 def test_issue14072(self):
1090 p1 = urllib.parse.urlsplit('tel:+31-641044153')
1091 self.assertEqual(p1.scheme, 'tel')
1092 self.assertEqual(p1.path, '+31-641044153')
1093 p2 = urllib.parse.urlsplit('tel:+31641044153')
1094 self.assertEqual(p2.scheme, 'tel')
1095 self.assertEqual(p2.path, '+31641044153')
1096 # assert the behavior for urlparse
1097 p1 = urllib.parse.urlparse('tel:+31-641044153')
1098 self.assertEqual(p1.scheme, 'tel')
1099 self.assertEqual(p1.path, '+31-641044153')
1100 p2 = urllib.parse.urlparse('tel:+31641044153')
1101 self.assertEqual(p2.scheme, 'tel')
1102 self.assertEqual(p2.path, '+31641044153')
1103
1104 def test_invalid_bracketed_hosts(self):
1105 self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[192.0.2.146]/Path?Query')
1106 self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[important.com:8000]/Path?Query')
1107 self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v123r.IP]/Path?Query')
1108 self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v12ae]/Path?Query')
1109 self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v.IP]/Path?Query')
1110 self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v123.]/Path?Query')
1111 self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v]/Path?Query')
1112 self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af::2309::fae7:1234]/Path?Query')
1113 self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af:2309::fae7:1234:2342:438e:192.0.2.146]/Path?Query')
1114 self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@]v6a.ip[/Path')
1115
1116 def test_splitting_bracketed_hosts(self):
1117 p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]/path?query')
1118 self.assertEqual(p1.hostname, 'v6a.ip')
1119 self.assertEqual(p1.username, 'user')
1120 self.assertEqual(p1.path, '/path')
1121 p2 = urllib.parse.urlsplit('scheme://user@[0439:23af:2309::fae7%test]/path?query')
1122 self.assertEqual(p2.hostname, '0439:23af:2309::fae7%test')
1123 self.assertEqual(p2.username, 'user')
1124 self.assertEqual(p2.path, '/path')
1125 p3 = urllib.parse.urlsplit('scheme://user@[0439:23af:2309::fae7:1234:192.0.2.146%test]/path?query')
1126 self.assertEqual(p3.hostname, '0439:23af:2309::fae7:1234:192.0.2.146%test')
1127 self.assertEqual(p3.username, 'user')
1128 self.assertEqual(p3.path, '/path')
1129
1130 def test_port_casting_failure_message(self):
1131 message = "Port could not be cast to integer value as 'oracle'"
1132 p1 = urllib.parse.urlparse('http://Server=sde; Service=sde:oracle')
1133 with self.assertRaisesRegex(ValueError, message):
1134 p1.port
1135
1136 p2 = urllib.parse.urlsplit('http://Server=sde; Service=sde:oracle')
1137 with self.assertRaisesRegex(ValueError, message):
1138 p2.port
1139
1140 def test_telurl_params(self):
1141 p1 = urllib.parse.urlparse('tel:123-4;phone-context=+1-650-516')
1142 self.assertEqual(p1.scheme, 'tel')
1143 self.assertEqual(p1.path, '123-4')
1144 self.assertEqual(p1.params, 'phone-context=+1-650-516')
1145
1146 p1 = urllib.parse.urlparse('tel:+1-201-555-0123')
1147 self.assertEqual(p1.scheme, 'tel')
1148 self.assertEqual(p1.path, '+1-201-555-0123')
1149 self.assertEqual(p1.params, '')
1150
1151 p1 = urllib.parse.urlparse('tel:7042;phone-context=example.com')
1152 self.assertEqual(p1.scheme, 'tel')
1153 self.assertEqual(p1.path, '7042')
1154 self.assertEqual(p1.params, 'phone-context=example.com')
1155
1156 p1 = urllib.parse.urlparse('tel:863-1234;phone-context=+1-914-555')
1157 self.assertEqual(p1.scheme, 'tel')
1158 self.assertEqual(p1.path, '863-1234')
1159 self.assertEqual(p1.params, 'phone-context=+1-914-555')
1160
1161 def test_Quoter_repr(self):
1162 quoter = urllib.parse._Quoter(urllib.parse._ALWAYS_SAFE)
1163 self.assertIn('Quoter', repr(quoter))
1164
1165 def test_clear_cache_for_code_coverage(self):
1166 urllib.parse.clear_cache()
1167
1168 def test_urllib_parse_getattr_failure(self):
1169 """Test that urllib.parse.__getattr__() fails correctly."""
1170 with self.assertRaises(AttributeError):
1171 unused = urllib.parse.this_does_not_exist
1172
1173 def test_all(self):
1174 expected = []
1175 undocumented = {
1176 'splitattr', 'splithost', 'splitnport', 'splitpasswd',
1177 'splitport', 'splitquery', 'splittag', 'splittype', 'splituser',
1178 'splitvalue',
1179 'ResultBase', 'clear_cache', 'to_bytes', 'unwrap',
1180 }
1181 for name in dir(urllib.parse):
1182 if name.startswith('_') or name in undocumented:
1183 continue
1184 object = getattr(urllib.parse, name)
1185 if getattr(object, '__module__', None) == 'urllib.parse':
1186 expected.append(name)
1187 self.assertCountEqual(urllib.parse.__all__, expected)
1188
1189 def test_urlsplit_normalization(self):
1190 # Certain characters should never occur in the netloc,
1191 # including under normalization.
1192 # Ensure that ALL of them are detected and cause an error
1193 illegal_chars = '/:#?@'
1194 hex_chars = {'{:04X}'.format(ord(c)) for c in illegal_chars}
1195 denorm_chars = [
1196 c for c in map(chr, range(128, sys.maxunicode))
1197 if unicodedata.decomposition(c)
1198 and (hex_chars & set(unicodedata.decomposition(c).split()))
1199 and c not in illegal_chars
1200 ]
1201 # Sanity check that we found at least one such character
1202 self.assertIn('\u2100', denorm_chars)
1203 self.assertIn('\uFF03', denorm_chars)
1204
1205 # bpo-36742: Verify port separators are ignored when they
1206 # existed prior to decomposition
1207 urllib.parse.urlsplit('http://\u30d5\u309a:80')
1208 with self.assertRaises(ValueError):
1209 urllib.parse.urlsplit('http://\u30d5\u309a\ufe1380')
1210
1211 for scheme in ["http", "https", "ftp"]:
1212 for netloc in ["netloc{}false.netloc", "n{}user@netloc"]:
1213 for c in denorm_chars:
1214 url = "{}://{}/path".format(scheme, netloc.format(c))
1215 with self.subTest(url=url, char='{:04X}'.format(ord(c))):
1216 with self.assertRaises(ValueError):
1217 urllib.parse.urlsplit(url)
1218
1219 class Utility_Tests(unittest.TestCase):
1220 """Testcase to test the various utility functions in the urllib."""
1221 # In Python 2 this test class was in test_urllib.
1222
1223 def test_splittype(self):
1224 splittype = urllib.parse._splittype
1225 self.assertEqual(splittype('type:opaquestring'), ('type', 'opaquestring'))
1226 self.assertEqual(splittype('opaquestring'), (None, 'opaquestring'))
1227 self.assertEqual(splittype(':opaquestring'), (None, ':opaquestring'))
1228 self.assertEqual(splittype('type:'), ('type', ''))
1229 self.assertEqual(splittype('type:opaque:string'), ('type', 'opaque:string'))
1230
1231 def test_splithost(self):
1232 splithost = urllib.parse._splithost
1233 self.assertEqual(splithost('//www.example.org:80/foo/bar/baz.html'),
1234 ('www.example.org:80', '/foo/bar/baz.html'))
1235 self.assertEqual(splithost('//www.example.org:80'),
1236 ('www.example.org:80', ''))
1237 self.assertEqual(splithost('/foo/bar/baz.html'),
1238 (None, '/foo/bar/baz.html'))
1239
1240 # bpo-30500: # starts a fragment.
1241 self.assertEqual(splithost('//127.0.0.1#@host.com'),
1242 ('127.0.0.1', '/#@host.com'))
1243 self.assertEqual(splithost('//127.0.0.1#@host.com:80'),
1244 ('127.0.0.1', '/#@host.com:80'))
1245 self.assertEqual(splithost('//127.0.0.1:80#@host.com'),
1246 ('127.0.0.1:80', '/#@host.com'))
1247
1248 # Empty host is returned as empty string.
1249 self.assertEqual(splithost("///file"),
1250 ('', '/file'))
1251
1252 # Trailing semicolon, question mark and hash symbol are kept.
1253 self.assertEqual(splithost("//example.net/file;"),
1254 ('example.net', '/file;'))
1255 self.assertEqual(splithost("//example.net/file?"),
1256 ('example.net', '/file?'))
1257 self.assertEqual(splithost("//example.net/file#"),
1258 ('example.net', '/file#'))
1259
1260 def test_splituser(self):
1261 splituser = urllib.parse._splituser
1262 self.assertEqual(splituser('User:Pass@www.python.org:080'),
1263 ('User:Pass', 'www.python.org:080'))
1264 self.assertEqual(splituser('@www.python.org:080'),
1265 ('', 'www.python.org:080'))
1266 self.assertEqual(splituser('www.python.org:080'),
1267 (None, 'www.python.org:080'))
1268 self.assertEqual(splituser('User:Pass@'),
1269 ('User:Pass', ''))
1270 self.assertEqual(splituser('User@example.com:Pass@www.python.org:080'),
1271 ('User@example.com:Pass', 'www.python.org:080'))
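# Everything up to the last '@' is treated as the user:password part, so an
# '@' inside the credentials (as above) does not end the userinfo.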
1272
1273 def test_splitpasswd(self):
1274 # Some of the password examples are not sensible, but they are included
1275 # to conform to RFC 2617 and to address issue4675.
1276 splitpasswd = urllib.parse._splitpasswd
1277 self.assertEqual(splitpasswd('user:ab'), ('user', 'ab'))
1278 self.assertEqual(splitpasswd('user:a\nb'), ('user', 'a\nb'))
1279 self.assertEqual(splitpasswd('user:a\tb'), ('user', 'a\tb'))
1280 self.assertEqual(splitpasswd('user:a\rb'), ('user', 'a\rb'))
1281 self.assertEqual(splitpasswd('user:a\fb'), ('user', 'a\fb'))
1282 self.assertEqual(splitpasswd('user:a\vb'), ('user', 'a\vb'))
1283 self.assertEqual(splitpasswd('user:a:b'), ('user', 'a:b'))
1284 self.assertEqual(splitpasswd('user:a b'), ('user', 'a b'))
1285 self.assertEqual(splitpasswd('user 2:ab'), ('user 2', 'ab'))
1286 self.assertEqual(splitpasswd('user+1:a+b'), ('user+1', 'a+b'))
1287 self.assertEqual(splitpasswd('user:'), ('user', ''))
1288 self.assertEqual(splitpasswd('user'), ('user', None))
1289 self.assertEqual(splitpasswd(':ab'), ('', 'ab'))
1290
1291 def test_splitport(self):
1292 splitport = urllib.parse._splitport
1293 self.assertEqual(splitport('parrot:88'), ('parrot', '88'))
1294 self.assertEqual(splitport('parrot'), ('parrot', None))
1295 self.assertEqual(splitport('parrot:'), ('parrot', None))
1296 self.assertEqual(splitport('127.0.0.1'), ('127.0.0.1', None))
1297 self.assertEqual(splitport('parrot:cheese'), ('parrot:cheese', None))
1298 self.assertEqual(splitport('[::1]:88'), ('[::1]', '88'))
1299 self.assertEqual(splitport('[::1]'), ('[::1]', None))
1300 self.assertEqual(splitport(':88'), ('', '88'))
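# A trailing port is only split off when everything after the final ':'
# consists of digits; a non-numeric suffix such as 'cheese' stays attached
# to the host (a reading of the cases above, not a documented API).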
1301
1302 def test_splitnport(self):
1303 splitnport = urllib.parse._splitnport
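# Reading of the cases below: the second element is the numeric port, the
# defval argument (default -1) when the port is missing or empty, and None
# when the port text is not a valid integer.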
1304 self.assertEqual(splitnport('parrot:88'), ('parrot', 88))
1305 self.assertEqual(splitnport('parrot'), ('parrot', -1))
1306 self.assertEqual(splitnport('parrot', 55), ('parrot', 55))
1307 self.assertEqual(splitnport('parrot:'), ('parrot', -1))
1308 self.assertEqual(splitnport('parrot:', 55), ('parrot', 55))
1309 self.assertEqual(splitnport('127.0.0.1'), ('127.0.0.1', -1))
1310 self.assertEqual(splitnport('127.0.0.1', 55), ('127.0.0.1', 55))
1311 self.assertEqual(splitnport('parrot:cheese'), ('parrot', None))
1312 self.assertEqual(splitnport('parrot:cheese', 55), ('parrot', None))
1313 self.assertEqual(splitnport('parrot: +1_0 '), ('parrot', None))
1314
1315 def test_splitquery(self):
1316 # Normal cases are exercised by other tests; ensure that we also
1317 # catch cases with no query specified (testcase ensuring coverage)
1318 splitquery = urllib.parse._splitquery
1319 self.assertEqual(splitquery('http://python.org/fake?foo=bar'),
1320 ('http://python.org/fake', 'foo=bar'))
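# The query is split at the last '?', so a trailing '?' leaves an empty query: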
1321 self.assertEqual(splitquery('http://python.org/fake?foo=bar?'),
1322 ('http://python.org/fake?foo=bar', ''))
1323 self.assertEqual(splitquery('http://python.org/fake'),
1324 ('http://python.org/fake', None))
1325 self.assertEqual(splitquery('?foo=bar'), ('', 'foo=bar'))
1326
1327 def test_splittag(self):
1328 splittag = urllib.parse._splittag
1329 self.assertEqual(splittag('http://example.com?foo=bar#baz'),
1330 ('http://example.com?foo=bar', 'baz'))
1331 self.assertEqual(splittag('http://example.com?foo=bar#'),
1332 ('http://example.com?foo=bar', ''))
1333 self.assertEqual(splittag('#baz'), ('', 'baz'))
1334 self.assertEqual(splittag('http://example.com?foo=bar'),
1335 ('http://example.com?foo=bar', None))
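# Only the last '#' starts the tag: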
1336 self.assertEqual(splittag('http://example.com?foo=bar#baz#boo'),
1337 ('http://example.com?foo=bar#baz', 'boo'))
1338
1339 def test_splitattr(self):
1340 splitattr = urllib.parse._splitattr
1341 self.assertEqual(splitattr('/path;attr1=value1;attr2=value2'),
1342 ('/path', ['attr1=value1', 'attr2=value2']))
1343 self.assertEqual(splitattr('/path;'), ('/path', ['']))
1344 self.assertEqual(splitattr(';attr1=value1;attr2=value2'),
1345 ('', ['attr1=value1', 'attr2=value2']))
1346 self.assertEqual(splitattr('/path'), ('/path', []))
1347
1348 def test_splitvalue(self):
1349 # Normal cases are exercised by other tests; test pathological cases
1350 # with no key/value pairs (testcase ensuring coverage).
1351 splitvalue = urllib.parse._splitvalue
1352 self.assertEqual(splitvalue('foo=bar'), ('foo', 'bar'))
1353 self.assertEqual(splitvalue('foo='), ('foo', ''))
1354 self.assertEqual(splitvalue('=bar'), ('', 'bar'))
1355 self.assertEqual(splitvalue('foobar'), ('foobar', None))
1356 self.assertEqual(splitvalue('foo=bar=baz'), ('foo', 'bar=baz'))
1357
1358 def test_to_bytes(self):
1359 result = urllib.parse._to_bytes('http://www.python.org')
1360 self.assertEqual(result, 'http://www.python.org')
1361 self.assertRaises(UnicodeError, urllib.parse._to_bytes,
1362 'http://www.python.org/medi\u00e6val')
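# Despite the historical name, the helper returns the string unchanged for
# pure-ASCII input and raises UnicodeError for anything else, as the two
# cases above show.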
1363
1364 def test_unwrap(self):
1365 for wrapped_url in ('<URL:scheme://host/path>', '<scheme://host/path>',
1366 'URL:scheme://host/path', 'scheme://host/path'):
1367 url = urllib.parse.unwrap(wrapped_url)
1368 self.assertEqual(url, 'scheme://host/path')
1369
1370
1371 class DeprecationTest(unittest.TestCase):
1372
1373 def test_Quoter_deprecation(self):
1374 with self.assertWarns(DeprecationWarning) as cm:
1375 old_class = urllib.parse.Quoter
1376 self.assertIs(old_class, urllib.parse._Quoter)
1377 self.assertIn('Quoter will be removed', str(cm.warning))
1378
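# Each legacy split*() helper below is still callable, but every call is
# expected to emit a DeprecationWarning; most messages point to
# urllib.parse.urlparse() as the replacement.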
1379 def test_splittype_deprecation(self):
1380 with self.assertWarns(DeprecationWarning) as cm:
1381 urllib.parse.splittype('')
1382 self.assertEqual(str(cm.warning),
1383 'urllib.parse.splittype() is deprecated as of 3.8, '
1384 'use urllib.parse.urlparse() instead')
1385
1386 def test_splithost_deprecation(self):
1387 with self.assertWarns(DeprecationWarning) as cm:
1388 urllib.parse.splithost('')
1389 self.assertEqual(str(cm.warning),
1390 'urllib.parse.splithost() is deprecated as of 3.8, '
1391 'use urllib.parse.urlparse() instead')
1392
1393 def test_splituser_deprecation(self):
1394 with self.assertWarns(DeprecationWarning) as cm:
1395 urllib.parse.splituser('')
1396 self.assertEqual(str(cm.warning),
1397 'urllib.parse.splituser() is deprecated as of 3.8, '
1398 'use urllib.parse.urlparse() instead')
1399
1400 def test_splitpasswd_deprecation(self):
1401 with self.assertWarns(DeprecationWarning) as cm:
1402 urllib.parse.splitpasswd('')
1403 self.assertEqual(str(cm.warning),
1404 'urllib.parse.splitpasswd() is deprecated as of 3.8, '
1405 'use urllib.parse.urlparse() instead')
1406
1407 def test_splitport_deprecation(self):
1408 with self.assertWarns(DeprecationWarning) as cm:
1409 urllib.parse.splitport('')
1410 self.assertEqual(str(cm.warning),
1411 'urllib.parse.splitport() is deprecated as of 3.8, '
1412 'use urllib.parse.urlparse() instead')
1413
1414 def test_splitnport_deprecation(self):
1415 with self.assertWarns(DeprecationWarning) as cm:
1416 urllib.parse.splitnport('')
1417 self.assertEqual(str(cm.warning),
1418 'urllib.parse.splitnport() is deprecated as of 3.8, '
1419 'use urllib.parse.urlparse() instead')
1420
1421 def test_splitquery_deprecation(self):
1422 with self.assertWarns(DeprecationWarning) as cm:
1423 urllib.parse.splitquery('')
1424 self.assertEqual(str(cm.warning),
1425 'urllib.parse.splitquery() is deprecated as of 3.8, '
1426 'use urllib.parse.urlparse() instead')
1427
1428 def test_splittag_deprecation(self):
1429 with self.assertWarns(DeprecationWarning) as cm:
1430 urllib.parse.splittag('')
1431 self.assertEqual(str(cm.warning),
1432 'urllib.parse.splittag() is deprecated as of 3.8, '
1433 'use urllib.parse.urlparse() instead')
1434
1435 def test_splitattr_deprecation(self):
1436 with self.assertWarns(DeprecationWarning) as cm:
1437 urllib.parse.splitattr('')
1438 self.assertEqual(str(cm.warning),
1439 'urllib.parse.splitattr() is deprecated as of 3.8, '
1440 'use urllib.parse.urlparse() instead')
1441
1442 def test_splitvalue_deprecation(self):
1443 with self.assertWarns(DeprecationWarning) as cm:
1444 urllib.parse.splitvalue('')
1445 self.assertEqual(str(cm.warning),
1446 'urllib.parse.splitvalue() is deprecated as of 3.8, '
1447 'use urllib.parse.parse_qsl() instead')
1448
1449 def test_to_bytes_deprecation(self):
1450 with self.assertWarns(DeprecationWarning) as cm:
1451 urllib.parse.to_bytes('')
1452 self.assertEqual(str(cm.warning),
1453 'urllib.parse.to_bytes() is deprecated as of 3.8')
1454
1455
1456 if __name__ == "__main__":
1457 unittest.main()