1 import sys
2 import unicodedata
3 import unittest
4 import urllib.parse
5
6 RFC1808_BASE = "http://a/b/c/d;p?q#f"
7 RFC2396_BASE = "http://a/b/c/d;p?q"
8 RFC3986_BASE = 'http://a/b/c/d;p?q'
9 SIMPLE_BASE = 'http://a/b/c/d'
10
11 # Each parse_qsl testcase is a two-tuple that contains
12 # a string with the query and a list with the expected result.
13
14 parse_qsl_test_cases = [
15 ("", []),
16 ("&", []),
17 ("&&", []),
18 ("=", [('', '')]),
19 ("=a", [('', 'a')]),
20 ("a", [('a', '')]),
21 ("a=", [('a', '')]),
22 ("&a=b", [('a', 'b')]),
23 ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
24 ("a=1&a=2", [('a', '1'), ('a', '2')]),
25 (b"", []),
26 (b"&", []),
27 (b"&&", []),
28 (b"=", [(b'', b'')]),
29 (b"=a", [(b'', b'a')]),
30 (b"a", [(b'a', b'')]),
31 (b"a=", [(b'a', b'')]),
32 (b"&a=b", [(b'a', b'b')]),
33 (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
34 (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),
35 (";a=b", [(';a', 'b')]),
36 ("a=a+b;b=b+c", [('a', 'a b;b=b c')]),
37 (b";a=b", [(b';a', b'b')]),
38 (b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]),
39 ]
40
41 # Each parse_qs testcase is a two-tuple that contains
42 # a string with the query and a dictionary with the expected result.
43
44 parse_qs_test_cases = [
45 ("", {}),
46 ("&", {}),
47 ("&&", {}),
48 ("=", {'': ['']}),
49 ("=a", {'': ['a']}),
50 ("a", {'a': ['']}),
51 ("a=", {'a': ['']}),
52 ("&a=b", {'a': ['b']}),
53 ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
54 ("a=1&a=2", {'a': ['1', '2']}),
55 (b"", {}),
56 (b"&", {}),
57 (b"&&", {}),
58 (b"=", {b'': [b'']}),
59 (b"=a", {b'': [b'a']}),
60 (b"a", {b'a': [b'']}),
61 (b"a=", {b'a': [b'']}),
62 (b"&a=b", {b'a': [b'b']}),
63 (b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
64 (b"a=1&a=2", {b'a': [b'1', b'2']}),
65 (";a=b", {';a': ['b']}),
66 ("a=a+b;b=b+c", {'a': ['a b;b=b c']}),
67 (b";a=b", {b';a': [b'b']}),
68 (b"a=a+b;b=b+c", {b'a':[ b'a b;b=b c']}),
69 ]
70
71 class ESC[4;38;5;81mUrlParseTestCase(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
72
73 def checkRoundtrips(self, url, parsed, split):
74 result = urllib.parse.urlparse(url)
75 self.assertEqual(result, parsed)
76 t = (result.scheme, result.netloc, result.path,
77 result.params, result.query, result.fragment)
78 self.assertEqual(t, parsed)
79 # put it back together and it should be the same
80 result2 = urllib.parse.urlunparse(result)
81 self.assertEqual(result2, url)
82 self.assertEqual(result2, result.geturl())
83
84 # the result of geturl() is a fixpoint; we can always parse it
85 # again to get the same result:
86 result3 = urllib.parse.urlparse(result.geturl())
87 self.assertEqual(result3.geturl(), result.geturl())
88 self.assertEqual(result3, result)
89 self.assertEqual(result3.scheme, result.scheme)
90 self.assertEqual(result3.netloc, result.netloc)
91 self.assertEqual(result3.path, result.path)
92 self.assertEqual(result3.params, result.params)
93 self.assertEqual(result3.query, result.query)
94 self.assertEqual(result3.fragment, result.fragment)
95 self.assertEqual(result3.username, result.username)
96 self.assertEqual(result3.password, result.password)
97 self.assertEqual(result3.hostname, result.hostname)
98 self.assertEqual(result3.port, result.port)
99
100 # check the roundtrip using urlsplit() as well
101 result = urllib.parse.urlsplit(url)
102 self.assertEqual(result, split)
103 t = (result.scheme, result.netloc, result.path,
104 result.query, result.fragment)
105 self.assertEqual(t, split)
106 result2 = urllib.parse.urlunsplit(result)
107 self.assertEqual(result2, url)
108 self.assertEqual(result2, result.geturl())
109
110 # check the fixpoint property of re-parsing the result of geturl()
111 result3 = urllib.parse.urlsplit(result.geturl())
112 self.assertEqual(result3.geturl(), result.geturl())
113 self.assertEqual(result3, result)
114 self.assertEqual(result3.scheme, result.scheme)
115 self.assertEqual(result3.netloc, result.netloc)
116 self.assertEqual(result3.path, result.path)
117 self.assertEqual(result3.query, result.query)
118 self.assertEqual(result3.fragment, result.fragment)
119 self.assertEqual(result3.username, result.username)
120 self.assertEqual(result3.password, result.password)
121 self.assertEqual(result3.hostname, result.hostname)
122 self.assertEqual(result3.port, result.port)
123
124 def test_qsl(self):
125 for orig, expect in parse_qsl_test_cases:
126 result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
127 self.assertEqual(result, expect, "Error parsing %r" % orig)
128 expect_without_blanks = [v for v in expect if len(v[1])]
129 result = urllib.parse.parse_qsl(orig, keep_blank_values=False)
130 self.assertEqual(result, expect_without_blanks,
131 "Error parsing %r" % orig)
132
133 def test_qs(self):
134 for orig, expect in parse_qs_test_cases:
135 result = urllib.parse.parse_qs(orig, keep_blank_values=True)
136 self.assertEqual(result, expect, "Error parsing %r" % orig)
137 expect_without_blanks = {v: expect[v]
138 for v in expect if len(expect[v][0])}
139 result = urllib.parse.parse_qs(orig, keep_blank_values=False)
140 self.assertEqual(result, expect_without_blanks,
141 "Error parsing %r" % orig)
142
143 def test_roundtrips(self):
144 str_cases = [
145 ('file:///tmp/junk.txt',
146 ('file', '', '/tmp/junk.txt', '', '', ''),
147 ('file', '', '/tmp/junk.txt', '', '')),
148 ('imap://mail.python.org/mbox1',
149 ('imap', 'mail.python.org', '/mbox1', '', '', ''),
150 ('imap', 'mail.python.org', '/mbox1', '', '')),
151 ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
152 ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
153 '', '', ''),
154 ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
155 '', '')),
156 ('nfs://server/path/to/file.txt',
157 ('nfs', 'server', '/path/to/file.txt', '', '', ''),
158 ('nfs', 'server', '/path/to/file.txt', '', '')),
159 ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
160 ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
161 '', '', ''),
162 ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
163 '', '')),
164 ('git+ssh://git@github.com/user/project.git',
165 ('git+ssh', 'git@github.com','/user/project.git',
166 '','',''),
167 ('git+ssh', 'git@github.com','/user/project.git',
168 '', '')),
169 ]
170 def _encode(t):
171 return (t[0].encode('ascii'),
172 tuple(x.encode('ascii') for x in t[1]),
173 tuple(x.encode('ascii') for x in t[2]))
174 bytes_cases = [_encode(x) for x in str_cases]
175 for url, parsed, split in str_cases + bytes_cases:
176 self.checkRoundtrips(url, parsed, split)
177
178 def test_http_roundtrips(self):
179 # urllib.parse.urlsplit treats 'http:' as an optimized special case,
180 # so we test both 'http:' and 'https:' in all the following.
181 # Three cheers for white box knowledge!
182 str_cases = [
183 ('://www.python.org',
184 ('www.python.org', '', '', '', ''),
185 ('www.python.org', '', '', '')),
186 ('://www.python.org#abc',
187 ('www.python.org', '', '', '', 'abc'),
188 ('www.python.org', '', '', 'abc')),
189 ('://www.python.org?q=abc',
190 ('www.python.org', '', '', 'q=abc', ''),
191 ('www.python.org', '', 'q=abc', '')),
192 ('://www.python.org/#abc',
193 ('www.python.org', '/', '', '', 'abc'),
194 ('www.python.org', '/', '', 'abc')),
195 ('://a/b/c/d;p?q#f',
196 ('a', '/b/c/d', 'p', 'q', 'f'),
197 ('a', '/b/c/d;p', 'q', 'f')),
198 ]
199 def _encode(t):
200 return (t[0].encode('ascii'),
201 tuple(x.encode('ascii') for x in t[1]),
202 tuple(x.encode('ascii') for x in t[2]))
203 bytes_cases = [_encode(x) for x in str_cases]
204 str_schemes = ('http', 'https')
205 bytes_schemes = (b'http', b'https')
206 str_tests = str_schemes, str_cases
207 bytes_tests = bytes_schemes, bytes_cases
208 for schemes, test_cases in (str_tests, bytes_tests):
209 for scheme in schemes:
210 for url, parsed, split in test_cases:
211 url = scheme + url
212 parsed = (scheme,) + parsed
213 split = (scheme,) + split
214 self.checkRoundtrips(url, parsed, split)
215
216 def checkJoin(self, base, relurl, expected):
217 str_components = (base, relurl, expected)
218 self.assertEqual(urllib.parse.urljoin(base, relurl), expected)
219 bytes_components = baseb, relurlb, expectedb = [
220 x.encode('ascii') for x in str_components]
221 self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)
222
223 def test_unparse_parse(self):
224 str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]
225 bytes_cases = [x.encode('ascii') for x in str_cases]
226 for u in str_cases + bytes_cases:
227 self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)
228 self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)
229
230 def test_RFC1808(self):
231 # "normal" cases from RFC 1808:
232 self.checkJoin(RFC1808_BASE, 'g:h', 'g:h')
233 self.checkJoin(RFC1808_BASE, 'g', 'http://a/b/c/g')
234 self.checkJoin(RFC1808_BASE, './g', 'http://a/b/c/g')
235 self.checkJoin(RFC1808_BASE, 'g/', 'http://a/b/c/g/')
236 self.checkJoin(RFC1808_BASE, '/g', 'http://a/g')
237 self.checkJoin(RFC1808_BASE, '//g', 'http://g')
238 self.checkJoin(RFC1808_BASE, 'g?y', 'http://a/b/c/g?y')
239 self.checkJoin(RFC1808_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
240 self.checkJoin(RFC1808_BASE, '#s', 'http://a/b/c/d;p?q#s')
241 self.checkJoin(RFC1808_BASE, 'g#s', 'http://a/b/c/g#s')
242 self.checkJoin(RFC1808_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
243 self.checkJoin(RFC1808_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
244 self.checkJoin(RFC1808_BASE, 'g;x', 'http://a/b/c/g;x')
245 self.checkJoin(RFC1808_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
246 self.checkJoin(RFC1808_BASE, '.', 'http://a/b/c/')
247 self.checkJoin(RFC1808_BASE, './', 'http://a/b/c/')
248 self.checkJoin(RFC1808_BASE, '..', 'http://a/b/')
249 self.checkJoin(RFC1808_BASE, '../', 'http://a/b/')
250 self.checkJoin(RFC1808_BASE, '../g', 'http://a/b/g')
251 self.checkJoin(RFC1808_BASE, '../..', 'http://a/')
252 self.checkJoin(RFC1808_BASE, '../../', 'http://a/')
253 self.checkJoin(RFC1808_BASE, '../../g', 'http://a/g')
254
255 # "abnormal" cases from RFC 1808:
256 self.checkJoin(RFC1808_BASE, '', 'http://a/b/c/d;p?q#f')
257 self.checkJoin(RFC1808_BASE, 'g.', 'http://a/b/c/g.')
258 self.checkJoin(RFC1808_BASE, '.g', 'http://a/b/c/.g')
259 self.checkJoin(RFC1808_BASE, 'g..', 'http://a/b/c/g..')
260 self.checkJoin(RFC1808_BASE, '..g', 'http://a/b/c/..g')
261 self.checkJoin(RFC1808_BASE, './../g', 'http://a/b/g')
262 self.checkJoin(RFC1808_BASE, './g/.', 'http://a/b/c/g/')
263 self.checkJoin(RFC1808_BASE, 'g/./h', 'http://a/b/c/g/h')
264 self.checkJoin(RFC1808_BASE, 'g/../h', 'http://a/b/c/h')
265
266 # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
267 # so we'll not actually run these tests (which expect 1808 behavior).
268 #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g')
269 #self.checkJoin(RFC1808_BASE, 'http:', 'http:')
270
271 # XXX: The following tests are no longer compatible with RFC3986
272 # self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g')
273 # self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g')
274 # self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g')
275 # self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g')
276
277
278 def test_RFC2368(self):
279 # Issue 11467: path that starts with a number is not parsed correctly
280 self.assertEqual(urllib.parse.urlparse('mailto:1337@example.org'),
281 ('mailto', '', '1337@example.org', '', '', ''))
282
283 def test_RFC2396(self):
284 # cases from RFC 2396
285
286 self.checkJoin(RFC2396_BASE, 'g:h', 'g:h')
287 self.checkJoin(RFC2396_BASE, 'g', 'http://a/b/c/g')
288 self.checkJoin(RFC2396_BASE, './g', 'http://a/b/c/g')
289 self.checkJoin(RFC2396_BASE, 'g/', 'http://a/b/c/g/')
290 self.checkJoin(RFC2396_BASE, '/g', 'http://a/g')
291 self.checkJoin(RFC2396_BASE, '//g', 'http://g')
292 self.checkJoin(RFC2396_BASE, 'g?y', 'http://a/b/c/g?y')
293 self.checkJoin(RFC2396_BASE, '#s', 'http://a/b/c/d;p?q#s')
294 self.checkJoin(RFC2396_BASE, 'g#s', 'http://a/b/c/g#s')
295 self.checkJoin(RFC2396_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
296 self.checkJoin(RFC2396_BASE, 'g;x', 'http://a/b/c/g;x')
297 self.checkJoin(RFC2396_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
298 self.checkJoin(RFC2396_BASE, '.', 'http://a/b/c/')
299 self.checkJoin(RFC2396_BASE, './', 'http://a/b/c/')
300 self.checkJoin(RFC2396_BASE, '..', 'http://a/b/')
301 self.checkJoin(RFC2396_BASE, '../', 'http://a/b/')
302 self.checkJoin(RFC2396_BASE, '../g', 'http://a/b/g')
303 self.checkJoin(RFC2396_BASE, '../..', 'http://a/')
304 self.checkJoin(RFC2396_BASE, '../../', 'http://a/')
305 self.checkJoin(RFC2396_BASE, '../../g', 'http://a/g')
306 self.checkJoin(RFC2396_BASE, '', RFC2396_BASE)
307 self.checkJoin(RFC2396_BASE, 'g.', 'http://a/b/c/g.')
308 self.checkJoin(RFC2396_BASE, '.g', 'http://a/b/c/.g')
309 self.checkJoin(RFC2396_BASE, 'g..', 'http://a/b/c/g..')
310 self.checkJoin(RFC2396_BASE, '..g', 'http://a/b/c/..g')
311 self.checkJoin(RFC2396_BASE, './../g', 'http://a/b/g')
312 self.checkJoin(RFC2396_BASE, './g/.', 'http://a/b/c/g/')
313 self.checkJoin(RFC2396_BASE, 'g/./h', 'http://a/b/c/g/h')
314 self.checkJoin(RFC2396_BASE, 'g/../h', 'http://a/b/c/h')
315 self.checkJoin(RFC2396_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y')
316 self.checkJoin(RFC2396_BASE, 'g;x=1/../y', 'http://a/b/c/y')
317 self.checkJoin(RFC2396_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
318 self.checkJoin(RFC2396_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x')
319 self.checkJoin(RFC2396_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
320 self.checkJoin(RFC2396_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x')
321
322 # XXX: The following tests are no longer compatible with RFC3986
323 # self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g')
324 # self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g')
325 # self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g')
326 # self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g')
327
328 def test_RFC3986(self):
329 self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y')
330 self.checkJoin(RFC3986_BASE, ';x', 'http://a/b/c/;x')
331 self.checkJoin(RFC3986_BASE, 'g:h','g:h')
332 self.checkJoin(RFC3986_BASE, 'g','http://a/b/c/g')
333 self.checkJoin(RFC3986_BASE, './g','http://a/b/c/g')
334 self.checkJoin(RFC3986_BASE, 'g/','http://a/b/c/g/')
335 self.checkJoin(RFC3986_BASE, '/g','http://a/g')
336 self.checkJoin(RFC3986_BASE, '//g','http://g')
337 self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y')
338 self.checkJoin(RFC3986_BASE, 'g?y','http://a/b/c/g?y')
339 self.checkJoin(RFC3986_BASE, '#s','http://a/b/c/d;p?q#s')
340 self.checkJoin(RFC3986_BASE, 'g#s','http://a/b/c/g#s')
341 self.checkJoin(RFC3986_BASE, 'g?y#s','http://a/b/c/g?y#s')
342 self.checkJoin(RFC3986_BASE, ';x','http://a/b/c/;x')
343 self.checkJoin(RFC3986_BASE, 'g;x','http://a/b/c/g;x')
344 self.checkJoin(RFC3986_BASE, 'g;x?y#s','http://a/b/c/g;x?y#s')
345 self.checkJoin(RFC3986_BASE, '','http://a/b/c/d;p?q')
346 self.checkJoin(RFC3986_BASE, '.','http://a/b/c/')
347 self.checkJoin(RFC3986_BASE, './','http://a/b/c/')
348 self.checkJoin(RFC3986_BASE, '..','http://a/b/')
349 self.checkJoin(RFC3986_BASE, '../','http://a/b/')
350 self.checkJoin(RFC3986_BASE, '../g','http://a/b/g')
351 self.checkJoin(RFC3986_BASE, '../..','http://a/')
352 self.checkJoin(RFC3986_BASE, '../../','http://a/')
353 self.checkJoin(RFC3986_BASE, '../../g','http://a/g')
354 self.checkJoin(RFC3986_BASE, '../../../g', 'http://a/g')
355
356 # Abnormal Examples
357
358 # The 'abnormal scenarios' are incompatible with RFC2986 parsing
359 # Tests are here for reference.
360
361 self.checkJoin(RFC3986_BASE, '../../../g','http://a/g')
362 self.checkJoin(RFC3986_BASE, '../../../../g','http://a/g')
363 self.checkJoin(RFC3986_BASE, '/./g','http://a/g')
364 self.checkJoin(RFC3986_BASE, '/../g','http://a/g')
365 self.checkJoin(RFC3986_BASE, 'g.','http://a/b/c/g.')
366 self.checkJoin(RFC3986_BASE, '.g','http://a/b/c/.g')
367 self.checkJoin(RFC3986_BASE, 'g..','http://a/b/c/g..')
368 self.checkJoin(RFC3986_BASE, '..g','http://a/b/c/..g')
369 self.checkJoin(RFC3986_BASE, './../g','http://a/b/g')
370 self.checkJoin(RFC3986_BASE, './g/.','http://a/b/c/g/')
371 self.checkJoin(RFC3986_BASE, 'g/./h','http://a/b/c/g/h')
372 self.checkJoin(RFC3986_BASE, 'g/../h','http://a/b/c/h')
373 self.checkJoin(RFC3986_BASE, 'g;x=1/./y','http://a/b/c/g;x=1/y')
374 self.checkJoin(RFC3986_BASE, 'g;x=1/../y','http://a/b/c/y')
375 self.checkJoin(RFC3986_BASE, 'g?y/./x','http://a/b/c/g?y/./x')
376 self.checkJoin(RFC3986_BASE, 'g?y/../x','http://a/b/c/g?y/../x')
377 self.checkJoin(RFC3986_BASE, 'g#s/./x','http://a/b/c/g#s/./x')
378 self.checkJoin(RFC3986_BASE, 'g#s/../x','http://a/b/c/g#s/../x')
379 #self.checkJoin(RFC3986_BASE, 'http:g','http:g') # strict parser
380 self.checkJoin(RFC3986_BASE, 'http:g','http://a/b/c/g') #relaxed parser
381
382 # Test for issue9721
383 self.checkJoin('http://a/b/c/de', ';x','http://a/b/c/;x')
384
385 def test_urljoins(self):
386 self.checkJoin(SIMPLE_BASE, 'g:h','g:h')
387 self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g')
388 self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d')
389 self.checkJoin(SIMPLE_BASE, 'g','http://a/b/c/g')
390 self.checkJoin(SIMPLE_BASE, './g','http://a/b/c/g')
391 self.checkJoin(SIMPLE_BASE, 'g/','http://a/b/c/g/')
392 self.checkJoin(SIMPLE_BASE, '/g','http://a/g')
393 self.checkJoin(SIMPLE_BASE, '//g','http://g')
394 self.checkJoin(SIMPLE_BASE, '?y','http://a/b/c/d?y')
395 self.checkJoin(SIMPLE_BASE, 'g?y','http://a/b/c/g?y')
396 self.checkJoin(SIMPLE_BASE, 'g?y/./x','http://a/b/c/g?y/./x')
397 self.checkJoin(SIMPLE_BASE, '.','http://a/b/c/')
398 self.checkJoin(SIMPLE_BASE, './','http://a/b/c/')
399 self.checkJoin(SIMPLE_BASE, '..','http://a/b/')
400 self.checkJoin(SIMPLE_BASE, '../','http://a/b/')
401 self.checkJoin(SIMPLE_BASE, '../g','http://a/b/g')
402 self.checkJoin(SIMPLE_BASE, '../..','http://a/')
403 self.checkJoin(SIMPLE_BASE, '../../g','http://a/g')
404 self.checkJoin(SIMPLE_BASE, './../g','http://a/b/g')
405 self.checkJoin(SIMPLE_BASE, './g/.','http://a/b/c/g/')
406 self.checkJoin(SIMPLE_BASE, 'g/./h','http://a/b/c/g/h')
407 self.checkJoin(SIMPLE_BASE, 'g/../h','http://a/b/c/h')
408 self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g')
409 self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d')
410 self.checkJoin(SIMPLE_BASE, 'http:?y','http://a/b/c/d?y')
411 self.checkJoin(SIMPLE_BASE, 'http:g?y','http://a/b/c/g?y')
412 self.checkJoin(SIMPLE_BASE, 'http:g?y/./x','http://a/b/c/g?y/./x')
413 self.checkJoin('http:///', '..','http:///')
414 self.checkJoin('', 'http://a/b/c/g?y/./x','http://a/b/c/g?y/./x')
415 self.checkJoin('', 'http://a/./g', 'http://a/./g')
416 self.checkJoin('svn://pathtorepo/dir1', 'dir2', 'svn://pathtorepo/dir2')
417 self.checkJoin('svn+ssh://pathtorepo/dir1', 'dir2', 'svn+ssh://pathtorepo/dir2')
418 self.checkJoin('ws://a/b','g','ws://a/g')
419 self.checkJoin('wss://a/b','g','wss://a/g')
420
421 # XXX: The following tests are no longer compatible with RFC3986
422 # self.checkJoin(SIMPLE_BASE, '../../../g','http://a/../g')
423 # self.checkJoin(SIMPLE_BASE, '/./g','http://a/./g')
424
425 # test for issue22118 duplicate slashes
426 self.checkJoin(SIMPLE_BASE + '/', 'foo', SIMPLE_BASE + '/foo')
427
428 # Non-RFC-defined tests, covering variations of base and trailing
429 # slashes
430 self.checkJoin('http://a/b/c/d/e/', '../../f/g/', 'http://a/b/c/f/g/')
431 self.checkJoin('http://a/b/c/d/e', '../../f/g/', 'http://a/b/f/g/')
432 self.checkJoin('http://a/b/c/d/e/', '/../../f/g/', 'http://a/f/g/')
433 self.checkJoin('http://a/b/c/d/e', '/../../f/g/', 'http://a/f/g/')
434 self.checkJoin('http://a/b/c/d/e/', '../../f/g', 'http://a/b/c/f/g')
435 self.checkJoin('http://a/b/', '../../f/g/', 'http://a/f/g/')
436
437 # issue 23703: don't duplicate filename
438 self.checkJoin('a', 'b', 'b')
439
440 def test_RFC2732(self):
441 str_cases = [
442 ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
443 ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
444 ('http://[::1]:5432/foo/', '::1', 5432),
445 ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
446 ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
447 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
448 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
449 ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
450 ('http://[::ffff:12.34.56.78]:5432/foo/',
451 '::ffff:12.34.56.78', 5432),
452 ('http://Test.python.org/foo/', 'test.python.org', None),
453 ('http://12.34.56.78/foo/', '12.34.56.78', None),
454 ('http://[::1]/foo/', '::1', None),
455 ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
456 ('http://[dead:beef::]/foo/', 'dead:beef::', None),
457 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
458 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
459 ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
460 ('http://[::ffff:12.34.56.78]/foo/',
461 '::ffff:12.34.56.78', None),
462 ('http://Test.python.org:/foo/', 'test.python.org', None),
463 ('http://12.34.56.78:/foo/', '12.34.56.78', None),
464 ('http://[::1]:/foo/', '::1', None),
465 ('http://[dead:beef::1]:/foo/', 'dead:beef::1', None),
466 ('http://[dead:beef::]:/foo/', 'dead:beef::', None),
467 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:/foo/',
468 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
469 ('http://[::12.34.56.78]:/foo/', '::12.34.56.78', None),
470 ('http://[::ffff:12.34.56.78]:/foo/',
471 '::ffff:12.34.56.78', None),
472 ]
473 def _encode(t):
474 return t[0].encode('ascii'), t[1].encode('ascii'), t[2]
475 bytes_cases = [_encode(x) for x in str_cases]
476 for url, hostname, port in str_cases + bytes_cases:
477 urlparsed = urllib.parse.urlparse(url)
478 self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port))
479
480 str_cases = [
481 'http://::12.34.56.78]/',
482 'http://[::1/foo/',
483 'ftp://[::1/foo/bad]/bad',
484 'http://[::1/foo/bad]/bad',
485 'http://[::ffff:12.34.56.78']
486 bytes_cases = [x.encode('ascii') for x in str_cases]
487 for invalid_url in str_cases + bytes_cases:
488 self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url)
489
490 def test_urldefrag(self):
491 str_cases = [
492 ('http://python.org#frag', 'http://python.org', 'frag'),
493 ('http://python.org', 'http://python.org', ''),
494 ('http://python.org/#frag', 'http://python.org/', 'frag'),
495 ('http://python.org/', 'http://python.org/', ''),
496 ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
497 ('http://python.org/?q', 'http://python.org/?q', ''),
498 ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
499 ('http://python.org/p?q', 'http://python.org/p?q', ''),
500 (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
501 (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
502 ]
503 def _encode(t):
504 return type(t)(x.encode('ascii') for x in t)
505 bytes_cases = [_encode(x) for x in str_cases]
506 for url, defrag, frag in str_cases + bytes_cases:
507 result = urllib.parse.urldefrag(url)
508 self.assertEqual(result.geturl(), url)
509 self.assertEqual(result, (defrag, frag))
510 self.assertEqual(result.url, defrag)
511 self.assertEqual(result.fragment, frag)
512
513 def test_urlsplit_scoped_IPv6(self):
514 p = urllib.parse.urlsplit('http://[FE80::822a:a8ff:fe49:470c%tESt]:1234')
515 self.assertEqual(p.hostname, "fe80::822a:a8ff:fe49:470c%tESt")
516 self.assertEqual(p.netloc, '[FE80::822a:a8ff:fe49:470c%tESt]:1234')
517
518 p = urllib.parse.urlsplit(b'http://[FE80::822a:a8ff:fe49:470c%tESt]:1234')
519 self.assertEqual(p.hostname, b"fe80::822a:a8ff:fe49:470c%tESt")
520 self.assertEqual(p.netloc, b'[FE80::822a:a8ff:fe49:470c%tESt]:1234')
521
522 def test_urlsplit_attributes(self):
523 url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
524 p = urllib.parse.urlsplit(url)
525 self.assertEqual(p.scheme, "http")
526 self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
527 self.assertEqual(p.path, "/doc/")
528 self.assertEqual(p.query, "")
529 self.assertEqual(p.fragment, "frag")
530 self.assertEqual(p.username, None)
531 self.assertEqual(p.password, None)
532 self.assertEqual(p.hostname, "www.python.org")
533 self.assertEqual(p.port, None)
534 # geturl() won't return exactly the original URL in this case
535 # since the scheme is always case-normalized
536 # We handle this by ignoring the first 4 characters of the URL
537 self.assertEqual(p.geturl()[4:], url[4:])
538
539 url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
540 p = urllib.parse.urlsplit(url)
541 self.assertEqual(p.scheme, "http")
542 self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
543 self.assertEqual(p.path, "/doc/")
544 self.assertEqual(p.query, "query=yes")
545 self.assertEqual(p.fragment, "frag")
546 self.assertEqual(p.username, "User")
547 self.assertEqual(p.password, "Pass")
548 self.assertEqual(p.hostname, "www.python.org")
549 self.assertEqual(p.port, 80)
550 self.assertEqual(p.geturl(), url)
551
552 # Addressing issue1698, which suggests Username can contain
553 # "@" characters. Though not RFC compliant, many ftp sites allow
554 # and request email addresses as usernames.
555
556 url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
557 p = urllib.parse.urlsplit(url)
558 self.assertEqual(p.scheme, "http")
559 self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
560 self.assertEqual(p.path, "/doc/")
561 self.assertEqual(p.query, "query=yes")
562 self.assertEqual(p.fragment, "frag")
563 self.assertEqual(p.username, "User@example.com")
564 self.assertEqual(p.password, "Pass")
565 self.assertEqual(p.hostname, "www.python.org")
566 self.assertEqual(p.port, 80)
567 self.assertEqual(p.geturl(), url)
568
569 # And check them all again, only with bytes this time
570 url = b"HTTP://WWW.PYTHON.ORG/doc/#frag"
571 p = urllib.parse.urlsplit(url)
572 self.assertEqual(p.scheme, b"http")
573 self.assertEqual(p.netloc, b"WWW.PYTHON.ORG")
574 self.assertEqual(p.path, b"/doc/")
575 self.assertEqual(p.query, b"")
576 self.assertEqual(p.fragment, b"frag")
577 self.assertEqual(p.username, None)
578 self.assertEqual(p.password, None)
579 self.assertEqual(p.hostname, b"www.python.org")
580 self.assertEqual(p.port, None)
581 self.assertEqual(p.geturl()[4:], url[4:])
582
583 url = b"http://User:Pass@www.python.org:080/doc/?query=yes#frag"
584 p = urllib.parse.urlsplit(url)
585 self.assertEqual(p.scheme, b"http")
586 self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")
587 self.assertEqual(p.path, b"/doc/")
588 self.assertEqual(p.query, b"query=yes")
589 self.assertEqual(p.fragment, b"frag")
590 self.assertEqual(p.username, b"User")
591 self.assertEqual(p.password, b"Pass")
592 self.assertEqual(p.hostname, b"www.python.org")
593 self.assertEqual(p.port, 80)
594 self.assertEqual(p.geturl(), url)
595
596 url = b"http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
597 p = urllib.parse.urlsplit(url)
598 self.assertEqual(p.scheme, b"http")
599 self.assertEqual(p.netloc, b"User@example.com:Pass@www.python.org:080")
600 self.assertEqual(p.path, b"/doc/")
601 self.assertEqual(p.query, b"query=yes")
602 self.assertEqual(p.fragment, b"frag")
603 self.assertEqual(p.username, b"User@example.com")
604 self.assertEqual(p.password, b"Pass")
605 self.assertEqual(p.hostname, b"www.python.org")
606 self.assertEqual(p.port, 80)
607 self.assertEqual(p.geturl(), url)
608
609 # Verify an illegal port raises ValueError
610 url = b"HTTP://WWW.PYTHON.ORG:65536/doc/#frag"
611 p = urllib.parse.urlsplit(url)
612 with self.assertRaisesRegex(ValueError, "out of range"):
613 p.port
614
615 def test_urlsplit_remove_unsafe_bytes(self):
616 # Remove ASCII tabs and newlines from input
617 url = "http\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
618 p = urllib.parse.urlsplit(url)
619 self.assertEqual(p.scheme, "http")
620 self.assertEqual(p.netloc, "www.python.org")
621 self.assertEqual(p.path, "/javascript:alert('msg')/")
622 self.assertEqual(p.query, "query=something")
623 self.assertEqual(p.fragment, "fragment")
624 self.assertEqual(p.username, None)
625 self.assertEqual(p.password, None)
626 self.assertEqual(p.hostname, "www.python.org")
627 self.assertEqual(p.port, None)
628 self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment")
629
630 # Remove ASCII tabs and newlines from input as bytes.
631 url = b"http\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
632 p = urllib.parse.urlsplit(url)
633 self.assertEqual(p.scheme, b"http")
634 self.assertEqual(p.netloc, b"www.python.org")
635 self.assertEqual(p.path, b"/javascript:alert('msg')/")
636 self.assertEqual(p.query, b"query=something")
637 self.assertEqual(p.fragment, b"fragment")
638 self.assertEqual(p.username, None)
639 self.assertEqual(p.password, None)
640 self.assertEqual(p.hostname, b"www.python.org")
641 self.assertEqual(p.port, None)
642 self.assertEqual(p.geturl(), b"http://www.python.org/javascript:alert('msg')/?query=something#fragment")
643
644 # with scheme as cache-key
645 url = "http://www.python.org/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
646 scheme = "ht\ntp"
647 for _ in range(2):
648 p = urllib.parse.urlsplit(url, scheme=scheme)
649 self.assertEqual(p.scheme, "http")
650 self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment")
651
652 def test_urlsplit_strip_url(self):
653 noise = bytes(range(0, 0x20 + 1))
654 base_url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
655
656 url = noise.decode("utf-8") + base_url
657 p = urllib.parse.urlsplit(url)
658 self.assertEqual(p.scheme, "http")
659 self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
660 self.assertEqual(p.path, "/doc/")
661 self.assertEqual(p.query, "query=yes")
662 self.assertEqual(p.fragment, "frag")
663 self.assertEqual(p.username, "User")
664 self.assertEqual(p.password, "Pass")
665 self.assertEqual(p.hostname, "www.python.org")
666 self.assertEqual(p.port, 80)
667 self.assertEqual(p.geturl(), base_url)
668
669 url = noise + base_url.encode("utf-8")
670 p = urllib.parse.urlsplit(url)
671 self.assertEqual(p.scheme, b"http")
672 self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")
673 self.assertEqual(p.path, b"/doc/")
674 self.assertEqual(p.query, b"query=yes")
675 self.assertEqual(p.fragment, b"frag")
676 self.assertEqual(p.username, b"User")
677 self.assertEqual(p.password, b"Pass")
678 self.assertEqual(p.hostname, b"www.python.org")
679 self.assertEqual(p.port, 80)
680 self.assertEqual(p.geturl(), base_url.encode("utf-8"))
681
682 # Test that trailing space is preserved as some applications rely on
683 # this within query strings.
684 query_spaces_url = "https://www.python.org:88/doc/?query= "
685 p = urllib.parse.urlsplit(noise.decode("utf-8") + query_spaces_url)
686 self.assertEqual(p.scheme, "https")
687 self.assertEqual(p.netloc, "www.python.org:88")
688 self.assertEqual(p.path, "/doc/")
689 self.assertEqual(p.query, "query= ")
690 self.assertEqual(p.port, 88)
691 self.assertEqual(p.geturl(), query_spaces_url)
692
693 p = urllib.parse.urlsplit("www.pypi.org ")
694 # That "hostname" gets considered a "path" due to the
695 # trailing space and our existing logic... YUCK...
696 # and re-assembles via geturl aka unurlsplit into the original.
697 # django.core.validators.URLValidator (at least through v3.2) relies on
698 # this, for better or worse, to catch it in a ValidationError via its
699 # regular expressions.
700 # Here we test the basic round trip concept of such a trailing space.
701 self.assertEqual(urllib.parse.urlunsplit(p), "www.pypi.org ")
702
703 # with scheme as cache-key
704 url = "//www.python.org/"
705 scheme = noise.decode("utf-8") + "https" + noise.decode("utf-8")
706 for _ in range(2):
707 p = urllib.parse.urlsplit(url, scheme=scheme)
708 self.assertEqual(p.scheme, "https")
709 self.assertEqual(p.geturl(), "https://www.python.org/")
710
711 def test_attributes_bad_port(self):
712 """Check handling of invalid ports."""
713 for bytes in (False, True):
714 for parse in (urllib.parse.urlsplit, urllib.parse.urlparse):
715 for port in ("foo", "1.5", "-1", "0x10", "-0", "1_1", " 1", "1 ", "६"):
716 with self.subTest(bytes=bytes, parse=parse, port=port):
717 netloc = "www.example.net:" + port
718 url = "http://" + netloc + "/"
719 if bytes:
720 if netloc.isascii() and port.isascii():
721 netloc = netloc.encode("ascii")
722 url = url.encode("ascii")
723 else:
724 continue
725 p = parse(url)
726 self.assertEqual(p.netloc, netloc)
727 with self.assertRaises(ValueError):
728 p.port
729
730 def test_attributes_bad_scheme(self):
731 """Check handling of invalid schemes."""
732 for bytes in (False, True):
733 for parse in (urllib.parse.urlsplit, urllib.parse.urlparse):
734 for scheme in (".", "+", "-", "0", "http&", "६http"):
735 with self.subTest(bytes=bytes, parse=parse, scheme=scheme):
736 url = scheme + "://www.example.net"
737 if bytes:
738 if url.isascii():
739 url = url.encode("ascii")
740 else:
741 continue
742 p = parse(url)
743 if bytes:
744 self.assertEqual(p.scheme, b"")
745 else:
746 self.assertEqual(p.scheme, "")
747
748 def test_attributes_without_netloc(self):
749 # This example is straight from RFC 3261. It looks like it
750 # should allow the username, hostname, and port to be filled
751 # in, but doesn't. Since it's a URI and doesn't use the
752 # scheme://netloc syntax, the netloc and related attributes
753 # should be left empty.
754 uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
755 p = urllib.parse.urlsplit(uri)
756 self.assertEqual(p.netloc, "")
757 self.assertEqual(p.username, None)
758 self.assertEqual(p.password, None)
759 self.assertEqual(p.hostname, None)
760 self.assertEqual(p.port, None)
761 self.assertEqual(p.geturl(), uri)
762
763 p = urllib.parse.urlparse(uri)
764 self.assertEqual(p.netloc, "")
765 self.assertEqual(p.username, None)
766 self.assertEqual(p.password, None)
767 self.assertEqual(p.hostname, None)
768 self.assertEqual(p.port, None)
769 self.assertEqual(p.geturl(), uri)
770
771 # You guessed it, repeating the test with bytes input
772 uri = b"sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
773 p = urllib.parse.urlsplit(uri)
774 self.assertEqual(p.netloc, b"")
775 self.assertEqual(p.username, None)
776 self.assertEqual(p.password, None)
777 self.assertEqual(p.hostname, None)
778 self.assertEqual(p.port, None)
779 self.assertEqual(p.geturl(), uri)
780
781 p = urllib.parse.urlparse(uri)
782 self.assertEqual(p.netloc, b"")
783 self.assertEqual(p.username, None)
784 self.assertEqual(p.password, None)
785 self.assertEqual(p.hostname, None)
786 self.assertEqual(p.port, None)
787 self.assertEqual(p.geturl(), uri)
788
789 def test_noslash(self):
790 # Issue 1637: http://foo.com?query is legal
791 self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
792 ('http', 'example.com', '', '', 'blahblah=/foo', ''))
793 self.assertEqual(urllib.parse.urlparse(b"http://example.com?blahblah=/foo"),
794 (b'http', b'example.com', b'', b'', b'blahblah=/foo', b''))
795
796 def test_withoutscheme(self):
797 # Test urlparse without scheme
798 # Issue 754016: urlparse goes wrong with IP:port without scheme
799 # RFC 1808 specifies that netloc should start with //, urlparse expects
800 # the same, otherwise it classifies the portion of url as path.
801 self.assertEqual(urllib.parse.urlparse("path"),
802 ('','','path','','',''))
803 self.assertEqual(urllib.parse.urlparse("//www.python.org:80"),
804 ('','www.python.org:80','','','',''))
805 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
806 ('http','www.python.org:80','','','',''))
807 # Repeat for bytes input
808 self.assertEqual(urllib.parse.urlparse(b"path"),
809 (b'',b'',b'path',b'',b'',b''))
810 self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80"),
811 (b'',b'www.python.org:80',b'',b'',b'',b''))
812 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
813 (b'http',b'www.python.org:80',b'',b'',b'',b''))
814
815 def test_portseparator(self):
816 # Issue 754016 makes changes for port separator ':' from scheme separator
817 self.assertEqual(urllib.parse.urlparse("http:80"), ('http','','80','','',''))
818 self.assertEqual(urllib.parse.urlparse("https:80"), ('https','','80','','',''))
819 self.assertEqual(urllib.parse.urlparse("path:80"), ('path','','80','','',''))
820 self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
821 self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
822 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
823 ('http','www.python.org:80','','','',''))
824 # As usual, need to check bytes input as well
825 self.assertEqual(urllib.parse.urlparse(b"http:80"), (b'http',b'',b'80',b'',b'',b''))
826 self.assertEqual(urllib.parse.urlparse(b"https:80"), (b'https',b'',b'80',b'',b'',b''))
827 self.assertEqual(urllib.parse.urlparse(b"path:80"), (b'path',b'',b'80',b'',b'',b''))
828 self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b''))
829 self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b''))
830 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
831 (b'http',b'www.python.org:80',b'',b'',b'',b''))
832
833 def test_usingsys(self):
834 # Issue 3314: sys module is used in the error
835 self.assertRaises(TypeError, urllib.parse.urlencode, "foo")
836
837 def test_anyscheme(self):
838 # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
839 self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"),
840 ('s3', 'foo.com', '/stuff', '', '', ''))
841 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),
842 ('x-newscheme', 'foo.com', '/stuff', '', '', ''))
843 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),
844 ('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment'))
845 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query"),
846 ('x-newscheme', 'foo.com', '/stuff', '', 'query', ''))
847
848 # And for bytes...
849 self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"),
850 (b's3', b'foo.com', b'/stuff', b'', b'', b''))
851 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"),
852 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b''))
853 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment"),
854 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'fragment'))
855 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"),
856 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b''))
857
858 def test_default_scheme(self):
859 # Exercise the scheme parameter of urlparse() and urlsplit()
860 for func in (urllib.parse.urlparse, urllib.parse.urlsplit):
861 with self.subTest(function=func):
862 result = func("http://example.net/", "ftp")
863 self.assertEqual(result.scheme, "http")
864 result = func(b"http://example.net/", b"ftp")
865 self.assertEqual(result.scheme, b"http")
866 self.assertEqual(func("path", "ftp").scheme, "ftp")
867 self.assertEqual(func("path", scheme="ftp").scheme, "ftp")
868 self.assertEqual(func(b"path", scheme=b"ftp").scheme, b"ftp")
869 self.assertEqual(func("path").scheme, "")
870 self.assertEqual(func(b"path").scheme, b"")
871 self.assertEqual(func(b"path", "").scheme, b"")
872
873 def test_parse_fragments(self):
874 # Exercise the allow_fragments parameter of urlparse() and urlsplit()
875 tests = (
876 ("http:#frag", "path", "frag"),
877 ("//example.net#frag", "path", "frag"),
878 ("index.html#frag", "path", "frag"),
879 (";a=b#frag", "params", "frag"),
880 ("?a=b#frag", "query", "frag"),
881 ("#frag", "path", "frag"),
882 ("abc#@frag", "path", "@frag"),
883 ("//abc#@frag", "path", "@frag"),
884 ("//abc:80#@frag", "path", "@frag"),
885 ("//abc#@frag:80", "path", "@frag:80"),
886 )
887 for url, attr, expected_frag in tests:
888 for func in (urllib.parse.urlparse, urllib.parse.urlsplit):
889 if attr == "params" and func is urllib.parse.urlsplit:
890 attr = "path"
891 with self.subTest(url=url, function=func):
892 result = func(url, allow_fragments=False)
893 self.assertEqual(result.fragment, "")
894 self.assertTrue(
895 getattr(result, attr).endswith("#" + expected_frag))
896 self.assertEqual(func(url, "", False).fragment, "")
897
898 result = func(url, allow_fragments=True)
899 self.assertEqual(result.fragment, expected_frag)
900 self.assertFalse(
901 getattr(result, attr).endswith(expected_frag))
902 self.assertEqual(func(url, "", True).fragment,
903 expected_frag)
904 self.assertEqual(func(url).fragment, expected_frag)
905
906 def test_mixed_types_rejected(self):
907 # Several functions that process either strings or ASCII encoded bytes
908 # accept multiple arguments. Check they reject mixed type input
909 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
910 urllib.parse.urlparse("www.python.org", b"http")
911 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
912 urllib.parse.urlparse(b"www.python.org", "http")
913 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
914 urllib.parse.urlsplit("www.python.org", b"http")
915 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
916 urllib.parse.urlsplit(b"www.python.org", "http")
917 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
918 urllib.parse.urlunparse(( b"http", "www.python.org","","","",""))
919 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
920 urllib.parse.urlunparse(("http", b"www.python.org","","","",""))
921 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
922 urllib.parse.urlunsplit((b"http", "www.python.org","","",""))
923 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
924 urllib.parse.urlunsplit(("http", b"www.python.org","","",""))
925 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
926 urllib.parse.urljoin("http://python.org", b"http://python.org")
927 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
928 urllib.parse.urljoin(b"http://python.org", "http://python.org")
929
930 def _check_result_type(self, str_type):
931 num_args = len(str_type._fields)
932 bytes_type = str_type._encoded_counterpart
933 self.assertIs(bytes_type._decoded_counterpart, str_type)
934 str_args = ('',) * num_args
935 bytes_args = (b'',) * num_args
936 str_result = str_type(*str_args)
937 bytes_result = bytes_type(*bytes_args)
938 encoding = 'ascii'
939 errors = 'strict'
940 self.assertEqual(str_result, str_args)
941 self.assertEqual(bytes_result.decode(), str_args)
942 self.assertEqual(bytes_result.decode(), str_result)
943 self.assertEqual(bytes_result.decode(encoding), str_args)
944 self.assertEqual(bytes_result.decode(encoding), str_result)
945 self.assertEqual(bytes_result.decode(encoding, errors), str_args)
946 self.assertEqual(bytes_result.decode(encoding, errors), str_result)
947 self.assertEqual(bytes_result, bytes_args)
948 self.assertEqual(str_result.encode(), bytes_args)
949 self.assertEqual(str_result.encode(), bytes_result)
950 self.assertEqual(str_result.encode(encoding), bytes_args)
951 self.assertEqual(str_result.encode(encoding), bytes_result)
952 self.assertEqual(str_result.encode(encoding, errors), bytes_args)
953 self.assertEqual(str_result.encode(encoding, errors), bytes_result)
954
955 def test_result_pairs(self):
956 # Check encoding and decoding between result pairs
957 result_types = [
958 urllib.parse.DefragResult,
959 urllib.parse.SplitResult,
960 urllib.parse.ParseResult,
961 ]
962 for result_type in result_types:
963 self._check_result_type(result_type)
964
965 def test_parse_qs_encoding(self):
966 result = urllib.parse.parse_qs("key=\u0141%E9", encoding="latin-1")
967 self.assertEqual(result, {'key': ['\u0141\xE9']})
968 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="utf-8")
969 self.assertEqual(result, {'key': ['\u0141\xE9']})
970 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="ascii")
971 self.assertEqual(result, {'key': ['\u0141\ufffd\ufffd']})
972 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii")
973 self.assertEqual(result, {'key': ['\u0141\ufffd-']})
974 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii",
975 errors="ignore")
976 self.assertEqual(result, {'key': ['\u0141-']})
977
978 def test_parse_qsl_encoding(self):
979 result = urllib.parse.parse_qsl("key=\u0141%E9", encoding="latin-1")
980 self.assertEqual(result, [('key', '\u0141\xE9')])
981 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="utf-8")
982 self.assertEqual(result, [('key', '\u0141\xE9')])
983 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="ascii")
984 self.assertEqual(result, [('key', '\u0141\ufffd\ufffd')])
985 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii")
986 self.assertEqual(result, [('key', '\u0141\ufffd-')])
987 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii",
988 errors="ignore")
989 self.assertEqual(result, [('key', '\u0141-')])
990
991 def test_parse_qsl_max_num_fields(self):
992 with self.assertRaises(ValueError):
993 urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10)
994 urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10)
995
996 def test_parse_qs_separator(self):
997 parse_qs_semicolon_cases = [
998 (";", {}),
999 (";;", {}),
1000 (";a=b", {'a': ['b']}),
1001 ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
1002 ("a=1;a=2", {'a': ['1', '2']}),
1003 (b";", {}),
1004 (b";;", {}),
1005 (b";a=b", {b'a': [b'b']}),
1006 (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
1007 (b"a=1;a=2", {b'a': [b'1', b'2']}),
1008 ]
1009 for orig, expect in parse_qs_semicolon_cases:
1010 with self.subTest(f"Original: {orig!r}, Expected: {expect!r}"):
1011 result = urllib.parse.parse_qs(orig, separator=';')
1012 self.assertEqual(result, expect, "Error parsing %r" % orig)
1013 result_bytes = urllib.parse.parse_qs(orig, separator=b';')
1014 self.assertEqual(result_bytes, expect, "Error parsing %r" % orig)
1015
1016
1017 def test_parse_qsl_separator(self):
1018 parse_qsl_semicolon_cases = [
1019 (";", []),
1020 (";;", []),
1021 (";a=b", [('a', 'b')]),
1022 ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
1023 ("a=1;a=2", [('a', '1'), ('a', '2')]),
1024 (b";", []),
1025 (b";;", []),
1026 (b";a=b", [(b'a', b'b')]),
1027 (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
1028 (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
1029 ]
1030 for orig, expect in parse_qsl_semicolon_cases:
1031 with self.subTest(f"Original: {orig!r}, Expected: {expect!r}"):
1032 result = urllib.parse.parse_qsl(orig, separator=';')
1033 self.assertEqual(result, expect, "Error parsing %r" % orig)
1034 result_bytes = urllib.parse.parse_qsl(orig, separator=b';')
1035 self.assertEqual(result_bytes, expect, "Error parsing %r" % orig)
1036
1037
1038 def test_urlencode_sequences(self):
1039 # Other tests incidentally urlencode things; test non-covered cases:
1040 # Sequence and object values.
1041 result = urllib.parse.urlencode({'a': [1, 2], 'b': (3, 4, 5)}, True)
1042 # we cannot rely on ordering here
1043 assert set(result.split('&')) == {'a=1', 'a=2', 'b=3', 'b=4', 'b=5'}
1044
1045 class ESC[4;38;5;81mTrivial:
1046 def __str__(self):
1047 return 'trivial'
1048
1049 result = urllib.parse.urlencode({'a': Trivial()}, True)
1050 self.assertEqual(result, 'a=trivial')
1051
1052 def test_urlencode_quote_via(self):
1053 result = urllib.parse.urlencode({'a': 'some value'})
1054 self.assertEqual(result, "a=some+value")
1055 result = urllib.parse.urlencode({'a': 'some value/another'},
1056 quote_via=urllib.parse.quote)
1057 self.assertEqual(result, "a=some%20value%2Fanother")
1058 result = urllib.parse.urlencode({'a': 'some value/another'},
1059 safe='/', quote_via=urllib.parse.quote)
1060 self.assertEqual(result, "a=some%20value/another")
1061
1062 def test_quote_from_bytes(self):
1063 self.assertRaises(TypeError, urllib.parse.quote_from_bytes, 'foo')
1064 result = urllib.parse.quote_from_bytes(b'archaeological arcana')
1065 self.assertEqual(result, 'archaeological%20arcana')
1066 result = urllib.parse.quote_from_bytes(b'')
1067 self.assertEqual(result, '')
1068
1069 def test_unquote_to_bytes(self):
1070 result = urllib.parse.unquote_to_bytes('abc%20def')
1071 self.assertEqual(result, b'abc def')
1072 result = urllib.parse.unquote_to_bytes('')
1073 self.assertEqual(result, b'')
1074
1075 def test_quote_errors(self):
1076 self.assertRaises(TypeError, urllib.parse.quote, b'foo',
1077 encoding='utf-8')
1078 self.assertRaises(TypeError, urllib.parse.quote, b'foo', errors='strict')
1079
1080 def test_issue14072(self):
1081 p1 = urllib.parse.urlsplit('tel:+31-641044153')
1082 self.assertEqual(p1.scheme, 'tel')
1083 self.assertEqual(p1.path, '+31-641044153')
1084 p2 = urllib.parse.urlsplit('tel:+31641044153')
1085 self.assertEqual(p2.scheme, 'tel')
1086 self.assertEqual(p2.path, '+31641044153')
1087 # assert the behavior for urlparse
1088 p1 = urllib.parse.urlparse('tel:+31-641044153')
1089 self.assertEqual(p1.scheme, 'tel')
1090 self.assertEqual(p1.path, '+31-641044153')
1091 p2 = urllib.parse.urlparse('tel:+31641044153')
1092 self.assertEqual(p2.scheme, 'tel')
1093 self.assertEqual(p2.path, '+31641044153')
1094
1095 def test_invalid_bracketed_hosts(self):
1096 self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[192.0.2.146]/Path?Query')
1097 self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[important.com:8000]/Path?Query')
1098 self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v123r.IP]/Path?Query')
1099 self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v12ae]/Path?Query')
1100 self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v.IP]/Path?Query')
1101 self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v123.]/Path?Query')
1102 self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v]/Path?Query')
1103 self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af::2309::fae7:1234]/Path?Query')
1104 self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af:2309::fae7:1234:2342:438e:192.0.2.146]/Path?Query')
1105 self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@]v6a.ip[/Path')
1106
1107 def test_splitting_bracketed_hosts(self):
1108 p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]/path?query')
1109 self.assertEqual(p1.hostname, 'v6a.ip')
1110 self.assertEqual(p1.username, 'user')
1111 self.assertEqual(p1.path, '/path')
1112 p2 = urllib.parse.urlsplit('scheme://user@[0439:23af:2309::fae7%test]/path?query')
1113 self.assertEqual(p2.hostname, '0439:23af:2309::fae7%test')
1114 self.assertEqual(p2.username, 'user')
1115 self.assertEqual(p2.path, '/path')
1116 p3 = urllib.parse.urlsplit('scheme://user@[0439:23af:2309::fae7:1234:192.0.2.146%test]/path?query')
1117 self.assertEqual(p3.hostname, '0439:23af:2309::fae7:1234:192.0.2.146%test')
1118 self.assertEqual(p3.username, 'user')
1119 self.assertEqual(p3.path, '/path')
1120
1121 def test_port_casting_failure_message(self):
1122 message = "Port could not be cast to integer value as 'oracle'"
1123 p1 = urllib.parse.urlparse('http://Server=sde; Service=sde:oracle')
1124 with self.assertRaisesRegex(ValueError, message):
1125 p1.port
1126
1127 p2 = urllib.parse.urlsplit('http://Server=sde; Service=sde:oracle')
1128 with self.assertRaisesRegex(ValueError, message):
1129 p2.port
1130
1131 def test_telurl_params(self):
1132 p1 = urllib.parse.urlparse('tel:123-4;phone-context=+1-650-516')
1133 self.assertEqual(p1.scheme, 'tel')
1134 self.assertEqual(p1.path, '123-4')
1135 self.assertEqual(p1.params, 'phone-context=+1-650-516')
1136
1137 p1 = urllib.parse.urlparse('tel:+1-201-555-0123')
1138 self.assertEqual(p1.scheme, 'tel')
1139 self.assertEqual(p1.path, '+1-201-555-0123')
1140 self.assertEqual(p1.params, '')
1141
1142 p1 = urllib.parse.urlparse('tel:7042;phone-context=example.com')
1143 self.assertEqual(p1.scheme, 'tel')
1144 self.assertEqual(p1.path, '7042')
1145 self.assertEqual(p1.params, 'phone-context=example.com')
1146
1147 p1 = urllib.parse.urlparse('tel:863-1234;phone-context=+1-914-555')
1148 self.assertEqual(p1.scheme, 'tel')
1149 self.assertEqual(p1.path, '863-1234')
1150 self.assertEqual(p1.params, 'phone-context=+1-914-555')
1151
1152 def test_Quoter_repr(self):
1153 quoter = urllib.parse._Quoter(urllib.parse._ALWAYS_SAFE)
1154 self.assertIn('Quoter', repr(quoter))
1155
1156 def test_clear_cache_for_code_coverage(self):
1157 urllib.parse.clear_cache()
1158
1159 def test_urllib_parse_getattr_failure(self):
1160 """Test that urllib.parse.__getattr__() fails correctly."""
1161 with self.assertRaises(AttributeError):
1162 unused = urllib.parse.this_does_not_exist
1163
1164 def test_all(self):
1165 expected = []
1166 undocumented = {
1167 'splitattr', 'splithost', 'splitnport', 'splitpasswd',
1168 'splitport', 'splitquery', 'splittag', 'splittype', 'splituser',
1169 'splitvalue',
1170 'ResultBase', 'clear_cache', 'to_bytes', 'unwrap',
1171 }
1172 for name in dir(urllib.parse):
1173 if name.startswith('_') or name in undocumented:
1174 continue
1175 object = getattr(urllib.parse, name)
1176 if getattr(object, '__module__', None) == 'urllib.parse':
1177 expected.append(name)
1178 self.assertCountEqual(urllib.parse.__all__, expected)
1179
1180 def test_urlsplit_normalization(self):
1181 # Certain characters should never occur in the netloc,
1182 # including under normalization.
1183 # Ensure that ALL of them are detected and cause an error
1184 illegal_chars = '/:#?@'
1185 hex_chars = {'{:04X}'.format(ord(c)) for c in illegal_chars}
1186 denorm_chars = [
1187 c for c in map(chr, range(128, sys.maxunicode))
1188 if unicodedata.decomposition(c)
1189 and (hex_chars & set(unicodedata.decomposition(c).split()))
1190 and c not in illegal_chars
1191 ]
1192 # Sanity check that we found at least one such character
1193 self.assertIn('\u2100', denorm_chars)
1194 self.assertIn('\uFF03', denorm_chars)
1195
1196 # bpo-36742: Verify port separators are ignored when they
1197 # existed prior to decomposition
1198 urllib.parse.urlsplit('http://\u30d5\u309a:80')
1199 with self.assertRaises(ValueError):
1200 urllib.parse.urlsplit('http://\u30d5\u309a\ufe1380')
1201
1202 for scheme in ["http", "https", "ftp"]:
1203 for netloc in ["netloc{}false.netloc", "n{}user@netloc"]:
1204 for c in denorm_chars:
1205 url = "{}://{}/path".format(scheme, netloc.format(c))
1206 with self.subTest(url=url, char='{:04X}'.format(ord(c))):
1207 with self.assertRaises(ValueError):
1208 urllib.parse.urlsplit(url)
1209
1210 class ESC[4;38;5;81mUtility_Tests(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
1211 """Testcase to test the various utility functions in the urllib."""
1212 # In Python 2 this test class was in test_urllib.
1213
1214 def test_splittype(self):
1215 splittype = urllib.parse._splittype
1216 self.assertEqual(splittype('type:opaquestring'), ('type', 'opaquestring'))
1217 self.assertEqual(splittype('opaquestring'), (None, 'opaquestring'))
1218 self.assertEqual(splittype(':opaquestring'), (None, ':opaquestring'))
1219 self.assertEqual(splittype('type:'), ('type', ''))
1220 self.assertEqual(splittype('type:opaque:string'), ('type', 'opaque:string'))
1221
1222 def test_splithost(self):
1223 splithost = urllib.parse._splithost
1224 self.assertEqual(splithost('//www.example.org:80/foo/bar/baz.html'),
1225 ('www.example.org:80', '/foo/bar/baz.html'))
1226 self.assertEqual(splithost('//www.example.org:80'),
1227 ('www.example.org:80', ''))
1228 self.assertEqual(splithost('/foo/bar/baz.html'),
1229 (None, '/foo/bar/baz.html'))
1230
1231 # bpo-30500: # starts a fragment.
1232 self.assertEqual(splithost('//127.0.0.1#@host.com'),
1233 ('127.0.0.1', '/#@host.com'))
1234 self.assertEqual(splithost('//127.0.0.1#@host.com:80'),
1235 ('127.0.0.1', '/#@host.com:80'))
1236 self.assertEqual(splithost('//127.0.0.1:80#@host.com'),
1237 ('127.0.0.1:80', '/#@host.com'))
1238
1239 # Empty host is returned as empty string.
1240 self.assertEqual(splithost("///file"),
1241 ('', '/file'))
1242
1243 # Trailing semicolon, question mark and hash symbol are kept.
1244 self.assertEqual(splithost("//example.net/file;"),
1245 ('example.net', '/file;'))
1246 self.assertEqual(splithost("//example.net/file?"),
1247 ('example.net', '/file?'))
1248 self.assertEqual(splithost("//example.net/file#"),
1249 ('example.net', '/file#'))
1250
1251 def test_splituser(self):
1252 splituser = urllib.parse._splituser
1253 self.assertEqual(splituser('User:Pass@www.python.org:080'),
1254 ('User:Pass', 'www.python.org:080'))
1255 self.assertEqual(splituser('@www.python.org:080'),
1256 ('', 'www.python.org:080'))
1257 self.assertEqual(splituser('www.python.org:080'),
1258 (None, 'www.python.org:080'))
1259 self.assertEqual(splituser('User:Pass@'),
1260 ('User:Pass', ''))
1261 self.assertEqual(splituser('User@example.com:Pass@www.python.org:080'),
1262 ('User@example.com:Pass', 'www.python.org:080'))
1263
1264 def test_splitpasswd(self):
        # Some of the password examples are not sensible, but they are kept
        # to conform to RFC 2617 and to address issue4675.
1267 splitpasswd = urllib.parse._splitpasswd
1268 self.assertEqual(splitpasswd('user:ab'), ('user', 'ab'))
1269 self.assertEqual(splitpasswd('user:a\nb'), ('user', 'a\nb'))
1270 self.assertEqual(splitpasswd('user:a\tb'), ('user', 'a\tb'))
1271 self.assertEqual(splitpasswd('user:a\rb'), ('user', 'a\rb'))
1272 self.assertEqual(splitpasswd('user:a\fb'), ('user', 'a\fb'))
1273 self.assertEqual(splitpasswd('user:a\vb'), ('user', 'a\vb'))
1274 self.assertEqual(splitpasswd('user:a:b'), ('user', 'a:b'))
1275 self.assertEqual(splitpasswd('user:a b'), ('user', 'a b'))
1276 self.assertEqual(splitpasswd('user 2:ab'), ('user 2', 'ab'))
1277 self.assertEqual(splitpasswd('user+1:a+b'), ('user+1', 'a+b'))
1278 self.assertEqual(splitpasswd('user:'), ('user', ''))
1279 self.assertEqual(splitpasswd('user'), ('user', None))
1280 self.assertEqual(splitpasswd(':ab'), ('', 'ab'))
1281
1282 def test_splitport(self):
1283 splitport = urllib.parse._splitport
1284 self.assertEqual(splitport('parrot:88'), ('parrot', '88'))
1285 self.assertEqual(splitport('parrot'), ('parrot', None))
1286 self.assertEqual(splitport('parrot:'), ('parrot', None))
1287 self.assertEqual(splitport('127.0.0.1'), ('127.0.0.1', None))
1288 self.assertEqual(splitport('parrot:cheese'), ('parrot:cheese', None))
1289 self.assertEqual(splitport('[::1]:88'), ('[::1]', '88'))
1290 self.assertEqual(splitport('[::1]'), ('[::1]', None))
1291 self.assertEqual(splitport(':88'), ('', '88'))
1292
1293 def test_splitnport(self):
1294 splitnport = urllib.parse._splitnport
1295 self.assertEqual(splitnport('parrot:88'), ('parrot', 88))
1296 self.assertEqual(splitnport('parrot'), ('parrot', -1))
1297 self.assertEqual(splitnport('parrot', 55), ('parrot', 55))
1298 self.assertEqual(splitnport('parrot:'), ('parrot', -1))
1299 self.assertEqual(splitnport('parrot:', 55), ('parrot', 55))
1300 self.assertEqual(splitnport('127.0.0.1'), ('127.0.0.1', -1))
1301 self.assertEqual(splitnport('127.0.0.1', 55), ('127.0.0.1', 55))
1302 self.assertEqual(splitnport('parrot:cheese'), ('parrot', None))
1303 self.assertEqual(splitnport('parrot:cheese', 55), ('parrot', None))
1304 self.assertEqual(splitnport('parrot: +1_0 '), ('parrot', None))
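        # Side-by-side sketch (added for clarity): unlike _splitport(),
        # which leaves a non-numeric "port" attached to the host,
        # _splitnport() strips it and reports the port as None.
        self.assertEqual(urllib.parse._splitport('parrot:cheese'),
                         ('parrot:cheese', None))
        self.assertEqual(splitnport('parrot:cheese'), ('parrot', None))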
1305
1306 def test_splitquery(self):
        # Normal cases are exercised by other tests; ensure that we also
        # catch cases with no query specified (testcase ensuring coverage)
1309 splitquery = urllib.parse._splitquery
1310 self.assertEqual(splitquery('http://python.org/fake?foo=bar'),
1311 ('http://python.org/fake', 'foo=bar'))
1312 self.assertEqual(splitquery('http://python.org/fake?foo=bar?'),
1313 ('http://python.org/fake?foo=bar', ''))
1314 self.assertEqual(splitquery('http://python.org/fake'),
1315 ('http://python.org/fake', None))
1316 self.assertEqual(splitquery('?foo=bar'), ('', 'foo=bar'))
1317
1318 def test_splittag(self):
1319 splittag = urllib.parse._splittag
1320 self.assertEqual(splittag('http://example.com?foo=bar#baz'),
1321 ('http://example.com?foo=bar', 'baz'))
1322 self.assertEqual(splittag('http://example.com?foo=bar#'),
1323 ('http://example.com?foo=bar', ''))
1324 self.assertEqual(splittag('#baz'), ('', 'baz'))
1325 self.assertEqual(splittag('http://example.com?foo=bar'),
1326 ('http://example.com?foo=bar', None))
1327 self.assertEqual(splittag('http://example.com?foo=bar#baz#boo'),
1328 ('http://example.com?foo=bar#baz', 'boo'))
1329
1330 def test_splitattr(self):
1331 splitattr = urllib.parse._splitattr
1332 self.assertEqual(splitattr('/path;attr1=value1;attr2=value2'),
1333 ('/path', ['attr1=value1', 'attr2=value2']))
1334 self.assertEqual(splitattr('/path;'), ('/path', ['']))
1335 self.assertEqual(splitattr(';attr1=value1;attr2=value2'),
1336 ('', ['attr1=value1', 'attr2=value2']))
1337 self.assertEqual(splitattr('/path'), ('/path', []))
1338
1339 def test_splitvalue(self):
1340 # Normal cases are exercised by other tests; test pathological cases
1341 # with no key/value pairs. (testcase ensuring coverage)
1342 splitvalue = urllib.parse._splitvalue
1343 self.assertEqual(splitvalue('foo=bar'), ('foo', 'bar'))
1344 self.assertEqual(splitvalue('foo='), ('foo', ''))
1345 self.assertEqual(splitvalue('=bar'), ('', 'bar'))
1346 self.assertEqual(splitvalue('foobar'), ('foobar', None))
1347 self.assertEqual(splitvalue('foo=bar=baz'), ('foo', 'bar=baz'))
1348
1349 def test_to_bytes(self):
1350 result = urllib.parse._to_bytes('http://www.python.org')
1351 self.assertEqual(result, 'http://www.python.org')
1352 self.assertRaises(UnicodeError, urllib.parse._to_bytes,
1353 'http://www.python.org/medi\u00e6val')
1354
1355 def test_unwrap(self):
1356 for wrapped_url in ('<URL:scheme://host/path>', '<scheme://host/path>',
1357 'URL:scheme://host/path', 'scheme://host/path'):
1358 url = urllib.parse.unwrap(wrapped_url)
1359 self.assertEqual(url, 'scheme://host/path')
1360
1361
class DeprecationTest(unittest.TestCase):
1363
1364 def test_Quoter_deprecation(self):
1365 with self.assertWarns(DeprecationWarning) as cm:
1366 old_class = urllib.parse.Quoter
1367 self.assertIs(old_class, urllib.parse._Quoter)
1368 self.assertIn('Quoter will be removed', str(cm.warning))
1369
1370 def test_splittype_deprecation(self):
1371 with self.assertWarns(DeprecationWarning) as cm:
1372 urllib.parse.splittype('')
1373 self.assertEqual(str(cm.warning),
1374 'urllib.parse.splittype() is deprecated as of 3.8, '
1375 'use urllib.parse.urlparse() instead')
1376
1377 def test_splithost_deprecation(self):
1378 with self.assertWarns(DeprecationWarning) as cm:
1379 urllib.parse.splithost('')
1380 self.assertEqual(str(cm.warning),
1381 'urllib.parse.splithost() is deprecated as of 3.8, '
1382 'use urllib.parse.urlparse() instead')
1383
1384 def test_splituser_deprecation(self):
1385 with self.assertWarns(DeprecationWarning) as cm:
1386 urllib.parse.splituser('')
1387 self.assertEqual(str(cm.warning),
1388 'urllib.parse.splituser() is deprecated as of 3.8, '
1389 'use urllib.parse.urlparse() instead')
1390
1391 def test_splitpasswd_deprecation(self):
1392 with self.assertWarns(DeprecationWarning) as cm:
1393 urllib.parse.splitpasswd('')
1394 self.assertEqual(str(cm.warning),
1395 'urllib.parse.splitpasswd() is deprecated as of 3.8, '
1396 'use urllib.parse.urlparse() instead')
1397
1398 def test_splitport_deprecation(self):
1399 with self.assertWarns(DeprecationWarning) as cm:
1400 urllib.parse.splitport('')
1401 self.assertEqual(str(cm.warning),
1402 'urllib.parse.splitport() is deprecated as of 3.8, '
1403 'use urllib.parse.urlparse() instead')
1404
1405 def test_splitnport_deprecation(self):
1406 with self.assertWarns(DeprecationWarning) as cm:
1407 urllib.parse.splitnport('')
1408 self.assertEqual(str(cm.warning),
1409 'urllib.parse.splitnport() is deprecated as of 3.8, '
1410 'use urllib.parse.urlparse() instead')
1411
1412 def test_splitquery_deprecation(self):
1413 with self.assertWarns(DeprecationWarning) as cm:
1414 urllib.parse.splitquery('')
1415 self.assertEqual(str(cm.warning),
1416 'urllib.parse.splitquery() is deprecated as of 3.8, '
1417 'use urllib.parse.urlparse() instead')
1418
1419 def test_splittag_deprecation(self):
1420 with self.assertWarns(DeprecationWarning) as cm:
1421 urllib.parse.splittag('')
1422 self.assertEqual(str(cm.warning),
1423 'urllib.parse.splittag() is deprecated as of 3.8, '
1424 'use urllib.parse.urlparse() instead')
1425
1426 def test_splitattr_deprecation(self):
1427 with self.assertWarns(DeprecationWarning) as cm:
1428 urllib.parse.splitattr('')
1429 self.assertEqual(str(cm.warning),
1430 'urllib.parse.splitattr() is deprecated as of 3.8, '
1431 'use urllib.parse.urlparse() instead')
1432
1433 def test_splitvalue_deprecation(self):
1434 with self.assertWarns(DeprecationWarning) as cm:
1435 urllib.parse.splitvalue('')
1436 self.assertEqual(str(cm.warning),
1437 'urllib.parse.splitvalue() is deprecated as of 3.8, '
1438 'use urllib.parse.parse_qsl() instead')
1439
1440 def test_to_bytes_deprecation(self):
1441 with self.assertWarns(DeprecationWarning) as cm:
1442 urllib.parse.to_bytes('')
1443 self.assertEqual(str(cm.warning),
1444 'urllib.parse.to_bytes() is deprecated as of 3.8')
1445
1446
1447 if __name__ == "__main__":
1448 unittest.main()