python (3.12.0)

(root)/
lib/
python3.12/
test/
test_urlparse.py
       1  import sys
       2  import unicodedata
       3  import unittest
       4  import urllib.parse
       5  
       6  RFC1808_BASE = "http://a/b/c/d;p?q#f"
       7  RFC2396_BASE = "http://a/b/c/d;p?q"
       8  RFC3986_BASE = 'http://a/b/c/d;p?q'
       9  SIMPLE_BASE  = 'http://a/b/c/d'
      10  
      11  # Each parse_qsl testcase is a two-tuple that contains
      12  # a string with the query and a list with the expected result.
      13  
      14  parse_qsl_test_cases = [
      15      ("", []),
      16      ("&", []),
      17      ("&&", []),
      18      ("=", [('', '')]),
      19      ("=a", [('', 'a')]),
      20      ("a", [('a', '')]),
      21      ("a=", [('a', '')]),
      22      ("&a=b", [('a', 'b')]),
      23      ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
      24      ("a=1&a=2", [('a', '1'), ('a', '2')]),
      25      (b"", []),
      26      (b"&", []),
      27      (b"&&", []),
      28      (b"=", [(b'', b'')]),
      29      (b"=a", [(b'', b'a')]),
      30      (b"a", [(b'a', b'')]),
      31      (b"a=", [(b'a', b'')]),
      32      (b"&a=b", [(b'a', b'b')]),
      33      (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
      34      (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),
      35      (";a=b", [(';a', 'b')]),
      36      ("a=a+b;b=b+c", [('a', 'a b;b=b c')]),
      37      (b";a=b", [(b';a', b'b')]),
      38      (b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]),
      39  ]
      40  
      41  # Each parse_qs testcase is a two-tuple that contains
      42  # a string with the query and a dictionary with the expected result.
      43  
      44  parse_qs_test_cases = [
      45      ("", {}),
      46      ("&", {}),
      47      ("&&", {}),
      48      ("=", {'': ['']}),
      49      ("=a", {'': ['a']}),
      50      ("a", {'a': ['']}),
      51      ("a=", {'a': ['']}),
      52      ("&a=b", {'a': ['b']}),
      53      ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
      54      ("a=1&a=2", {'a': ['1', '2']}),
      55      (b"", {}),
      56      (b"&", {}),
      57      (b"&&", {}),
      58      (b"=", {b'': [b'']}),
      59      (b"=a", {b'': [b'a']}),
      60      (b"a", {b'a': [b'']}),
      61      (b"a=", {b'a': [b'']}),
      62      (b"&a=b", {b'a': [b'b']}),
      63      (b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
      64      (b"a=1&a=2", {b'a': [b'1', b'2']}),
      65      (";a=b", {';a': ['b']}),
      66      ("a=a+b;b=b+c", {'a': ['a b;b=b c']}),
      67      (b";a=b", {b';a': [b'b']}),
      68      (b"a=a+b;b=b+c", {b'a':[ b'a b;b=b c']}),
      69  ]
      70  
class UrlParseTestCase(unittest.TestCase):
      72  
      73      def checkRoundtrips(self, url, parsed, split):
      74          result = urllib.parse.urlparse(url)
      75          self.assertSequenceEqual(result, parsed)
      76          t = (result.scheme, result.netloc, result.path,
      77               result.params, result.query, result.fragment)
      78          self.assertSequenceEqual(t, parsed)
      79          # put it back together and it should be the same
      80          result2 = urllib.parse.urlunparse(result)
      81          self.assertSequenceEqual(result2, url)
      82          self.assertSequenceEqual(result2, result.geturl())
      83  
      84          # the result of geturl() is a fixpoint; we can always parse it
      85          # again to get the same result:
      86          result3 = urllib.parse.urlparse(result.geturl())
      87          self.assertEqual(result3.geturl(), result.geturl())
      88          self.assertSequenceEqual(result3, result)
      89          self.assertEqual(result3.scheme,   result.scheme)
      90          self.assertEqual(result3.netloc,   result.netloc)
      91          self.assertEqual(result3.path,     result.path)
      92          self.assertEqual(result3.params,   result.params)
      93          self.assertEqual(result3.query,    result.query)
      94          self.assertEqual(result3.fragment, result.fragment)
      95          self.assertEqual(result3.username, result.username)
      96          self.assertEqual(result3.password, result.password)
      97          self.assertEqual(result3.hostname, result.hostname)
      98          self.assertEqual(result3.port,     result.port)
      99  
     100          # check the roundtrip using urlsplit() as well
     101          result = urllib.parse.urlsplit(url)
     102          self.assertSequenceEqual(result, split)
     103          t = (result.scheme, result.netloc, result.path,
     104               result.query, result.fragment)
     105          self.assertSequenceEqual(t, split)
     106          result2 = urllib.parse.urlunsplit(result)
     107          self.assertSequenceEqual(result2, url)
     108          self.assertSequenceEqual(result2, result.geturl())
     109  
     110          # check the fixpoint property of re-parsing the result of geturl()
     111          result3 = urllib.parse.urlsplit(result.geturl())
     112          self.assertEqual(result3.geturl(), result.geturl())
     113          self.assertSequenceEqual(result3, result)
     114          self.assertEqual(result3.scheme,   result.scheme)
     115          self.assertEqual(result3.netloc,   result.netloc)
     116          self.assertEqual(result3.path,     result.path)
     117          self.assertEqual(result3.query,    result.query)
     118          self.assertEqual(result3.fragment, result.fragment)
     119          self.assertEqual(result3.username, result.username)
     120          self.assertEqual(result3.password, result.password)
     121          self.assertEqual(result3.hostname, result.hostname)
     122          self.assertEqual(result3.port,     result.port)
     123  
     124      def test_qsl(self):
     125          for orig, expect in parse_qsl_test_cases:
     126              result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
     127              self.assertEqual(result, expect, "Error parsing %r" % orig)
     128              expect_without_blanks = [v for v in expect if len(v[1])]
     129              result = urllib.parse.parse_qsl(orig, keep_blank_values=False)
     130              self.assertEqual(result, expect_without_blanks,
     131                              "Error parsing %r" % orig)
     132  
     133      def test_qs(self):
     134          for orig, expect in parse_qs_test_cases:
     135              result = urllib.parse.parse_qs(orig, keep_blank_values=True)
     136              self.assertEqual(result, expect, "Error parsing %r" % orig)
     137              expect_without_blanks = {v: expect[v]
     138                                       for v in expect if len(expect[v][0])}
     139              result = urllib.parse.parse_qs(orig, keep_blank_values=False)
     140              self.assertEqual(result, expect_without_blanks,
     141                              "Error parsing %r" % orig)
     142  
     143      def test_roundtrips(self):
     144          str_cases = [
     145              ('file:///tmp/junk.txt',
     146               ('file', '', '/tmp/junk.txt', '', '', ''),
     147               ('file', '', '/tmp/junk.txt', '', '')),
     148              ('imap://mail.python.org/mbox1',
     149               ('imap', 'mail.python.org', '/mbox1', '', '', ''),
     150               ('imap', 'mail.python.org', '/mbox1', '', '')),
     151              ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
     152               ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
     153                '', '', ''),
     154               ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
     155                '', '')),
     156              ('nfs://server/path/to/file.txt',
     157               ('nfs', 'server', '/path/to/file.txt', '', '', ''),
     158               ('nfs', 'server', '/path/to/file.txt', '', '')),
     159              ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
     160               ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
     161                '', '', ''),
     162               ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
     163                '', '')),
     164              ('git+ssh://git@github.com/user/project.git',
     165               ('git+ssh', 'git@github.com','/user/project.git',
     166                '','',''),
     167               ('git+ssh', 'git@github.com','/user/project.git',
     168                '', '')),
     169              ('itms-services://?action=download-manifest&url=https://example.com/app',
     170               ('itms-services', '', '', '',
     171                'action=download-manifest&url=https://example.com/app', ''),
     172               ('itms-services', '', '',
     173                'action=download-manifest&url=https://example.com/app', '')),
     174              ]
     175          def _encode(t):
     176              return (t[0].encode('ascii'),
     177                      tuple(x.encode('ascii') for x in t[1]),
     178                      tuple(x.encode('ascii') for x in t[2]))
     179          bytes_cases = [_encode(x) for x in str_cases]
     180          for url, parsed, split in str_cases + bytes_cases:
     181              self.checkRoundtrips(url, parsed, split)
     182  
     183      def test_http_roundtrips(self):
     184          # urllib.parse.urlsplit treats 'http:' as an optimized special case,
     185          # so we test both 'http:' and 'https:' in all the following.
     186          # Three cheers for white box knowledge!
     187          str_cases = [
     188              ('://www.python.org',
     189               ('www.python.org', '', '', '', ''),
     190               ('www.python.org', '', '', '')),
     191              ('://www.python.org#abc',
     192               ('www.python.org', '', '', '', 'abc'),
     193               ('www.python.org', '', '', 'abc')),
     194              ('://www.python.org?q=abc',
     195               ('www.python.org', '', '', 'q=abc', ''),
     196               ('www.python.org', '', 'q=abc', '')),
     197              ('://www.python.org/#abc',
     198               ('www.python.org', '/', '', '', 'abc'),
     199               ('www.python.org', '/', '', 'abc')),
     200              ('://a/b/c/d;p?q#f',
     201               ('a', '/b/c/d', 'p', 'q', 'f'),
     202               ('a', '/b/c/d;p', 'q', 'f')),
     203              ]
     204          def _encode(t):
     205              return (t[0].encode('ascii'),
     206                      tuple(x.encode('ascii') for x in t[1]),
     207                      tuple(x.encode('ascii') for x in t[2]))
     208          bytes_cases = [_encode(x) for x in str_cases]
     209          str_schemes = ('http', 'https')
     210          bytes_schemes = (b'http', b'https')
     211          str_tests = str_schemes, str_cases
     212          bytes_tests = bytes_schemes, bytes_cases
     213          for schemes, test_cases in (str_tests, bytes_tests):
     214              for scheme in schemes:
     215                  for url, parsed, split in test_cases:
     216                      url = scheme + url
     217                      parsed = (scheme,) + parsed
     218                      split = (scheme,) + split
     219                      self.checkRoundtrips(url, parsed, split)
     220  
     221      def checkJoin(self, base, relurl, expected):
     222          str_components = (base, relurl, expected)
     223          self.assertEqual(urllib.parse.urljoin(base, relurl), expected)
     224          bytes_components = baseb, relurlb, expectedb = [
     225                              x.encode('ascii') for x in str_components]
     226          self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)
     227  
     228      def test_unparse_parse(self):
     229          str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]
     230          bytes_cases = [x.encode('ascii') for x in str_cases]
     231          for u in str_cases + bytes_cases:
     232              self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)
     233              self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)
     234  
     235      def test_RFC1808(self):
     236          # "normal" cases from RFC 1808:
     237          self.checkJoin(RFC1808_BASE, 'g:h', 'g:h')
     238          self.checkJoin(RFC1808_BASE, 'g', 'http://a/b/c/g')
     239          self.checkJoin(RFC1808_BASE, './g', 'http://a/b/c/g')
     240          self.checkJoin(RFC1808_BASE, 'g/', 'http://a/b/c/g/')
     241          self.checkJoin(RFC1808_BASE, '/g', 'http://a/g')
     242          self.checkJoin(RFC1808_BASE, '//g', 'http://g')
     243          self.checkJoin(RFC1808_BASE, 'g?y', 'http://a/b/c/g?y')
     244          self.checkJoin(RFC1808_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
     245          self.checkJoin(RFC1808_BASE, '#s', 'http://a/b/c/d;p?q#s')
     246          self.checkJoin(RFC1808_BASE, 'g#s', 'http://a/b/c/g#s')
     247          self.checkJoin(RFC1808_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
     248          self.checkJoin(RFC1808_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
     249          self.checkJoin(RFC1808_BASE, 'g;x', 'http://a/b/c/g;x')
     250          self.checkJoin(RFC1808_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
     251          self.checkJoin(RFC1808_BASE, '.', 'http://a/b/c/')
     252          self.checkJoin(RFC1808_BASE, './', 'http://a/b/c/')
     253          self.checkJoin(RFC1808_BASE, '..', 'http://a/b/')
     254          self.checkJoin(RFC1808_BASE, '../', 'http://a/b/')
     255          self.checkJoin(RFC1808_BASE, '../g', 'http://a/b/g')
     256          self.checkJoin(RFC1808_BASE, '../..', 'http://a/')
     257          self.checkJoin(RFC1808_BASE, '../../', 'http://a/')
     258          self.checkJoin(RFC1808_BASE, '../../g', 'http://a/g')
     259  
     260          # "abnormal" cases from RFC 1808:
     261          self.checkJoin(RFC1808_BASE, '', 'http://a/b/c/d;p?q#f')
     262          self.checkJoin(RFC1808_BASE, 'g.', 'http://a/b/c/g.')
     263          self.checkJoin(RFC1808_BASE, '.g', 'http://a/b/c/.g')
     264          self.checkJoin(RFC1808_BASE, 'g..', 'http://a/b/c/g..')
     265          self.checkJoin(RFC1808_BASE, '..g', 'http://a/b/c/..g')
     266          self.checkJoin(RFC1808_BASE, './../g', 'http://a/b/g')
     267          self.checkJoin(RFC1808_BASE, './g/.', 'http://a/b/c/g/')
     268          self.checkJoin(RFC1808_BASE, 'g/./h', 'http://a/b/c/g/h')
     269          self.checkJoin(RFC1808_BASE, 'g/../h', 'http://a/b/c/h')
     270  
     271          # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
     272          # so we'll not actually run these tests (which expect 1808 behavior).
     273          #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g')
     274          #self.checkJoin(RFC1808_BASE, 'http:', 'http:')
     275  
     276          # XXX: The following tests are no longer compatible with RFC3986
     277          # self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g')
     278          # self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g')
     279          # self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g')
     280          # self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g')
     281  
     282  
     283      def test_RFC2368(self):
     284          # Issue 11467: path that starts with a number is not parsed correctly
     285          self.assertEqual(urllib.parse.urlparse('mailto:1337@example.org'),
     286                  ('mailto', '', '1337@example.org', '', '', ''))
     287  
     288      def test_RFC2396(self):
     289          # cases from RFC 2396
     290  
     291          self.checkJoin(RFC2396_BASE, 'g:h', 'g:h')
     292          self.checkJoin(RFC2396_BASE, 'g', 'http://a/b/c/g')
     293          self.checkJoin(RFC2396_BASE, './g', 'http://a/b/c/g')
     294          self.checkJoin(RFC2396_BASE, 'g/', 'http://a/b/c/g/')
     295          self.checkJoin(RFC2396_BASE, '/g', 'http://a/g')
     296          self.checkJoin(RFC2396_BASE, '//g', 'http://g')
     297          self.checkJoin(RFC2396_BASE, 'g?y', 'http://a/b/c/g?y')
     298          self.checkJoin(RFC2396_BASE, '#s', 'http://a/b/c/d;p?q#s')
     299          self.checkJoin(RFC2396_BASE, 'g#s', 'http://a/b/c/g#s')
     300          self.checkJoin(RFC2396_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
     301          self.checkJoin(RFC2396_BASE, 'g;x', 'http://a/b/c/g;x')
     302          self.checkJoin(RFC2396_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
     303          self.checkJoin(RFC2396_BASE, '.', 'http://a/b/c/')
     304          self.checkJoin(RFC2396_BASE, './', 'http://a/b/c/')
     305          self.checkJoin(RFC2396_BASE, '..', 'http://a/b/')
     306          self.checkJoin(RFC2396_BASE, '../', 'http://a/b/')
     307          self.checkJoin(RFC2396_BASE, '../g', 'http://a/b/g')
     308          self.checkJoin(RFC2396_BASE, '../..', 'http://a/')
     309          self.checkJoin(RFC2396_BASE, '../../', 'http://a/')
     310          self.checkJoin(RFC2396_BASE, '../../g', 'http://a/g')
     311          self.checkJoin(RFC2396_BASE, '', RFC2396_BASE)
     312          self.checkJoin(RFC2396_BASE, 'g.', 'http://a/b/c/g.')
     313          self.checkJoin(RFC2396_BASE, '.g', 'http://a/b/c/.g')
     314          self.checkJoin(RFC2396_BASE, 'g..', 'http://a/b/c/g..')
     315          self.checkJoin(RFC2396_BASE, '..g', 'http://a/b/c/..g')
     316          self.checkJoin(RFC2396_BASE, './../g', 'http://a/b/g')
     317          self.checkJoin(RFC2396_BASE, './g/.', 'http://a/b/c/g/')
     318          self.checkJoin(RFC2396_BASE, 'g/./h', 'http://a/b/c/g/h')
     319          self.checkJoin(RFC2396_BASE, 'g/../h', 'http://a/b/c/h')
     320          self.checkJoin(RFC2396_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y')
     321          self.checkJoin(RFC2396_BASE, 'g;x=1/../y', 'http://a/b/c/y')
     322          self.checkJoin(RFC2396_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
     323          self.checkJoin(RFC2396_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x')
     324          self.checkJoin(RFC2396_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
     325          self.checkJoin(RFC2396_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x')
     326  
     327          # XXX: The following tests are no longer compatible with RFC3986
     328          # self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g')
     329          # self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g')
     330          # self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g')
     331          # self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g')
     332  
     333      def test_RFC3986(self):
     334          self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y')
     335          self.checkJoin(RFC3986_BASE, ';x', 'http://a/b/c/;x')
     336          self.checkJoin(RFC3986_BASE, 'g:h','g:h')
     337          self.checkJoin(RFC3986_BASE, 'g','http://a/b/c/g')
     338          self.checkJoin(RFC3986_BASE, './g','http://a/b/c/g')
     339          self.checkJoin(RFC3986_BASE, 'g/','http://a/b/c/g/')
     340          self.checkJoin(RFC3986_BASE, '/g','http://a/g')
     341          self.checkJoin(RFC3986_BASE, '//g','http://g')
     342          self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y')
     343          self.checkJoin(RFC3986_BASE, 'g?y','http://a/b/c/g?y')
     344          self.checkJoin(RFC3986_BASE, '#s','http://a/b/c/d;p?q#s')
     345          self.checkJoin(RFC3986_BASE, 'g#s','http://a/b/c/g#s')
     346          self.checkJoin(RFC3986_BASE, 'g?y#s','http://a/b/c/g?y#s')
     347          self.checkJoin(RFC3986_BASE, ';x','http://a/b/c/;x')
     348          self.checkJoin(RFC3986_BASE, 'g;x','http://a/b/c/g;x')
     349          self.checkJoin(RFC3986_BASE, 'g;x?y#s','http://a/b/c/g;x?y#s')
     350          self.checkJoin(RFC3986_BASE, '','http://a/b/c/d;p?q')
     351          self.checkJoin(RFC3986_BASE, '.','http://a/b/c/')
     352          self.checkJoin(RFC3986_BASE, './','http://a/b/c/')
     353          self.checkJoin(RFC3986_BASE, '..','http://a/b/')
     354          self.checkJoin(RFC3986_BASE, '../','http://a/b/')
     355          self.checkJoin(RFC3986_BASE, '../g','http://a/b/g')
     356          self.checkJoin(RFC3986_BASE, '../..','http://a/')
     357          self.checkJoin(RFC3986_BASE, '../../','http://a/')
     358          self.checkJoin(RFC3986_BASE, '../../g','http://a/g')
     359          self.checkJoin(RFC3986_BASE, '../../../g', 'http://a/g')
     360  
     361          # Abnormal Examples
     362  
     363          # The 'abnormal scenarios' are incompatible with RFC2986 parsing
     364          # Tests are here for reference.
     365  
     366          self.checkJoin(RFC3986_BASE, '../../../g','http://a/g')
     367          self.checkJoin(RFC3986_BASE, '../../../../g','http://a/g')
     368          self.checkJoin(RFC3986_BASE, '/./g','http://a/g')
     369          self.checkJoin(RFC3986_BASE, '/../g','http://a/g')
     370          self.checkJoin(RFC3986_BASE, 'g.','http://a/b/c/g.')
     371          self.checkJoin(RFC3986_BASE, '.g','http://a/b/c/.g')
     372          self.checkJoin(RFC3986_BASE, 'g..','http://a/b/c/g..')
     373          self.checkJoin(RFC3986_BASE, '..g','http://a/b/c/..g')
     374          self.checkJoin(RFC3986_BASE, './../g','http://a/b/g')
     375          self.checkJoin(RFC3986_BASE, './g/.','http://a/b/c/g/')
     376          self.checkJoin(RFC3986_BASE, 'g/./h','http://a/b/c/g/h')
     377          self.checkJoin(RFC3986_BASE, 'g/../h','http://a/b/c/h')
     378          self.checkJoin(RFC3986_BASE, 'g;x=1/./y','http://a/b/c/g;x=1/y')
     379          self.checkJoin(RFC3986_BASE, 'g;x=1/../y','http://a/b/c/y')
     380          self.checkJoin(RFC3986_BASE, 'g?y/./x','http://a/b/c/g?y/./x')
     381          self.checkJoin(RFC3986_BASE, 'g?y/../x','http://a/b/c/g?y/../x')
     382          self.checkJoin(RFC3986_BASE, 'g#s/./x','http://a/b/c/g#s/./x')
     383          self.checkJoin(RFC3986_BASE, 'g#s/../x','http://a/b/c/g#s/../x')
     384          #self.checkJoin(RFC3986_BASE, 'http:g','http:g') # strict parser
     385          self.checkJoin(RFC3986_BASE, 'http:g','http://a/b/c/g') #relaxed parser
     386  
     387          # Test for issue9721
     388          self.checkJoin('http://a/b/c/de', ';x','http://a/b/c/;x')
     389  
     390      def test_urljoins(self):
     391          self.checkJoin(SIMPLE_BASE, 'g:h','g:h')
     392          self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g')
     393          self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d')
     394          self.checkJoin(SIMPLE_BASE, 'g','http://a/b/c/g')
     395          self.checkJoin(SIMPLE_BASE, './g','http://a/b/c/g')
     396          self.checkJoin(SIMPLE_BASE, 'g/','http://a/b/c/g/')
     397          self.checkJoin(SIMPLE_BASE, '/g','http://a/g')
     398          self.checkJoin(SIMPLE_BASE, '//g','http://g')
     399          self.checkJoin(SIMPLE_BASE, '?y','http://a/b/c/d?y')
     400          self.checkJoin(SIMPLE_BASE, 'g?y','http://a/b/c/g?y')
     401          self.checkJoin(SIMPLE_BASE, 'g?y/./x','http://a/b/c/g?y/./x')
     402          self.checkJoin(SIMPLE_BASE, '.','http://a/b/c/')
     403          self.checkJoin(SIMPLE_BASE, './','http://a/b/c/')
     404          self.checkJoin(SIMPLE_BASE, '..','http://a/b/')
     405          self.checkJoin(SIMPLE_BASE, '../','http://a/b/')
     406          self.checkJoin(SIMPLE_BASE, '../g','http://a/b/g')
     407          self.checkJoin(SIMPLE_BASE, '../..','http://a/')
     408          self.checkJoin(SIMPLE_BASE, '../../g','http://a/g')
     409          self.checkJoin(SIMPLE_BASE, './../g','http://a/b/g')
     410          self.checkJoin(SIMPLE_BASE, './g/.','http://a/b/c/g/')
     411          self.checkJoin(SIMPLE_BASE, 'g/./h','http://a/b/c/g/h')
     412          self.checkJoin(SIMPLE_BASE, 'g/../h','http://a/b/c/h')
     413          self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g')
     414          self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d')
     415          self.checkJoin(SIMPLE_BASE, 'http:?y','http://a/b/c/d?y')
     416          self.checkJoin(SIMPLE_BASE, 'http:g?y','http://a/b/c/g?y')
     417          self.checkJoin(SIMPLE_BASE, 'http:g?y/./x','http://a/b/c/g?y/./x')
     418          self.checkJoin('http:///', '..','http:///')
     419          self.checkJoin('', 'http://a/b/c/g?y/./x','http://a/b/c/g?y/./x')
     420          self.checkJoin('', 'http://a/./g', 'http://a/./g')
     421          self.checkJoin('svn://pathtorepo/dir1', 'dir2', 'svn://pathtorepo/dir2')
     422          self.checkJoin('svn+ssh://pathtorepo/dir1', 'dir2', 'svn+ssh://pathtorepo/dir2')
     423          self.checkJoin('ws://a/b','g','ws://a/g')
     424          self.checkJoin('wss://a/b','g','wss://a/g')
     425  
     426          # XXX: The following tests are no longer compatible with RFC3986
     427          # self.checkJoin(SIMPLE_BASE, '../../../g','http://a/../g')
     428          # self.checkJoin(SIMPLE_BASE, '/./g','http://a/./g')
     429  
     430          # test for issue22118 duplicate slashes
     431          self.checkJoin(SIMPLE_BASE + '/', 'foo', SIMPLE_BASE + '/foo')
     432  
     433          # Non-RFC-defined tests, covering variations of base and trailing
     434          # slashes
     435          self.checkJoin('http://a/b/c/d/e/', '../../f/g/', 'http://a/b/c/f/g/')
     436          self.checkJoin('http://a/b/c/d/e', '../../f/g/', 'http://a/b/f/g/')
     437          self.checkJoin('http://a/b/c/d/e/', '/../../f/g/', 'http://a/f/g/')
     438          self.checkJoin('http://a/b/c/d/e', '/../../f/g/', 'http://a/f/g/')
     439          self.checkJoin('http://a/b/c/d/e/', '../../f/g', 'http://a/b/c/f/g')
     440          self.checkJoin('http://a/b/', '../../f/g/', 'http://a/f/g/')
     441  
     442          # issue 23703: don't duplicate filename
     443          self.checkJoin('a', 'b', 'b')
     444  
     445      def test_RFC2732(self):
     446          str_cases = [
     447              ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
     448              ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
     449              ('http://[::1]:5432/foo/', '::1', 5432),
     450              ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
     451              ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
     452              ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
     453               'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
     454              ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
     455              ('http://[::ffff:12.34.56.78]:5432/foo/',
     456               '::ffff:12.34.56.78', 5432),
     457              ('http://Test.python.org/foo/', 'test.python.org', None),
     458              ('http://12.34.56.78/foo/', '12.34.56.78', None),
     459              ('http://[::1]/foo/', '::1', None),
     460              ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
     461              ('http://[dead:beef::]/foo/', 'dead:beef::', None),
     462              ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
     463               'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
     464              ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
     465              ('http://[::ffff:12.34.56.78]/foo/',
     466               '::ffff:12.34.56.78', None),
     467              ('http://Test.python.org:/foo/', 'test.python.org', None),
     468              ('http://12.34.56.78:/foo/', '12.34.56.78', None),
     469              ('http://[::1]:/foo/', '::1', None),
     470              ('http://[dead:beef::1]:/foo/', 'dead:beef::1', None),
     471              ('http://[dead:beef::]:/foo/', 'dead:beef::', None),
     472              ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:/foo/',
     473               'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
     474              ('http://[::12.34.56.78]:/foo/', '::12.34.56.78', None),
     475              ('http://[::ffff:12.34.56.78]:/foo/',
     476               '::ffff:12.34.56.78', None),
     477              ]
     478          def _encode(t):
     479              return t[0].encode('ascii'), t[1].encode('ascii'), t[2]
     480          bytes_cases = [_encode(x) for x in str_cases]
     481          for url, hostname, port in str_cases + bytes_cases:
     482              urlparsed = urllib.parse.urlparse(url)
     483              self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port))
     484  
     485          str_cases = [
     486                  'http://::12.34.56.78]/',
     487                  'http://[::1/foo/',
     488                  'ftp://[::1/foo/bad]/bad',
     489                  'http://[::1/foo/bad]/bad',
     490                  'http://[::ffff:12.34.56.78']
     491          bytes_cases = [x.encode('ascii') for x in str_cases]
     492          for invalid_url in str_cases + bytes_cases:
     493              self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url)
     494  
     495      def test_urldefrag(self):
     496          str_cases = [
     497              ('http://python.org#frag', 'http://python.org', 'frag'),
     498              ('http://python.org', 'http://python.org', ''),
     499              ('http://python.org/#frag', 'http://python.org/', 'frag'),
     500              ('http://python.org/', 'http://python.org/', ''),
     501              ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
     502              ('http://python.org/?q', 'http://python.org/?q', ''),
     503              ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
     504              ('http://python.org/p?q', 'http://python.org/p?q', ''),
     505              (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
     506              (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
     507          ]
     508          def _encode(t):
     509              return type(t)(x.encode('ascii') for x in t)
     510          bytes_cases = [_encode(x) for x in str_cases]
     511          for url, defrag, frag in str_cases + bytes_cases:
     512              result = urllib.parse.urldefrag(url)
     513              self.assertEqual(result.geturl(), url)
     514              self.assertEqual(result, (defrag, frag))
     515              self.assertEqual(result.url, defrag)
     516              self.assertEqual(result.fragment, frag)
     517  
     518      def test_urlsplit_scoped_IPv6(self):
     519          p = urllib.parse.urlsplit('http://[FE80::822a:a8ff:fe49:470c%tESt]:1234')
     520          self.assertEqual(p.hostname, "fe80::822a:a8ff:fe49:470c%tESt")
     521          self.assertEqual(p.netloc, '[FE80::822a:a8ff:fe49:470c%tESt]:1234')
     522  
     523          p = urllib.parse.urlsplit(b'http://[FE80::822a:a8ff:fe49:470c%tESt]:1234')
     524          self.assertEqual(p.hostname, b"fe80::822a:a8ff:fe49:470c%tESt")
     525          self.assertEqual(p.netloc, b'[FE80::822a:a8ff:fe49:470c%tESt]:1234')
     526  
     527      def test_urlsplit_attributes(self):
     528          url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
     529          p = urllib.parse.urlsplit(url)
     530          self.assertEqual(p.scheme, "http")
     531          self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
     532          self.assertEqual(p.path, "/doc/")
     533          self.assertEqual(p.query, "")
     534          self.assertEqual(p.fragment, "frag")
     535          self.assertEqual(p.username, None)
     536          self.assertEqual(p.password, None)
     537          self.assertEqual(p.hostname, "www.python.org")
     538          self.assertEqual(p.port, None)
     539          # geturl() won't return exactly the original URL in this case
     540          # since the scheme is always case-normalized
     541          # We handle this by ignoring the first 4 characters of the URL
     542          self.assertEqual(p.geturl()[4:], url[4:])
     543  
     544          url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
     545          p = urllib.parse.urlsplit(url)
     546          self.assertEqual(p.scheme, "http")
     547          self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
     548          self.assertEqual(p.path, "/doc/")
     549          self.assertEqual(p.query, "query=yes")
     550          self.assertEqual(p.fragment, "frag")
     551          self.assertEqual(p.username, "User")
     552          self.assertEqual(p.password, "Pass")
     553          self.assertEqual(p.hostname, "www.python.org")
     554          self.assertEqual(p.port, 80)
     555          self.assertEqual(p.geturl(), url)
     556  
     557          # Addressing issue1698, which suggests Username can contain
     558          # "@" characters.  Though not RFC compliant, many ftp sites allow
     559          # and request email addresses as usernames.
     560  
     561          url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
     562          p = urllib.parse.urlsplit(url)
     563          self.assertEqual(p.scheme, "http")
     564          self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
     565          self.assertEqual(p.path, "/doc/")
     566          self.assertEqual(p.query, "query=yes")
     567          self.assertEqual(p.fragment, "frag")
     568          self.assertEqual(p.username, "User@example.com")
     569          self.assertEqual(p.password, "Pass")
     570          self.assertEqual(p.hostname, "www.python.org")
     571          self.assertEqual(p.port, 80)
     572          self.assertEqual(p.geturl(), url)
     573  
     574          # And check them all again, only with bytes this time
     575          url = b"HTTP://WWW.PYTHON.ORG/doc/#frag"
     576          p = urllib.parse.urlsplit(url)
     577          self.assertEqual(p.scheme, b"http")
     578          self.assertEqual(p.netloc, b"WWW.PYTHON.ORG")
     579          self.assertEqual(p.path, b"/doc/")
     580          self.assertEqual(p.query, b"")
     581          self.assertEqual(p.fragment, b"frag")
     582          self.assertEqual(p.username, None)
     583          self.assertEqual(p.password, None)
     584          self.assertEqual(p.hostname, b"www.python.org")
     585          self.assertEqual(p.port, None)
     586          self.assertEqual(p.geturl()[4:], url[4:])
     587  
     588          url = b"http://User:Pass@www.python.org:080/doc/?query=yes#frag"
     589          p = urllib.parse.urlsplit(url)
     590          self.assertEqual(p.scheme, b"http")
     591          self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")
     592          self.assertEqual(p.path, b"/doc/")
     593          self.assertEqual(p.query, b"query=yes")
     594          self.assertEqual(p.fragment, b"frag")
     595          self.assertEqual(p.username, b"User")
     596          self.assertEqual(p.password, b"Pass")
     597          self.assertEqual(p.hostname, b"www.python.org")
     598          self.assertEqual(p.port, 80)
     599          self.assertEqual(p.geturl(), url)
     600  
     601          url = b"http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
     602          p = urllib.parse.urlsplit(url)
     603          self.assertEqual(p.scheme, b"http")
     604          self.assertEqual(p.netloc, b"User@example.com:Pass@www.python.org:080")
     605          self.assertEqual(p.path, b"/doc/")
     606          self.assertEqual(p.query, b"query=yes")
     607          self.assertEqual(p.fragment, b"frag")
     608          self.assertEqual(p.username, b"User@example.com")
     609          self.assertEqual(p.password, b"Pass")
     610          self.assertEqual(p.hostname, b"www.python.org")
     611          self.assertEqual(p.port, 80)
     612          self.assertEqual(p.geturl(), url)
     613  
     614          # Verify an illegal port raises ValueError
     615          url = b"HTTP://WWW.PYTHON.ORG:65536/doc/#frag"
     616          p = urllib.parse.urlsplit(url)
     617          with self.assertRaisesRegex(ValueError, "out of range"):
     618              p.port
     619  
     620      def test_urlsplit_remove_unsafe_bytes(self):
     621          # Remove ASCII tabs and newlines from input
     622          url = "http\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
     623          p = urllib.parse.urlsplit(url)
     624          self.assertEqual(p.scheme, "http")
     625          self.assertEqual(p.netloc, "www.python.org")
     626          self.assertEqual(p.path, "/javascript:alert('msg')/")
     627          self.assertEqual(p.query, "query=something")
     628          self.assertEqual(p.fragment, "fragment")
     629          self.assertEqual(p.username, None)
     630          self.assertEqual(p.password, None)
     631          self.assertEqual(p.hostname, "www.python.org")
     632          self.assertEqual(p.port, None)
     633          self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment")
     634  
     635          # Remove ASCII tabs and newlines from input as bytes.
     636          url = b"http\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
     637          p = urllib.parse.urlsplit(url)
     638          self.assertEqual(p.scheme, b"http")
     639          self.assertEqual(p.netloc, b"www.python.org")
     640          self.assertEqual(p.path, b"/javascript:alert('msg')/")
     641          self.assertEqual(p.query, b"query=something")
     642          self.assertEqual(p.fragment, b"fragment")
     643          self.assertEqual(p.username, None)
     644          self.assertEqual(p.password, None)
     645          self.assertEqual(p.hostname, b"www.python.org")
     646          self.assertEqual(p.port, None)
     647          self.assertEqual(p.geturl(), b"http://www.python.org/javascript:alert('msg')/?query=something#fragment")
     648  
     649          # with scheme as cache-key
     650          url = "http://www.python.org/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
     651          scheme = "ht\ntp"
     652          for _ in range(2):
     653              p = urllib.parse.urlsplit(url, scheme=scheme)
     654              self.assertEqual(p.scheme, "http")
     655              self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment")
     656  
     657      def test_urlsplit_strip_url(self):
     658          noise = bytes(range(0, 0x20 + 1))
     659          base_url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
     660  
     661          url = noise.decode("utf-8") + base_url
     662          p = urllib.parse.urlsplit(url)
     663          self.assertEqual(p.scheme, "http")
     664          self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
     665          self.assertEqual(p.path, "/doc/")
     666          self.assertEqual(p.query, "query=yes")
     667          self.assertEqual(p.fragment, "frag")
     668          self.assertEqual(p.username, "User")
     669          self.assertEqual(p.password, "Pass")
     670          self.assertEqual(p.hostname, "www.python.org")
     671          self.assertEqual(p.port, 80)
     672          self.assertEqual(p.geturl(), base_url)
     673  
     674          url = noise + base_url.encode("utf-8")
     675          p = urllib.parse.urlsplit(url)
     676          self.assertEqual(p.scheme, b"http")
     677          self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")
     678          self.assertEqual(p.path, b"/doc/")
     679          self.assertEqual(p.query, b"query=yes")
     680          self.assertEqual(p.fragment, b"frag")
     681          self.assertEqual(p.username, b"User")
     682          self.assertEqual(p.password, b"Pass")
     683          self.assertEqual(p.hostname, b"www.python.org")
     684          self.assertEqual(p.port, 80)
     685          self.assertEqual(p.geturl(), base_url.encode("utf-8"))
     686  
     687          # Test that trailing space is preserved as some applications rely on
     688          # this within query strings.
     689          query_spaces_url = "https://www.python.org:88/doc/?query=    "
     690          p = urllib.parse.urlsplit(noise.decode("utf-8") + query_spaces_url)
     691          self.assertEqual(p.scheme, "https")
     692          self.assertEqual(p.netloc, "www.python.org:88")
     693          self.assertEqual(p.path, "/doc/")
     694          self.assertEqual(p.query, "query=    ")
     695          self.assertEqual(p.port, 88)
     696          self.assertEqual(p.geturl(), query_spaces_url)
     697  
     698          p = urllib.parse.urlsplit("www.pypi.org ")
     699          # That "hostname" gets considered a "path" due to the
     700          # trailing space and our existing logic...  YUCK...
     701          # and re-assembles via geturl aka unurlsplit into the original.
     702          # django.core.validators.URLValidator (at least through v3.2) relies on
     703          # this, for better or worse, to catch it in a ValidationError via its
     704          # regular expressions.
     705          # Here we test the basic round trip concept of such a trailing space.
     706          self.assertEqual(urllib.parse.urlunsplit(p), "www.pypi.org ")
     707  
     708          # with scheme as cache-key
     709          url = "//www.python.org/"
     710          scheme = noise.decode("utf-8") + "https" + noise.decode("utf-8")
     711          for _ in range(2):
     712              p = urllib.parse.urlsplit(url, scheme=scheme)
     713              self.assertEqual(p.scheme, "https")
     714              self.assertEqual(p.geturl(), "https://www.python.org/")
     715  
     716      def test_attributes_bad_port(self):
     717          """Check handling of invalid ports."""
     718          for bytes in (False, True):
     719              for parse in (urllib.parse.urlsplit, urllib.parse.urlparse):
     720                  for port in ("foo", "1.5", "-1", "0x10", "-0", "1_1", " 1", "1 ", "६"):
     721                      with self.subTest(bytes=bytes, parse=parse, port=port):
     722                          netloc = "www.example.net:" + port
     723                          url = "http://" + netloc + "/"
     724                          if bytes:
     725                              if netloc.isascii() and port.isascii():
     726                                  netloc = netloc.encode("ascii")
     727                                  url = url.encode("ascii")
     728                              else:
     729                                  continue
     730                          p = parse(url)
     731                          self.assertEqual(p.netloc, netloc)
     732                          with self.assertRaises(ValueError):
     733                              p.port
     734  
     735      def test_attributes_bad_scheme(self):
     736          """Check handling of invalid schemes."""
     737          for bytes in (False, True):
     738              for parse in (urllib.parse.urlsplit, urllib.parse.urlparse):
     739                  for scheme in (".", "+", "-", "0", "http&", "६http"):
     740                      with self.subTest(bytes=bytes, parse=parse, scheme=scheme):
     741                          url = scheme + "://www.example.net"
     742                          if bytes:
     743                              if url.isascii():
     744                                  url = url.encode("ascii")
     745                              else:
     746                                  continue
     747                          p = parse(url)
     748                          if bytes:
     749                              self.assertEqual(p.scheme, b"")
     750                          else:
     751                              self.assertEqual(p.scheme, "")
     752  
     753      def test_attributes_without_netloc(self):
     754          # This example is straight from RFC 3261.  It looks like it
     755          # should allow the username, hostname, and port to be filled
     756          # in, but doesn't.  Since it's a URI and doesn't use the
     757          # scheme://netloc syntax, the netloc and related attributes
     758          # should be left empty.
     759          uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
     760          p = urllib.parse.urlsplit(uri)
     761          self.assertEqual(p.netloc, "")
     762          self.assertEqual(p.username, None)
     763          self.assertEqual(p.password, None)
     764          self.assertEqual(p.hostname, None)
     765          self.assertEqual(p.port, None)
     766          self.assertEqual(p.geturl(), uri)
     767  
     768          p = urllib.parse.urlparse(uri)
     769          self.assertEqual(p.netloc, "")
     770          self.assertEqual(p.username, None)
     771          self.assertEqual(p.password, None)
     772          self.assertEqual(p.hostname, None)
     773          self.assertEqual(p.port, None)
     774          self.assertEqual(p.geturl(), uri)
     775  
     776          # You guessed it, repeating the test with bytes input
     777          uri = b"sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
     778          p = urllib.parse.urlsplit(uri)
     779          self.assertEqual(p.netloc, b"")
     780          self.assertEqual(p.username, None)
     781          self.assertEqual(p.password, None)
     782          self.assertEqual(p.hostname, None)
     783          self.assertEqual(p.port, None)
     784          self.assertEqual(p.geturl(), uri)
     785  
     786          p = urllib.parse.urlparse(uri)
     787          self.assertEqual(p.netloc, b"")
     788          self.assertEqual(p.username, None)
     789          self.assertEqual(p.password, None)
     790          self.assertEqual(p.hostname, None)
     791          self.assertEqual(p.port, None)
     792          self.assertEqual(p.geturl(), uri)
     793  
     794      def test_noslash(self):
     795          # Issue 1637: http://foo.com?query is legal
     796          self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
     797                           ('http', 'example.com', '', '', 'blahblah=/foo', ''))
     798          self.assertEqual(urllib.parse.urlparse(b"http://example.com?blahblah=/foo"),
     799                           (b'http', b'example.com', b'', b'', b'blahblah=/foo', b''))
     800  
     801      def test_withoutscheme(self):
     802          # Test urlparse without scheme
     803          # Issue 754016: urlparse goes wrong with IP:port without scheme
     804          # RFC 1808 specifies that netloc should start with //, urlparse expects
     805          # the same, otherwise it classifies the portion of url as path.
     806          self.assertEqual(urllib.parse.urlparse("path"),
     807                  ('','','path','','',''))
     808          self.assertEqual(urllib.parse.urlparse("//www.python.org:80"),
     809                  ('','www.python.org:80','','','',''))
     810          self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
     811                  ('http','www.python.org:80','','','',''))
     812          # Repeat for bytes input
     813          self.assertEqual(urllib.parse.urlparse(b"path"),
     814                  (b'',b'',b'path',b'',b'',b''))
     815          self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80"),
     816                  (b'',b'www.python.org:80',b'',b'',b'',b''))
     817          self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
     818                  (b'http',b'www.python.org:80',b'',b'',b'',b''))
     819  
     820      def test_portseparator(self):
     821          # Issue 754016 makes changes for port separator ':' from scheme separator
     822          self.assertEqual(urllib.parse.urlparse("http:80"), ('http','','80','','',''))
     823          self.assertEqual(urllib.parse.urlparse("https:80"), ('https','','80','','',''))
     824          self.assertEqual(urllib.parse.urlparse("path:80"), ('path','','80','','',''))
     825          self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
     826          self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
     827          self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
     828                  ('http','www.python.org:80','','','',''))
     829          # As usual, need to check bytes input as well
     830          self.assertEqual(urllib.parse.urlparse(b"http:80"), (b'http',b'',b'80',b'',b'',b''))
     831          self.assertEqual(urllib.parse.urlparse(b"https:80"), (b'https',b'',b'80',b'',b'',b''))
     832          self.assertEqual(urllib.parse.urlparse(b"path:80"), (b'path',b'',b'80',b'',b'',b''))
     833          self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b''))
     834          self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b''))
     835          self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
     836                  (b'http',b'www.python.org:80',b'',b'',b'',b''))
     837  
     838      def test_usingsys(self):
     839          # Issue 3314: sys module is used in the error
     840          self.assertRaises(TypeError, urllib.parse.urlencode, "foo")
     841  
     842      def test_anyscheme(self):
     843          # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
     844          self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"),
     845                           ('s3', 'foo.com', '/stuff', '', '', ''))
     846          self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),
     847                           ('x-newscheme', 'foo.com', '/stuff', '', '', ''))
     848          self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),
     849                           ('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment'))
     850          self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query"),
     851                           ('x-newscheme', 'foo.com', '/stuff', '', 'query', ''))
     852  
     853          # And for bytes...
     854          self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"),
     855                           (b's3', b'foo.com', b'/stuff', b'', b'', b''))
     856          self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"),
     857                           (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b''))
     858          self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment"),
     859                           (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'fragment'))
     860          self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"),
     861                           (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b''))
     862  
     863      def test_default_scheme(self):
     864          # Exercise the scheme parameter of urlparse() and urlsplit()
     865          for func in (urllib.parse.urlparse, urllib.parse.urlsplit):
     866              with self.subTest(function=func):
     867                  result = func("http://example.net/", "ftp")
     868                  self.assertEqual(result.scheme, "http")
     869                  result = func(b"http://example.net/", b"ftp")
     870                  self.assertEqual(result.scheme, b"http")
     871                  self.assertEqual(func("path", "ftp").scheme, "ftp")
     872                  self.assertEqual(func("path", scheme="ftp").scheme, "ftp")
     873                  self.assertEqual(func(b"path", scheme=b"ftp").scheme, b"ftp")
     874                  self.assertEqual(func("path").scheme, "")
     875                  self.assertEqual(func(b"path").scheme, b"")
     876                  self.assertEqual(func(b"path", "").scheme, b"")
     877  
     878      def test_parse_fragments(self):
     879          # Exercise the allow_fragments parameter of urlparse() and urlsplit()
     880          tests = (
     881              ("http:#frag", "path", "frag"),
     882              ("//example.net#frag", "path", "frag"),
     883              ("index.html#frag", "path", "frag"),
     884              (";a=b#frag", "params", "frag"),
     885              ("?a=b#frag", "query", "frag"),
     886              ("#frag", "path", "frag"),
     887              ("abc#@frag", "path", "@frag"),
     888              ("//abc#@frag", "path", "@frag"),
     889              ("//abc:80#@frag", "path", "@frag"),
     890              ("//abc#@frag:80", "path", "@frag:80"),
     891          )
     892          for url, attr, expected_frag in tests:
     893              for func in (urllib.parse.urlparse, urllib.parse.urlsplit):
     894                  if attr == "params" and func is urllib.parse.urlsplit:
     895                      attr = "path"
     896                  with self.subTest(url=url, function=func):
     897                      result = func(url, allow_fragments=False)
     898                      self.assertEqual(result.fragment, "")
     899                      self.assertTrue(
     900                              getattr(result, attr).endswith("#" + expected_frag))
     901                      self.assertEqual(func(url, "", False).fragment, "")
     902  
     903                      result = func(url, allow_fragments=True)
     904                      self.assertEqual(result.fragment, expected_frag)
     905                      self.assertFalse(
     906                              getattr(result, attr).endswith(expected_frag))
     907                      self.assertEqual(func(url, "", True).fragment,
     908                                       expected_frag)
     909                      self.assertEqual(func(url).fragment, expected_frag)
     910  
     911      def test_mixed_types_rejected(self):
     912          # Several functions that process either strings or ASCII encoded bytes
     913          # accept multiple arguments. Check they reject mixed type input
     914          with self.assertRaisesRegex(TypeError, "Cannot mix str"):
     915              urllib.parse.urlparse("www.python.org", b"http")
     916          with self.assertRaisesRegex(TypeError, "Cannot mix str"):
     917              urllib.parse.urlparse(b"www.python.org", "http")
     918          with self.assertRaisesRegex(TypeError, "Cannot mix str"):
     919              urllib.parse.urlsplit("www.python.org", b"http")
     920          with self.assertRaisesRegex(TypeError, "Cannot mix str"):
     921              urllib.parse.urlsplit(b"www.python.org", "http")
     922          with self.assertRaisesRegex(TypeError, "Cannot mix str"):
     923              urllib.parse.urlunparse(( b"http", "www.python.org","","","",""))
     924          with self.assertRaisesRegex(TypeError, "Cannot mix str"):
     925              urllib.parse.urlunparse(("http", b"www.python.org","","","",""))
     926          with self.assertRaisesRegex(TypeError, "Cannot mix str"):
     927              urllib.parse.urlunsplit((b"http", "www.python.org","","",""))
     928          with self.assertRaisesRegex(TypeError, "Cannot mix str"):
     929              urllib.parse.urlunsplit(("http", b"www.python.org","","",""))
     930          with self.assertRaisesRegex(TypeError, "Cannot mix str"):
     931              urllib.parse.urljoin("http://python.org", b"http://python.org")
     932          with self.assertRaisesRegex(TypeError, "Cannot mix str"):
     933              urllib.parse.urljoin(b"http://python.org", "http://python.org")
     934  
     935      def _check_result_type(self, str_type):
     936          num_args = len(str_type._fields)
     937          bytes_type = str_type._encoded_counterpart
     938          self.assertIs(bytes_type._decoded_counterpart, str_type)
     939          str_args = ('',) * num_args
     940          bytes_args = (b'',) * num_args
     941          str_result = str_type(*str_args)
     942          bytes_result = bytes_type(*bytes_args)
     943          encoding = 'ascii'
     944          errors = 'strict'
     945          self.assertEqual(str_result, str_args)
     946          self.assertEqual(bytes_result.decode(), str_args)
     947          self.assertEqual(bytes_result.decode(), str_result)
     948          self.assertEqual(bytes_result.decode(encoding), str_args)
     949          self.assertEqual(bytes_result.decode(encoding), str_result)
     950          self.assertEqual(bytes_result.decode(encoding, errors), str_args)
     951          self.assertEqual(bytes_result.decode(encoding, errors), str_result)
     952          self.assertEqual(bytes_result, bytes_args)
     953          self.assertEqual(str_result.encode(), bytes_args)
     954          self.assertEqual(str_result.encode(), bytes_result)
     955          self.assertEqual(str_result.encode(encoding), bytes_args)
     956          self.assertEqual(str_result.encode(encoding), bytes_result)
     957          self.assertEqual(str_result.encode(encoding, errors), bytes_args)
     958          self.assertEqual(str_result.encode(encoding, errors), bytes_result)
     959  
     960      def test_result_pairs(self):
     961          # Check encoding and decoding between result pairs
     962          result_types = [
     963            urllib.parse.DefragResult,
     964            urllib.parse.SplitResult,
     965            urllib.parse.ParseResult,
     966          ]
     967          for result_type in result_types:
     968              self._check_result_type(result_type)
     969  
     970      def test_parse_qs_encoding(self):
     971          result = urllib.parse.parse_qs("key=\u0141%E9", encoding="latin-1")
     972          self.assertEqual(result, {'key': ['\u0141\xE9']})
     973          result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="utf-8")
     974          self.assertEqual(result, {'key': ['\u0141\xE9']})
     975          result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="ascii")
     976          self.assertEqual(result, {'key': ['\u0141\ufffd\ufffd']})
     977          result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii")
     978          self.assertEqual(result, {'key': ['\u0141\ufffd-']})
     979          result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii",
     980                                                            errors="ignore")
     981          self.assertEqual(result, {'key': ['\u0141-']})
     982  
     983      def test_parse_qsl_encoding(self):
     984          result = urllib.parse.parse_qsl("key=\u0141%E9", encoding="latin-1")
     985          self.assertEqual(result, [('key', '\u0141\xE9')])
     986          result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="utf-8")
     987          self.assertEqual(result, [('key', '\u0141\xE9')])
     988          result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="ascii")
     989          self.assertEqual(result, [('key', '\u0141\ufffd\ufffd')])
     990          result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii")
     991          self.assertEqual(result, [('key', '\u0141\ufffd-')])
     992          result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii",
     993                                                            errors="ignore")
     994          self.assertEqual(result, [('key', '\u0141-')])
     995  
     996      def test_parse_qsl_max_num_fields(self):
     997          with self.assertRaises(ValueError):
     998              urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10)
     999          urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10)
    1000  
    1001      def test_parse_qs_separator(self):
    1002          parse_qs_semicolon_cases = [
    1003              (";", {}),
    1004              (";;", {}),
    1005              (";a=b", {'a': ['b']}),
    1006              ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
    1007              ("a=1;a=2", {'a': ['1', '2']}),
    1008              (b";", {}),
    1009              (b";;", {}),
    1010              (b";a=b", {b'a': [b'b']}),
    1011              (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
    1012              (b"a=1;a=2", {b'a': [b'1', b'2']}),
    1013          ]
    1014          for orig, expect in parse_qs_semicolon_cases:
    1015              with self.subTest(f"Original: {orig!r}, Expected: {expect!r}"):
    1016                  result = urllib.parse.parse_qs(orig, separator=';')
    1017                  self.assertEqual(result, expect, "Error parsing %r" % orig)
    1018                  result_bytes = urllib.parse.parse_qs(orig, separator=b';')
    1019                  self.assertEqual(result_bytes, expect, "Error parsing %r" % orig)
    1020  
    1021  
    1022      def test_parse_qsl_separator(self):
    1023          parse_qsl_semicolon_cases = [
    1024              (";", []),
    1025              (";;", []),
    1026              (";a=b", [('a', 'b')]),
    1027              ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
    1028              ("a=1;a=2", [('a', '1'), ('a', '2')]),
    1029              (b";", []),
    1030              (b";;", []),
    1031              (b";a=b", [(b'a', b'b')]),
    1032              (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
    1033              (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
    1034          ]
    1035          for orig, expect in parse_qsl_semicolon_cases:
    1036              with self.subTest(f"Original: {orig!r}, Expected: {expect!r}"):
    1037                  result = urllib.parse.parse_qsl(orig, separator=';')
    1038                  self.assertEqual(result, expect, "Error parsing %r" % orig)
    1039                  result_bytes = urllib.parse.parse_qsl(orig, separator=b';')
    1040                  self.assertEqual(result_bytes, expect, "Error parsing %r" % orig)
    1041  
    1042  
    1043      def test_urlencode_sequences(self):
    1044          # Other tests incidentally urlencode things; test non-covered cases:
    1045          # Sequence and object values.
    1046          result = urllib.parse.urlencode({'a': [1, 2], 'b': (3, 4, 5)}, True)
    1047          # we cannot rely on ordering here
    1048          assert set(result.split('&')) == {'a=1', 'a=2', 'b=3', 'b=4', 'b=5'}
    1049  
    1050          class ESC[4;38;5;81mTrivial:
    1051              def __str__(self):
    1052                  return 'trivial'
    1053  
    1054          result = urllib.parse.urlencode({'a': Trivial()}, True)
    1055          self.assertEqual(result, 'a=trivial')
    1056  
    1057      def test_urlencode_quote_via(self):
    1058          result = urllib.parse.urlencode({'a': 'some value'})
    1059          self.assertEqual(result, "a=some+value")
    1060          result = urllib.parse.urlencode({'a': 'some value/another'},
    1061                                          quote_via=urllib.parse.quote)
    1062          self.assertEqual(result, "a=some%20value%2Fanother")
    1063          result = urllib.parse.urlencode({'a': 'some value/another'},
    1064                                          safe='/', quote_via=urllib.parse.quote)
    1065          self.assertEqual(result, "a=some%20value/another")
    1066  
    1067      def test_quote_from_bytes(self):
    1068          self.assertRaises(TypeError, urllib.parse.quote_from_bytes, 'foo')
    1069          result = urllib.parse.quote_from_bytes(b'archaeological arcana')
    1070          self.assertEqual(result, 'archaeological%20arcana')
    1071          result = urllib.parse.quote_from_bytes(b'')
    1072          self.assertEqual(result, '')
    1073          result = urllib.parse.quote_from_bytes(b'A'*10_000)
    1074          self.assertEqual(result, 'A'*10_000)
    1075          result = urllib.parse.quote_from_bytes(b'z\x01/ '*253_183)
    1076          self.assertEqual(result, 'z%01/%20'*253_183)
    1077  
    1078      def test_unquote_to_bytes(self):
    1079          result = urllib.parse.unquote_to_bytes('abc%20def')
    1080          self.assertEqual(result, b'abc def')
    1081          result = urllib.parse.unquote_to_bytes('')
    1082          self.assertEqual(result, b'')
    1083  
    1084      def test_quote_errors(self):
    1085          self.assertRaises(TypeError, urllib.parse.quote, b'foo',
    1086                            encoding='utf-8')
    1087          self.assertRaises(TypeError, urllib.parse.quote, b'foo', errors='strict')
    1088  
    1089      def test_issue14072(self):
    1090          p1 = urllib.parse.urlsplit('tel:+31-641044153')
    1091          self.assertEqual(p1.scheme, 'tel')
    1092          self.assertEqual(p1.path, '+31-641044153')
    1093          p2 = urllib.parse.urlsplit('tel:+31641044153')
    1094          self.assertEqual(p2.scheme, 'tel')
    1095          self.assertEqual(p2.path, '+31641044153')
    1096          # assert the behavior for urlparse
    1097          p1 = urllib.parse.urlparse('tel:+31-641044153')
    1098          self.assertEqual(p1.scheme, 'tel')
    1099          self.assertEqual(p1.path, '+31-641044153')
    1100          p2 = urllib.parse.urlparse('tel:+31641044153')
    1101          self.assertEqual(p2.scheme, 'tel')
    1102          self.assertEqual(p2.path, '+31641044153')
    1103  
    1104      def test_invalid_bracketed_hosts(self):
    1105          self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[192.0.2.146]/Path?Query')
    1106          self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[important.com:8000]/Path?Query')
    1107          self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v123r.IP]/Path?Query')
    1108          self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v12ae]/Path?Query')
    1109          self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v.IP]/Path?Query')
    1110          self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v123.]/Path?Query')
    1111          self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v]/Path?Query')
    1112          self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af::2309::fae7:1234]/Path?Query')
    1113          self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af:2309::fae7:1234:2342:438e:192.0.2.146]/Path?Query')
    1114          self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@]v6a.ip[/Path')
    1115  
    1116      def test_splitting_bracketed_hosts(self):
    1117          p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]/path?query')
    1118          self.assertEqual(p1.hostname, 'v6a.ip')
    1119          self.assertEqual(p1.username, 'user')
    1120          self.assertEqual(p1.path, '/path')
    1121          p2 = urllib.parse.urlsplit('scheme://user@[0439:23af:2309::fae7%test]/path?query')
    1122          self.assertEqual(p2.hostname, '0439:23af:2309::fae7%test')
    1123          self.assertEqual(p2.username, 'user')
    1124          self.assertEqual(p2.path, '/path')
    1125          p3 = urllib.parse.urlsplit('scheme://user@[0439:23af:2309::fae7:1234:192.0.2.146%test]/path?query')
    1126          self.assertEqual(p3.hostname, '0439:23af:2309::fae7:1234:192.0.2.146%test')
    1127          self.assertEqual(p3.username, 'user')
    1128          self.assertEqual(p3.path, '/path')
    1129  
    1130      def test_port_casting_failure_message(self):
    1131          message = "Port could not be cast to integer value as 'oracle'"
    1132          p1 = urllib.parse.urlparse('http://Server=sde; Service=sde:oracle')
    1133          with self.assertRaisesRegex(ValueError, message):
    1134              p1.port
    1135  
    1136          p2 = urllib.parse.urlsplit('http://Server=sde; Service=sde:oracle')
    1137          with self.assertRaisesRegex(ValueError, message):
    1138              p2.port
    1139  
    1140      def test_telurl_params(self):
    1141          p1 = urllib.parse.urlparse('tel:123-4;phone-context=+1-650-516')
    1142          self.assertEqual(p1.scheme, 'tel')
    1143          self.assertEqual(p1.path, '123-4')
    1144          self.assertEqual(p1.params, 'phone-context=+1-650-516')
    1145  
    1146          p1 = urllib.parse.urlparse('tel:+1-201-555-0123')
    1147          self.assertEqual(p1.scheme, 'tel')
    1148          self.assertEqual(p1.path, '+1-201-555-0123')
    1149          self.assertEqual(p1.params, '')
    1150  
    1151          p1 = urllib.parse.urlparse('tel:7042;phone-context=example.com')
    1152          self.assertEqual(p1.scheme, 'tel')
    1153          self.assertEqual(p1.path, '7042')
    1154          self.assertEqual(p1.params, 'phone-context=example.com')
    1155  
    1156          p1 = urllib.parse.urlparse('tel:863-1234;phone-context=+1-914-555')
    1157          self.assertEqual(p1.scheme, 'tel')
    1158          self.assertEqual(p1.path, '863-1234')
    1159          self.assertEqual(p1.params, 'phone-context=+1-914-555')
    1160  
    1161      def test_Quoter_repr(self):
    1162          quoter = urllib.parse._Quoter(urllib.parse._ALWAYS_SAFE)
    1163          self.assertIn('Quoter', repr(quoter))
    1164  
    def test_clear_cache_for_code_coverage(self):
        # No assertion here: the call only has to complete without raising,
        # so that clear_cache() is executed at least once by the test suite.
        urllib.parse.clear_cache()
    1167  
    1168      def test_urllib_parse_getattr_failure(self):
    1169          """Test that urllib.parse.__getattr__() fails correctly."""
    1170          with self.assertRaises(AttributeError):
    1171              unused = urllib.parse.this_does_not_exist
    1172  
    1173      def test_all(self):
    1174          expected = []
    1175          undocumented = {
    1176              'splitattr', 'splithost', 'splitnport', 'splitpasswd',
    1177              'splitport', 'splitquery', 'splittag', 'splittype', 'splituser',
    1178              'splitvalue',
    1179              'ResultBase', 'clear_cache', 'to_bytes', 'unwrap',
    1180          }
    1181          for name in dir(urllib.parse):
    1182              if name.startswith('_') or name in undocumented:
    1183                  continue
    1184              object = getattr(urllib.parse, name)
    1185              if getattr(object, '__module__', None) == 'urllib.parse':
    1186                  expected.append(name)
    1187          self.assertCountEqual(urllib.parse.__all__, expected)
    1188  
    1189      def test_urlsplit_normalization(self):
    1190          # Certain characters should never occur in the netloc,
    1191          # including under normalization.
    1192          # Ensure that ALL of them are detected and cause an error
    1193          illegal_chars = '/:#?@'
    1194          hex_chars = {'{:04X}'.format(ord(c)) for c in illegal_chars}
    1195          denorm_chars = [
    1196              c for c in map(chr, range(128, sys.maxunicode))
    1197              if unicodedata.decomposition(c)
    1198              and (hex_chars & set(unicodedata.decomposition(c).split()))
    1199              and c not in illegal_chars
    1200          ]
    1201          # Sanity check that we found at least one such character
    1202          self.assertIn('\u2100', denorm_chars)
    1203          self.assertIn('\uFF03', denorm_chars)
    1204  
    1205          # bpo-36742: Verify port separators are ignored when they
    1206          # existed prior to decomposition
    1207          urllib.parse.urlsplit('http://\u30d5\u309a:80')
    1208          with self.assertRaises(ValueError):
    1209              urllib.parse.urlsplit('http://\u30d5\u309a\ufe1380')
    1210  
    1211          for scheme in ["http", "https", "ftp"]:
    1212              for netloc in ["netloc{}false.netloc", "n{}user@netloc"]:
    1213                  for c in denorm_chars:
    1214                      url = "{}://{}/path".format(scheme, netloc.format(c))
    1215                      with self.subTest(url=url, char='{:04X}'.format(ord(c))):
    1216                          with self.assertRaises(ValueError):
    1217                              urllib.parse.urlsplit(url)
    1218  
    1219  class ESC[4;38;5;81mUtility_Tests(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
    1220      """Testcase to test the various utility functions in the urllib."""
    1221      # In Python 2 this test class was in test_urllib.
    1222  
    1223      def test_splittype(self):
    1224          splittype = urllib.parse._splittype
    1225          self.assertEqual(splittype('type:opaquestring'), ('type', 'opaquestring'))
    1226          self.assertEqual(splittype('opaquestring'), (None, 'opaquestring'))
    1227          self.assertEqual(splittype(':opaquestring'), (None, ':opaquestring'))
    1228          self.assertEqual(splittype('type:'), ('type', ''))
    1229          self.assertEqual(splittype('type:opaque:string'), ('type', 'opaque:string'))
    1230  
    1231      def test_splithost(self):
    1232          splithost = urllib.parse._splithost
    1233          self.assertEqual(splithost('//www.example.org:80/foo/bar/baz.html'),
    1234                           ('www.example.org:80', '/foo/bar/baz.html'))
    1235          self.assertEqual(splithost('//www.example.org:80'),
    1236                           ('www.example.org:80', ''))
    1237          self.assertEqual(splithost('/foo/bar/baz.html'),
    1238                           (None, '/foo/bar/baz.html'))
    1239  
    1240          # bpo-30500: # starts a fragment.
    1241          self.assertEqual(splithost('//127.0.0.1#@host.com'),
    1242                           ('127.0.0.1', '/#@host.com'))
    1243          self.assertEqual(splithost('//127.0.0.1#@host.com:80'),
    1244                           ('127.0.0.1', '/#@host.com:80'))
    1245          self.assertEqual(splithost('//127.0.0.1:80#@host.com'),
    1246                           ('127.0.0.1:80', '/#@host.com'))
    1247  
    1248          # Empty host is returned as empty string.
    1249          self.assertEqual(splithost("///file"),
    1250                           ('', '/file'))
    1251  
    1252          # Trailing semicolon, question mark and hash symbol are kept.
    1253          self.assertEqual(splithost("//example.net/file;"),
    1254                           ('example.net', '/file;'))
    1255          self.assertEqual(splithost("//example.net/file?"),
    1256                           ('example.net', '/file?'))
    1257          self.assertEqual(splithost("//example.net/file#"),
    1258                           ('example.net', '/file#'))
    1259  
    1260      def test_splituser(self):
    1261          splituser = urllib.parse._splituser
    1262          self.assertEqual(splituser('User:Pass@www.python.org:080'),
    1263                           ('User:Pass', 'www.python.org:080'))
    1264          self.assertEqual(splituser('@www.python.org:080'),
    1265                           ('', 'www.python.org:080'))
    1266          self.assertEqual(splituser('www.python.org:080'),
    1267                           (None, 'www.python.org:080'))
    1268          self.assertEqual(splituser('User:Pass@'),
    1269                           ('User:Pass', ''))
    1270          self.assertEqual(splituser('User@example.com:Pass@www.python.org:080'),
    1271                           ('User@example.com:Pass', 'www.python.org:080'))
    1272  
    1273      def test_splitpasswd(self):
    1274          # Some of the password examples are not sensible, but it is added to
    1275          # confirming to RFC2617 and addressing issue4675.
    1276          splitpasswd = urllib.parse._splitpasswd
    1277          self.assertEqual(splitpasswd('user:ab'), ('user', 'ab'))
    1278          self.assertEqual(splitpasswd('user:a\nb'), ('user', 'a\nb'))
    1279          self.assertEqual(splitpasswd('user:a\tb'), ('user', 'a\tb'))
    1280          self.assertEqual(splitpasswd('user:a\rb'), ('user', 'a\rb'))
    1281          self.assertEqual(splitpasswd('user:a\fb'), ('user', 'a\fb'))
    1282          self.assertEqual(splitpasswd('user:a\vb'), ('user', 'a\vb'))
    1283          self.assertEqual(splitpasswd('user:a:b'), ('user', 'a:b'))
    1284          self.assertEqual(splitpasswd('user:a b'), ('user', 'a b'))
    1285          self.assertEqual(splitpasswd('user 2:ab'), ('user 2', 'ab'))
    1286          self.assertEqual(splitpasswd('user+1:a+b'), ('user+1', 'a+b'))
    1287          self.assertEqual(splitpasswd('user:'), ('user', ''))
    1288          self.assertEqual(splitpasswd('user'), ('user', None))
    1289          self.assertEqual(splitpasswd(':ab'), ('', 'ab'))
    1290  
    1291      def test_splitport(self):
    1292          splitport = urllib.parse._splitport
    1293          self.assertEqual(splitport('parrot:88'), ('parrot', '88'))
    1294          self.assertEqual(splitport('parrot'), ('parrot', None))
    1295          self.assertEqual(splitport('parrot:'), ('parrot', None))
    1296          self.assertEqual(splitport('127.0.0.1'), ('127.0.0.1', None))
    1297          self.assertEqual(splitport('parrot:cheese'), ('parrot:cheese', None))
    1298          self.assertEqual(splitport('[::1]:88'), ('[::1]', '88'))
    1299          self.assertEqual(splitport('[::1]'), ('[::1]', None))
    1300          self.assertEqual(splitport(':88'), ('', '88'))
    1301  
    1302      def test_splitnport(self):
    1303          splitnport = urllib.parse._splitnport
    1304          self.assertEqual(splitnport('parrot:88'), ('parrot', 88))
    1305          self.assertEqual(splitnport('parrot'), ('parrot', -1))
    1306          self.assertEqual(splitnport('parrot', 55), ('parrot', 55))
    1307          self.assertEqual(splitnport('parrot:'), ('parrot', -1))
    1308          self.assertEqual(splitnport('parrot:', 55), ('parrot', 55))
    1309          self.assertEqual(splitnport('127.0.0.1'), ('127.0.0.1', -1))
    1310          self.assertEqual(splitnport('127.0.0.1', 55), ('127.0.0.1', 55))
    1311          self.assertEqual(splitnport('parrot:cheese'), ('parrot', None))
    1312          self.assertEqual(splitnport('parrot:cheese', 55), ('parrot', None))
    1313          self.assertEqual(splitnport('parrot: +1_0 '), ('parrot', None))
    1314  
    1315      def test_splitquery(self):
    1316          # Normal cases are exercised by other tests; ensure that we also
    1317          # catch cases with no port specified (testcase ensuring coverage)
    1318          splitquery = urllib.parse._splitquery
    1319          self.assertEqual(splitquery('http://python.org/fake?foo=bar'),
    1320                           ('http://python.org/fake', 'foo=bar'))
    1321          self.assertEqual(splitquery('http://python.org/fake?foo=bar?'),
    1322                           ('http://python.org/fake?foo=bar', ''))
    1323          self.assertEqual(splitquery('http://python.org/fake'),
    1324                           ('http://python.org/fake', None))
    1325          self.assertEqual(splitquery('?foo=bar'), ('', 'foo=bar'))
    1326  
    1327      def test_splittag(self):
    1328          splittag = urllib.parse._splittag
    1329          self.assertEqual(splittag('http://example.com?foo=bar#baz'),
    1330                           ('http://example.com?foo=bar', 'baz'))
    1331          self.assertEqual(splittag('http://example.com?foo=bar#'),
    1332                           ('http://example.com?foo=bar', ''))
    1333          self.assertEqual(splittag('#baz'), ('', 'baz'))
    1334          self.assertEqual(splittag('http://example.com?foo=bar'),
    1335                           ('http://example.com?foo=bar', None))
    1336          self.assertEqual(splittag('http://example.com?foo=bar#baz#boo'),
    1337                           ('http://example.com?foo=bar#baz', 'boo'))
    1338  
    1339      def test_splitattr(self):
    1340          splitattr = urllib.parse._splitattr
    1341          self.assertEqual(splitattr('/path;attr1=value1;attr2=value2'),
    1342                           ('/path', ['attr1=value1', 'attr2=value2']))
    1343          self.assertEqual(splitattr('/path;'), ('/path', ['']))
    1344          self.assertEqual(splitattr(';attr1=value1;attr2=value2'),
    1345                           ('', ['attr1=value1', 'attr2=value2']))
    1346          self.assertEqual(splitattr('/path'), ('/path', []))
    1347  
    1348      def test_splitvalue(self):
    1349          # Normal cases are exercised by other tests; test pathological cases
    1350          # with no key/value pairs. (testcase ensuring coverage)
    1351          splitvalue = urllib.parse._splitvalue
    1352          self.assertEqual(splitvalue('foo=bar'), ('foo', 'bar'))
    1353          self.assertEqual(splitvalue('foo='), ('foo', ''))
    1354          self.assertEqual(splitvalue('=bar'), ('', 'bar'))
    1355          self.assertEqual(splitvalue('foobar'), ('foobar', None))
    1356          self.assertEqual(splitvalue('foo=bar=baz'), ('foo', 'bar=baz'))
    1357  
    1358      def test_to_bytes(self):
    1359          result = urllib.parse._to_bytes('http://www.python.org')
    1360          self.assertEqual(result, 'http://www.python.org')
    1361          self.assertRaises(UnicodeError, urllib.parse._to_bytes,
    1362                            'http://www.python.org/medi\u00e6val')
    1363  
    1364      def test_unwrap(self):
    1365          for wrapped_url in ('<URL:scheme://host/path>', '<scheme://host/path>',
    1366                              'URL:scheme://host/path', 'scheme://host/path'):
    1367              url = urllib.parse.unwrap(wrapped_url)
    1368              self.assertEqual(url, 'scheme://host/path')
    1369  
    1370  
    1371  class ESC[4;38;5;81mDeprecationTest(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
    1372  
    1373      def test_Quoter_deprecation(self):
    1374          with self.assertWarns(DeprecationWarning) as cm:
    1375              old_class = urllib.parse.Quoter
    1376              self.assertIs(old_class, urllib.parse._Quoter)
    1377          self.assertIn('Quoter will be removed', str(cm.warning))
    1378  
    1379      def test_splittype_deprecation(self):
    1380          with self.assertWarns(DeprecationWarning) as cm:
    1381              urllib.parse.splittype('')
    1382          self.assertEqual(str(cm.warning),
    1383                           'urllib.parse.splittype() is deprecated as of 3.8, '
    1384                           'use urllib.parse.urlparse() instead')
    1385  
    1386      def test_splithost_deprecation(self):
    1387          with self.assertWarns(DeprecationWarning) as cm:
    1388              urllib.parse.splithost('')
    1389          self.assertEqual(str(cm.warning),
    1390                           'urllib.parse.splithost() is deprecated as of 3.8, '
    1391                           'use urllib.parse.urlparse() instead')
    1392  
    1393      def test_splituser_deprecation(self):
    1394          with self.assertWarns(DeprecationWarning) as cm:
    1395              urllib.parse.splituser('')
    1396          self.assertEqual(str(cm.warning),
    1397                           'urllib.parse.splituser() is deprecated as of 3.8, '
    1398                           'use urllib.parse.urlparse() instead')
    1399  
    1400      def test_splitpasswd_deprecation(self):
    1401          with self.assertWarns(DeprecationWarning) as cm:
    1402              urllib.parse.splitpasswd('')
    1403          self.assertEqual(str(cm.warning),
    1404                           'urllib.parse.splitpasswd() is deprecated as of 3.8, '
    1405                           'use urllib.parse.urlparse() instead')
    1406  
    1407      def test_splitport_deprecation(self):
    1408          with self.assertWarns(DeprecationWarning) as cm:
    1409              urllib.parse.splitport('')
    1410          self.assertEqual(str(cm.warning),
    1411                           'urllib.parse.splitport() is deprecated as of 3.8, '
    1412                           'use urllib.parse.urlparse() instead')
    1413  
    1414      def test_splitnport_deprecation(self):
    1415          with self.assertWarns(DeprecationWarning) as cm:
    1416              urllib.parse.splitnport('')
    1417          self.assertEqual(str(cm.warning),
    1418                           'urllib.parse.splitnport() is deprecated as of 3.8, '
    1419                           'use urllib.parse.urlparse() instead')
    1420  
    1421      def test_splitquery_deprecation(self):
    1422          with self.assertWarns(DeprecationWarning) as cm:
    1423              urllib.parse.splitquery('')
    1424          self.assertEqual(str(cm.warning),
    1425                           'urllib.parse.splitquery() is deprecated as of 3.8, '
    1426                           'use urllib.parse.urlparse() instead')
    1427  
    1428      def test_splittag_deprecation(self):
    1429          with self.assertWarns(DeprecationWarning) as cm:
    1430              urllib.parse.splittag('')
    1431          self.assertEqual(str(cm.warning),
    1432                           'urllib.parse.splittag() is deprecated as of 3.8, '
    1433                           'use urllib.parse.urlparse() instead')
    1434  
    1435      def test_splitattr_deprecation(self):
    1436          with self.assertWarns(DeprecationWarning) as cm:
    1437              urllib.parse.splitattr('')
    1438          self.assertEqual(str(cm.warning),
    1439                           'urllib.parse.splitattr() is deprecated as of 3.8, '
    1440                           'use urllib.parse.urlparse() instead')
    1441  
    1442      def test_splitvalue_deprecation(self):
    1443          with self.assertWarns(DeprecationWarning) as cm:
    1444              urllib.parse.splitvalue('')
    1445          self.assertEqual(str(cm.warning),
    1446                           'urllib.parse.splitvalue() is deprecated as of 3.8, '
    1447                           'use urllib.parse.parse_qsl() instead')
    1448  
    1449      def test_to_bytes_deprecation(self):
    1450          with self.assertWarns(DeprecationWarning) as cm:
    1451              urllib.parse.to_bytes('')
    1452          self.assertEqual(str(cm.warning),
    1453                           'urllib.parse.to_bytes() is deprecated as of 3.8')
    1454  
    1455  
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()