1 import sys
2 import unicodedata
3 import unittest
4 import urllib.parse
5
6 RFC1808_BASE = "http://a/b/c/d;p?q#f"
7 RFC2396_BASE = "http://a/b/c/d;p?q"
8 RFC3986_BASE = 'http://a/b/c/d;p?q'
9 SIMPLE_BASE = 'http://a/b/c/d'
10
11 # Each parse_qsl testcase is a two-tuple that contains
12 # a string with the query and a list with the expected result.
13
14 parse_qsl_test_cases = [
15 ("", []),
16 ("&", []),
17 ("&&", []),
18 ("=", [('', '')]),
19 ("=a", [('', 'a')]),
20 ("a", [('a', '')]),
21 ("a=", [('a', '')]),
22 ("&a=b", [('a', 'b')]),
23 ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
24 ("a=1&a=2", [('a', '1'), ('a', '2')]),
25 (b"", []),
26 (b"&", []),
27 (b"&&", []),
28 (b"=", [(b'', b'')]),
29 (b"=a", [(b'', b'a')]),
30 (b"a", [(b'a', b'')]),
31 (b"a=", [(b'a', b'')]),
32 (b"&a=b", [(b'a', b'b')]),
33 (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
34 (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),
35 (";a=b", [(';a', 'b')]),
36 ("a=a+b;b=b+c", [('a', 'a b;b=b c')]),
37 (b";a=b", [(b';a', b'b')]),
38 (b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]),
39 ]
40
41 # Each parse_qs testcase is a two-tuple that contains
42 # a string with the query and a dictionary with the expected result.
43
44 parse_qs_test_cases = [
45 ("", {}),
46 ("&", {}),
47 ("&&", {}),
48 ("=", {'': ['']}),
49 ("=a", {'': ['a']}),
50 ("a", {'a': ['']}),
51 ("a=", {'a': ['']}),
52 ("&a=b", {'a': ['b']}),
53 ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
54 ("a=1&a=2", {'a': ['1', '2']}),
55 (b"", {}),
56 (b"&", {}),
57 (b"&&", {}),
58 (b"=", {b'': [b'']}),
59 (b"=a", {b'': [b'a']}),
60 (b"a", {b'a': [b'']}),
61 (b"a=", {b'a': [b'']}),
62 (b"&a=b", {b'a': [b'b']}),
63 (b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
64 (b"a=1&a=2", {b'a': [b'1', b'2']}),
65 (";a=b", {';a': ['b']}),
66 ("a=a+b;b=b+c", {'a': ['a b;b=b c']}),
67 (b";a=b", {b';a': [b'b']}),
68 (b"a=a+b;b=b+c", {b'a':[ b'a b;b=b c']}),
69 ]
70
71 class ESC[4;38;5;81mUrlParseTestCase(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
72
73 def checkRoundtrips(self, url, parsed, split):
74 result = urllib.parse.urlparse(url)
75 self.assertEqual(result, parsed)
76 t = (result.scheme, result.netloc, result.path,
77 result.params, result.query, result.fragment)
78 self.assertEqual(t, parsed)
79 # put it back together and it should be the same
80 result2 = urllib.parse.urlunparse(result)
81 self.assertEqual(result2, url)
82 self.assertEqual(result2, result.geturl())
83
84 # the result of geturl() is a fixpoint; we can always parse it
85 # again to get the same result:
86 result3 = urllib.parse.urlparse(result.geturl())
87 self.assertEqual(result3.geturl(), result.geturl())
88 self.assertEqual(result3, result)
89 self.assertEqual(result3.scheme, result.scheme)
90 self.assertEqual(result3.netloc, result.netloc)
91 self.assertEqual(result3.path, result.path)
92 self.assertEqual(result3.params, result.params)
93 self.assertEqual(result3.query, result.query)
94 self.assertEqual(result3.fragment, result.fragment)
95 self.assertEqual(result3.username, result.username)
96 self.assertEqual(result3.password, result.password)
97 self.assertEqual(result3.hostname, result.hostname)
98 self.assertEqual(result3.port, result.port)
99
100 # check the roundtrip using urlsplit() as well
101 result = urllib.parse.urlsplit(url)
102 self.assertEqual(result, split)
103 t = (result.scheme, result.netloc, result.path,
104 result.query, result.fragment)
105 self.assertEqual(t, split)
106 result2 = urllib.parse.urlunsplit(result)
107 self.assertEqual(result2, url)
108 self.assertEqual(result2, result.geturl())
109
110 # check the fixpoint property of re-parsing the result of geturl()
111 result3 = urllib.parse.urlsplit(result.geturl())
112 self.assertEqual(result3.geturl(), result.geturl())
113 self.assertEqual(result3, result)
114 self.assertEqual(result3.scheme, result.scheme)
115 self.assertEqual(result3.netloc, result.netloc)
116 self.assertEqual(result3.path, result.path)
117 self.assertEqual(result3.query, result.query)
118 self.assertEqual(result3.fragment, result.fragment)
119 self.assertEqual(result3.username, result.username)
120 self.assertEqual(result3.password, result.password)
121 self.assertEqual(result3.hostname, result.hostname)
122 self.assertEqual(result3.port, result.port)
123
124 def test_qsl(self):
125 for orig, expect in parse_qsl_test_cases:
126 result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
127 self.assertEqual(result, expect, "Error parsing %r" % orig)
128 expect_without_blanks = [v for v in expect if len(v[1])]
129 result = urllib.parse.parse_qsl(orig, keep_blank_values=False)
130 self.assertEqual(result, expect_without_blanks,
131 "Error parsing %r" % orig)
132
133 def test_qs(self):
134 for orig, expect in parse_qs_test_cases:
135 result = urllib.parse.parse_qs(orig, keep_blank_values=True)
136 self.assertEqual(result, expect, "Error parsing %r" % orig)
137 expect_without_blanks = {v: expect[v]
138 for v in expect if len(expect[v][0])}
139 result = urllib.parse.parse_qs(orig, keep_blank_values=False)
140 self.assertEqual(result, expect_without_blanks,
141 "Error parsing %r" % orig)
142
143 def test_roundtrips(self):
144 str_cases = [
145 ('file:///tmp/junk.txt',
146 ('file', '', '/tmp/junk.txt', '', '', ''),
147 ('file', '', '/tmp/junk.txt', '', '')),
148 ('imap://mail.python.org/mbox1',
149 ('imap', 'mail.python.org', '/mbox1', '', '', ''),
150 ('imap', 'mail.python.org', '/mbox1', '', '')),
151 ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
152 ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
153 '', '', ''),
154 ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
155 '', '')),
156 ('nfs://server/path/to/file.txt',
157 ('nfs', 'server', '/path/to/file.txt', '', '', ''),
158 ('nfs', 'server', '/path/to/file.txt', '', '')),
159 ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
160 ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
161 '', '', ''),
162 ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
163 '', '')),
164 ('git+ssh://git@github.com/user/project.git',
165 ('git+ssh', 'git@github.com','/user/project.git',
166 '','',''),
167 ('git+ssh', 'git@github.com','/user/project.git',
168 '', '')),
169 ]
170 def _encode(t):
171 return (t[0].encode('ascii'),
172 tuple(x.encode('ascii') for x in t[1]),
173 tuple(x.encode('ascii') for x in t[2]))
174 bytes_cases = [_encode(x) for x in str_cases]
175 for url, parsed, split in str_cases + bytes_cases:
176 self.checkRoundtrips(url, parsed, split)
177
178 def test_http_roundtrips(self):
179 # urllib.parse.urlsplit treats 'http:' as an optimized special case,
180 # so we test both 'http:' and 'https:' in all the following.
181 # Three cheers for white box knowledge!
182 str_cases = [
183 ('://www.python.org',
184 ('www.python.org', '', '', '', ''),
185 ('www.python.org', '', '', '')),
186 ('://www.python.org#abc',
187 ('www.python.org', '', '', '', 'abc'),
188 ('www.python.org', '', '', 'abc')),
189 ('://www.python.org?q=abc',
190 ('www.python.org', '', '', 'q=abc', ''),
191 ('www.python.org', '', 'q=abc', '')),
192 ('://www.python.org/#abc',
193 ('www.python.org', '/', '', '', 'abc'),
194 ('www.python.org', '/', '', 'abc')),
195 ('://a/b/c/d;p?q#f',
196 ('a', '/b/c/d', 'p', 'q', 'f'),
197 ('a', '/b/c/d;p', 'q', 'f')),
198 ]
199 def _encode(t):
200 return (t[0].encode('ascii'),
201 tuple(x.encode('ascii') for x in t[1]),
202 tuple(x.encode('ascii') for x in t[2]))
203 bytes_cases = [_encode(x) for x in str_cases]
204 str_schemes = ('http', 'https')
205 bytes_schemes = (b'http', b'https')
206 str_tests = str_schemes, str_cases
207 bytes_tests = bytes_schemes, bytes_cases
208 for schemes, test_cases in (str_tests, bytes_tests):
209 for scheme in schemes:
210 for url, parsed, split in test_cases:
211 url = scheme + url
212 parsed = (scheme,) + parsed
213 split = (scheme,) + split
214 self.checkRoundtrips(url, parsed, split)
215
216 def checkJoin(self, base, relurl, expected):
217 str_components = (base, relurl, expected)
218 self.assertEqual(urllib.parse.urljoin(base, relurl), expected)
219 bytes_components = baseb, relurlb, expectedb = [
220 x.encode('ascii') for x in str_components]
221 self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)
222
223 def test_unparse_parse(self):
224 str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]
225 bytes_cases = [x.encode('ascii') for x in str_cases]
226 for u in str_cases + bytes_cases:
227 self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)
228 self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)
229
230 def test_RFC1808(self):
231 # "normal" cases from RFC 1808:
232 self.checkJoin(RFC1808_BASE, 'g:h', 'g:h')
233 self.checkJoin(RFC1808_BASE, 'g', 'http://a/b/c/g')
234 self.checkJoin(RFC1808_BASE, './g', 'http://a/b/c/g')
235 self.checkJoin(RFC1808_BASE, 'g/', 'http://a/b/c/g/')
236 self.checkJoin(RFC1808_BASE, '/g', 'http://a/g')
237 self.checkJoin(RFC1808_BASE, '//g', 'http://g')
238 self.checkJoin(RFC1808_BASE, 'g?y', 'http://a/b/c/g?y')
239 self.checkJoin(RFC1808_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
240 self.checkJoin(RFC1808_BASE, '#s', 'http://a/b/c/d;p?q#s')
241 self.checkJoin(RFC1808_BASE, 'g#s', 'http://a/b/c/g#s')
242 self.checkJoin(RFC1808_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
243 self.checkJoin(RFC1808_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
244 self.checkJoin(RFC1808_BASE, 'g;x', 'http://a/b/c/g;x')
245 self.checkJoin(RFC1808_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
246 self.checkJoin(RFC1808_BASE, '.', 'http://a/b/c/')
247 self.checkJoin(RFC1808_BASE, './', 'http://a/b/c/')
248 self.checkJoin(RFC1808_BASE, '..', 'http://a/b/')
249 self.checkJoin(RFC1808_BASE, '../', 'http://a/b/')
250 self.checkJoin(RFC1808_BASE, '../g', 'http://a/b/g')
251 self.checkJoin(RFC1808_BASE, '../..', 'http://a/')
252 self.checkJoin(RFC1808_BASE, '../../', 'http://a/')
253 self.checkJoin(RFC1808_BASE, '../../g', 'http://a/g')
254
255 # "abnormal" cases from RFC 1808:
256 self.checkJoin(RFC1808_BASE, '', 'http://a/b/c/d;p?q#f')
257 self.checkJoin(RFC1808_BASE, 'g.', 'http://a/b/c/g.')
258 self.checkJoin(RFC1808_BASE, '.g', 'http://a/b/c/.g')
259 self.checkJoin(RFC1808_BASE, 'g..', 'http://a/b/c/g..')
260 self.checkJoin(RFC1808_BASE, '..g', 'http://a/b/c/..g')
261 self.checkJoin(RFC1808_BASE, './../g', 'http://a/b/g')
262 self.checkJoin(RFC1808_BASE, './g/.', 'http://a/b/c/g/')
263 self.checkJoin(RFC1808_BASE, 'g/./h', 'http://a/b/c/g/h')
264 self.checkJoin(RFC1808_BASE, 'g/../h', 'http://a/b/c/h')
265
266 # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
267 # so we'll not actually run these tests (which expect 1808 behavior).
268 #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g')
269 #self.checkJoin(RFC1808_BASE, 'http:', 'http:')
270
271 # XXX: The following tests are no longer compatible with RFC3986
272 # self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g')
273 # self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g')
274 # self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g')
275 # self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g')
276
277
278 def test_RFC2368(self):
279 # Issue 11467: path that starts with a number is not parsed correctly
280 self.assertEqual(urllib.parse.urlparse('mailto:1337@example.org'),
281 ('mailto', '', '1337@example.org', '', '', ''))
282
283 def test_RFC2396(self):
284 # cases from RFC 2396
285
286 self.checkJoin(RFC2396_BASE, 'g:h', 'g:h')
287 self.checkJoin(RFC2396_BASE, 'g', 'http://a/b/c/g')
288 self.checkJoin(RFC2396_BASE, './g', 'http://a/b/c/g')
289 self.checkJoin(RFC2396_BASE, 'g/', 'http://a/b/c/g/')
290 self.checkJoin(RFC2396_BASE, '/g', 'http://a/g')
291 self.checkJoin(RFC2396_BASE, '//g', 'http://g')
292 self.checkJoin(RFC2396_BASE, 'g?y', 'http://a/b/c/g?y')
293 self.checkJoin(RFC2396_BASE, '#s', 'http://a/b/c/d;p?q#s')
294 self.checkJoin(RFC2396_BASE, 'g#s', 'http://a/b/c/g#s')
295 self.checkJoin(RFC2396_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
296 self.checkJoin(RFC2396_BASE, 'g;x', 'http://a/b/c/g;x')
297 self.checkJoin(RFC2396_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
298 self.checkJoin(RFC2396_BASE, '.', 'http://a/b/c/')
299 self.checkJoin(RFC2396_BASE, './', 'http://a/b/c/')
300 self.checkJoin(RFC2396_BASE, '..', 'http://a/b/')
301 self.checkJoin(RFC2396_BASE, '../', 'http://a/b/')
302 self.checkJoin(RFC2396_BASE, '../g', 'http://a/b/g')
303 self.checkJoin(RFC2396_BASE, '../..', 'http://a/')
304 self.checkJoin(RFC2396_BASE, '../../', 'http://a/')
305 self.checkJoin(RFC2396_BASE, '../../g', 'http://a/g')
306 self.checkJoin(RFC2396_BASE, '', RFC2396_BASE)
307 self.checkJoin(RFC2396_BASE, 'g.', 'http://a/b/c/g.')
308 self.checkJoin(RFC2396_BASE, '.g', 'http://a/b/c/.g')
309 self.checkJoin(RFC2396_BASE, 'g..', 'http://a/b/c/g..')
310 self.checkJoin(RFC2396_BASE, '..g', 'http://a/b/c/..g')
311 self.checkJoin(RFC2396_BASE, './../g', 'http://a/b/g')
312 self.checkJoin(RFC2396_BASE, './g/.', 'http://a/b/c/g/')
313 self.checkJoin(RFC2396_BASE, 'g/./h', 'http://a/b/c/g/h')
314 self.checkJoin(RFC2396_BASE, 'g/../h', 'http://a/b/c/h')
315 self.checkJoin(RFC2396_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y')
316 self.checkJoin(RFC2396_BASE, 'g;x=1/../y', 'http://a/b/c/y')
317 self.checkJoin(RFC2396_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
318 self.checkJoin(RFC2396_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x')
319 self.checkJoin(RFC2396_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
320 self.checkJoin(RFC2396_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x')
321
322 # XXX: The following tests are no longer compatible with RFC3986
323 # self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g')
324 # self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g')
325 # self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g')
326 # self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g')
327
328 def test_RFC3986(self):
329 self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y')
330 self.checkJoin(RFC3986_BASE, ';x', 'http://a/b/c/;x')
331 self.checkJoin(RFC3986_BASE, 'g:h','g:h')
332 self.checkJoin(RFC3986_BASE, 'g','http://a/b/c/g')
333 self.checkJoin(RFC3986_BASE, './g','http://a/b/c/g')
334 self.checkJoin(RFC3986_BASE, 'g/','http://a/b/c/g/')
335 self.checkJoin(RFC3986_BASE, '/g','http://a/g')
336 self.checkJoin(RFC3986_BASE, '//g','http://g')
337 self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y')
338 self.checkJoin(RFC3986_BASE, 'g?y','http://a/b/c/g?y')
339 self.checkJoin(RFC3986_BASE, '#s','http://a/b/c/d;p?q#s')
340 self.checkJoin(RFC3986_BASE, 'g#s','http://a/b/c/g#s')
341 self.checkJoin(RFC3986_BASE, 'g?y#s','http://a/b/c/g?y#s')
342 self.checkJoin(RFC3986_BASE, ';x','http://a/b/c/;x')
343 self.checkJoin(RFC3986_BASE, 'g;x','http://a/b/c/g;x')
344 self.checkJoin(RFC3986_BASE, 'g;x?y#s','http://a/b/c/g;x?y#s')
345 self.checkJoin(RFC3986_BASE, '','http://a/b/c/d;p?q')
346 self.checkJoin(RFC3986_BASE, '.','http://a/b/c/')
347 self.checkJoin(RFC3986_BASE, './','http://a/b/c/')
348 self.checkJoin(RFC3986_BASE, '..','http://a/b/')
349 self.checkJoin(RFC3986_BASE, '../','http://a/b/')
350 self.checkJoin(RFC3986_BASE, '../g','http://a/b/g')
351 self.checkJoin(RFC3986_BASE, '../..','http://a/')
352 self.checkJoin(RFC3986_BASE, '../../','http://a/')
353 self.checkJoin(RFC3986_BASE, '../../g','http://a/g')
354 self.checkJoin(RFC3986_BASE, '../../../g', 'http://a/g')
355
356 # Abnormal Examples
357
358 # The 'abnormal scenarios' are incompatible with RFC2986 parsing
359 # Tests are here for reference.
360
361 self.checkJoin(RFC3986_BASE, '../../../g','http://a/g')
362 self.checkJoin(RFC3986_BASE, '../../../../g','http://a/g')
363 self.checkJoin(RFC3986_BASE, '/./g','http://a/g')
364 self.checkJoin(RFC3986_BASE, '/../g','http://a/g')
365 self.checkJoin(RFC3986_BASE, 'g.','http://a/b/c/g.')
366 self.checkJoin(RFC3986_BASE, '.g','http://a/b/c/.g')
367 self.checkJoin(RFC3986_BASE, 'g..','http://a/b/c/g..')
368 self.checkJoin(RFC3986_BASE, '..g','http://a/b/c/..g')
369 self.checkJoin(RFC3986_BASE, './../g','http://a/b/g')
370 self.checkJoin(RFC3986_BASE, './g/.','http://a/b/c/g/')
371 self.checkJoin(RFC3986_BASE, 'g/./h','http://a/b/c/g/h')
372 self.checkJoin(RFC3986_BASE, 'g/../h','http://a/b/c/h')
373 self.checkJoin(RFC3986_BASE, 'g;x=1/./y','http://a/b/c/g;x=1/y')
374 self.checkJoin(RFC3986_BASE, 'g;x=1/../y','http://a/b/c/y')
375 self.checkJoin(RFC3986_BASE, 'g?y/./x','http://a/b/c/g?y/./x')
376 self.checkJoin(RFC3986_BASE, 'g?y/../x','http://a/b/c/g?y/../x')
377 self.checkJoin(RFC3986_BASE, 'g#s/./x','http://a/b/c/g#s/./x')
378 self.checkJoin(RFC3986_BASE, 'g#s/../x','http://a/b/c/g#s/../x')
379 #self.checkJoin(RFC3986_BASE, 'http:g','http:g') # strict parser
380 self.checkJoin(RFC3986_BASE, 'http:g','http://a/b/c/g') #relaxed parser
381
382 # Test for issue9721
383 self.checkJoin('http://a/b/c/de', ';x','http://a/b/c/;x')
384
385 def test_urljoins(self):
386 self.checkJoin(SIMPLE_BASE, 'g:h','g:h')
387 self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g')
388 self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d')
389 self.checkJoin(SIMPLE_BASE, 'g','http://a/b/c/g')
390 self.checkJoin(SIMPLE_BASE, './g','http://a/b/c/g')
391 self.checkJoin(SIMPLE_BASE, 'g/','http://a/b/c/g/')
392 self.checkJoin(SIMPLE_BASE, '/g','http://a/g')
393 self.checkJoin(SIMPLE_BASE, '//g','http://g')
394 self.checkJoin(SIMPLE_BASE, '?y','http://a/b/c/d?y')
395 self.checkJoin(SIMPLE_BASE, 'g?y','http://a/b/c/g?y')
396 self.checkJoin(SIMPLE_BASE, 'g?y/./x','http://a/b/c/g?y/./x')
397 self.checkJoin(SIMPLE_BASE, '.','http://a/b/c/')
398 self.checkJoin(SIMPLE_BASE, './','http://a/b/c/')
399 self.checkJoin(SIMPLE_BASE, '..','http://a/b/')
400 self.checkJoin(SIMPLE_BASE, '../','http://a/b/')
401 self.checkJoin(SIMPLE_BASE, '../g','http://a/b/g')
402 self.checkJoin(SIMPLE_BASE, '../..','http://a/')
403 self.checkJoin(SIMPLE_BASE, '../../g','http://a/g')
404 self.checkJoin(SIMPLE_BASE, './../g','http://a/b/g')
405 self.checkJoin(SIMPLE_BASE, './g/.','http://a/b/c/g/')
406 self.checkJoin(SIMPLE_BASE, 'g/./h','http://a/b/c/g/h')
407 self.checkJoin(SIMPLE_BASE, 'g/../h','http://a/b/c/h')
408 self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g')
409 self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d')
410 self.checkJoin(SIMPLE_BASE, 'http:?y','http://a/b/c/d?y')
411 self.checkJoin(SIMPLE_BASE, 'http:g?y','http://a/b/c/g?y')
412 self.checkJoin(SIMPLE_BASE, 'http:g?y/./x','http://a/b/c/g?y/./x')
413 self.checkJoin('http:///', '..','http:///')
414 self.checkJoin('', 'http://a/b/c/g?y/./x','http://a/b/c/g?y/./x')
415 self.checkJoin('', 'http://a/./g', 'http://a/./g')
416 self.checkJoin('svn://pathtorepo/dir1', 'dir2', 'svn://pathtorepo/dir2')
417 self.checkJoin('svn+ssh://pathtorepo/dir1', 'dir2', 'svn+ssh://pathtorepo/dir2')
418 self.checkJoin('ws://a/b','g','ws://a/g')
419 self.checkJoin('wss://a/b','g','wss://a/g')
420
421 # XXX: The following tests are no longer compatible with RFC3986
422 # self.checkJoin(SIMPLE_BASE, '../../../g','http://a/../g')
423 # self.checkJoin(SIMPLE_BASE, '/./g','http://a/./g')
424
425 # test for issue22118 duplicate slashes
426 self.checkJoin(SIMPLE_BASE + '/', 'foo', SIMPLE_BASE + '/foo')
427
428 # Non-RFC-defined tests, covering variations of base and trailing
429 # slashes
430 self.checkJoin('http://a/b/c/d/e/', '../../f/g/', 'http://a/b/c/f/g/')
431 self.checkJoin('http://a/b/c/d/e', '../../f/g/', 'http://a/b/f/g/')
432 self.checkJoin('http://a/b/c/d/e/', '/../../f/g/', 'http://a/f/g/')
433 self.checkJoin('http://a/b/c/d/e', '/../../f/g/', 'http://a/f/g/')
434 self.checkJoin('http://a/b/c/d/e/', '../../f/g', 'http://a/b/c/f/g')
435 self.checkJoin('http://a/b/', '../../f/g/', 'http://a/f/g/')
436
437 # issue 23703: don't duplicate filename
438 self.checkJoin('a', 'b', 'b')
439
440 def test_RFC2732(self):
441 str_cases = [
442 ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
443 ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
444 ('http://[::1]:5432/foo/', '::1', 5432),
445 ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
446 ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
447 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
448 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
449 ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
450 ('http://[::ffff:12.34.56.78]:5432/foo/',
451 '::ffff:12.34.56.78', 5432),
452 ('http://Test.python.org/foo/', 'test.python.org', None),
453 ('http://12.34.56.78/foo/', '12.34.56.78', None),
454 ('http://[::1]/foo/', '::1', None),
455 ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
456 ('http://[dead:beef::]/foo/', 'dead:beef::', None),
457 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
458 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
459 ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
460 ('http://[::ffff:12.34.56.78]/foo/',
461 '::ffff:12.34.56.78', None),
462 ('http://Test.python.org:/foo/', 'test.python.org', None),
463 ('http://12.34.56.78:/foo/', '12.34.56.78', None),
464 ('http://[::1]:/foo/', '::1', None),
465 ('http://[dead:beef::1]:/foo/', 'dead:beef::1', None),
466 ('http://[dead:beef::]:/foo/', 'dead:beef::', None),
467 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:/foo/',
468 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
469 ('http://[::12.34.56.78]:/foo/', '::12.34.56.78', None),
470 ('http://[::ffff:12.34.56.78]:/foo/',
471 '::ffff:12.34.56.78', None),
472 ]
473 def _encode(t):
474 return t[0].encode('ascii'), t[1].encode('ascii'), t[2]
475 bytes_cases = [_encode(x) for x in str_cases]
476 for url, hostname, port in str_cases + bytes_cases:
477 urlparsed = urllib.parse.urlparse(url)
478 self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port))
479
480 str_cases = [
481 'http://::12.34.56.78]/',
482 'http://[::1/foo/',
483 'ftp://[::1/foo/bad]/bad',
484 'http://[::1/foo/bad]/bad',
485 'http://[::ffff:12.34.56.78']
486 bytes_cases = [x.encode('ascii') for x in str_cases]
487 for invalid_url in str_cases + bytes_cases:
488 self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url)
489
490 def test_urldefrag(self):
491 str_cases = [
492 ('http://python.org#frag', 'http://python.org', 'frag'),
493 ('http://python.org', 'http://python.org', ''),
494 ('http://python.org/#frag', 'http://python.org/', 'frag'),
495 ('http://python.org/', 'http://python.org/', ''),
496 ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
497 ('http://python.org/?q', 'http://python.org/?q', ''),
498 ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
499 ('http://python.org/p?q', 'http://python.org/p?q', ''),
500 (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
501 (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
502 ]
503 def _encode(t):
504 return type(t)(x.encode('ascii') for x in t)
505 bytes_cases = [_encode(x) for x in str_cases]
506 for url, defrag, frag in str_cases + bytes_cases:
507 result = urllib.parse.urldefrag(url)
508 self.assertEqual(result.geturl(), url)
509 self.assertEqual(result, (defrag, frag))
510 self.assertEqual(result.url, defrag)
511 self.assertEqual(result.fragment, frag)
512
513 def test_urlsplit_scoped_IPv6(self):
514 p = urllib.parse.urlsplit('http://[FE80::822a:a8ff:fe49:470c%tESt]:1234')
515 self.assertEqual(p.hostname, "fe80::822a:a8ff:fe49:470c%tESt")
516 self.assertEqual(p.netloc, '[FE80::822a:a8ff:fe49:470c%tESt]:1234')
517
518 p = urllib.parse.urlsplit(b'http://[FE80::822a:a8ff:fe49:470c%tESt]:1234')
519 self.assertEqual(p.hostname, b"fe80::822a:a8ff:fe49:470c%tESt")
520 self.assertEqual(p.netloc, b'[FE80::822a:a8ff:fe49:470c%tESt]:1234')
521
522 def test_urlsplit_attributes(self):
523 url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
524 p = urllib.parse.urlsplit(url)
525 self.assertEqual(p.scheme, "http")
526 self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
527 self.assertEqual(p.path, "/doc/")
528 self.assertEqual(p.query, "")
529 self.assertEqual(p.fragment, "frag")
530 self.assertEqual(p.username, None)
531 self.assertEqual(p.password, None)
532 self.assertEqual(p.hostname, "www.python.org")
533 self.assertEqual(p.port, None)
534 # geturl() won't return exactly the original URL in this case
535 # since the scheme is always case-normalized
536 # We handle this by ignoring the first 4 characters of the URL
537 self.assertEqual(p.geturl()[4:], url[4:])
538
539 url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
540 p = urllib.parse.urlsplit(url)
541 self.assertEqual(p.scheme, "http")
542 self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
543 self.assertEqual(p.path, "/doc/")
544 self.assertEqual(p.query, "query=yes")
545 self.assertEqual(p.fragment, "frag")
546 self.assertEqual(p.username, "User")
547 self.assertEqual(p.password, "Pass")
548 self.assertEqual(p.hostname, "www.python.org")
549 self.assertEqual(p.port, 80)
550 self.assertEqual(p.geturl(), url)
551
552 # Addressing issue1698, which suggests Username can contain
553 # "@" characters. Though not RFC compliant, many ftp sites allow
554 # and request email addresses as usernames.
555
556 url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
557 p = urllib.parse.urlsplit(url)
558 self.assertEqual(p.scheme, "http")
559 self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
560 self.assertEqual(p.path, "/doc/")
561 self.assertEqual(p.query, "query=yes")
562 self.assertEqual(p.fragment, "frag")
563 self.assertEqual(p.username, "User@example.com")
564 self.assertEqual(p.password, "Pass")
565 self.assertEqual(p.hostname, "www.python.org")
566 self.assertEqual(p.port, 80)
567 self.assertEqual(p.geturl(), url)
568
569 # And check them all again, only with bytes this time
570 url = b"HTTP://WWW.PYTHON.ORG/doc/#frag"
571 p = urllib.parse.urlsplit(url)
572 self.assertEqual(p.scheme, b"http")
573 self.assertEqual(p.netloc, b"WWW.PYTHON.ORG")
574 self.assertEqual(p.path, b"/doc/")
575 self.assertEqual(p.query, b"")
576 self.assertEqual(p.fragment, b"frag")
577 self.assertEqual(p.username, None)
578 self.assertEqual(p.password, None)
579 self.assertEqual(p.hostname, b"www.python.org")
580 self.assertEqual(p.port, None)
581 self.assertEqual(p.geturl()[4:], url[4:])
582
583 url = b"http://User:Pass@www.python.org:080/doc/?query=yes#frag"
584 p = urllib.parse.urlsplit(url)
585 self.assertEqual(p.scheme, b"http")
586 self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")
587 self.assertEqual(p.path, b"/doc/")
588 self.assertEqual(p.query, b"query=yes")
589 self.assertEqual(p.fragment, b"frag")
590 self.assertEqual(p.username, b"User")
591 self.assertEqual(p.password, b"Pass")
592 self.assertEqual(p.hostname, b"www.python.org")
593 self.assertEqual(p.port, 80)
594 self.assertEqual(p.geturl(), url)
595
596 url = b"http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
597 p = urllib.parse.urlsplit(url)
598 self.assertEqual(p.scheme, b"http")
599 self.assertEqual(p.netloc, b"User@example.com:Pass@www.python.org:080")
600 self.assertEqual(p.path, b"/doc/")
601 self.assertEqual(p.query, b"query=yes")
602 self.assertEqual(p.fragment, b"frag")
603 self.assertEqual(p.username, b"User@example.com")
604 self.assertEqual(p.password, b"Pass")
605 self.assertEqual(p.hostname, b"www.python.org")
606 self.assertEqual(p.port, 80)
607 self.assertEqual(p.geturl(), url)
608
609 # Verify an illegal port raises ValueError
610 url = b"HTTP://WWW.PYTHON.ORG:65536/doc/#frag"
611 p = urllib.parse.urlsplit(url)
612 with self.assertRaisesRegex(ValueError, "out of range"):
613 p.port
614
615 def test_urlsplit_remove_unsafe_bytes(self):
616 # Remove ASCII tabs and newlines from input
617 url = "http\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
618 p = urllib.parse.urlsplit(url)
619 self.assertEqual(p.scheme, "http")
620 self.assertEqual(p.netloc, "www.python.org")
621 self.assertEqual(p.path, "/javascript:alert('msg')/")
622 self.assertEqual(p.query, "query=something")
623 self.assertEqual(p.fragment, "fragment")
624 self.assertEqual(p.username, None)
625 self.assertEqual(p.password, None)
626 self.assertEqual(p.hostname, "www.python.org")
627 self.assertEqual(p.port, None)
628 self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment")
629
630 # Remove ASCII tabs and newlines from input as bytes.
631 url = b"http\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
632 p = urllib.parse.urlsplit(url)
633 self.assertEqual(p.scheme, b"http")
634 self.assertEqual(p.netloc, b"www.python.org")
635 self.assertEqual(p.path, b"/javascript:alert('msg')/")
636 self.assertEqual(p.query, b"query=something")
637 self.assertEqual(p.fragment, b"fragment")
638 self.assertEqual(p.username, None)
639 self.assertEqual(p.password, None)
640 self.assertEqual(p.hostname, b"www.python.org")
641 self.assertEqual(p.port, None)
642 self.assertEqual(p.geturl(), b"http://www.python.org/javascript:alert('msg')/?query=something#fragment")
643
644 # with scheme as cache-key
645 url = "http://www.python.org/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
646 scheme = "ht\ntp"
647 for _ in range(2):
648 p = urllib.parse.urlsplit(url, scheme=scheme)
649 self.assertEqual(p.scheme, "http")
650 self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment")
651
652 def test_urlsplit_strip_url(self):
653 noise = bytes(range(0, 0x20 + 1))
654 base_url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
655
656 url = noise.decode("utf-8") + base_url
657 p = urllib.parse.urlsplit(url)
658 self.assertEqual(p.scheme, "http")
659 self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
660 self.assertEqual(p.path, "/doc/")
661 self.assertEqual(p.query, "query=yes")
662 self.assertEqual(p.fragment, "frag")
663 self.assertEqual(p.username, "User")
664 self.assertEqual(p.password, "Pass")
665 self.assertEqual(p.hostname, "www.python.org")
666 self.assertEqual(p.port, 80)
667 self.assertEqual(p.geturl(), base_url)
668
669 url = noise + base_url.encode("utf-8")
670 p = urllib.parse.urlsplit(url)
671 self.assertEqual(p.scheme, b"http")
672 self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")
673 self.assertEqual(p.path, b"/doc/")
674 self.assertEqual(p.query, b"query=yes")
675 self.assertEqual(p.fragment, b"frag")
676 self.assertEqual(p.username, b"User")
677 self.assertEqual(p.password, b"Pass")
678 self.assertEqual(p.hostname, b"www.python.org")
679 self.assertEqual(p.port, 80)
680 self.assertEqual(p.geturl(), base_url.encode("utf-8"))
681
682 # Test that trailing space is preserved as some applications rely on
683 # this within query strings.
684 query_spaces_url = "https://www.python.org:88/doc/?query= "
685 p = urllib.parse.urlsplit(noise.decode("utf-8") + query_spaces_url)
686 self.assertEqual(p.scheme, "https")
687 self.assertEqual(p.netloc, "www.python.org:88")
688 self.assertEqual(p.path, "/doc/")
689 self.assertEqual(p.query, "query= ")
690 self.assertEqual(p.port, 88)
691 self.assertEqual(p.geturl(), query_spaces_url)
692
693 p = urllib.parse.urlsplit("www.pypi.org ")
694 # That "hostname" gets considered a "path" due to the
695 # trailing space and our existing logic... YUCK...
696 # and re-assembles via geturl aka unurlsplit into the original.
697 # django.core.validators.URLValidator (at least through v3.2) relies on
698 # this, for better or worse, to catch it in a ValidationError via its
699 # regular expressions.
700 # Here we test the basic round trip concept of such a trailing space.
701 self.assertEqual(urllib.parse.urlunsplit(p), "www.pypi.org ")
702
703 # with scheme as cache-key
704 url = "//www.python.org/"
705 scheme = noise.decode("utf-8") + "https" + noise.decode("utf-8")
706 for _ in range(2):
707 p = urllib.parse.urlsplit(url, scheme=scheme)
708 self.assertEqual(p.scheme, "https")
709 self.assertEqual(p.geturl(), "https://www.python.org/")
710
711 def test_attributes_bad_port(self):
712 """Check handling of invalid ports."""
713 for bytes in (False, True):
714 for parse in (urllib.parse.urlsplit, urllib.parse.urlparse):
715 for port in ("foo", "1.5", "-1", "0x10", "-0", "1_1", " 1", "1 ", "६"):
716 with self.subTest(bytes=bytes, parse=parse, port=port):
717 netloc = "www.example.net:" + port
718 url = "http://" + netloc + "/"
719 if bytes:
720 if netloc.isascii() and port.isascii():
721 netloc = netloc.encode("ascii")
722 url = url.encode("ascii")
723 else:
724 continue
725 p = parse(url)
726 self.assertEqual(p.netloc, netloc)
727 with self.assertRaises(ValueError):
728 p.port
729
730 def test_attributes_bad_scheme(self):
731 """Check handling of invalid schemes."""
732 for bytes in (False, True):
733 for parse in (urllib.parse.urlsplit, urllib.parse.urlparse):
734 for scheme in (".", "+", "-", "0", "http&", "६http"):
735 with self.subTest(bytes=bytes, parse=parse, scheme=scheme):
736 url = scheme + "://www.example.net"
737 if bytes:
738 if url.isascii():
739 url = url.encode("ascii")
740 else:
741 continue
742 p = parse(url)
743 if bytes:
744 self.assertEqual(p.scheme, b"")
745 else:
746 self.assertEqual(p.scheme, "")
747
748 def test_attributes_without_netloc(self):
749 # This example is straight from RFC 3261. It looks like it
750 # should allow the username, hostname, and port to be filled
751 # in, but doesn't. Since it's a URI and doesn't use the
752 # scheme://netloc syntax, the netloc and related attributes
753 # should be left empty.
754 uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
755 p = urllib.parse.urlsplit(uri)
756 self.assertEqual(p.netloc, "")
757 self.assertEqual(p.username, None)
758 self.assertEqual(p.password, None)
759 self.assertEqual(p.hostname, None)
760 self.assertEqual(p.port, None)
761 self.assertEqual(p.geturl(), uri)
762
763 p = urllib.parse.urlparse(uri)
764 self.assertEqual(p.netloc, "")
765 self.assertEqual(p.username, None)
766 self.assertEqual(p.password, None)
767 self.assertEqual(p.hostname, None)
768 self.assertEqual(p.port, None)
769 self.assertEqual(p.geturl(), uri)
770
771 # You guessed it, repeating the test with bytes input
772 uri = b"sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
773 p = urllib.parse.urlsplit(uri)
774 self.assertEqual(p.netloc, b"")
775 self.assertEqual(p.username, None)
776 self.assertEqual(p.password, None)
777 self.assertEqual(p.hostname, None)
778 self.assertEqual(p.port, None)
779 self.assertEqual(p.geturl(), uri)
780
781 p = urllib.parse.urlparse(uri)
782 self.assertEqual(p.netloc, b"")
783 self.assertEqual(p.username, None)
784 self.assertEqual(p.password, None)
785 self.assertEqual(p.hostname, None)
786 self.assertEqual(p.port, None)
787 self.assertEqual(p.geturl(), uri)
788
789 def test_noslash(self):
790 # Issue 1637: http://foo.com?query is legal
791 self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
792 ('http', 'example.com', '', '', 'blahblah=/foo', ''))
793 self.assertEqual(urllib.parse.urlparse(b"http://example.com?blahblah=/foo"),
794 (b'http', b'example.com', b'', b'', b'blahblah=/foo', b''))
795
796 def test_withoutscheme(self):
797 # Test urlparse without scheme
798 # Issue 754016: urlparse goes wrong with IP:port without scheme
799 # RFC 1808 specifies that netloc should start with //, urlparse expects
800 # the same, otherwise it classifies the portion of url as path.
801 self.assertEqual(urllib.parse.urlparse("path"),
802 ('','','path','','',''))
803 self.assertEqual(urllib.parse.urlparse("//www.python.org:80"),
804 ('','www.python.org:80','','','',''))
805 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
806 ('http','www.python.org:80','','','',''))
807 # Repeat for bytes input
808 self.assertEqual(urllib.parse.urlparse(b"path"),
809 (b'',b'',b'path',b'',b'',b''))
810 self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80"),
811 (b'',b'www.python.org:80',b'',b'',b'',b''))
812 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
813 (b'http',b'www.python.org:80',b'',b'',b'',b''))
814
815 def test_portseparator(self):
816 # Issue 754016 makes changes for port separator ':' from scheme separator
817 self.assertEqual(urllib.parse.urlparse("http:80"), ('http','','80','','',''))
818 self.assertEqual(urllib.parse.urlparse("https:80"), ('https','','80','','',''))
819 self.assertEqual(urllib.parse.urlparse("path:80"), ('path','','80','','',''))
820 self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
821 self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
822 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
823 ('http','www.python.org:80','','','',''))
824 # As usual, need to check bytes input as well
825 self.assertEqual(urllib.parse.urlparse(b"http:80"), (b'http',b'',b'80',b'',b'',b''))
826 self.assertEqual(urllib.parse.urlparse(b"https:80"), (b'https',b'',b'80',b'',b'',b''))
827 self.assertEqual(urllib.parse.urlparse(b"path:80"), (b'path',b'',b'80',b'',b'',b''))
828 self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b''))
829 self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b''))
830 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
831 (b'http',b'www.python.org:80',b'',b'',b'',b''))
832
833 def test_usingsys(self):
834 # Issue 3314: sys module is used in the error
835 self.assertRaises(TypeError, urllib.parse.urlencode, "foo")
836
837 def test_anyscheme(self):
838 # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
839 self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"),
840 ('s3', 'foo.com', '/stuff', '', '', ''))
841 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),
842 ('x-newscheme', 'foo.com', '/stuff', '', '', ''))
843 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),
844 ('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment'))
845 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query"),
846 ('x-newscheme', 'foo.com', '/stuff', '', 'query', ''))
847
848 # And for bytes...
849 self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"),
850 (b's3', b'foo.com', b'/stuff', b'', b'', b''))
851 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"),
852 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b''))
853 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment"),
854 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'fragment'))
855 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"),
856 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b''))
857
858 def test_default_scheme(self):
859 # Exercise the scheme parameter of urlparse() and urlsplit()
860 for func in (urllib.parse.urlparse, urllib.parse.urlsplit):
861 with self.subTest(function=func):
862 result = func("http://example.net/", "ftp")
863 self.assertEqual(result.scheme, "http")
864 result = func(b"http://example.net/", b"ftp")
865 self.assertEqual(result.scheme, b"http")
866 self.assertEqual(func("path", "ftp").scheme, "ftp")
867 self.assertEqual(func("path", scheme="ftp").scheme, "ftp")
868 self.assertEqual(func(b"path", scheme=b"ftp").scheme, b"ftp")
869 self.assertEqual(func("path").scheme, "")
870 self.assertEqual(func(b"path").scheme, b"")
871 self.assertEqual(func(b"path", "").scheme, b"")
872
873 def test_parse_fragments(self):
874 # Exercise the allow_fragments parameter of urlparse() and urlsplit()
875 tests = (
876 ("http:#frag", "path", "frag"),
877 ("//example.net#frag", "path", "frag"),
878 ("index.html#frag", "path", "frag"),
879 (";a=b#frag", "params", "frag"),
880 ("?a=b#frag", "query", "frag"),
881 ("#frag", "path", "frag"),
882 ("abc#@frag", "path", "@frag"),
883 ("//abc#@frag", "path", "@frag"),
884 ("//abc:80#@frag", "path", "@frag"),
885 ("//abc#@frag:80", "path", "@frag:80"),
886 )
887 for url, attr, expected_frag in tests:
888 for func in (urllib.parse.urlparse, urllib.parse.urlsplit):
889 if attr == "params" and func is urllib.parse.urlsplit:
890 attr = "path"
891 with self.subTest(url=url, function=func):
892 result = func(url, allow_fragments=False)
893 self.assertEqual(result.fragment, "")
894 self.assertTrue(
895 getattr(result, attr).endswith("#" + expected_frag))
896 self.assertEqual(func(url, "", False).fragment, "")
897
898 result = func(url, allow_fragments=True)
899 self.assertEqual(result.fragment, expected_frag)
900 self.assertFalse(
901 getattr(result, attr).endswith(expected_frag))
902 self.assertEqual(func(url, "", True).fragment,
903 expected_frag)
904 self.assertEqual(func(url).fragment, expected_frag)
905
906 def test_mixed_types_rejected(self):
907 # Several functions that process either strings or ASCII encoded bytes
908 # accept multiple arguments. Check they reject mixed type input
909 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
910 urllib.parse.urlparse("www.python.org", b"http")
911 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
912 urllib.parse.urlparse(b"www.python.org", "http")
913 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
914 urllib.parse.urlsplit("www.python.org", b"http")
915 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
916 urllib.parse.urlsplit(b"www.python.org", "http")
917 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
918 urllib.parse.urlunparse(( b"http", "www.python.org","","","",""))
919 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
920 urllib.parse.urlunparse(("http", b"www.python.org","","","",""))
921 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
922 urllib.parse.urlunsplit((b"http", "www.python.org","","",""))
923 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
924 urllib.parse.urlunsplit(("http", b"www.python.org","","",""))
925 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
926 urllib.parse.urljoin("http://python.org", b"http://python.org")
927 with self.assertRaisesRegex(TypeError, "Cannot mix str"):
928 urllib.parse.urljoin(b"http://python.org", "http://python.org")
929
930 def _check_result_type(self, str_type):
931 num_args = len(str_type._fields)
932 bytes_type = str_type._encoded_counterpart
933 self.assertIs(bytes_type._decoded_counterpart, str_type)
934 str_args = ('',) * num_args
935 bytes_args = (b'',) * num_args
936 str_result = str_type(*str_args)
937 bytes_result = bytes_type(*bytes_args)
938 encoding = 'ascii'
939 errors = 'strict'
940 self.assertEqual(str_result, str_args)
941 self.assertEqual(bytes_result.decode(), str_args)
942 self.assertEqual(bytes_result.decode(), str_result)
943 self.assertEqual(bytes_result.decode(encoding), str_args)
944 self.assertEqual(bytes_result.decode(encoding), str_result)
945 self.assertEqual(bytes_result.decode(encoding, errors), str_args)
946 self.assertEqual(bytes_result.decode(encoding, errors), str_result)
947 self.assertEqual(bytes_result, bytes_args)
948 self.assertEqual(str_result.encode(), bytes_args)
949 self.assertEqual(str_result.encode(), bytes_result)
950 self.assertEqual(str_result.encode(encoding), bytes_args)
951 self.assertEqual(str_result.encode(encoding), bytes_result)
952 self.assertEqual(str_result.encode(encoding, errors), bytes_args)
953 self.assertEqual(str_result.encode(encoding, errors), bytes_result)
954
955 def test_result_pairs(self):
956 # Check encoding and decoding between result pairs
957 result_types = [
958 urllib.parse.DefragResult,
959 urllib.parse.SplitResult,
960 urllib.parse.ParseResult,
961 ]
962 for result_type in result_types:
963 self._check_result_type(result_type)
964
965 def test_parse_qs_encoding(self):
966 result = urllib.parse.parse_qs("key=\u0141%E9", encoding="latin-1")
967 self.assertEqual(result, {'key': ['\u0141\xE9']})
968 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="utf-8")
969 self.assertEqual(result, {'key': ['\u0141\xE9']})
970 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="ascii")
971 self.assertEqual(result, {'key': ['\u0141\ufffd\ufffd']})
972 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii")
973 self.assertEqual(result, {'key': ['\u0141\ufffd-']})
974 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii",
975 errors="ignore")
976 self.assertEqual(result, {'key': ['\u0141-']})
977
978 def test_parse_qsl_encoding(self):
979 result = urllib.parse.parse_qsl("key=\u0141%E9", encoding="latin-1")
980 self.assertEqual(result, [('key', '\u0141\xE9')])
981 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="utf-8")
982 self.assertEqual(result, [('key', '\u0141\xE9')])
983 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="ascii")
984 self.assertEqual(result, [('key', '\u0141\ufffd\ufffd')])
985 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii")
986 self.assertEqual(result, [('key', '\u0141\ufffd-')])
987 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii",
988 errors="ignore")
989 self.assertEqual(result, [('key', '\u0141-')])
990
991 def test_parse_qsl_max_num_fields(self):
992 with self.assertRaises(ValueError):
993 urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10)
994 urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10)
995
996 def test_parse_qs_separator(self):
997 parse_qs_semicolon_cases = [
998 (";", {}),
999 (";;", {}),
1000 (";a=b", {'a': ['b']}),
1001 ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
1002 ("a=1;a=2", {'a': ['1', '2']}),
1003 (b";", {}),
1004 (b";;", {}),
1005 (b";a=b", {b'a': [b'b']}),
1006 (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
1007 (b"a=1;a=2", {b'a': [b'1', b'2']}),
1008 ]
1009 for orig, expect in parse_qs_semicolon_cases:
1010 with self.subTest(f"Original: {orig!r}, Expected: {expect!r}"):
1011 result = urllib.parse.parse_qs(orig, separator=';')
1012 self.assertEqual(result, expect, "Error parsing %r" % orig)
1013 result_bytes = urllib.parse.parse_qs(orig, separator=b';')
1014 self.assertEqual(result_bytes, expect, "Error parsing %r" % orig)
1015
1016
1017 def test_parse_qsl_separator(self):
1018 parse_qsl_semicolon_cases = [
1019 (";", []),
1020 (";;", []),
1021 (";a=b", [('a', 'b')]),
1022 ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
1023 ("a=1;a=2", [('a', '1'), ('a', '2')]),
1024 (b";", []),
1025 (b";;", []),
1026 (b";a=b", [(b'a', b'b')]),
1027 (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
1028 (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
1029 ]
1030 for orig, expect in parse_qsl_semicolon_cases:
1031 with self.subTest(f"Original: {orig!r}, Expected: {expect!r}"):
1032 result = urllib.parse.parse_qsl(orig, separator=';')
1033 self.assertEqual(result, expect, "Error parsing %r" % orig)
1034 result_bytes = urllib.parse.parse_qsl(orig, separator=b';')
1035 self.assertEqual(result_bytes, expect, "Error parsing %r" % orig)
1036
1037
1038 def test_urlencode_sequences(self):
1039 # Other tests incidentally urlencode things; test non-covered cases:
1040 # Sequence and object values.
1041 result = urllib.parse.urlencode({'a': [1, 2], 'b': (3, 4, 5)}, True)
1042 # we cannot rely on ordering here
1043 assert set(result.split('&')) == {'a=1', 'a=2', 'b=3', 'b=4', 'b=5'}
1044
1045 class ESC[4;38;5;81mTrivial:
1046 def __str__(self):
1047 return 'trivial'
1048
1049 result = urllib.parse.urlencode({'a': Trivial()}, True)
1050 self.assertEqual(result, 'a=trivial')
1051
1052 def test_urlencode_quote_via(self):
1053 result = urllib.parse.urlencode({'a': 'some value'})
1054 self.assertEqual(result, "a=some+value")
1055 result = urllib.parse.urlencode({'a': 'some value/another'},
1056 quote_via=urllib.parse.quote)
1057 self.assertEqual(result, "a=some%20value%2Fanother")
1058 result = urllib.parse.urlencode({'a': 'some value/another'},
1059 safe='/', quote_via=urllib.parse.quote)
1060 self.assertEqual(result, "a=some%20value/another")
1061
1062 def test_quote_from_bytes(self):
1063 self.assertRaises(TypeError, urllib.parse.quote_from_bytes, 'foo')
1064 result = urllib.parse.quote_from_bytes(b'archaeological arcana')
1065 self.assertEqual(result, 'archaeological%20arcana')
1066 result = urllib.parse.quote_from_bytes(b'')
1067 self.assertEqual(result, '')
1068
1069 def test_unquote_to_bytes(self):
1070 result = urllib.parse.unquote_to_bytes('abc%20def')
1071 self.assertEqual(result, b'abc def')
1072 result = urllib.parse.unquote_to_bytes('')
1073 self.assertEqual(result, b'')
1074
1075 def test_quote_errors(self):
1076 self.assertRaises(TypeError, urllib.parse.quote, b'foo',
1077 encoding='utf-8')
1078 self.assertRaises(TypeError, urllib.parse.quote, b'foo', errors='strict')
1079
1080 def test_issue14072(self):
1081 p1 = urllib.parse.urlsplit('tel:+31-641044153')
1082 self.assertEqual(p1.scheme, 'tel')
1083 self.assertEqual(p1.path, '+31-641044153')
1084 p2 = urllib.parse.urlsplit('tel:+31641044153')
1085 self.assertEqual(p2.scheme, 'tel')
1086 self.assertEqual(p2.path, '+31641044153')
1087 # assert the behavior for urlparse
1088 p1 = urllib.parse.urlparse('tel:+31-641044153')
1089 self.assertEqual(p1.scheme, 'tel')
1090 self.assertEqual(p1.path, '+31-641044153')
1091 p2 = urllib.parse.urlparse('tel:+31641044153')
1092 self.assertEqual(p2.scheme, 'tel')
1093 self.assertEqual(p2.path, '+31641044153')
1094
1095 def test_invalid_bracketed_hosts(self):
1096 self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[192.0.2.146]/Path?Query')
1097 self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[important.com:8000]/Path?Query')
1098 self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v123r.IP]/Path?Query')
1099 self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v12ae]/Path?Query')
1100 self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v.IP]/Path?Query')
1101 self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v123.]/Path?Query')
1102 self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v]/Path?Query')
1103 self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af::2309::fae7:1234]/Path?Query')
1104 self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af:2309::fae7:1234:2342:438e:192.0.2.146]/Path?Query')
1105 self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@]v6a.ip[/Path')
1106
1107 def test_splitting_bracketed_hosts(self):
1108 p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]/path?query')
1109 self.assertEqual(p1.hostname, 'v6a.ip')
1110 self.assertEqual(p1.username, 'user')
1111 self.assertEqual(p1.path, '/path')
1112 p2 = urllib.parse.urlsplit('scheme://user@[0439:23af:2309::fae7%test]/path?query')
1113 self.assertEqual(p2.hostname, '0439:23af:2309::fae7%test')
1114 self.assertEqual(p2.username, 'user')
1115 self.assertEqual(p2.path, '/path')
1116 p3 = urllib.parse.urlsplit('scheme://user@[0439:23af:2309::fae7:1234:192.0.2.146%test]/path?query')
1117 self.assertEqual(p3.hostname, '0439:23af:2309::fae7:1234:192.0.2.146%test')
1118 self.assertEqual(p3.username, 'user')
1119 self.assertEqual(p3.path, '/path')
1120
1121 def test_port_casting_failure_message(self):
1122 message = "Port could not be cast to integer value as 'oracle'"
1123 p1 = urllib.parse.urlparse('http://Server=sde; Service=sde:oracle')
1124 with self.assertRaisesRegex(ValueError, message):
1125 p1.port
1126
1127 p2 = urllib.parse.urlsplit('http://Server=sde; Service=sde:oracle')
1128 with self.assertRaisesRegex(ValueError, message):
1129 p2.port
1130
1131 def test_telurl_params(self):
1132 p1 = urllib.parse.urlparse('tel:123-4;phone-context=+1-650-516')
1133 self.assertEqual(p1.scheme, 'tel')
1134 self.assertEqual(p1.path, '123-4')
1135 self.assertEqual(p1.params, 'phone-context=+1-650-516')
1136
1137 p1 = urllib.parse.urlparse('tel:+1-201-555-0123')
1138 self.assertEqual(p1.scheme, 'tel')
1139 self.assertEqual(p1.path, '+1-201-555-0123')
1140 self.assertEqual(p1.params, '')
1141
1142 p1 = urllib.parse.urlparse('tel:7042;phone-context=example.com')
1143 self.assertEqual(p1.scheme, 'tel')
1144 self.assertEqual(p1.path, '7042')
1145 self.assertEqual(p1.params, 'phone-context=example.com')
1146
1147 p1 = urllib.parse.urlparse('tel:863-1234;phone-context=+1-914-555')
1148 self.assertEqual(p1.scheme, 'tel')
1149 self.assertEqual(p1.path, '863-1234')
1150 self.assertEqual(p1.params, 'phone-context=+1-914-555')
1151
1152 def test_Quoter_repr(self):
1153 quoter = urllib.parse._Quoter(urllib.parse._ALWAYS_SAFE)
1154 self.assertIn('Quoter', repr(quoter))
1155
1156 def test_clear_cache_for_code_coverage(self):
1157 urllib.parse.clear_cache()
1158
1159 def test_urllib_parse_getattr_failure(self):
1160 """Test that urllib.parse.__getattr__() fails correctly."""
1161 with self.assertRaises(AttributeError):
1162 unused = urllib.parse.this_does_not_exist
1163
1164 def test_all(self):
1165 expected = []
1166 undocumented = {
1167 'splitattr', 'splithost', 'splitnport', 'splitpasswd',
1168 'splitport', 'splitquery', 'splittag', 'splittype', 'splituser',
1169 'splitvalue',
1170 'ResultBase', 'clear_cache', 'to_bytes', 'unwrap',
1171 }
1172 for name in dir(urllib.parse):
1173 if name.startswith('_') or name in undocumented:
1174 continue
1175 object = getattr(urllib.parse, name)
1176 if getattr(object, '__module__', None) == 'urllib.parse':
1177 expected.append(name)
1178 self.assertCountEqual(urllib.parse.__all__, expected)
1179
1180 def test_urlsplit_normalization(self):
1181 # Certain characters should never occur in the netloc,
1182 # including under normalization.
1183 # Ensure that ALL of them are detected and cause an error
1184 illegal_chars = '/:#?@'
1185 hex_chars = {'{:04X}'.format(ord(c)) for c in illegal_chars}
1186 denorm_chars = [
1187 c for c in map(chr, range(128, sys.maxunicode))
1188 if unicodedata.decomposition(c)
1189 and (hex_chars & set(unicodedata.decomposition(c).split()))
1190 and c not in illegal_chars
1191 ]
1192 # Sanity check that we found at least one such character
1193 self.assertIn('\u2100', denorm_chars)
1194 self.assertIn('\uFF03', denorm_chars)
1195
1196 # bpo-36742: Verify port separators are ignored when they
1197 # existed prior to decomposition
1198 urllib.parse.urlsplit('http://\u30d5\u309a:80')
1199 with self.assertRaises(ValueError):
1200 urllib.parse.urlsplit('http://\u30d5\u309a\ufe1380')
1201
1202 for scheme in ["http", "https", "ftp"]:
1203 for netloc in ["netloc{}false.netloc", "n{}user@netloc"]:
1204 for c in denorm_chars:
1205 url = "{}://{}/path".format(scheme, netloc.format(c))
1206 with self.subTest(url=url, char='{:04X}'.format(ord(c))):
1207 with self.assertRaises(ValueError):
1208 urllib.parse.urlsplit(url)
1209
1210 class ESC[4;38;5;81mUtility_Tests(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
1211 """Testcase to test the various utility functions in the urllib."""
1212 # In Python 2 this test class was in test_urllib.
1213
1214 def test_splittype(self):
1215 splittype = urllib.parse._splittype
1216 self.assertEqual(splittype('type:opaquestring'), ('type', 'opaquestring'))
1217 self.assertEqual(splittype('opaquestring'), (None, 'opaquestring'))
1218 self.assertEqual(splittype(':opaquestring'), (None, ':opaquestring'))
1219 self.assertEqual(splittype('type:'), ('type', ''))
1220 self.assertEqual(splittype('type:opaque:string'), ('type', 'opaque:string'))
1221
1222 def test_splithost(self):
1223 splithost = urllib.parse._splithost
1224 self.assertEqual(splithost('//www.example.org:80/foo/bar/baz.html'),
1225 ('www.example.org:80', '/foo/bar/baz.html'))
1226 self.assertEqual(splithost('//www.example.org:80'),
1227 ('www.example.org:80', ''))
1228 self.assertEqual(splithost('/foo/bar/baz.html'),
1229 (None, '/foo/bar/baz.html'))
1230
1231 # bpo-30500: # starts a fragment.
1232 self.assertEqual(splithost('//127.0.0.1#@host.com'),
1233 ('127.0.0.1', '/#@host.com'))
1234 self.assertEqual(splithost('//127.0.0.1#@host.com:80'),
1235 ('127.0.0.1', '/#@host.com:80'))
1236 self.assertEqual(splithost('//127.0.0.1:80#@host.com'),
1237 ('127.0.0.1:80', '/#@host.com'))
1238
1239 # Empty host is returned as empty string.
1240 self.assertEqual(splithost("///file"),
1241 ('', '/file'))
1242
1243 # Trailing semicolon, question mark and hash symbol are kept.
1244 self.assertEqual(splithost("//example.net/file;"),
1245 ('example.net', '/file;'))
1246 self.assertEqual(splithost("//example.net/file?"),
1247 ('example.net', '/file?'))
1248 self.assertEqual(splithost("//example.net/file#"),
1249 ('example.net', '/file#'))
1250
1251 def test_splituser(self):
1252 splituser = urllib.parse._splituser
1253 self.assertEqual(splituser('User:Pass@www.python.org:080'),
1254 ('User:Pass', 'www.python.org:080'))
1255 self.assertEqual(splituser('@www.python.org:080'),
1256 ('', 'www.python.org:080'))
1257 self.assertEqual(splituser('www.python.org:080'),
1258 (None, 'www.python.org:080'))
1259 self.assertEqual(splituser('User:Pass@'),
1260 ('User:Pass', ''))
1261 self.assertEqual(splituser('User@example.com:Pass@www.python.org:080'),
1262 ('User@example.com:Pass', 'www.python.org:080'))
1263
1264 def test_splitpasswd(self):
        # Some of the password examples are not sensible, but they are kept
        # to conform to RFC 2617 and to address issue4675.
1267 splitpasswd = urllib.parse._splitpasswd
1268 self.assertEqual(splitpasswd('user:ab'), ('user', 'ab'))
1269 self.assertEqual(splitpasswd('user:a\nb'), ('user', 'a\nb'))
1270 self.assertEqual(splitpasswd('user:a\tb'), ('user', 'a\tb'))
1271 self.assertEqual(splitpasswd('user:a\rb'), ('user', 'a\rb'))
1272 self.assertEqual(splitpasswd('user:a\fb'), ('user', 'a\fb'))
1273 self.assertEqual(splitpasswd('user:a\vb'), ('user', 'a\vb'))
1274 self.assertEqual(splitpasswd('user:a:b'), ('user', 'a:b'))
1275 self.assertEqual(splitpasswd('user:a b'), ('user', 'a b'))
1276 self.assertEqual(splitpasswd('user 2:ab'), ('user 2', 'ab'))
1277 self.assertEqual(splitpasswd('user+1:a+b'), ('user+1', 'a+b'))
1278 self.assertEqual(splitpasswd('user:'), ('user', ''))
1279 self.assertEqual(splitpasswd('user'), ('user', None))
1280 self.assertEqual(splitpasswd(':ab'), ('', 'ab'))
1281
1282 def test_splitport(self):
1283 splitport = urllib.parse._splitport
1284 self.assertEqual(splitport('parrot:88'), ('parrot', '88'))
1285 self.assertEqual(splitport('parrot'), ('parrot', None))
1286 self.assertEqual(splitport('parrot:'), ('parrot', None))
1287 self.assertEqual(splitport('127.0.0.1'), ('127.0.0.1', None))
1288 self.assertEqual(splitport('parrot:cheese'), ('parrot:cheese', None))
1289 self.assertEqual(splitport('[::1]:88'), ('[::1]', '88'))
1290 self.assertEqual(splitport('[::1]'), ('[::1]', None))
1291 self.assertEqual(splitport(':88'), ('', '88'))
1292
1293 def test_splitnport(self):
1294 splitnport = urllib.parse._splitnport
1295 self.assertEqual(splitnport('parrot:88'), ('parrot', 88))
1296 self.assertEqual(splitnport('parrot'), ('parrot', -1))
1297 self.assertEqual(splitnport('parrot', 55), ('parrot', 55))
1298 self.assertEqual(splitnport('parrot:'), ('parrot', -1))
1299 self.assertEqual(splitnport('parrot:', 55), ('parrot', 55))
1300 self.assertEqual(splitnport('127.0.0.1'), ('127.0.0.1', -1))
1301 self.assertEqual(splitnport('127.0.0.1', 55), ('127.0.0.1', 55))
1302 self.assertEqual(splitnport('parrot:cheese'), ('parrot', None))
1303 self.assertEqual(splitnport('parrot:cheese', 55), ('parrot', None))
1304 self.assertEqual(splitnport('parrot: +1_0 '), ('parrot', None))
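        # Side-by-side sketch (added for clarity): unlike _splitport(),
        # which leaves a non-numeric "port" attached to the host,
        # _splitnport() strips it and reports the port as None.
        self.assertEqual(urllib.parse._splitport('parrot:cheese'),
                         ('parrot:cheese', None))
        self.assertEqual(splitnport('parrot:cheese'), ('parrot', None))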
1305
1306 def test_splitquery(self):
        # Normal cases are exercised by other tests; ensure that we also
        # catch cases with no query specified (testcase ensuring coverage)
1309 splitquery = urllib.parse._splitquery
1310 self.assertEqual(splitquery('http://python.org/fake?foo=bar'),
1311 ('http://python.org/fake', 'foo=bar'))
1312 self.assertEqual(splitquery('http://python.org/fake?foo=bar?'),
1313 ('http://python.org/fake?foo=bar', ''))
1314 self.assertEqual(splitquery('http://python.org/fake'),
1315 ('http://python.org/fake', None))
1316 self.assertEqual(splitquery('?foo=bar'), ('', 'foo=bar'))
1317
1318 def test_splittag(self):
1319 splittag = urllib.parse._splittag
1320 self.assertEqual(splittag('http://example.com?foo=bar#baz'),
1321 ('http://example.com?foo=bar', 'baz'))
1322 self.assertEqual(splittag('http://example.com?foo=bar#'),
1323 ('http://example.com?foo=bar', ''))
1324 self.assertEqual(splittag('#baz'), ('', 'baz'))
1325 self.assertEqual(splittag('http://example.com?foo=bar'),
1326 ('http://example.com?foo=bar', None))
1327 self.assertEqual(splittag('http://example.com?foo=bar#baz#boo'),
1328 ('http://example.com?foo=bar#baz', 'boo'))
1329
1330 def test_splitattr(self):
1331 splitattr = urllib.parse._splitattr
1332 self.assertEqual(splitattr('/path;attr1=value1;attr2=value2'),
1333 ('/path', ['attr1=value1', 'attr2=value2']))
1334 self.assertEqual(splitattr('/path;'), ('/path', ['']))
1335 self.assertEqual(splitattr(';attr1=value1;attr2=value2'),
1336 ('', ['attr1=value1', 'attr2=value2']))
1337 self.assertEqual(splitattr('/path'), ('/path', []))
1338
1339 def test_splitvalue(self):
1340 # Normal cases are exercised by other tests; test pathological cases
1341 # with no key/value pairs. (testcase ensuring coverage)
1342 splitvalue = urllib.parse._splitvalue
1343 self.assertEqual(splitvalue('foo=bar'), ('foo', 'bar'))
1344 self.assertEqual(splitvalue('foo='), ('foo', ''))
1345 self.assertEqual(splitvalue('=bar'), ('', 'bar'))
1346 self.assertEqual(splitvalue('foobar'), ('foobar', None))
1347 self.assertEqual(splitvalue('foo=bar=baz'), ('foo', 'bar=baz'))
1348
1349 def test_to_bytes(self):
1350 result = urllib.parse._to_bytes('http://www.python.org')
1351 self.assertEqual(result, 'http://www.python.org')
1352 self.assertRaises(UnicodeError, urllib.parse._to_bytes,
1353 'http://www.python.org/medi\u00e6val')
1354
1355 def test_unwrap(self):
1356 for wrapped_url in ('<URL:scheme://host/path>', '<scheme://host/path>',
1357 'URL:scheme://host/path', 'scheme://host/path'):
1358 url = urllib.parse.unwrap(wrapped_url)
1359 self.assertEqual(url, 'scheme://host/path')
1360
1361
class DeprecationTest(unittest.TestCase):
1363
1364 def test_Quoter_deprecation(self):
1365 with self.assertWarns(DeprecationWarning) as cm:
1366 old_class = urllib.parse.Quoter
1367 self.assertIs(old_class, urllib.parse._Quoter)
1368 self.assertIn('Quoter will be removed', str(cm.warning))
1369
1370 def test_splittype_deprecation(self):
1371 with self.assertWarns(DeprecationWarning) as cm:
1372 urllib.parse.splittype('')
1373 self.assertEqual(str(cm.warning),
1374 'urllib.parse.splittype() is deprecated as of 3.8, '
1375 'use urllib.parse.urlparse() instead')
1376
1377 def test_splithost_deprecation(self):
1378 with self.assertWarns(DeprecationWarning) as cm:
1379 urllib.parse.splithost('')
1380 self.assertEqual(str(cm.warning),
1381 'urllib.parse.splithost() is deprecated as of 3.8, '
1382 'use urllib.parse.urlparse() instead')
1383
1384 def test_splituser_deprecation(self):
1385 with self.assertWarns(DeprecationWarning) as cm:
1386 urllib.parse.splituser('')
1387 self.assertEqual(str(cm.warning),
1388 'urllib.parse.splituser() is deprecated as of 3.8, '
1389 'use urllib.parse.urlparse() instead')
1390
1391 def test_splitpasswd_deprecation(self):
1392 with self.assertWarns(DeprecationWarning) as cm:
1393 urllib.parse.splitpasswd('')
1394 self.assertEqual(str(cm.warning),
1395 'urllib.parse.splitpasswd() is deprecated as of 3.8, '
1396 'use urllib.parse.urlparse() instead')
1397
1398 def test_splitport_deprecation(self):
1399 with self.assertWarns(DeprecationWarning) as cm:
1400 urllib.parse.splitport('')
1401 self.assertEqual(str(cm.warning),
1402 'urllib.parse.splitport() is deprecated as of 3.8, '
1403 'use urllib.parse.urlparse() instead')
1404
1405 def test_splitnport_deprecation(self):
1406 with self.assertWarns(DeprecationWarning) as cm:
1407 urllib.parse.splitnport('')
1408 self.assertEqual(str(cm.warning),
1409 'urllib.parse.splitnport() is deprecated as of 3.8, '
1410 'use urllib.parse.urlparse() instead')
1411
1412 def test_splitquery_deprecation(self):
1413 with self.assertWarns(DeprecationWarning) as cm:
1414 urllib.parse.splitquery('')
1415 self.assertEqual(str(cm.warning),
1416 'urllib.parse.splitquery() is deprecated as of 3.8, '
1417 'use urllib.parse.urlparse() instead')
1418
1419 def test_splittag_deprecation(self):
1420 with self.assertWarns(DeprecationWarning) as cm:
1421 urllib.parse.splittag('')
1422 self.assertEqual(str(cm.warning),
1423 'urllib.parse.splittag() is deprecated as of 3.8, '
1424 'use urllib.parse.urlparse() instead')
1425
1426 def test_splitattr_deprecation(self):
1427 with self.assertWarns(DeprecationWarning) as cm:
1428 urllib.parse.splitattr('')
1429 self.assertEqual(str(cm.warning),
1430 'urllib.parse.splitattr() is deprecated as of 3.8, '
1431 'use urllib.parse.urlparse() instead')
1432
1433 def test_splitvalue_deprecation(self):
1434 with self.assertWarns(DeprecationWarning) as cm:
1435 urllib.parse.splitvalue('')
1436 self.assertEqual(str(cm.warning),
1437 'urllib.parse.splitvalue() is deprecated as of 3.8, '
1438 'use urllib.parse.parse_qsl() instead')
1439
1440 def test_to_bytes_deprecation(self):
1441 with self.assertWarns(DeprecationWarning) as cm:
1442 urllib.parse.to_bytes('')
1443 self.assertEqual(str(cm.warning),
1444 'urllib.parse.to_bytes() is deprecated as of 3.8')
1445
1446
1447 if __name__ == "__main__":
1448 unittest.main()