(root)/
Python-3.11.7/
Lib/
test/
test_string_literals.py
       1  r"""Test correct treatment of various string literals by the parser.
       2  
       3  There are four types of string literals:
       4  
       5      'abc'             -- normal str
       6      r'abc'            -- raw str
       7      b'xyz'            -- normal bytes
       8      br'xyz' | rb'xyz' -- raw bytes
       9  
      10  The difference between normal and raw strings is of course that in a
      11  raw string, \ escapes (while still used to determine the end of the
      12  literal) are not interpreted, so that r'\x00' contains four
      13  characters: a backslash, an x, and two zeros; while '\x00' contains a
      14  single character (code point zero).
      15  
      16  The tricky thing is what should happen when non-ASCII bytes are used
      17  inside literals.  For bytes literals, this is considered illegal.  But
      18  for str literals, those bytes are supposed to be decoded using the
      19  encoding declared for the file (UTF-8 by default).
      20  
      21  We have to test this with various file encodings.  We also test it with
      22  exec()/eval(), which uses a different code path.
      23  
      24  This file is really about correct treatment of encodings and
      25  backslashes.  It doesn't concern itself with issues like single
      26  vs. double quotes or singly- vs. triply-quoted strings: that's dealt
      27  with elsewhere (I assume).
      28  """
      29  
      30  import os
      31  import sys
      32  import shutil
      33  import tempfile
      34  import unittest
      35  import warnings
      36  
      37  
# Source text of the module that check_encoding() writes to disk and imports.
# The single %s is filled in with the encoding name to form the coding cookie
# on the first line.  The outer literal is raw, so the generated module
# contains the backslash escapes themselves (e.g. '\x01'), and its asserts
# check how the parser interprets them in normal and raw str literals.
# test_template() verifies that this text is printable ASCII plus newlines.
TEMPLATE = r"""# coding: %s
a = 'x'
assert ord(a) == 120
b = '\x01'
assert ord(b) == 1
c = r'\x01'
assert list(map(ord, c)) == [92, 120, 48, 49]
d = '\x81'
assert ord(d) == 0x81
e = r'\x81'
assert list(map(ord, e)) == [92, 120, 56, 49]
f = '\u1881'
assert ord(f) == 0x1881
g = r'\u1881'
assert list(map(ord, g)) == [92, 117, 49, 56, 56, 49]
h = '\U0001d120'
assert ord(h) == 0x1d120
i = r'\U0001d120'
assert list(map(ord, i)) == [92, 85, 48, 48, 48, 49, 100, 49, 50, 48]
"""
      58  
      59  
      60  def byte(i):
      61      return bytes([i])
      62  
      63  
      64  class ESC[4;38;5;81mTestLiterals(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
      65  
      66      def setUp(self):
      67          self.save_path = sys.path[:]
      68          self.tmpdir = tempfile.mkdtemp()
      69          sys.path.insert(0, self.tmpdir)
      70  
      71      def tearDown(self):
      72          sys.path[:] = self.save_path
      73          shutil.rmtree(self.tmpdir, ignore_errors=True)
      74  
      75      def test_template(self):
      76          # Check that the template doesn't contain any non-printables
      77          # except for \n.
      78          for c in TEMPLATE:
      79              assert c == '\n' or ' ' <= c <= '~', repr(c)
      80  
      81      def test_eval_str_normal(self):
      82          self.assertEqual(eval(""" 'x' """), 'x')
      83          self.assertEqual(eval(r""" '\x01' """), chr(1))
      84          self.assertEqual(eval(""" '\x01' """), chr(1))
      85          self.assertEqual(eval(r""" '\x81' """), chr(0x81))
      86          self.assertEqual(eval(""" '\x81' """), chr(0x81))
      87          self.assertEqual(eval(r""" '\u1881' """), chr(0x1881))
      88          self.assertEqual(eval(""" '\u1881' """), chr(0x1881))
      89          self.assertEqual(eval(r""" '\U0001d120' """), chr(0x1d120))
      90          self.assertEqual(eval(""" '\U0001d120' """), chr(0x1d120))
      91  
      92      def test_eval_str_incomplete(self):
      93          self.assertRaises(SyntaxError, eval, r""" '\x' """)
      94          self.assertRaises(SyntaxError, eval, r""" '\x0' """)
      95          self.assertRaises(SyntaxError, eval, r""" '\u' """)
      96          self.assertRaises(SyntaxError, eval, r""" '\u0' """)
      97          self.assertRaises(SyntaxError, eval, r""" '\u00' """)
      98          self.assertRaises(SyntaxError, eval, r""" '\u000' """)
      99          self.assertRaises(SyntaxError, eval, r""" '\U' """)
     100          self.assertRaises(SyntaxError, eval, r""" '\U0' """)
     101          self.assertRaises(SyntaxError, eval, r""" '\U00' """)
     102          self.assertRaises(SyntaxError, eval, r""" '\U000' """)
     103          self.assertRaises(SyntaxError, eval, r""" '\U0000' """)
     104          self.assertRaises(SyntaxError, eval, r""" '\U00000' """)
     105          self.assertRaises(SyntaxError, eval, r""" '\U000000' """)
     106          self.assertRaises(SyntaxError, eval, r""" '\U0000000' """)
     107  
     108      def test_eval_str_invalid_escape(self):
     109          for b in range(1, 128):
     110              if b in b"""\n\r"'01234567NU\\abfnrtuvx""":
     111                  continue
     112              with self.assertWarns(DeprecationWarning):
     113                  self.assertEqual(eval(r"'\%c'" % b), '\\' + chr(b))
     114  
     115          with warnings.catch_warnings(record=True) as w:
     116              warnings.simplefilter('always', category=DeprecationWarning)
     117              eval("'''\n\\z'''")
     118          self.assertEqual(len(w), 1)
     119          self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
     120          self.assertEqual(w[0].filename, '<string>')
     121          self.assertEqual(w[0].lineno, 1)
     122  
     123          with warnings.catch_warnings(record=True) as w:
     124              warnings.simplefilter('error', category=DeprecationWarning)
     125              with self.assertRaises(SyntaxError) as cm:
     126                  eval("'''\n\\z'''")
     127              exc = cm.exception
     128          self.assertEqual(w, [])
     129          self.assertEqual(exc.msg, r"invalid escape sequence '\z'")
     130          self.assertEqual(exc.filename, '<string>')
     131          self.assertEqual(exc.lineno, 1)
     132          self.assertEqual(exc.offset, 1)
     133  
     134          # Check that the warning is raised ony once if there are syntax errors
     135  
     136          with warnings.catch_warnings(record=True) as w:
     137              warnings.simplefilter('always', category=DeprecationWarning)
     138              with self.assertRaises(SyntaxError) as cm:
     139                  eval("'\\e' $")
     140              exc = cm.exception
     141          self.assertEqual(len(w), 1)
     142          self.assertEqual(w[0].category, DeprecationWarning)
     143          self.assertRegex(str(w[0].message), 'invalid escape sequence')
     144          self.assertEqual(w[0].filename, '<string>')
     145  
     146      def test_eval_str_invalid_octal_escape(self):
     147          for i in range(0o400, 0o1000):
     148              with self.assertWarns(DeprecationWarning):
     149                  self.assertEqual(eval(r"'\%o'" % i), chr(i))
     150  
     151          with warnings.catch_warnings(record=True) as w:
     152              warnings.simplefilter('always', category=DeprecationWarning)
     153              eval("'''\n\\407'''")
     154          self.assertEqual(len(w), 1)
     155          self.assertEqual(str(w[0].message),
     156                           r"invalid octal escape sequence '\407'")
     157          self.assertEqual(w[0].filename, '<string>')
     158          self.assertEqual(w[0].lineno, 1)
     159  
     160          with warnings.catch_warnings(record=True) as w:
     161              warnings.simplefilter('error', category=DeprecationWarning)
     162              with self.assertRaises(SyntaxError) as cm:
     163                  eval("'''\n\\407'''")
     164              exc = cm.exception
     165          self.assertEqual(w, [])
     166          self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'")
     167          self.assertEqual(exc.filename, '<string>')
     168          self.assertEqual(exc.lineno, 1)
     169          self.assertEqual(exc.offset, 1)
     170  
     171      def test_eval_str_raw(self):
     172          self.assertEqual(eval(""" r'x' """), 'x')
     173          self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01')
     174          self.assertEqual(eval(""" r'\x01' """), chr(1))
     175          self.assertEqual(eval(r""" r'\x81' """), '\\' + 'x81')
     176          self.assertEqual(eval(""" r'\x81' """), chr(0x81))
     177          self.assertEqual(eval(r""" r'\u1881' """), '\\' + 'u1881')
     178          self.assertEqual(eval(""" r'\u1881' """), chr(0x1881))
     179          self.assertEqual(eval(r""" r'\U0001d120' """), '\\' + 'U0001d120')
     180          self.assertEqual(eval(""" r'\U0001d120' """), chr(0x1d120))
     181  
     182      def test_eval_bytes_normal(self):
     183          self.assertEqual(eval(""" b'x' """), b'x')
     184          self.assertEqual(eval(r""" b'\x01' """), byte(1))
     185          self.assertEqual(eval(""" b'\x01' """), byte(1))
     186          self.assertEqual(eval(r""" b'\x81' """), byte(0x81))
     187          self.assertRaises(SyntaxError, eval, """ b'\x81' """)
     188          self.assertEqual(eval(r""" br'\u1881' """), b'\\' + b'u1881')
     189          self.assertRaises(SyntaxError, eval, """ b'\u1881' """)
     190          self.assertEqual(eval(r""" br'\U0001d120' """), b'\\' + b'U0001d120')
     191          self.assertRaises(SyntaxError, eval, """ b'\U0001d120' """)
     192  
     193      def test_eval_bytes_incomplete(self):
     194          self.assertRaises(SyntaxError, eval, r""" b'\x' """)
     195          self.assertRaises(SyntaxError, eval, r""" b'\x0' """)
     196  
     197      def test_eval_bytes_invalid_escape(self):
     198          for b in range(1, 128):
     199              if b in b"""\n\r"'01234567\\abfnrtvx""":
     200                  continue
     201              with self.assertWarns(DeprecationWarning):
     202                  self.assertEqual(eval(r"b'\%c'" % b), b'\\' + bytes([b]))
     203  
     204          with warnings.catch_warnings(record=True) as w:
     205              warnings.simplefilter('always', category=DeprecationWarning)
     206              eval("b'''\n\\z'''")
     207          self.assertEqual(len(w), 1)
     208          self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
     209          self.assertEqual(w[0].filename, '<string>')
     210          self.assertEqual(w[0].lineno, 1)
     211  
     212          with warnings.catch_warnings(record=True) as w:
     213              warnings.simplefilter('error', category=DeprecationWarning)
     214              with self.assertRaises(SyntaxError) as cm:
     215                  eval("b'''\n\\z'''")
     216              exc = cm.exception
     217          self.assertEqual(w, [])
     218          self.assertEqual(exc.msg, r"invalid escape sequence '\z'")
     219          self.assertEqual(exc.filename, '<string>')
     220          self.assertEqual(exc.lineno, 1)
     221  
     222      def test_eval_bytes_invalid_octal_escape(self):
     223          for i in range(0o400, 0o1000):
     224              with self.assertWarns(DeprecationWarning):
     225                  self.assertEqual(eval(r"b'\%o'" % i), bytes([i & 0o377]))
     226  
     227          with warnings.catch_warnings(record=True) as w:
     228              warnings.simplefilter('always', category=DeprecationWarning)
     229              eval("b'''\n\\407'''")
     230          self.assertEqual(len(w), 1)
     231          self.assertEqual(str(w[0].message),
     232                           r"invalid octal escape sequence '\407'")
     233          self.assertEqual(w[0].filename, '<string>')
     234          self.assertEqual(w[0].lineno, 1)
     235  
     236          with warnings.catch_warnings(record=True) as w:
     237              warnings.simplefilter('error', category=DeprecationWarning)
     238              with self.assertRaises(SyntaxError) as cm:
     239                  eval("b'''\n\\407'''")
     240              exc = cm.exception
     241          self.assertEqual(w, [])
     242          self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'")
     243          self.assertEqual(exc.filename, '<string>')
     244          self.assertEqual(exc.lineno, 1)
     245  
     246      def test_eval_bytes_raw(self):
     247          self.assertEqual(eval(""" br'x' """), b'x')
     248          self.assertEqual(eval(""" rb'x' """), b'x')
     249          self.assertEqual(eval(r""" br'\x01' """), b'\\' + b'x01')
     250          self.assertEqual(eval(r""" rb'\x01' """), b'\\' + b'x01')
     251          self.assertEqual(eval(""" br'\x01' """), byte(1))
     252          self.assertEqual(eval(""" rb'\x01' """), byte(1))
     253          self.assertEqual(eval(r""" br'\x81' """), b"\\" + b"x81")
     254          self.assertEqual(eval(r""" rb'\x81' """), b"\\" + b"x81")
     255          self.assertRaises(SyntaxError, eval, """ br'\x81' """)
     256          self.assertRaises(SyntaxError, eval, """ rb'\x81' """)
     257          self.assertEqual(eval(r""" br'\u1881' """), b"\\" + b"u1881")
     258          self.assertEqual(eval(r""" rb'\u1881' """), b"\\" + b"u1881")
     259          self.assertRaises(SyntaxError, eval, """ br'\u1881' """)
     260          self.assertRaises(SyntaxError, eval, """ rb'\u1881' """)
     261          self.assertEqual(eval(r""" br'\U0001d120' """), b"\\" + b"U0001d120")
     262          self.assertEqual(eval(r""" rb'\U0001d120' """), b"\\" + b"U0001d120")
     263          self.assertRaises(SyntaxError, eval, """ br'\U0001d120' """)
     264          self.assertRaises(SyntaxError, eval, """ rb'\U0001d120' """)
     265          self.assertRaises(SyntaxError, eval, """ bb'' """)
     266          self.assertRaises(SyntaxError, eval, """ rr'' """)
     267          self.assertRaises(SyntaxError, eval, """ brr'' """)
     268          self.assertRaises(SyntaxError, eval, """ bbr'' """)
     269          self.assertRaises(SyntaxError, eval, """ rrb'' """)
     270          self.assertRaises(SyntaxError, eval, """ rbb'' """)
     271  
     272      def test_eval_str_u(self):
     273          self.assertEqual(eval(""" u'x' """), 'x')
     274          self.assertEqual(eval(""" U'\u00e4' """), 'ä')
     275          self.assertEqual(eval(""" u'\N{LATIN SMALL LETTER A WITH DIAERESIS}' """), 'ä')
     276          self.assertRaises(SyntaxError, eval, """ ur'' """)
     277          self.assertRaises(SyntaxError, eval, """ ru'' """)
     278          self.assertRaises(SyntaxError, eval, """ bu'' """)
     279          self.assertRaises(SyntaxError, eval, """ ub'' """)
     280  
     281      def test_uppercase_prefixes(self):
     282          self.assertEqual(eval(""" B'x' """), b'x')
     283          self.assertEqual(eval(r""" R'\x01' """), r'\x01')
     284          self.assertEqual(eval(r""" BR'\x01' """), br'\x01')
     285          self.assertEqual(eval(""" F'{1+1}' """), f'{1+1}')
     286          self.assertEqual(eval(r""" U'\U0001d120' """), u'\U0001d120')
     287  
     288      def check_encoding(self, encoding, extra=""):
     289          modname = "xx_" + encoding.replace("-", "_")
     290          fn = os.path.join(self.tmpdir, modname + ".py")
     291          f = open(fn, "w", encoding=encoding)
     292          try:
     293              f.write(TEMPLATE % encoding)
     294              f.write(extra)
     295          finally:
     296              f.close()
     297          __import__(modname)
     298          del sys.modules[modname]
     299  
     300      def test_file_utf_8(self):
     301          extra = "z = '\u1234'; assert ord(z) == 0x1234\n"
     302          self.check_encoding("utf-8", extra)
     303  
     304      def test_file_utf_8_error(self):
     305          extra = "b'\x80'\n"
     306          self.assertRaises(SyntaxError, self.check_encoding, "utf-8", extra)
     307  
     308      def test_file_utf8(self):
     309          self.check_encoding("utf-8")
     310  
     311      def test_file_iso_8859_1(self):
     312          self.check_encoding("iso-8859-1")
     313  
     314      def test_file_latin_1(self):
     315          self.check_encoding("latin-1")
     316  
     317      def test_file_latin9(self):
     318          self.check_encoding("latin9")
     319  
     320  
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()