(root)/
Python-3.12.0/
Lib/
test/
test_string_literals.py
       1  r"""Test correct treatment of various string literals by the parser.
       2  
       3  There are four types of string literals:
       4  
       5      'abc'             -- normal str
       6      r'abc'            -- raw str
       7      b'xyz'            -- normal bytes
       8      br'xyz' | rb'xyz' -- raw bytes
       9  
      10  The difference between normal and raw strings is of course that in a
      11  raw string, \ escapes (while still used to determine the end of the
      12  literal) are not interpreted, so that r'\x00' contains four
      13  characters: a backslash, an x, and two zeros; while '\x00' contains a
      14  single character (code point zero).
      15  
      16  The tricky thing is what should happen when non-ASCII bytes are used
      17  inside literals.  For bytes literals, this is considered illegal.  But
      18  for str literals, those bytes are supposed to be decoded using the
      19  encoding declared for the file (UTF-8 by default).
      20  
      21  We have to test this with various file encodings.  We also test it with
      22  exec()/eval(), which uses a different code path.
      23  
      24  This file is really about correct treatment of encodings and
      25  backslashes.  It doesn't concern itself with issues like single
      26  vs. double quotes or singly- vs. triply-quoted strings: that's dealt
      27  with elsewhere (I assume).
      28  """
      29  
      30  import os
      31  import sys
      32  import shutil
      33  import tempfile
      34  import unittest
      35  import warnings
      36  
      37  
# Source text of the module that TestLiterals.check_encoding() writes to a
# temporary file and then imports.  The single %s placeholder is filled in
# with the encoding name, so the first line becomes the coding declaration.
# The text is restricted to printable ASCII plus newlines (test_template
# verifies this); every non-ASCII character is spelled with an escape.
TEMPLATE = r"""# coding: %s
a = 'x'
assert ord(a) == 120
b = '\x01'
assert ord(b) == 1
c = r'\x01'
assert list(map(ord, c)) == [92, 120, 48, 49]
d = '\x81'
assert ord(d) == 0x81
e = r'\x81'
assert list(map(ord, e)) == [92, 120, 56, 49]
f = '\u1881'
assert ord(f) == 0x1881
g = r'\u1881'
assert list(map(ord, g)) == [92, 117, 49, 56, 56, 49]
h = '\U0001d120'
assert ord(h) == 0x1d120
i = r'\U0001d120'
assert list(map(ord, i)) == [92, 85, 48, 48, 48, 49, 100, 49, 50, 48]
"""
      58  
      59  
      60  def byte(i):
      61      return bytes([i])
      62  
      63  
      64  class ESC[4;38;5;81mTestLiterals(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
      65  
      66      def setUp(self):
      67          self.save_path = sys.path[:]
      68          self.tmpdir = tempfile.mkdtemp()
      69          sys.path.insert(0, self.tmpdir)
      70  
      71      def tearDown(self):
      72          sys.path[:] = self.save_path
      73          shutil.rmtree(self.tmpdir, ignore_errors=True)
      74  
      75      def test_template(self):
      76          # Check that the template doesn't contain any non-printables
      77          # except for \n.
      78          for c in TEMPLATE:
      79              assert c == '\n' or ' ' <= c <= '~', repr(c)
      80  
      81      def test_eval_str_normal(self):
      82          self.assertEqual(eval(""" 'x' """), 'x')
      83          self.assertEqual(eval(r""" '\x01' """), chr(1))
      84          self.assertEqual(eval(""" '\x01' """), chr(1))
      85          self.assertEqual(eval(r""" '\x81' """), chr(0x81))
      86          self.assertEqual(eval(""" '\x81' """), chr(0x81))
      87          self.assertEqual(eval(r""" '\u1881' """), chr(0x1881))
      88          self.assertEqual(eval(""" '\u1881' """), chr(0x1881))
      89          self.assertEqual(eval(r""" '\U0001d120' """), chr(0x1d120))
      90          self.assertEqual(eval(""" '\U0001d120' """), chr(0x1d120))
      91  
      92      def test_eval_str_incomplete(self):
      93          self.assertRaises(SyntaxError, eval, r""" '\x' """)
      94          self.assertRaises(SyntaxError, eval, r""" '\x0' """)
      95          self.assertRaises(SyntaxError, eval, r""" '\u' """)
      96          self.assertRaises(SyntaxError, eval, r""" '\u0' """)
      97          self.assertRaises(SyntaxError, eval, r""" '\u00' """)
      98          self.assertRaises(SyntaxError, eval, r""" '\u000' """)
      99          self.assertRaises(SyntaxError, eval, r""" '\U' """)
     100          self.assertRaises(SyntaxError, eval, r""" '\U0' """)
     101          self.assertRaises(SyntaxError, eval, r""" '\U00' """)
     102          self.assertRaises(SyntaxError, eval, r""" '\U000' """)
     103          self.assertRaises(SyntaxError, eval, r""" '\U0000' """)
     104          self.assertRaises(SyntaxError, eval, r""" '\U00000' """)
     105          self.assertRaises(SyntaxError, eval, r""" '\U000000' """)
     106          self.assertRaises(SyntaxError, eval, r""" '\U0000000' """)
     107  
     108      def test_eval_str_invalid_escape(self):
     109          for b in range(1, 128):
     110              if b in b"""\n\r"'01234567NU\\abfnrtuvx""":
     111                  continue
     112              with self.assertWarns(SyntaxWarning):
     113                  self.assertEqual(eval(r"'\%c'" % b), '\\' + chr(b))
     114  
     115          with warnings.catch_warnings(record=True) as w:
     116              warnings.simplefilter('always', category=SyntaxWarning)
     117              eval("'''\n\\z'''")
     118          self.assertEqual(len(w), 1)
     119          self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
     120          self.assertEqual(w[0].filename, '<string>')
     121          self.assertEqual(w[0].lineno, 1)
     122  
     123          with warnings.catch_warnings(record=True) as w:
     124              warnings.simplefilter('error', category=SyntaxWarning)
     125              with self.assertRaises(SyntaxError) as cm:
     126                  eval("'''\n\\z'''")
     127              exc = cm.exception
     128          self.assertEqual(w, [])
     129          self.assertEqual(exc.msg, r"invalid escape sequence '\z'")
     130          self.assertEqual(exc.filename, '<string>')
     131          self.assertEqual(exc.lineno, 1)
     132          self.assertEqual(exc.offset, 1)
     133  
     134      def test_eval_str_invalid_octal_escape(self):
     135          for i in range(0o400, 0o1000):
     136              with self.assertWarns(SyntaxWarning):
     137                  self.assertEqual(eval(r"'\%o'" % i), chr(i))
     138  
     139          with warnings.catch_warnings(record=True) as w:
     140              warnings.simplefilter('always', category=SyntaxWarning)
     141              eval("'''\n\\407'''")
     142          self.assertEqual(len(w), 1)
     143          self.assertEqual(str(w[0].message),
     144                           r"invalid octal escape sequence '\407'")
     145          self.assertEqual(w[0].filename, '<string>')
     146          self.assertEqual(w[0].lineno, 1)
     147  
     148          with warnings.catch_warnings(record=True) as w:
     149              warnings.simplefilter('error', category=SyntaxWarning)
     150              with self.assertRaises(SyntaxError) as cm:
     151                  eval("'''\n\\407'''")
     152              exc = cm.exception
     153          self.assertEqual(w, [])
     154          self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'")
     155          self.assertEqual(exc.filename, '<string>')
     156          self.assertEqual(exc.lineno, 1)
     157          self.assertEqual(exc.offset, 1)
     158  
     159      def test_eval_str_raw(self):
     160          self.assertEqual(eval(""" r'x' """), 'x')
     161          self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01')
     162          self.assertEqual(eval(""" r'\x01' """), chr(1))
     163          self.assertEqual(eval(r""" r'\x81' """), '\\' + 'x81')
     164          self.assertEqual(eval(""" r'\x81' """), chr(0x81))
     165          self.assertEqual(eval(r""" r'\u1881' """), '\\' + 'u1881')
     166          self.assertEqual(eval(""" r'\u1881' """), chr(0x1881))
     167          self.assertEqual(eval(r""" r'\U0001d120' """), '\\' + 'U0001d120')
     168          self.assertEqual(eval(""" r'\U0001d120' """), chr(0x1d120))
     169  
     170      def test_eval_bytes_normal(self):
     171          self.assertEqual(eval(""" b'x' """), b'x')
     172          self.assertEqual(eval(r""" b'\x01' """), byte(1))
     173          self.assertEqual(eval(""" b'\x01' """), byte(1))
     174          self.assertEqual(eval(r""" b'\x81' """), byte(0x81))
     175          self.assertRaises(SyntaxError, eval, """ b'\x81' """)
     176          self.assertEqual(eval(r""" br'\u1881' """), b'\\' + b'u1881')
     177          self.assertRaises(SyntaxError, eval, """ b'\u1881' """)
     178          self.assertEqual(eval(r""" br'\U0001d120' """), b'\\' + b'U0001d120')
     179          self.assertRaises(SyntaxError, eval, """ b'\U0001d120' """)
     180  
     181      def test_eval_bytes_incomplete(self):
     182          self.assertRaises(SyntaxError, eval, r""" b'\x' """)
     183          self.assertRaises(SyntaxError, eval, r""" b'\x0' """)
     184  
     185      def test_eval_bytes_invalid_escape(self):
     186          for b in range(1, 128):
     187              if b in b"""\n\r"'01234567\\abfnrtvx""":
     188                  continue
     189              with self.assertWarns(SyntaxWarning):
     190                  self.assertEqual(eval(r"b'\%c'" % b), b'\\' + bytes([b]))
     191  
     192          with warnings.catch_warnings(record=True) as w:
     193              warnings.simplefilter('always', category=SyntaxWarning)
     194              eval("b'''\n\\z'''")
     195          self.assertEqual(len(w), 1)
     196          self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
     197          self.assertEqual(w[0].filename, '<string>')
     198          self.assertEqual(w[0].lineno, 1)
     199  
     200          with warnings.catch_warnings(record=True) as w:
     201              warnings.simplefilter('error', category=SyntaxWarning)
     202              with self.assertRaises(SyntaxError) as cm:
     203                  eval("b'''\n\\z'''")
     204              exc = cm.exception
     205          self.assertEqual(w, [])
     206          self.assertEqual(exc.msg, r"invalid escape sequence '\z'")
     207          self.assertEqual(exc.filename, '<string>')
     208          self.assertEqual(exc.lineno, 1)
     209  
     210      def test_eval_bytes_invalid_octal_escape(self):
     211          for i in range(0o400, 0o1000):
     212              with self.assertWarns(SyntaxWarning):
     213                  self.assertEqual(eval(r"b'\%o'" % i), bytes([i & 0o377]))
     214  
     215          with warnings.catch_warnings(record=True) as w:
     216              warnings.simplefilter('always', category=SyntaxWarning)
     217              eval("b'''\n\\407'''")
     218          self.assertEqual(len(w), 1)
     219          self.assertEqual(str(w[0].message),
     220                           r"invalid octal escape sequence '\407'")
     221          self.assertEqual(w[0].filename, '<string>')
     222          self.assertEqual(w[0].lineno, 1)
     223  
     224          with warnings.catch_warnings(record=True) as w:
     225              warnings.simplefilter('error', category=SyntaxWarning)
     226              with self.assertRaises(SyntaxError) as cm:
     227                  eval("b'''\n\\407'''")
     228              exc = cm.exception
     229          self.assertEqual(w, [])
     230          self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'")
     231          self.assertEqual(exc.filename, '<string>')
     232          self.assertEqual(exc.lineno, 1)
     233  
     234      def test_eval_bytes_raw(self):
     235          self.assertEqual(eval(""" br'x' """), b'x')
     236          self.assertEqual(eval(""" rb'x' """), b'x')
     237          self.assertEqual(eval(r""" br'\x01' """), b'\\' + b'x01')
     238          self.assertEqual(eval(r""" rb'\x01' """), b'\\' + b'x01')
     239          self.assertEqual(eval(""" br'\x01' """), byte(1))
     240          self.assertEqual(eval(""" rb'\x01' """), byte(1))
     241          self.assertEqual(eval(r""" br'\x81' """), b"\\" + b"x81")
     242          self.assertEqual(eval(r""" rb'\x81' """), b"\\" + b"x81")
     243          self.assertRaises(SyntaxError, eval, """ br'\x81' """)
     244          self.assertRaises(SyntaxError, eval, """ rb'\x81' """)
     245          self.assertEqual(eval(r""" br'\u1881' """), b"\\" + b"u1881")
     246          self.assertEqual(eval(r""" rb'\u1881' """), b"\\" + b"u1881")
     247          self.assertRaises(SyntaxError, eval, """ br'\u1881' """)
     248          self.assertRaises(SyntaxError, eval, """ rb'\u1881' """)
     249          self.assertEqual(eval(r""" br'\U0001d120' """), b"\\" + b"U0001d120")
     250          self.assertEqual(eval(r""" rb'\U0001d120' """), b"\\" + b"U0001d120")
     251          self.assertRaises(SyntaxError, eval, """ br'\U0001d120' """)
     252          self.assertRaises(SyntaxError, eval, """ rb'\U0001d120' """)
     253          self.assertRaises(SyntaxError, eval, """ bb'' """)
     254          self.assertRaises(SyntaxError, eval, """ rr'' """)
     255          self.assertRaises(SyntaxError, eval, """ brr'' """)
     256          self.assertRaises(SyntaxError, eval, """ bbr'' """)
     257          self.assertRaises(SyntaxError, eval, """ rrb'' """)
     258          self.assertRaises(SyntaxError, eval, """ rbb'' """)
     259  
     260      def test_eval_str_u(self):
     261          self.assertEqual(eval(""" u'x' """), 'x')
     262          self.assertEqual(eval(""" U'\u00e4' """), 'ä')
     263          self.assertEqual(eval(""" u'\N{LATIN SMALL LETTER A WITH DIAERESIS}' """), 'ä')
     264          self.assertRaises(SyntaxError, eval, """ ur'' """)
     265          self.assertRaises(SyntaxError, eval, """ ru'' """)
     266          self.assertRaises(SyntaxError, eval, """ bu'' """)
     267          self.assertRaises(SyntaxError, eval, """ ub'' """)
     268  
     269      def test_uppercase_prefixes(self):
     270          self.assertEqual(eval(""" B'x' """), b'x')
     271          self.assertEqual(eval(r""" R'\x01' """), r'\x01')
     272          self.assertEqual(eval(r""" BR'\x01' """), br'\x01')
     273          self.assertEqual(eval(""" F'{1+1}' """), f'{1+1}')
     274          self.assertEqual(eval(r""" U'\U0001d120' """), u'\U0001d120')
     275  
     276      def check_encoding(self, encoding, extra=""):
     277          modname = "xx_" + encoding.replace("-", "_")
     278          fn = os.path.join(self.tmpdir, modname + ".py")
     279          f = open(fn, "w", encoding=encoding)
     280          try:
     281              f.write(TEMPLATE % encoding)
     282              f.write(extra)
     283          finally:
     284              f.close()
     285          __import__(modname)
     286          del sys.modules[modname]
     287  
     288      def test_file_utf_8(self):
     289          extra = "z = '\u1234'; assert ord(z) == 0x1234\n"
     290          self.check_encoding("utf-8", extra)
     291  
     292      def test_file_utf_8_error(self):
     293          extra = "b'\x80'\n"
     294          self.assertRaises(SyntaxError, self.check_encoding, "utf-8", extra)
     295  
     296      def test_file_utf8(self):
     297          self.check_encoding("utf-8")
     298  
     299      def test_file_iso_8859_1(self):
     300          self.check_encoding("iso-8859-1")
     301  
     302      def test_file_latin_1(self):
     303          self.check_encoding("latin-1")
     304  
     305      def test_file_latin9(self):
     306          self.check_encoding("latin9")
     307  
     308  
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()