1 r"""Test correct treatment of various string literals by the parser.
2
3 There are four types of string literals:
4
5 'abc' -- normal str
6 r'abc' -- raw str
7 b'xyz' -- normal bytes
8 br'xyz' | rb'xyz' -- raw bytes
9
10 The difference between normal and raw strings is of course that in a
11 raw string, \ escapes (while still used to determine the end of the
12 literal) are not interpreted, so that r'\x00' contains four
13 characters: a backslash, an x, and two zeros; while '\x00' contains a
14 single character (code point zero).
15
16 The tricky thing is what should happen when non-ASCII bytes are used
17 inside literals. For bytes literals, this is considered illegal. But
18 for str literals, those bytes are supposed to be decoded using the
19 encoding declared for the file (UTF-8 by default).
20
21 We have to test this with various file encodings. We also test it with
22 exec()/eval(), which uses a different code path.
23
24 This file is really about correct treatment of encodings and
25 backslashes. It doesn't concern itself with issues like single
26 vs. double quotes or singly- vs. triply-quoted strings: that's dealt
27 with elsewhere (I assume).
28 """
29
30 import os
31 import sys
32 import shutil
33 import tempfile
34 import unittest
35 import warnings
36
37
# Source text of the module that check_encoding() writes to disk and imports.
# The single %s is replaced by the name of the encoding under test, giving the
# generated file a matching coding declaration.  The text is kept as a raw
# string made only of printable ASCII, so every backslash sequence below
# reaches the generated file verbatim and is interpreted by the parser that
# imports that file, not by the parser that reads this one.
TEMPLATE = r"""# coding: %s
a = 'x'
assert ord(a) == 120
b = '\x01'
assert ord(b) == 1
c = r'\x01'
assert list(map(ord, c)) == [92, 120, 48, 49]
d = '\x81'
assert ord(d) == 0x81
e = r'\x81'
assert list(map(ord, e)) == [92, 120, 56, 49]
f = '\u1881'
assert ord(f) == 0x1881
g = r'\u1881'
assert list(map(ord, g)) == [92, 117, 49, 56, 56, 49]
h = '\U0001d120'
assert ord(h) == 0x1d120
i = r'\U0001d120'
assert list(map(ord, i)) == [92, 85, 48, 48, 48, 49, 100, 49, 50, 48]
"""
58
59
def byte(i):
    """Return a bytes object of length one whose only element is *i*."""
    single = (i,)
    return bytes(single)
62
63
class TestLiterals(unittest.TestCase):
    """Check how str and bytes literals are parsed by eval() and by import.

    The eval() tests come in pairs: a source string written as r\"\"\"...\"\"\"
    hands the backslash sequence to eval() untouched, while the same text
    written as \"\"\"...\"\"\" is decoded when this file is parsed, so eval()
    receives the resulting character itself.  The test_file_* tests write
    TEMPLATE to a temporary module in a given encoding and import it.
    """

    def setUp(self):
        # Give each test a private directory at the front of sys.path so the
        # modules written by check_encoding() can be imported by name.
        self.save_path = sys.path[:]
        self.tmpdir = tempfile.mkdtemp()
        sys.path.insert(0, self.tmpdir)

    def tearDown(self):
        # Restore sys.path in place, then discard the temporary directory.
        sys.path[:] = self.save_path
        shutil.rmtree(self.tmpdir, ignore_errors=True)

    def test_template(self):
        # Check that the template doesn't contain any non-printables
        # except for \n.
        for c in TEMPLATE:
            # A unittest assertion (rather than a bare assert) keeps this
            # check alive when the tests are run with "python -O".
            self.assertTrue(c == '\n' or ' ' <= c <= '~', repr(c))

    def test_eval_str_normal(self):
        self.assertEqual(eval(""" 'x' """), 'x')
        self.assertEqual(eval(r""" '\x01' """), chr(1))
        self.assertEqual(eval(""" '\x01' """), chr(1))
        self.assertEqual(eval(r""" '\x81' """), chr(0x81))
        self.assertEqual(eval(""" '\x81' """), chr(0x81))
        self.assertEqual(eval(r""" '\u1881' """), chr(0x1881))
        self.assertEqual(eval(""" '\u1881' """), chr(0x1881))
        self.assertEqual(eval(r""" '\U0001d120' """), chr(0x1d120))
        self.assertEqual(eval(""" '\U0001d120' """), chr(0x1d120))

    def test_eval_str_incomplete(self):
        # Truncated \x, \u and \U escapes must be rejected at every length
        # short of the required number of hex digits.
        self.assertRaises(SyntaxError, eval, r""" '\x' """)
        self.assertRaises(SyntaxError, eval, r""" '\x0' """)
        self.assertRaises(SyntaxError, eval, r""" '\u' """)
        self.assertRaises(SyntaxError, eval, r""" '\u0' """)
        self.assertRaises(SyntaxError, eval, r""" '\u00' """)
        self.assertRaises(SyntaxError, eval, r""" '\u000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0' """)
        self.assertRaises(SyntaxError, eval, r""" '\U00' """)
        self.assertRaises(SyntaxError, eval, r""" '\U000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U00000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U000000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0000000' """)

    def test_eval_str_invalid_escape(self):
        # Every ASCII character that does not start a recognised str escape
        # must warn and leave the backslash in the result.
        for b in range(1, 128):
            if b in b"""\n\r"'01234567NU\\abfnrtuvx""":
                continue
            with self.assertWarns(DeprecationWarning):
                self.assertEqual(eval(r"'\%c'" % b), '\\' + chr(b))

        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always', category=DeprecationWarning)
            eval("'''\n\\z'''")
        self.assertEqual(len(w), 1)
        self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
        self.assertEqual(w[0].filename, '<string>')
        self.assertEqual(w[0].lineno, 1)

        # With the warning escalated to an error it must surface as a
        # SyntaxError and nothing may be recorded as a warning.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('error', category=DeprecationWarning)
            with self.assertRaises(SyntaxError) as cm:
                eval("'''\n\\z'''")
            exc = cm.exception
        self.assertEqual(w, [])
        self.assertEqual(exc.msg, r"invalid escape sequence '\z'")
        self.assertEqual(exc.filename, '<string>')
        self.assertEqual(exc.lineno, 1)
        self.assertEqual(exc.offset, 1)

        # Check that the warning is raised only once if there are syntax errors

        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always', category=DeprecationWarning)
            with self.assertRaises(SyntaxError):
                eval("'\\e' $")
        self.assertEqual(len(w), 1)
        self.assertEqual(w[0].category, DeprecationWarning)
        self.assertRegex(str(w[0].message), 'invalid escape sequence')
        self.assertEqual(w[0].filename, '<string>')

    def test_eval_str_invalid_octal_escape(self):
        # Octal escapes above \377 warn; in a str they still yield chr(value).
        for i in range(0o400, 0o1000):
            with self.assertWarns(DeprecationWarning):
                self.assertEqual(eval(r"'\%o'" % i), chr(i))

        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always', category=DeprecationWarning)
            eval("'''\n\\407'''")
        self.assertEqual(len(w), 1)
        self.assertEqual(str(w[0].message),
                         r"invalid octal escape sequence '\407'")
        self.assertEqual(w[0].filename, '<string>')
        self.assertEqual(w[0].lineno, 1)

        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('error', category=DeprecationWarning)
            with self.assertRaises(SyntaxError) as cm:
                eval("'''\n\\407'''")
            exc = cm.exception
        self.assertEqual(w, [])
        self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'")
        self.assertEqual(exc.filename, '<string>')
        self.assertEqual(exc.lineno, 1)
        self.assertEqual(exc.offset, 1)

    def test_eval_str_raw(self):
        self.assertEqual(eval(""" r'x' """), 'x')
        self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01')
        self.assertEqual(eval(""" r'\x01' """), chr(1))
        self.assertEqual(eval(r""" r'\x81' """), '\\' + 'x81')
        self.assertEqual(eval(""" r'\x81' """), chr(0x81))
        self.assertEqual(eval(r""" r'\u1881' """), '\\' + 'u1881')
        self.assertEqual(eval(""" r'\u1881' """), chr(0x1881))
        self.assertEqual(eval(r""" r'\U0001d120' """), '\\' + 'U0001d120')
        self.assertEqual(eval(""" r'\U0001d120' """), chr(0x1d120))

    def test_eval_bytes_normal(self):
        # Non-ASCII characters placed directly inside a bytes literal are a
        # SyntaxError; the same values spelled as escapes are accepted.
        self.assertEqual(eval(""" b'x' """), b'x')
        self.assertEqual(eval(r""" b'\x01' """), byte(1))
        self.assertEqual(eval(""" b'\x01' """), byte(1))
        self.assertEqual(eval(r""" b'\x81' """), byte(0x81))
        self.assertRaises(SyntaxError, eval, """ b'\x81' """)
        self.assertEqual(eval(r""" br'\u1881' """), b'\\' + b'u1881')
        self.assertRaises(SyntaxError, eval, """ b'\u1881' """)
        self.assertEqual(eval(r""" br'\U0001d120' """), b'\\' + b'U0001d120')
        self.assertRaises(SyntaxError, eval, """ b'\U0001d120' """)

    def test_eval_bytes_incomplete(self):
        self.assertRaises(SyntaxError, eval, r""" b'\x' """)
        self.assertRaises(SyntaxError, eval, r""" b'\x0' """)

    def test_eval_bytes_invalid_escape(self):
        # Same as the str variant, except that N, U and u are not skipped
        # here: in a bytes literal they are expected to warn as well.
        for b in range(1, 128):
            if b in b"""\n\r"'01234567\\abfnrtvx""":
                continue
            with self.assertWarns(DeprecationWarning):
                self.assertEqual(eval(r"b'\%c'" % b), b'\\' + bytes([b]))

        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always', category=DeprecationWarning)
            eval("b'''\n\\z'''")
        self.assertEqual(len(w), 1)
        self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
        self.assertEqual(w[0].filename, '<string>')
        self.assertEqual(w[0].lineno, 1)

        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('error', category=DeprecationWarning)
            with self.assertRaises(SyntaxError) as cm:
                eval("b'''\n\\z'''")
            exc = cm.exception
        self.assertEqual(w, [])
        self.assertEqual(exc.msg, r"invalid escape sequence '\z'")
        self.assertEqual(exc.filename, '<string>')
        self.assertEqual(exc.lineno, 1)

    def test_eval_bytes_invalid_octal_escape(self):
        # Octal escapes above \377 warn; in bytes only the low eight bits of
        # the value are kept.
        for i in range(0o400, 0o1000):
            with self.assertWarns(DeprecationWarning):
                self.assertEqual(eval(r"b'\%o'" % i), bytes([i & 0o377]))

        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always', category=DeprecationWarning)
            eval("b'''\n\\407'''")
        self.assertEqual(len(w), 1)
        self.assertEqual(str(w[0].message),
                         r"invalid octal escape sequence '\407'")
        self.assertEqual(w[0].filename, '<string>')
        self.assertEqual(w[0].lineno, 1)

        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('error', category=DeprecationWarning)
            with self.assertRaises(SyntaxError) as cm:
                eval("b'''\n\\407'''")
            exc = cm.exception
        self.assertEqual(w, [])
        self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'")
        self.assertEqual(exc.filename, '<string>')
        self.assertEqual(exc.lineno, 1)

    def test_eval_bytes_raw(self):
        self.assertEqual(eval(""" br'x' """), b'x')
        self.assertEqual(eval(""" rb'x' """), b'x')
        self.assertEqual(eval(r""" br'\x01' """), b'\\' + b'x01')
        self.assertEqual(eval(r""" rb'\x01' """), b'\\' + b'x01')
        self.assertEqual(eval(""" br'\x01' """), byte(1))
        self.assertEqual(eval(""" rb'\x01' """), byte(1))
        self.assertEqual(eval(r""" br'\x81' """), b"\\" + b"x81")
        self.assertEqual(eval(r""" rb'\x81' """), b"\\" + b"x81")
        self.assertRaises(SyntaxError, eval, """ br'\x81' """)
        self.assertRaises(SyntaxError, eval, """ rb'\x81' """)
        self.assertEqual(eval(r""" br'\u1881' """), b"\\" + b"u1881")
        self.assertEqual(eval(r""" rb'\u1881' """), b"\\" + b"u1881")
        self.assertRaises(SyntaxError, eval, """ br'\u1881' """)
        self.assertRaises(SyntaxError, eval, """ rb'\u1881' """)
        self.assertEqual(eval(r""" br'\U0001d120' """), b"\\" + b"U0001d120")
        self.assertEqual(eval(r""" rb'\U0001d120' """), b"\\" + b"U0001d120")
        self.assertRaises(SyntaxError, eval, """ br'\U0001d120' """)
        self.assertRaises(SyntaxError, eval, """ rb'\U0001d120' """)
        # Doubled or repeated prefix letters are not valid prefixes.
        self.assertRaises(SyntaxError, eval, """ bb'' """)
        self.assertRaises(SyntaxError, eval, """ rr'' """)
        self.assertRaises(SyntaxError, eval, """ brr'' """)
        self.assertRaises(SyntaxError, eval, """ bbr'' """)
        self.assertRaises(SyntaxError, eval, """ rrb'' """)
        self.assertRaises(SyntaxError, eval, """ rbb'' """)

    def test_eval_str_u(self):
        self.assertEqual(eval(""" u'x' """), 'x')
        self.assertEqual(eval(""" U'\u00e4' """), 'ä')
        self.assertEqual(eval(""" u'\N{LATIN SMALL LETTER A WITH DIAERESIS}' """), 'ä')
        # The u prefix cannot be combined with r or b.
        self.assertRaises(SyntaxError, eval, """ ur'' """)
        self.assertRaises(SyntaxError, eval, """ ru'' """)
        self.assertRaises(SyntaxError, eval, """ bu'' """)
        self.assertRaises(SyntaxError, eval, """ ub'' """)

    def test_uppercase_prefixes(self):
        self.assertEqual(eval(""" B'x' """), b'x')
        self.assertEqual(eval(r""" R'\x01' """), r'\x01')
        self.assertEqual(eval(r""" BR'\x01' """), br'\x01')
        self.assertEqual(eval(""" F'{1+1}' """), f'{1+1}')
        self.assertEqual(eval(r""" U'\U0001d120' """), u'\U0001d120')

    def check_encoding(self, encoding, extra=""):
        """Write TEMPLATE to a module encoded as *encoding* and import it.

        The module is named "xx_" plus *encoding* with dashes turned into
        underscores and is created in self.tmpdir.  TEMPLATE is filled in
        with *encoding* as its coding declaration and *extra* is appended
        verbatim.  Any exception raised while importing the module
        propagates to the caller.
        """
        modname = "xx_" + encoding.replace("-", "_")
        fn = os.path.join(self.tmpdir, modname + ".py")
        with open(fn, "w", encoding=encoding) as f:
            f.write(TEMPLATE % encoding)
            f.write(extra)
        __import__(modname)
        # Forget the module so a later import of the same name re-reads the
        # file instead of reusing the cached module object.
        del sys.modules[modname]

    def test_file_utf_8(self):
        extra = "z = '\u1234'; assert ord(z) == 0x1234\n"
        self.check_encoding("utf-8", extra)

    def test_file_utf_8_error(self):
        extra = "b'\x80'\n"
        self.assertRaises(SyntaxError, self.check_encoding, "utf-8", extra)

    def test_file_utf8(self):
        # Exercise the dash-less alias; "utf-8" is covered by test_file_utf_8.
        self.check_encoding("utf8")

    def test_file_iso_8859_1(self):
        self.check_encoding("iso-8859-1")

    def test_file_latin_1(self):
        self.check_encoding("latin-1")

    def test_file_latin9(self):
        self.check_encoding("latin9")
319
320
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()