1 r"""Test correct treatment of various string literals by the parser.
2
3 There are four types of string literals:
4
5 'abc' -- normal str
6 r'abc' -- raw str
7 b'xyz' -- normal bytes
8 br'xyz' | rb'xyz' -- raw bytes
9
10 The difference between normal and raw strings is of course that in a
11 raw string, \ escapes (while still used to determine the end of the
12 literal) are not interpreted, so that r'\x00' contains four
13 characters: a backslash, an x, and two zeros; while '\x00' contains a
14 single character (code point zero).
15
16 The tricky thing is what should happen when non-ASCII bytes are used
17 inside literals. For bytes literals, this is considered illegal. But
18 for str literals, those bytes are supposed to be decoded using the
19 encoding declared for the file (UTF-8 by default).
20
21 We have to test this with various file encodings. We also test it with
22 exec()/eval(), which uses a different code path.
23
24 This file is really about correct treatment of encodings and
25 backslashes. It doesn't concern itself with issues like single
26 vs. double quotes or singly- vs. triply-quoted strings: that's dealt
27 with elsewhere (I assume).
28 """
29
30 import os
31 import sys
32 import shutil
33 import tempfile
34 import unittest
35 import warnings
36
37
# Source text of the throw-away modules written by
# TestLiterals.check_encoding().  The "%s" in the first line is filled in
# with the encoding under test, producing the module's coding cookie.
# Because this is a raw string, the backslash escapes below are written to
# the generated file verbatim and are only interpreted when that file is
# compiled.  TestLiterals.test_template() checks that the text contains
# nothing but printable ASCII and newlines.
TEMPLATE = r"""# coding: %s
a = 'x'
assert ord(a) == 120
b = '\x01'
assert ord(b) == 1
c = r'\x01'
assert list(map(ord, c)) == [92, 120, 48, 49]
d = '\x81'
assert ord(d) == 0x81
e = r'\x81'
assert list(map(ord, e)) == [92, 120, 56, 49]
f = '\u1881'
assert ord(f) == 0x1881
g = r'\u1881'
assert list(map(ord, g)) == [92, 117, 49, 56, 56, 49]
h = '\U0001d120'
assert ord(h) == 0x1d120
i = r'\U0001d120'
assert list(map(ord, i)) == [92, 85, 48, 48, 48, 49, 100, 49, 50, 48]
"""
58
59
def byte(i):
    """Return a bytes object of length one whose only item is *i*."""
    single_item = (i,)
    return bytes(single_item)
62
63
64 class ESC[4;38;5;81mTestLiterals(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
65
66 def setUp(self):
67 self.save_path = sys.path[:]
68 self.tmpdir = tempfile.mkdtemp()
69 sys.path.insert(0, self.tmpdir)
70
71 def tearDown(self):
72 sys.path[:] = self.save_path
73 shutil.rmtree(self.tmpdir, ignore_errors=True)
74
75 def test_template(self):
76 # Check that the template doesn't contain any non-printables
77 # except for \n.
78 for c in TEMPLATE:
79 assert c == '\n' or ' ' <= c <= '~', repr(c)
80
81 def test_eval_str_normal(self):
82 self.assertEqual(eval(""" 'x' """), 'x')
83 self.assertEqual(eval(r""" '\x01' """), chr(1))
84 self.assertEqual(eval(""" '\x01' """), chr(1))
85 self.assertEqual(eval(r""" '\x81' """), chr(0x81))
86 self.assertEqual(eval(""" '\x81' """), chr(0x81))
87 self.assertEqual(eval(r""" '\u1881' """), chr(0x1881))
88 self.assertEqual(eval(""" '\u1881' """), chr(0x1881))
89 self.assertEqual(eval(r""" '\U0001d120' """), chr(0x1d120))
90 self.assertEqual(eval(""" '\U0001d120' """), chr(0x1d120))
91
92 def test_eval_str_incomplete(self):
93 self.assertRaises(SyntaxError, eval, r""" '\x' """)
94 self.assertRaises(SyntaxError, eval, r""" '\x0' """)
95 self.assertRaises(SyntaxError, eval, r""" '\u' """)
96 self.assertRaises(SyntaxError, eval, r""" '\u0' """)
97 self.assertRaises(SyntaxError, eval, r""" '\u00' """)
98 self.assertRaises(SyntaxError, eval, r""" '\u000' """)
99 self.assertRaises(SyntaxError, eval, r""" '\U' """)
100 self.assertRaises(SyntaxError, eval, r""" '\U0' """)
101 self.assertRaises(SyntaxError, eval, r""" '\U00' """)
102 self.assertRaises(SyntaxError, eval, r""" '\U000' """)
103 self.assertRaises(SyntaxError, eval, r""" '\U0000' """)
104 self.assertRaises(SyntaxError, eval, r""" '\U00000' """)
105 self.assertRaises(SyntaxError, eval, r""" '\U000000' """)
106 self.assertRaises(SyntaxError, eval, r""" '\U0000000' """)
107
108 def test_eval_str_invalid_escape(self):
109 for b in range(1, 128):
110 if b in b"""\n\r"'01234567NU\\abfnrtuvx""":
111 continue
112 with self.assertWarns(SyntaxWarning):
113 self.assertEqual(eval(r"'\%c'" % b), '\\' + chr(b))
114
115 with warnings.catch_warnings(record=True) as w:
116 warnings.simplefilter('always', category=SyntaxWarning)
117 eval("'''\n\\z'''")
118 self.assertEqual(len(w), 1)
119 self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
120 self.assertEqual(w[0].filename, '<string>')
121 self.assertEqual(w[0].lineno, 1)
122
123 with warnings.catch_warnings(record=True) as w:
124 warnings.simplefilter('error', category=SyntaxWarning)
125 with self.assertRaises(SyntaxError) as cm:
126 eval("'''\n\\z'''")
127 exc = cm.exception
128 self.assertEqual(w, [])
129 self.assertEqual(exc.msg, r"invalid escape sequence '\z'")
130 self.assertEqual(exc.filename, '<string>')
131 self.assertEqual(exc.lineno, 1)
132 self.assertEqual(exc.offset, 1)
133
134 def test_eval_str_invalid_octal_escape(self):
135 for i in range(0o400, 0o1000):
136 with self.assertWarns(SyntaxWarning):
137 self.assertEqual(eval(r"'\%o'" % i), chr(i))
138
139 with warnings.catch_warnings(record=True) as w:
140 warnings.simplefilter('always', category=SyntaxWarning)
141 eval("'''\n\\407'''")
142 self.assertEqual(len(w), 1)
143 self.assertEqual(str(w[0].message),
144 r"invalid octal escape sequence '\407'")
145 self.assertEqual(w[0].filename, '<string>')
146 self.assertEqual(w[0].lineno, 1)
147
148 with warnings.catch_warnings(record=True) as w:
149 warnings.simplefilter('error', category=SyntaxWarning)
150 with self.assertRaises(SyntaxError) as cm:
151 eval("'''\n\\407'''")
152 exc = cm.exception
153 self.assertEqual(w, [])
154 self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'")
155 self.assertEqual(exc.filename, '<string>')
156 self.assertEqual(exc.lineno, 1)
157 self.assertEqual(exc.offset, 1)
158
159 def test_eval_str_raw(self):
160 self.assertEqual(eval(""" r'x' """), 'x')
161 self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01')
162 self.assertEqual(eval(""" r'\x01' """), chr(1))
163 self.assertEqual(eval(r""" r'\x81' """), '\\' + 'x81')
164 self.assertEqual(eval(""" r'\x81' """), chr(0x81))
165 self.assertEqual(eval(r""" r'\u1881' """), '\\' + 'u1881')
166 self.assertEqual(eval(""" r'\u1881' """), chr(0x1881))
167 self.assertEqual(eval(r""" r'\U0001d120' """), '\\' + 'U0001d120')
168 self.assertEqual(eval(""" r'\U0001d120' """), chr(0x1d120))
169
170 def test_eval_bytes_normal(self):
171 self.assertEqual(eval(""" b'x' """), b'x')
172 self.assertEqual(eval(r""" b'\x01' """), byte(1))
173 self.assertEqual(eval(""" b'\x01' """), byte(1))
174 self.assertEqual(eval(r""" b'\x81' """), byte(0x81))
175 self.assertRaises(SyntaxError, eval, """ b'\x81' """)
176 self.assertEqual(eval(r""" br'\u1881' """), b'\\' + b'u1881')
177 self.assertRaises(SyntaxError, eval, """ b'\u1881' """)
178 self.assertEqual(eval(r""" br'\U0001d120' """), b'\\' + b'U0001d120')
179 self.assertRaises(SyntaxError, eval, """ b'\U0001d120' """)
180
181 def test_eval_bytes_incomplete(self):
182 self.assertRaises(SyntaxError, eval, r""" b'\x' """)
183 self.assertRaises(SyntaxError, eval, r""" b'\x0' """)
184
185 def test_eval_bytes_invalid_escape(self):
186 for b in range(1, 128):
187 if b in b"""\n\r"'01234567\\abfnrtvx""":
188 continue
189 with self.assertWarns(SyntaxWarning):
190 self.assertEqual(eval(r"b'\%c'" % b), b'\\' + bytes([b]))
191
192 with warnings.catch_warnings(record=True) as w:
193 warnings.simplefilter('always', category=SyntaxWarning)
194 eval("b'''\n\\z'''")
195 self.assertEqual(len(w), 1)
196 self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
197 self.assertEqual(w[0].filename, '<string>')
198 self.assertEqual(w[0].lineno, 1)
199
200 with warnings.catch_warnings(record=True) as w:
201 warnings.simplefilter('error', category=SyntaxWarning)
202 with self.assertRaises(SyntaxError) as cm:
203 eval("b'''\n\\z'''")
204 exc = cm.exception
205 self.assertEqual(w, [])
206 self.assertEqual(exc.msg, r"invalid escape sequence '\z'")
207 self.assertEqual(exc.filename, '<string>')
208 self.assertEqual(exc.lineno, 1)
209
210 def test_eval_bytes_invalid_octal_escape(self):
211 for i in range(0o400, 0o1000):
212 with self.assertWarns(SyntaxWarning):
213 self.assertEqual(eval(r"b'\%o'" % i), bytes([i & 0o377]))
214
215 with warnings.catch_warnings(record=True) as w:
216 warnings.simplefilter('always', category=SyntaxWarning)
217 eval("b'''\n\\407'''")
218 self.assertEqual(len(w), 1)
219 self.assertEqual(str(w[0].message),
220 r"invalid octal escape sequence '\407'")
221 self.assertEqual(w[0].filename, '<string>')
222 self.assertEqual(w[0].lineno, 1)
223
224 with warnings.catch_warnings(record=True) as w:
225 warnings.simplefilter('error', category=SyntaxWarning)
226 with self.assertRaises(SyntaxError) as cm:
227 eval("b'''\n\\407'''")
228 exc = cm.exception
229 self.assertEqual(w, [])
230 self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'")
231 self.assertEqual(exc.filename, '<string>')
232 self.assertEqual(exc.lineno, 1)
233
234 def test_eval_bytes_raw(self):
235 self.assertEqual(eval(""" br'x' """), b'x')
236 self.assertEqual(eval(""" rb'x' """), b'x')
237 self.assertEqual(eval(r""" br'\x01' """), b'\\' + b'x01')
238 self.assertEqual(eval(r""" rb'\x01' """), b'\\' + b'x01')
239 self.assertEqual(eval(""" br'\x01' """), byte(1))
240 self.assertEqual(eval(""" rb'\x01' """), byte(1))
241 self.assertEqual(eval(r""" br'\x81' """), b"\\" + b"x81")
242 self.assertEqual(eval(r""" rb'\x81' """), b"\\" + b"x81")
243 self.assertRaises(SyntaxError, eval, """ br'\x81' """)
244 self.assertRaises(SyntaxError, eval, """ rb'\x81' """)
245 self.assertEqual(eval(r""" br'\u1881' """), b"\\" + b"u1881")
246 self.assertEqual(eval(r""" rb'\u1881' """), b"\\" + b"u1881")
247 self.assertRaises(SyntaxError, eval, """ br'\u1881' """)
248 self.assertRaises(SyntaxError, eval, """ rb'\u1881' """)
249 self.assertEqual(eval(r""" br'\U0001d120' """), b"\\" + b"U0001d120")
250 self.assertEqual(eval(r""" rb'\U0001d120' """), b"\\" + b"U0001d120")
251 self.assertRaises(SyntaxError, eval, """ br'\U0001d120' """)
252 self.assertRaises(SyntaxError, eval, """ rb'\U0001d120' """)
253 self.assertRaises(SyntaxError, eval, """ bb'' """)
254 self.assertRaises(SyntaxError, eval, """ rr'' """)
255 self.assertRaises(SyntaxError, eval, """ brr'' """)
256 self.assertRaises(SyntaxError, eval, """ bbr'' """)
257 self.assertRaises(SyntaxError, eval, """ rrb'' """)
258 self.assertRaises(SyntaxError, eval, """ rbb'' """)
259
260 def test_eval_str_u(self):
261 self.assertEqual(eval(""" u'x' """), 'x')
262 self.assertEqual(eval(""" U'\u00e4' """), 'ä')
263 self.assertEqual(eval(""" u'\N{LATIN SMALL LETTER A WITH DIAERESIS}' """), 'ä')
264 self.assertRaises(SyntaxError, eval, """ ur'' """)
265 self.assertRaises(SyntaxError, eval, """ ru'' """)
266 self.assertRaises(SyntaxError, eval, """ bu'' """)
267 self.assertRaises(SyntaxError, eval, """ ub'' """)
268
269 def test_uppercase_prefixes(self):
270 self.assertEqual(eval(""" B'x' """), b'x')
271 self.assertEqual(eval(r""" R'\x01' """), r'\x01')
272 self.assertEqual(eval(r""" BR'\x01' """), br'\x01')
273 self.assertEqual(eval(""" F'{1+1}' """), f'{1+1}')
274 self.assertEqual(eval(r""" U'\U0001d120' """), u'\U0001d120')
275
276 def check_encoding(self, encoding, extra=""):
277 modname = "xx_" + encoding.replace("-", "_")
278 fn = os.path.join(self.tmpdir, modname + ".py")
279 f = open(fn, "w", encoding=encoding)
280 try:
281 f.write(TEMPLATE % encoding)
282 f.write(extra)
283 finally:
284 f.close()
285 __import__(modname)
286 del sys.modules[modname]
287
288 def test_file_utf_8(self):
289 extra = "z = '\u1234'; assert ord(z) == 0x1234\n"
290 self.check_encoding("utf-8", extra)
291
292 def test_file_utf_8_error(self):
293 extra = "b'\x80'\n"
294 self.assertRaises(SyntaxError, self.check_encoding, "utf-8", extra)
295
296 def test_file_utf8(self):
297 self.check_encoding("utf-8")
298
299 def test_file_iso_8859_1(self):
300 self.check_encoding("iso-8859-1")
301
302 def test_file_latin_1(self):
303 self.check_encoding("latin-1")
304
305 def test_file_latin9(self):
306 self.check_encoding("latin9")
307
308
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()