1 from test import support
2 from test.support import os_helper
3 from tokenize import (tokenize, untokenize, NUMBER, NAME, OP,
4 STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
5 open as tokenize_open, Untokenizer, generate_tokens,
6 NEWLINE, _generate_tokens_from_c_tokenizer, DEDENT, TokenInfo,
7 TokenError)
8 from io import BytesIO, StringIO
9 import unittest
10 from textwrap import dedent
11 from unittest import TestCase, mock
12 from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
13 INVALID_UNDERSCORE_LITERALS)
15 from test.support.script_helper import run_test_script, make_script, run_python_until_end
16 import os
17 import token
18
19 # Converts a source string into a list of textual representations
20 # of the tokens, such as:
21 # ` NAME 'if' (1, 0) (1, 2)`
22 # to make writing tests easier.
23 def stringify_tokens_from_source(token_generator, source_string):
24 result = []
25 num_lines = len(source_string.splitlines())
26 missing_trailing_nl = source_string[-1] not in '\r\n'
27
28 for type, token, start, end, line in token_generator:
29 if type == ENDMARKER:
30 break
31 # Ignore the NEWLINE token on the last line if the input lacks a trailing newline
32 if missing_trailing_nl and type == NEWLINE and end[0] == num_lines:
33 continue
34 type = tok_name[type]
35 result.append(f" {type:10} {token!r:13} {start} {end}")
36
37 return result
38
39 class TokenizeTest(TestCase):
40 # Tests for the tokenize module.
41
42 # The tests can be really simple. Given a small fragment of source
43 # code, print out a table with tokens. The ENDMARKER, ENCODING and
44 # final NEWLINE are omitted for brevity.
45
46 def check_tokenize(self, s, expected):
47 # Format the tokens in s as a table.
48 # The ENDMARKER and final NEWLINE are omitted.
49 f = BytesIO(s.encode('utf-8'))
50 result = stringify_tokens_from_source(tokenize(f.readline), s)
51 self.assertEqual(result,
52 [" ENCODING 'utf-8' (0, 0) (0, 0)"] +
53 expected.rstrip().splitlines())
54
55 def test_invalid_readline(self):
56 def gen():
57 yield "sdfosdg"
58 yield "sdfosdg"
59 with self.assertRaises(TypeError):
60 list(tokenize(gen().__next__))
61
62 def gen():
63 yield b"sdfosdg"
64 yield b"sdfosdg"
65 with self.assertRaises(TypeError):
66 list(generate_tokens(gen().__next__))
67
68 def gen():
69 yield "sdfosdg"
70 1/0
71 with self.assertRaises(ZeroDivisionError):
72 list(generate_tokens(gen().__next__))
73
74 def test_implicit_newline(self):
75 # Make sure that the tokenizer puts in an implicit NEWLINE
76 # when the input lacks a trailing newline.
77 f = BytesIO("x".encode('utf-8'))
78 tokens = list(tokenize(f.readline))
79 self.assertEqual(tokens[-2].type, NEWLINE)
80 self.assertEqual(tokens[-1].type, ENDMARKER)
81
82 def test_basic(self):
83 self.check_tokenize("1 + 1", """\
84 NUMBER '1' (1, 0) (1, 1)
85 OP '+' (1, 2) (1, 3)
86 NUMBER '1' (1, 4) (1, 5)
87 """)
88 self.check_tokenize("if False:\n"
89 " # NL\n"
90 " \n"
91 " True = False # NEWLINE\n", """\
92 NAME 'if' (1, 0) (1, 2)
93 NAME 'False' (1, 3) (1, 8)
94 OP ':' (1, 8) (1, 9)
95 NEWLINE '\\n' (1, 9) (1, 10)
96 COMMENT '# NL' (2, 4) (2, 8)
97 NL '\\n' (2, 8) (2, 9)
98 NL '\\n' (3, 4) (3, 5)
99 INDENT ' ' (4, 0) (4, 4)
100 NAME 'True' (4, 4) (4, 8)
101 OP '=' (4, 9) (4, 10)
102 NAME 'False' (4, 11) (4, 16)
103 COMMENT '# NEWLINE' (4, 17) (4, 26)
104 NEWLINE '\\n' (4, 26) (4, 27)
105 DEDENT '' (5, 0) (5, 0)
106 """)
107
108 self.check_tokenize("if True:\r\n # NL\r\n foo='bar'\r\n\r\n", """\
109 NAME 'if' (1, 0) (1, 2)
110 NAME 'True' (1, 3) (1, 7)
111 OP ':' (1, 7) (1, 8)
112 NEWLINE '\\r\\n' (1, 8) (1, 10)
113 COMMENT '# NL' (2, 4) (2, 8)
114 NL '\\r\\n' (2, 8) (2, 10)
115 INDENT ' ' (3, 0) (3, 4)
116 NAME 'foo' (3, 4) (3, 7)
117 OP '=' (3, 7) (3, 8)
118 STRING "\'bar\'" (3, 8) (3, 13)
119 NEWLINE '\\r\\n' (3, 13) (3, 15)
120 NL '\\r\\n' (4, 0) (4, 2)
121 DEDENT '' (5, 0) (5, 0)
122 """)
123
124 self.check_tokenize("x = 1 + \\\r\n1\r\n", """\
125 NAME 'x' (1, 0) (1, 1)
126 OP '=' (1, 2) (1, 3)
127 NUMBER '1' (1, 4) (1, 5)
128 OP '+' (1, 6) (1, 7)
129 NUMBER '1' (2, 0) (2, 1)
130 NEWLINE '\\r\\n' (2, 1) (2, 3)
131 """)
132
133 indent_error_file = b"""\
134 def k(x):
135 x += 2
136 x += 5
137 """
138 readline = BytesIO(indent_error_file).readline
139 with self.assertRaisesRegex(IndentationError,
140 "unindent does not match any "
141 "outer indentation level") as e:
142 for tok in tokenize(readline):
143 pass
144 self.assertEqual(e.exception.lineno, 3)
145 self.assertEqual(e.exception.filename, '<string>')
146 self.assertEqual(e.exception.end_lineno, None)
147 self.assertEqual(e.exception.end_offset, None)
148 self.assertEqual(
149 e.exception.msg,
150 'unindent does not match any outer indentation level')
151 self.assertEqual(e.exception.offset, 9)
152 self.assertEqual(e.exception.text, ' x += 5')
153
154 def test_int(self):
155 # Ordinary integers and binary operators
156 self.check_tokenize("0xff <= 255", """\
157 NUMBER '0xff' (1, 0) (1, 4)
158 OP '<=' (1, 5) (1, 7)
159 NUMBER '255' (1, 8) (1, 11)
160 """)
161 self.check_tokenize("0b10 <= 255", """\
162 NUMBER '0b10' (1, 0) (1, 4)
163 OP '<=' (1, 5) (1, 7)
164 NUMBER '255' (1, 8) (1, 11)
165 """)
166 self.check_tokenize("0o123 <= 0O123", """\
167 NUMBER '0o123' (1, 0) (1, 5)
168 OP '<=' (1, 6) (1, 8)
169 NUMBER '0O123' (1, 9) (1, 14)
170 """)
171 self.check_tokenize("1234567 > ~0x15", """\
172 NUMBER '1234567' (1, 0) (1, 7)
173 OP '>' (1, 8) (1, 9)
174 OP '~' (1, 10) (1, 11)
175 NUMBER '0x15' (1, 11) (1, 15)
176 """)
177 self.check_tokenize("2134568 != 1231515", """\
178 NUMBER '2134568' (1, 0) (1, 7)
179 OP '!=' (1, 8) (1, 10)
180 NUMBER '1231515' (1, 11) (1, 18)
181 """)
182 self.check_tokenize("(-124561-1) & 200000000", """\
183 OP '(' (1, 0) (1, 1)
184 OP '-' (1, 1) (1, 2)
185 NUMBER '124561' (1, 2) (1, 8)
186 OP '-' (1, 8) (1, 9)
187 NUMBER '1' (1, 9) (1, 10)
188 OP ')' (1, 10) (1, 11)
189 OP '&' (1, 12) (1, 13)
190 NUMBER '200000000' (1, 14) (1, 23)
191 """)
192 self.check_tokenize("0xdeadbeef != -1", """\
193 NUMBER '0xdeadbeef' (1, 0) (1, 10)
194 OP '!=' (1, 11) (1, 13)
195 OP '-' (1, 14) (1, 15)
196 NUMBER '1' (1, 15) (1, 16)
197 """)
198 self.check_tokenize("0xdeadc0de & 12345", """\
199 NUMBER '0xdeadc0de' (1, 0) (1, 10)
200 OP '&' (1, 11) (1, 12)
201 NUMBER '12345' (1, 13) (1, 18)
202 """)
203 self.check_tokenize("0xFF & 0x15 | 1234", """\
204 NUMBER '0xFF' (1, 0) (1, 4)
205 OP '&' (1, 5) (1, 6)
206 NUMBER '0x15' (1, 7) (1, 11)
207 OP '|' (1, 12) (1, 13)
208 NUMBER '1234' (1, 14) (1, 18)
209 """)
210
211 def test_long(self):
212 # Long integers
213 self.check_tokenize("x = 0", """\
214 NAME 'x' (1, 0) (1, 1)
215 OP '=' (1, 2) (1, 3)
216 NUMBER '0' (1, 4) (1, 5)
217 """)
218 self.check_tokenize("x = 0xfffffffffff", """\
219 NAME 'x' (1, 0) (1, 1)
220 OP '=' (1, 2) (1, 3)
221 NUMBER '0xfffffffffff' (1, 4) (1, 17)
222 """)
223 self.check_tokenize("x = 123141242151251616110", """\
224 NAME 'x' (1, 0) (1, 1)
225 OP '=' (1, 2) (1, 3)
226 NUMBER '123141242151251616110' (1, 4) (1, 25)
227 """)
228 self.check_tokenize("x = -15921590215012591", """\
229 NAME 'x' (1, 0) (1, 1)
230 OP '=' (1, 2) (1, 3)
231 OP '-' (1, 4) (1, 5)
232 NUMBER '15921590215012591' (1, 5) (1, 22)
233 """)
234
235 def test_float(self):
236 # Floating point numbers
237 self.check_tokenize("x = 3.14159", """\
238 NAME 'x' (1, 0) (1, 1)
239 OP '=' (1, 2) (1, 3)
240 NUMBER '3.14159' (1, 4) (1, 11)
241 """)
242 self.check_tokenize("x = 314159.", """\
243 NAME 'x' (1, 0) (1, 1)
244 OP '=' (1, 2) (1, 3)
245 NUMBER '314159.' (1, 4) (1, 11)
246 """)
247 self.check_tokenize("x = .314159", """\
248 NAME 'x' (1, 0) (1, 1)
249 OP '=' (1, 2) (1, 3)
250 NUMBER '.314159' (1, 4) (1, 11)
251 """)
252 self.check_tokenize("x = 3e14159", """\
253 NAME 'x' (1, 0) (1, 1)
254 OP '=' (1, 2) (1, 3)
255 NUMBER '3e14159' (1, 4) (1, 11)
256 """)
257 self.check_tokenize("x = 3E123", """\
258 NAME 'x' (1, 0) (1, 1)
259 OP '=' (1, 2) (1, 3)
260 NUMBER '3E123' (1, 4) (1, 9)
261 """)
262 self.check_tokenize("x+y = 3e-1230", """\
263 NAME 'x' (1, 0) (1, 1)
264 OP '+' (1, 1) (1, 2)
265 NAME 'y' (1, 2) (1, 3)
266 OP '=' (1, 4) (1, 5)
267 NUMBER '3e-1230' (1, 6) (1, 13)
268 """)
269 self.check_tokenize("x = 3.14e159", """\
270 NAME 'x' (1, 0) (1, 1)
271 OP '=' (1, 2) (1, 3)
272 NUMBER '3.14e159' (1, 4) (1, 12)
273 """)
274
275 def test_underscore_literals(self):
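# Helper: return the string of the first NUMBER token that tokenize()
# produces for s, or 'invalid token' if no NUMBER token is emitted.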
276 def number_token(s):
277 f = BytesIO(s.encode('utf-8'))
278 for toktype, token, start, end, line in tokenize(f.readline):
279 if toktype == NUMBER:
280 return token
281 return 'invalid token'
282 for lit in VALID_UNDERSCORE_LITERALS:
283 if '(' in lit:
284 # this won't work with compound complex inputs
285 continue
286 self.assertEqual(number_token(lit), lit)
287 # Valid cases with extra underscores in the tokenize module
288 # See gh-105549 for context
289 extra_valid_cases = {"0_7", "09_99"}
290 for lit in INVALID_UNDERSCORE_LITERALS:
291 if lit in extra_valid_cases:
292 continue
293 try:
294 number_token(lit)
295 except TokenError:
296 continue
297 self.assertNotEqual(number_token(lit), lit)
298
299 def test_string(self):
300 # String literals
301 self.check_tokenize("x = ''; y = \"\"", """\
302 NAME 'x' (1, 0) (1, 1)
303 OP '=' (1, 2) (1, 3)
304 STRING "''" (1, 4) (1, 6)
305 OP ';' (1, 6) (1, 7)
306 NAME 'y' (1, 8) (1, 9)
307 OP '=' (1, 10) (1, 11)
308 STRING '""' (1, 12) (1, 14)
309 """)
310 self.check_tokenize("x = '\"'; y = \"'\"", """\
311 NAME 'x' (1, 0) (1, 1)
312 OP '=' (1, 2) (1, 3)
313 STRING '\\'"\\'' (1, 4) (1, 7)
314 OP ';' (1, 7) (1, 8)
315 NAME 'y' (1, 9) (1, 10)
316 OP '=' (1, 11) (1, 12)
317 STRING '"\\'"' (1, 13) (1, 16)
318 """)
319 self.check_tokenize("x = \"doesn't \"shrink\", does it\"", """\
320 NAME 'x' (1, 0) (1, 1)
321 OP '=' (1, 2) (1, 3)
322 STRING '"doesn\\'t "' (1, 4) (1, 14)
323 NAME 'shrink' (1, 14) (1, 20)
324 STRING '", does it"' (1, 20) (1, 31)
325 """)
326 self.check_tokenize("x = 'abc' + 'ABC'", """\
327 NAME 'x' (1, 0) (1, 1)
328 OP '=' (1, 2) (1, 3)
329 STRING "'abc'" (1, 4) (1, 9)
330 OP '+' (1, 10) (1, 11)
331 STRING "'ABC'" (1, 12) (1, 17)
332 """)
333 self.check_tokenize('y = "ABC" + "ABC"', """\
334 NAME 'y' (1, 0) (1, 1)
335 OP '=' (1, 2) (1, 3)
336 STRING '"ABC"' (1, 4) (1, 9)
337 OP '+' (1, 10) (1, 11)
338 STRING '"ABC"' (1, 12) (1, 17)
339 """)
340 self.check_tokenize("x = r'abc' + r'ABC' + R'ABC' + R'ABC'", """\
341 NAME 'x' (1, 0) (1, 1)
342 OP '=' (1, 2) (1, 3)
343 STRING "r'abc'" (1, 4) (1, 10)
344 OP '+' (1, 11) (1, 12)
345 STRING "r'ABC'" (1, 13) (1, 19)
346 OP '+' (1, 20) (1, 21)
347 STRING "R'ABC'" (1, 22) (1, 28)
348 OP '+' (1, 29) (1, 30)
349 STRING "R'ABC'" (1, 31) (1, 37)
350 """)
351 self.check_tokenize('y = r"abc" + r"ABC" + R"ABC" + R"ABC"', """\
352 NAME 'y' (1, 0) (1, 1)
353 OP '=' (1, 2) (1, 3)
354 STRING 'r"abc"' (1, 4) (1, 10)
355 OP '+' (1, 11) (1, 12)
356 STRING 'r"ABC"' (1, 13) (1, 19)
357 OP '+' (1, 20) (1, 21)
358 STRING 'R"ABC"' (1, 22) (1, 28)
359 OP '+' (1, 29) (1, 30)
360 STRING 'R"ABC"' (1, 31) (1, 37)
361 """)
362
363 self.check_tokenize("u'abc' + U'abc'", """\
364 STRING "u'abc'" (1, 0) (1, 6)
365 OP '+' (1, 7) (1, 8)
366 STRING "U'abc'" (1, 9) (1, 15)
367 """)
368 self.check_tokenize('u"abc" + U"abc"', """\
369 STRING 'u"abc"' (1, 0) (1, 6)
370 OP '+' (1, 7) (1, 8)
371 STRING 'U"abc"' (1, 9) (1, 15)
372 """)
373
374 self.check_tokenize("b'abc' + B'abc'", """\
375 STRING "b'abc'" (1, 0) (1, 6)
376 OP '+' (1, 7) (1, 8)
377 STRING "B'abc'" (1, 9) (1, 15)
378 """)
379 self.check_tokenize('b"abc" + B"abc"', """\
380 STRING 'b"abc"' (1, 0) (1, 6)
381 OP '+' (1, 7) (1, 8)
382 STRING 'B"abc"' (1, 9) (1, 15)
383 """)
384 self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\
385 STRING "br'abc'" (1, 0) (1, 7)
386 OP '+' (1, 8) (1, 9)
387 STRING "bR'abc'" (1, 10) (1, 17)
388 OP '+' (1, 18) (1, 19)
389 STRING "Br'abc'" (1, 20) (1, 27)
390 OP '+' (1, 28) (1, 29)
391 STRING "BR'abc'" (1, 30) (1, 37)
392 """)
393 self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\
394 STRING 'br"abc"' (1, 0) (1, 7)
395 OP '+' (1, 8) (1, 9)
396 STRING 'bR"abc"' (1, 10) (1, 17)
397 OP '+' (1, 18) (1, 19)
398 STRING 'Br"abc"' (1, 20) (1, 27)
399 OP '+' (1, 28) (1, 29)
400 STRING 'BR"abc"' (1, 30) (1, 37)
401 """)
402 self.check_tokenize("rb'abc' + rB'abc' + Rb'abc' + RB'abc'", """\
403 STRING "rb'abc'" (1, 0) (1, 7)
404 OP '+' (1, 8) (1, 9)
405 STRING "rB'abc'" (1, 10) (1, 17)
406 OP '+' (1, 18) (1, 19)
407 STRING "Rb'abc'" (1, 20) (1, 27)
408 OP '+' (1, 28) (1, 29)
409 STRING "RB'abc'" (1, 30) (1, 37)
410 """)
411 self.check_tokenize('rb"abc" + rB"abc" + Rb"abc" + RB"abc"', """\
412 STRING 'rb"abc"' (1, 0) (1, 7)
413 OP '+' (1, 8) (1, 9)
414 STRING 'rB"abc"' (1, 10) (1, 17)
415 OP '+' (1, 18) (1, 19)
416 STRING 'Rb"abc"' (1, 20) (1, 27)
417 OP '+' (1, 28) (1, 29)
418 STRING 'RB"abc"' (1, 30) (1, 37)
419 """)
420 # Check 0, 1, and 2 character string prefixes.
421 self.check_tokenize(r'"a\
422 de\
423 fg"', """\
424 STRING '"a\\\\\\nde\\\\\\nfg"\' (1, 0) (3, 3)
425 """)
426 self.check_tokenize(r'u"a\
427 de"', """\
428 STRING 'u"a\\\\\\nde"\' (1, 0) (2, 3)
429 """)
430 self.check_tokenize(r'rb"a\
431 d"', """\
432 STRING 'rb"a\\\\\\nd"\' (1, 0) (2, 2)
433 """)
434 self.check_tokenize(r'"""a\
435 b"""', """\
436 STRING '\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
437 """)
438 self.check_tokenize(r'u"""a\
439 b"""', """\
440 STRING 'u\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
441 """)
442 self.check_tokenize(r'rb"""a\
443 b\
444 c"""', """\
445 STRING 'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4)
446 """)
447 self.check_tokenize('f"abc"', """\
448 FSTRING_START 'f"' (1, 0) (1, 2)
449 FSTRING_MIDDLE 'abc' (1, 2) (1, 5)
450 FSTRING_END '"' (1, 5) (1, 6)
451 """)
452 self.check_tokenize('fR"a{b}c"', """\
453 FSTRING_START 'fR"' (1, 0) (1, 3)
454 FSTRING_MIDDLE 'a' (1, 3) (1, 4)
455 OP '{' (1, 4) (1, 5)
456 NAME 'b' (1, 5) (1, 6)
457 OP '}' (1, 6) (1, 7)
458 FSTRING_MIDDLE 'c' (1, 7) (1, 8)
459 FSTRING_END '"' (1, 8) (1, 9)
460 """)
461 self.check_tokenize('fR"a{{{b!r}}}c"', """\
462 FSTRING_START 'fR"' (1, 0) (1, 3)
463 FSTRING_MIDDLE 'a{' (1, 3) (1, 5)
464 OP '{' (1, 6) (1, 7)
465 NAME 'b' (1, 7) (1, 8)
466 OP '!' (1, 8) (1, 9)
467 NAME 'r' (1, 9) (1, 10)
468 OP '}' (1, 10) (1, 11)
469 FSTRING_MIDDLE '}' (1, 11) (1, 12)
470 FSTRING_MIDDLE 'c' (1, 13) (1, 14)
471 FSTRING_END '"' (1, 14) (1, 15)
472 """)
473 self.check_tokenize('f"{{{1+1}}}"', """\
474 FSTRING_START 'f"' (1, 0) (1, 2)
475 FSTRING_MIDDLE '{' (1, 2) (1, 3)
476 OP '{' (1, 4) (1, 5)
477 NUMBER '1' (1, 5) (1, 6)
478 OP '+' (1, 6) (1, 7)
479 NUMBER '1' (1, 7) (1, 8)
480 OP '}' (1, 8) (1, 9)
481 FSTRING_MIDDLE '}' (1, 9) (1, 10)
482 FSTRING_END '"' (1, 11) (1, 12)
483 """)
484 self.check_tokenize('f"""{f\'\'\'{f\'{f"{1+1}"}\'}\'\'\'}"""', """\
485 FSTRING_START 'f\"""' (1, 0) (1, 4)
486 OP '{' (1, 4) (1, 5)
487 FSTRING_START "f'''" (1, 5) (1, 9)
488 OP '{' (1, 9) (1, 10)
489 FSTRING_START "f'" (1, 10) (1, 12)
490 OP '{' (1, 12) (1, 13)
491 FSTRING_START 'f"' (1, 13) (1, 15)
492 OP '{' (1, 15) (1, 16)
493 NUMBER '1' (1, 16) (1, 17)
494 OP '+' (1, 17) (1, 18)
495 NUMBER '1' (1, 18) (1, 19)
496 OP '}' (1, 19) (1, 20)
497 FSTRING_END '"' (1, 20) (1, 21)
498 OP '}' (1, 21) (1, 22)
499 FSTRING_END "'" (1, 22) (1, 23)
500 OP '}' (1, 23) (1, 24)
501 FSTRING_END "'''" (1, 24) (1, 27)
502 OP '}' (1, 27) (1, 28)
503 FSTRING_END '\"""' (1, 28) (1, 31)
504 """)
505 self.check_tokenize('f""" x\nstr(data, encoding={invalid!r})\n"""', """\
506 FSTRING_START 'f\"""' (1, 0) (1, 4)
507 FSTRING_MIDDLE ' x\\nstr(data, encoding=' (1, 4) (2, 19)
508 OP '{' (2, 19) (2, 20)
509 NAME 'invalid' (2, 20) (2, 27)
510 OP '!' (2, 27) (2, 28)
511 NAME 'r' (2, 28) (2, 29)
512 OP '}' (2, 29) (2, 30)
513 FSTRING_MIDDLE ')\\n' (2, 30) (3, 0)
514 FSTRING_END '\"""' (3, 0) (3, 3)
515 """)
516 self.check_tokenize('f"""123456789\nsomething{None}bad"""', """\
517 FSTRING_START 'f\"""' (1, 0) (1, 4)
518 FSTRING_MIDDLE '123456789\\nsomething' (1, 4) (2, 9)
519 OP '{' (2, 9) (2, 10)
520 NAME 'None' (2, 10) (2, 14)
521 OP '}' (2, 14) (2, 15)
522 FSTRING_MIDDLE 'bad' (2, 15) (2, 18)
523 FSTRING_END '\"""' (2, 18) (2, 21)
524 """)
525 self.check_tokenize('f"""abc"""', """\
526 FSTRING_START 'f\"""' (1, 0) (1, 4)
527 FSTRING_MIDDLE 'abc' (1, 4) (1, 7)
528 FSTRING_END '\"""' (1, 7) (1, 10)
529 """)
530 self.check_tokenize(r'f"abc\
531 def"', """\
532 FSTRING_START 'f"' (1, 0) (1, 2)
533 FSTRING_MIDDLE 'abc\\\\\\ndef' (1, 2) (2, 3)
534 FSTRING_END '"' (2, 3) (2, 4)
535 """)
536 self.check_tokenize(r'Rf"abc\
537 def"', """\
538 FSTRING_START 'Rf"' (1, 0) (1, 3)
539 FSTRING_MIDDLE 'abc\\\\\\ndef' (1, 3) (2, 3)
540 FSTRING_END '"' (2, 3) (2, 4)
541 """)
542 self.check_tokenize("f'some words {a+b:.3f} more words {c+d=} final words'", """\
543 FSTRING_START "f'" (1, 0) (1, 2)
544 FSTRING_MIDDLE 'some words ' (1, 2) (1, 13)
545 OP '{' (1, 13) (1, 14)
546 NAME 'a' (1, 14) (1, 15)
547 OP '+' (1, 15) (1, 16)
548 NAME 'b' (1, 16) (1, 17)
549 OP ':' (1, 17) (1, 18)
550 FSTRING_MIDDLE '.3f' (1, 18) (1, 21)
551 OP '}' (1, 21) (1, 22)
552 FSTRING_MIDDLE ' more words ' (1, 22) (1, 34)
553 OP '{' (1, 34) (1, 35)
554 NAME 'c' (1, 35) (1, 36)
555 OP '+' (1, 36) (1, 37)
556 NAME 'd' (1, 37) (1, 38)
557 OP '=' (1, 38) (1, 39)
558 OP '}' (1, 39) (1, 40)
559 FSTRING_MIDDLE ' final words' (1, 40) (1, 52)
560 FSTRING_END "'" (1, 52) (1, 53)
561 """)
562 self.check_tokenize("""\
563 f'''{
564 3
565 =}'''""", """\
566 FSTRING_START "f'''" (1, 0) (1, 4)
567 OP '{' (1, 4) (1, 5)
568 NL '\\n' (1, 5) (1, 6)
569 NUMBER '3' (2, 0) (2, 1)
570 NL '\\n' (2, 1) (2, 2)
571 OP '=' (3, 0) (3, 1)
572 OP '}' (3, 1) (3, 2)
573 FSTRING_END "'''" (3, 2) (3, 5)
574 """)
575
576 def test_function(self):
577 self.check_tokenize("def d22(a, b, c=2, d=2, *k): pass", """\
578 NAME 'def' (1, 0) (1, 3)
579 NAME 'd22' (1, 4) (1, 7)
580 OP '(' (1, 7) (1, 8)
581 NAME 'a' (1, 8) (1, 9)
582 OP ',' (1, 9) (1, 10)
583 NAME 'b' (1, 11) (1, 12)
584 OP ',' (1, 12) (1, 13)
585 NAME 'c' (1, 14) (1, 15)
586 OP '=' (1, 15) (1, 16)
587 NUMBER '2' (1, 16) (1, 17)
588 OP ',' (1, 17) (1, 18)
589 NAME 'd' (1, 19) (1, 20)
590 OP '=' (1, 20) (1, 21)
591 NUMBER '2' (1, 21) (1, 22)
592 OP ',' (1, 22) (1, 23)
593 OP '*' (1, 24) (1, 25)
594 NAME 'k' (1, 25) (1, 26)
595 OP ')' (1, 26) (1, 27)
596 OP ':' (1, 27) (1, 28)
597 NAME 'pass' (1, 29) (1, 33)
598 """)
599 self.check_tokenize("def d01v_(a=1, *k, **w): pass", """\
600 NAME 'def' (1, 0) (1, 3)
601 NAME 'd01v_' (1, 4) (1, 9)
602 OP '(' (1, 9) (1, 10)
603 NAME 'a' (1, 10) (1, 11)
604 OP '=' (1, 11) (1, 12)
605 NUMBER '1' (1, 12) (1, 13)
606 OP ',' (1, 13) (1, 14)
607 OP '*' (1, 15) (1, 16)
608 NAME 'k' (1, 16) (1, 17)
609 OP ',' (1, 17) (1, 18)
610 OP '**' (1, 19) (1, 21)
611 NAME 'w' (1, 21) (1, 22)
612 OP ')' (1, 22) (1, 23)
613 OP ':' (1, 23) (1, 24)
614 NAME 'pass' (1, 25) (1, 29)
615 """)
616 self.check_tokenize("def d23(a: str, b: int=3) -> int: pass", """\
617 NAME 'def' (1, 0) (1, 3)
618 NAME 'd23' (1, 4) (1, 7)
619 OP '(' (1, 7) (1, 8)
620 NAME 'a' (1, 8) (1, 9)
621 OP ':' (1, 9) (1, 10)
622 NAME 'str' (1, 11) (1, 14)
623 OP ',' (1, 14) (1, 15)
624 NAME 'b' (1, 16) (1, 17)
625 OP ':' (1, 17) (1, 18)
626 NAME 'int' (1, 19) (1, 22)
627 OP '=' (1, 22) (1, 23)
628 NUMBER '3' (1, 23) (1, 24)
629 OP ')' (1, 24) (1, 25)
630 OP '->' (1, 26) (1, 28)
631 NAME 'int' (1, 29) (1, 32)
632 OP ':' (1, 32) (1, 33)
633 NAME 'pass' (1, 34) (1, 38)
634 """)
635
636 def test_comparison(self):
637 # Comparison
638 self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != "
639 "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\
640 NAME 'if' (1, 0) (1, 2)
641 NUMBER '1' (1, 3) (1, 4)
642 OP '<' (1, 5) (1, 6)
643 NUMBER '1' (1, 7) (1, 8)
644 OP '>' (1, 9) (1, 10)
645 NUMBER '1' (1, 11) (1, 12)
646 OP '==' (1, 13) (1, 15)
647 NUMBER '1' (1, 16) (1, 17)
648 OP '>=' (1, 18) (1, 20)
649 NUMBER '5' (1, 21) (1, 22)
650 OP '<=' (1, 23) (1, 25)
651 NUMBER '0x15' (1, 26) (1, 30)
652 OP '<=' (1, 31) (1, 33)
653 NUMBER '0x12' (1, 34) (1, 38)
654 OP '!=' (1, 39) (1, 41)
655 NUMBER '1' (1, 42) (1, 43)
656 NAME 'and' (1, 44) (1, 47)
657 NUMBER '5' (1, 48) (1, 49)
658 NAME 'in' (1, 50) (1, 52)
659 NUMBER '1' (1, 53) (1, 54)
660 NAME 'not' (1, 55) (1, 58)
661 NAME 'in' (1, 59) (1, 61)
662 NUMBER '1' (1, 62) (1, 63)
663 NAME 'is' (1, 64) (1, 66)
664 NUMBER '1' (1, 67) (1, 68)
665 NAME 'or' (1, 69) (1, 71)
666 NUMBER '5' (1, 72) (1, 73)
667 NAME 'is' (1, 74) (1, 76)
668 NAME 'not' (1, 77) (1, 80)
669 NUMBER '1' (1, 81) (1, 82)
670 OP ':' (1, 82) (1, 83)
671 NAME 'pass' (1, 84) (1, 88)
672 """)
673
674 def test_shift(self):
675 # Shift
676 self.check_tokenize("x = 1 << 1 >> 5", """\
677 NAME 'x' (1, 0) (1, 1)
678 OP '=' (1, 2) (1, 3)
679 NUMBER '1' (1, 4) (1, 5)
680 OP '<<' (1, 6) (1, 8)
681 NUMBER '1' (1, 9) (1, 10)
682 OP '>>' (1, 11) (1, 13)
683 NUMBER '5' (1, 14) (1, 15)
684 """)
685
686 def test_additive(self):
687 # Additive
688 self.check_tokenize("x = 1 - y + 15 - 1 + 0x124 + z + a[5]", """\
689 NAME 'x' (1, 0) (1, 1)
690 OP '=' (1, 2) (1, 3)
691 NUMBER '1' (1, 4) (1, 5)
692 OP '-' (1, 6) (1, 7)
693 NAME 'y' (1, 8) (1, 9)
694 OP '+' (1, 10) (1, 11)
695 NUMBER '15' (1, 12) (1, 14)
696 OP '-' (1, 15) (1, 16)
697 NUMBER '1' (1, 17) (1, 18)
698 OP '+' (1, 19) (1, 20)
699 NUMBER '0x124' (1, 21) (1, 26)
700 OP '+' (1, 27) (1, 28)
701 NAME 'z' (1, 29) (1, 30)
702 OP '+' (1, 31) (1, 32)
703 NAME 'a' (1, 33) (1, 34)
704 OP '[' (1, 34) (1, 35)
705 NUMBER '5' (1, 35) (1, 36)
706 OP ']' (1, 36) (1, 37)
707 """)
708
709 def test_multiplicative(self):
710 # Multiplicative
711 self.check_tokenize("x = 1//1*1/5*12%0x12@42", """\
712 NAME 'x' (1, 0) (1, 1)
713 OP '=' (1, 2) (1, 3)
714 NUMBER '1' (1, 4) (1, 5)
715 OP '//' (1, 5) (1, 7)
716 NUMBER '1' (1, 7) (1, 8)
717 OP '*' (1, 8) (1, 9)
718 NUMBER '1' (1, 9) (1, 10)
719 OP '/' (1, 10) (1, 11)
720 NUMBER '5' (1, 11) (1, 12)
721 OP '*' (1, 12) (1, 13)
722 NUMBER '12' (1, 13) (1, 15)
723 OP '%' (1, 15) (1, 16)
724 NUMBER '0x12' (1, 16) (1, 20)
725 OP '@' (1, 20) (1, 21)
726 NUMBER '42' (1, 21) (1, 23)
727 """)
728
729 def test_unary(self):
730 # Unary
731 self.check_tokenize("~1 ^ 1 & 1 |1 ^ -1", """\
732 OP '~' (1, 0) (1, 1)
733 NUMBER '1' (1, 1) (1, 2)
734 OP '^' (1, 3) (1, 4)
735 NUMBER '1' (1, 5) (1, 6)
736 OP '&' (1, 7) (1, 8)
737 NUMBER '1' (1, 9) (1, 10)
738 OP '|' (1, 11) (1, 12)
739 NUMBER '1' (1, 12) (1, 13)
740 OP '^' (1, 14) (1, 15)
741 OP '-' (1, 16) (1, 17)
742 NUMBER '1' (1, 17) (1, 18)
743 """)
744 self.check_tokenize("-1*1/1+1*1//1 - ---1**1", """\
745 OP '-' (1, 0) (1, 1)
746 NUMBER '1' (1, 1) (1, 2)
747 OP '*' (1, 2) (1, 3)
748 NUMBER '1' (1, 3) (1, 4)
749 OP '/' (1, 4) (1, 5)
750 NUMBER '1' (1, 5) (1, 6)
751 OP '+' (1, 6) (1, 7)
752 NUMBER '1' (1, 7) (1, 8)
753 OP '*' (1, 8) (1, 9)
754 NUMBER '1' (1, 9) (1, 10)
755 OP '//' (1, 10) (1, 12)
756 NUMBER '1' (1, 12) (1, 13)
757 OP '-' (1, 14) (1, 15)
758 OP '-' (1, 16) (1, 17)
759 OP '-' (1, 17) (1, 18)
760 OP '-' (1, 18) (1, 19)
761 NUMBER '1' (1, 19) (1, 20)
762 OP '**' (1, 20) (1, 22)
763 NUMBER '1' (1, 22) (1, 23)
764 """)
765
766 def test_selector(self):
767 # Selector
768 self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\
769 NAME 'import' (1, 0) (1, 6)
770 NAME 'sys' (1, 7) (1, 10)
771 OP ',' (1, 10) (1, 11)
772 NAME 'time' (1, 12) (1, 16)
773 NEWLINE '\\n' (1, 16) (1, 17)
774 NAME 'x' (2, 0) (2, 1)
775 OP '=' (2, 2) (2, 3)
776 NAME 'sys' (2, 4) (2, 7)
777 OP '.' (2, 7) (2, 8)
778 NAME 'modules' (2, 8) (2, 15)
779 OP '[' (2, 15) (2, 16)
780 STRING "'time'" (2, 16) (2, 22)
781 OP ']' (2, 22) (2, 23)
782 OP '.' (2, 23) (2, 24)
783 NAME 'time' (2, 24) (2, 28)
784 OP '(' (2, 28) (2, 29)
785 OP ')' (2, 29) (2, 30)
786 """)
787
788 def test_method(self):
789 # Methods
790 self.check_tokenize("@staticmethod\ndef foo(x,y): pass", """\
791 OP '@' (1, 0) (1, 1)
792 NAME 'staticmethod' (1, 1) (1, 13)
793 NEWLINE '\\n' (1, 13) (1, 14)
794 NAME 'def' (2, 0) (2, 3)
795 NAME 'foo' (2, 4) (2, 7)
796 OP '(' (2, 7) (2, 8)
797 NAME 'x' (2, 8) (2, 9)
798 OP ',' (2, 9) (2, 10)
799 NAME 'y' (2, 10) (2, 11)
800 OP ')' (2, 11) (2, 12)
801 OP ':' (2, 12) (2, 13)
802 NAME 'pass' (2, 14) (2, 18)
803 """)
804
805 def test_tabs(self):
806 # Evil tabs
807 self.check_tokenize("def f():\n"
808 "\tif x\n"
809 " \tpass", """\
810 NAME 'def' (1, 0) (1, 3)
811 NAME 'f' (1, 4) (1, 5)
812 OP '(' (1, 5) (1, 6)
813 OP ')' (1, 6) (1, 7)
814 OP ':' (1, 7) (1, 8)
815 NEWLINE '\\n' (1, 8) (1, 9)
816 INDENT '\\t' (2, 0) (2, 1)
817 NAME 'if' (2, 1) (2, 3)
818 NAME 'x' (2, 4) (2, 5)
819 NEWLINE '\\n' (2, 5) (2, 6)
820 INDENT ' \\t' (3, 0) (3, 9)
821 NAME 'pass' (3, 9) (3, 13)
822 DEDENT '' (4, 0) (4, 0)
823 DEDENT '' (4, 0) (4, 0)
824 """)
825
826 def test_non_ascii_identifiers(self):
827 # Non-ascii identifiers
828 self.check_tokenize("Örter = 'places'\ngrün = 'green'", """\
829 NAME 'Örter' (1, 0) (1, 5)
830 OP '=' (1, 6) (1, 7)
831 STRING "'places'" (1, 8) (1, 16)
832 NEWLINE '\\n' (1, 16) (1, 17)
833 NAME 'grün' (2, 0) (2, 4)
834 OP '=' (2, 5) (2, 6)
835 STRING "'green'" (2, 7) (2, 14)
836 """)
837
838 def test_unicode(self):
839 # Legacy unicode literals:
840 self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\
841 NAME 'Örter' (1, 0) (1, 5)
842 OP '=' (1, 6) (1, 7)
843 STRING "u'places'" (1, 8) (1, 17)
844 NEWLINE '\\n' (1, 17) (1, 18)
845 NAME 'grün' (2, 0) (2, 4)
846 OP '=' (2, 5) (2, 6)
847 STRING "U'green'" (2, 7) (2, 15)
848 """)
849
850 def test_async(self):
851 # Async/await extension:
852 self.check_tokenize("async = 1", """\
853 NAME 'async' (1, 0) (1, 5)
854 OP '=' (1, 6) (1, 7)
855 NUMBER '1' (1, 8) (1, 9)
856 """)
857
858 self.check_tokenize("a = (async = 1)", """\
859 NAME 'a' (1, 0) (1, 1)
860 OP '=' (1, 2) (1, 3)
861 OP '(' (1, 4) (1, 5)
862 NAME 'async' (1, 5) (1, 10)
863 OP '=' (1, 11) (1, 12)
864 NUMBER '1' (1, 13) (1, 14)
865 OP ')' (1, 14) (1, 15)
866 """)
867
868 self.check_tokenize("async()", """\
869 NAME 'async' (1, 0) (1, 5)
870 OP '(' (1, 5) (1, 6)
871 OP ')' (1, 6) (1, 7)
872 """)
873
874 self.check_tokenize("class async(Bar):pass", """\
875 NAME 'class' (1, 0) (1, 5)
876 NAME 'async' (1, 6) (1, 11)
877 OP '(' (1, 11) (1, 12)
878 NAME 'Bar' (1, 12) (1, 15)
879 OP ')' (1, 15) (1, 16)
880 OP ':' (1, 16) (1, 17)
881 NAME 'pass' (1, 17) (1, 21)
882 """)
883
884 self.check_tokenize("class async:pass", """\
885 NAME 'class' (1, 0) (1, 5)
886 NAME 'async' (1, 6) (1, 11)
887 OP ':' (1, 11) (1, 12)
888 NAME 'pass' (1, 12) (1, 16)
889 """)
890
891 self.check_tokenize("await = 1", """\
892 NAME 'await' (1, 0) (1, 5)
893 OP '=' (1, 6) (1, 7)
894 NUMBER '1' (1, 8) (1, 9)
895 """)
896
897 self.check_tokenize("foo.async", """\
898 NAME 'foo' (1, 0) (1, 3)
899 OP '.' (1, 3) (1, 4)
900 NAME 'async' (1, 4) (1, 9)
901 """)
902
903 self.check_tokenize("async for a in b: pass", """\
904 NAME 'async' (1, 0) (1, 5)
905 NAME 'for' (1, 6) (1, 9)
906 NAME 'a' (1, 10) (1, 11)
907 NAME 'in' (1, 12) (1, 14)
908 NAME 'b' (1, 15) (1, 16)
909 OP ':' (1, 16) (1, 17)
910 NAME 'pass' (1, 18) (1, 22)
911 """)
912
913 self.check_tokenize("async with a as b: pass", """\
914 NAME 'async' (1, 0) (1, 5)
915 NAME 'with' (1, 6) (1, 10)
916 NAME 'a' (1, 11) (1, 12)
917 NAME 'as' (1, 13) (1, 15)
918 NAME 'b' (1, 16) (1, 17)
919 OP ':' (1, 17) (1, 18)
920 NAME 'pass' (1, 19) (1, 23)
921 """)
922
923 self.check_tokenize("async.foo", """\
924 NAME 'async' (1, 0) (1, 5)
925 OP '.' (1, 5) (1, 6)
926 NAME 'foo' (1, 6) (1, 9)
927 """)
928
929 self.check_tokenize("async", """\
930 NAME 'async' (1, 0) (1, 5)
931 """)
932
933 self.check_tokenize("async\n#comment\nawait", """\
934 NAME 'async' (1, 0) (1, 5)
935 NEWLINE '\\n' (1, 5) (1, 6)
936 COMMENT '#comment' (2, 0) (2, 8)
937 NL '\\n' (2, 8) (2, 9)
938 NAME 'await' (3, 0) (3, 5)
939 """)
940
941 self.check_tokenize("async\n...\nawait", """\
942 NAME 'async' (1, 0) (1, 5)
943 NEWLINE '\\n' (1, 5) (1, 6)
944 OP '...' (2, 0) (2, 3)
945 NEWLINE '\\n' (2, 3) (2, 4)
946 NAME 'await' (3, 0) (3, 5)
947 """)
948
949 self.check_tokenize("async\nawait", """\
950 NAME 'async' (1, 0) (1, 5)
951 NEWLINE '\\n' (1, 5) (1, 6)
952 NAME 'await' (2, 0) (2, 5)
953 """)
954
955 self.check_tokenize("foo.async + 1", """\
956 NAME 'foo' (1, 0) (1, 3)
957 OP '.' (1, 3) (1, 4)
958 NAME 'async' (1, 4) (1, 9)
959 OP '+' (1, 10) (1, 11)
960 NUMBER '1' (1, 12) (1, 13)
961 """)
962
963 self.check_tokenize("async def foo(): pass", """\
964 NAME 'async' (1, 0) (1, 5)
965 NAME 'def' (1, 6) (1, 9)
966 NAME 'foo' (1, 10) (1, 13)
967 OP '(' (1, 13) (1, 14)
968 OP ')' (1, 14) (1, 15)
969 OP ':' (1, 15) (1, 16)
970 NAME 'pass' (1, 17) (1, 21)
971 """)
972
973 self.check_tokenize('''\
974 async def foo():
975 def foo(await):
976 await = 1
977 if 1:
978 await
979 async += 1
980 ''', """\
981 NAME 'async' (1, 0) (1, 5)
982 NAME 'def' (1, 6) (1, 9)
983 NAME 'foo' (1, 10) (1, 13)
984 OP '(' (1, 13) (1, 14)
985 OP ')' (1, 14) (1, 15)
986 OP ':' (1, 15) (1, 16)
987 NEWLINE '\\n' (1, 16) (1, 17)
988 INDENT ' ' (2, 0) (2, 2)
989 NAME 'def' (2, 2) (2, 5)
990 NAME 'foo' (2, 6) (2, 9)
991 OP '(' (2, 9) (2, 10)
992 NAME 'await' (2, 10) (2, 15)
993 OP ')' (2, 15) (2, 16)
994 OP ':' (2, 16) (2, 17)
995 NEWLINE '\\n' (2, 17) (2, 18)
996 INDENT ' ' (3, 0) (3, 4)
997 NAME 'await' (3, 4) (3, 9)
998 OP '=' (3, 10) (3, 11)
999 NUMBER '1' (3, 12) (3, 13)
1000 NEWLINE '\\n' (3, 13) (3, 14)
1001 DEDENT '' (4, 2) (4, 2)
1002 NAME 'if' (4, 2) (4, 4)
1003 NUMBER '1' (4, 5) (4, 6)
1004 OP ':' (4, 6) (4, 7)
1005 NEWLINE '\\n' (4, 7) (4, 8)
1006 INDENT ' ' (5, 0) (5, 4)
1007 NAME 'await' (5, 4) (5, 9)
1008 NEWLINE '\\n' (5, 9) (5, 10)
1009 DEDENT '' (6, 0) (6, 0)
1010 DEDENT '' (6, 0) (6, 0)
1011 NAME 'async' (6, 0) (6, 5)
1012 OP '+=' (6, 6) (6, 8)
1013 NUMBER '1' (6, 9) (6, 10)
1014 NEWLINE '\\n' (6, 10) (6, 11)
1015 """)
1016
1017 self.check_tokenize('''\
1018 async def foo():
1019 async for i in 1: pass''', """\
1020 NAME 'async' (1, 0) (1, 5)
1021 NAME 'def' (1, 6) (1, 9)
1022 NAME 'foo' (1, 10) (1, 13)
1023 OP '(' (1, 13) (1, 14)
1024 OP ')' (1, 14) (1, 15)
1025 OP ':' (1, 15) (1, 16)
1026 NEWLINE '\\n' (1, 16) (1, 17)
1027 INDENT ' ' (2, 0) (2, 2)
1028 NAME 'async' (2, 2) (2, 7)
1029 NAME 'for' (2, 8) (2, 11)
1030 NAME 'i' (2, 12) (2, 13)
1031 NAME 'in' (2, 14) (2, 16)
1032 NUMBER '1' (2, 17) (2, 18)
1033 OP ':' (2, 18) (2, 19)
1034 NAME 'pass' (2, 20) (2, 24)
1035 DEDENT '' (3, 0) (3, 0)
1036 """)
1037
1038 self.check_tokenize('''async def foo(async): await''', """\
1039 NAME 'async' (1, 0) (1, 5)
1040 NAME 'def' (1, 6) (1, 9)
1041 NAME 'foo' (1, 10) (1, 13)
1042 OP '(' (1, 13) (1, 14)
1043 NAME 'async' (1, 14) (1, 19)
1044 OP ')' (1, 19) (1, 20)
1045 OP ':' (1, 20) (1, 21)
1046 NAME 'await' (1, 22) (1, 27)
1047 """)
1048
1049 self.check_tokenize('''\
1050 def f():
1051
1052 def baz(): pass
1053 async def bar(): pass
1054
1055 await = 2''', """\
1056 NAME 'def' (1, 0) (1, 3)
1057 NAME 'f' (1, 4) (1, 5)
1058 OP '(' (1, 5) (1, 6)
1059 OP ')' (1, 6) (1, 7)
1060 OP ':' (1, 7) (1, 8)
1061 NEWLINE '\\n' (1, 8) (1, 9)
1062 NL '\\n' (2, 0) (2, 1)
1063 INDENT ' ' (3, 0) (3, 2)
1064 NAME 'def' (3, 2) (3, 5)
1065 NAME 'baz' (3, 6) (3, 9)
1066 OP '(' (3, 9) (3, 10)
1067 OP ')' (3, 10) (3, 11)
1068 OP ':' (3, 11) (3, 12)
1069 NAME 'pass' (3, 13) (3, 17)
1070 NEWLINE '\\n' (3, 17) (3, 18)
1071 NAME 'async' (4, 2) (4, 7)
1072 NAME 'def' (4, 8) (4, 11)
1073 NAME 'bar' (4, 12) (4, 15)
1074 OP '(' (4, 15) (4, 16)
1075 OP ')' (4, 16) (4, 17)
1076 OP ':' (4, 17) (4, 18)
1077 NAME 'pass' (4, 19) (4, 23)
1078 NEWLINE '\\n' (4, 23) (4, 24)
1079 NL '\\n' (5, 0) (5, 1)
1080 NAME 'await' (6, 2) (6, 7)
1081 OP '=' (6, 8) (6, 9)
1082 NUMBER '2' (6, 10) (6, 11)
1083 DEDENT '' (7, 0) (7, 0)
1084 """)
1085
1086 self.check_tokenize('''\
1087 async def f():
1088
1089 def baz(): pass
1090 async def bar(): pass
1091
1092 await = 2''', """\
1093 NAME 'async' (1, 0) (1, 5)
1094 NAME 'def' (1, 6) (1, 9)
1095 NAME 'f' (1, 10) (1, 11)
1096 OP '(' (1, 11) (1, 12)
1097 OP ')' (1, 12) (1, 13)
1098 OP ':' (1, 13) (1, 14)
1099 NEWLINE '\\n' (1, 14) (1, 15)
1100 NL '\\n' (2, 0) (2, 1)
1101 INDENT ' ' (3, 0) (3, 2)
1102 NAME 'def' (3, 2) (3, 5)
1103 NAME 'baz' (3, 6) (3, 9)
1104 OP '(' (3, 9) (3, 10)
1105 OP ')' (3, 10) (3, 11)
1106 OP ':' (3, 11) (3, 12)
1107 NAME 'pass' (3, 13) (3, 17)
1108 NEWLINE '\\n' (3, 17) (3, 18)
1109 NAME 'async' (4, 2) (4, 7)
1110 NAME 'def' (4, 8) (4, 11)
1111 NAME 'bar' (4, 12) (4, 15)
1112 OP '(' (4, 15) (4, 16)
1113 OP ')' (4, 16) (4, 17)
1114 OP ':' (4, 17) (4, 18)
1115 NAME 'pass' (4, 19) (4, 23)
1116 NEWLINE '\\n' (4, 23) (4, 24)
1117 NL '\\n' (5, 0) (5, 1)
1118 NAME 'await' (6, 2) (6, 7)
1119 OP '=' (6, 8) (6, 9)
1120 NUMBER '2' (6, 10) (6, 11)
1121 DEDENT '' (7, 0) (7, 0)
1122 """)
1123
1124 def test_newline_after_parenthesized_block_with_comment(self):
1125 self.check_tokenize('''\
1126 [
1127 # A comment here
1128 1
1129 ]
1130 ''', """\
1131 OP '[' (1, 0) (1, 1)
1132 NL '\\n' (1, 1) (1, 2)
1133 COMMENT '# A comment here' (2, 4) (2, 20)
1134 NL '\\n' (2, 20) (2, 21)
1135 NUMBER '1' (3, 4) (3, 5)
1136 NL '\\n' (3, 5) (3, 6)
1137 OP ']' (4, 0) (4, 1)
1138 NEWLINE '\\n' (4, 1) (4, 2)
1139 """)
1140
1141 def test_closing_parenthesis_from_different_line(self):
1142 self.check_tokenize("); x", """\
1143 OP ')' (1, 0) (1, 1)
1144 OP ';' (1, 1) (1, 2)
1145 NAME 'x' (1, 3) (1, 4)
1146 """)
1147
1148 class GenerateTokensTest(TokenizeTest):
1149 def check_tokenize(self, s, expected):
1150 # Format the tokens in s as a table.
1151 # The ENDMARKER and final NEWLINE are omitted.
1152 f = StringIO(s)
1153 result = stringify_tokens_from_source(generate_tokens(f.readline), s)
1154 self.assertEqual(result, expected.rstrip().splitlines())
1155
1156
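# Example from the tokenize docs: decistmt() rewrites every float literal in
# a statement as a Decimal('...') constructor call by walking the token
# stream and feeding the edited tokens back through untokenize().
# TestMisc.test_decistmt below exercises it on '+21.3e-5*-.1234/81.7'.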
1157 def decistmt(s):
1158 result = []
1159 g = tokenize(BytesIO(s.encode('utf-8')).readline) # tokenize the string
1160 for toknum, tokval, _, _, _ in g:
1161 if toknum == NUMBER and '.' in tokval: # replace NUMBER tokens
1162 result.extend([
1163 (NAME, 'Decimal'),
1164 (OP, '('),
1165 (STRING, repr(tokval)),
1166 (OP, ')')
1167 ])
1168 else:
1169 result.append((toknum, tokval))
1170 return untokenize(result).decode('utf-8').strip()
1171
1172 class TestMisc(TestCase):
1173
1174 def test_decistmt(self):
1175 # Substitute Decimals for floats in a string of statements.
1176 # This is an example from the docs.
1177
1178 from decimal import Decimal
1179 s = '+21.3e-5*-.1234/81.7'
1180 self.assertEqual(decistmt(s),
1181 "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')")
1182
1183 # The format of the exponent is inherited from the platform C library.
1184 # Known cases are "e-007" (Windows) and "e-07" (not Windows). Since
1185 # we're only showing 11 digits, and the 12th isn't close to 5, the
1186 # rest of the output should be platform-independent.
1187 self.assertRegex(repr(eval(s)), '-3.2171603427[0-9]*e-0+7')
1188
1189 # Output from calculations with Decimal should be identical across all
1190 # platforms.
1191 self.assertEqual(eval(decistmt(s)),
1192 Decimal('-3.217160342717258261933904529E-7'))
1193
1194
1195 class TestTokenizerAdheresToPep0263(TestCase):
1196 """
1197 Test that the tokenizer adheres to the coding behaviour stipulated in PEP 0263.
1198 """
1199
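# Helper: open the named data file from the test directory in binary mode and
# round-trip it through TestRoundtrip.check_roundtrip(), so a bad or
# mismatched coding cookie surfaces as a SyntaxError from tokenize().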
1200 def _testFile(self, filename):
1201 path = os.path.join(os.path.dirname(__file__), filename)
1202 with open(path, 'rb') as f:
1203 TestRoundtrip.check_roundtrip(self, f)
1204
1205 def test_utf8_coding_cookie_and_no_utf8_bom(self):
1206 f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt'
1207 self._testFile(f)
1208
1209 def test_latin1_coding_cookie_and_utf8_bom(self):
1210 """
1211 As per PEP 0263, if a file starts with a utf-8 BOM signature, the only
1212 allowed encoding for the comment is 'utf-8'. The text file used in
1213 this test starts with a BOM signature, but specifies latin1 as the
1214 coding, so verify that a SyntaxError is raised, which matches the
1215 behaviour of the interpreter when it encounters a similar condition.
1216 """
1217 f = 'tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt'
1218 self.assertRaises(SyntaxError, self._testFile, f)
1219
1220 def test_no_coding_cookie_and_utf8_bom(self):
1221 f = 'tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt'
1222 self._testFile(f)
1223
1224 def test_utf8_coding_cookie_and_utf8_bom(self):
1225 f = 'tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt'
1226 self._testFile(f)
1227
1228 def test_bad_coding_cookie(self):
1229 self.assertRaises(SyntaxError, self._testFile, 'bad_coding.py')
1230 self.assertRaises(SyntaxError, self._testFile, 'bad_coding2.py')
1231
1232
1233 class Test_Tokenize(TestCase):
1234
1235 def test__tokenize_decodes_with_specified_encoding(self):
1236 literal = '"ЉЊЈЁЂ"'
1237 line = literal.encode('utf-8')
1238 first = False
1239 def readline():
1240 nonlocal first
1241 if not first:
1242 first = True
1243 yield line
1244 else:
1245 yield b''
1246
1247 # skip the initial encoding token and the end tokens
1248 tokens = list(_generate_tokens_from_c_tokenizer(readline().__next__, encoding='utf-8',
1249 extra_tokens=True))[:-2]
1250 expected_tokens = [TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
1251 self.assertEqual(tokens, expected_tokens,
1252 "bytes not decoded with encoding")
1253
1254
1255 class TestDetectEncoding(TestCase):
1256
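# Helper: build a readline() callable over a fixed tuple of byte lines; each
# call returns the next line, and StopIteration is raised once the lines are
# exhausted, mimicking a very short file.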
1257 def get_readline(self, lines):
1258 index = 0
1259 def readline():
1260 nonlocal index
1261 if index == len(lines):
1262 raise StopIteration
1263 line = lines[index]
1264 index += 1
1265 return line
1266 return readline
1267
1268 def test_no_bom_no_encoding_cookie(self):
1269 lines = (
1270 b'# something\n',
1271 b'print(something)\n',
1272 b'do_something(else)\n'
1273 )
1274 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
1275 self.assertEqual(encoding, 'utf-8')
1276 self.assertEqual(consumed_lines, list(lines[:2]))
1277
1278 def test_bom_no_cookie(self):
1279 lines = (
1280 b'\xef\xbb\xbf# something\n',
1281 b'print(something)\n',
1282 b'do_something(else)\n'
1283 )
1284 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
1285 self.assertEqual(encoding, 'utf-8-sig')
1286 self.assertEqual(consumed_lines,
1287 [b'# something\n', b'print(something)\n'])
1288
1289 def test_cookie_first_line_no_bom(self):
1290 lines = (
1291 b'# -*- coding: latin-1 -*-\n',
1292 b'print(something)\n',
1293 b'do_something(else)\n'
1294 )
1295 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
1296 self.assertEqual(encoding, 'iso-8859-1')
1297 self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n'])
1298
1299 def test_matched_bom_and_cookie_first_line(self):
1300 lines = (
1301 b'\xef\xbb\xbf# coding=utf-8\n',
1302 b'print(something)\n',
1303 b'do_something(else)\n'
1304 )
1305 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
1306 self.assertEqual(encoding, 'utf-8-sig')
1307 self.assertEqual(consumed_lines, [b'# coding=utf-8\n'])
1308
1309 def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self):
1310 lines = (
1311 b'\xef\xbb\xbf# vim: set fileencoding=ascii :\n',
1312 b'print(something)\n',
1313 b'do_something(else)\n'
1314 )
1315 readline = self.get_readline(lines)
1316 self.assertRaises(SyntaxError, detect_encoding, readline)
1317
1318 def test_cookie_second_line_no_bom(self):
1319 lines = (
1320 b'#! something\n',
1321 b'# vim: set fileencoding=ascii :\n',
1322 b'print(something)\n',
1323 b'do_something(else)\n'
1324 )
1325 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
1326 self.assertEqual(encoding, 'ascii')
1327 expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n']
1328 self.assertEqual(consumed_lines, expected)
1329
1330 def test_matched_bom_and_cookie_second_line(self):
1331 lines = (
1332 b'\xef\xbb\xbf#! something\n',
1333 b'f# coding=utf-8\n',
1334 b'print(something)\n',
1335 b'do_something(else)\n'
1336 )
1337 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
1338 self.assertEqual(encoding, 'utf-8-sig')
1339 self.assertEqual(consumed_lines,
1340 [b'#! something\n', b'f# coding=utf-8\n'])
1341
1342 def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self):
1343 lines = (
1344 b'\xef\xbb\xbf#! something\n',
1345 b'# vim: set fileencoding=ascii :\n',
1346 b'print(something)\n',
1347 b'do_something(else)\n'
1348 )
1349 readline = self.get_readline(lines)
1350 self.assertRaises(SyntaxError, detect_encoding, readline)
1351
1352 def test_cookie_second_line_noncommented_first_line(self):
1353 lines = (
1354 b"print('\xc2\xa3')\n",
1355 b'# vim: set fileencoding=iso8859-15 :\n',
1356 b"print('\xe2\x82\xac')\n"
1357 )
1358 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
1359 self.assertEqual(encoding, 'utf-8')
1360 expected = [b"print('\xc2\xa3')\n"]
1361 self.assertEqual(consumed_lines, expected)
1362
1363 def test_cookie_second_line_commented_first_line(self):
1364 lines = (
1365 b"#print('\xc2\xa3')\n",
1366 b'# vim: set fileencoding=iso8859-15 :\n',
1367 b"print('\xe2\x82\xac')\n"
1368 )
1369 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
1370 self.assertEqual(encoding, 'iso8859-15')
1371 expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n']
1372 self.assertEqual(consumed_lines, expected)
1373
1374 def test_cookie_second_line_empty_first_line(self):
1375 lines = (
1376 b'\n',
1377 b'# vim: set fileencoding=iso8859-15 :\n',
1378 b"print('\xe2\x82\xac')\n"
1379 )
1380 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
1381 self.assertEqual(encoding, 'iso8859-15')
1382 expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']
1383 self.assertEqual(consumed_lines, expected)
1384
1385 def test_latin1_normalization(self):
1386 # See get_normal_name() in tokenizer.c.
1387 encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",
1388 "iso-8859-1-unix", "iso-latin-1-mac")
1389 for encoding in encodings:
1390 for rep in ("-", "_"):
1391 enc = encoding.replace("-", rep)
1392 lines = (b"#!/usr/bin/python\n",
1393 b"# coding: " + enc.encode("ascii") + b"\n",
1394 b"print(things)\n",
1395 b"do_something += 4\n")
1396 rl = self.get_readline(lines)
1397 found, consumed_lines = detect_encoding(rl)
1398 self.assertEqual(found, "iso-8859-1")
1399
1400 def test_syntaxerror_latin1(self):
1401 # Issue 14629: need to raise SyntaxError if the first
1402 # line(s) have non-UTF-8 characters
1403 lines = (
1404 b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
1405 )
1406 readline = self.get_readline(lines)
1407 self.assertRaises(SyntaxError, detect_encoding, readline)
1408
1409
1410 def test_utf8_normalization(self):
1411 # See get_normal_name() in tokenizer.c.
1412 encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
1413 for encoding in encodings:
1414 for rep in ("-", "_"):
1415 enc = encoding.replace("-", rep)
1416 lines = (b"#!/usr/bin/python\n",
1417 b"# coding: " + enc.encode("ascii") + b"\n",
1418 b"1 + 3\n")
1419 rl = self.get_readline(lines)
1420 found, consumed_lines = detect_encoding(rl)
1421 self.assertEqual(found, "utf-8")
1422
1423 def test_short_files(self):
1424 readline = self.get_readline((b'print(something)\n',))
1425 encoding, consumed_lines = detect_encoding(readline)
1426 self.assertEqual(encoding, 'utf-8')
1427 self.assertEqual(consumed_lines, [b'print(something)\n'])
1428
1429 encoding, consumed_lines = detect_encoding(self.get_readline(()))
1430 self.assertEqual(encoding, 'utf-8')
1431 self.assertEqual(consumed_lines, [])
1432
1433 readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',))
1434 encoding, consumed_lines = detect_encoding(readline)
1435 self.assertEqual(encoding, 'utf-8-sig')
1436 self.assertEqual(consumed_lines, [b'print(something)\n'])
1437
1438 readline = self.get_readline((b'\xef\xbb\xbf',))
1439 encoding, consumed_lines = detect_encoding(readline)
1440 self.assertEqual(encoding, 'utf-8-sig')
1441 self.assertEqual(consumed_lines, [])
1442
1443 readline = self.get_readline((b'# coding: bad\n',))
1444 self.assertRaises(SyntaxError, detect_encoding, readline)
1445
1446 def test_false_encoding(self):
1447 # Issue 18873: "Encoding" detected in non-comment lines
1448 readline = self.get_readline((b'print("#coding=fake")',))
1449 encoding, consumed_lines = detect_encoding(readline)
1450 self.assertEqual(encoding, 'utf-8')
1451 self.assertEqual(consumed_lines, [b'print("#coding=fake")'])
1452
1453 def test_open(self):
1454 filename = os_helper.TESTFN + '.py'
1455 self.addCleanup(os_helper.unlink, filename)
1456
1457 # test coding cookie
1458 for encoding in ('iso-8859-15', 'utf-8'):
1459 with open(filename, 'w', encoding=encoding) as fp:
1460 print("# coding: %s" % encoding, file=fp)
1461 print("print('euro:\u20ac')", file=fp)
1462 with tokenize_open(filename) as fp:
1463 self.assertEqual(fp.encoding, encoding)
1464 self.assertEqual(fp.mode, 'r')
1465
1466 # test BOM (no coding cookie)
1467 with open(filename, 'w', encoding='utf-8-sig') as fp:
1468 print("print('euro:\u20ac')", file=fp)
1469 with tokenize_open(filename) as fp:
1470 self.assertEqual(fp.encoding, 'utf-8-sig')
1471 self.assertEqual(fp.mode, 'r')
1472
1473 def test_filename_in_exception(self):
1474 # When possible, include the file name in the exception.
1475 path = 'some_file_path'
1476 lines = (
1477 b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
1478 )
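# Minimal file-like stand-in: it exposes only a name attribute and a
# readline() method, letting the test control whether a file name is
# available when detect_encoding() raises.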
1479 class Bunk:
1480 def __init__(self, lines, path):
1481 self.name = path
1482 self._lines = lines
1483 self._index = 0
1484
1485 def readline(self):
1486 if self._index == len(lines):
1487 raise StopIteration
1488 line = lines[self._index]
1489 self._index += 1
1490 return line
1491
1492 with self.assertRaises(SyntaxError):
1493 ins = Bunk(lines, path)
1494 # Make sure lacking a name isn't an issue.
1495 del ins.name
1496 detect_encoding(ins.readline)
1497 with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):
1498 ins = Bunk(lines, path)
1499 detect_encoding(ins.readline)
1500
1501 def test_open_error(self):
1502 # Issue #23840: open() must close the binary file on error
1503 m = BytesIO(b'#coding:xxx')
1504 with mock.patch('tokenize._builtin_open', return_value=m):
1505 self.assertRaises(SyntaxError, tokenize_open, 'foobar')
1506 self.assertTrue(m.closed)
1507
1508
1509 class TestTokenize(TestCase):
1510
1511 def test_tokenize(self):
1512 import tokenize as tokenize_module
1513 encoding = "utf-8"
1514 encoding_used = None
1515 def mock_detect_encoding(readline):
1516 return encoding, [b'first', b'second']
1517
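# Stand-in for the C tokenizer hook: it records the encoding it was handed
# and echoes back every non-empty line it reads, so the test can verify both
# the encoding plumbing and the readline wiring.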
1518 def mock__tokenize(readline, encoding, **kwargs):
1519 nonlocal encoding_used
1520 encoding_used = encoding
1521 out = []
1522 while True:
1523 try:
1524 next_line = readline()
1525 except StopIteration:
1526 return out
1527 if next_line:
1528 out.append(next_line)
1529 continue
1530 return out
1531
1532 counter = 0
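# Fake readline: returns b'1' through b'4' on successive calls, then b'' to
# signal end of input.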
1533 def mock_readline():
1534 nonlocal counter
1535 counter += 1
1536 if counter == 5:
1537 return b''
1538 return str(counter).encode()
1539
1540 orig_detect_encoding = tokenize_module.detect_encoding
1541 orig_c_token = tokenize_module._generate_tokens_from_c_tokenizer
1542 tokenize_module.detect_encoding = mock_detect_encoding
1543 tokenize_module._generate_tokens_from_c_tokenizer = mock__tokenize
1544 try:
1545 results = tokenize(mock_readline)
1546 self.assertEqual(list(results)[1:],
1547 [b'first', b'second', b'1', b'2', b'3', b'4'])
1548 finally:
1549 tokenize_module.detect_encoding = orig_detect_encoding
1550 tokenize_module._generate_tokens_from_c_tokenizer = orig_c_token
1551
1552 self.assertEqual(encoding_used, encoding)
1553
1554 def test_oneline_defs(self):
1555 buf = []
1556 for i in range(500):
1557 buf.append('def i{i}(): return {i}'.format(i=i))
1558 buf.append('OK')
1559 buf = '\n'.join(buf)
1560
1561 # Test that 500 consecutive one-line defs are OK
1562 toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline))
1563 self.assertEqual(toks[-3].string, 'OK') # [-1] is always ENDMARKER
1564 # [-2] is always NEWLINE
1565
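# Helper: tokenize opstr and assert that the exact_type of each resulting
# operator token matches optypes, allowing for the surrounding ENCODING,
# NEWLINE and ENDMARKER tokens.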
1566 def assertExactTypeEqual(self, opstr, *optypes):
1567 tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))
1568 num_optypes = len(optypes)
1569 self.assertEqual(len(tokens), 3 + num_optypes)
1570 self.assertEqual(tok_name[tokens[0].exact_type],
1571 tok_name[ENCODING])
1572 for i in range(num_optypes):
1573 self.assertEqual(tok_name[tokens[i + 1].exact_type],
1574 tok_name[optypes[i]])
1575 self.assertEqual(tok_name[tokens[1 + num_optypes].exact_type],
1576 tok_name[token.NEWLINE])
1577 self.assertEqual(tok_name[tokens[2 + num_optypes].exact_type],
1578 tok_name[token.ENDMARKER])
1579
1580 def test_exact_type(self):
1581 self.assertExactTypeEqual('()', token.LPAR, token.RPAR)
1582 self.assertExactTypeEqual('[]', token.LSQB, token.RSQB)
1583 self.assertExactTypeEqual(':', token.COLON)
1584 self.assertExactTypeEqual(',', token.COMMA)
1585 self.assertExactTypeEqual(';', token.SEMI)
1586 self.assertExactTypeEqual('+', token.PLUS)
1587 self.assertExactTypeEqual('-', token.MINUS)
1588 self.assertExactTypeEqual('*', token.STAR)
1589 self.assertExactTypeEqual('/', token.SLASH)
1590 self.assertExactTypeEqual('|', token.VBAR)
1591 self.assertExactTypeEqual('&', token.AMPER)
1592 self.assertExactTypeEqual('<', token.LESS)
1593 self.assertExactTypeEqual('>', token.GREATER)
1594 self.assertExactTypeEqual('=', token.EQUAL)
1595 self.assertExactTypeEqual('.', token.DOT)
1596 self.assertExactTypeEqual('%', token.PERCENT)
1597 self.assertExactTypeEqual('{}', token.LBRACE, token.RBRACE)
1598 self.assertExactTypeEqual('==', token.EQEQUAL)
1599 self.assertExactTypeEqual('!=', token.NOTEQUAL)
1600 self.assertExactTypeEqual('<=', token.LESSEQUAL)
1601 self.assertExactTypeEqual('>=', token.GREATEREQUAL)
1602 self.assertExactTypeEqual('~', token.TILDE)
1603 self.assertExactTypeEqual('^', token.CIRCUMFLEX)
1604 self.assertExactTypeEqual('<<', token.LEFTSHIFT)
1605 self.assertExactTypeEqual('>>', token.RIGHTSHIFT)
1606 self.assertExactTypeEqual('**', token.DOUBLESTAR)
1607 self.assertExactTypeEqual('+=', token.PLUSEQUAL)
1608 self.assertExactTypeEqual('-=', token.MINEQUAL)
1609 self.assertExactTypeEqual('*=', token.STAREQUAL)
1610 self.assertExactTypeEqual('/=', token.SLASHEQUAL)
1611 self.assertExactTypeEqual('%=', token.PERCENTEQUAL)
1612 self.assertExactTypeEqual('&=', token.AMPEREQUAL)
1613 self.assertExactTypeEqual('|=', token.VBAREQUAL)
1614 self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)
1615 self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)
1616 self.assertExactTypeEqual('<<=', token.LEFTSHIFTEQUAL)
1617 self.assertExactTypeEqual('>>=', token.RIGHTSHIFTEQUAL)
1618 self.assertExactTypeEqual('**=', token.DOUBLESTAREQUAL)
1619 self.assertExactTypeEqual('//', token.DOUBLESLASH)
1620 self.assertExactTypeEqual('//=', token.DOUBLESLASHEQUAL)
1621 self.assertExactTypeEqual(':=', token.COLONEQUAL)
1622 self.assertExactTypeEqual('...', token.ELLIPSIS)
1623 self.assertExactTypeEqual('->', token.RARROW)
1624 self.assertExactTypeEqual('@', token.AT)
1625 self.assertExactTypeEqual('@=', token.ATEQUAL)
1626
1627 self.assertExactTypeEqual('a**2+b**2==c**2',
1628 NAME, token.DOUBLESTAR, NUMBER,
1629 token.PLUS,
1630 NAME, token.DOUBLESTAR, NUMBER,
1631 token.EQEQUAL,
1632 NAME, token.DOUBLESTAR, NUMBER)
1633 self.assertExactTypeEqual('{1, 2, 3}',
1634 token.LBRACE,
1635 token.NUMBER, token.COMMA,
1636 token.NUMBER, token.COMMA,
1637 token.NUMBER,
1638 token.RBRACE)
1639 self.assertExactTypeEqual('^(x & 0x1)',
1640 token.CIRCUMFLEX,
1641 token.LPAR,
1642 token.NAME, token.AMPER, token.NUMBER,
1643 token.RPAR)
1644
1645 def test_pathological_trailing_whitespace(self):
1646 # See http://bugs.python.org/issue16152
1647 self.assertExactTypeEqual('@ ', token.AT)
1648
1649 def test_comment_at_the_end_of_the_source_without_newline(self):
1650 # See http://bugs.python.org/issue44667
1651 source = 'b = 1\n\n#test'
1652 expected_tokens = [
1653 TokenInfo(type=token.ENCODING, string='utf-8', start=(0, 0), end=(0, 0), line=''),
1654 TokenInfo(type=token.NAME, string='b', start=(1, 0), end=(1, 1), line='b = 1\n'),
1655 TokenInfo(type=token.OP, string='=', start=(1, 2), end=(1, 3), line='b = 1\n'),
1656 TokenInfo(type=token.NUMBER, string='1', start=(1, 4), end=(1, 5), line='b = 1\n'),
1657 TokenInfo(type=token.NEWLINE, string='\n', start=(1, 5), end=(1, 6), line='b = 1\n'),
1658 TokenInfo(type=token.NL, string='\n', start=(2, 0), end=(2, 1), line='\n'),
1659 TokenInfo(type=token.COMMENT, string='#test', start=(3, 0), end=(3, 5), line='#test'),
1660 TokenInfo(type=token.NL, string='', start=(3, 5), end=(3, 6), line='#test'),
1661 TokenInfo(type=token.ENDMARKER, string='', start=(4, 0), end=(4, 0), line='')
1662 ]
1663
1664 tokens = list(tokenize(BytesIO(source.encode('utf-8')).readline))
1665 self.assertEqual(tokens, expected_tokens)
1666
1667 def test_newline_and_space_at_the_end_of_the_source_without_newline(self):
1668 # See https://github.com/python/cpython/issues/105435
1669 source = 'a\n '
1670 expected_tokens = [
1671 TokenInfo(token.ENCODING, string='utf-8', start=(0, 0), end=(0, 0), line=''),
1672 TokenInfo(token.NAME, string='a', start=(1, 0), end=(1, 1), line='a\n'),
1673 TokenInfo(token.NEWLINE, string='\n', start=(1, 1), end=(1, 2), line='a\n'),
1674 TokenInfo(token.NL, string='', start=(2, 1), end=(2, 2), line=' '),
1675 TokenInfo(token.ENDMARKER, string='', start=(3, 0), end=(3, 0), line='')
1676 ]
1677
1678 tokens = list(tokenize(BytesIO(source.encode('utf-8')).readline))
1679 self.assertEqual(tokens, expected_tokens)
1680
1681 def test_invalid_character_in_fstring_middle(self):
1682 # See gh-103824
1683 script = b'''F"""
1684 \xe5"""'''
1685
1686 with os_helper.temp_dir() as temp_dir:
1687 filename = os.path.join(temp_dir, "script.py")
1688 with open(filename, 'wb') as file:
1689 file.write(script)
1690 rs, _ = run_python_until_end(filename)
1691 self.assertIn(b"SyntaxError", rs.err)
1692
1693
1694 class UntokenizeTest(TestCase):
1695
1696 def test_bad_input_order(self):
1697 # raise if the start row precedes the previous end row
1698 u = Untokenizer()
1699 u.prev_row = 2
1700 u.prev_col = 2
1701 with self.assertRaises(ValueError) as cm:
1702 u.add_whitespace((1,3))
1703 self.assertEqual(cm.exception.args[0],
1704 'start (1,3) precedes previous end (2,2)')
1705 # raise if the start column precedes the previous end column in the same row
1706 self.assertRaises(ValueError, u.add_whitespace, (2,1))
1707
1708 def test_backslash_continuation(self):
1709 # The problem is that <whitespace>\<newline> leaves no token
1710 u = Untokenizer()
1711 u.prev_row = 1
1712 u.prev_col = 1
1713 u.tokens = []
1714 u.add_whitespace((2, 0))
1715 self.assertEqual(u.tokens, ['\\\n'])
1716 u.prev_row = 2
1717 u.add_whitespace((4, 4))
1718 self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', ' '])
1719 TestRoundtrip.check_roundtrip(self, 'a\n b\n c\n \\\n c\n')
1720
1721 def test_iter_compat(self):
1722 u = Untokenizer()
1723 token = (NAME, 'Hello')
1724 tokens = [(ENCODING, 'utf-8'), token]
1725 u.compat(token, iter([]))
1726 self.assertEqual(u.tokens, ["Hello "])
1727 u = Untokenizer()
1728 self.assertEqual(u.untokenize(iter([token])), 'Hello ')
1729 u = Untokenizer()
1730 self.assertEqual(u.untokenize(iter(tokens)), 'Hello ')
1731 self.assertEqual(u.encoding, 'utf-8')
1732 self.assertEqual(untokenize(iter(tokens)), b'Hello ')
1733
1734
1735 class TestRoundtrip(TestCase):
1736
1737 def check_roundtrip(self, f):
1738 """
1739 Test roundtrip for `untokenize`. `f` is an open file or a string.
1740 The source code in f is tokenized to both 5- and 2-tuples.
1741 Both sequences are converted back to source code via
1742 tokenize.untokenize(), and the latter tokenized again to 2-tuples.
1743 The test fails if the three 2-tuple tokenizations do not match.
1744
1745 When untokenize bugs are fixed, untokenize with 5-tuples should
1746 reproduce code that does not contain a backslash continuation
1747 following spaces. A proper test should test this.
1748 """
1749 # Get source code and original tokenizations
1750 if isinstance(f, str):
1751 code = f.encode('utf-8')
1752 else:
1753 code = f.read()
1754 readline = iter(code.splitlines(keepends=True)).__next__
1755 tokens5 = list(tokenize(readline))
1756 tokens2 = [tok[:2] for tok in tokens5]
1757 # Reproduce tokens2 from pairs
1758 bytes_from2 = untokenize(tokens2)
1759 readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__
1760 tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]
1761 self.assertEqual(tokens2_from2, tokens2)
1762 # Reproduce tokens2 from 5-tuples
1763 bytes_from5 = untokenize(tokens5)
1764 readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__
1765 tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]
1766 self.assertEqual(tokens2_from5, tokens2)
1767
1768 def check_line_extraction(self, f):
1769 if isinstance(f, str):
1770 code = f.encode('utf-8')
1771 else:
1772 code = f.read()
1773 readline = iter(code.splitlines(keepends=True)).__next__
1774 for tok in tokenize(readline):
1775 if tok.type in {ENCODING, ENDMARKER}:
1776 continue
1777 self.assertEqual(tok.string, tok.line[tok.start[1]: tok.end[1]])
1778
1779 def test_roundtrip(self):
1780 # There are some standard formatting practices that are easy to get right.
1781
1782 self.check_roundtrip("if x == 1:\n"
1783 " print(x)\n")
1784 self.check_roundtrip("# This is a comment\n"
1785 "# This also\n")
1786
1787 # Some people use different formatting conventions, which makes
1788 # untokenize a little trickier. Note that this test involves trailing
1789 # whitespace after the colon, which is easy to overlook in the source
1790 # strings below.
1791
1792 self.check_roundtrip("if x == 1 : \n"
1793 " print(x)\n")
1794 fn = support.findfile("tokenize_tests.txt")
1795 with open(fn, 'rb') as f:
1796 self.check_roundtrip(f)
1797 self.check_roundtrip("if x == 1:\n"
1798 " # A comment by itself.\n"
1799 " print(x) # Comment here, too.\n"
1800 " # Another comment.\n"
1801 "after_if = True\n")
1802 self.check_roundtrip("if (x # The comments need to go in the right place\n"
1803 " == 1):\n"
1804 " print('x==1')\n")
1805 self.check_roundtrip("class Test: # A comment here\n"
1806 " # A comment with weird indent\n"
1807 " after_com = 5\n"
1808 " def x(m): return m*5 # a one liner\n"
1809 " def y(m): # A whitespace after the colon\n"
1810 " return y*4 # 3-space indent\n")
1811
1812 # Some error-handling code
1813 self.check_roundtrip("try: import somemodule\n"
1814 "except ImportError: # comment\n"
1815 " print('Can not import' # comment2\n)"
1816 "else: print('Loaded')\n")
1817
1818 def test_continuation(self):
1819 # Balancing continuation
1820 self.check_roundtrip("a = (3,4, \n"
1821 "5,6)\n"
1822 "y = [3, 4,\n"
1823 "5]\n"
1824 "z = {'a': 5,\n"
1825 "'b':15, 'c':True}\n"
1826 "x = len(y) + 5 - a[\n"
1827 "3] - a[2]\n"
1828 "+ len(z) - z[\n"
1829 "'b']\n")
1830
1831 def test_backslash_continuation(self):
1832 # Backslash means line continuation, except for comments
1833 self.check_roundtrip("x=1+\\\n"
1834 "1\n"
1835 "# This is a comment\\\n"
1836 "# This also\n")
1837 self.check_roundtrip("# Comment \\\n"
1838 "x = 0")
1839
1840 def test_string_concatenation(self):
1841 # Two string literals on the same line
1842 self.check_roundtrip("'' ''")
1843
1844 def test_random_files(self):
1845 # Test roundtrip on random python modules.
1846 # Pass the '-ucpu' option to process the full directory.
1847
1848 import glob, random
1849 fn = support.findfile("tokenize_tests.txt")
1850 tempdir = os.path.dirname(fn) or os.curdir
1851 testfiles = glob.glob(os.path.join(glob.escape(tempdir), "test*.py"))
1852
1853 # Tokenize is broken on test_unicode_identifiers.py because regular
1854 # expressions are broken on the obscure unicode identifiers in it. *sigh*
1855 # With roundtrip extended to test the 5-tuple mode of untokenize,
1856 # 7 more testfiles fail. Remove them also until the failure is diagnosed.
1857
1858 testfiles.remove(os.path.join(tempdir, "test_unicode_identifiers.py"))
1859
1860 # TODO: Remove this once we can untokenize PEP 701 syntax
1861 testfiles.remove(os.path.join(tempdir, "test_fstring.py"))
1862
1863 for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'):
1864 testfiles.remove(os.path.join(tempdir, "test_%s.py") % f)
1865
1866 if not support.is_resource_enabled("cpu"):
1867 testfiles = random.sample(testfiles, 10)
1868
1869 for testfile in testfiles:
1870 if support.verbose >= 2:
1871 print('tokenize', testfile)
1872 with open(testfile, 'rb') as f:
1873 with self.subTest(file=testfile):
1874 self.check_roundtrip(f)
1875 self.check_line_extraction(f)
1876
1877
1878 def roundtrip(self, code):
1879 if isinstance(code, str):
1880 code = code.encode('utf-8')
1881 return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')
1882
1883 def test_indentation_semantics_retained(self):
1884 """
1885 Ensure that although whitespace might be mutated in a roundtrip,
1886 the semantic meaning of the indentation remains consistent.
1887 """
1888 code = "if False:\n\tx=3\n\tx=3\n"
1889 codelines = self.roundtrip(code).split('\n')
1890 self.assertEqual(codelines[1], codelines[2])
1891 self.check_roundtrip(code)
1892
1893
1894 class InvalidPythonTests(TestCase):
1895 def test_number_followed_by_name(self):
1896 # See gh-105549
1897 source = "2sin(x)"
1898 expected_tokens = [
1899 TokenInfo(type=token.NUMBER, string='2', start=(1, 0), end=(1, 1), line='2sin(x)'),
1900 TokenInfo(type=token.NAME, string='sin', start=(1, 1), end=(1, 4), line='2sin(x)'),
1901 TokenInfo(type=token.OP, string='(', start=(1, 4), end=(1, 5), line='2sin(x)'),
1902 TokenInfo(type=token.NAME, string='x', start=(1, 5), end=(1, 6), line='2sin(x)'),
1903 TokenInfo(type=token.OP, string=')', start=(1, 6), end=(1, 7), line='2sin(x)'),
1904 TokenInfo(type=token.NEWLINE, string='', start=(1, 7), end=(1, 8), line='2sin(x)'),
1905 TokenInfo(type=token.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
1906 ]
1907
1908 tokens = list(generate_tokens(StringIO(source).readline))
1909 self.assertEqual(tokens, expected_tokens)
1910
1911 def test_number_starting_with_zero(self):
1912 source = "01234"
1913 expected_tokens = [
1914 TokenInfo(type=token.NUMBER, string='01234', start=(1, 0), end=(1, 5), line='01234'),
1915 TokenInfo(type=token.NEWLINE, string='', start=(1, 5), end=(1, 6), line='01234'),
1916 TokenInfo(type=token.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
1917 ]
1918
1919 tokens = list(generate_tokens(StringIO(source).readline))
1920 self.assertEqual(tokens, expected_tokens)
1921
1922 class CTokenizeTest(TestCase):
1923 def check_tokenize(self, s, expected):
1924 # Format the tokens in s in a table format.
1925 # The ENDMARKER and final NEWLINE are omitted.
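# Note: in the expected tables below, NEWLINE tokens carry an empty string
# and INDENT/DEDENT tokens are reported with a column offset of -1; that is
# how the C tokenizer exposes them here.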
1926 f = StringIO(s)
1927 with self.subTest(source=s):
1928 result = stringify_tokens_from_source(
1929 _generate_tokens_from_c_tokenizer(f.readline), s
1930 )
1931 self.assertEqual(result, expected.rstrip().splitlines())
1932
1933 def test_encoding(self):
1934 def readline(encoding):
1935 yield "1+1".encode(encoding)
1936
1937 expected = [
1938 TokenInfo(type=NUMBER, string='1', start=(1, 0), end=(1, 1), line='1+1'),
1939 TokenInfo(type=OP, string='+', start=(1, 1), end=(1, 2), line='1+1'),
1940 TokenInfo(type=NUMBER, string='1', start=(1, 2), end=(1, 3), line='1+1'),
1941 TokenInfo(type=NEWLINE, string='', start=(1, 3), end=(1, 4), line='1+1'),
1942 TokenInfo(type=ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
1943 ]
1944 for encoding in ["utf-8", "latin-1", "utf-16"]:
1945 with self.subTest(encoding=encoding):
1946 tokens = list(_generate_tokens_from_c_tokenizer(
1947 readline(encoding).__next__,
1948 extra_tokens=True,
1949 encoding=encoding,
1950 ))
1951 self.assertEqual(tokens, expected)
1952
1953 def test_int(self):
1954
1955 self.check_tokenize('0xff <= 255', """\
1956 NUMBER '0xff' (1, 0) (1, 4)
1957 LESSEQUAL '<=' (1, 5) (1, 7)
1958 NUMBER '255' (1, 8) (1, 11)
1959 """)
1960
1961 self.check_tokenize('0b10 <= 255', """\
1962 NUMBER '0b10' (1, 0) (1, 4)
1963 LESSEQUAL '<=' (1, 5) (1, 7)
1964 NUMBER '255' (1, 8) (1, 11)
1965 """)
1966
1967 self.check_tokenize('0o123 <= 0O123', """\
1968 NUMBER '0o123' (1, 0) (1, 5)
1969 LESSEQUAL '<=' (1, 6) (1, 8)
1970 NUMBER '0O123' (1, 9) (1, 14)
1971 """)
1972
1973 self.check_tokenize('1234567 > ~0x15', """\
1974 NUMBER '1234567' (1, 0) (1, 7)
1975 GREATER '>' (1, 8) (1, 9)
1976 TILDE '~' (1, 10) (1, 11)
1977 NUMBER '0x15' (1, 11) (1, 15)
1978 """)
1979
1980 self.check_tokenize('2134568 != 1231515', """\
1981 NUMBER '2134568' (1, 0) (1, 7)
1982 NOTEQUAL '!=' (1, 8) (1, 10)
1983 NUMBER '1231515' (1, 11) (1, 18)
1984 """)
1985
1986 self.check_tokenize('(-124561-1) & 200000000', """\
1987 LPAR '(' (1, 0) (1, 1)
1988 MINUS '-' (1, 1) (1, 2)
1989 NUMBER '124561' (1, 2) (1, 8)
1990 MINUS '-' (1, 8) (1, 9)
1991 NUMBER '1' (1, 9) (1, 10)
1992 RPAR ')' (1, 10) (1, 11)
1993 AMPER '&' (1, 12) (1, 13)
1994 NUMBER '200000000' (1, 14) (1, 23)
1995 """)
1996
1997 self.check_tokenize('0xdeadbeef != -1', """\
1998 NUMBER '0xdeadbeef' (1, 0) (1, 10)
1999 NOTEQUAL '!=' (1, 11) (1, 13)
2000 MINUS '-' (1, 14) (1, 15)
2001 NUMBER '1' (1, 15) (1, 16)
2002 """)
2003
2004 self.check_tokenize('0xdeadc0de & 12345', """\
2005 NUMBER '0xdeadc0de' (1, 0) (1, 10)
2006 AMPER '&' (1, 11) (1, 12)
2007 NUMBER '12345' (1, 13) (1, 18)
2008 """)
2009
2010 self.check_tokenize('0xFF & 0x15 | 1234', """\
2011 NUMBER '0xFF' (1, 0) (1, 4)
2012 AMPER '&' (1, 5) (1, 6)
2013 NUMBER '0x15' (1, 7) (1, 11)
2014 VBAR '|' (1, 12) (1, 13)
2015 NUMBER '1234' (1, 14) (1, 18)
2016 """)
2017
2018 def test_float(self):
2019
2020 self.check_tokenize('x = 3.14159', """\
2021 NAME 'x' (1, 0) (1, 1)
2022 EQUAL '=' (1, 2) (1, 3)
2023 NUMBER '3.14159' (1, 4) (1, 11)
2024 """)
2025
2026 self.check_tokenize('x = 314159.', """\
2027 NAME 'x' (1, 0) (1, 1)
2028 EQUAL '=' (1, 2) (1, 3)
2029 NUMBER '314159.' (1, 4) (1, 11)
2030 """)
2031
2032 self.check_tokenize('x = .314159', """\
2033 NAME 'x' (1, 0) (1, 1)
2034 EQUAL '=' (1, 2) (1, 3)
2035 NUMBER '.314159' (1, 4) (1, 11)
2036 """)
2037
2038 self.check_tokenize('x = 3e14159', """\
2039 NAME 'x' (1, 0) (1, 1)
2040 EQUAL '=' (1, 2) (1, 3)
2041 NUMBER '3e14159' (1, 4) (1, 11)
2042 """)
2043
2044 self.check_tokenize('x = 3E123', """\
2045 NAME 'x' (1, 0) (1, 1)
2046 EQUAL '=' (1, 2) (1, 3)
2047 NUMBER '3E123' (1, 4) (1, 9)
2048 """)
2049
2050 self.check_tokenize('x+y = 3e-1230', """\
2051 NAME 'x' (1, 0) (1, 1)
2052 PLUS '+' (1, 1) (1, 2)
2053 NAME 'y' (1, 2) (1, 3)
2054 EQUAL '=' (1, 4) (1, 5)
2055 NUMBER '3e-1230' (1, 6) (1, 13)
2056 """)
2057
2058 self.check_tokenize('x = 3.14e159', """\
2059 NAME 'x' (1, 0) (1, 1)
2060 EQUAL '=' (1, 2) (1, 3)
2061 NUMBER '3.14e159' (1, 4) (1, 12)
2062 """)
2063
2064 def test_string(self):
2065
2066 self.check_tokenize('x = \'\'; y = ""', """\
2067 NAME 'x' (1, 0) (1, 1)
2068 EQUAL '=' (1, 2) (1, 3)
2069 STRING "''" (1, 4) (1, 6)
2070 SEMI ';' (1, 6) (1, 7)
2071 NAME 'y' (1, 8) (1, 9)
2072 EQUAL '=' (1, 10) (1, 11)
2073 STRING '""' (1, 12) (1, 14)
2074 """)
2075
2076 self.check_tokenize('x = \'"\'; y = "\'"', """\
2077 NAME 'x' (1, 0) (1, 1)
2078 EQUAL '=' (1, 2) (1, 3)
2079 STRING '\\'"\\'' (1, 4) (1, 7)
2080 SEMI ';' (1, 7) (1, 8)
2081 NAME 'y' (1, 9) (1, 10)
2082 EQUAL '=' (1, 11) (1, 12)
2083 STRING '"\\'"' (1, 13) (1, 16)
2084 """)
2085
2086 self.check_tokenize('x = "doesn\'t "shrink", does it"', """\
2087 NAME 'x' (1, 0) (1, 1)
2088 EQUAL '=' (1, 2) (1, 3)
2089 STRING '"doesn\\'t "' (1, 4) (1, 14)
2090 NAME 'shrink' (1, 14) (1, 20)
2091 STRING '", does it"' (1, 20) (1, 31)
2092 """)
2093
2094 self.check_tokenize("x = 'abc' + 'ABC'", """\
2095 NAME 'x' (1, 0) (1, 1)
2096 EQUAL '=' (1, 2) (1, 3)
2097 STRING "'abc'" (1, 4) (1, 9)
2098 PLUS '+' (1, 10) (1, 11)
2099 STRING "'ABC'" (1, 12) (1, 17)
2100 """)
2101
2102 self.check_tokenize('y = "ABC" + "ABC"', """\
2103 NAME 'y' (1, 0) (1, 1)
2104 EQUAL '=' (1, 2) (1, 3)
2105 STRING '"ABC"' (1, 4) (1, 9)
2106 PLUS '+' (1, 10) (1, 11)
2107 STRING '"ABC"' (1, 12) (1, 17)
2108 """)
2109
2110 self.check_tokenize("x = r'abc' + r'ABC' + R'ABC' + R'ABC'", """\
2111 NAME 'x' (1, 0) (1, 1)
2112 EQUAL '=' (1, 2) (1, 3)
2113 STRING "r'abc'" (1, 4) (1, 10)
2114 PLUS '+' (1, 11) (1, 12)
2115 STRING "r'ABC'" (1, 13) (1, 19)
2116 PLUS '+' (1, 20) (1, 21)
2117 STRING "R'ABC'" (1, 22) (1, 28)
2118 PLUS '+' (1, 29) (1, 30)
2119 STRING "R'ABC'" (1, 31) (1, 37)
2120 """)
2121
2122 self.check_tokenize('y = r"abc" + r"ABC" + R"ABC" + R"ABC"', """\
2123 NAME 'y' (1, 0) (1, 1)
2124 EQUAL '=' (1, 2) (1, 3)
2125 STRING 'r"abc"' (1, 4) (1, 10)
2126 PLUS '+' (1, 11) (1, 12)
2127 STRING 'r"ABC"' (1, 13) (1, 19)
2128 PLUS '+' (1, 20) (1, 21)
2129 STRING 'R"ABC"' (1, 22) (1, 28)
2130 PLUS '+' (1, 29) (1, 30)
2131 STRING 'R"ABC"' (1, 31) (1, 37)
2132 """)
2133
2134 self.check_tokenize("u'abc' + U'abc'", """\
2135 STRING "u'abc'" (1, 0) (1, 6)
2136 PLUS '+' (1, 7) (1, 8)
2137 STRING "U'abc'" (1, 9) (1, 15)
2138 """)
2139
2140 self.check_tokenize('u"abc" + U"abc"', """\
2141 STRING 'u"abc"' (1, 0) (1, 6)
2142 PLUS '+' (1, 7) (1, 8)
2143 STRING 'U"abc"' (1, 9) (1, 15)
2144 """)
2145
2146 self.check_tokenize("b'abc' + B'abc'", """\
2147 STRING "b'abc'" (1, 0) (1, 6)
2148 PLUS '+' (1, 7) (1, 8)
2149 STRING "B'abc'" (1, 9) (1, 15)
2150 """)
2151
2152 self.check_tokenize('b"abc" + B"abc"', """\
2153 STRING 'b"abc"' (1, 0) (1, 6)
2154 PLUS '+' (1, 7) (1, 8)
2155 STRING 'B"abc"' (1, 9) (1, 15)
2156 """)
2157
2158 self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\
2159 STRING "br'abc'" (1, 0) (1, 7)
2160 PLUS '+' (1, 8) (1, 9)
2161 STRING "bR'abc'" (1, 10) (1, 17)
2162 PLUS '+' (1, 18) (1, 19)
2163 STRING "Br'abc'" (1, 20) (1, 27)
2164 PLUS '+' (1, 28) (1, 29)
2165 STRING "BR'abc'" (1, 30) (1, 37)
2166 """)
2167
2168 self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\
2169 STRING 'br"abc"' (1, 0) (1, 7)
2170 PLUS '+' (1, 8) (1, 9)
2171 STRING 'bR"abc"' (1, 10) (1, 17)
2172 PLUS '+' (1, 18) (1, 19)
2173 STRING 'Br"abc"' (1, 20) (1, 27)
2174 PLUS '+' (1, 28) (1, 29)
2175 STRING 'BR"abc"' (1, 30) (1, 37)
2176 """)
2177
2178 self.check_tokenize("rb'abc' + rB'abc' + Rb'abc' + RB'abc'", """\
2179 STRING "rb'abc'" (1, 0) (1, 7)
2180 PLUS '+' (1, 8) (1, 9)
2181 STRING "rB'abc'" (1, 10) (1, 17)
2182 PLUS '+' (1, 18) (1, 19)
2183 STRING "Rb'abc'" (1, 20) (1, 27)
2184 PLUS '+' (1, 28) (1, 29)
2185 STRING "RB'abc'" (1, 30) (1, 37)
2186 """)
2187
2188 self.check_tokenize('rb"abc" + rB"abc" + Rb"abc" + RB"abc"', """\
2189 STRING 'rb"abc"' (1, 0) (1, 7)
2190 PLUS '+' (1, 8) (1, 9)
2191 STRING 'rB"abc"' (1, 10) (1, 17)
2192 PLUS '+' (1, 18) (1, 19)
2193 STRING 'Rb"abc"' (1, 20) (1, 27)
2194 PLUS '+' (1, 28) (1, 29)
2195 STRING 'RB"abc"' (1, 30) (1, 37)
2196 """)
2197
2198 self.check_tokenize('"a\\\nde\\\nfg"', """\
2199 STRING '"a\\\\\\nde\\\\\\nfg"\' (1, 0) (3, 3)
2200 """)
2201
2202 self.check_tokenize('u"a\\\nde"', """\
2203 STRING 'u"a\\\\\\nde"\' (1, 0) (2, 3)
2204 """)
2205
2206 self.check_tokenize('rb"a\\\nd"', """\
2207 STRING 'rb"a\\\\\\nd"\' (1, 0) (2, 2)
2208 """)
2209
2210 self.check_tokenize(r'"""a\
2211 b"""', """\
2212 STRING '\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
2213 """)
2214 self.check_tokenize(r'u"""a\
2215 b"""', """\
2216 STRING 'u\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
2217 """)
2218 self.check_tokenize(r'rb"""a\
2219 b\
2220 c"""', """\
2221 STRING 'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4)
2222 """)
2223
2224 self.check_tokenize(r'"hola\\\r\ndfgf"', """\
2225 STRING \'"hola\\\\\\\\\\\\r\\\\ndfgf"\' (1, 0) (1, 16)
2226 """)
2227
2228 self.check_tokenize('f"abc"', """\
2229 FSTRING_START 'f"' (1, 0) (1, 2)
2230 FSTRING_MIDDLE 'abc' (1, 2) (1, 5)
2231 FSTRING_END '"' (1, 5) (1, 6)
2232 """)
2233
2234 self.check_tokenize('fR"a{b}c"', """\
2235 FSTRING_START 'fR"' (1, 0) (1, 3)
2236 FSTRING_MIDDLE 'a' (1, 3) (1, 4)
2237 LBRACE '{' (1, 4) (1, 5)
2238 NAME 'b' (1, 5) (1, 6)
2239 RBRACE '}' (1, 6) (1, 7)
2240 FSTRING_MIDDLE 'c' (1, 7) (1, 8)
2241 FSTRING_END '"' (1, 8) (1, 9)
2242 """)
2243
2244 self.check_tokenize('f"""abc"""', """\
2245 FSTRING_START 'f\"""' (1, 0) (1, 4)
2246 FSTRING_MIDDLE 'abc' (1, 4) (1, 7)
2247 FSTRING_END '\"""' (1, 7) (1, 10)
2248 """)
2249
2250 self.check_tokenize(r'f"abc\
2251 def"', """\
2252 FSTRING_START \'f"\' (1, 0) (1, 2)
2253 FSTRING_MIDDLE 'abc\\\\\\ndef' (1, 2) (2, 3)
2254 FSTRING_END '"' (2, 3) (2, 4)
2255 """)
2256
2257 self.check_tokenize('''\
2258 f"{
2259 a}"''', """\
2260 FSTRING_START 'f"' (1, 0) (1, 2)
2261 LBRACE '{' (1, 2) (1, 3)
2262 NAME 'a' (2, 0) (2, 1)
2263 RBRACE '}' (2, 1) (2, 2)
2264 FSTRING_END '"' (2, 2) (2, 3)
2265 """)
2266
2267 self.check_tokenize(r'Rf"abc\
2268 def"', """\
2269 FSTRING_START 'Rf"' (1, 0) (1, 3)
2270 FSTRING_MIDDLE 'abc\\\\\\ndef' (1, 3) (2, 3)
2271 FSTRING_END '"' (2, 3) (2, 4)
2272 """)
2273
2274 self.check_tokenize(r'f"hola\\\r\ndfgf"', """\
2275 FSTRING_START \'f"\' (1, 0) (1, 2)
2276 FSTRING_MIDDLE 'hola\\\\\\\\\\\\r\\\\ndfgf' (1, 2) (1, 16)
2277 FSTRING_END \'"\' (1, 16) (1, 17)
2278 """)
2279
2280 def test_function(self):
2281
2282 self.check_tokenize('def d22(a, b, c=2, d=2, *k): pass', """\
2283 NAME 'def' (1, 0) (1, 3)
2284 NAME 'd22' (1, 4) (1, 7)
2285 LPAR '(' (1, 7) (1, 8)
2286 NAME 'a' (1, 8) (1, 9)
2287 COMMA ',' (1, 9) (1, 10)
2288 NAME 'b' (1, 11) (1, 12)
2289 COMMA ',' (1, 12) (1, 13)
2290 NAME 'c' (1, 14) (1, 15)
2291 EQUAL '=' (1, 15) (1, 16)
2292 NUMBER '2' (1, 16) (1, 17)
2293 COMMA ',' (1, 17) (1, 18)
2294 NAME 'd' (1, 19) (1, 20)
2295 EQUAL '=' (1, 20) (1, 21)
2296 NUMBER '2' (1, 21) (1, 22)
2297 COMMA ',' (1, 22) (1, 23)
2298 STAR '*' (1, 24) (1, 25)
2299 NAME 'k' (1, 25) (1, 26)
2300 RPAR ')' (1, 26) (1, 27)
2301 COLON ':' (1, 27) (1, 28)
2302 NAME 'pass' (1, 29) (1, 33)
2303 """)
2304
2305 self.check_tokenize('def d01v_(a=1, *k, **w): pass', """\
2306 NAME 'def' (1, 0) (1, 3)
2307 NAME 'd01v_' (1, 4) (1, 9)
2308 LPAR '(' (1, 9) (1, 10)
2309 NAME 'a' (1, 10) (1, 11)
2310 EQUAL '=' (1, 11) (1, 12)
2311 NUMBER '1' (1, 12) (1, 13)
2312 COMMA ',' (1, 13) (1, 14)
2313 STAR '*' (1, 15) (1, 16)
2314 NAME 'k' (1, 16) (1, 17)
2315 COMMA ',' (1, 17) (1, 18)
2316 DOUBLESTAR '**' (1, 19) (1, 21)
2317 NAME 'w' (1, 21) (1, 22)
2318 RPAR ')' (1, 22) (1, 23)
2319 COLON ':' (1, 23) (1, 24)
2320 NAME 'pass' (1, 25) (1, 29)
2321 """)
2322
2323 self.check_tokenize('def d23(a: str, b: int=3) -> int: pass', """\
2324 NAME 'def' (1, 0) (1, 3)
2325 NAME 'd23' (1, 4) (1, 7)
2326 LPAR '(' (1, 7) (1, 8)
2327 NAME 'a' (1, 8) (1, 9)
2328 COLON ':' (1, 9) (1, 10)
2329 NAME 'str' (1, 11) (1, 14)
2330 COMMA ',' (1, 14) (1, 15)
2331 NAME 'b' (1, 16) (1, 17)
2332 COLON ':' (1, 17) (1, 18)
2333 NAME 'int' (1, 19) (1, 22)
2334 EQUAL '=' (1, 22) (1, 23)
2335 NUMBER '3' (1, 23) (1, 24)
2336 RPAR ')' (1, 24) (1, 25)
2337 RARROW '->' (1, 26) (1, 28)
2338 NAME 'int' (1, 29) (1, 32)
2339 COLON ':' (1, 32) (1, 33)
2340 NAME 'pass' (1, 34) (1, 38)
2341 """)
2342
2343 def test_comparison(self):
2344
2345 self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != "
2346 "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\
2347 NAME 'if' (1, 0) (1, 2)
2348 NUMBER '1' (1, 3) (1, 4)
2349 LESS '<' (1, 5) (1, 6)
2350 NUMBER '1' (1, 7) (1, 8)
2351 GREATER '>' (1, 9) (1, 10)
2352 NUMBER '1' (1, 11) (1, 12)
2353 EQEQUAL '==' (1, 13) (1, 15)
2354 NUMBER '1' (1, 16) (1, 17)
2355 GREATEREQUAL '>=' (1, 18) (1, 20)
2356 NUMBER '5' (1, 21) (1, 22)
2357 LESSEQUAL '<=' (1, 23) (1, 25)
2358 NUMBER '0x15' (1, 26) (1, 30)
2359 LESSEQUAL '<=' (1, 31) (1, 33)
2360 NUMBER '0x12' (1, 34) (1, 38)
2361 NOTEQUAL '!=' (1, 39) (1, 41)
2362 NUMBER '1' (1, 42) (1, 43)
2363 NAME 'and' (1, 44) (1, 47)
2364 NUMBER '5' (1, 48) (1, 49)
2365 NAME 'in' (1, 50) (1, 52)
2366 NUMBER '1' (1, 53) (1, 54)
2367 NAME 'not' (1, 55) (1, 58)
2368 NAME 'in' (1, 59) (1, 61)
2369 NUMBER '1' (1, 62) (1, 63)
2370 NAME 'is' (1, 64) (1, 66)
2371 NUMBER '1' (1, 67) (1, 68)
2372 NAME 'or' (1, 69) (1, 71)
2373 NUMBER '5' (1, 72) (1, 73)
2374 NAME 'is' (1, 74) (1, 76)
2375 NAME 'not' (1, 77) (1, 80)
2376 NUMBER '1' (1, 81) (1, 82)
2377 COLON ':' (1, 82) (1, 83)
2378 NAME 'pass' (1, 84) (1, 88)
2379 """)
2380
2381 def test_additive(self):
2382
2383 self.check_tokenize('x = 1 - y + 15 - 1 + 0x124 + z + a[5]', """\
2384 NAME 'x' (1, 0) (1, 1)
2385 EQUAL '=' (1, 2) (1, 3)
2386 NUMBER '1' (1, 4) (1, 5)
2387 MINUS '-' (1, 6) (1, 7)
2388 NAME 'y' (1, 8) (1, 9)
2389 PLUS '+' (1, 10) (1, 11)
2390 NUMBER '15' (1, 12) (1, 14)
2391 MINUS '-' (1, 15) (1, 16)
2392 NUMBER '1' (1, 17) (1, 18)
2393 PLUS '+' (1, 19) (1, 20)
2394 NUMBER '0x124' (1, 21) (1, 26)
2395 PLUS '+' (1, 27) (1, 28)
2396 NAME 'z' (1, 29) (1, 30)
2397 PLUS '+' (1, 31) (1, 32)
2398 NAME 'a' (1, 33) (1, 34)
2399 LSQB '[' (1, 34) (1, 35)
2400 NUMBER '5' (1, 35) (1, 36)
2401 RSQB ']' (1, 36) (1, 37)
2402 """)
2403
2404 def test_multiplicative(self):
2405
2406 self.check_tokenize('x = 1//1*1/5*12%0x12@42', """\
2407 NAME 'x' (1, 0) (1, 1)
2408 EQUAL '=' (1, 2) (1, 3)
2409 NUMBER '1' (1, 4) (1, 5)
2410 DOUBLESLASH '//' (1, 5) (1, 7)
2411 NUMBER '1' (1, 7) (1, 8)
2412 STAR '*' (1, 8) (1, 9)
2413 NUMBER '1' (1, 9) (1, 10)
2414 SLASH '/' (1, 10) (1, 11)
2415 NUMBER '5' (1, 11) (1, 12)
2416 STAR '*' (1, 12) (1, 13)
2417 NUMBER '12' (1, 13) (1, 15)
2418 PERCENT '%' (1, 15) (1, 16)
2419 NUMBER '0x12' (1, 16) (1, 20)
2420 AT '@' (1, 20) (1, 21)
2421 NUMBER '42' (1, 21) (1, 23)
2422 """)
2423
2424 def test_unary(self):
2425
2426 self.check_tokenize('~1 ^ 1 & 1 |1 ^ -1', """\
2427 TILDE '~' (1, 0) (1, 1)
2428 NUMBER '1' (1, 1) (1, 2)
2429 CIRCUMFLEX '^' (1, 3) (1, 4)
2430 NUMBER '1' (1, 5) (1, 6)
2431 AMPER '&' (1, 7) (1, 8)
2432 NUMBER '1' (1, 9) (1, 10)
2433 VBAR '|' (1, 11) (1, 12)
2434 NUMBER '1' (1, 12) (1, 13)
2435 CIRCUMFLEX '^' (1, 14) (1, 15)
2436 MINUS '-' (1, 16) (1, 17)
2437 NUMBER '1' (1, 17) (1, 18)
2438 """)
2439
2440 self.check_tokenize('-1*1/1+1*1//1 - ---1**1', """\
2441 MINUS '-' (1, 0) (1, 1)
2442 NUMBER '1' (1, 1) (1, 2)
2443 STAR '*' (1, 2) (1, 3)
2444 NUMBER '1' (1, 3) (1, 4)
2445 SLASH '/' (1, 4) (1, 5)
2446 NUMBER '1' (1, 5) (1, 6)
2447 PLUS '+' (1, 6) (1, 7)
2448 NUMBER '1' (1, 7) (1, 8)
2449 STAR '*' (1, 8) (1, 9)
2450 NUMBER '1' (1, 9) (1, 10)
2451 DOUBLESLASH '//' (1, 10) (1, 12)
2452 NUMBER '1' (1, 12) (1, 13)
2453 MINUS '-' (1, 14) (1, 15)
2454 MINUS '-' (1, 16) (1, 17)
2455 MINUS '-' (1, 17) (1, 18)
2456 MINUS '-' (1, 18) (1, 19)
2457 NUMBER '1' (1, 19) (1, 20)
2458 DOUBLESTAR '**' (1, 20) (1, 22)
2459 NUMBER '1' (1, 22) (1, 23)
2460 """)
2461
2462 def test_selector(self):
2463
2464 self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\
2465 NAME 'import' (1, 0) (1, 6)
2466 NAME 'sys' (1, 7) (1, 10)
2467 COMMA ',' (1, 10) (1, 11)
2468 NAME 'time' (1, 12) (1, 16)
2469 NEWLINE '' (1, 16) (1, 16)
2470 NAME 'x' (2, 0) (2, 1)
2471 EQUAL '=' (2, 2) (2, 3)
2472 NAME 'sys' (2, 4) (2, 7)
2473 DOT '.' (2, 7) (2, 8)
2474 NAME 'modules' (2, 8) (2, 15)
2475 LSQB '[' (2, 15) (2, 16)
2476 STRING "'time'" (2, 16) (2, 22)
2477 RSQB ']' (2, 22) (2, 23)
2478 DOT '.' (2, 23) (2, 24)
2479 NAME 'time' (2, 24) (2, 28)
2480 LPAR '(' (2, 28) (2, 29)
2481 RPAR ')' (2, 29) (2, 30)
2482 """)
2483
2484 def test_method(self):
2485
2486 self.check_tokenize('@staticmethod\ndef foo(x,y): pass', """\
2487 AT '@' (1, 0) (1, 1)
2488 NAME 'staticmethod' (1, 1) (1, 13)
2489 NEWLINE '' (1, 13) (1, 13)
2490 NAME 'def' (2, 0) (2, 3)
2491 NAME 'foo' (2, 4) (2, 7)
2492 LPAR '(' (2, 7) (2, 8)
2493 NAME 'x' (2, 8) (2, 9)
2494 COMMA ',' (2, 9) (2, 10)
2495 NAME 'y' (2, 10) (2, 11)
2496 RPAR ')' (2, 11) (2, 12)
2497 COLON ':' (2, 12) (2, 13)
2498 NAME 'pass' (2, 14) (2, 18)
2499 """)
2500
2501 def test_tabs(self):
2502
2503 self.check_tokenize('@staticmethod\ndef foo(x,\ty): pass', """\
2504 AT '@' (1, 0) (1, 1)
2505 NAME 'staticmethod' (1, 1) (1, 13)
2506 NEWLINE '' (1, 13) (1, 13)
2507 NAME 'def' (2, 0) (2, 3)
2508 NAME 'foo' (2, 4) (2, 7)
2509 LPAR '(' (2, 7) (2, 8)
2510 NAME 'x' (2, 8) (2, 9)
2511 COMMA ',' (2, 9) (2, 10)
2512 NAME 'y' (2, 11) (2, 12)
2513 RPAR ')' (2, 12) (2, 13)
2514 COLON ':' (2, 13) (2, 14)
2515 NAME 'pass' (2, 15) (2, 19)
2516 """)
2517
2518 def test_async(self):
2519
2520 self.check_tokenize('async = 1', """\
2521 ASYNC 'async' (1, 0) (1, 5)
2522 EQUAL '=' (1, 6) (1, 7)
2523 NUMBER '1' (1, 8) (1, 9)
2524 """)
2525
2526 self.check_tokenize('a = (async = 1)', """\
2527 NAME 'a' (1, 0) (1, 1)
2528 EQUAL '=' (1, 2) (1, 3)
2529 LPAR '(' (1, 4) (1, 5)
2530 ASYNC 'async' (1, 5) (1, 10)
2531 EQUAL '=' (1, 11) (1, 12)
2532 NUMBER '1' (1, 13) (1, 14)
2533 RPAR ')' (1, 14) (1, 15)
2534 """)
2535
2536 self.check_tokenize('async()', """\
2537 ASYNC 'async' (1, 0) (1, 5)
2538 LPAR '(' (1, 5) (1, 6)
2539 RPAR ')' (1, 6) (1, 7)
2540 """)
2541
2542 self.check_tokenize('class async(Bar):pass', """\
2543 NAME 'class' (1, 0) (1, 5)
2544 ASYNC 'async' (1, 6) (1, 11)
2545 LPAR '(' (1, 11) (1, 12)
2546 NAME 'Bar' (1, 12) (1, 15)
2547 RPAR ')' (1, 15) (1, 16)
2548 COLON ':' (1, 16) (1, 17)
2549 NAME 'pass' (1, 17) (1, 21)
2550 """)
2551
2552 self.check_tokenize('class async:pass', """\
2553 NAME 'class' (1, 0) (1, 5)
2554 ASYNC 'async' (1, 6) (1, 11)
2555 COLON ':' (1, 11) (1, 12)
2556 NAME 'pass' (1, 12) (1, 16)
2557 """)
2558
2559 self.check_tokenize('await = 1', """\
2560 AWAIT 'await' (1, 0) (1, 5)
2561 EQUAL '=' (1, 6) (1, 7)
2562 NUMBER '1' (1, 8) (1, 9)
2563 """)
2564
2565 self.check_tokenize('foo.async', """\
2566 NAME 'foo' (1, 0) (1, 3)
2567 DOT '.' (1, 3) (1, 4)
2568 ASYNC 'async' (1, 4) (1, 9)
2569 """)
2570
2571 self.check_tokenize('async for a in b: pass', """\
2572 ASYNC 'async' (1, 0) (1, 5)
2573 NAME 'for' (1, 6) (1, 9)
2574 NAME 'a' (1, 10) (1, 11)
2575 NAME 'in' (1, 12) (1, 14)
2576 NAME 'b' (1, 15) (1, 16)
2577 COLON ':' (1, 16) (1, 17)
2578 NAME 'pass' (1, 18) (1, 22)
2579 """)
2580
2581 self.check_tokenize('async with a as b: pass', """\
2582 ASYNC 'async' (1, 0) (1, 5)
2583 NAME 'with' (1, 6) (1, 10)
2584 NAME 'a' (1, 11) (1, 12)
2585 NAME 'as' (1, 13) (1, 15)
2586 NAME 'b' (1, 16) (1, 17)
2587 COLON ':' (1, 17) (1, 18)
2588 NAME 'pass' (1, 19) (1, 23)
2589 """)
2590
2591 self.check_tokenize('async.foo', """\
2592 ASYNC 'async' (1, 0) (1, 5)
2593 DOT '.' (1, 5) (1, 6)
2594 NAME 'foo' (1, 6) (1, 9)
2595 """)
2596
2597 self.check_tokenize('async', """\
2598 ASYNC 'async' (1, 0) (1, 5)
2599 """)
2600
2601 self.check_tokenize('async\n#comment\nawait', """\
2602 ASYNC 'async' (1, 0) (1, 5)
2603 NEWLINE '' (1, 5) (1, 5)
2604 AWAIT 'await' (3, 0) (3, 5)
2605 """)
2606
2607 self.check_tokenize('async\n...\nawait', """\
2608 ASYNC 'async' (1, 0) (1, 5)
2609 NEWLINE '' (1, 5) (1, 5)
2610 ELLIPSIS '...' (2, 0) (2, 3)
2611 NEWLINE '' (2, 3) (2, 3)
2612 AWAIT 'await' (3, 0) (3, 5)
2613 """)
2614
2615 self.check_tokenize('async\nawait', """\
2616 ASYNC 'async' (1, 0) (1, 5)
2617 NEWLINE '' (1, 5) (1, 5)
2618 AWAIT 'await' (2, 0) (2, 5)
2619 """)
2620
2621 self.check_tokenize('foo.async + 1', """\
2622 NAME 'foo' (1, 0) (1, 3)
2623 DOT '.' (1, 3) (1, 4)
2624 ASYNC 'async' (1, 4) (1, 9)
2625 PLUS '+' (1, 10) (1, 11)
2626 NUMBER '1' (1, 12) (1, 13)
2627 """)
2628
2629 self.check_tokenize('async def foo(): pass', """\
2630 ASYNC 'async' (1, 0) (1, 5)
2631 NAME 'def' (1, 6) (1, 9)
2632 NAME 'foo' (1, 10) (1, 13)
2633 LPAR '(' (1, 13) (1, 14)
2634 RPAR ')' (1, 14) (1, 15)
2635 COLON ':' (1, 15) (1, 16)
2636 NAME 'pass' (1, 17) (1, 21)
2637 """)
2638
2639 self.check_tokenize('''\
2640 async def foo():
2641 def foo(await):
2642 await = 1
2643 if 1:
2644 await
2645 async += 1
2646 ''', """\
2647 ASYNC 'async' (1, 0) (1, 5)
2648 NAME 'def' (1, 6) (1, 9)
2649 NAME 'foo' (1, 10) (1, 13)
2650 LPAR '(' (1, 13) (1, 14)
2651 RPAR ')' (1, 14) (1, 15)
2652 COLON ':' (1, 15) (1, 16)
2653 NEWLINE '' (1, 16) (1, 16)
2654 INDENT '' (2, -1) (2, -1)
2655 NAME 'def' (2, 2) (2, 5)
2656 NAME 'foo' (2, 6) (2, 9)
2657 LPAR '(' (2, 9) (2, 10)
2658 AWAIT 'await' (2, 10) (2, 15)
2659 RPAR ')' (2, 15) (2, 16)
2660 COLON ':' (2, 16) (2, 17)
2661 NEWLINE '' (2, 17) (2, 17)
2662 INDENT '' (3, -1) (3, -1)
2663 AWAIT 'await' (3, 4) (3, 9)
2664 EQUAL '=' (3, 10) (3, 11)
2665 NUMBER '1' (3, 12) (3, 13)
2666 NEWLINE '' (3, 13) (3, 13)
2667 DEDENT '' (4, -1) (4, -1)
2668 NAME 'if' (4, 2) (4, 4)
2669 NUMBER '1' (4, 5) (4, 6)
2670 COLON ':' (4, 6) (4, 7)
2671 NEWLINE '' (4, 7) (4, 7)
2672 INDENT '' (5, -1) (5, -1)
2673 AWAIT 'await' (5, 4) (5, 9)
2674 NEWLINE '' (5, 9) (5, 9)
2675 DEDENT '' (6, -1) (6, -1)
2676 DEDENT '' (6, -1) (6, -1)
2677 ASYNC 'async' (6, 0) (6, 5)
2678 PLUSEQUAL '+=' (6, 6) (6, 8)
2679 NUMBER '1' (6, 9) (6, 10)
2680 NEWLINE '' (6, 10) (6, 10)
2681 """)
2682
2683 self.check_tokenize('async def foo():\n async for i in 1: pass', """\
2684 ASYNC 'async' (1, 0) (1, 5)
2685 NAME 'def' (1, 6) (1, 9)
2686 NAME 'foo' (1, 10) (1, 13)
2687 LPAR '(' (1, 13) (1, 14)
2688 RPAR ')' (1, 14) (1, 15)
2689 COLON ':' (1, 15) (1, 16)
2690 NEWLINE '' (1, 16) (1, 16)
2691 INDENT '' (2, -1) (2, -1)
2692 ASYNC 'async' (2, 2) (2, 7)
2693 NAME 'for' (2, 8) (2, 11)
2694 NAME 'i' (2, 12) (2, 13)
2695 NAME 'in' (2, 14) (2, 16)
2696 NUMBER '1' (2, 17) (2, 18)
2697 COLON ':' (2, 18) (2, 19)
2698 NAME 'pass' (2, 20) (2, 24)
2699 DEDENT '' (2, -1) (2, -1)
2700 """)
2701
2702 self.check_tokenize('async def foo(async): await', """\
2703 ASYNC 'async' (1, 0) (1, 5)
2704 NAME 'def' (1, 6) (1, 9)
2705 NAME 'foo' (1, 10) (1, 13)
2706 LPAR '(' (1, 13) (1, 14)
2707 ASYNC 'async' (1, 14) (1, 19)
2708 RPAR ')' (1, 19) (1, 20)
2709 COLON ':' (1, 20) (1, 21)
2710 AWAIT 'await' (1, 22) (1, 27)
2711 """)
2712
2713 self.check_tokenize('''\
2714 def f():
2715
2716 def baz(): pass
2717 async def bar(): pass
2718
2719 await = 2''', """\
2720 NAME 'def' (1, 0) (1, 3)
2721 NAME 'f' (1, 4) (1, 5)
2722 LPAR '(' (1, 5) (1, 6)
2723 RPAR ')' (1, 6) (1, 7)
2724 COLON ':' (1, 7) (1, 8)
2725 NEWLINE '' (1, 8) (1, 8)
2726 INDENT '' (3, -1) (3, -1)
2727 NAME 'def' (3, 2) (3, 5)
2728 NAME 'baz' (3, 6) (3, 9)
2729 LPAR '(' (3, 9) (3, 10)
2730 RPAR ')' (3, 10) (3, 11)
2731 COLON ':' (3, 11) (3, 12)
2732 NAME 'pass' (3, 13) (3, 17)
2733 NEWLINE '' (3, 17) (3, 17)
2734 ASYNC 'async' (4, 2) (4, 7)
2735 NAME 'def' (4, 8) (4, 11)
2736 NAME 'bar' (4, 12) (4, 15)
2737 LPAR '(' (4, 15) (4, 16)
2738 RPAR ')' (4, 16) (4, 17)
2739 COLON ':' (4, 17) (4, 18)
2740 NAME 'pass' (4, 19) (4, 23)
2741 NEWLINE '' (4, 23) (4, 23)
2742 AWAIT 'await' (6, 2) (6, 7)
2743 EQUAL '=' (6, 8) (6, 9)
2744 NUMBER '2' (6, 10) (6, 11)
2745 DEDENT '' (6, -1) (6, -1)
2746 """)
2747
2748 self.check_tokenize('''\
2749 async def f():
2750
2751 def baz(): pass
2752 async def bar(): pass
2753
2754 await = 2''', """\
2755 ASYNC 'async' (1, 0) (1, 5)
2756 NAME 'def' (1, 6) (1, 9)
2757 NAME 'f' (1, 10) (1, 11)
2758 LPAR '(' (1, 11) (1, 12)
2759 RPAR ')' (1, 12) (1, 13)
2760 COLON ':' (1, 13) (1, 14)
2761 NEWLINE '' (1, 14) (1, 14)
2762 INDENT '' (3, -1) (3, -1)
2763 NAME 'def' (3, 2) (3, 5)
2764 NAME 'baz' (3, 6) (3, 9)
2765 LPAR '(' (3, 9) (3, 10)
2766 RPAR ')' (3, 10) (3, 11)
2767 COLON ':' (3, 11) (3, 12)
2768 NAME 'pass' (3, 13) (3, 17)
2769 NEWLINE '' (3, 17) (3, 17)
2770 ASYNC 'async' (4, 2) (4, 7)
2771 NAME 'def' (4, 8) (4, 11)
2772 NAME 'bar' (4, 12) (4, 15)
2773 LPAR '(' (4, 15) (4, 16)
2774 RPAR ')' (4, 16) (4, 17)
2775 COLON ':' (4, 17) (4, 18)
2776 NAME 'pass' (4, 19) (4, 23)
2777 NEWLINE '' (4, 23) (4, 23)
2778 AWAIT 'await' (6, 2) (6, 7)
2779 EQUAL '=' (6, 8) (6, 9)
2780 NUMBER '2' (6, 10) (6, 11)
2781 DEDENT '' (6, -1) (6, -1)
2782 """)
2783
2784 def test_unicode(self):
2785
2786 self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\
2787 NAME 'Örter' (1, 0) (1, 5)
2788 EQUAL '=' (1, 6) (1, 7)
2789 STRING "u'places'" (1, 8) (1, 17)
2790 NEWLINE '' (1, 17) (1, 17)
2791 NAME 'grün' (2, 0) (2, 4)
2792 EQUAL '=' (2, 5) (2, 6)
2793 STRING "U'green'" (2, 7) (2, 15)
2794 """)
2795
2796 def test_invalid_syntax(self):
2797 def get_tokens(string):
2798 the_string = StringIO(string)
2799 return list(_generate_tokens_from_c_tokenizer(the_string.readline))
2800
2801 for case in [
2802 "(1+2]",
2803 "(1+2}",
2804 "{1+2]",
2805 "1_",
2806 "1.2_",
2807 "1e2_",
2808 "1e+",
2809
2810 "\xa0",
2811 "€",
2812 "0b12",
2813 "0b1_2",
2814 "0b2",
2815 "0b1_",
2816 "0b",
2817 "0o18",
2818 "0o1_8",
2819 "0o8",
2820 "0o1_",
2821 "0o",
2822 "0x1_",
2823 "0x",
2824 "1_",
2825 "012",
2826 "1.2_",
2827 "1e2_",
2828 "1e+",
2829 "'sdfsdf",
2830 "'''sdfsdf''",
2831 "("*1000+"a"+")"*1000,
2832 "]",
2833 ]:
2834 with self.subTest(case=case):
2835 self.assertRaises(TokenError, get_tokens, case)
2836
2837 def test_max_indent(self):
2838 MAXINDENT = 100
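# MAXINDENT mirrors the C tokenizer's limit of 100 indentation levels:
# staying one level under it must tokenize and compile, while reaching it
# must raise IndentationError.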
2839
2840 def generate_source(indents):
2841 source = ''.join((' ' * x) + 'if True:\n' for x in range(indents))
2842 source += ' ' * indents + 'pass\n'
2843 return source
2844
2845 valid = generate_source(MAXINDENT - 1)
2846 the_input = StringIO(valid)
2847 tokens = list(_generate_tokens_from_c_tokenizer(the_input.readline))
2848 self.assertEqual(tokens[-2].type, DEDENT)
2849 self.assertEqual(tokens[-1].type, ENDMARKER)
2850 compile(valid, "<string>", "exec")
2851
2852 invalid = generate_source(MAXINDENT)
2853 the_input = StringIO(invalid)
2854 self.assertRaises(IndentationError, lambda: list(_generate_tokens_from_c_tokenizer(the_input.readline)))
2855 self.assertRaises(
2856 IndentationError, compile, invalid, "<string>", "exec"
2857 )
2858
2859 def test_continuation_lines_indentation(self):
2860 def get_tokens(string):
2861 the_string = StringIO(string)
2862 return [(kind, string) for (kind, string, *_)
2863 in _generate_tokens_from_c_tokenizer(the_string.readline)]
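# Positions are dropped so that sources differing only in backslash
# continuation lines can be compared token-for-token.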
2864
2865 code = dedent("""
2866 def fib(n):
2867 \\
2868 '''Print a Fibonacci series up to n.'''
2869 \\
2870 a, b = 0, 1
2871 """)
2872
2873 self.check_tokenize(code, """\
2874 NAME 'def' (2, 0) (2, 3)
2875 NAME 'fib' (2, 4) (2, 7)
2876 LPAR '(' (2, 7) (2, 8)
2877 NAME 'n' (2, 8) (2, 9)
2878 RPAR ')' (2, 9) (2, 10)
2879 COLON ':' (2, 10) (2, 11)
2880 NEWLINE '' (2, 11) (2, 11)
2881 INDENT '' (4, -1) (4, -1)
2882 STRING "'''Print a Fibonacci series up to n.'''" (4, 0) (4, 39)
2883 NEWLINE '' (4, 39) (4, 39)
2884 NAME 'a' (6, 0) (6, 1)
2885 COMMA ',' (6, 1) (6, 2)
2886 NAME 'b' (6, 3) (6, 4)
2887 EQUAL '=' (6, 5) (6, 6)
2888 NUMBER '0' (6, 7) (6, 8)
2889 COMMA ',' (6, 8) (6, 9)
2890 NUMBER '1' (6, 10) (6, 11)
2891 NEWLINE '' (6, 11) (6, 11)
2892 DEDENT '' (6, -1) (6, -1)
2893 """)
2894
2895 code_no_cont = dedent("""
2896 def fib(n):
2897 '''Print a Fibonacci series up to n.'''
2898 a, b = 0, 1
2899 """)
2900
2901 self.assertEqual(get_tokens(code), get_tokens(code_no_cont))
2902
2903 code = dedent("""
2904 pass
2905 \\
2906
2907 pass
2908 """)
2909
2910 self.check_tokenize(code, """\
2911 NAME 'pass' (2, 0) (2, 4)
2912 NEWLINE '' (2, 4) (2, 4)
2913 NAME 'pass' (5, 0) (5, 4)
2914 NEWLINE '' (5, 4) (5, 4)
2915 """)
2916
2917 code_no_cont = dedent("""
2918 pass
2919 pass
2920 """)
2921
2922 self.assertEqual(get_tokens(code), get_tokens(code_no_cont))
2923
2924 code = dedent("""
2925 if x:
2926 y = 1
2927 \\
2928 \\
2929 \\
2930 \\
2931 foo = 1
2932 """)
2933
2934 self.check_tokenize(code, """\
2935 NAME 'if' (2, 0) (2, 2)
2936 NAME 'x' (2, 3) (2, 4)
2937 COLON ':' (2, 4) (2, 5)
2938 NEWLINE '' (2, 5) (2, 5)
2939 INDENT '' (3, -1) (3, -1)
2940 NAME 'y' (3, 4) (3, 5)
2941 EQUAL '=' (3, 6) (3, 7)
2942 NUMBER '1' (3, 8) (3, 9)
2943 NEWLINE '' (3, 9) (3, 9)
2944 NAME 'foo' (8, 4) (8, 7)
2945 EQUAL '=' (8, 8) (8, 9)
2946 NUMBER '1' (8, 10) (8, 11)
2947 NEWLINE '' (8, 11) (8, 11)
2948 DEDENT '' (8, -1) (8, -1)
2949 """)
2950
2951 code_no_cont = dedent("""
2952 if x:
2953 y = 1
2954 foo = 1
2955 """)
2956
2957 self.assertEqual(get_tokens(code), get_tokens(code_no_cont))
2958
2959
2960 class CTokenizerBufferTests(unittest.TestCase):
2961 def test_newline_at_the_end_of_buffer(self):
2962 # See issue 99581: Make sure that if we need to add a new line at the
2963 # end of the buffer, we have enough space in the buffer, especially when
2964 # the current line is as long as the available buffer space.
2965 test_script = f"""\
2966 #coding: latin-1
2967 #{"a"*10000}
2968 #{"a"*10002}"""
2969 with os_helper.temp_dir() as temp_dir:
2970 file_name = make_script(temp_dir, 'foo', test_script)
2971 run_test_script(file_name)
2972
2973
2974 if __name__ == "__main__":
2975 unittest.main()