import sysconfig
import textwrap
import unittest
import os
import shutil
import tempfile
from pathlib import Path

from test import test_tools
from test import support
from test.support import os_helper
from test.support.script_helper import assert_python_ok
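
# Note: compiling the freshly generated parser extensions under a PGO build
# would pick up the profile-use flag from PY_CFLAGS_NODIST without any
# matching profile data for the new sources, so these tests are skipped there.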
_py_cflags_nodist = sysconfig.get_config_var("PY_CFLAGS_NODIST")
_pgo_flag = sysconfig.get_config_var("PGO_PROF_USE_FLAG")
if _pgo_flag and _py_cflags_nodist and _pgo_flag in _py_cflags_nodist:
    raise unittest.SkipTest("peg_generator test disabled under PGO build")

test_tools.skip_if_missing("peg_generator")
with test_tools.imports_under_tool("peg_generator"):
    from pegen.grammar_parser import GeneratedParser as GrammarParser
    from pegen.testutil import (
        parse_string,
        generate_parser_c_extension,
        generate_c_parser_source,
    )
    from pegen.ast_dump import ast_dump
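
# Script template executed in a fresh subprocess by run_test():
# {extension_path!r} receives the temporary directory that holds the freshly
# built "parse" extension module, and {test_source} is substituted into the
# body of Tests.test_parse() below.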
TEST_TEMPLATE = """
tmp_dir = {extension_path!r}

import ast
import traceback
import sys
import unittest

from test import test_tools
with test_tools.imports_under_tool("peg_generator"):
    from pegen.ast_dump import ast_dump

sys.path.insert(0, tmp_dir)
import parse

class Tests(unittest.TestCase):

    def check_input_strings_for_grammar(
        self,
        valid_cases = (),
        invalid_cases = (),
    ):
        if valid_cases:
            for case in valid_cases:
                parse.parse_string(case, mode=0)

        if invalid_cases:
            for case in invalid_cases:
                with self.assertRaises(SyntaxError):
                    parse.parse_string(case, mode=0)

    def verify_ast_generation(self, stmt):
        expected_ast = ast.parse(stmt)
        actual_ast = parse.parse_string(stmt, mode=1)
        self.assertEqual(ast_dump(expected_ast), ast_dump(actual_ast))

    def test_parse(self):
        {test_source}

unittest.main()
"""


@support.requires_subprocess()
class TestCParser(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        # When running under regrtest, a separate tempdir is used
        # as the current directory and watched for left-overs.
        # Reusing that as the base for temporary directories
        # ensures everything is cleaned up properly; anything left
        # behind is removed afterwards (with warnings).
        cls.tmp_base = os.getcwd()
        if os.path.samefile(cls.tmp_base, os_helper.SAVEDCWD):
            cls.tmp_base = None
        # Create a directory for the reusable static library part of
        # the pegen extension build process. This greatly reduces the
        # runtime overhead of spawning compiler processes.
        cls.library_dir = tempfile.mkdtemp(dir=cls.tmp_base)
        cls.addClassCleanup(shutil.rmtree, cls.library_dir)

    def setUp(self):
        self._backup_config_vars = dict(sysconfig._CONFIG_VARS)
        cmd = support.missing_compiler_executable()
        if cmd is not None:
            self.skipTest("The %r command is not found" % cmd)
        self.old_cwd = os.getcwd()
        self.tmp_path = tempfile.mkdtemp(dir=self.tmp_base)
        self.enterContext(os_helper.change_cwd(self.tmp_path))

    def tearDown(self):
        os.chdir(self.old_cwd)
        shutil.rmtree(self.tmp_path)
        sysconfig._CONFIG_VARS.clear()
        sysconfig._CONFIG_VARS.update(self._backup_config_vars)
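
    # Compile a throw-away "parse" extension for the given grammar in the
    # per-test temporary directory; the static library pieces built once in
    # cls.library_dir are reused across tests to keep compile times down.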
    def build_extension(self, grammar_source):
        grammar = parse_string(grammar_source, GrammarParser)
        # Because setUp() already changes the current directory to the
        # temporary path, use a relative path here to prevent excessive
        # path lengths when compiling.
        generate_parser_c_extension(grammar, Path('.'), library_dir=self.library_dir)
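
    # The caller's test body is dedented and then re-indented by 8 spaces so
    # that it lines up with the body of Tests.test_parse() in TEST_TEMPLATE;
    # the rendered script runs in a subprocess via assert_python_ok().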
    def run_test(self, grammar_source, test_source):
        self.build_extension(grammar_source)
        test_source = textwrap.indent(textwrap.dedent(test_source), 8 * " ")
        assert_python_ok(
            "-c",
            TEST_TEMPLATE.format(extension_path=self.tmp_path, test_source=test_source),
        )

    def test_c_parser(self) -> None:
        grammar_source = """
        start[mod_ty]: a[asdl_stmt_seq*]=stmt* $ { _PyAST_Module(a, NULL, p->arena) }
        stmt[stmt_ty]: a=expr_stmt { a }
        expr_stmt[stmt_ty]: a=expression NEWLINE { _PyAST_Expr(a, EXTRA) }
        expression[expr_ty]: ( l=expression '+' r=term { _PyAST_BinOp(l, Add, r, EXTRA) }
                             | l=expression '-' r=term { _PyAST_BinOp(l, Sub, r, EXTRA) }
                             | t=term { t }
                             )
        term[expr_ty]: ( l=term '*' r=factor { _PyAST_BinOp(l, Mult, r, EXTRA) }
                       | l=term '/' r=factor { _PyAST_BinOp(l, Div, r, EXTRA) }
                       | f=factor { f }
                       )
        factor[expr_ty]: ('(' e=expression ')' { e }
                         | a=atom { a }
                         )
        atom[expr_ty]: ( n=NAME { n }
                       | n=NUMBER { n }
                       | s=STRING { s }
                       )
        """
        test_source = """
        expressions = [
            "4+5",
            "4-5",
            "4*5",
            "1+4*5",
            "1+4/5",
            "(1+1) + (1+1)",
            "(1+1) - (1+1)",
            "(1+1) * (1+1)",
            "(1+1) / (1+1)",
        ]

        for expr in expressions:
            the_ast = parse.parse_string(expr, mode=1)
            expected_ast = ast.parse(expr)
            self.assertEqual(ast_dump(the_ast), ast_dump(expected_ast))
        """
        self.run_test(grammar_source, test_source)
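
    # "&e" is a positive lookahead: the input must match e at this point,
    # but nothing is consumed (here a NAME must be followed by another NAME).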
    def test_lookahead(self) -> None:
        grammar_source = """
        start: NAME &NAME expr NEWLINE? ENDMARKER
        expr: NAME | NUMBER
        """
        test_source = """
        valid_cases = ["foo bar"]
        invalid_cases = ["foo 34"]
        self.check_input_strings_for_grammar(valid_cases, invalid_cases)
        """
        self.run_test(grammar_source, test_source)
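
    # "!e" is a negative lookahead: it succeeds only if the input does not
    # match e, again without consuming anything.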
    def test_negative_lookahead(self) -> None:
        grammar_source = """
        start: NAME !NAME expr NEWLINE? ENDMARKER
        expr: NAME | NUMBER
        """
        test_source = """
        valid_cases = ["foo 34"]
        invalid_cases = ["foo bar"]
        self.check_input_strings_for_grammar(valid_cases, invalid_cases)
        """
        self.run_test(grammar_source, test_source)
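
    # "~" is the cut operator: once it is crossed, the parser commits to the
    # current alternative and no longer backtracks to try "X Q S".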
    def test_cut(self) -> None:
        grammar_source = """
        start: X ~ Y Z | X Q S
        X: 'x'
        Y: 'y'
        Z: 'z'
        Q: 'q'
        S: 's'
        """
        test_source = """
        valid_cases = ["x y z"]
        invalid_cases = ["x q s"]
        self.check_input_strings_for_grammar(valid_cases, invalid_cases)
        """
        self.run_test(grammar_source, test_source)
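
    # "';'.pass_stmt+" is the gather form: one or more pass_stmt separated by
    # ';' tokens, with no trailing separator (so "pass; pass" is valid but
    # "pass;" is not).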
    def test_gather(self) -> None:
        grammar_source = """
        start: ';'.pass_stmt+ NEWLINE
        pass_stmt: 'pass'
        """
        test_source = """
        valid_cases = ["pass", "pass; pass"]
        invalid_cases = ["pass;", "pass; pass;"]
        self.check_input_strings_for_grammar(valid_cases, invalid_cases)
        """
        self.run_test(grammar_source, test_source)
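
    # The generated parsers handle directly, indirectly and mutually
    # left-recursive rules, which the next few tests exercise.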
    def test_left_recursion(self) -> None:
        grammar_source = """
        start: expr NEWLINE
        expr: ('-' term | expr '+' term | term)
        term: NUMBER
        """
        test_source = """
        valid_cases = ["-34", "34", "34 + 12", "1 + 1 + 2 + 3"]
        self.check_input_strings_for_grammar(valid_cases)
        """
        self.run_test(grammar_source, test_source)

    def test_advanced_left_recursive(self) -> None:
        grammar_source = """
        start: NUMBER | sign start
        sign: ['-']
        """
        test_source = """
        valid_cases = ["23", "-34"]
        self.check_input_strings_for_grammar(valid_cases)
        """
        self.run_test(grammar_source, test_source)

    def test_mutually_left_recursive(self) -> None:
        grammar_source = """
        start: foo 'E'
        foo: bar 'A' | 'B'
        bar: foo 'C' | 'D'
        """
        test_source = """
        valid_cases = ["B E", "D A C A E"]
        self.check_input_strings_for_grammar(valid_cases)
        """
        self.run_test(grammar_source, test_source)

    def test_nasty_mutually_left_recursive(self) -> None:
        grammar_source = """
        start: target '='
        target: maybe '+' | NAME
        maybe: maybe '-' | target
        """
        test_source = """
        valid_cases = ["x ="]
        invalid_cases = ["x - + ="]
        self.check_input_strings_for_grammar(valid_cases, invalid_cases)
        """
        self.run_test(grammar_source, test_source)
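
    # The following tests use grammar actions that build real CPython AST
    # nodes via the _PyAST_* constructors; EXTRA expands to the node's
    # start/end location arguments plus p->arena, which those constructors expect.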
    def test_return_stmt_noexpr_action(self) -> None:
        grammar_source = """
        start[mod_ty]: a=[statements] ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
        statements[asdl_stmt_seq*]: a[asdl_stmt_seq*]=statement+ { a }
        statement[stmt_ty]: simple_stmt
        simple_stmt[stmt_ty]: small_stmt
        small_stmt[stmt_ty]: return_stmt
        return_stmt[stmt_ty]: a='return' NEWLINE { _PyAST_Return(NULL, EXTRA) }
        """
        test_source = """
        stmt = "return"
        self.verify_ast_generation(stmt)
        """
        self.run_test(grammar_source, test_source)

    def test_gather_action_ast(self) -> None:
        grammar_source = """
        start[mod_ty]: a[asdl_stmt_seq*]=';'.pass_stmt+ NEWLINE ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
        pass_stmt[stmt_ty]: a='pass' { _PyAST_Pass(EXTRA)}
        """
        test_source = """
        stmt = "pass; pass"
        self.verify_ast_generation(stmt)
        """
        self.run_test(grammar_source, test_source)

    def test_pass_stmt_action(self) -> None:
        grammar_source = """
        start[mod_ty]: a=[statements] ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
        statements[asdl_stmt_seq*]: a[asdl_stmt_seq*]=statement+ { a }
        statement[stmt_ty]: simple_stmt
        simple_stmt[stmt_ty]: small_stmt
        small_stmt[stmt_ty]: pass_stmt
        pass_stmt[stmt_ty]: a='pass' NEWLINE { _PyAST_Pass(EXTRA) }
        """
        test_source = """
        stmt = "pass"
        self.verify_ast_generation(stmt)
        """
        self.run_test(grammar_source, test_source)

    def test_if_stmt_action(self) -> None:
        grammar_source = """
        start[mod_ty]: a=[statements] ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
        statements[asdl_stmt_seq*]: a=statement+ { (asdl_stmt_seq*)_PyPegen_seq_flatten(p, a) }
        statement[asdl_stmt_seq*]: a=compound_stmt { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, a) } | simple_stmt

        simple_stmt[asdl_stmt_seq*]: a=small_stmt b=further_small_stmt* [';'] NEWLINE {
                        (asdl_stmt_seq*)_PyPegen_seq_insert_in_front(p, a, b) }
        further_small_stmt[stmt_ty]: ';' a=small_stmt { a }

        block: simple_stmt | NEWLINE INDENT a=statements DEDENT { a }

        compound_stmt: if_stmt

        if_stmt: 'if' a=full_expression ':' b=block { _PyAST_If(a, b, NULL, EXTRA) }

        small_stmt[stmt_ty]: pass_stmt

        pass_stmt[stmt_ty]: a='pass' { _PyAST_Pass(EXTRA) }

        full_expression: NAME
        """
        test_source = """
        stmt = "pass"
        self.verify_ast_generation(stmt)
        """
        self.run_test(grammar_source, test_source)

    def test_same_name_different_types(self) -> None:
        grammar_source = """
        start[mod_ty]: a[asdl_stmt_seq*]=import_from+ NEWLINE ENDMARKER { _PyAST_Module(a, NULL, p->arena)}
        import_from[stmt_ty]: ( a='from' !'import' c=simple_name 'import' d=import_as_names_from {
                                  _PyAST_ImportFrom(c->v.Name.id, d, 0, EXTRA) }
                              | a='from' '.' 'import' c=import_as_names_from {
                                  _PyAST_ImportFrom(NULL, c, 1, EXTRA) }
                              )
        simple_name[expr_ty]: NAME
        import_as_names_from[asdl_alias_seq*]: a[asdl_alias_seq*]=','.import_as_name_from+ { a }
        import_as_name_from[alias_ty]: a=NAME 'as' b=NAME { _PyAST_alias(((expr_ty) a)->v.Name.id, ((expr_ty) b)->v.Name.id, EXTRA) }
        """
        test_source = """
        for stmt in ("from a import b as c", "from . import a as b"):
            expected_ast = ast.parse(stmt)
            actual_ast = parse.parse_string(stmt, mode=1)
            self.assertEqual(ast_dump(expected_ast), ast_dump(actual_ast))
        """
        self.run_test(grammar_source, test_source)

    def test_with_stmt_with_paren(self) -> None:
        grammar_source = """
        start[mod_ty]: a=[statements] ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
        statements[asdl_stmt_seq*]: a=statement+ { (asdl_stmt_seq*)_PyPegen_seq_flatten(p, a) }
        statement[asdl_stmt_seq*]: a=compound_stmt { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, a) }
        compound_stmt[stmt_ty]: with_stmt
        with_stmt[stmt_ty]: (
            a='with' '(' b[asdl_withitem_seq*]=','.with_item+ ')' ':' c=block {
                _PyAST_With(b, (asdl_stmt_seq*) _PyPegen_singleton_seq(p, c), NULL, EXTRA) }
        )
        with_item[withitem_ty]: (
            e=NAME o=['as' t=NAME { t }] { _PyAST_withitem(e, _PyPegen_set_expr_context(p, o, Store), p->arena) }
        )
        block[stmt_ty]: a=pass_stmt NEWLINE { a } | NEWLINE INDENT a=pass_stmt DEDENT { a }
        pass_stmt[stmt_ty]: a='pass' { _PyAST_Pass(EXTRA) }
        """
        test_source = """
        stmt = "with (\\n a as b,\\n c as d\\n): pass"
        the_ast = parse.parse_string(stmt, mode=1)
        self.assertTrue(ast_dump(the_ast).startswith(
            "Module(body=[With(items=[withitem(context_expr=Name(id='a', ctx=Load()), optional_vars=Name(id='b', ctx=Store())), "
            "withitem(context_expr=Name(id='c', ctx=Load()), optional_vars=Name(id='d', ctx=Store()))]"
        ))
        """
        self.run_test(grammar_source, test_source)

    def test_ternary_operator(self) -> None:
        grammar_source = """
        start[mod_ty]: a=expr ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
        expr[asdl_stmt_seq*]: a=listcomp NEWLINE { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, _PyAST_Expr(a, EXTRA)) }
        listcomp[expr_ty]: (
            a='[' b=NAME c=for_if_clauses d=']' { _PyAST_ListComp(b, c, EXTRA) }
        )
        for_if_clauses[asdl_comprehension_seq*]: (
            a[asdl_comprehension_seq*]=(y=[ASYNC] 'for' a=NAME 'in' b=NAME c[asdl_expr_seq*]=('if' z=NAME { z })*
                { _PyAST_comprehension(_PyAST_Name(((expr_ty) a)->v.Name.id, Store, EXTRA), b, c, (y == NULL) ? 0 : 1, p->arena) })+ { a }
        )
        """
        test_source = """
        stmt = "[i for i in a if b]"
        self.verify_ast_generation(stmt)
        """
        self.run_test(grammar_source, test_source)

    def test_syntax_error_for_string(self) -> None:
        grammar_source = """
        start: expr+ NEWLINE? ENDMARKER
        expr: NAME
        """
        test_source = r"""
        for text in ("a b 42 b a", "\u540d \u540d 42 \u540d \u540d"):
            try:
                parse.parse_string(text, mode=0)
            except SyntaxError as e:
                tb = traceback.format_exc()
                self.assertIn('File "<string>", line 1', tb)
                self.assertIn("SyntaxError: invalid syntax", tb)
        """
        self.run_test(grammar_source, test_source)
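
    # @header, @subheader and @trailer are grammar meta-directives whose text
    # is copied verbatim into the generated C parser source.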
    def test_headers_and_trailer(self) -> None:
        grammar_source = """
        @header 'SOME HEADER'
        @subheader 'SOME SUBHEADER'
        @trailer 'SOME TRAILER'
        start: expr+ NEWLINE? ENDMARKER
        expr: x=NAME
        """
        grammar = parse_string(grammar_source, GrammarParser)
        parser_source = generate_c_parser_source(grammar)

        self.assertIn("SOME HEADER", parser_source)
        self.assertIn("SOME SUBHEADER", parser_source)
        self.assertIn("SOME TRAILER", parser_source)

    def test_error_in_rules(self) -> None:
        grammar_source = """
        start: expr+ NEWLINE? ENDMARKER
        expr: NAME {PyTuple_New(-1)}
        """
        # PyTuple_New raises SystemError if an invalid argument was passed.
        test_source = """
        with self.assertRaises(SystemError):
            parse.parse_string("a", mode=0)
        """
        self.run_test(grammar_source, test_source)
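
    # In the grammar meta-language, single-quoted literals ('foo') are hard
    # keywords while double-quoted literals ("foo") are soft keywords, which
    # the generated parser matches through its expect_soft_keyword helper.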
    def test_no_soft_keywords(self) -> None:
        grammar_source = """
        start: expr+ NEWLINE? ENDMARKER
        expr: 'foo'
        """
        grammar = parse_string(grammar_source, GrammarParser)
        parser_source = generate_c_parser_source(grammar)
        self.assertNotIn("expect_soft_keyword", parser_source)

    def test_soft_keywords(self) -> None:
        grammar_source = """
        start: expr+ NEWLINE? ENDMARKER
        expr: "foo"
        """
        grammar = parse_string(grammar_source, GrammarParser)
        parser_source = generate_c_parser_source(grammar)
        self.assertIn("expect_soft_keyword", parser_source)

    def test_soft_keywords_parse(self) -> None:
        grammar_source = """
        start: "if" expr '+' expr NEWLINE
        expr: NAME
        """
        test_source = """
        valid_cases = ["if if + if"]
        invalid_cases = ["if if"]
        self.check_input_strings_for_grammar(valid_cases, invalid_cases)
        """
        self.run_test(grammar_source, test_source)

    def test_soft_keywords_lookahead(self) -> None:
        grammar_source = """
        start: &"if" "if" expr '+' expr NEWLINE
        expr: NAME
        """
        test_source = """
        valid_cases = ["if if + if"]
        invalid_cases = ["if if"]
        self.check_input_strings_for_grammar(valid_cases, invalid_cases)
        """
        self.run_test(grammar_source, test_source)
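
    # "&&e" is a forced item: if e does not match at this point, the parser
    # raises a SyntaxError ("expected ...") immediately instead of backtracking.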
    def test_forced(self) -> None:
        grammar_source = """
        start: NAME &&':' | NAME
        """
        test_source = """
        self.assertEqual(parse.parse_string("number :", mode=0), None)
        with self.assertRaises(SyntaxError) as e:
            parse.parse_string("a", mode=0)
        self.assertIn("expected ':'", str(e.exception))
        """
        self.run_test(grammar_source, test_source)

    def test_forced_with_group(self) -> None:
        grammar_source = """
        start: NAME &&(':' | ';') | NAME
        """
        test_source = """
        self.assertEqual(parse.parse_string("number :", mode=0), None)
        self.assertEqual(parse.parse_string("number ;", mode=0), None)
        with self.assertRaises(SyntaxError) as e:
            parse.parse_string("a", mode=0)
        self.assertIn("expected (':' | ';')", e.exception.args[0])
        """
        self.run_test(grammar_source, test_source)