(root)/
Python-3.11.7/
Lib/
test/
test_peg_generator/
test_c_parser.py
       1  import sysconfig
       2  import textwrap
       3  import unittest
       4  import os
       5  import shutil
       6  import tempfile
       7  from pathlib import Path
       8  
       9  from test import test_tools
      10  from test import support
      11  from test.support import os_helper
      12  from test.support.script_helper import assert_python_ok
      13  
# Skip the whole module when the interpreter's PY_CFLAGS_NODIST contain the
# PGO "profile use" flag (both values are read from the build configuration).
_py_cflags_nodist = sysconfig.get_config_var("PY_CFLAGS_NODIST")
_pgo_flag = sysconfig.get_config_var("PGO_PROF_USE_FLAG")
if _pgo_flag and _py_cflags_nodist and _pgo_flag in _py_cflags_nodist:
    raise unittest.SkipTest("peg_generator test disabled under PGO build")

# The pegen package lives under the "peg_generator" tool; skip the module if
# that tool is missing, and import pegen with the tool made importable.
test_tools.skip_if_missing("peg_generator")
with test_tools.imports_under_tool("peg_generator"):
    from pegen.grammar_parser import GeneratedParser as GrammarParser
    from pegen.testutil import (
        parse_string,
        generate_parser_c_extension,
        generate_c_parser_source,
    )
    from pegen.ast_dump import ast_dump
      28  
      29  
# Source of the script that TestCParser.run_test() executes in a child
# interpreter via "-c".  It is filled in with str.format():
#   {extension_path} - directory inserted at the front of sys.path so that
#                      the freshly built ``parse`` extension can be imported;
#   {test_source}    - already indented statements that become the body of
#                      Tests.test_parse().
# Everything between the quotes is runtime text, so it carries no comments.
TEST_TEMPLATE = """
tmp_dir = {extension_path!r}

import ast
import traceback
import sys
import unittest

from test import test_tools
with test_tools.imports_under_tool("peg_generator"):
    from pegen.ast_dump import ast_dump

sys.path.insert(0, tmp_dir)
import parse

class Tests(unittest.TestCase):

    def check_input_strings_for_grammar(
        self,
        valid_cases = (),
        invalid_cases = (),
    ):
        if valid_cases:
            for case in valid_cases:
                parse.parse_string(case, mode=0)

        if invalid_cases:
            for case in invalid_cases:
                with self.assertRaises(SyntaxError):
                    parse.parse_string(case, mode=0)

    def verify_ast_generation(self, stmt):
        expected_ast = ast.parse(stmt)
        actual_ast = parse.parse_string(stmt, mode=1)
        self.assertEqual(ast_dump(expected_ast), ast_dump(actual_ast))

    def test_parse(self):
        {test_source}

unittest.main()
"""
      71  
      72  
      73  @support.requires_subprocess()
      74  class ESC[4;38;5;81mTestCParser(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
      75  
      76      @classmethod
      77      def setUpClass(cls):
      78          # When running under regtest, a seperate tempdir is used
      79          # as the current directory and watched for left-overs.
      80          # Reusing that as the base for temporary directories
      81          # ensures everything is cleaned up properly and
      82          # cleans up afterwards if not (with warnings).
      83          cls.tmp_base = os.getcwd()
      84          if os.path.samefile(cls.tmp_base, os_helper.SAVEDCWD):
      85              cls.tmp_base = None
      86          # Create a directory for the reuseable static library part of
      87          # the pegen extension build process.  This greatly reduces the
      88          # runtime overhead of spawning compiler processes.
      89          cls.library_dir = tempfile.mkdtemp(dir=cls.tmp_base)
      90          cls.addClassCleanup(shutil.rmtree, cls.library_dir)
      91  
      92      def setUp(self):
      93          self._backup_config_vars = dict(sysconfig._CONFIG_VARS)
      94          cmd = support.missing_compiler_executable()
      95          if cmd is not None:
      96              self.skipTest("The %r command is not found" % cmd)
      97          self.old_cwd = os.getcwd()
      98          self.tmp_path = tempfile.mkdtemp(dir=self.tmp_base)
      99          self.enterContext(os_helper.change_cwd(self.tmp_path))
     100  
     101      def tearDown(self):
     102          os.chdir(self.old_cwd)
     103          shutil.rmtree(self.tmp_path)
     104          sysconfig._CONFIG_VARS.clear()
     105          sysconfig._CONFIG_VARS.update(self._backup_config_vars)
     106  
     107      def build_extension(self, grammar_source):
     108          grammar = parse_string(grammar_source, GrammarParser)
     109          # Because setUp() already changes the current directory to the
     110          # temporary path, use a relative path here to prevent excessive
     111          # path lengths when compiling.
     112          generate_parser_c_extension(grammar, Path('.'), library_dir=self.library_dir)
     113  
     114      def run_test(self, grammar_source, test_source):
     115          self.build_extension(grammar_source)
     116          test_source = textwrap.indent(textwrap.dedent(test_source), 8 * " ")
     117          assert_python_ok(
     118              "-c",
     119              TEST_TEMPLATE.format(extension_path=self.tmp_path, test_source=test_source),
     120          )
     121  
     122      def test_c_parser(self) -> None:
     123          grammar_source = """
     124          start[mod_ty]: a[asdl_stmt_seq*]=stmt* $ { _PyAST_Module(a, NULL, p->arena) }
     125          stmt[stmt_ty]: a=expr_stmt { a }
     126          expr_stmt[stmt_ty]: a=expression NEWLINE { _PyAST_Expr(a, EXTRA) }
     127          expression[expr_ty]: ( l=expression '+' r=term { _PyAST_BinOp(l, Add, r, EXTRA) }
     128                              | l=expression '-' r=term { _PyAST_BinOp(l, Sub, r, EXTRA) }
     129                              | t=term { t }
     130                              )
     131          term[expr_ty]: ( l=term '*' r=factor { _PyAST_BinOp(l, Mult, r, EXTRA) }
     132                      | l=term '/' r=factor { _PyAST_BinOp(l, Div, r, EXTRA) }
     133                      | f=factor { f }
     134                      )
     135          factor[expr_ty]: ('(' e=expression ')' { e }
     136                          | a=atom { a }
     137                          )
     138          atom[expr_ty]: ( n=NAME { n }
     139                      | n=NUMBER { n }
     140                      | s=STRING { s }
     141                      )
     142          """
     143          test_source = """
     144          expressions = [
     145              "4+5",
     146              "4-5",
     147              "4*5",
     148              "1+4*5",
     149              "1+4/5",
     150              "(1+1) + (1+1)",
     151              "(1+1) - (1+1)",
     152              "(1+1) * (1+1)",
     153              "(1+1) / (1+1)",
     154          ]
     155  
     156          for expr in expressions:
     157              the_ast = parse.parse_string(expr, mode=1)
     158              expected_ast = ast.parse(expr)
     159              self.assertEqual(ast_dump(the_ast), ast_dump(expected_ast))
     160          """
     161          self.run_test(grammar_source, test_source)
     162  
     163      def test_lookahead(self) -> None:
     164          grammar_source = """
     165          start: NAME &NAME expr NEWLINE? ENDMARKER
     166          expr: NAME | NUMBER
     167          """
     168          test_source = """
     169          valid_cases = ["foo bar"]
     170          invalid_cases = ["foo 34"]
     171          self.check_input_strings_for_grammar(valid_cases, invalid_cases)
     172          """
     173          self.run_test(grammar_source, test_source)
     174  
     175      def test_negative_lookahead(self) -> None:
     176          grammar_source = """
     177          start: NAME !NAME expr NEWLINE? ENDMARKER
     178          expr: NAME | NUMBER
     179          """
     180          test_source = """
     181          valid_cases = ["foo 34"]
     182          invalid_cases = ["foo bar"]
     183          self.check_input_strings_for_grammar(valid_cases, invalid_cases)
     184          """
     185          self.run_test(grammar_source, test_source)
     186  
     187      def test_cut(self) -> None:
     188          grammar_source = """
     189          start: X ~ Y Z | X Q S
     190          X: 'x'
     191          Y: 'y'
     192          Z: 'z'
     193          Q: 'q'
     194          S: 's'
     195          """
     196          test_source = """
     197          valid_cases = ["x y z"]
     198          invalid_cases = ["x q s"]
     199          self.check_input_strings_for_grammar(valid_cases, invalid_cases)
     200          """
     201          self.run_test(grammar_source, test_source)
     202  
     203      def test_gather(self) -> None:
     204          grammar_source = """
     205          start: ';'.pass_stmt+ NEWLINE
     206          pass_stmt: 'pass'
     207          """
     208          test_source = """
     209          valid_cases = ["pass", "pass; pass"]
     210          invalid_cases = ["pass;", "pass; pass;"]
     211          self.check_input_strings_for_grammar(valid_cases, invalid_cases)
     212          """
     213          self.run_test(grammar_source, test_source)
     214  
     215      def test_left_recursion(self) -> None:
     216          grammar_source = """
     217          start: expr NEWLINE
     218          expr: ('-' term | expr '+' term | term)
     219          term: NUMBER
     220          """
     221          test_source = """
     222          valid_cases = ["-34", "34", "34 + 12", "1 + 1 + 2 + 3"]
     223          self.check_input_strings_for_grammar(valid_cases)
     224          """
     225          self.run_test(grammar_source, test_source)
     226  
     227      def test_advanced_left_recursive(self) -> None:
     228          grammar_source = """
     229          start: NUMBER | sign start
     230          sign: ['-']
     231          """
     232          test_source = """
     233          valid_cases = ["23", "-34"]
     234          self.check_input_strings_for_grammar(valid_cases)
     235          """
     236          self.run_test(grammar_source, test_source)
     237  
     238      def test_mutually_left_recursive(self) -> None:
     239          grammar_source = """
     240          start: foo 'E'
     241          foo: bar 'A' | 'B'
     242          bar: foo 'C' | 'D'
     243          """
     244          test_source = """
     245          valid_cases = ["B E", "D A C A E"]
     246          self.check_input_strings_for_grammar(valid_cases)
     247          """
     248          self.run_test(grammar_source, test_source)
     249  
     250      def test_nasty_mutually_left_recursive(self) -> None:
     251          grammar_source = """
     252          start: target '='
     253          target: maybe '+' | NAME
     254          maybe: maybe '-' | target
     255          """
     256          test_source = """
     257          valid_cases = ["x ="]
     258          invalid_cases = ["x - + ="]
     259          self.check_input_strings_for_grammar(valid_cases, invalid_cases)
     260          """
     261          self.run_test(grammar_source, test_source)
     262  
     263      def test_return_stmt_noexpr_action(self) -> None:
     264          grammar_source = """
     265          start[mod_ty]: a=[statements] ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
     266          statements[asdl_stmt_seq*]: a[asdl_stmt_seq*]=statement+ { a }
     267          statement[stmt_ty]: simple_stmt
     268          simple_stmt[stmt_ty]: small_stmt
     269          small_stmt[stmt_ty]: return_stmt
     270          return_stmt[stmt_ty]: a='return' NEWLINE { _PyAST_Return(NULL, EXTRA) }
     271          """
     272          test_source = """
     273          stmt = "return"
     274          self.verify_ast_generation(stmt)
     275          """
     276          self.run_test(grammar_source, test_source)
     277  
     278      def test_gather_action_ast(self) -> None:
     279          grammar_source = """
     280          start[mod_ty]: a[asdl_stmt_seq*]=';'.pass_stmt+ NEWLINE ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
     281          pass_stmt[stmt_ty]: a='pass' { _PyAST_Pass(EXTRA)}
     282          """
     283          test_source = """
     284          stmt = "pass; pass"
     285          self.verify_ast_generation(stmt)
     286          """
     287          self.run_test(grammar_source, test_source)
     288  
     289      def test_pass_stmt_action(self) -> None:
     290          grammar_source = """
     291          start[mod_ty]: a=[statements] ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
     292          statements[asdl_stmt_seq*]: a[asdl_stmt_seq*]=statement+ { a }
     293          statement[stmt_ty]: simple_stmt
     294          simple_stmt[stmt_ty]: small_stmt
     295          small_stmt[stmt_ty]: pass_stmt
     296          pass_stmt[stmt_ty]: a='pass' NEWLINE { _PyAST_Pass(EXTRA) }
     297          """
     298          test_source = """
     299          stmt = "pass"
     300          self.verify_ast_generation(stmt)
     301          """
     302          self.run_test(grammar_source, test_source)
     303  
     304      def test_if_stmt_action(self) -> None:
     305          grammar_source = """
     306          start[mod_ty]: a=[statements] ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
     307          statements[asdl_stmt_seq*]: a=statement+ { (asdl_stmt_seq*)_PyPegen_seq_flatten(p, a) }
     308          statement[asdl_stmt_seq*]:  a=compound_stmt { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, a) } | simple_stmt
     309  
     310          simple_stmt[asdl_stmt_seq*]: a=small_stmt b=further_small_stmt* [';'] NEWLINE {
     311                                              (asdl_stmt_seq*)_PyPegen_seq_insert_in_front(p, a, b) }
     312          further_small_stmt[stmt_ty]: ';' a=small_stmt { a }
     313  
     314          block: simple_stmt | NEWLINE INDENT a=statements DEDENT { a }
     315  
     316          compound_stmt: if_stmt
     317  
     318          if_stmt: 'if' a=full_expression ':' b=block { _PyAST_If(a, b, NULL, EXTRA) }
     319  
     320          small_stmt[stmt_ty]: pass_stmt
     321  
     322          pass_stmt[stmt_ty]: a='pass' { _PyAST_Pass(EXTRA) }
     323  
     324          full_expression: NAME
     325          """
     326          test_source = """
     327          stmt = "pass"
     328          self.verify_ast_generation(stmt)
     329          """
     330          self.run_test(grammar_source, test_source)
     331  
     332      def test_same_name_different_types(self) -> None:
     333          grammar_source = """
     334          start[mod_ty]: a[asdl_stmt_seq*]=import_from+ NEWLINE ENDMARKER { _PyAST_Module(a, NULL, p->arena)}
     335          import_from[stmt_ty]: ( a='from' !'import' c=simple_name 'import' d=import_as_names_from {
     336                                  _PyAST_ImportFrom(c->v.Name.id, d, 0, EXTRA) }
     337                              | a='from' '.' 'import' c=import_as_names_from {
     338                                  _PyAST_ImportFrom(NULL, c, 1, EXTRA) }
     339                              )
     340          simple_name[expr_ty]: NAME
     341          import_as_names_from[asdl_alias_seq*]: a[asdl_alias_seq*]=','.import_as_name_from+ { a }
     342          import_as_name_from[alias_ty]: a=NAME 'as' b=NAME { _PyAST_alias(((expr_ty) a)->v.Name.id, ((expr_ty) b)->v.Name.id, EXTRA) }
     343          """
     344          test_source = """
     345          for stmt in ("from a import b as c", "from . import a as b"):
     346              expected_ast = ast.parse(stmt)
     347              actual_ast = parse.parse_string(stmt, mode=1)
     348              self.assertEqual(ast_dump(expected_ast), ast_dump(actual_ast))
     349          """
     350          self.run_test(grammar_source, test_source)
     351  
     352      def test_with_stmt_with_paren(self) -> None:
     353          grammar_source = """
     354          start[mod_ty]: a=[statements] ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
     355          statements[asdl_stmt_seq*]: a=statement+ { (asdl_stmt_seq*)_PyPegen_seq_flatten(p, a) }
     356          statement[asdl_stmt_seq*]: a=compound_stmt { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, a) }
     357          compound_stmt[stmt_ty]: with_stmt
     358          with_stmt[stmt_ty]: (
     359              a='with' '(' b[asdl_withitem_seq*]=','.with_item+ ')' ':' c=block {
     360                  _PyAST_With(b, (asdl_stmt_seq*) _PyPegen_singleton_seq(p, c), NULL, EXTRA) }
     361          )
     362          with_item[withitem_ty]: (
     363              e=NAME o=['as' t=NAME { t }] { _PyAST_withitem(e, _PyPegen_set_expr_context(p, o, Store), p->arena) }
     364          )
     365          block[stmt_ty]: a=pass_stmt NEWLINE { a } | NEWLINE INDENT a=pass_stmt DEDENT { a }
     366          pass_stmt[stmt_ty]: a='pass' { _PyAST_Pass(EXTRA) }
     367          """
     368          test_source = """
     369          stmt = "with (\\n    a as b,\\n    c as d\\n): pass"
     370          the_ast = parse.parse_string(stmt, mode=1)
     371          self.assertTrue(ast_dump(the_ast).startswith(
     372              "Module(body=[With(items=[withitem(context_expr=Name(id='a', ctx=Load()), optional_vars=Name(id='b', ctx=Store())), "
     373              "withitem(context_expr=Name(id='c', ctx=Load()), optional_vars=Name(id='d', ctx=Store()))]"
     374          ))
     375          """
     376          self.run_test(grammar_source, test_source)
     377  
     378      def test_ternary_operator(self) -> None:
     379          grammar_source = """
     380          start[mod_ty]: a=expr ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
     381          expr[asdl_stmt_seq*]: a=listcomp NEWLINE { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, _PyAST_Expr(a, EXTRA)) }
     382          listcomp[expr_ty]: (
     383              a='[' b=NAME c=for_if_clauses d=']' { _PyAST_ListComp(b, c, EXTRA) }
     384          )
     385          for_if_clauses[asdl_comprehension_seq*]: (
     386              a[asdl_comprehension_seq*]=(y=[ASYNC] 'for' a=NAME 'in' b=NAME c[asdl_expr_seq*]=('if' z=NAME { z })*
     387                  { _PyAST_comprehension(_PyAST_Name(((expr_ty) a)->v.Name.id, Store, EXTRA), b, c, (y == NULL) ? 0 : 1, p->arena) })+ { a }
     388          )
     389          """
     390          test_source = """
     391          stmt = "[i for i in a if b]"
     392          self.verify_ast_generation(stmt)
     393          """
     394          self.run_test(grammar_source, test_source)
     395  
     396      def test_syntax_error_for_string(self) -> None:
     397          grammar_source = """
     398          start: expr+ NEWLINE? ENDMARKER
     399          expr: NAME
     400          """
     401          test_source = r"""
     402          for text in ("a b 42 b a", "\u540d \u540d 42 \u540d \u540d"):
     403              try:
     404                  parse.parse_string(text, mode=0)
     405              except SyntaxError as e:
     406                  tb = traceback.format_exc()
     407              self.assertTrue('File "<string>", line 1' in tb)
     408              self.assertTrue(f"SyntaxError: invalid syntax" in tb)
     409          """
     410          self.run_test(grammar_source, test_source)
     411  
     412      def test_headers_and_trailer(self) -> None:
     413          grammar_source = """
     414          @header 'SOME HEADER'
     415          @subheader 'SOME SUBHEADER'
     416          @trailer 'SOME TRAILER'
     417          start: expr+ NEWLINE? ENDMARKER
     418          expr: x=NAME
     419          """
     420          grammar = parse_string(grammar_source, GrammarParser)
     421          parser_source = generate_c_parser_source(grammar)
     422  
     423          self.assertTrue("SOME HEADER" in parser_source)
     424          self.assertTrue("SOME SUBHEADER" in parser_source)
     425          self.assertTrue("SOME TRAILER" in parser_source)
     426  
     427      def test_error_in_rules(self) -> None:
     428          grammar_source = """
     429          start: expr+ NEWLINE? ENDMARKER
     430          expr: NAME {PyTuple_New(-1)}
     431          """
     432          # PyTuple_New raises SystemError if an invalid argument was passed.
     433          test_source = """
     434          with self.assertRaises(SystemError):
     435              parse.parse_string("a", mode=0)
     436          """
     437          self.run_test(grammar_source, test_source)
     438  
     439      def test_no_soft_keywords(self) -> None:
     440          grammar_source = """
     441          start: expr+ NEWLINE? ENDMARKER
     442          expr: 'foo'
     443          """
     444          grammar = parse_string(grammar_source, GrammarParser)
     445          parser_source = generate_c_parser_source(grammar)
     446          assert "expect_soft_keyword" not in parser_source
     447  
     448      def test_soft_keywords(self) -> None:
     449          grammar_source = """
     450          start: expr+ NEWLINE? ENDMARKER
     451          expr: "foo"
     452          """
     453          grammar = parse_string(grammar_source, GrammarParser)
     454          parser_source = generate_c_parser_source(grammar)
     455          assert "expect_soft_keyword" in parser_source
     456  
     457      def test_soft_keywords_parse(self) -> None:
     458          grammar_source = """
     459          start: "if" expr '+' expr NEWLINE
     460          expr: NAME
     461          """
     462          test_source = """
     463          valid_cases = ["if if + if"]
     464          invalid_cases = ["if if"]
     465          self.check_input_strings_for_grammar(valid_cases, invalid_cases)
     466          """
     467          self.run_test(grammar_source, test_source)
     468  
     469      def test_soft_keywords_lookahead(self) -> None:
     470          grammar_source = """
     471          start: &"if" "if" expr '+' expr NEWLINE
     472          expr: NAME
     473          """
     474          test_source = """
     475          valid_cases = ["if if + if"]
     476          invalid_cases = ["if if"]
     477          self.check_input_strings_for_grammar(valid_cases, invalid_cases)
     478          """
     479          self.run_test(grammar_source, test_source)
     480  
     481      def test_forced(self) -> None:
     482          grammar_source = """
     483          start: NAME &&':' | NAME
     484          """
     485          test_source = """
     486          self.assertEqual(parse.parse_string("number :", mode=0), None)
     487          with self.assertRaises(SyntaxError) as e:
     488              parse.parse_string("a", mode=0)
     489          self.assertIn("expected ':'", str(e.exception))
     490          """
     491          self.run_test(grammar_source, test_source)
     492  
     493      def test_forced_with_group(self) -> None:
     494          grammar_source = """
     495          start: NAME &&(':' | ';') | NAME
     496          """
     497          test_source = """
     498          self.assertEqual(parse.parse_string("number :", mode=0), None)
     499          self.assertEqual(parse.parse_string("number ;", mode=0), None)
     500          with self.assertRaises(SyntaxError) as e:
     501              parse.parse_string("a", mode=0)
     502          self.assertIn("expected (':' | ';')", e.exception.args[0])
     503          """
     504          self.run_test(grammar_source, test_source)