1  import contextlib
       2  import subprocess
       3  import sysconfig
       4  import textwrap
       5  import unittest
       6  import os
       7  import shutil
       8  import tempfile
       9  from pathlib import Path
      10  
      11  from test import test_tools
      12  from test import support
      13  from test.support import os_helper, import_helper
      14  from test.support.script_helper import assert_python_ok
      15  
      16  _py_cflags_nodist = sysconfig.get_config_var("PY_CFLAGS_NODIST")
      17  _pgo_flag = sysconfig.get_config_var("PGO_PROF_USE_FLAG")
      18  if _pgo_flag and _py_cflags_nodist and _pgo_flag in _py_cflags_nodist:
      19      raise unittest.SkipTest("peg_generator test disabled under PGO build")
      20  
      21  test_tools.skip_if_missing("peg_generator")
      22  with test_tools.imports_under_tool("peg_generator"):
      23      from pegen.grammar_parser import GeneratedParser as GrammarParser
      24      from pegen.testutil import (
      25          parse_string,
      26          generate_parser_c_extension,
      27          generate_c_parser_source,
      28      )
      29  
      30  
# Source text of the script that run_test() hands to
# assert_python_ok("-c", ...).  str.format() fills in two fields:
#   {extension_path!r} - repr of the directory inserted at the front of
#                        sys.path right before ``import parse``
#   {test_source}      - statements spliced in as the body of
#                        Tests.test_parse (run_test() indents them by
#                        8 spaces so they line up with the method body)
# The helper methods on Tests call parse.parse_string() with mode=0 when
# only acceptance/rejection of the input matters and with mode=1 when the
# returned AST is compared against ast.parse().
TEST_TEMPLATE = """
tmp_dir = {extension_path!r}

import ast
import traceback
import sys
import unittest

from test import test_tools
with test_tools.imports_under_tool("peg_generator"):
    from pegen.ast_dump import ast_dump

sys.path.insert(0, tmp_dir)
import parse

class Tests(unittest.TestCase):

    def check_input_strings_for_grammar(
        self,
        valid_cases = (),
        invalid_cases = (),
    ):
        if valid_cases:
            for case in valid_cases:
                parse.parse_string(case, mode=0)

        if invalid_cases:
            for case in invalid_cases:
                with self.assertRaises(SyntaxError):
                    parse.parse_string(case, mode=0)

    def verify_ast_generation(self, stmt):
        expected_ast = ast.parse(stmt)
        actual_ast = parse.parse_string(stmt, mode=1)
        self.assertEqual(ast_dump(expected_ast), ast_dump(actual_ast))

    def test_parse(self):
        {test_source}

unittest.main()
"""
      72  
      73  
      74  @support.requires_subprocess()
      75  class ESC[4;38;5;81mTestCParser(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
      76  
      77      _has_run = False
      78  
      79      @classmethod
      80      def setUpClass(cls):
      81          if cls._has_run:
      82              # Since gh-104798 (Use setuptools in peg-generator and reenable
      83              # tests), this test case has been producing ref leaks. Initial
      84              # debugging points to bug(s) in setuptools and/or importlib.
      85              # See gh-105063 for more info.
      86              raise unittest.SkipTest("gh-105063: can not rerun because of ref. leaks")
      87          cls._has_run = True
      88  
      89          # When running under regtest, a separate tempdir is used
      90          # as the current directory and watched for left-overs.
      91          # Reusing that as the base for temporary directories
      92          # ensures everything is cleaned up properly and
      93          # cleans up afterwards if not (with warnings).
      94          cls.tmp_base = os.getcwd()
      95          if os.path.samefile(cls.tmp_base, os_helper.SAVEDCWD):
      96              cls.tmp_base = None
      97          # Create a directory for the reuseable static library part of
      98          # the pegen extension build process.  This greatly reduces the
      99          # runtime overhead of spawning compiler processes.
     100          cls.library_dir = tempfile.mkdtemp(dir=cls.tmp_base)
     101          cls.addClassCleanup(shutil.rmtree, cls.library_dir)
     102  
     103          with contextlib.ExitStack() as stack:
     104              python_exe = stack.enter_context(support.setup_venv_with_pip_setuptools_wheel("venv"))
     105              sitepackages = subprocess.check_output(
     106                  [python_exe, "-c", "import sysconfig; print(sysconfig.get_path('platlib'))"],
     107                  text=True,
     108              ).strip()
     109              stack.enter_context(import_helper.DirsOnSysPath(sitepackages))
     110              cls.addClassCleanup(stack.pop_all().close)
     111  
     112      @support.requires_venv_with_pip()
     113      def setUp(self):
     114          self._backup_config_vars = dict(sysconfig._CONFIG_VARS)
     115          cmd = support.missing_compiler_executable()
     116          if cmd is not None:
     117              self.skipTest("The %r command is not found" % cmd)
     118          self.old_cwd = os.getcwd()
     119          self.tmp_path = tempfile.mkdtemp(dir=self.tmp_base)
     120          self.enterContext(os_helper.change_cwd(self.tmp_path))
     121  
     122      def tearDown(self):
     123          os.chdir(self.old_cwd)
     124          shutil.rmtree(self.tmp_path)
     125          sysconfig._CONFIG_VARS.clear()
     126          sysconfig._CONFIG_VARS.update(self._backup_config_vars)
     127  
     128      def build_extension(self, grammar_source):
     129          grammar = parse_string(grammar_source, GrammarParser)
     130          # Because setUp() already changes the current directory to the
     131          # temporary path, use a relative path here to prevent excessive
     132          # path lengths when compiling.
     133          generate_parser_c_extension(grammar, Path('.'), library_dir=self.library_dir)
     134  
     135      def run_test(self, grammar_source, test_source):
     136          self.build_extension(grammar_source)
     137          test_source = textwrap.indent(textwrap.dedent(test_source), 8 * " ")
     138          assert_python_ok(
     139              "-c",
     140              TEST_TEMPLATE.format(extension_path=self.tmp_path, test_source=test_source),
     141          )
     142  
     143      def test_c_parser(self) -> None:
     144          grammar_source = """
     145          start[mod_ty]: a[asdl_stmt_seq*]=stmt* $ { _PyAST_Module(a, NULL, p->arena) }
     146          stmt[stmt_ty]: a=expr_stmt { a }
     147          expr_stmt[stmt_ty]: a=expression NEWLINE { _PyAST_Expr(a, EXTRA) }
     148          expression[expr_ty]: ( l=expression '+' r=term { _PyAST_BinOp(l, Add, r, EXTRA) }
     149                              | l=expression '-' r=term { _PyAST_BinOp(l, Sub, r, EXTRA) }
     150                              | t=term { t }
     151                              )
     152          term[expr_ty]: ( l=term '*' r=factor { _PyAST_BinOp(l, Mult, r, EXTRA) }
     153                      | l=term '/' r=factor { _PyAST_BinOp(l, Div, r, EXTRA) }
     154                      | f=factor { f }
     155                      )
     156          factor[expr_ty]: ('(' e=expression ')' { e }
     157                          | a=atom { a }
     158                          )
     159          atom[expr_ty]: ( n=NAME { n }
     160                      | n=NUMBER { n }
     161                      | s=STRING { s }
     162                      )
     163          """
     164          test_source = """
     165          expressions = [
     166              "4+5",
     167              "4-5",
     168              "4*5",
     169              "1+4*5",
     170              "1+4/5",
     171              "(1+1) + (1+1)",
     172              "(1+1) - (1+1)",
     173              "(1+1) * (1+1)",
     174              "(1+1) / (1+1)",
     175          ]
     176  
     177          for expr in expressions:
     178              the_ast = parse.parse_string(expr, mode=1)
     179              expected_ast = ast.parse(expr)
     180              self.assertEqual(ast_dump(the_ast), ast_dump(expected_ast))
     181          """
     182          self.run_test(grammar_source, test_source)
     183  
     184      def test_lookahead(self) -> None:
     185          grammar_source = """
     186          start: NAME &NAME expr NEWLINE? ENDMARKER
     187          expr: NAME | NUMBER
     188          """
     189          test_source = """
     190          valid_cases = ["foo bar"]
     191          invalid_cases = ["foo 34"]
     192          self.check_input_strings_for_grammar(valid_cases, invalid_cases)
     193          """
     194          self.run_test(grammar_source, test_source)
     195  
     196      def test_negative_lookahead(self) -> None:
     197          grammar_source = """
     198          start: NAME !NAME expr NEWLINE? ENDMARKER
     199          expr: NAME | NUMBER
     200          """
     201          test_source = """
     202          valid_cases = ["foo 34"]
     203          invalid_cases = ["foo bar"]
     204          self.check_input_strings_for_grammar(valid_cases, invalid_cases)
     205          """
     206          self.run_test(grammar_source, test_source)
     207  
     208      def test_cut(self) -> None:
     209          grammar_source = """
     210          start: X ~ Y Z | X Q S
     211          X: 'x'
     212          Y: 'y'
     213          Z: 'z'
     214          Q: 'q'
     215          S: 's'
     216          """
     217          test_source = """
     218          valid_cases = ["x y z"]
     219          invalid_cases = ["x q s"]
     220          self.check_input_strings_for_grammar(valid_cases, invalid_cases)
     221          """
     222          self.run_test(grammar_source, test_source)
     223  
     224      def test_gather(self) -> None:
     225          grammar_source = """
     226          start: ';'.pass_stmt+ NEWLINE
     227          pass_stmt: 'pass'
     228          """
     229          test_source = """
     230          valid_cases = ["pass", "pass; pass"]
     231          invalid_cases = ["pass;", "pass; pass;"]
     232          self.check_input_strings_for_grammar(valid_cases, invalid_cases)
     233          """
     234          self.run_test(grammar_source, test_source)
     235  
     236      def test_left_recursion(self) -> None:
     237          grammar_source = """
     238          start: expr NEWLINE
     239          expr: ('-' term | expr '+' term | term)
     240          term: NUMBER
     241          """
     242          test_source = """
     243          valid_cases = ["-34", "34", "34 + 12", "1 + 1 + 2 + 3"]
     244          self.check_input_strings_for_grammar(valid_cases)
     245          """
     246          self.run_test(grammar_source, test_source)
     247  
     248      def test_advanced_left_recursive(self) -> None:
     249          grammar_source = """
     250          start: NUMBER | sign start
     251          sign: ['-']
     252          """
     253          test_source = """
     254          valid_cases = ["23", "-34"]
     255          self.check_input_strings_for_grammar(valid_cases)
     256          """
     257          self.run_test(grammar_source, test_source)
     258  
     259      def test_mutually_left_recursive(self) -> None:
     260          grammar_source = """
     261          start: foo 'E'
     262          foo: bar 'A' | 'B'
     263          bar: foo 'C' | 'D'
     264          """
     265          test_source = """
     266          valid_cases = ["B E", "D A C A E"]
     267          self.check_input_strings_for_grammar(valid_cases)
     268          """
     269          self.run_test(grammar_source, test_source)
     270  
     271      def test_nasty_mutually_left_recursive(self) -> None:
     272          grammar_source = """
     273          start: target '='
     274          target: maybe '+' | NAME
     275          maybe: maybe '-' | target
     276          """
     277          test_source = """
     278          valid_cases = ["x ="]
     279          invalid_cases = ["x - + ="]
     280          self.check_input_strings_for_grammar(valid_cases, invalid_cases)
     281          """
     282          self.run_test(grammar_source, test_source)
     283  
     284      def test_return_stmt_noexpr_action(self) -> None:
     285          grammar_source = """
     286          start[mod_ty]: a=[statements] ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
     287          statements[asdl_stmt_seq*]: a[asdl_stmt_seq*]=statement+ { a }
     288          statement[stmt_ty]: simple_stmt
     289          simple_stmt[stmt_ty]: small_stmt
     290          small_stmt[stmt_ty]: return_stmt
     291          return_stmt[stmt_ty]: a='return' NEWLINE { _PyAST_Return(NULL, EXTRA) }
     292          """
     293          test_source = """
     294          stmt = "return"
     295          self.verify_ast_generation(stmt)
     296          """
     297          self.run_test(grammar_source, test_source)
     298  
     299      def test_gather_action_ast(self) -> None:
     300          grammar_source = """
     301          start[mod_ty]: a[asdl_stmt_seq*]=';'.pass_stmt+ NEWLINE ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
     302          pass_stmt[stmt_ty]: a='pass' { _PyAST_Pass(EXTRA)}
     303          """
     304          test_source = """
     305          stmt = "pass; pass"
     306          self.verify_ast_generation(stmt)
     307          """
     308          self.run_test(grammar_source, test_source)
     309  
     310      def test_pass_stmt_action(self) -> None:
     311          grammar_source = """
     312          start[mod_ty]: a=[statements] ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
     313          statements[asdl_stmt_seq*]: a[asdl_stmt_seq*]=statement+ { a }
     314          statement[stmt_ty]: simple_stmt
     315          simple_stmt[stmt_ty]: small_stmt
     316          small_stmt[stmt_ty]: pass_stmt
     317          pass_stmt[stmt_ty]: a='pass' NEWLINE { _PyAST_Pass(EXTRA) }
     318          """
     319          test_source = """
     320          stmt = "pass"
     321          self.verify_ast_generation(stmt)
     322          """
     323          self.run_test(grammar_source, test_source)
     324  
     325      def test_if_stmt_action(self) -> None:
     326          grammar_source = """
     327          start[mod_ty]: a=[statements] ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
     328          statements[asdl_stmt_seq*]: a=statement+ { (asdl_stmt_seq*)_PyPegen_seq_flatten(p, a) }
     329          statement[asdl_stmt_seq*]:  a=compound_stmt { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, a) } | simple_stmt
     330  
     331          simple_stmt[asdl_stmt_seq*]: a=small_stmt b=further_small_stmt* [';'] NEWLINE {
     332                                              (asdl_stmt_seq*)_PyPegen_seq_insert_in_front(p, a, b) }
     333          further_small_stmt[stmt_ty]: ';' a=small_stmt { a }
     334  
     335          block: simple_stmt | NEWLINE INDENT a=statements DEDENT { a }
     336  
     337          compound_stmt: if_stmt
     338  
     339          if_stmt: 'if' a=full_expression ':' b=block { _PyAST_If(a, b, NULL, EXTRA) }
     340  
     341          small_stmt[stmt_ty]: pass_stmt
     342  
     343          pass_stmt[stmt_ty]: a='pass' { _PyAST_Pass(EXTRA) }
     344  
     345          full_expression: NAME
     346          """
     347          test_source = """
     348          stmt = "pass"
     349          self.verify_ast_generation(stmt)
     350          """
     351          self.run_test(grammar_source, test_source)
     352  
     353      def test_same_name_different_types(self) -> None:
     354          grammar_source = """
     355          start[mod_ty]: a[asdl_stmt_seq*]=import_from+ NEWLINE ENDMARKER { _PyAST_Module(a, NULL, p->arena)}
     356          import_from[stmt_ty]: ( a='from' !'import' c=simple_name 'import' d=import_as_names_from {
     357                                  _PyAST_ImportFrom(c->v.Name.id, d, 0, EXTRA) }
     358                              | a='from' '.' 'import' c=import_as_names_from {
     359                                  _PyAST_ImportFrom(NULL, c, 1, EXTRA) }
     360                              )
     361          simple_name[expr_ty]: NAME
     362          import_as_names_from[asdl_alias_seq*]: a[asdl_alias_seq*]=','.import_as_name_from+ { a }
     363          import_as_name_from[alias_ty]: a=NAME 'as' b=NAME { _PyAST_alias(((expr_ty) a)->v.Name.id, ((expr_ty) b)->v.Name.id, EXTRA) }
     364          """
     365          test_source = """
     366          for stmt in ("from a import b as c", "from . import a as b"):
     367              expected_ast = ast.parse(stmt)
     368              actual_ast = parse.parse_string(stmt, mode=1)
     369              self.assertEqual(ast_dump(expected_ast), ast_dump(actual_ast))
     370          """
     371          self.run_test(grammar_source, test_source)
     372  
     373      def test_with_stmt_with_paren(self) -> None:
     374          grammar_source = """
     375          start[mod_ty]: a=[statements] ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
     376          statements[asdl_stmt_seq*]: a=statement+ { (asdl_stmt_seq*)_PyPegen_seq_flatten(p, a) }
     377          statement[asdl_stmt_seq*]: a=compound_stmt { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, a) }
     378          compound_stmt[stmt_ty]: with_stmt
     379          with_stmt[stmt_ty]: (
     380              a='with' '(' b[asdl_withitem_seq*]=','.with_item+ ')' ':' c=block {
     381                  _PyAST_With(b, (asdl_stmt_seq*) _PyPegen_singleton_seq(p, c), NULL, EXTRA) }
     382          )
     383          with_item[withitem_ty]: (
     384              e=NAME o=['as' t=NAME { t }] { _PyAST_withitem(e, _PyPegen_set_expr_context(p, o, Store), p->arena) }
     385          )
     386          block[stmt_ty]: a=pass_stmt NEWLINE { a } | NEWLINE INDENT a=pass_stmt DEDENT { a }
     387          pass_stmt[stmt_ty]: a='pass' { _PyAST_Pass(EXTRA) }
     388          """
     389          test_source = """
     390          stmt = "with (\\n    a as b,\\n    c as d\\n): pass"
     391          the_ast = parse.parse_string(stmt, mode=1)
     392          self.assertTrue(ast_dump(the_ast).startswith(
     393              "Module(body=[With(items=[withitem(context_expr=Name(id='a', ctx=Load()), optional_vars=Name(id='b', ctx=Store())), "
     394              "withitem(context_expr=Name(id='c', ctx=Load()), optional_vars=Name(id='d', ctx=Store()))]"
     395          ))
     396          """
     397          self.run_test(grammar_source, test_source)
     398  
     399      def test_ternary_operator(self) -> None:
     400          grammar_source = """
     401          start[mod_ty]: a=expr ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
     402          expr[asdl_stmt_seq*]: a=listcomp NEWLINE { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, _PyAST_Expr(a, EXTRA)) }
     403          listcomp[expr_ty]: (
     404              a='[' b=NAME c=for_if_clauses d=']' { _PyAST_ListComp(b, c, EXTRA) }
     405          )
     406          for_if_clauses[asdl_comprehension_seq*]: (
     407              a[asdl_comprehension_seq*]=(y=[ASYNC] 'for' a=NAME 'in' b=NAME c[asdl_expr_seq*]=('if' z=NAME { z })*
     408                  { _PyAST_comprehension(_PyAST_Name(((expr_ty) a)->v.Name.id, Store, EXTRA), b, c, (y == NULL) ? 0 : 1, p->arena) })+ { a }
     409          )
     410          """
     411          test_source = """
     412          stmt = "[i for i in a if b]"
     413          self.verify_ast_generation(stmt)
     414          """
     415          self.run_test(grammar_source, test_source)
     416  
     417      def test_syntax_error_for_string(self) -> None:
     418          grammar_source = """
     419          start: expr+ NEWLINE? ENDMARKER
     420          expr: NAME
     421          """
     422          test_source = r"""
     423          for text in ("a b 42 b a", "\u540d \u540d 42 \u540d \u540d"):
     424              try:
     425                  parse.parse_string(text, mode=0)
     426              except SyntaxError as e:
     427                  tb = traceback.format_exc()
     428              self.assertTrue('File "<string>", line 1' in tb)
     429              self.assertTrue(f"SyntaxError: invalid syntax" in tb)
     430          """
     431          self.run_test(grammar_source, test_source)
     432  
     433      def test_headers_and_trailer(self) -> None:
     434          grammar_source = """
     435          @header 'SOME HEADER'
     436          @subheader 'SOME SUBHEADER'
     437          @trailer 'SOME TRAILER'
     438          start: expr+ NEWLINE? ENDMARKER
     439          expr: x=NAME
     440          """
     441          grammar = parse_string(grammar_source, GrammarParser)
     442          parser_source = generate_c_parser_source(grammar)
     443  
     444          self.assertTrue("SOME HEADER" in parser_source)
     445          self.assertTrue("SOME SUBHEADER" in parser_source)
     446          self.assertTrue("SOME TRAILER" in parser_source)
     447  
     448      def test_error_in_rules(self) -> None:
     449          grammar_source = """
     450          start: expr+ NEWLINE? ENDMARKER
     451          expr: NAME {PyTuple_New(-1)}
     452          """
     453          # PyTuple_New raises SystemError if an invalid argument was passed.
     454          test_source = """
     455          with self.assertRaises(SystemError):
     456              parse.parse_string("a", mode=0)
     457          """
     458          self.run_test(grammar_source, test_source)
     459  
     460      def test_no_soft_keywords(self) -> None:
     461          grammar_source = """
     462          start: expr+ NEWLINE? ENDMARKER
     463          expr: 'foo'
     464          """
     465          grammar = parse_string(grammar_source, GrammarParser)
     466          parser_source = generate_c_parser_source(grammar)
     467          assert "expect_soft_keyword" not in parser_source
     468  
     469      def test_soft_keywords(self) -> None:
     470          grammar_source = """
     471          start: expr+ NEWLINE? ENDMARKER
     472          expr: "foo"
     473          """
     474          grammar = parse_string(grammar_source, GrammarParser)
     475          parser_source = generate_c_parser_source(grammar)
     476          assert "expect_soft_keyword" in parser_source
     477  
     478      def test_soft_keywords_parse(self) -> None:
     479          grammar_source = """
     480          start: "if" expr '+' expr NEWLINE
     481          expr: NAME
     482          """
     483          test_source = """
     484          valid_cases = ["if if + if"]
     485          invalid_cases = ["if if"]
     486          self.check_input_strings_for_grammar(valid_cases, invalid_cases)
     487          """
     488          self.run_test(grammar_source, test_source)
     489  
     490      def test_soft_keywords_lookahead(self) -> None:
     491          grammar_source = """
     492          start: &"if" "if" expr '+' expr NEWLINE
     493          expr: NAME
     494          """
     495          test_source = """
     496          valid_cases = ["if if + if"]
     497          invalid_cases = ["if if"]
     498          self.check_input_strings_for_grammar(valid_cases, invalid_cases)
     499          """
     500          self.run_test(grammar_source, test_source)
     501  
     502      def test_forced(self) -> None:
     503          grammar_source = """
     504          start: NAME &&':' | NAME
     505          """
     506          test_source = """
     507          self.assertEqual(parse.parse_string("number :", mode=0), None)
     508          with self.assertRaises(SyntaxError) as e:
     509              parse.parse_string("a", mode=0)
     510          self.assertIn("expected ':'", str(e.exception))
     511          """
     512          self.run_test(grammar_source, test_source)
     513  
     514      def test_forced_with_group(self) -> None:
     515          grammar_source = """
     516          start: NAME &&(':' | ';') | NAME
     517          """
     518          test_source = """
     519          self.assertEqual(parse.parse_string("number :", mode=0), None)
     520          self.assertEqual(parse.parse_string("number ;", mode=0), None)
     521          with self.assertRaises(SyntaxError) as e:
     522              parse.parse_string("a", mode=0)
     523          self.assertIn("expected (':' | ';')", e.exception.args[0])
     524          """
     525          self.run_test(grammar_source, test_source)