# python (3.11.7)
       1  import ast
       2  import difflib
       3  import io
       4  import textwrap
       5  import unittest
       6  
       7  from test import test_tools
from typing import Any, Dict, Type
       9  from tokenize import TokenInfo, NAME, NEWLINE, NUMBER, OP
      10  
      11  test_tools.skip_if_missing("peg_generator")
      12  with test_tools.imports_under_tool("peg_generator"):
      13      from pegen.grammar_parser import GeneratedParser as GrammarParser
      14      from pegen.testutil import parse_string, generate_parser, make_parser
      15      from pegen.grammar import GrammarVisitor, GrammarError, Grammar
      16      from pegen.grammar_visualizer import ASTGrammarPrinter
      17      from pegen.parser import Parser
      18      from pegen.parser_generator import compute_nullables, compute_left_recursives
      19      from pegen.python_generator import PythonParserGenerator
      20  
      21  
class TestPegen(unittest.TestCase):
      23      def test_parse_grammar(self) -> None:
      24          grammar_source = """
      25          start: sum NEWLINE
      26          sum: t1=term '+' t2=term { action } | term
      27          term: NUMBER
      28          """
      29          expected = """
      30          start: sum NEWLINE
      31          sum: term '+' term | term
      32          term: NUMBER
      33          """
      34          grammar: Grammar = parse_string(grammar_source, GrammarParser)
      35          rules = grammar.rules
      36          self.assertEqual(str(grammar), textwrap.dedent(expected).strip())
      37          # Check the str() and repr() of a few rules; AST nodes don't support ==.
      38          self.assertEqual(str(rules["start"]), "start: sum NEWLINE")
      39          self.assertEqual(str(rules["sum"]), "sum: term '+' term | term")
      40          expected_repr = (
      41              "Rule('term', None, Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))"
      42          )
      43          self.assertEqual(repr(rules["term"]), expected_repr)
      44  
      45      def test_long_rule_str(self) -> None:
      46          grammar_source = """
      47          start: zero | one | one zero | one one | one zero zero | one zero one | one one zero | one one one
      48          """
      49          expected = """
      50          start:
      51              | zero
      52              | one
      53              | one zero
      54              | one one
      55              | one zero zero
      56              | one zero one
      57              | one one zero
      58              | one one one
      59          """
      60          grammar: Grammar = parse_string(grammar_source, GrammarParser)
      61          self.assertEqual(str(grammar.rules["start"]), textwrap.dedent(expected).strip())
      62  
      63      def test_typed_rules(self) -> None:
      64          grammar = """
      65          start[int]: sum NEWLINE
      66          sum[int]: t1=term '+' t2=term { action } | term
      67          term[int]: NUMBER
      68          """
      69          rules = parse_string(grammar, GrammarParser).rules
      70          # Check the str() and repr() of a few rules; AST nodes don't support ==.
      71          self.assertEqual(str(rules["start"]), "start: sum NEWLINE")
      72          self.assertEqual(str(rules["sum"]), "sum: term '+' term | term")
      73          self.assertEqual(
      74              repr(rules["term"]),
      75              "Rule('term', 'int', Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))",
      76          )
      77  
    def test_gather(self) -> None:
        """The gather operator ','.thing+ parses a separator-delimited list.

        Checks the textual rendering of the rule and the parse tree of a
        generated parser: separators are dropped and the matched items come
        back as a list.
        """
        grammar = """
        start: ','.thing+ NEWLINE
        thing: NUMBER
        """
        rules = parse_string(grammar, GrammarParser).rules
        self.assertEqual(str(rules["start"]), "start: ','.thing+ NEWLINE")
        self.assertTrue(
            repr(rules["start"]).startswith(
                "Rule('start', None, Rhs([Alt([NamedItem(None, Gather(StringLeaf(\"','\"), NameLeaf('thing'"
            )
        )
        self.assertEqual(str(rules["thing"]), "thing: NUMBER")
        parser_class = make_parser(grammar)
        # A single item also parses (this result is intentionally unchecked).
        node = parse_string("42\n", parser_class)
        node = parse_string("1, 2\n", parser_class)
        # The ',' separators do not appear in the resulting tree.
        self.assertEqual(
            node,
            [
                [
                    TokenInfo(
                        NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2\n"
                    ),
                    TokenInfo(
                        NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2\n"
                    ),
                ],
                TokenInfo(
                    NEWLINE, string="\n", start=(1, 4), end=(1, 5), line="1, 2\n"
                ),
            ],
        )
     110  
     111      def test_expr_grammar(self) -> None:
     112          grammar = """
     113          start: sum NEWLINE
     114          sum: term '+' term | term
     115          term: NUMBER
     116          """
     117          parser_class = make_parser(grammar)
     118          node = parse_string("42\n", parser_class)
     119          self.assertEqual(
     120              node,
     121              [
     122                  TokenInfo(NUMBER, string="42", start=(1, 0), end=(1, 2), line="42\n"),
     123                  TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="42\n"),
     124              ],
     125          )
     126  
    def test_optional_operator(self) -> None:
        """An optional group ('+' term)? yields a nested subtree or None."""
        grammar = """
        start: sum NEWLINE
        sum: term ('+' term)?
        term: NUMBER
        """
        parser_class = make_parser(grammar)
        # With the optional part present, it appears as a nested [op, term] list.
        node = parse_string("1 + 2\n", parser_class)
        self.assertEqual(
            node,
            [
                [
                    TokenInfo(
                        NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2\n"
                    ),
                    [
                        TokenInfo(
                            OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2\n"
                        ),
                        TokenInfo(
                            NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2\n"
                        ),
                    ],
                ],
                TokenInfo(
                    NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 + 2\n"
                ),
            ],
        )
        # With the optional part absent, its slot is filled with None.
        node = parse_string("1\n", parser_class)
        self.assertEqual(
            node,
            [
                [
                    TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
                    None,
                ],
                TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
            ],
        )
     167  
    def test_optional_literal(self) -> None:
        """A single optional literal '+' ? yields the token itself or None."""
        grammar = """
        start: sum NEWLINE
        sum: term '+' ?
        term: NUMBER
        """
        parser_class = make_parser(grammar)
        # Optional literal present: the OP token appears directly in the tree.
        node = parse_string("1+\n", parser_class)
        self.assertEqual(
            node,
            [
                [
                    TokenInfo(
                        NUMBER, string="1", start=(1, 0), end=(1, 1), line="1+\n"
                    ),
                    TokenInfo(OP, string="+", start=(1, 1), end=(1, 2), line="1+\n"),
                ],
                TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="1+\n"),
            ],
        )
        # Optional literal absent: its slot is None.
        node = parse_string("1\n", parser_class)
        self.assertEqual(
            node,
            [
                [
                    TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
                    None,
                ],
                TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
            ],
        )
     199  
    def test_alt_optional_operator(self) -> None:
        """The bracket form ['+' term] behaves like the ( ... )? optional group."""
        grammar = """
        start: sum NEWLINE
        sum: term ['+' term]
        term: NUMBER
        """
        parser_class = make_parser(grammar)
        # Optional part present: a nested [op, term] list.
        node = parse_string("1 + 2\n", parser_class)
        self.assertEqual(
            node,
            [
                [
                    TokenInfo(
                        NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2\n"
                    ),
                    [
                        TokenInfo(
                            OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2\n"
                        ),
                        TokenInfo(
                            NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2\n"
                        ),
                    ],
                ],
                TokenInfo(
                    NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 + 2\n"
                ),
            ],
        )
        # Optional part absent: its slot is None.
        node = parse_string("1\n", parser_class)
        self.assertEqual(
            node,
            [
                [
                    TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
                    None,
                ],
                TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
            ],
        )
     240  
    def test_repeat_0_simple(self) -> None:
        """thing* matches zero or more things, collected into a list."""
        grammar = """
        start: thing thing* NEWLINE
        thing: NUMBER
        """
        parser_class = make_parser(grammar)
        node = parse_string("1 2 3\n", parser_class)
        self.assertEqual(
            node,
            [
                TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n"),
                [
                    TokenInfo(
                        NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n"
                    ),
                    TokenInfo(
                        NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n"
                    ),
                ],
                TokenInfo(
                    NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"
                ),
            ],
        )
        # Zero repetitions still succeed, yielding an empty list.
        node = parse_string("1\n", parser_class)
        self.assertEqual(
            node,
            [
                TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
                [],
                TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
            ],
        )
     274  
    def test_repeat_0_complex(self) -> None:
        """A repeated group ('+' term)* collects one [op, term] list per match."""
        grammar = """
        start: term ('+' term)* NEWLINE
        term: NUMBER
        """
        parser_class = make_parser(grammar)
        node = parse_string("1 + 2 + 3\n", parser_class)
        self.assertEqual(
            node,
            [
                TokenInfo(
                    NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n"
                ),
                [
                    [
                        TokenInfo(
                            OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"
                        ),
                        TokenInfo(
                            NUMBER,
                            string="2",
                            start=(1, 4),
                            end=(1, 5),
                            line="1 + 2 + 3\n",
                        ),
                    ],
                    [
                        TokenInfo(
                            OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"
                        ),
                        TokenInfo(
                            NUMBER,
                            string="3",
                            start=(1, 8),
                            end=(1, 9),
                            line="1 + 2 + 3\n",
                        ),
                    ],
                ],
                TokenInfo(
                    NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"
                ),
            ],
        )
     319  
    def test_repeat_1_simple(self) -> None:
        """thing+ requires at least one repetition and collects them in a list."""
        grammar = """
        start: thing thing+ NEWLINE
        thing: NUMBER
        """
        parser_class = make_parser(grammar)
        node = parse_string("1 2 3\n", parser_class)
        self.assertEqual(
            node,
            [
                TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n"),
                [
                    TokenInfo(
                        NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n"
                    ),
                    TokenInfo(
                        NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n"
                    ),
                ],
                TokenInfo(
                    NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"
                ),
            ],
        )
        # A lone number leaves nothing for thing+, so the parse must fail.
        with self.assertRaises(SyntaxError):
            parse_string("1\n", parser_class)
     346  
    def test_repeat_1_complex(self) -> None:
        """A one-or-more group ('+' term)+ collects [op, term] lists; zero fails."""
        grammar = """
        start: term ('+' term)+ NEWLINE
        term: NUMBER
        """
        parser_class = make_parser(grammar)
        node = parse_string("1 + 2 + 3\n", parser_class)
        self.assertEqual(
            node,
            [
                TokenInfo(
                    NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n"
                ),
                [
                    [
                        TokenInfo(
                            OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"
                        ),
                        TokenInfo(
                            NUMBER,
                            string="2",
                            start=(1, 4),
                            end=(1, 5),
                            line="1 + 2 + 3\n",
                        ),
                    ],
                    [
                        TokenInfo(
                            OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"
                        ),
                        TokenInfo(
                            NUMBER,
                            string="3",
                            start=(1, 8),
                            end=(1, 9),
                            line="1 + 2 + 3\n",
                        ),
                    ],
                ],
                TokenInfo(
                    NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"
                ),
            ],
        )
        # With no '+' at all there is no repetition, so the parse must fail.
        with self.assertRaises(SyntaxError):
            parse_string("1\n", parser_class)
     393  
    def test_repeat_with_sep_simple(self) -> None:
        """','.thing+ parses several separated items; separators are dropped."""
        grammar = """
        start: ','.thing+ NEWLINE
        thing: NUMBER
        """
        parser_class = make_parser(grammar)
        node = parse_string("1, 2, 3\n", parser_class)
        self.assertEqual(
            node,
            [
                [
                    TokenInfo(
                        NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2, 3\n"
                    ),
                    TokenInfo(
                        NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2, 3\n"
                    ),
                    TokenInfo(
                        NUMBER, string="3", start=(1, 6), end=(1, 7), line="1, 2, 3\n"
                    ),
                ],
                TokenInfo(
                    NEWLINE, string="\n", start=(1, 7), end=(1, 8), line="1, 2, 3\n"
                ),
            ],
        )
     420  
    def test_left_recursive(self) -> None:
        """Direct left recursion is detected and parsed left-associatively."""
        grammar_source = """
        start: expr NEWLINE
        expr: ('-' term | expr '+' term | term)
        term: NUMBER
        foo: NAME+
        bar: NAME*
        baz: NAME?
        """
        grammar: Grammar = parse_string(grammar_source, GrammarParser)
        parser_class = generate_parser(grammar)
        rules = grammar.rules
        # Only 'expr' refers to itself in leading position.
        self.assertFalse(rules["start"].left_recursive)
        self.assertTrue(rules["expr"].left_recursive)
        self.assertFalse(rules["term"].left_recursive)
        self.assertFalse(rules["foo"].left_recursive)
        self.assertFalse(rules["bar"].left_recursive)
        self.assertFalse(rules["baz"].left_recursive)
        # '1 + 2 + 3' nests to the left: ((1 + 2) + 3).
        node = parse_string("1 + 2 + 3\n", parser_class)
        self.assertEqual(
            node,
            [
                [
                    [
                        TokenInfo(
                            NUMBER,
                            string="1",
                            start=(1, 0),
                            end=(1, 1),
                            line="1 + 2 + 3\n",
                        ),
                        TokenInfo(
                            OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"
                        ),
                        TokenInfo(
                            NUMBER,
                            string="2",
                            start=(1, 4),
                            end=(1, 5),
                            line="1 + 2 + 3\n",
                        ),
                    ],
                    TokenInfo(
                        OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"
                    ),
                    TokenInfo(
                        NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n"
                    ),
                ],
                TokenInfo(
                    NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"
                ),
            ],
        )
     475  
    def test_python_expr(self) -> None:
        """Grammar actions can build a real `ast` tree that compiles and runs.

        The actions construct ast.BinOp/Name/Constant nodes with explicit
        source positions; the resulting ast.Expression is compiled with
        compile(..., "eval") and evaluated.
        """
        grammar = """
        start: expr NEWLINE? $ { ast.Expression(expr, lineno=1, col_offset=0) }
        expr: ( expr '+' term { ast.BinOp(expr, ast.Add(), term, lineno=expr.lineno, col_offset=expr.col_offset, end_lineno=term.end_lineno, end_col_offset=term.end_col_offset) }
            | expr '-' term { ast.BinOp(expr, ast.Sub(), term, lineno=expr.lineno, col_offset=expr.col_offset, end_lineno=term.end_lineno, end_col_offset=term.end_col_offset) }
            | term { term }
            )
        term: ( l=term '*' r=factor { ast.BinOp(l, ast.Mult(), r, lineno=l.lineno, col_offset=l.col_offset, end_lineno=r.end_lineno, end_col_offset=r.end_col_offset) }
            | l=term '/' r=factor { ast.BinOp(l, ast.Div(), r, lineno=l.lineno, col_offset=l.col_offset, end_lineno=r.end_lineno, end_col_offset=r.end_col_offset) }
            | factor { factor }
            )
        factor: ( '(' expr ')' { expr }
                | atom { atom }
                )
        atom: ( n=NAME { ast.Name(id=n.string, ctx=ast.Load(), lineno=n.start[0], col_offset=n.start[1], end_lineno=n.end[0], end_col_offset=n.end[1]) }
            | n=NUMBER { ast.Constant(value=ast.literal_eval(n.string), lineno=n.start[0], col_offset=n.start[1], end_lineno=n.end[0], end_col_offset=n.end[1]) }
            )
        """
        parser_class = make_parser(grammar)
        node = parse_string("(1 + 2*3 + 5)/(6 - 2)\n", parser_class)
        code = compile(node, "", "eval")
        val = eval(code)
        # (1 + 2*3 + 5) / (6 - 2) == 12 / 4 == 3.0
        self.assertEqual(val, 3.0)
     499  
     500      def test_nullable(self) -> None:
     501          grammar_source = """
     502          start: sign NUMBER
     503          sign: ['-' | '+']
     504          """
     505          grammar: Grammar = parse_string(grammar_source, GrammarParser)
     506          rules = grammar.rules
     507          nullables = compute_nullables(rules)
     508          self.assertNotIn(rules["start"], nullables)  # Not None!
     509          self.assertIn(rules["sign"], nullables)
     510  
     511      def test_advanced_left_recursive(self) -> None:
     512          grammar_source = """
     513          start: NUMBER | sign start
     514          sign: ['-']
     515          """
     516          grammar: Grammar = parse_string(grammar_source, GrammarParser)
     517          rules = grammar.rules
     518          nullables = compute_nullables(rules)
     519          compute_left_recursives(rules)
     520          self.assertNotIn(rules["start"], nullables)  # Not None!
     521          self.assertIn(rules["sign"], nullables)
     522          self.assertTrue(rules["start"].left_recursive)
     523          self.assertFalse(rules["sign"].left_recursive)
     524  
     525      def test_mutually_left_recursive(self) -> None:
     526          grammar_source = """
     527          start: foo 'E'
     528          foo: bar 'A' | 'B'
     529          bar: foo 'C' | 'D'
     530          """
     531          grammar: Grammar = parse_string(grammar_source, GrammarParser)
     532          out = io.StringIO()
     533          genr = PythonParserGenerator(grammar, out)
     534          rules = grammar.rules
     535          self.assertFalse(rules["start"].left_recursive)
     536          self.assertTrue(rules["foo"].left_recursive)
     537          self.assertTrue(rules["bar"].left_recursive)
     538          genr.generate("<string>")
     539          ns: Dict[str, Any] = {}
     540          exec(out.getvalue(), ns)
     541          parser_class: Type[Parser] = ns["GeneratedParser"]
     542          node = parse_string("D A C A E", parser_class)
     543  
     544          self.assertEqual(
     545              node,
     546              [
     547                  [
     548                      [
     549                          [
     550                              TokenInfo(
     551                                  type=NAME,
     552                                  string="D",
     553                                  start=(1, 0),
     554                                  end=(1, 1),
     555                                  line="D A C A E",
     556                              ),
     557                              TokenInfo(
     558                                  type=NAME,
     559                                  string="A",
     560                                  start=(1, 2),
     561                                  end=(1, 3),
     562                                  line="D A C A E",
     563                              ),
     564                          ],
     565                          TokenInfo(
     566                              type=NAME,
     567                              string="C",
     568                              start=(1, 4),
     569                              end=(1, 5),
     570                              line="D A C A E",
     571                          ),
     572                      ],
     573                      TokenInfo(
     574                          type=NAME,
     575                          string="A",
     576                          start=(1, 6),
     577                          end=(1, 7),
     578                          line="D A C A E",
     579                      ),
     580                  ],
     581                  TokenInfo(
     582                      type=NAME, string="E", start=(1, 8), end=(1, 9), line="D A C A E"
     583                  ),
     584              ],
     585          )
     586          node = parse_string("B C A E", parser_class)
     587          self.assertEqual(
     588              node,
     589              [
     590                  [
     591                      [
     592                          TokenInfo(
     593                              type=NAME,
     594                              string="B",
     595                              start=(1, 0),
     596                              end=(1, 1),
     597                              line="B C A E",
     598                          ),
     599                          TokenInfo(
     600                              type=NAME,
     601                              string="C",
     602                              start=(1, 2),
     603                              end=(1, 3),
     604                              line="B C A E",
     605                          ),
     606                      ],
     607                      TokenInfo(
     608                          type=NAME, string="A", start=(1, 4), end=(1, 5), line="B C A E"
     609                      ),
     610                  ],
     611                  TokenInfo(
     612                      type=NAME, string="E", start=(1, 6), end=(1, 7), line="B C A E"
     613                  ),
     614              ],
     615          )
     616  
     617      def test_nasty_mutually_left_recursive(self) -> None:
     618          # This grammar does not recognize 'x - + =', much to my chagrin.
     619          # But that's the way PEG works.
     620          # [Breathlessly]
     621          # The problem is that the toplevel target call
     622          # recurses into maybe, which recognizes 'x - +',
     623          # and then the toplevel target looks for another '+',
     624          # which fails, so it retreats to NAME,
     625          # which succeeds, so we end up just recognizing 'x',
     626          # and then start fails because there's no '=' after that.
     627          grammar_source = """
     628          start: target '='
     629          target: maybe '+' | NAME
     630          maybe: maybe '-' | target
     631          """
     632          grammar: Grammar = parse_string(grammar_source, GrammarParser)
     633          out = io.StringIO()
     634          genr = PythonParserGenerator(grammar, out)
     635          genr.generate("<string>")
     636          ns: Dict[str, Any] = {}
     637          exec(out.getvalue(), ns)
     638          parser_class = ns["GeneratedParser"]
     639          with self.assertRaises(SyntaxError):
     640              parse_string("x - + =", parser_class)
     641  
    def test_lookahead(self) -> None:
        """Positive (&) and negative (!) lookaheads guide parsing without consuming.

        The trailing &'.' and the !(target '=') guard never appear in the
        resulting tree; the input is recognized as an assign_stmt.
        """
        grammar = """
        start: (expr_stmt | assign_stmt) &'.'
        expr_stmt: !(target '=') expr
        assign_stmt: target '=' expr
        expr: term ('+' term)*
        target: NAME
        term: NUMBER
        """
        parser_class = make_parser(grammar)
        node = parse_string("foo = 12 + 12 .", parser_class)
        self.assertEqual(
            node,
            [
                TokenInfo(
                    NAME, string="foo", start=(1, 0), end=(1, 3), line="foo = 12 + 12 ."
                ),
                TokenInfo(
                    OP, string="=", start=(1, 4), end=(1, 5), line="foo = 12 + 12 ."
                ),
                [
                    TokenInfo(
                        NUMBER,
                        string="12",
                        start=(1, 6),
                        end=(1, 8),
                        line="foo = 12 + 12 .",
                    ),
                    [
                        [
                            TokenInfo(
                                OP,
                                string="+",
                                start=(1, 9),
                                end=(1, 10),
                                line="foo = 12 + 12 .",
                            ),
                            TokenInfo(
                                NUMBER,
                                string="12",
                                start=(1, 11),
                                end=(1, 13),
                                line="foo = 12 + 12 .",
                            ),
                        ]
                    ],
                ],
            ],
        )
     691  
     692      def test_named_lookahead_error(self) -> None:
     693          grammar = """
     694          start: foo=!'x' NAME
     695          """
     696          with self.assertRaises(SyntaxError):
     697              make_parser(grammar)
     698  
     699      def test_start_leader(self) -> None:
     700          grammar = """
     701          start: attr | NAME
     702          attr: start '.' NAME
     703          """
     704          # Would assert False without a special case in compute_left_recursives().
     705          make_parser(grammar)
     706  
     707      def test_opt_sequence(self) -> None:
     708          grammar = """
     709          start: [NAME*]
     710          """
     711          # This case was failing because of a double trailing comma at the end
     712          # of a line in the generated source. See bpo-41044
     713          make_parser(grammar)
     714  
     715      def test_left_recursion_too_complex(self) -> None:
     716          grammar = """
     717          start: foo
     718          foo: bar '+' | baz '+' | '+'
     719          bar: baz '-' | foo '-' | '-'
     720          baz: foo '*' | bar '*' | '*'
     721          """
     722          with self.assertRaises(ValueError) as errinfo:
     723              make_parser(grammar)
     724              self.assertTrue("no leader" in str(errinfo.exception.value))
     725  
     726      def test_cut(self) -> None:
     727          grammar = """
     728          start: '(' ~ expr ')'
     729          expr: NUMBER
     730          """
     731          parser_class = make_parser(grammar)
     732          node = parse_string("(1)", parser_class)
     733          self.assertEqual(
     734              node,
     735              [
     736                  TokenInfo(OP, string="(", start=(1, 0), end=(1, 1), line="(1)"),
     737                  TokenInfo(NUMBER, string="1", start=(1, 1), end=(1, 2), line="(1)"),
     738                  TokenInfo(OP, string=")", start=(1, 2), end=(1, 3), line="(1)"),
     739              ],
     740          )
     741  
     742      def test_dangling_reference(self) -> None:
     743          grammar = """
     744          start: foo ENDMARKER
     745          foo: bar NAME
     746          """
     747          with self.assertRaises(GrammarError):
     748              parser_class = make_parser(grammar)
     749  
     750      def test_bad_token_reference(self) -> None:
     751          grammar = """
     752          start: foo
     753          foo: NAMEE
     754          """
     755          with self.assertRaises(GrammarError):
     756              parser_class = make_parser(grammar)
     757  
     758      def test_missing_start(self) -> None:
     759          grammar = """
     760          foo: NAME
     761          """
     762          with self.assertRaises(GrammarError):
     763              parser_class = make_parser(grammar)
     764  
     765      def test_invalid_rule_name(self) -> None:
     766          grammar = """
     767          start: _a b
     768          _a: 'a'
     769          b: 'b'
     770          """
     771          with self.assertRaisesRegex(GrammarError, "cannot start with underscore: '_a'"):
     772              parser_class = make_parser(grammar)
     773  
     774      def test_invalid_variable_name(self) -> None:
     775          grammar = """
     776          start: a b
     777          a: _x='a'
     778          b: 'b'
     779          """
     780          with self.assertRaisesRegex(GrammarError, "cannot start with underscore: '_x'"):
     781              parser_class = make_parser(grammar)
     782  
     783      def test_invalid_variable_name_in_temporal_rule(self) -> None:
     784          grammar = """
     785          start: a b
     786          a: (_x='a' | 'b') | 'c'
     787          b: 'b'
     788          """
     789          with self.assertRaisesRegex(GrammarError, "cannot start with underscore: '_x'"):
     790              parser_class = make_parser(grammar)
     791  
     792      def test_soft_keyword(self) -> None:
     793          grammar = """
     794          start:
     795              | "number" n=NUMBER { eval(n.string) }
     796              | "string" n=STRING { n.string }
     797              | SOFT_KEYWORD l=NAME n=(NUMBER | NAME | STRING) { f"{l.string} = {n.string}"}
     798          """
     799          parser_class = make_parser(grammar)
     800          self.assertEqual(parse_string("number 1", parser_class), 1)
     801          self.assertEqual(parse_string("string 'b'", parser_class), "'b'")
     802          self.assertEqual(
     803              parse_string("number test 1", parser_class), "test = 1"
     804          )
     805          assert (
     806              parse_string("string test 'b'", parser_class) == "test = 'b'"
     807          )
     808          with self.assertRaises(SyntaxError):
     809              parse_string("test 1", parser_class)
     810  
     811      def test_forced(self) -> None:
     812          grammar = """
     813          start: NAME &&':' | NAME
     814          """
     815          parser_class = make_parser(grammar)
     816          self.assertTrue(parse_string("number :", parser_class))
     817          with self.assertRaises(SyntaxError) as e:
     818              parse_string("a", parser_class)
     819  
     820          self.assertIn("expected ':'", str(e.exception))
     821  
     822      def test_forced_with_group(self) -> None:
     823          grammar = """
     824          start: NAME &&(':' | ';') | NAME
     825          """
     826          parser_class = make_parser(grammar)
     827          self.assertTrue(parse_string("number :", parser_class))
     828          self.assertTrue(parse_string("number ;", parser_class))
     829          with self.assertRaises(SyntaxError) as e:
     830              parse_string("a", parser_class)
     831          self.assertIn("expected (':' | ';')", e.exception.args[0])
     832  
     833      def test_unreachable_explicit(self) -> None:
     834          source = """
     835          start: NAME { UNREACHABLE }
     836          """
     837          grammar = parse_string(source, GrammarParser)
     838          out = io.StringIO()
     839          genr = PythonParserGenerator(
     840              grammar, out, unreachable_formatting="This is a test"
     841          )
     842          genr.generate("<string>")
     843          self.assertIn("This is a test", out.getvalue())
     844  
     845      def test_unreachable_implicit1(self) -> None:
     846          source = """
     847          start: NAME | invalid_input
     848          invalid_input: NUMBER { None }
     849          """
     850          grammar = parse_string(source, GrammarParser)
     851          out = io.StringIO()
     852          genr = PythonParserGenerator(
     853              grammar, out, unreachable_formatting="This is a test"
     854          )
     855          genr.generate("<string>")
     856          self.assertIn("This is a test", out.getvalue())
     857  
     858      def test_unreachable_implicit2(self) -> None:
     859          source = """
     860          start: NAME | '(' invalid_input ')'
     861          invalid_input: NUMBER { None }
     862          """
     863          grammar = parse_string(source, GrammarParser)
     864          out = io.StringIO()
     865          genr = PythonParserGenerator(
     866              grammar, out, unreachable_formatting="This is a test"
     867          )
     868          genr.generate("<string>")
     869          self.assertIn("This is a test", out.getvalue())
     870  
     871      def test_unreachable_implicit3(self) -> None:
     872          source = """
     873          start: NAME | invalid_input { None }
     874          invalid_input: NUMBER
     875          """
     876          grammar = parse_string(source, GrammarParser)
     877          out = io.StringIO()
     878          genr = PythonParserGenerator(
     879              grammar, out, unreachable_formatting="This is a test"
     880          )
     881          genr.generate("<string>")
     882          self.assertNotIn("This is a test", out.getvalue())
     883  
     884      def test_locations_in_alt_action_and_group(self) -> None:
     885          grammar = """
     886          start: t=term NEWLINE? $ { ast.Expression(t, LOCATIONS) }
     887          term:
     888              | l=term '*' r=factor { ast.BinOp(l, ast.Mult(), r, LOCATIONS) }
     889              | l=term '/' r=factor { ast.BinOp(l, ast.Div(), r, LOCATIONS) }
     890              | factor
     891          factor:
     892              | (
     893                  n=NAME { ast.Name(id=n.string, ctx=ast.Load(), LOCATIONS) } |
     894                  n=NUMBER { ast.Constant(value=ast.literal_eval(n.string), LOCATIONS) }
     895              )
     896          """
     897          parser_class = make_parser(grammar)
     898          source = "2*3\n"
     899          o = ast.dump(parse_string(source, parser_class).body, include_attributes=True)
     900          p = ast.dump(ast.parse(source).body[0].value, include_attributes=True).replace(
     901              " kind=None,", ""
     902          )
     903          diff = "\n".join(
     904              difflib.unified_diff(
     905                  o.split("\n"), p.split("\n"), "cpython", "python-pegen"
     906              )
     907          )
     908          self.assertFalse(diff)
     909  
     910  
     911  class ESC[4;38;5;81mTestGrammarVisitor:
     912      class ESC[4;38;5;81mVisitor(ESC[4;38;5;149mGrammarVisitor):
     913          def __init__(self) -> None:
     914              self.n_nodes = 0
     915  
     916          def visit(self, node: Any, *args: Any, **kwargs: Any) -> None:
     917              self.n_nodes += 1
     918              super().visit(node, *args, **kwargs)
     919  
     920      def test_parse_trivial_grammar(self) -> None:
     921          grammar = """
     922          start: 'a'
     923          """
     924          rules = parse_string(grammar, GrammarParser)
     925          visitor = self.Visitor()
     926  
     927          visitor.visit(rules)
     928  
     929          self.assertEqual(visitor.n_nodes, 6)
     930  
     931      def test_parse_or_grammar(self) -> None:
     932          grammar = """
     933          start: rule
     934          rule: 'a' | 'b'
     935          """
     936          rules = parse_string(grammar, GrammarParser)
     937          visitor = self.Visitor()
     938  
     939          visitor.visit(rules)
     940  
     941          # Grammar/Rule/Rhs/Alt/NamedItem/NameLeaf   -> 6
     942          #         Rule/Rhs/                         -> 2
     943          #                  Alt/NamedItem/StringLeaf -> 3
     944          #                  Alt/NamedItem/StringLeaf -> 3
     945  
     946          self.assertEqual(visitor.n_nodes, 14)
     947  
     948      def test_parse_repeat1_grammar(self) -> None:
     949          grammar = """
     950          start: 'a'+
     951          """
     952          rules = parse_string(grammar, GrammarParser)
     953          visitor = self.Visitor()
     954  
     955          visitor.visit(rules)
     956  
     957          # Grammar/Rule/Rhs/Alt/NamedItem/Repeat1/StringLeaf -> 6
     958          self.assertEqual(visitor.n_nodes, 7)
     959  
     960      def test_parse_repeat0_grammar(self) -> None:
     961          grammar = """
     962          start: 'a'*
     963          """
     964          rules = parse_string(grammar, GrammarParser)
     965          visitor = self.Visitor()
     966  
     967          visitor.visit(rules)
     968  
     969          # Grammar/Rule/Rhs/Alt/NamedItem/Repeat0/StringLeaf -> 6
     970  
     971          self.assertEqual(visitor.n_nodes, 7)
     972  
     973      def test_parse_optional_grammar(self) -> None:
     974          grammar = """
     975          start: 'a' ['b']
     976          """
     977          rules = parse_string(grammar, GrammarParser)
     978          visitor = self.Visitor()
     979  
     980          visitor.visit(rules)
     981  
     982          # Grammar/Rule/Rhs/Alt/NamedItem/StringLeaf                       -> 6
     983          #                      NamedItem/Opt/Rhs/Alt/NamedItem/Stringleaf -> 6
     984  
     985          self.assertEqual(visitor.n_nodes, 12)
     986  
     987  
     988  class ESC[4;38;5;81mTestGrammarVisualizer(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     989      def test_simple_rule(self) -> None:
     990          grammar = """
     991          start: 'a' 'b'
     992          """
     993          rules = parse_string(grammar, GrammarParser)
     994  
     995          printer = ASTGrammarPrinter()
     996          lines: List[str] = []
     997          printer.print_grammar_ast(rules, printer=lines.append)
     998  
     999          output = "\n".join(lines)
    1000          expected_output = textwrap.dedent(
    1001              """\
    1002          └──Rule
    1003             └──Rhs
    1004                └──Alt
    1005                   ├──NamedItem
    1006                   │  └──StringLeaf("'a'")
    1007                   └──NamedItem
    1008                      └──StringLeaf("'b'")
    1009          """
    1010          )
    1011  
    1012          self.assertEqual(output, expected_output)
    1013  
    def test_multiple_rules(self) -> None:
        """Each rule of a multi-rule grammar prints as its own tree."""
        grammar = """
        start: a b
        a: 'a'
        b: 'b'
        """
        rules = parse_string(grammar, GrammarParser)

        printer = ASTGrammarPrinter()
        # Builtin generics: `List` was never imported from typing, so the old
        # `List[str]` annotation referenced an undefined name.
        lines: list[str] = []
        printer.print_grammar_ast(rules, printer=lines.append)

        output = "\n".join(lines)
        expected_output = textwrap.dedent(
            """\
        └──Rule
           └──Rhs
              └──Alt
                 ├──NamedItem
                 │  └──NameLeaf('a')
                 └──NamedItem
                    └──NameLeaf('b')

        └──Rule
           └──Rhs
              └──Alt
                 └──NamedItem
                    └──StringLeaf("'a'")

        └──Rule
           └──Rhs
              └──Alt
                 └──NamedItem
                    └──StringLeaf("'b'")
                        """
        )

        self.assertEqual(output, expected_output)
    1052  
    def test_deep_nested_rule(self) -> None:
        """Nested optional groups print as correspondingly nested Opt trees."""
        grammar = """
        start: 'a' ['b'['c'['d']]]
        """
        rules = parse_string(grammar, GrammarParser)

        printer = ASTGrammarPrinter()
        # Builtin generics: `List` was never imported from typing, so the old
        # `List[str]` annotation referenced an undefined name.
        lines: list[str] = []
        printer.print_grammar_ast(rules, printer=lines.append)

        output = "\n".join(lines)
        expected_output = textwrap.dedent(
            """\
        └──Rule
           └──Rhs
              └──Alt
                 ├──NamedItem
                 │  └──StringLeaf("'a'")
                 └──NamedItem
                    └──Opt
                       └──Rhs
                          └──Alt
                             ├──NamedItem
                             │  └──StringLeaf("'b'")
                             └──NamedItem
                                └──Opt
                                   └──Rhs
                                      └──Alt
                                         ├──NamedItem
                                         │  └──StringLeaf("'c'")
                                         └──NamedItem
                                            └──Opt
                                               └──Rhs
                                                  └──Alt
                                                     └──NamedItem
                                                        └──StringLeaf("'d'")
                                """
        )

        self.assertEqual(output, expected_output)