1 import unittest
2
3 from test import test_tools
4 from typing import Dict, Set
5
6 test_tools.skip_if_missing("peg_generator")
7 with test_tools.imports_under_tool("peg_generator"):
8 from pegen.grammar_parser import GeneratedParser as GrammarParser
9 from pegen.testutil import parse_string
10 from pegen.first_sets import FirstSetCalculator
11 from pegen.grammar import Grammar
12
13
14 class ESC[4;38;5;81mTestFirstSets(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
15 def calculate_first_sets(self, grammar_source: str) -> Dict[str, Set[str]]:
16 grammar: Grammar = parse_string(grammar_source, GrammarParser)
17 return FirstSetCalculator(grammar.rules).calculate()
18
19 def test_alternatives(self) -> None:
20 grammar = """
21 start: expr NEWLINE? ENDMARKER
22 expr: A | B
23 A: 'a' | '-'
24 B: 'b' | '+'
25 """
26 self.assertEqual(
27 self.calculate_first_sets(grammar),
28 {
29 "A": {"'a'", "'-'"},
30 "B": {"'+'", "'b'"},
31 "expr": {"'+'", "'a'", "'b'", "'-'"},
32 "start": {"'+'", "'a'", "'b'", "'-'"},
33 },
34 )
35
36 def test_optionals(self) -> None:
37 grammar = """
38 start: expr NEWLINE
39 expr: ['a'] ['b'] 'c'
40 """
41 self.assertEqual(
42 self.calculate_first_sets(grammar),
43 {
44 "expr": {"'c'", "'a'", "'b'"},
45 "start": {"'c'", "'a'", "'b'"},
46 },
47 )
48
49 def test_repeat_with_separator(self) -> None:
50 grammar = """
51 start: ','.thing+ NEWLINE
52 thing: NUMBER
53 """
54 self.assertEqual(
55 self.calculate_first_sets(grammar),
56 {"thing": {"NUMBER"}, "start": {"NUMBER"}},
57 )
58
59 def test_optional_operator(self) -> None:
60 grammar = """
61 start: sum NEWLINE
62 sum: (term)? 'b'
63 term: NUMBER
64 """
65 self.assertEqual(
66 self.calculate_first_sets(grammar),
67 {
68 "term": {"NUMBER"},
69 "sum": {"NUMBER", "'b'"},
70 "start": {"'b'", "NUMBER"},
71 },
72 )
73
74 def test_optional_literal(self) -> None:
75 grammar = """
76 start: sum NEWLINE
77 sum: '+' ? term
78 term: NUMBER
79 """
80 self.assertEqual(
81 self.calculate_first_sets(grammar),
82 {
83 "term": {"NUMBER"},
84 "sum": {"'+'", "NUMBER"},
85 "start": {"'+'", "NUMBER"},
86 },
87 )
88
89 def test_optional_after(self) -> None:
90 grammar = """
91 start: term NEWLINE
92 term: NUMBER ['+']
93 """
94 self.assertEqual(
95 self.calculate_first_sets(grammar),
96 {"term": {"NUMBER"}, "start": {"NUMBER"}},
97 )
98
99 def test_optional_before(self) -> None:
100 grammar = """
101 start: term NEWLINE
102 term: ['+'] NUMBER
103 """
104 self.assertEqual(
105 self.calculate_first_sets(grammar),
106 {"term": {"NUMBER", "'+'"}, "start": {"NUMBER", "'+'"}},
107 )
108
109 def test_repeat_0(self) -> None:
110 grammar = """
111 start: thing* "+" NEWLINE
112 thing: NUMBER
113 """
114 self.assertEqual(
115 self.calculate_first_sets(grammar),
116 {"thing": {"NUMBER"}, "start": {'"+"', "NUMBER"}},
117 )
118
119 def test_repeat_0_with_group(self) -> None:
120 grammar = """
121 start: ('+' '-')* term NEWLINE
122 term: NUMBER
123 """
124 self.assertEqual(
125 self.calculate_first_sets(grammar),
126 {"term": {"NUMBER"}, "start": {"'+'", "NUMBER"}},
127 )
128
129 def test_repeat_1(self) -> None:
130 grammar = """
131 start: thing+ '-' NEWLINE
132 thing: NUMBER
133 """
134 self.assertEqual(
135 self.calculate_first_sets(grammar),
136 {"thing": {"NUMBER"}, "start": {"NUMBER"}},
137 )
138
139 def test_repeat_1_with_group(self) -> None:
140 grammar = """
141 start: ('+' term)+ term NEWLINE
142 term: NUMBER
143 """
144 self.assertEqual(
145 self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'"}}
146 )
147
148 def test_gather(self) -> None:
149 grammar = """
150 start: ','.thing+ NEWLINE
151 thing: NUMBER
152 """
153 self.assertEqual(
154 self.calculate_first_sets(grammar),
155 {"thing": {"NUMBER"}, "start": {"NUMBER"}},
156 )
157
158 def test_positive_lookahead(self) -> None:
159 grammar = """
160 start: expr NEWLINE
161 expr: &'a' opt
162 opt: 'a' | 'b' | 'c'
163 """
164 self.assertEqual(
165 self.calculate_first_sets(grammar),
166 {
167 "expr": {"'a'"},
168 "start": {"'a'"},
169 "opt": {"'b'", "'c'", "'a'"},
170 },
171 )
172
173 def test_negative_lookahead(self) -> None:
174 grammar = """
175 start: expr NEWLINE
176 expr: !'a' opt
177 opt: 'a' | 'b' | 'c'
178 """
179 self.assertEqual(
180 self.calculate_first_sets(grammar),
181 {
182 "opt": {"'b'", "'a'", "'c'"},
183 "expr": {"'b'", "'c'"},
184 "start": {"'b'", "'c'"},
185 },
186 )
187
188 def test_left_recursion(self) -> None:
189 grammar = """
190 start: expr NEWLINE
191 expr: ('-' term | expr '+' term | term)
192 term: NUMBER
193 foo: 'foo'
194 bar: 'bar'
195 baz: 'baz'
196 """
197 self.assertEqual(
198 self.calculate_first_sets(grammar),
199 {
200 "expr": {"NUMBER", "'-'"},
201 "term": {"NUMBER"},
202 "start": {"NUMBER", "'-'"},
203 "foo": {"'foo'"},
204 "bar": {"'bar'"},
205 "baz": {"'baz'"},
206 },
207 )
208
209 def test_advance_left_recursion(self) -> None:
210 grammar = """
211 start: NUMBER | sign start
212 sign: ['-']
213 """
214 self.assertEqual(
215 self.calculate_first_sets(grammar),
216 {"sign": {"'-'", ""}, "start": {"'-'", "NUMBER"}},
217 )
218
219 def test_mutual_left_recursion(self) -> None:
220 grammar = """
221 start: foo 'E'
222 foo: bar 'A' | 'B'
223 bar: foo 'C' | 'D'
224 """
225 self.assertEqual(
226 self.calculate_first_sets(grammar),
227 {
228 "foo": {"'D'", "'B'"},
229 "bar": {"'D'"},
230 "start": {"'D'", "'B'"},
231 },
232 )
233
234 def test_nasty_left_recursion(self) -> None:
235 # TODO: Validate this
236 grammar = """
237 start: target '='
238 target: maybe '+' | NAME
239 maybe: maybe '-' | target
240 """
241 self.assertEqual(
242 self.calculate_first_sets(grammar),
243 {"maybe": set(), "target": {"NAME"}, "start": {"NAME"}},
244 )
245
246 def test_nullable_rule(self) -> None:
247 grammar = """
248 start: sign thing $
249 sign: ['-']
250 thing: NUMBER
251 """
252 self.assertEqual(
253 self.calculate_first_sets(grammar),
254 {
255 "sign": {"", "'-'"},
256 "thing": {"NUMBER"},
257 "start": {"NUMBER", "'-'"},
258 },
259 )
260
261 def test_epsilon_production_in_start_rule(self) -> None:
262 grammar = """
263 start: ['-'] $
264 """
265 self.assertEqual(
266 self.calculate_first_sets(grammar), {"start": {"ENDMARKER", "'-'"}}
267 )
268
269 def test_multiple_nullable_rules(self) -> None:
270 grammar = """
271 start: sign thing other another $
272 sign: ['-']
273 thing: ['+']
274 other: '*'
275 another: '/'
276 """
277 self.assertEqual(
278 self.calculate_first_sets(grammar),
279 {
280 "sign": {"", "'-'"},
281 "thing": {"'+'", ""},
282 "start": {"'+'", "'-'", "'*'"},
283 "other": {"'*'"},
284 "another": {"'/'"},
285 },
286 )