1 """Parser for bytecodes.inst."""
2
import functools
from dataclasses import dataclass, field
from typing import NamedTuple, Callable, TypeVar, Literal

import lexer as lx
from plexer import PLexer
8
9
10 P = TypeVar("P", bound="Parser")
11 N = TypeVar("N", bound="Node")
12
13
14 def contextual(func: Callable[[P], N | None]) -> Callable[[P], N | None]:
15 # Decorator to wrap grammar methods.
16 # Resets position if `func` returns None.
17 def contextual_wrapper(self: P) -> N | None:
18 begin = self.getpos()
19 res = func(self)
20 if res is None:
21 self.setpos(begin)
22 return
23 end = self.getpos()
24 res.context = Context(begin, end, self)
25 return res
26
27 return contextual_wrapper
28
29
class Context(NamedTuple):
    """Token range covered by a parsed node, plus the lexer that owns it.

    `begin` and `end` are parser positions; `Node.tokens` uses them to
    slice `owner.tokens[begin:end]`.
    """

    begin: int
    end: int
    owner: PLexer

    def __repr__(self):
        filename = self.owner.filename
        return f"<{filename}: {self.begin}-{self.end}>"
37
38
@dataclass
class Node:
    """Base class for parse nodes; knows which tokens it was parsed from."""

    # Not an __init__ argument and ignored by ==; it is assigned after the
    # node is constructed (see the `contextual` decorator).
    context: Context | None = field(default=None, init=False, compare=False)

    @property
    def text(self) -> str:
        """Source text of this node with no dedent applied."""
        return self.to_text()

    def to_text(self, dedent: int = 0) -> str:
        """Render this node's tokens via lx.to_text; "" if there is no context."""
        if self.context:
            return lx.to_text(self.tokens, dedent)
        return ""

    @property
    def tokens(self) -> list[lx.Token]:
        """Tokens spanned by this node's context; [] if there is no context."""
        ctx = self.context
        if ctx:
            return ctx.owner.tokens[ctx.begin : ctx.end]
        return []
62
63
@dataclass
class Block(Node):
    """A code block; it carries only a context, which gives the token list."""
68
69
@dataclass
class StackEffect(Node):
    """One named entry in an instruction's stack inputs or outputs.

    Note: `size` cannot be combined with `type` or `cond`.
    """

    name: str
    type: str = ""  # Optional `:type`
    cond: str = ""  # Optional `if (cond)`
    size: str = ""  # Optional `[size]`
77
78
@dataclass
class Expression(Node):
    """An expression captured as text (see Parser.expression)."""

    size: str  # The expression's source text
82
83
@dataclass
class CacheEffect(Node):
    """A cache entry written as `NAME/SIZE` in the definition syntax."""

    name: str
    size: int  # Parsed from the NUMBER after the `/`
88
89
@dataclass
class OpName(Node):
    """A bare identifier naming an op inside a super or macro definition."""

    name: str
93
94
# An input may be either a stack entry or a cache entry.
InputEffect = StackEffect | CacheEffect
# Outputs are always stack entries.
OutputEffect = StackEffect
# A macro component is either an op name or a cache entry.
UOp = OpName | CacheEffect
98
99
@dataclass
class InstHeader(Node):
    """Everything in an instruction definition that precedes its block."""

    override: bool  # True if the `override` token was present
    register: bool  # True if the `register` token was present
    kind: Literal["inst", "op", "legacy"]  # Legacy means no (inputs -- outputs)
    name: str
    inputs: list[InputEffect]
    outputs: list[OutputEffect]
108
109
@dataclass
class InstDef(Node):
    """A complete instruction definition: the header fields plus the block."""

    override: bool  # True if the `override` token was present
    register: bool  # True if the `register` token was present
    kind: Literal["inst", "op", "legacy"]
    name: str
    inputs: list[InputEffect]
    outputs: list[OutputEffect]
    block: Block
118 block: Block
119
120
@dataclass
class Super(Node):
    """A `super(NAME) = OP + OP + ...;` definition."""

    name: str
    ops: list[OpName]
125
126
@dataclass
class Macro(Node):
    """A `macro(NAME) = UOP + UOP + ...;` definition."""

    name: str
    uops: list[UOp]
131
132
@dataclass
class Family(Node):
    """A `family(NAME[, SIZE]) = { MEMBER, ... };` definition."""

    name: str
    size: str  # Variable giving the cache size in code units
    members: list[str]
138
139
class Parser(PLexer):
    """Recursive-descent parser for instruction definitions.

    Grammar methods return a node (or a list) when their rule matches and
    None when it does not.  Methods decorated with `@contextual` also have
    the token position reset when they return None, and get a `context`
    attached to the node they return.

    The token-level helpers used here (`expect`, `require`, `peek`, `next`,
    `getpos`, `setpos`, `make_syntax_error`) are inherited, not defined in
    this class.  NOTE(review): the comments below assume `expect` returns a
    falsy value without consuming on mismatch and `require` raises on
    mismatch -- confirm against PLexer.
    """

    @contextual
    def definition(self) -> InstDef | Super | Macro | Family | None:
        """Parse one top-level definition, trying each kind in turn."""
        if inst := self.inst_def():
            return inst
        # Named `sup` rather than `super` to avoid shadowing the builtin.
        if sup := self.super_def():
            return sup
        if macro := self.macro_def():
            return macro
        if family := self.family_def():
            return family
        return None

    @contextual
    def inst_def(self) -> InstDef | None:
        """Parse an instruction header followed by its block.

        Raises a syntax error if a header matched but no block follows.
        """
        if hdr := self.inst_header():
            if block := self.block():
                return InstDef(
                    hdr.override,
                    hdr.register,
                    hdr.kind,
                    hdr.name,
                    hdr.inputs,
                    hdr.outputs,
                    block,
                )
            raise self.make_syntax_error("Expected block")
        return None

    @contextual
    def inst_header(self) -> InstHeader | None:
        # [override] inst(NAME)
        #   | [override] [register] inst(NAME, (inputs -- outputs))
        #   | [override] [register] op(NAME, (inputs -- outputs))
        # TODO: Make INST a keyword in the lexer.
        override = bool(self.expect(lx.OVERRIDE))
        register = bool(self.expect(lx.REGISTER))
        kind_tkn = self.expect(lx.IDENTIFIER)
        if not kind_tkn or kind_tkn.text not in ("inst", "op"):
            return None
        kind = kind_tkn.text
        if not self.expect(lx.LPAREN):
            return None
        name_tkn = self.expect(lx.IDENTIFIER)
        if not name_tkn:
            return None
        name = name_tkn.text
        if self.expect(lx.COMMA):
            inp, outp = self.io_effect()
            if self.expect(lx.RPAREN):
                # Only accept the header if a block opener comes next;
                # the brace is peeked at, not consumed.
                nxt = self.peek()
                if nxt and nxt.kind == lx.LBRACE:
                    return InstHeader(override, register, kind, name, inp, outp)
        elif self.expect(lx.RPAREN) and kind == "inst":
            # No legacy stack effect if kind is "op".
            return InstHeader(override, register, "legacy", name, [], [])
        return None

    def io_effect(self) -> tuple[list[InputEffect], list[OutputEffect]]:
        # '(' [inputs] '--' [outputs] ')'
        # Raises a syntax error on anything else; never returns None.
        if self.expect(lx.LPAREN):
            inputs = self.inputs() or []
            if self.expect(lx.MINUSMINUS):
                outputs = self.outputs() or []
                if self.expect(lx.RPAREN):
                    return inputs, outputs
        raise self.make_syntax_error("Expected stack effect")

    def inputs(self) -> list[InputEffect] | None:
        # input (',' input)*
        here = self.getpos()
        if inp := self.input():
            near = self.getpos()
            if self.expect(lx.COMMA):
                if rest := self.inputs():
                    return [inp] + rest
            # No further input after this one: back up to just after it
            # (this un-consumes a trailing comma, if any).
            self.setpos(near)
            return [inp]
        self.setpos(here)
        return None

    @contextual
    def input(self) -> InputEffect | None:
        """Parse one input: a cache effect if possible, else a stack effect."""
        return self.cache_effect() or self.stack_effect()

    def outputs(self) -> list[OutputEffect] | None:
        # output (, output)*
        here = self.getpos()
        if outp := self.output():
            near = self.getpos()
            if self.expect(lx.COMMA):
                if rest := self.outputs():
                    return [outp] + rest
            # Same backtracking as in inputs().
            self.setpos(near)
            return [outp]
        self.setpos(here)
        return None

    @contextual
    def output(self) -> OutputEffect | None:
        """Parse one output, which is always a stack effect."""
        return self.stack_effect()

    @contextual
    def cache_effect(self) -> CacheEffect | None:
        # IDENTIFIER '/' NUMBER
        if tkn := self.expect(lx.IDENTIFIER):
            if self.expect(lx.DIVIDE):
                num = self.require(lx.NUMBER).text
                try:
                    size = int(num)
                except ValueError:
                    raise self.make_syntax_error(f"Expected integer, got {num!r}")
                else:
                    return CacheEffect(tkn.text, size)
        return None

    @contextual
    def stack_effect(self) -> StackEffect | None:
        #   IDENTIFIER [':' IDENTIFIER] ['if' '(' expression ')']
        # | IDENTIFIER '[' expression ']'
        if tkn := self.expect(lx.IDENTIFIER):
            type_text = ""
            if self.expect(lx.COLON):
                type_text = self.require(lx.IDENTIFIER).text.strip()
            cond_text = ""
            if self.expect(lx.IF):
                self.require(lx.LPAREN)
                if not (cond := self.expression()):
                    raise self.make_syntax_error("Expected condition")
                self.require(lx.RPAREN)
                cond_text = cond.text.strip()
            size_text = ""
            if self.expect(lx.LBRACKET):
                # A size cannot be combined with a type or a condition.
                if type_text or cond_text:
                    raise self.make_syntax_error("Unexpected [")
                if not (size := self.expression()):
                    raise self.make_syntax_error("Expected expression")
                self.require(lx.RBRACKET)
                # Sized effects are given this fixed type.
                type_text = "PyObject **"
                size_text = size.text.strip()
            return StackEffect(tkn.text, type_text, cond_text, size_text)
        return None

    @contextual
    def expression(self) -> Expression | None:
        """Collect tokens up to the bracket/paren that closes the caller's group.

        Returns None if no tokens were collected.
        """
        tokens: list[lx.Token] = []
        # Start at 1 because the caller has already consumed the opener; the
        # matching closer drops the level to 0 and is left unconsumed.
        level = 1
        while tkn := self.peek():
            if tkn.kind in (lx.LBRACKET, lx.LPAREN):
                level += 1
            elif tkn.kind in (lx.RBRACKET, lx.RPAREN):
                level -= 1
                if level == 0:
                    break
            tokens.append(tkn)
            self.next()
        if not tokens:
            return None
        return Expression(lx.to_text(tokens).strip())

    @contextual
    def super_def(self) -> Super | None:
        # 'super' '(' IDENTIFIER ')' '=' ops ';'
        tkn = self.expect(lx.IDENTIFIER)
        if not tkn or tkn.text != "super":
            return None
        if not (
            self.expect(lx.LPAREN)
            and (name := self.expect(lx.IDENTIFIER))
            and self.expect(lx.RPAREN)
            and self.expect(lx.EQUALS)
        ):
            return None
        if ops := self.ops():
            self.require(lx.SEMI)
            return Super(name.text, ops)
        return None

    def ops(self) -> list[OpName] | None:
        # op ('+' op)*
        # Raises a syntax error if a '+' is not followed by an op name,
        # matching what uops() does for macros.
        if op := self.op():
            ops = [op]
            while self.expect(lx.PLUS):
                if op := self.op():
                    ops.append(op)
                else:
                    raise self.make_syntax_error("Expected op name")
            return ops
        return None

    @contextual
    def op(self) -> OpName | None:
        # IDENTIFIER
        if tkn := self.expect(lx.IDENTIFIER):
            return OpName(tkn.text)
        return None

    @contextual
    def macro_def(self) -> Macro | None:
        # 'macro' '(' IDENTIFIER ')' '=' uops ';'
        tkn = self.expect(lx.IDENTIFIER)
        if not tkn or tkn.text != "macro":
            return None
        if not (
            self.expect(lx.LPAREN)
            and (name := self.expect(lx.IDENTIFIER))
            and self.expect(lx.RPAREN)
            and self.expect(lx.EQUALS)
        ):
            return None
        if uops := self.uops():
            self.require(lx.SEMI)
            return Macro(name.text, uops)
        return None

    def uops(self) -> list[UOp] | None:
        # uop ('+' uop)*
        if uop := self.uop():
            uops = [uop]
            while self.expect(lx.PLUS):
                if uop := self.uop():
                    uops.append(uop)
                else:
                    raise self.make_syntax_error("Expected op name or cache effect")
            return uops
        return None

    @contextual
    def uop(self) -> UOp | None:
        # IDENTIFIER ['/' NUMBER]
        if tkn := self.expect(lx.IDENTIFIER):
            if self.expect(lx.DIVIDE):
                if num := self.expect(lx.NUMBER):
                    try:
                        size = int(num.text)
                    except ValueError:
                        raise self.make_syntax_error(
                            f"Expected integer, got {num.text!r}"
                        )
                    else:
                        return CacheEffect(tkn.text, size)
                raise self.make_syntax_error("Expected integer")
            else:
                return OpName(tkn.text)
        return None

    @contextual
    def family_def(self) -> Family | None:
        # 'family' '(' IDENTIFIER [',' IDENTIFIER] ')' '=' '{' members '}' ';'
        tkn = self.expect(lx.IDENTIFIER)
        if not tkn or tkn.text != "family":
            return None
        if not self.expect(lx.LPAREN):
            return None
        name = self.expect(lx.IDENTIFIER)
        if not name:
            return None
        size = None
        if self.expect(lx.COMMA):
            size = self.expect(lx.IDENTIFIER)
            if not size:
                raise self.make_syntax_error("Expected identifier")
        if not (self.expect(lx.RPAREN) and self.expect(lx.EQUALS)):
            return None
        if not self.expect(lx.LBRACE):
            raise self.make_syntax_error("Expected {")
        members = self.members()
        if not members:
            return None
        if not (self.expect(lx.RBRACE) and self.expect(lx.SEMI)):
            return None
        return Family(name.text, size.text if size else "", members)

    def members(self) -> list[str] | None:
        # IDENTIFIER (',' IDENTIFIER)* [','], and the next token must be '}'
        here = self.getpos()
        if tkn := self.expect(lx.IDENTIFIER):
            members = [tkn.text]
            while self.expect(lx.COMMA):
                if tkn := self.expect(lx.IDENTIFIER):
                    members.append(tkn.text)
                else:
                    # Trailing comma: stop collecting.
                    break
            # The closing brace is checked here but left for the caller.
            peek = self.peek()
            if not peek or peek.kind != lx.RBRACE:
                raise self.make_syntax_error("Expected comma or right brace")
            return members
        self.setpos(here)
        return None

    @contextual
    def block(self) -> Block | None:
        """Consume a C blob; return a Block if any tokens were consumed."""
        if self.c_blob():
            return Block()
        return None

    def c_blob(self) -> list[lx.Token]:
        """Consume raw tokens through the end of one bracketed group.

        Nesting is counted over braces, parens and brackets together.
        Reading stops after the token that leaves the nesting level at or
        below zero, which is the very first token if it is not an opener.
        """
        tokens: list[lx.Token] = []
        level = 0
        while tkn := self.next(raw=True):
            tokens.append(tkn)
            if tkn.kind in (lx.LBRACE, lx.LPAREN, lx.LBRACKET):
                level += 1
            elif tkn.kind in (lx.RBRACE, lx.RPAREN, lx.RBRACKET):
                level -= 1
            if level <= 0:
                break
        return tokens
400
401
# Manual test driver:
#   <script> FILE      parse the text between the BEGIN/END BYTECODES markers
#   <script> -c TEXT   parse TEXT directly
#   <script>           parse a small built-in sample
# In every case only the first definition is parsed and printed.
if __name__ == "__main__":
    import sys

    if sys.argv[1:]:
        filename = sys.argv[1]
        if filename == "-c" and sys.argv[2:]:
            src = sys.argv[2]
            filename = "<string>"
        else:
            with open(filename, encoding="utf-8") as f:
                src = f.read()
            srclines = src.splitlines()
            try:
                begin = srclines.index("// BEGIN BYTECODES //")
                end = srclines.index("// END BYTECODES //")
            except ValueError:
                # Give a readable message instead of a bare list.index traceback.
                sys.exit(f"{filename}: missing BEGIN/END BYTECODES marker line")
            # Keep only the lines strictly between the two markers.
            src = "\n".join(srclines[begin + 1 : end])
    else:
        filename = "<default>"
        src = "if (x) { x.foo; // comment\n}"
    parser = Parser(src, filename)
    x = parser.definition()
    print(x)