(root)/
Python-3.12.0/
Tools/
cases_generator/
parser.py
       1  """Parser for bytecodes.inst."""
       2  
       3  from dataclasses import dataclass, field
       4  from typing import NamedTuple, Callable, TypeVar, Literal
       5  
       6  import lexer as lx
       7  from plexer import PLexer
       8  
       9  
      10  P = TypeVar("P", bound="Parser")
      11  N = TypeVar("N", bound="Node")
      12  
      13  
      14  def contextual(func: Callable[[P], N | None]) -> Callable[[P], N | None]:
      15      # Decorator to wrap grammar methods.
      16      # Resets position if `func` returns None.
      17      def contextual_wrapper(self: P) -> N | None:
      18          begin = self.getpos()
      19          res = func(self)
      20          if res is None:
      21              self.setpos(begin)
      22              return
      23          end = self.getpos()
      24          res.context = Context(begin, end, self)
      25          return res
      26  
      27      return contextual_wrapper
      28  
      29  
class Context(NamedTuple):
    # Span attached to a Node by the `contextual` decorator:
    # the parser positions before and after the rule ran, plus the
    # parser that produced them.
    begin: int
    end: int
    owner: PLexer

    def __repr__(self) -> str:
        # Shown as "<filename: begin-end>".
        return f"<{self.owner.filename}: {self.begin}-{self.end}>"
      37  
      38  
      39  @dataclass
      40  class ESC[4;38;5;81mNode:
      41      context: Context | None = field(init=False, compare=False, default=None)
      42  
      43      @property
      44      def text(self) -> str:
      45          return self.to_text()
      46  
      47      def to_text(self, dedent: int = 0) -> str:
      48          context = self.context
      49          if not context:
      50              return ""
      51          return lx.to_text(self.tokens, dedent)
      52  
      53      @property
      54      def tokens(self) -> list[lx.Token]:
      55          context = self.context
      56          if not context:
      57              return []
      58          tokens = context.owner.tokens
      59          begin = context.begin
      60          end = context.end
      61          return tokens[begin:end]
      62  
      63  
@dataclass
class Block(Node):
    # This just holds a context which has the list of tokens.
    # It adds no fields of its own; the tokens are reached through
    # the inherited Node.tokens / Node.text properties.
    pass
      68  
      69  
@dataclass
class StackEffect(Node):
    # One named stack item in an instruction's inputs or outputs.
    # Built by Parser.stack_effect(); fields that were not written in
    # the source are left as empty strings.
    name: str
    type: str = ""  # Optional `:type`
    cond: str = ""  # Optional `if (cond)`
    size: str = ""  # Optional `[size]`
    # Note: size cannot be combined with type or cond
    # (when `[size]` is given, the parser sets type to "PyObject **").
      77  
      78  
@dataclass
class Expression(Node):
    # Result of Parser.expression().  Despite its name, `size` holds the
    # stripped text of all tokens collected for the expression.
    size: str
      82  
      83  
@dataclass
class CacheEffect(Node):
    # Parsed from `IDENTIFIER '/' NUMBER`: `name` is the identifier text
    # and `size` is the number converted with int().
    name: str
    size: int
      88  
      89  
@dataclass
class OpName(Node):
    # A bare identifier naming an op, as used on the right-hand side of
    # `super(...) = ...` and `macro(...) = ...` definitions.
    name: str
      93  
      94  
# Inputs may mix stack items and cache entries; outputs are stack items only.
InputEffect = StackEffect | CacheEffect
OutputEffect = StackEffect
# A macro component is either an op name or a cache effect.
UOp = OpName | CacheEffect
      98  
      99  
@dataclass
class InstHeader(Node):
    # Result of Parser.inst_header(): everything before the `{` of an
    # instruction definition.
    override: bool  # True if the `override` token was present
    register: bool  # True if the `register` token was present
    kind: Literal["inst", "op", "legacy"]  # Legacy means no (inputs -- outputs)
    name: str
    inputs: list[InputEffect]  # Empty for "legacy" headers
    outputs: list[OutputEffect]  # Empty for "legacy" headers
     108  
     109  
@dataclass
class InstDef(Node):
    # Result of Parser.inst_def(): the fields of the InstHeader, copied
    # one for one, plus the Block that follows the header.
    override: bool
    register: bool
    kind: Literal["inst", "op", "legacy"]
    name: str
    inputs: list[InputEffect]
    outputs: list[OutputEffect]
    block: Block
     119  
     120  
@dataclass
class Super(Node):
    # Result of Parser.super_def(): `super(NAME) = OP + OP ... ;`
    name: str
    ops: list[OpName]
     125  
     126  
@dataclass
class Macro(Node):
    # Result of Parser.macro_def(): `macro(NAME) = UOP + UOP ... ;`
    # where each component is an op name or a cache effect.
    name: str
    uops: list[UOp]
     131  
     132  
@dataclass
class Family(Node):
    # Result of Parser.family_def():
    # `family(NAME [, SIZE]) = { MEMBER, ... };`
    name: str
    size: str  # Variable giving the cache size in code units
    # (the parser stores "" here when no size identifier was given)
    members: list[str]
     138  
     139  
     140  class ESC[4;38;5;81mParser(ESC[4;38;5;149mPLexer):
     141      @contextual
     142      def definition(self) -> InstDef | Super | Macro | Family | None:
     143          if inst := self.inst_def():
     144              return inst
     145          if super := self.super_def():
     146              return super
     147          if macro := self.macro_def():
     148              return macro
     149          if family := self.family_def():
     150              return family
     151  
     152      @contextual
     153      def inst_def(self) -> InstDef | None:
     154          if hdr := self.inst_header():
     155              if block := self.block():
     156                  return InstDef(
     157                      hdr.override, hdr.register, hdr.kind, hdr.name, hdr.inputs, hdr.outputs, block
     158                  )
     159              raise self.make_syntax_error("Expected block")
     160          return None
     161  
     162      @contextual
     163      def inst_header(self) -> InstHeader | None:
     164          # [override] inst(NAME)
     165          #   | [override] [register] inst(NAME, (inputs -- outputs))
     166          #   | [override] [register] op(NAME, (inputs -- outputs))
     167          # TODO: Make INST a keyword in the lexer.
     168          override = bool(self.expect(lx.OVERRIDE))
     169          register = bool(self.expect(lx.REGISTER))
     170          if (tkn := self.expect(lx.IDENTIFIER)) and (kind := tkn.text) in ("inst", "op"):
     171              if self.expect(lx.LPAREN) and (tkn := self.expect(lx.IDENTIFIER)):
     172                  name = tkn.text
     173                  if self.expect(lx.COMMA):
     174                      inp, outp = self.io_effect()
     175                      if self.expect(lx.RPAREN):
     176                          if (tkn := self.peek()) and tkn.kind == lx.LBRACE:
     177                              return InstHeader(override, register, kind, name, inp, outp)
     178                  elif self.expect(lx.RPAREN) and kind == "inst":
     179                      # No legacy stack effect if kind is "op".
     180                      return InstHeader(override, register, "legacy", name, [], [])
     181          return None
     182  
     183      def io_effect(self) -> tuple[list[InputEffect], list[OutputEffect]]:
     184          # '(' [inputs] '--' [outputs] ')'
     185          if self.expect(lx.LPAREN):
     186              inputs = self.inputs() or []
     187              if self.expect(lx.MINUSMINUS):
     188                  outputs = self.outputs() or []
     189                  if self.expect(lx.RPAREN):
     190                      return inputs, outputs
     191          raise self.make_syntax_error("Expected stack effect")
     192  
     193      def inputs(self) -> list[InputEffect] | None:
     194          # input (',' input)*
     195          here = self.getpos()
     196          if inp := self.input():
     197              near = self.getpos()
     198              if self.expect(lx.COMMA):
     199                  if rest := self.inputs():
     200                      return [inp] + rest
     201              self.setpos(near)
     202              return [inp]
     203          self.setpos(here)
     204          return None
     205  
     206      @contextual
     207      def input(self) -> InputEffect | None:
     208          return self.cache_effect() or self.stack_effect()
     209  
     210      def outputs(self) -> list[OutputEffect] | None:
     211          # output (, output)*
     212          here = self.getpos()
     213          if outp := self.output():
     214              near = self.getpos()
     215              if self.expect(lx.COMMA):
     216                  if rest := self.outputs():
     217                      return [outp] + rest
     218              self.setpos(near)
     219              return [outp]
     220          self.setpos(here)
     221          return None
     222  
     223      @contextual
     224      def output(self) -> OutputEffect | None:
     225          return self.stack_effect()
     226  
     227      @contextual
     228      def cache_effect(self) -> CacheEffect | None:
     229          # IDENTIFIER '/' NUMBER
     230          if tkn := self.expect(lx.IDENTIFIER):
     231              if self.expect(lx.DIVIDE):
     232                  num = self.require(lx.NUMBER).text
     233                  try:
     234                      size = int(num)
     235                  except ValueError:
     236                      raise self.make_syntax_error(f"Expected integer, got {num!r}")
     237                  else:
     238                      return CacheEffect(tkn.text, size)
     239  
     240      @contextual
     241      def stack_effect(self) -> StackEffect | None:
     242          #   IDENTIFIER [':' IDENTIFIER] ['if' '(' expression ')']
     243          # | IDENTIFIER '[' expression ']'
     244          if tkn := self.expect(lx.IDENTIFIER):
     245              type_text = ""
     246              if self.expect(lx.COLON):
     247                  type_text = self.require(lx.IDENTIFIER).text.strip()
     248              cond_text = ""
     249              if self.expect(lx.IF):
     250                  self.require(lx.LPAREN)
     251                  if not (cond := self.expression()):
     252                      raise self.make_syntax_error("Expected condition")
     253                  self.require(lx.RPAREN)
     254                  cond_text = cond.text.strip()
     255              size_text = ""
     256              if self.expect(lx.LBRACKET):
     257                  if type_text or cond_text:
     258                      raise self.make_syntax_error("Unexpected [")
     259                  if not (size := self.expression()):
     260                      raise self.make_syntax_error("Expected expression")
     261                  self.require(lx.RBRACKET)
     262                  type_text = "PyObject **"
     263                  size_text = size.text.strip()
     264              return StackEffect(tkn.text, type_text, cond_text, size_text)
     265  
     266      @contextual
     267      def expression(self) -> Expression | None:
     268          tokens: list[lx.Token] = []
     269          level = 1
     270          while tkn := self.peek():
     271              if tkn.kind in (lx.LBRACKET, lx.LPAREN):
     272                  level += 1
     273              elif tkn.kind in (lx.RBRACKET, lx.RPAREN):
     274                  level -= 1
     275                  if level == 0:
     276                      break
     277              tokens.append(tkn)
     278              self.next()
     279          if not tokens:
     280              return None
     281          return Expression(lx.to_text(tokens).strip())
     282  
     283      @contextual
     284      def super_def(self) -> Super | None:
     285          if (tkn := self.expect(lx.IDENTIFIER)) and tkn.text == "super":
     286              if self.expect(lx.LPAREN):
     287                  if tkn := self.expect(lx.IDENTIFIER):
     288                      if self.expect(lx.RPAREN):
     289                          if self.expect(lx.EQUALS):
     290                              if ops := self.ops():
     291                                  self.require(lx.SEMI)
     292                                  res = Super(tkn.text, ops)
     293                                  return res
     294  
     295      def ops(self) -> list[OpName] | None:
     296          if op := self.op():
     297              ops = [op]
     298              while self.expect(lx.PLUS):
     299                  if op := self.op():
     300                      ops.append(op)
     301              return ops
     302  
     303      @contextual
     304      def op(self) -> OpName | None:
     305          if tkn := self.expect(lx.IDENTIFIER):
     306              return OpName(tkn.text)
     307  
     308      @contextual
     309      def macro_def(self) -> Macro | None:
     310          if (tkn := self.expect(lx.IDENTIFIER)) and tkn.text == "macro":
     311              if self.expect(lx.LPAREN):
     312                  if tkn := self.expect(lx.IDENTIFIER):
     313                      if self.expect(lx.RPAREN):
     314                          if self.expect(lx.EQUALS):
     315                              if uops := self.uops():
     316                                  self.require(lx.SEMI)
     317                                  res = Macro(tkn.text, uops)
     318                                  return res
     319  
     320      def uops(self) -> list[UOp] | None:
     321          if uop := self.uop():
     322              uops = [uop]
     323              while self.expect(lx.PLUS):
     324                  if uop := self.uop():
     325                      uops.append(uop)
     326                  else:
     327                      raise self.make_syntax_error("Expected op name or cache effect")
     328              return uops
     329  
     330      @contextual
     331      def uop(self) -> UOp | None:
     332          if tkn := self.expect(lx.IDENTIFIER):
     333              if self.expect(lx.DIVIDE):
     334                  if num := self.expect(lx.NUMBER):
     335                      try:
     336                          size = int(num.text)
     337                      except ValueError:
     338                          raise self.make_syntax_error(
     339                              f"Expected integer, got {num.text!r}"
     340                          )
     341                      else:
     342                          return CacheEffect(tkn.text, size)
     343                  raise self.make_syntax_error("Expected integer")
     344              else:
     345                  return OpName(tkn.text)
     346  
     347      @contextual
     348      def family_def(self) -> Family | None:
     349          if (tkn := self.expect(lx.IDENTIFIER)) and tkn.text == "family":
     350              size = None
     351              if self.expect(lx.LPAREN):
     352                  if tkn := self.expect(lx.IDENTIFIER):
     353                      if self.expect(lx.COMMA):
     354                          if not (size := self.expect(lx.IDENTIFIER)):
     355                              raise self.make_syntax_error("Expected identifier")
     356                      if self.expect(lx.RPAREN):
     357                          if self.expect(lx.EQUALS):
     358                              if not self.expect(lx.LBRACE):
     359                                  raise self.make_syntax_error("Expected {")
     360                              if members := self.members():
     361                                  if self.expect(lx.RBRACE) and self.expect(lx.SEMI):
     362                                      return Family(
     363                                          tkn.text, size.text if size else "", members
     364                                      )
     365          return None
     366  
     367      def members(self) -> list[str] | None:
     368          here = self.getpos()
     369          if tkn := self.expect(lx.IDENTIFIER):
     370              members = [tkn.text]
     371              while self.expect(lx.COMMA):
     372                  if tkn := self.expect(lx.IDENTIFIER):
     373                      members.append(tkn.text)
     374                  else:
     375                      break
     376              peek = self.peek()
     377              if not peek or peek.kind != lx.RBRACE:
     378                  raise self.make_syntax_error("Expected comma or right paren")
     379              return members
     380          self.setpos(here)
     381          return None
     382  
     383      @contextual
     384      def block(self) -> Block | None:
     385          if self.c_blob():
     386              return Block()
     387  
     388      def c_blob(self) -> list[lx.Token]:
     389          tokens: list[lx.Token] = []
     390          level = 0
     391          while tkn := self.next(raw=True):
     392              tokens.append(tkn)
     393              if tkn.kind in (lx.LBRACE, lx.LPAREN, lx.LBRACKET):
     394                  level += 1
     395              elif tkn.kind in (lx.RBRACE, lx.RPAREN, lx.RBRACKET):
     396                  level -= 1
     397                  if level <= 0:
     398                      break
     399          return tokens
     400  
     401  
     402  if __name__ == "__main__":
     403      import sys
     404  
     405      if sys.argv[1:]:
     406          filename = sys.argv[1]
     407          if filename == "-c" and sys.argv[2:]:
     408              src = sys.argv[2]
     409              filename = "<string>"
     410          else:
     411              with open(filename) as f:
     412                  src = f.read()
     413              srclines = src.splitlines()
     414              begin = srclines.index("// BEGIN BYTECODES //")
     415              end = srclines.index("// END BYTECODES //")
     416              src = "\n".join(srclines[begin + 1 : end])
     417      else:
     418          filename = "<default>"
     419          src = "if (x) { x.foo; // comment\n}"
     420      parser = Parser(src, filename)
     421      x = parser.definition()
     422      print(x)