(root)/
Python-3.12.0/
Parser/
asdl.py
       1  #-------------------------------------------------------------------------------
       2  # Parser for ASDL [1] definition files. Reads in an ASDL description and parses
       3  # it into an AST that describes it.
       4  #
       5  # The EBNF we're parsing here: Figure 1 of the paper [1]. Extended to support
       6  # modules and attributes after a product. Words starting with Capital letters
       7  # are terminals. Literal tokens are in "double quotes". Others are
       8  # non-terminals. Id is either TokenId or ConstructorId.
       9  #
      10  # module        ::= "module" Id "{" [definitions] "}"
      11  # definitions   ::= { TypeId "=" type }
      12  # type          ::= product | sum
      13  # product       ::= fields ["attributes" fields]
      14  # fields        ::= "(" { field, "," } field ")"
      15  # field         ::= TypeId ["?" | "*"] [Id]
      16  # sum           ::= constructor { "|" constructor } ["attributes" fields]
      17  # constructor   ::= ConstructorId [fields]
      18  #
      19  # [1] "The Zephyr Abstract Syntax Description Language" by Wang, et. al. See
      20  #     http://asdl.sourceforge.net/
      21  #-------------------------------------------------------------------------------
      22  from collections import namedtuple
      23  import re
      24  
      25  __all__ = [
      26      'builtin_types', 'parse', 'AST', 'Module', 'Type', 'Constructor',
      27      'Field', 'Sum', 'Product', 'VisitorBase', 'Check', 'check']
      28  
      29  # The following classes define nodes into which the ASDL description is parsed.
      30  # Note: this is a "meta-AST". ASDL files (such as Python.asdl) describe the AST
      31  # structure used by a programming language. But ASDL files themselves need to be
      32  # parsed. This module parses ASDL files and uses a simple AST to represent them.
      33  # See the EBNF at the top of the file to understand the logical connection
      34  # between the various node types.
      35  
      36  builtin_types = {'identifier', 'string', 'int', 'constant'}
      37  
      38  class ESC[4;38;5;81mAST:
      39      def __repr__(self):
      40          raise NotImplementedError
      41  
      42  class ESC[4;38;5;81mModule(ESC[4;38;5;149mAST):
      43      def __init__(self, name, dfns):
      44          self.name = name
      45          self.dfns = dfns
      46          self.types = {type.name: type.value for type in dfns}
      47  
      48      def __repr__(self):
      49          return 'Module({0.name}, {0.dfns})'.format(self)
      50  
      51  class ESC[4;38;5;81mType(ESC[4;38;5;149mAST):
      52      def __init__(self, name, value):
      53          self.name = name
      54          self.value = value
      55  
      56      def __repr__(self):
      57          return 'Type({0.name}, {0.value})'.format(self)
      58  
      59  class ESC[4;38;5;81mConstructor(ESC[4;38;5;149mAST):
      60      def __init__(self, name, fields=None):
      61          self.name = name
      62          self.fields = fields or []
      63  
      64      def __repr__(self):
      65          return 'Constructor({0.name}, {0.fields})'.format(self)
      66  
      67  class ESC[4;38;5;81mField(ESC[4;38;5;149mAST):
      68      def __init__(self, type, name=None, seq=False, opt=False):
      69          self.type = type
      70          self.name = name
      71          self.seq = seq
      72          self.opt = opt
      73  
      74      def __str__(self):
      75          if self.seq:
      76              extra = "*"
      77          elif self.opt:
      78              extra = "?"
      79          else:
      80              extra = ""
      81  
      82          return "{}{} {}".format(self.type, extra, self.name)
      83  
      84      def __repr__(self):
      85          if self.seq:
      86              extra = ", seq=True"
      87          elif self.opt:
      88              extra = ", opt=True"
      89          else:
      90              extra = ""
      91          if self.name is None:
      92              return 'Field({0.type}{1})'.format(self, extra)
      93          else:
      94              return 'Field({0.type}, {0.name}{1})'.format(self, extra)
      95  
      96  class ESC[4;38;5;81mSum(ESC[4;38;5;149mAST):
      97      def __init__(self, types, attributes=None):
      98          self.types = types
      99          self.attributes = attributes or []
     100  
     101      def __repr__(self):
     102          if self.attributes:
     103              return 'Sum({0.types}, {0.attributes})'.format(self)
     104          else:
     105              return 'Sum({0.types})'.format(self)
     106  
     107  class ESC[4;38;5;81mProduct(ESC[4;38;5;149mAST):
     108      def __init__(self, fields, attributes=None):
     109          self.fields = fields
     110          self.attributes = attributes or []
     111  
     112      def __repr__(self):
     113          if self.attributes:
     114              return 'Product({0.fields}, {0.attributes})'.format(self)
     115          else:
     116              return 'Product({0.fields})'.format(self)
     117  
     118  # A generic visitor for the meta-AST that describes ASDL. This can be used by
     119  # emitters. Note that this visitor does not provide a generic visit method, so a
     120  # subclass needs to define visit methods from visitModule to as deep as the
     121  # interesting node.
     122  # We also define a Check visitor that makes sure the parsed ASDL is well-formed.
     123  
     124  class ESC[4;38;5;81mVisitorBase(ESC[4;38;5;149mobject):
     125      """Generic tree visitor for ASTs."""
     126      def __init__(self):
     127          self.cache = {}
     128  
     129      def visit(self, obj, *args):
     130          klass = obj.__class__
     131          meth = self.cache.get(klass)
     132          if meth is None:
     133              methname = "visit" + klass.__name__
     134              meth = getattr(self, methname, None)
     135              self.cache[klass] = meth
     136          if meth:
     137              try:
     138                  meth(obj, *args)
     139              except Exception as e:
     140                  print("Error visiting %r: %s" % (obj, e))
     141                  raise
     142  
     143  class ESC[4;38;5;81mCheck(ESC[4;38;5;149mVisitorBase):
     144      """A visitor that checks a parsed ASDL tree for correctness.
     145  
     146      Errors are printed and accumulated.
     147      """
     148      def __init__(self):
     149          super(Check, self).__init__()
     150          self.cons = {}
     151          self.errors = 0
     152          self.types = {}
     153  
     154      def visitModule(self, mod):
     155          for dfn in mod.dfns:
     156              self.visit(dfn)
     157  
     158      def visitType(self, type):
     159          self.visit(type.value, str(type.name))
     160  
     161      def visitSum(self, sum, name):
     162          for t in sum.types:
     163              self.visit(t, name)
     164  
     165      def visitConstructor(self, cons, name):
     166          key = str(cons.name)
     167          conflict = self.cons.get(key)
     168          if conflict is None:
     169              self.cons[key] = name
     170          else:
     171              print('Redefinition of constructor {}'.format(key))
     172              print('Defined in {} and {}'.format(conflict, name))
     173              self.errors += 1
     174          for f in cons.fields:
     175              self.visit(f, key)
     176  
     177      def visitField(self, field, name):
     178          key = str(field.type)
     179          l = self.types.setdefault(key, [])
     180          l.append(name)
     181  
     182      def visitProduct(self, prod, name):
     183          for f in prod.fields:
     184              self.visit(f, name)
     185  
     186  def check(mod):
     187      """Check the parsed ASDL tree for correctness.
     188  
     189      Return True if success. For failure, the errors are printed out and False
     190      is returned.
     191      """
     192      v = Check()
     193      v.visit(mod)
     194  
     195      for t in v.types:
     196          if t not in mod.types and not t in builtin_types:
     197              v.errors += 1
     198              uses = ", ".join(v.types[t])
     199              print('Undefined type {}, used in {}'.format(t, uses))
     200      return not v.errors
     201  
     202  # The ASDL parser itself comes next. The only interesting external interface
     203  # here is the top-level parse function.
     204  
     205  def parse(filename):
     206      """Parse ASDL from the given file and return a Module node describing it."""
     207      with open(filename, encoding="utf-8") as f:
     208          parser = ASDLParser()
     209          return parser.parse(f.read())
     210  
     211  # Types for describing tokens in an ASDL specification.
     212  class ESC[4;38;5;81mTokenKind:
     213      """TokenKind is provides a scope for enumerated token kinds."""
     214      (ConstructorId, TypeId, Equals, Comma, Question, Pipe, Asterisk,
     215       LParen, RParen, LBrace, RBrace) = range(11)
     216  
     217      operator_table = {
     218          '=': Equals, ',': Comma,    '?': Question, '|': Pipe,    '(': LParen,
     219          ')': RParen, '*': Asterisk, '{': LBrace,   '}': RBrace}
     220  
     221  Token = namedtuple('Token', 'kind value lineno')
     222  
     223  class ESC[4;38;5;81mASDLSyntaxError(ESC[4;38;5;149mException):
     224      def __init__(self, msg, lineno=None):
     225          self.msg = msg
     226          self.lineno = lineno or '<unknown>'
     227  
     228      def __str__(self):
     229          return 'Syntax error on line {0.lineno}: {0.msg}'.format(self)
     230  
     231  def tokenize_asdl(buf):
     232      """Tokenize the given buffer. Yield Token objects."""
     233      for lineno, line in enumerate(buf.splitlines(), 1):
     234          for m in re.finditer(r'\s*(\w+|--.*|.)', line.strip()):
     235              c = m.group(1)
     236              if c[0].isalpha():
     237                  # Some kind of identifier
     238                  if c[0].isupper():
     239                      yield Token(TokenKind.ConstructorId, c, lineno)
     240                  else:
     241                      yield Token(TokenKind.TypeId, c, lineno)
     242              elif c[:2] == '--':
     243                  # Comment
     244                  break
     245              else:
     246                  # Operators
     247                  try:
     248                      op_kind = TokenKind.operator_table[c]
     249                  except KeyError:
     250                      raise ASDLSyntaxError('Invalid operator %s' % c, lineno)
     251                  yield Token(op_kind, c, lineno)
     252  
     253  class ESC[4;38;5;81mASDLParser:
     254      """Parser for ASDL files.
     255  
     256      Create, then call the parse method on a buffer containing ASDL.
     257      This is a simple recursive descent parser that uses tokenize_asdl for the
     258      lexing.
     259      """
     260      def __init__(self):
     261          self._tokenizer = None
     262          self.cur_token = None
     263  
     264      def parse(self, buf):
     265          """Parse the ASDL in the buffer and return an AST with a Module root.
     266          """
     267          self._tokenizer = tokenize_asdl(buf)
     268          self._advance()
     269          return self._parse_module()
     270  
     271      def _parse_module(self):
     272          if self._at_keyword('module'):
     273              self._advance()
     274          else:
     275              raise ASDLSyntaxError(
     276                  'Expected "module" (found {})'.format(self.cur_token.value),
     277                  self.cur_token.lineno)
     278          name = self._match(self._id_kinds)
     279          self._match(TokenKind.LBrace)
     280          defs = self._parse_definitions()
     281          self._match(TokenKind.RBrace)
     282          return Module(name, defs)
     283  
     284      def _parse_definitions(self):
     285          defs = []
     286          while self.cur_token.kind == TokenKind.TypeId:
     287              typename = self._advance()
     288              self._match(TokenKind.Equals)
     289              type = self._parse_type()
     290              defs.append(Type(typename, type))
     291          return defs
     292  
     293      def _parse_type(self):
     294          if self.cur_token.kind == TokenKind.LParen:
     295              # If we see a (, it's a product
     296              return self._parse_product()
     297          else:
     298              # Otherwise it's a sum. Look for ConstructorId
     299              sumlist = [Constructor(self._match(TokenKind.ConstructorId),
     300                                     self._parse_optional_fields())]
     301              while self.cur_token.kind  == TokenKind.Pipe:
     302                  # More constructors
     303                  self._advance()
     304                  sumlist.append(Constructor(
     305                                  self._match(TokenKind.ConstructorId),
     306                                  self._parse_optional_fields()))
     307              return Sum(sumlist, self._parse_optional_attributes())
     308  
     309      def _parse_product(self):
     310          return Product(self._parse_fields(), self._parse_optional_attributes())
     311  
     312      def _parse_fields(self):
     313          fields = []
     314          self._match(TokenKind.LParen)
     315          while self.cur_token.kind == TokenKind.TypeId:
     316              typename = self._advance()
     317              is_seq, is_opt = self._parse_optional_field_quantifier()
     318              id = (self._advance() if self.cur_token.kind in self._id_kinds
     319                                    else None)
     320              fields.append(Field(typename, id, seq=is_seq, opt=is_opt))
     321              if self.cur_token.kind == TokenKind.RParen:
     322                  break
     323              elif self.cur_token.kind == TokenKind.Comma:
     324                  self._advance()
     325          self._match(TokenKind.RParen)
     326          return fields
     327  
     328      def _parse_optional_fields(self):
     329          if self.cur_token.kind == TokenKind.LParen:
     330              return self._parse_fields()
     331          else:
     332              return None
     333  
     334      def _parse_optional_attributes(self):
     335          if self._at_keyword('attributes'):
     336              self._advance()
     337              return self._parse_fields()
     338          else:
     339              return None
     340  
     341      def _parse_optional_field_quantifier(self):
     342          is_seq, is_opt = False, False
     343          if self.cur_token.kind == TokenKind.Asterisk:
     344              is_seq = True
     345              self._advance()
     346          elif self.cur_token.kind == TokenKind.Question:
     347              is_opt = True
     348              self._advance()
     349          return is_seq, is_opt
     350  
     351      def _advance(self):
     352          """ Return the value of the current token and read the next one into
     353              self.cur_token.
     354          """
     355          cur_val = None if self.cur_token is None else self.cur_token.value
     356          try:
     357              self.cur_token = next(self._tokenizer)
     358          except StopIteration:
     359              self.cur_token = None
     360          return cur_val
     361  
     362      _id_kinds = (TokenKind.ConstructorId, TokenKind.TypeId)
     363  
     364      def _match(self, kind):
     365          """The 'match' primitive of RD parsers.
     366  
     367          * Verifies that the current token is of the given kind (kind can
     368            be a tuple, in which the kind must match one of its members).
     369          * Returns the value of the current token
     370          * Reads in the next token
     371          """
     372          if (isinstance(kind, tuple) and self.cur_token.kind in kind or
     373              self.cur_token.kind == kind
     374              ):
     375              value = self.cur_token.value
     376              self._advance()
     377              return value
     378          else:
     379              raise ASDLSyntaxError(
     380                  'Unmatched {} (found {})'.format(kind, self.cur_token.kind),
     381                  self.cur_token.lineno)
     382  
     383      def _at_keyword(self, keyword):
     384          return (self.cur_token.kind == TokenKind.TypeId and
     385                  self.cur_token.value == keyword)