(root)/
Python-3.11.7/
Tools/
peg_generator/
pegen/
build.py
import itertools
import pathlib
import sys
import sysconfig
import tempfile
import tokenize
from typing import IO, Dict, List, Optional, Set, Tuple

from pegen.c_generator import CParserGenerator
from pegen.grammar import Grammar
from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.parser import Parser
from pegen.parser_generator import ParserGenerator
from pegen.python_generator import PythonParserGenerator
from pegen.tokenizer import Tokenizer

MOD_DIR = pathlib.Path(__file__).resolve().parent

TokenDefinitions = Tuple[Dict[int, str], Dict[str, int], Set[str]]


def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> List[str]:
    flags = sysconfig.get_config_var(compiler_flags)
    py_flags_nodist = sysconfig.get_config_var(compiler_py_flags_nodist)
    if flags is None or py_flags_nodist is None:
        return []
    return f"{flags} {py_flags_nodist}".split()

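# Example (illustrative; the actual flag values depend on how this interpreter
# was built, and both variables may be undefined, e.g. on Windows builds):
#
#     extra = get_extra_flags("CFLAGS", "PY_CFLAGS_NODIST")
#     # -> e.g. ["-Wno-unused-result", "-Wsign-compare", ...], or [] when
#     #    either sysconfig variable has no value.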

def compile_c_extension(
    generated_source_path: str,
    build_dir: Optional[str] = None,
    verbose: bool = False,
    keep_asserts: bool = True,
    disable_optimization: bool = False,
    library_dir: Optional[str] = None,
) -> pathlib.Path:
    """Compile the generated source for a parser generator into an extension module.

    The extension module will be generated in the same directory as the provided path
    for the generated source, with the same basename (plus the extension module
    metadata). For example, for the source mydir/parser.c the generated extension
    on a Darwin system with Python 3.8 will be mydir/parser.cpython-38-darwin.so.

    If *build_dir* is provided, that path will be used as the temporary build
    directory of distutils (useful if you want to control where the intermediate
    build files are placed).

    If *library_dir* is provided, that path will be used as the directory for a
    static library of the common parser sources (useful when you are creating
    multiple extensions, as those sources then only need to be compiled once).
    """
    import distutils.log
    from distutils.core import Distribution, Extension
    from distutils.tests.support import fixup_build_ext  # type: ignore

    from distutils.ccompiler import new_compiler
    from distutils.dep_util import newer_group
    from distutils.sysconfig import customize_compiler

    if verbose:
        distutils.log.set_threshold(distutils.log.DEBUG)

    source_file_path = pathlib.Path(generated_source_path)
    extension_name = source_file_path.stem
    extra_compile_args = get_extra_flags("CFLAGS", "PY_CFLAGS_NODIST")
    extra_compile_args.append("-DPy_BUILD_CORE_MODULE")
    # Define _Py_TEST_PEGEN so that Parser/pegen.c does not call PyAST_Validate()
    extra_compile_args.append("-D_Py_TEST_PEGEN")
    extra_link_args = get_extra_flags("LDFLAGS", "PY_LDFLAGS_NODIST")
    if keep_asserts:
        extra_compile_args.append("-UNDEBUG")
    if disable_optimization:
        if sys.platform == 'win32':
            extra_compile_args.append("/Od")
            extra_link_args.append("/LTCG:OFF")
        else:
            extra_compile_args.append("-O0")
            if sysconfig.get_config_var("GNULD") == "yes":
                extra_link_args.append("-fno-lto")

    common_sources = [
        str(MOD_DIR.parent.parent.parent / "Python" / "Python-ast.c"),
        str(MOD_DIR.parent.parent.parent / "Python" / "asdl.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "pegen.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "pegen_errors.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "action_helpers.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "string_parser.c"),
        str(MOD_DIR.parent / "peg_extension" / "peg_extension.c"),
    ]
    include_dirs = [
        str(MOD_DIR.parent.parent.parent / "Include" / "internal"),
        str(MOD_DIR.parent.parent.parent / "Parser"),
    ]
    extension = Extension(
        extension_name,
        sources=[generated_source_path],
        extra_compile_args=extra_compile_args,
        extra_link_args=extra_link_args,
    )
    dist = Distribution({"name": extension_name, "ext_modules": [extension]})
    cmd = dist.get_command_obj("build_ext")
    fixup_build_ext(cmd)
    cmd.build_lib = str(source_file_path.parent)
    cmd.include_dirs = include_dirs
    if build_dir:
        cmd.build_temp = build_dir
    cmd.ensure_finalized()

    compiler = new_compiler()
    customize_compiler(compiler)
    compiler.set_include_dirs(cmd.include_dirs)
    compiler.set_library_dirs(cmd.library_dirs)
    # Build (or reuse) a static library from the common parser sources
    if library_dir:
        library_filename = compiler.library_filename(extension_name,
                                                     output_dir=library_dir)
        if newer_group(common_sources, library_filename, 'newer'):
            if sys.platform == 'win32':
                pdb = compiler.static_lib_format % (extension_name, '.pdb')
                compile_opts = [f"/Fd{library_dir}\\{pdb}"]
                compile_opts.extend(extra_compile_args)
            else:
                compile_opts = extra_compile_args
            objects = compiler.compile(common_sources,
                                       output_dir=library_dir,
                                       debug=cmd.debug,
                                       extra_postargs=compile_opts)
            compiler.create_static_lib(objects, extension_name,
                                       output_dir=library_dir,
                                       debug=cmd.debug)
        if sys.platform == 'win32':
            compiler.add_library_dir(library_dir)
            extension.libraries = [extension_name]
        elif sys.platform == 'darwin':
            compiler.set_link_objects([
                '-Wl,-force_load', library_filename,
            ])
        else:
            compiler.set_link_objects([
                '-Wl,--whole-archive', library_filename, '-Wl,--no-whole-archive',
            ])
    else:
        extension.sources[0:0] = common_sources

    # Compile the source code to object files.
    ext_path = cmd.get_ext_fullpath(extension_name)
    if newer_group(extension.sources, ext_path, 'newer'):
        objects = compiler.compile(extension.sources,
                                   output_dir=cmd.build_temp,
                                   debug=cmd.debug,
                                   extra_postargs=extra_compile_args)
    else:
        objects = compiler.object_filenames(extension.sources,
                                            output_dir=cmd.build_temp)
    # Now link the object files together into a "shared object"
    compiler.link_shared_object(
        objects, ext_path,
        libraries=cmd.get_libraries(extension),
        extra_postargs=extra_link_args,
        export_symbols=cmd.get_export_symbols(extension),
        debug=cmd.debug,
        build_temp=cmd.build_temp)

    return pathlib.Path(ext_path)

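# Example (illustrative sketch; "mydir/parser.c" is a hypothetical file, e.g.
# one produced by build_c_generator below):
#
#     so_path = compile_c_extension("mydir/parser.c", verbose=True)
#     # so_path is a pathlib.Path such as mydir/parser.cpython-311-darwin.so
#     # (the suffix varies by platform and Python version).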

def build_parser(
    grammar_file: str, verbose_tokenizer: bool = False, verbose_parser: bool = False
) -> Tuple[Grammar, Parser, Tokenizer]:
    with open(grammar_file) as file:
        tokenizer = Tokenizer(tokenize.generate_tokens(file.readline), verbose=verbose_tokenizer)
        parser = GrammarParser(tokenizer, verbose=verbose_parser)
        grammar = parser.start()

        if not grammar:
            raise parser.make_syntax_error(grammar_file)

    return grammar, parser, tokenizer

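# Example (illustrative; "data/simple.gram" is a hypothetical grammar file):
#
#     grammar, parser, tokenizer = build_parser("data/simple.gram")
#     print(list(grammar.rules))  # names of the rules parsed from the grammar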

def generate_token_definitions(tokens: IO[str]) -> TokenDefinitions:
    all_tokens = {}
    exact_tokens = {}
    non_exact_tokens = set()
    numbers = itertools.count(0)

    for line in tokens:
        line = line.strip()

        if not line or line.startswith("#"):
            continue

        pieces = line.split()
        index = next(numbers)

        if len(pieces) == 1:
            (token,) = pieces
            non_exact_tokens.add(token)
            all_tokens[index] = token
        elif len(pieces) == 2:
            token, op = pieces
            exact_tokens[op.strip("'")] = index
            all_tokens[index] = token
        else:
            raise ValueError(f"Unexpected line found in Tokens file: {line}")

    return all_tokens, exact_tokens, non_exact_tokens

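# Example (illustrative): for a Tokens file containing the two lines
#
#     ENDMARKER
#     LPAR '('
#
# this returns ({0: "ENDMARKER", 1: "LPAR"}, {"(": 1}, {"ENDMARKER"}),
# i.e. (all_tokens, exact_tokens, non_exact_tokens).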

def build_c_generator(
    grammar: Grammar,
    grammar_file: str,
    tokens_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> ParserGenerator:
    with open(tokens_file, "r") as tok_file:
        all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file)
    with open(output_file, "w") as file:
        gen: ParserGenerator = CParserGenerator(
            grammar, all_tokens, exact_tok, non_exact_tok, file, skip_actions=skip_actions
        )
        gen.generate(grammar_file)

    if compile_extension:
        with tempfile.TemporaryDirectory() as build_dir:
            compile_c_extension(
                output_file,
                build_dir=build_dir,
                verbose=verbose_c_extension,
                keep_asserts=keep_asserts_in_extension,
            )
    return gen

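# Example (illustrative sketch; all paths are hypothetical):
#
#     grammar, _, _ = build_parser("data/simple.gram")
#     gen = build_c_generator(
#         grammar, "data/simple.gram", "Grammar/Tokens", "parser.c",
#         compile_extension=True,
#     )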

def build_python_generator(
    grammar: Grammar,
    grammar_file: str,
    output_file: str,
    skip_actions: bool = False,
) -> ParserGenerator:
    with open(output_file, "w") as file:
        gen: ParserGenerator = PythonParserGenerator(grammar, file)  # TODO: skip_actions
        gen.generate(grammar_file)
    return gen

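# Example (illustrative sketch; paths are hypothetical):
#
#     grammar, _, _ = build_parser("data/simple.gram")
#     build_python_generator(grammar, "data/simple.gram", "parse.py")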

def build_c_parser_and_generator(
    grammar_file: str,
    tokens_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_tokenizer: bool = False,
    verbose_parser: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Generate rules, C parser, tokenizer, and parser generator for a given grammar.

    Args:
        grammar_file (string): Path for the grammar file
        tokens_file (string): Path for the tokens file
        output_file (string): Path for the output file
        compile_extension (bool, optional): Whether to compile the C extension.
          Defaults to False.
        verbose_tokenizer (bool, optional): Whether to display additional output
          when generating the tokenizer. Defaults to False.
        verbose_parser (bool, optional): Whether to display additional output
          when generating the parser. Defaults to False.
        verbose_c_extension (bool, optional): Whether to display additional
          output when compiling the C extension. Defaults to False.
        keep_asserts_in_extension (bool, optional): Whether to keep the assert statements
          when compiling the extension module. Defaults to True.
        skip_actions (bool, optional): Whether to pretend no rule has any actions.
    """
    grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
    gen = build_c_generator(
        grammar,
        grammar_file,
        tokens_file,
        output_file,
        compile_extension,
        verbose_c_extension,
        keep_asserts_in_extension,
        skip_actions=skip_actions,
    )

    return grammar, parser, tokenizer, gen

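# Example (illustrative sketch; paths are hypothetical). This is the one-call
# equivalent of build_parser() followed by build_c_generator():
#
#     grammar, parser, tokenizer, gen = build_c_parser_and_generator(
#         "data/simple.gram", "Grammar/Tokens", "parser.c",
#         compile_extension=True,
#     )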

def build_python_parser_and_generator(
    grammar_file: str,
    output_file: str,
    verbose_tokenizer: bool = False,
    verbose_parser: bool = False,
    skip_actions: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Generate rules, Python parser, tokenizer, and parser generator for a given grammar.

    Args:
        grammar_file (string): Path for the grammar file
        output_file (string): Path for the output file
        verbose_tokenizer (bool, optional): Whether to display additional output
          when generating the tokenizer. Defaults to False.
        verbose_parser (bool, optional): Whether to display additional output
          when generating the parser. Defaults to False.
        skip_actions (bool, optional): Whether to pretend no rule has any actions.
    """
    grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
    gen = build_python_generator(
        grammar,
        grammar_file,
        output_file,
        skip_actions=skip_actions,
    )
    return grammar, parser, tokenizer, gen
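
# Example (illustrative sketch; paths are hypothetical). This is the one-call
# equivalent of build_parser() followed by build_python_generator():
#
#     grammar, parser, tokenizer, gen = build_python_parser_and_generator(
#         "data/simple.gram", "parse.py"
#     )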