(root)/
Python-3.12.0/
Tools/
peg_generator/
pegen/
build.py
import itertools
import os
import pathlib
import sys
import sysconfig
import tempfile
import tokenize
from typing import IO, Dict, List, Optional, Set, Tuple

from pegen.c_generator import CParserGenerator
from pegen.grammar import Grammar
from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.parser import Parser
from pegen.parser_generator import ParserGenerator
from pegen.python_generator import PythonParserGenerator
from pegen.tokenizer import Tokenizer

MOD_DIR = pathlib.Path(__file__).resolve().parent

TokenDefinitions = Tuple[Dict[int, str], Dict[str, int], Set[str]]


def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> List[str]:
    flags = sysconfig.get_config_var(compiler_flags)
    py_flags_nodist = sysconfig.get_config_var(compiler_py_flags_nodist)
    if flags is None or py_flags_nodist is None:
        return []
    return f"{flags} {py_flags_nodist}".split()
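

# A hypothetical usage sketch for get_extra_flags(); the flag values in the
# comment below are invented examples, since the real output depends entirely
# on how this interpreter was configured.
def _demo_get_extra_flags() -> None:
    # On a typical Unix build this might print something like
    # ['-Wsign-compare', '-g', '-O3', '-Wall', ...]; on builds where either
    # config variable is unset (e.g. some Windows installs), it prints [].
    print(get_extra_flags("CFLAGS", "PY_CFLAGS_NODIST"))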


def fixup_build_ext(cmd):
    """Function needed to make build_ext tests pass.

    When Python was built with --enable-shared on Unix, -L. is not enough to
    find libpython<blah>.so, because regrtest runs in a tempdir, not in the
    source directory where the .so lives.

    When Python was built in debug mode on Windows, build_ext commands
    need their debug attribute set, and it is not done automatically for
    some reason.

    This function handles both of these things.  Example use:

        cmd = build_ext(dist)
        support.fixup_build_ext(cmd)
        cmd.ensure_finalized()

    Unlike most other Unix platforms, Mac OS X embeds absolute paths
    to shared libraries into executables, so the fixup is not needed there.

    Taken from distutils (it was part of the CPython stdlib until its
    removal in Python 3.12)
    """
    if os.name == 'nt':
        cmd.debug = sys.executable.endswith('_d.exe')
    elif sysconfig.get_config_var('Py_ENABLE_SHARED'):
        # To further add to the shared builds fun on Unix, we can't just add
        # library_dirs to the Extension() instance because that doesn't get
        # plumbed through to the final compiler command.
        runshared = sysconfig.get_config_var('RUNSHARED')
        if runshared is None:
            cmd.library_dirs = ['.']
        else:
            if sys.platform == 'darwin':
                cmd.library_dirs = []
            else:
                name, equals, value = runshared.partition('=')
                cmd.library_dirs = [d for d in value.split(os.pathsep) if d]


def compile_c_extension(
    generated_source_path: str,
    build_dir: Optional[str] = None,
    verbose: bool = False,
    keep_asserts: bool = True,
    disable_optimization: bool = False,
    library_dir: Optional[str] = None,
) -> pathlib.Path:
    """Compile the generated source for a parser generator into an extension module.

    The extension module will be generated in the same directory as the provided path
    for the generated source, with the same basename (in addition to extension module
    metadata). For example, for the source mydir/parser.c the generated extension
    on a Darwin system with Python 3.8 will be mydir/parser.cpython-38-darwin.so.

    If *build_dir* is provided, that path will be used as the temporary build directory
    of distutils (this is useful in case you want to use a temporary directory).

    If *library_dir* is provided, that path will be used as the directory for a
    static library of the common parser sources (this is useful in case you are
    creating multiple extensions).
    """
    import setuptools.logging

    from setuptools import Extension, Distribution
    from setuptools._distutils.dep_util import newer_group
    from setuptools._distutils.ccompiler import new_compiler
    from setuptools._distutils.sysconfig import customize_compiler

    if verbose:
        setuptools.logging.set_threshold(setuptools.logging.logging.DEBUG)

    source_file_path = pathlib.Path(generated_source_path)
    extension_name = source_file_path.stem
    extra_compile_args = get_extra_flags("CFLAGS", "PY_CFLAGS_NODIST")
    extra_compile_args.append("-DPy_BUILD_CORE_MODULE")
    # Define _Py_TEST_PEGEN to not call PyAST_Validate() in Parser/pegen.c
    extra_compile_args.append("-D_Py_TEST_PEGEN")
    extra_link_args = get_extra_flags("LDFLAGS", "PY_LDFLAGS_NODIST")
    if keep_asserts:
        extra_compile_args.append("-UNDEBUG")
    if disable_optimization:
        if sys.platform == 'win32':
            extra_compile_args.append("/Od")
            extra_link_args.append("/LTCG:OFF")
        else:
            extra_compile_args.append("-O0")
            if sysconfig.get_config_var("GNULD") == "yes":
                extra_link_args.append("-fno-lto")

    common_sources = [
        str(MOD_DIR.parent.parent.parent / "Python" / "Python-ast.c"),
        str(MOD_DIR.parent.parent.parent / "Python" / "asdl.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "pegen.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "pegen_errors.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "action_helpers.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "string_parser.c"),
        str(MOD_DIR.parent / "peg_extension" / "peg_extension.c"),
    ]
    include_dirs = [
        str(MOD_DIR.parent.parent.parent / "Include" / "internal"),
        str(MOD_DIR.parent.parent.parent / "Parser"),
    ]
    extension = Extension(
        extension_name,
        sources=[generated_source_path],
        extra_compile_args=extra_compile_args,
        extra_link_args=extra_link_args,
    )
    dist = Distribution({"name": extension_name, "ext_modules": [extension]})
    cmd = dist.get_command_obj("build_ext")
    fixup_build_ext(cmd)
    cmd.build_lib = str(source_file_path.parent)
    cmd.include_dirs = include_dirs
    if build_dir:
        cmd.build_temp = build_dir
    cmd.ensure_finalized()

    compiler = new_compiler()
    customize_compiler(compiler)
    compiler.set_include_dirs(cmd.include_dirs)
    compiler.set_library_dirs(cmd.library_dirs)
    # Build a static library of the common parser sources, if requested.
    if library_dir:
        library_filename = compiler.library_filename(extension_name,
                                                     output_dir=library_dir)
        if newer_group(common_sources, library_filename, 'newer'):
            if sys.platform == 'win32':
                pdb = compiler.static_lib_format % (extension_name, '.pdb')
                compile_opts = [f"/Fd{library_dir}\\{pdb}"]
                compile_opts.extend(extra_compile_args)
            else:
                compile_opts = extra_compile_args
            objects = compiler.compile(common_sources,
                                       output_dir=library_dir,
                                       debug=cmd.debug,
                                       extra_postargs=compile_opts)
            compiler.create_static_lib(objects, extension_name,
                                       output_dir=library_dir,
                                       debug=cmd.debug)
        if sys.platform == 'win32':
            compiler.add_library_dir(library_dir)
            extension.libraries = [extension_name]
        elif sys.platform == 'darwin':
            compiler.set_link_objects([
                '-Wl,-force_load', library_filename,
            ])
        else:
            compiler.set_link_objects([
                '-Wl,--whole-archive', library_filename, '-Wl,--no-whole-archive',
            ])
    else:
        extension.sources[0:0] = common_sources

    # Compile the source code to object files.
    ext_path = cmd.get_ext_fullpath(extension_name)
    if newer_group(extension.sources, ext_path, 'newer'):
        objects = compiler.compile(extension.sources,
                                   output_dir=cmd.build_temp,
                                   debug=cmd.debug,
                                   extra_postargs=extra_compile_args)
    else:
        objects = compiler.object_filenames(extension.sources,
                                            output_dir=cmd.build_temp)
    # Now link the object files together into a "shared object".
    compiler.link_shared_object(
        objects, ext_path,
        libraries=cmd.get_libraries(extension),
        extra_postargs=extra_link_args,
        export_symbols=cmd.get_export_symbols(extension),
        debug=cmd.debug,
        build_temp=cmd.build_temp)

    return pathlib.Path(ext_path)
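

# A minimal usage sketch for compile_c_extension(), assuming a generated
# parser source already exists at the (hypothetical) path below. This mirrors
# what build_c_generator() does further down in this module.
def _demo_compile_c_extension() -> None:
    with tempfile.TemporaryDirectory() as build_dir:
        ext_path = compile_c_extension(
            "mydir/parser.c",     # hypothetical generated source
            build_dir=build_dir,  # keep intermediate objects out of mydir/
            verbose=False,
            keep_asserts=True,
        )
        # e.g. mydir/parser.cpython-312-darwin.so on a macOS build
        print(ext_path)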


def build_parser(
    grammar_file: str, verbose_tokenizer: bool = False, verbose_parser: bool = False
) -> Tuple[Grammar, Parser, Tokenizer]:
    with open(grammar_file) as file:
        tokenizer = Tokenizer(tokenize.generate_tokens(file.readline), verbose=verbose_tokenizer)
        parser = GrammarParser(tokenizer, verbose=verbose_parser)
        grammar = parser.start()

        if not grammar:
            raise parser.make_syntax_error(grammar_file)

    return grammar, parser, tokenizer
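

# A short sketch of driving build_parser() by hand; "data/python.gram" is a
# placeholder path, not a file this module guarantees to exist.
def _demo_build_parser() -> None:
    grammar, parser, tokenizer = build_parser("data/python.gram")
    # Grammar.rules maps rule names to Rule nodes; printing a few of them is
    # a quick way to inspect what was parsed.
    for name, rule in list(grammar.rules.items())[:3]:
        print(name, "->", rule)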


def generate_token_definitions(tokens: IO[str]) -> TokenDefinitions:
    all_tokens = {}
    exact_tokens = {}
    non_exact_tokens = set()
    numbers = itertools.count(0)

    for line in tokens:
        line = line.strip()

        if not line or line.startswith("#"):
            continue

        pieces = line.split()
        index = next(numbers)

        if len(pieces) == 1:
            (token,) = pieces
            non_exact_tokens.add(token)
            all_tokens[index] = token
        elif len(pieces) == 2:
            token, op = pieces
            exact_tokens[op.strip("'")] = index
            all_tokens[index] = token
        else:
            raise ValueError(f"Unexpected line found in Tokens file: {line}")

    return all_tokens, exact_tokens, non_exact_tokens
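

# An illustration of the Tokens-file format that generate_token_definitions()
# parses, using io.StringIO so the sketch is self-contained. The two sample
# lines imitate entries from CPython's Grammar/Tokens file.
def _demo_generate_token_definitions() -> None:
    import io

    sample = "ENDMARKER\nLPAR '('\n"
    all_tokens, exact_tokens, non_exact_tokens = generate_token_definitions(
        io.StringIO(sample)
    )
    print(all_tokens)        # {0: 'ENDMARKER', 1: 'LPAR'}
    print(exact_tokens)      # {'(': 1}
    print(non_exact_tokens)  # {'ENDMARKER'}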


def build_c_generator(
    grammar: Grammar,
    grammar_file: str,
    tokens_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> ParserGenerator:
    with open(tokens_file, "r") as tok_file:
        all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file)
    with open(output_file, "w") as file:
        gen: ParserGenerator = CParserGenerator(
            grammar, all_tokens, exact_tok, non_exact_tok, file, skip_actions=skip_actions
        )
        gen.generate(grammar_file)

    if compile_extension:
        with tempfile.TemporaryDirectory() as build_dir:
            compile_c_extension(
                output_file,
                build_dir=build_dir,
                verbose=verbose_c_extension,
                keep_asserts=keep_asserts_in_extension,
            )
    return gen


def build_python_generator(
    grammar: Grammar,
    grammar_file: str,
    output_file: str,
    skip_actions: bool = False,
) -> ParserGenerator:
    with open(output_file, "w") as file:
        gen: ParserGenerator = PythonParserGenerator(grammar, file)  # TODO: skip_actions
        gen.generate(grammar_file)
    return gen


def build_c_parser_and_generator(
    grammar_file: str,
    tokens_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_tokenizer: bool = False,
    verbose_parser: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Generate rules, C parser, tokenizer, parser generator for a given grammar

    Args:
        grammar_file (string): Path for the grammar file
        tokens_file (string): Path for the tokens file
        output_file (string): Path for the output file
        compile_extension (bool, optional): Whether to compile the C extension.
          Defaults to False.
        verbose_tokenizer (bool, optional): Whether to display additional output
          when generating the tokenizer. Defaults to False.
        verbose_parser (bool, optional): Whether to display additional output
          when generating the parser. Defaults to False.
        verbose_c_extension (bool, optional): Whether to display additional
          output when compiling the C extension. Defaults to False.
        keep_asserts_in_extension (bool, optional): Whether to keep the assert statements
          when compiling the extension module. Defaults to True.
        skip_actions (bool, optional): Whether to pretend no rule has any actions.
          Defaults to False.
    """
    grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
    gen = build_c_generator(
        grammar,
        grammar_file,
        tokens_file,
        output_file,
        compile_extension,
        verbose_c_extension,
        keep_asserts_in_extension,
        skip_actions=skip_actions,
    )

    return grammar, parser, tokenizer, gen
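

# A hedged end-to-end sketch: the grammar and tokens paths below stand in for
# the real CPython files (Grammar/python.gram and Grammar/Tokens), and
# compile_extension is left off so no C compiler is needed to run it.
def _demo_build_c_parser_and_generator() -> None:
    grammar, parser, tokenizer, gen = build_c_parser_and_generator(
        grammar_file="Grammar/python.gram",  # placeholder path
        tokens_file="Grammar/Tokens",        # placeholder path
        output_file="parser.c",
        compile_extension=False,
    )
    print(f"Wrote C parser for {len(grammar.rules)} rules to parser.c")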


def build_python_parser_and_generator(
    grammar_file: str,
    output_file: str,
    verbose_tokenizer: bool = False,
    verbose_parser: bool = False,
    skip_actions: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Generate rules, Python parser, tokenizer, parser generator for a given grammar

    Args:
        grammar_file (string): Path for the grammar file
        output_file (string): Path for the output file
        verbose_tokenizer (bool, optional): Whether to display additional output
          when generating the tokenizer. Defaults to False.
        verbose_parser (bool, optional): Whether to display additional output
          when generating the parser. Defaults to False.
        skip_actions (bool, optional): Whether to pretend no rule has any actions.
          Defaults to False.
    """
    grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
    gen = build_python_generator(
        grammar,
        grammar_file,
        output_file,
        skip_actions=skip_actions,
    )
    return grammar, parser, tokenizer, gen