1  import os.path
       2  import re
       3  
       4  from . import common as _common
       5  
       6  
       7  TOOL = 'gcc'
       8  
       9  META_FILES = {
      10      '<built-in>',
      11      '<command-line>',
      12  }
      13  
      14  # https://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
      15  # flags:
      16  #  1  start of a new file
      17  #  2  returning to a file (after including another)
      18  #  3  following text comes from a system header file
      19  #  4  following text treated wrapped in implicit extern "C" block
      20  LINE_MARKER_RE = re.compile(r'^# (\d+) "([^"]+)"((?: [1234])*)$')
      21  PREPROC_DIRECTIVE_RE = re.compile(r'^\s*#\s*(\w+)\b.*')
      22  COMPILER_DIRECTIVE_RE = re.compile(r'''
      23      ^
      24      (.*?)  # <before>
      25      (__\w+__)  # <directive>
      26      \s*
      27      [(] [(]
      28      (
      29          [^()]*
      30          (?:
      31              [(]
      32              [^()]*
      33              [)]
      34              [^()]*
      35           )*
      36       )  # <args>
      37      ( [)] [)] )  # <closed>
      38  ''', re.VERBOSE)
      39  
      40  POST_ARGS = (
      41      '-pthread',
      42      '-std=c99',
      43      #'-g',
      44      #'-Og',
      45      #'-Wno-unused-result',
      46      #'-Wsign-compare',
      47      #'-Wall',
      48      #'-Wextra',
      49      '-E',
      50  )
      51  
      52  
      53  def preprocess(filename,
      54                 incldirs=None,
      55                 includes=None,
      56                 macros=None,
      57                 samefiles=None,
      58                 cwd=None,
      59                 ):
      60      if not cwd or not os.path.isabs(cwd):
      61          cwd = os.path.abspath(cwd or '.')
      62      filename = _normpath(filename, cwd)
      63      text = _common.preprocess(
      64          TOOL,
      65          filename,
      66          incldirs=incldirs,
      67          includes=includes,
      68          macros=macros,
      69          #preargs=PRE_ARGS,
      70          postargs=POST_ARGS,
      71          executable=['gcc'],
      72          compiler='unix',
      73          cwd=cwd,
      74      )
      75      return _iter_lines(text, filename, samefiles, cwd)
      76  
      77  
      78  def _iter_lines(text, reqfile, samefiles, cwd, raw=False):
      79      lines = iter(text.splitlines())
      80  
      81      # The first line is special.
      82      # The next two lines are consistent.
      83      firstlines = [
      84          f'# 0 "{reqfile}"',
      85          '# 0 "<built-in>"',
      86          '# 0 "<command-line>"',
      87      ]
      88      if text.startswith('# 1 '):
      89          # Some preprocessors emit a lineno of 1 for line-less entries.
      90          firstlines = [l.replace('# 0 ', '# 1 ') for l in firstlines]
      91      for expected in firstlines:
      92          line = next(lines)
      93          if line != expected:
      94              raise NotImplementedError((line, expected))
      95  
      96      # Do all the CLI-provided includes.
      97      filter_reqfile = (lambda f: _filter_reqfile(f, reqfile, samefiles))
      98      make_info = (lambda lno: _common.FileInfo(reqfile, lno))
      99      last = None
     100      for line in lines:
     101          assert last != reqfile, (last,)
     102          lno, included, flags = _parse_marker_line(line, reqfile)
     103          if not included:
     104              raise NotImplementedError((line,))
     105          if included == reqfile:
     106              # This will be the last one.
     107              assert not flags, (line, flags)
     108          else:
     109              assert 1 in flags, (line, flags)
     110          yield from _iter_top_include_lines(
     111              lines,
     112              _normpath(included, cwd),
     113              cwd,
     114              filter_reqfile,
     115              make_info,
     116              raw,
     117          )
     118          last = included
     119      # The last one is always the requested file.
     120      assert included == reqfile, (line,)
     121  
     122  
     123  def _iter_top_include_lines(lines, topfile, cwd,
     124                              filter_reqfile, make_info,
     125                              raw):
     126      partial = 0  # depth
     127      files = [topfile]
     128      # We start at 1 in case there are source lines (including blank onces)
     129      # before the first marker line.  Also, we already verified in
     130      # _parse_marker_line() that the preprocessor reported lno as 1.
     131      lno = 1
     132      for line in lines:
     133          if line == '# 0 "<command-line>" 2' or line == '# 1 "<command-line>" 2':
     134              # We're done with this top-level include.
     135              return
     136  
     137          _lno, included, flags = _parse_marker_line(line)
     138          if included:
     139              lno = _lno
     140              included = _normpath(included, cwd)
     141              # We hit a marker line.
     142              if 1 in flags:
     143                  # We're entering a file.
     144                  # XXX Cycles are unexpected?
     145                  #assert included not in files, (line, files)
     146                  files.append(included)
     147              elif 2 in flags:
     148                  # We're returning to a file.
     149                  assert files and included in files, (line, files)
     150                  assert included != files[-1], (line, files)
     151                  while files[-1] != included:
     152                      files.pop()
     153                  # XXX How can a file return to line 1?
     154                  #assert lno > 1, (line, lno)
     155              else:
     156                  if included == files[-1]:
     157                      # It's the next line from the file.
     158                      assert lno > 1, (line, lno)
     159                  else:
     160                      # We ran into a user-added #LINE directive,
     161                      # which we promptly ignore.
     162                      pass
     163          elif not files:
     164              raise NotImplementedError((line,))
     165          elif filter_reqfile(files[-1]):
     166              assert lno is not None, (line, files[-1])
     167              if (m := PREPROC_DIRECTIVE_RE.match(line)):
     168                  name, = m.groups()
     169                  if name != 'pragma':
     170                      raise Exception(line)
     171              else:
     172                  line = re.sub(r'__inline__', 'inline', line)
     173                  if not raw:
     174                      line, partial = _strip_directives(line, partial=partial)
     175                  yield _common.SourceLine(
     176                      make_info(lno),
     177                      'source',
     178                      line or '',
     179                      None,
     180                  )
     181              lno += 1
     182  
     183  
     184  def _parse_marker_line(line, reqfile=None):
     185      m = LINE_MARKER_RE.match(line)
     186      if not m:
     187          return None, None, None
     188      lno, origfile, flags = m.groups()
     189      lno = int(lno)
     190      assert origfile not in META_FILES, (line,)
     191      assert lno > 0, (line, lno)
     192      flags = set(int(f) for f in flags.split()) if flags else ()
     193  
     194      if 1 in flags:
     195          # We're entering a file.
     196          assert lno == 1, (line, lno)
     197          assert 2 not in flags, (line,)
     198      elif 2 in flags:
     199          # We're returning to a file.
     200          #assert lno > 1, (line, lno)
     201          pass
     202      elif reqfile and origfile == reqfile:
     203          # We're starting the requested file.
     204          assert lno == 1, (line, lno)
     205          assert not flags, (line, flags)
     206      else:
     207          # It's the next line from the file.
     208          assert lno > 1, (line, lno)
     209      return lno, origfile, flags
     210  
     211  
     212  def _strip_directives(line, partial=0):
     213      # We assume there are no string literals with parens in directive bodies.
     214      while partial > 0:
     215          if not (m := re.match(r'[^{}]*([()])', line)):
     216              return None, partial
     217          delim, = m.groups()
     218          partial += 1 if delim == '(' else -1  # opened/closed
     219          line = line[m.end():]
     220  
     221      line = re.sub(r'__extension__', '', line)
     222      line = re.sub(r'__thread\b', '_Thread_local', line)
     223  
     224      while (m := COMPILER_DIRECTIVE_RE.match(line)):
     225          before, _, _, closed = m.groups()
     226          if closed:
     227              line = f'{before} {line[m.end():]}'
     228          else:
     229              after, partial = _strip_directives(line[m.end():], 2)
     230              line = f'{before} {after or ""}'
     231              if partial:
     232                  break
     233  
     234      return line, partial
     235  
     236  
     237  def _filter_reqfile(current, reqfile, samefiles):
     238      if current == reqfile:
     239          return True
     240      if current == '<stdin>':
     241          return True
     242      if current in samefiles:
     243          return True
     244      return False
     245  
     246  
     247  def _normpath(filename, cwd):
     248      assert cwd
     249      return os.path.normpath(os.path.join(cwd, filename))