python (3.11.7)

(root)/
lib/
python3.11/
site-packages/
pip/
_vendor/
distlib/
manifest.py
       1  # -*- coding: utf-8 -*-
       2  #
       3  # Copyright (C) 2012-2013 Python Software Foundation.
       4  # See LICENSE.txt and CONTRIBUTORS.txt.
       5  #
       6  """
       7  Class representing the list of files in a distribution.
       8  
       9  Equivalent to distutils.filelist, but fixes some problems.
      10  """
      11  import fnmatch
      12  import logging
      13  import os
      14  import re
      15  import sys
      16  
      17  from . import DistlibException
      18  from .compat import fsdecode
      19  from .util import convert_path
      20  
      21  
# Names exported by ``from ... import *``: only the Manifest class.
__all__ = ['Manifest']

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
      25  
      26  # a \ followed by some spaces + EOL
      27  _COLLAPSE_PATTERN = re.compile('\\\\w*\n', re.M)
      28  _COMMENTED_LINE = re.compile('#.*?(?=\n)|\n(?=$)', re.M | re.S)
      29  
      30  #
      31  # Due to the different results returned by fnmatch.translate, we need
      32  # to do slightly different processing for Python 2.7 and 3.2 ... this needed
      33  # to be brought in for Python 3.6 onwards.
      34  #
      35  _PYTHON_VERSION = sys.version_info[:2]
      36  
      37  class ESC[4;38;5;81mManifest(ESC[4;38;5;149mobject):
      38      """A list of files built by exploring the filesystem and filtered by
      39      applying various patterns to what we find there.
      40      """
      41  
      42      def __init__(self, base=None):
      43          """
      44          Initialise an instance.
      45  
      46          :param base: The base directory to explore under.
      47          """
      48          self.base = os.path.abspath(os.path.normpath(base or os.getcwd()))
      49          self.prefix = self.base + os.sep
      50          self.allfiles = None
      51          self.files = set()
      52  
      53      #
      54      # Public API
      55      #
      56  
      57      def findall(self):
      58          """Find all files under the base and set ``allfiles`` to the absolute
      59          pathnames of files found.
      60          """
      61          from stat import S_ISREG, S_ISDIR, S_ISLNK
      62  
      63          self.allfiles = allfiles = []
      64          root = self.base
      65          stack = [root]
      66          pop = stack.pop
      67          push = stack.append
      68  
      69          while stack:
      70              root = pop()
      71              names = os.listdir(root)
      72  
      73              for name in names:
      74                  fullname = os.path.join(root, name)
      75  
      76                  # Avoid excess stat calls -- just one will do, thank you!
      77                  stat = os.stat(fullname)
      78                  mode = stat.st_mode
      79                  if S_ISREG(mode):
      80                      allfiles.append(fsdecode(fullname))
      81                  elif S_ISDIR(mode) and not S_ISLNK(mode):
      82                      push(fullname)
      83  
      84      def add(self, item):
      85          """
      86          Add a file to the manifest.
      87  
      88          :param item: The pathname to add. This can be relative to the base.
      89          """
      90          if not item.startswith(self.prefix):
      91              item = os.path.join(self.base, item)
      92          self.files.add(os.path.normpath(item))
      93  
      94      def add_many(self, items):
      95          """
      96          Add a list of files to the manifest.
      97  
      98          :param items: The pathnames to add. These can be relative to the base.
      99          """
     100          for item in items:
     101              self.add(item)
     102  
     103      def sorted(self, wantdirs=False):
     104          """
     105          Return sorted files in directory order
     106          """
     107  
     108          def add_dir(dirs, d):
     109              dirs.add(d)
     110              logger.debug('add_dir added %s', d)
     111              if d != self.base:
     112                  parent, _ = os.path.split(d)
     113                  assert parent not in ('', '/')
     114                  add_dir(dirs, parent)
     115  
     116          result = set(self.files)    # make a copy!
     117          if wantdirs:
     118              dirs = set()
     119              for f in result:
     120                  add_dir(dirs, os.path.dirname(f))
     121              result |= dirs
     122          return [os.path.join(*path_tuple) for path_tuple in
     123                  sorted(os.path.split(path) for path in result)]
     124  
     125      def clear(self):
     126          """Clear all collected files."""
     127          self.files = set()
     128          self.allfiles = []
     129  
     130      def process_directive(self, directive):
     131          """
     132          Process a directive which either adds some files from ``allfiles`` to
     133          ``files``, or removes some files from ``files``.
     134  
     135          :param directive: The directive to process. This should be in a format
     136                       compatible with distutils ``MANIFEST.in`` files:
     137  
     138                       http://docs.python.org/distutils/sourcedist.html#commands
     139          """
     140          # Parse the line: split it up, make sure the right number of words
     141          # is there, and return the relevant words.  'action' is always
     142          # defined: it's the first word of the line.  Which of the other
     143          # three are defined depends on the action; it'll be either
     144          # patterns, (dir and patterns), or (dirpattern).
     145          action, patterns, thedir, dirpattern = self._parse_directive(directive)
     146  
     147          # OK, now we know that the action is valid and we have the
     148          # right number of words on the line for that action -- so we
     149          # can proceed with minimal error-checking.
     150          if action == 'include':
     151              for pattern in patterns:
     152                  if not self._include_pattern(pattern, anchor=True):
     153                      logger.warning('no files found matching %r', pattern)
     154  
     155          elif action == 'exclude':
     156              for pattern in patterns:
     157                  found = self._exclude_pattern(pattern, anchor=True)
     158                  #if not found:
     159                  #    logger.warning('no previously-included files '
     160                  #                   'found matching %r', pattern)
     161  
     162          elif action == 'global-include':
     163              for pattern in patterns:
     164                  if not self._include_pattern(pattern, anchor=False):
     165                      logger.warning('no files found matching %r '
     166                                     'anywhere in distribution', pattern)
     167  
     168          elif action == 'global-exclude':
     169              for pattern in patterns:
     170                  found = self._exclude_pattern(pattern, anchor=False)
     171                  #if not found:
     172                  #    logger.warning('no previously-included files '
     173                  #                   'matching %r found anywhere in '
     174                  #                   'distribution', pattern)
     175  
     176          elif action == 'recursive-include':
     177              for pattern in patterns:
     178                  if not self._include_pattern(pattern, prefix=thedir):
     179                      logger.warning('no files found matching %r '
     180                                     'under directory %r', pattern, thedir)
     181  
     182          elif action == 'recursive-exclude':
     183              for pattern in patterns:
     184                  found = self._exclude_pattern(pattern, prefix=thedir)
     185                  #if not found:
     186                  #    logger.warning('no previously-included files '
     187                  #                   'matching %r found under directory %r',
     188                  #                   pattern, thedir)
     189  
     190          elif action == 'graft':
     191              if not self._include_pattern(None, prefix=dirpattern):
     192                  logger.warning('no directories found matching %r',
     193                                 dirpattern)
     194  
     195          elif action == 'prune':
     196              if not self._exclude_pattern(None, prefix=dirpattern):
     197                  logger.warning('no previously-included directories found '
     198                                 'matching %r', dirpattern)
     199          else:   # pragma: no cover
     200              # This should never happen, as it should be caught in
     201              # _parse_template_line
     202              raise DistlibException(
     203                  'invalid action %r' % action)
     204  
     205      #
     206      # Private API
     207      #
     208  
     209      def _parse_directive(self, directive):
     210          """
     211          Validate a directive.
     212          :param directive: The directive to validate.
     213          :return: A tuple of action, patterns, thedir, dir_patterns
     214          """
     215          words = directive.split()
     216          if len(words) == 1 and words[0] not in ('include', 'exclude',
     217                                                  'global-include',
     218                                                  'global-exclude',
     219                                                  'recursive-include',
     220                                                  'recursive-exclude',
     221                                                  'graft', 'prune'):
     222              # no action given, let's use the default 'include'
     223              words.insert(0, 'include')
     224  
     225          action = words[0]
     226          patterns = thedir = dir_pattern = None
     227  
     228          if action in ('include', 'exclude',
     229                        'global-include', 'global-exclude'):
     230              if len(words) < 2:
     231                  raise DistlibException(
     232                      '%r expects <pattern1> <pattern2> ...' % action)
     233  
     234              patterns = [convert_path(word) for word in words[1:]]
     235  
     236          elif action in ('recursive-include', 'recursive-exclude'):
     237              if len(words) < 3:
     238                  raise DistlibException(
     239                      '%r expects <dir> <pattern1> <pattern2> ...' % action)
     240  
     241              thedir = convert_path(words[1])
     242              patterns = [convert_path(word) for word in words[2:]]
     243  
     244          elif action in ('graft', 'prune'):
     245              if len(words) != 2:
     246                  raise DistlibException(
     247                      '%r expects a single <dir_pattern>' % action)
     248  
     249              dir_pattern = convert_path(words[1])
     250  
     251          else:
     252              raise DistlibException('unknown action %r' % action)
     253  
     254          return action, patterns, thedir, dir_pattern
     255  
     256      def _include_pattern(self, pattern, anchor=True, prefix=None,
     257                           is_regex=False):
     258          """Select strings (presumably filenames) from 'self.files' that
     259          match 'pattern', a Unix-style wildcard (glob) pattern.
     260  
     261          Patterns are not quite the same as implemented by the 'fnmatch'
     262          module: '*' and '?'  match non-special characters, where "special"
     263          is platform-dependent: slash on Unix; colon, slash, and backslash on
     264          DOS/Windows; and colon on Mac OS.
     265  
     266          If 'anchor' is true (the default), then the pattern match is more
     267          stringent: "*.py" will match "foo.py" but not "foo/bar.py".  If
     268          'anchor' is false, both of these will match.
     269  
     270          If 'prefix' is supplied, then only filenames starting with 'prefix'
     271          (itself a pattern) and ending with 'pattern', with anything in between
     272          them, will match.  'anchor' is ignored in this case.
     273  
     274          If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and
     275          'pattern' is assumed to be either a string containing a regex or a
     276          regex object -- no translation is done, the regex is just compiled
     277          and used as-is.
     278  
     279          Selected strings will be added to self.files.
     280  
     281          Return True if files are found.
     282          """
     283          # XXX docstring lying about what the special chars are?
     284          found = False
     285          pattern_re = self._translate_pattern(pattern, anchor, prefix, is_regex)
     286  
     287          # delayed loading of allfiles list
     288          if self.allfiles is None:
     289              self.findall()
     290  
     291          for name in self.allfiles:
     292              if pattern_re.search(name):
     293                  self.files.add(name)
     294                  found = True
     295          return found
     296  
     297      def _exclude_pattern(self, pattern, anchor=True, prefix=None,
     298                           is_regex=False):
     299          """Remove strings (presumably filenames) from 'files' that match
     300          'pattern'.
     301  
     302          Other parameters are the same as for 'include_pattern()', above.
     303          The list 'self.files' is modified in place. Return True if files are
     304          found.
     305  
     306          This API is public to allow e.g. exclusion of SCM subdirs, e.g. when
     307          packaging source distributions
     308          """
     309          found = False
     310          pattern_re = self._translate_pattern(pattern, anchor, prefix, is_regex)
     311          for f in list(self.files):
     312              if pattern_re.search(f):
     313                  self.files.remove(f)
     314                  found = True
     315          return found
     316  
     317      def _translate_pattern(self, pattern, anchor=True, prefix=None,
     318                             is_regex=False):
     319          """Translate a shell-like wildcard pattern to a compiled regular
     320          expression.
     321  
     322          Return the compiled regex.  If 'is_regex' true,
     323          then 'pattern' is directly compiled to a regex (if it's a string)
     324          or just returned as-is (assumes it's a regex object).
     325          """
     326          if is_regex:
     327              if isinstance(pattern, str):
     328                  return re.compile(pattern)
     329              else:
     330                  return pattern
     331  
     332          if _PYTHON_VERSION > (3, 2):
     333              # ditch start and end characters
     334              start, _, end = self._glob_to_re('_').partition('_')
     335  
     336          if pattern:
     337              pattern_re = self._glob_to_re(pattern)
     338              if _PYTHON_VERSION > (3, 2):
     339                  assert pattern_re.startswith(start) and pattern_re.endswith(end)
     340          else:
     341              pattern_re = ''
     342  
     343          base = re.escape(os.path.join(self.base, ''))
     344          if prefix is not None:
     345              # ditch end of pattern character
     346              if _PYTHON_VERSION <= (3, 2):
     347                  empty_pattern = self._glob_to_re('')
     348                  prefix_re = self._glob_to_re(prefix)[:-len(empty_pattern)]
     349              else:
     350                  prefix_re = self._glob_to_re(prefix)
     351                  assert prefix_re.startswith(start) and prefix_re.endswith(end)
     352                  prefix_re = prefix_re[len(start): len(prefix_re) - len(end)]
     353              sep = os.sep
     354              if os.sep == '\\':
     355                  sep = r'\\'
     356              if _PYTHON_VERSION <= (3, 2):
     357                  pattern_re = '^' + base + sep.join((prefix_re,
     358                                                      '.*' + pattern_re))
     359              else:
     360                  pattern_re = pattern_re[len(start): len(pattern_re) - len(end)]
     361                  pattern_re = r'%s%s%s%s.*%s%s' % (start, base, prefix_re, sep,
     362                                                    pattern_re, end)
     363          else:  # no prefix -- respect anchor flag
     364              if anchor:
     365                  if _PYTHON_VERSION <= (3, 2):
     366                      pattern_re = '^' + base + pattern_re
     367                  else:
     368                      pattern_re = r'%s%s%s' % (start, base, pattern_re[len(start):])
     369  
     370          return re.compile(pattern_re)
     371  
     372      def _glob_to_re(self, pattern):
     373          """Translate a shell-like glob pattern to a regular expression.
     374  
     375          Return a string containing the regex.  Differs from
     376          'fnmatch.translate()' in that '*' does not match "special characters"
     377          (which are platform-specific).
     378          """
     379          pattern_re = fnmatch.translate(pattern)
     380  
     381          # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
     382          # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
     383          # and by extension they shouldn't match such "special characters" under
     384          # any OS.  So change all non-escaped dots in the RE to match any
     385          # character except the special characters (currently: just os.sep).
     386          sep = os.sep
     387          if os.sep == '\\':
     388              # we're using a regex to manipulate a regex, so we need
     389              # to escape the backslash twice
     390              sep = r'\\\\'
     391          escaped = r'\1[^%s]' % sep
     392          pattern_re = re.sub(r'((?<!\\)(\\\\)*)\.', escaped, pattern_re)
     393          return pattern_re