python (3.11.7)

(root)/
lib/
python3.11/
site-packages/
pip/
_vendor/
pygments/
regexopt.py
       1  """
       2      pygments.regexopt
       3      ~~~~~~~~~~~~~~~~~
       4  
       5      An algorithm that generates optimized regexes for matching long lists of
       6      literal strings.
       7  
       8      :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
       9      :license: BSD, see LICENSE for details.
      10  """
      11  
      12  import re
      13  from re import escape
      14  from os.path import commonprefix
      15  from itertools import groupby
      16  from operator import itemgetter
      17  
      18  CS_ESCAPE = re.compile(r'[\[\^\\\-\]]')
      19  FIRST_ELEMENT = itemgetter(0)
      20  
      21  
      22  def make_charset(letters):
      23      return '[' + CS_ESCAPE.sub(lambda m: '\\' + m.group(), ''.join(letters)) + ']'
      24  
      25  
      26  def regex_opt_inner(strings, open_paren):
      27      """Return a regex that matches any string in the sorted list of strings."""
      28      close_paren = open_paren and ')' or ''
      29      # print strings, repr(open_paren)
      30      if not strings:
      31          # print '-> nothing left'
      32          return ''
      33      first = strings[0]
      34      if len(strings) == 1:
      35          # print '-> only 1 string'
      36          return open_paren + escape(first) + close_paren
      37      if not first:
      38          # print '-> first string empty'
      39          return open_paren + regex_opt_inner(strings[1:], '(?:') \
      40              + '?' + close_paren
      41      if len(first) == 1:
      42          # multiple one-char strings? make a charset
      43          oneletter = []
      44          rest = []
      45          for s in strings:
      46              if len(s) == 1:
      47                  oneletter.append(s)
      48              else:
      49                  rest.append(s)
      50          if len(oneletter) > 1:  # do we have more than one oneletter string?
      51              if rest:
      52                  # print '-> 1-character + rest'
      53                  return open_paren + regex_opt_inner(rest, '') + '|' \
      54                      + make_charset(oneletter) + close_paren
      55              # print '-> only 1-character'
      56              return open_paren + make_charset(oneletter) + close_paren
      57      prefix = commonprefix(strings)
      58      if prefix:
      59          plen = len(prefix)
      60          # we have a prefix for all strings
      61          # print '-> prefix:', prefix
      62          return open_paren + escape(prefix) \
      63              + regex_opt_inner([s[plen:] for s in strings], '(?:') \
      64              + close_paren
      65      # is there a suffix?
      66      strings_rev = [s[::-1] for s in strings]
      67      suffix = commonprefix(strings_rev)
      68      if suffix:
      69          slen = len(suffix)
      70          # print '-> suffix:', suffix[::-1]
      71          return open_paren \
      72              + regex_opt_inner(sorted(s[:-slen] for s in strings), '(?:') \
      73              + escape(suffix[::-1]) + close_paren
      74      # recurse on common 1-string prefixes
      75      # print '-> last resort'
      76      return open_paren + \
      77          '|'.join(regex_opt_inner(list(group[1]), '')
      78                   for group in groupby(strings, lambda s: s[0] == first[0])) \
      79          + close_paren
      80  
      81  
      82  def regex_opt(strings, prefix='', suffix=''):
      83      """Return a compiled regex that matches any string in the given list.
      84  
      85      The strings to match must be literal strings, not regexes.  They will be
      86      regex-escaped.
      87  
      88      *prefix* and *suffix* are pre- and appended to the final regex.
      89      """
      90      strings = sorted(strings)
      91      return prefix + regex_opt_inner(strings, '(') + suffix