python (3.11.7)

(root)/
lib/
python3.11/
re/
_constants.py
       1  #
       2  # Secret Labs' Regular Expression Engine
       3  #
       4  # various symbols used by the regular expression engine.
       5  # run this script to update the _sre include files!
       6  #
       7  # Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.
       8  #
       9  # See the __init__.py file for information on usage and redistribution.
      10  #
      11  
      12  """Internal support module for sre"""
      13  
      14  # update when constants are added or removed
      15  
      16  MAGIC = 20220615
      17  
      18  from _sre import MAXREPEAT, MAXGROUPS
      19  
      20  # SRE standard exception (access as sre.error)
      21  # should this really be here?
      22  
      23  class ESC[4;38;5;81merror(ESC[4;38;5;149mException):
      24      """Exception raised for invalid regular expressions.
      25  
      26      Attributes:
      27  
      28          msg: The unformatted error message
      29          pattern: The regular expression pattern
      30          pos: The index in the pattern where compilation failed (may be None)
      31          lineno: The line corresponding to pos (may be None)
      32          colno: The column corresponding to pos (may be None)
      33      """
      34  
      35      __module__ = 're'
      36  
      37      def __init__(self, msg, pattern=None, pos=None):
      38          self.msg = msg
      39          self.pattern = pattern
      40          self.pos = pos
      41          if pattern is not None and pos is not None:
      42              msg = '%s at position %d' % (msg, pos)
      43              if isinstance(pattern, str):
      44                  newline = '\n'
      45              else:
      46                  newline = b'\n'
      47              self.lineno = pattern.count(newline, 0, pos) + 1
      48              self.colno = pos - pattern.rfind(newline, 0, pos)
      49              if newline in pattern:
      50                  msg = '%s (line %d, column %d)' % (msg, self.lineno, self.colno)
      51          else:
      52              self.lineno = self.colno = None
      53          super().__init__(msg)
      54  
      55  
      56  class ESC[4;38;5;81m_NamedIntConstant(ESC[4;38;5;149mint):
      57      def __new__(cls, value, name):
      58          self = super(_NamedIntConstant, cls).__new__(cls, value)
      59          self.name = name
      60          return self
      61  
      62      def __repr__(self):
      63          return self.name
      64  
      65      __reduce__ = None
      66  
      67  MAXREPEAT = _NamedIntConstant(MAXREPEAT, 'MAXREPEAT')
      68  
      69  def _makecodes(*names):
      70      items = [_NamedIntConstant(i, name) for i, name in enumerate(names)]
      71      globals().update({item.name: item for item in items})
      72      return items
      73  
      74  # operators
      75  OPCODES = _makecodes(
      76      # failure=0 success=1 (just because it looks better that way :-)
      77      'FAILURE', 'SUCCESS',
      78  
      79      'ANY', 'ANY_ALL',
      80      'ASSERT', 'ASSERT_NOT',
      81      'AT',
      82      'BRANCH',
      83      'CATEGORY',
      84      'CHARSET', 'BIGCHARSET',
      85      'GROUPREF', 'GROUPREF_EXISTS',
      86      'IN',
      87      'INFO',
      88      'JUMP',
      89      'LITERAL',
      90      'MARK',
      91      'MAX_UNTIL',
      92      'MIN_UNTIL',
      93      'NOT_LITERAL',
      94      'NEGATE',
      95      'RANGE',
      96      'REPEAT',
      97      'REPEAT_ONE',
      98      'SUBPATTERN',
      99      'MIN_REPEAT_ONE',
     100      'ATOMIC_GROUP',
     101      'POSSESSIVE_REPEAT',
     102      'POSSESSIVE_REPEAT_ONE',
     103  
     104      'GROUPREF_IGNORE',
     105      'IN_IGNORE',
     106      'LITERAL_IGNORE',
     107      'NOT_LITERAL_IGNORE',
     108  
     109      'GROUPREF_LOC_IGNORE',
     110      'IN_LOC_IGNORE',
     111      'LITERAL_LOC_IGNORE',
     112      'NOT_LITERAL_LOC_IGNORE',
     113  
     114      'GROUPREF_UNI_IGNORE',
     115      'IN_UNI_IGNORE',
     116      'LITERAL_UNI_IGNORE',
     117      'NOT_LITERAL_UNI_IGNORE',
     118      'RANGE_UNI_IGNORE',
     119  
     120      # The following opcodes are only occurred in the parser output,
     121      # but not in the compiled code.
     122      'MIN_REPEAT', 'MAX_REPEAT',
     123  )
     124  del OPCODES[-2:] # remove MIN_REPEAT and MAX_REPEAT
     125  
     126  # positions
     127  ATCODES = _makecodes(
     128      'AT_BEGINNING', 'AT_BEGINNING_LINE', 'AT_BEGINNING_STRING',
     129      'AT_BOUNDARY', 'AT_NON_BOUNDARY',
     130      'AT_END', 'AT_END_LINE', 'AT_END_STRING',
     131  
     132      'AT_LOC_BOUNDARY', 'AT_LOC_NON_BOUNDARY',
     133  
     134      'AT_UNI_BOUNDARY', 'AT_UNI_NON_BOUNDARY',
     135  )
     136  
     137  # categories
     138  CHCODES = _makecodes(
     139      'CATEGORY_DIGIT', 'CATEGORY_NOT_DIGIT',
     140      'CATEGORY_SPACE', 'CATEGORY_NOT_SPACE',
     141      'CATEGORY_WORD', 'CATEGORY_NOT_WORD',
     142      'CATEGORY_LINEBREAK', 'CATEGORY_NOT_LINEBREAK',
     143  
     144      'CATEGORY_LOC_WORD', 'CATEGORY_LOC_NOT_WORD',
     145  
     146      'CATEGORY_UNI_DIGIT', 'CATEGORY_UNI_NOT_DIGIT',
     147      'CATEGORY_UNI_SPACE', 'CATEGORY_UNI_NOT_SPACE',
     148      'CATEGORY_UNI_WORD', 'CATEGORY_UNI_NOT_WORD',
     149      'CATEGORY_UNI_LINEBREAK', 'CATEGORY_UNI_NOT_LINEBREAK',
     150  )
     151  
     152  
     153  # replacement operations for "ignore case" mode
     154  OP_IGNORE = {
     155      LITERAL: LITERAL_IGNORE,
     156      NOT_LITERAL: NOT_LITERAL_IGNORE,
     157  }
     158  
     159  OP_LOCALE_IGNORE = {
     160      LITERAL: LITERAL_LOC_IGNORE,
     161      NOT_LITERAL: NOT_LITERAL_LOC_IGNORE,
     162  }
     163  
     164  OP_UNICODE_IGNORE = {
     165      LITERAL: LITERAL_UNI_IGNORE,
     166      NOT_LITERAL: NOT_LITERAL_UNI_IGNORE,
     167  }
     168  
     169  AT_MULTILINE = {
     170      AT_BEGINNING: AT_BEGINNING_LINE,
     171      AT_END: AT_END_LINE
     172  }
     173  
     174  AT_LOCALE = {
     175      AT_BOUNDARY: AT_LOC_BOUNDARY,
     176      AT_NON_BOUNDARY: AT_LOC_NON_BOUNDARY
     177  }
     178  
     179  AT_UNICODE = {
     180      AT_BOUNDARY: AT_UNI_BOUNDARY,
     181      AT_NON_BOUNDARY: AT_UNI_NON_BOUNDARY
     182  }
     183  
     184  CH_LOCALE = {
     185      CATEGORY_DIGIT: CATEGORY_DIGIT,
     186      CATEGORY_NOT_DIGIT: CATEGORY_NOT_DIGIT,
     187      CATEGORY_SPACE: CATEGORY_SPACE,
     188      CATEGORY_NOT_SPACE: CATEGORY_NOT_SPACE,
     189      CATEGORY_WORD: CATEGORY_LOC_WORD,
     190      CATEGORY_NOT_WORD: CATEGORY_LOC_NOT_WORD,
     191      CATEGORY_LINEBREAK: CATEGORY_LINEBREAK,
     192      CATEGORY_NOT_LINEBREAK: CATEGORY_NOT_LINEBREAK
     193  }
     194  
     195  CH_UNICODE = {
     196      CATEGORY_DIGIT: CATEGORY_UNI_DIGIT,
     197      CATEGORY_NOT_DIGIT: CATEGORY_UNI_NOT_DIGIT,
     198      CATEGORY_SPACE: CATEGORY_UNI_SPACE,
     199      CATEGORY_NOT_SPACE: CATEGORY_UNI_NOT_SPACE,
     200      CATEGORY_WORD: CATEGORY_UNI_WORD,
     201      CATEGORY_NOT_WORD: CATEGORY_UNI_NOT_WORD,
     202      CATEGORY_LINEBREAK: CATEGORY_UNI_LINEBREAK,
     203      CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK
     204  }
     205  
     206  # flags
     207  SRE_FLAG_TEMPLATE = 1 # template mode (unknown purpose, deprecated)
     208  SRE_FLAG_IGNORECASE = 2 # case insensitive
     209  SRE_FLAG_LOCALE = 4 # honour system locale
     210  SRE_FLAG_MULTILINE = 8 # treat target as multiline string
     211  SRE_FLAG_DOTALL = 16 # treat target as a single string
     212  SRE_FLAG_UNICODE = 32 # use unicode "locale"
     213  SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments
     214  SRE_FLAG_DEBUG = 128 # debugging
     215  SRE_FLAG_ASCII = 256 # use ascii "locale"
     216  
     217  # flags for INFO primitive
     218  SRE_INFO_PREFIX = 1 # has prefix
     219  SRE_INFO_LITERAL = 2 # entire pattern is literal (given by prefix)
     220  SRE_INFO_CHARSET = 4 # pattern starts with character from given set