(root)/
Python-3.12.0/
Modules/
_sre/
sre.c
       1  /*
       2   * Secret Labs' Regular Expression Engine
       3   *
       4   * regular expression matching engine
       5   *
       6   * partial history:
       7   * 1999-10-24 fl   created (based on existing template matcher code)
       8   * 2000-03-06 fl   first alpha, sort of
       9   * 2000-08-01 fl   fixes for 1.6b1
      10   * 2000-08-07 fl   use PyOS_CheckStack() if available
      11   * 2000-09-20 fl   added expand method
      12   * 2001-03-20 fl   lots of fixes for 2.1b2
      13   * 2001-04-15 fl   export copyright as Python attribute, not global
      14   * 2001-04-28 fl   added __copy__ methods (work in progress)
      15   * 2001-05-14 fl   fixes for 1.5.2 compatibility
      16   * 2001-07-01 fl   added BIGCHARSET support (from Martin von Loewis)
      17   * 2001-10-18 fl   fixed group reset issue (from Matthew Mueller)
      18   * 2001-10-20 fl   added split primitive; re-enable unicode for 1.6/2.0/2.1
      19   * 2001-10-21 fl   added sub/subn primitive
      20   * 2001-10-24 fl   added finditer primitive (for 2.2 only)
      21   * 2001-12-07 fl   fixed memory leak in sub/subn (Guido van Rossum)
      22   * 2002-11-09 fl   fixed empty sub/subn return type
      23   * 2003-04-18 mvl  fully support 4-byte codes
      24   * 2003-10-17 gn   implemented non recursive scheme
      25   * 2013-02-04 mrab added fullmatch primitive
      26   *
      27   * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
      28   *
      29   * This version of the SRE library can be redistributed under CNRI's
      30   * Python 1.6 license.  For any other use, please contact Secret Labs
      31   * AB (info@pythonware.com).
      32   *
      33   * Portions of this engine have been developed in cooperation with
      34   * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
      35   * other compatibility work.
      36   */
      37  
      38  static const char copyright[] =
      39      " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
      40  
      41  #define PY_SSIZE_T_CLEAN
      42  
      43  #include "Python.h"
      44  #include "pycore_long.h"          // _PyLong_GetZero()
      45  #include "pycore_moduleobject.h"  // _PyModule_GetState()
      46  #include "structmember.h"         // PyMemberDef
      47  
      48  #include "sre.h"
      49  
      50  #define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
      51  
      52  #include <ctype.h>
      53  
      54  /* defining this one enables tracing */
      55  #undef VERBOSE
      56  
      57  /* -------------------------------------------------------------------- */
      58  
      59  #if defined(_MSC_VER)
      60  #pragma optimize("agtw", on) /* doesn't seem to make much difference... */
      61  #pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
      62  /* fastest possible local call under MSVC */
      63  #define LOCAL(type) static __inline type __fastcall
      64  #else
      65  #define LOCAL(type) static inline type
      66  #endif
      67  
      68  /* error codes */
      69  #define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
      70  #define SRE_ERROR_STATE -2 /* illegal state */
      71  #define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
      72  #define SRE_ERROR_MEMORY -9 /* out of memory */
      73  #define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
      74  
      75  #if defined(VERBOSE)
      76  #define TRACE(v) printf v
      77  #else
      78  #define TRACE(v)
      79  #endif
      80  
      81  /* -------------------------------------------------------------------- */
      82  /* search engine state */
      83  
      84  #define SRE_IS_DIGIT(ch)\
      85      ((ch) <= '9' && Py_ISDIGIT(ch))
      86  #define SRE_IS_SPACE(ch)\
      87      ((ch) <= ' ' && Py_ISSPACE(ch))
      88  #define SRE_IS_LINEBREAK(ch)\
      89      ((ch) == '\n')
      90  #define SRE_IS_WORD(ch)\
      91      ((ch) <= 'z' && (Py_ISALNUM(ch) || (ch) == '_'))
      92  
      93  static unsigned int sre_lower_ascii(unsigned int ch)
      94  {
      95      return ((ch) < 128 ? Py_TOLOWER(ch) : ch);
      96  }
      97  
      98  /* locale-specific character predicates */
      99  /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
     100   * warnings when c's type supports only numbers < N+1 */
     101  #define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
     102  #define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
     103  
     104  static unsigned int sre_lower_locale(unsigned int ch)
     105  {
     106      return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch);
     107  }
     108  
     109  static unsigned int sre_upper_locale(unsigned int ch)
     110  {
     111      return ((ch) < 256 ? (unsigned int)toupper((ch)) : ch);
     112  }
     113  
     114  /* unicode-specific character predicates */
     115  
     116  #define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
     117  #define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch)
     118  #define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
     119  #define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch)
     120  #define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
     121  
     122  static unsigned int sre_lower_unicode(unsigned int ch)
     123  {
     124      return (unsigned int) Py_UNICODE_TOLOWER(ch);
     125  }
     126  
     127  static unsigned int sre_upper_unicode(unsigned int ch)
     128  {
     129      return (unsigned int) Py_UNICODE_TOUPPER(ch);
     130  }
     131  
     132  LOCAL(int)
     133  sre_category(SRE_CODE category, unsigned int ch)
     134  {
     135      switch (category) {
     136  
     137      case SRE_CATEGORY_DIGIT:
     138          return SRE_IS_DIGIT(ch);
     139      case SRE_CATEGORY_NOT_DIGIT:
     140          return !SRE_IS_DIGIT(ch);
     141      case SRE_CATEGORY_SPACE:
     142          return SRE_IS_SPACE(ch);
     143      case SRE_CATEGORY_NOT_SPACE:
     144          return !SRE_IS_SPACE(ch);
     145      case SRE_CATEGORY_WORD:
     146          return SRE_IS_WORD(ch);
     147      case SRE_CATEGORY_NOT_WORD:
     148          return !SRE_IS_WORD(ch);
     149      case SRE_CATEGORY_LINEBREAK:
     150          return SRE_IS_LINEBREAK(ch);
     151      case SRE_CATEGORY_NOT_LINEBREAK:
     152          return !SRE_IS_LINEBREAK(ch);
     153  
     154      case SRE_CATEGORY_LOC_WORD:
     155          return SRE_LOC_IS_WORD(ch);
     156      case SRE_CATEGORY_LOC_NOT_WORD:
     157          return !SRE_LOC_IS_WORD(ch);
     158  
     159      case SRE_CATEGORY_UNI_DIGIT:
     160          return SRE_UNI_IS_DIGIT(ch);
     161      case SRE_CATEGORY_UNI_NOT_DIGIT:
     162          return !SRE_UNI_IS_DIGIT(ch);
     163      case SRE_CATEGORY_UNI_SPACE:
     164          return SRE_UNI_IS_SPACE(ch);
     165      case SRE_CATEGORY_UNI_NOT_SPACE:
     166          return !SRE_UNI_IS_SPACE(ch);
     167      case SRE_CATEGORY_UNI_WORD:
     168          return SRE_UNI_IS_WORD(ch);
     169      case SRE_CATEGORY_UNI_NOT_WORD:
     170          return !SRE_UNI_IS_WORD(ch);
     171      case SRE_CATEGORY_UNI_LINEBREAK:
     172          return SRE_UNI_IS_LINEBREAK(ch);
     173      case SRE_CATEGORY_UNI_NOT_LINEBREAK:
     174          return !SRE_UNI_IS_LINEBREAK(ch);
     175      }
     176      return 0;
     177  }
     178  
     179  LOCAL(int)
     180  char_loc_ignore(SRE_CODE pattern, SRE_CODE ch)
     181  {
     182      return ch == pattern
     183          || (SRE_CODE) sre_lower_locale(ch) == pattern
     184          || (SRE_CODE) sre_upper_locale(ch) == pattern;
     185  }
     186  
     187  
     188  /* helpers */
     189  
     190  static void
     191  data_stack_dealloc(SRE_STATE* state)
     192  {
     193      if (state->data_stack) {
     194          PyMem_Free(state->data_stack);
     195          state->data_stack = NULL;
     196      }
     197      state->data_stack_size = state->data_stack_base = 0;
     198  }
     199  
     200  static int
     201  data_stack_grow(SRE_STATE* state, Py_ssize_t size)
     202  {
     203      Py_ssize_t minsize, cursize;
     204      minsize = state->data_stack_base+size;
     205      cursize = state->data_stack_size;
     206      if (cursize < minsize) {
     207          void* stack;
     208          cursize = minsize+minsize/4+1024;
     209          TRACE(("allocate/grow stack %zd\n", cursize));
     210          stack = PyMem_Realloc(state->data_stack, cursize);
     211          if (!stack) {
     212              data_stack_dealloc(state);
     213              return SRE_ERROR_MEMORY;
     214          }
     215          state->data_stack = (char *)stack;
     216          state->data_stack_size = cursize;
     217      }
     218      return 0;
     219  }
     220  
     221  /* generate 8-bit version */
     222  
     223  #define SRE_CHAR Py_UCS1
     224  #define SIZEOF_SRE_CHAR 1
     225  #define SRE(F) sre_ucs1_##F
     226  #include "sre_lib.h"
     227  
     228  /* generate 16-bit unicode version */
     229  
     230  #define SRE_CHAR Py_UCS2
     231  #define SIZEOF_SRE_CHAR 2
     232  #define SRE(F) sre_ucs2_##F
     233  #include "sre_lib.h"
     234  
     235  /* generate 32-bit unicode version */
     236  
     237  #define SRE_CHAR Py_UCS4
     238  #define SIZEOF_SRE_CHAR 4
     239  #define SRE(F) sre_ucs4_##F
     240  #include "sre_lib.h"
     241  
     242  /* -------------------------------------------------------------------- */
     243  /* factories and destructors */
     244  
     245  /* module state */
     246  typedef struct {
     247      PyTypeObject *Pattern_Type;
     248      PyTypeObject *Match_Type;
     249      PyTypeObject *Scanner_Type;
     250      PyTypeObject *Template_Type;
     251      PyObject *compile_template;  // reference to re._compile_template
     252  } _sremodulestate;
     253  
     254  static _sremodulestate *
     255  get_sre_module_state(PyObject *m)
     256  {
     257      _sremodulestate *state = (_sremodulestate *)_PyModule_GetState(m);
     258      assert(state);
     259      return state;
     260  }
     261  
     262  static struct PyModuleDef sremodule;
     263  #define get_sre_module_state_by_class(cls) \
     264      (get_sre_module_state(PyType_GetModule(cls)))
     265  
     266  /* see sre.h for object declarations */
     267  static PyObject*pattern_new_match(_sremodulestate *, PatternObject*, SRE_STATE*, Py_ssize_t);
     268  static PyObject *pattern_scanner(_sremodulestate *, PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
     269  
     270  /*[clinic input]
     271  module _sre
     272  class _sre.SRE_Pattern "PatternObject *" "get_sre_module_state_by_class(tp)->Pattern_Type"
     273  class _sre.SRE_Match "MatchObject *" "get_sre_module_state_by_class(tp)->Match_Type"
     274  class _sre.SRE_Scanner "ScannerObject *" "get_sre_module_state_by_class(tp)->Scanner_Type"
     275  [clinic start generated code]*/
     276  /*[clinic end generated code: output=da39a3ee5e6b4b0d input=fe2966e32b66a231]*/
     277  
     278  /*[clinic input]
     279  _sre.getcodesize -> int
     280  [clinic start generated code]*/
     281  
     282  static int
     283  _sre_getcodesize_impl(PyObject *module)
     284  /*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/
     285  {
     286      return sizeof(SRE_CODE);
     287  }
     288  
     289  /*[clinic input]
     290  _sre.ascii_iscased -> bool
     291  
     292      character: int
     293      /
     294  
     295  [clinic start generated code]*/
     296  
     297  static int
     298  _sre_ascii_iscased_impl(PyObject *module, int character)
     299  /*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/
     300  {
     301      unsigned int ch = (unsigned int)character;
     302      return ch < 128 && Py_ISALPHA(ch);
     303  }
     304  
     305  /*[clinic input]
     306  _sre.unicode_iscased -> bool
     307  
     308      character: int
     309      /
     310  
     311  [clinic start generated code]*/
     312  
     313  static int
     314  _sre_unicode_iscased_impl(PyObject *module, int character)
     315  /*[clinic end generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/
     316  {
     317      unsigned int ch = (unsigned int)character;
     318      return ch != sre_lower_unicode(ch) || ch != sre_upper_unicode(ch);
     319  }
     320  
     321  /*[clinic input]
     322  _sre.ascii_tolower -> int
     323  
     324      character: int
     325      /
     326  
     327  [clinic start generated code]*/
     328  
     329  static int
     330  _sre_ascii_tolower_impl(PyObject *module, int character)
     331  /*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/
     332  {
     333      return sre_lower_ascii(character);
     334  }
     335  
     336  /*[clinic input]
     337  _sre.unicode_tolower -> int
     338  
     339      character: int
     340      /
     341  
     342  [clinic start generated code]*/
     343  
     344  static int
     345  _sre_unicode_tolower_impl(PyObject *module, int character)
     346  /*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/
     347  {
     348      return sre_lower_unicode(character);
     349  }
     350  
     351  LOCAL(void)
     352  state_reset(SRE_STATE* state)
     353  {
     354      /* state->mark will be set to 0 in SRE_OP_MARK dynamically. */
     355      /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
     356  
     357      state->lastmark = -1;
     358      state->lastindex = -1;
     359  
     360      state->repeat = NULL;
     361  
     362      data_stack_dealloc(state);
     363  }
     364  
     365  static const void*
     366  getstring(PyObject* string, Py_ssize_t* p_length,
     367            int* p_isbytes, int* p_charsize,
     368            Py_buffer *view)
     369  {
     370      /* given a python object, return a data pointer, a length (in
     371         characters), and a character size.  return NULL if the object
     372         is not a string (or not compatible) */
     373  
     374      /* Unicode objects do not support the buffer API. So, get the data
     375         directly instead. */
     376      if (PyUnicode_Check(string)) {
     377          if (PyUnicode_READY(string) == -1)
     378              return NULL;
     379          *p_length = PyUnicode_GET_LENGTH(string);
     380          *p_charsize = PyUnicode_KIND(string);
     381          *p_isbytes = 0;
     382          return PyUnicode_DATA(string);
     383      }
     384  
     385      /* get pointer to byte string buffer */
     386      if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
     387          PyErr_Format(PyExc_TypeError, "expected string or bytes-like "
     388                       "object, got '%.200s'", Py_TYPE(string)->tp_name);
     389          return NULL;
     390      }
     391  
     392      *p_length = view->len;
     393      *p_charsize = 1;
     394      *p_isbytes = 1;
     395  
     396      if (view->buf == NULL) {
     397          PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
     398          PyBuffer_Release(view);
     399          view->buf = NULL;
     400          return NULL;
     401      }
     402      return view->buf;
     403  }
     404  
     405  LOCAL(PyObject*)
     406  state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
     407             Py_ssize_t start, Py_ssize_t end)
     408  {
     409      /* prepare state object */
     410  
     411      Py_ssize_t length;
     412      int isbytes, charsize;
     413      const void* ptr;
     414  
     415      memset(state, 0, sizeof(SRE_STATE));
     416  
     417      state->mark = PyMem_New(const void *, pattern->groups * 2);
     418      if (!state->mark) {
     419          PyErr_NoMemory();
     420          goto err;
     421      }
     422      state->lastmark = -1;
     423      state->lastindex = -1;
     424  
     425      state->buffer.buf = NULL;
     426      ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
     427      if (!ptr)
     428          goto err;
     429  
     430      if (isbytes && pattern->isbytes == 0) {
     431          PyErr_SetString(PyExc_TypeError,
     432                          "cannot use a string pattern on a bytes-like object");
     433          goto err;
     434      }
     435      if (!isbytes && pattern->isbytes > 0) {
     436          PyErr_SetString(PyExc_TypeError,
     437                          "cannot use a bytes pattern on a string-like object");
     438          goto err;
     439      }
     440  
     441      /* adjust boundaries */
     442      if (start < 0)
     443          start = 0;
     444      else if (start > length)
     445          start = length;
     446  
     447      if (end < 0)
     448          end = 0;
     449      else if (end > length)
     450          end = length;
     451  
     452      state->isbytes = isbytes;
     453      state->charsize = charsize;
     454      state->match_all = 0;
     455      state->must_advance = 0;
     456  
     457      state->beginning = ptr;
     458  
     459      state->start = (void*) ((char*) ptr + start * state->charsize);
     460      state->end = (void*) ((char*) ptr + end * state->charsize);
     461  
     462      state->string = Py_NewRef(string);
     463      state->pos = start;
     464      state->endpos = end;
     465  
     466      return string;
     467    err:
     468      /* We add an explicit cast here because MSVC has a bug when
     469         compiling C code where it believes that `const void**` cannot be
     470         safely casted to `void*`, see bpo-39943 for details. */
     471      PyMem_Free((void*) state->mark);
     472      state->mark = NULL;
     473      if (state->buffer.buf)
     474          PyBuffer_Release(&state->buffer);
     475      return NULL;
     476  }
     477  
     478  LOCAL(void)
     479  state_fini(SRE_STATE* state)
     480  {
     481      if (state->buffer.buf)
     482          PyBuffer_Release(&state->buffer);
     483      Py_XDECREF(state->string);
     484      data_stack_dealloc(state);
     485      /* See above PyMem_Del for why we explicitly cast here. */
     486      PyMem_Free((void*) state->mark);
     487      state->mark = NULL;
     488  }
     489  
     490  /* calculate offset from start of string */
     491  #define STATE_OFFSET(state, member)\
     492      (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
     493  
     494  LOCAL(PyObject*)
     495  getslice(int isbytes, const void *ptr,
     496           PyObject* string, Py_ssize_t start, Py_ssize_t end)
     497  {
     498      if (isbytes) {
     499          if (PyBytes_CheckExact(string) &&
     500              start == 0 && end == PyBytes_GET_SIZE(string)) {
     501              return Py_NewRef(string);
     502          }
     503          return PyBytes_FromStringAndSize(
     504                  (const char *)ptr + start, end - start);
     505      }
     506      else {
     507          return PyUnicode_Substring(string, start, end);
     508      }
     509  }
     510  
     511  LOCAL(PyObject*)
     512  state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
     513  {
     514      Py_ssize_t i, j;
     515  
     516      index = (index - 1) * 2;
     517  
     518      if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
     519          if (empty)
     520              /* want empty string */
     521              i = j = 0;
     522          else {
     523              Py_RETURN_NONE;
     524          }
     525      } else {
     526          i = STATE_OFFSET(state, state->mark[index]);
     527          j = STATE_OFFSET(state, state->mark[index+1]);
     528  
     529          /* check wrong span */
     530          if (i > j) {
     531              PyErr_SetString(PyExc_SystemError,
     532                              "The span of capturing group is wrong,"
     533                              " please report a bug for the re module.");
     534              return NULL;
     535          }
     536      }
     537  
     538      return getslice(state->isbytes, state->beginning, string, i, j);
     539  }
     540  
     541  static void
     542  pattern_error(Py_ssize_t status)
     543  {
     544      switch (status) {
     545      case SRE_ERROR_RECURSION_LIMIT:
     546          /* This error code seems to be unused. */
     547          PyErr_SetString(
     548              PyExc_RecursionError,
     549              "maximum recursion limit exceeded"
     550              );
     551          break;
     552      case SRE_ERROR_MEMORY:
     553          PyErr_NoMemory();
     554          break;
     555      case SRE_ERROR_INTERRUPTED:
     556      /* An exception has already been raised, so let it fly */
     557          break;
     558      default:
     559          /* other error codes indicate compiler/engine bugs */
     560          PyErr_SetString(
     561              PyExc_RuntimeError,
     562              "internal error in regular expression engine"
     563              );
     564      }
     565  }
     566  
     567  static int
     568  pattern_traverse(PatternObject *self, visitproc visit, void *arg)
     569  {
     570      Py_VISIT(Py_TYPE(self));
     571      Py_VISIT(self->groupindex);
     572      Py_VISIT(self->indexgroup);
     573      Py_VISIT(self->pattern);
     574      return 0;
     575  }
     576  
     577  static int
     578  pattern_clear(PatternObject *self)
     579  {
     580      Py_CLEAR(self->groupindex);
     581      Py_CLEAR(self->indexgroup);
     582      Py_CLEAR(self->pattern);
     583      return 0;
     584  }
     585  
     586  static void
     587  pattern_dealloc(PatternObject* self)
     588  {
     589      PyTypeObject *tp = Py_TYPE(self);
     590  
     591      PyObject_GC_UnTrack(self);
     592      if (self->weakreflist != NULL) {
     593          PyObject_ClearWeakRefs((PyObject *) self);
     594      }
     595      (void)pattern_clear(self);
     596      tp->tp_free(self);
     597      Py_DECREF(tp);
     598  }
     599  
     600  LOCAL(Py_ssize_t)
     601  sre_match(SRE_STATE* state, SRE_CODE* pattern)
     602  {
     603      if (state->charsize == 1)
     604          return sre_ucs1_match(state, pattern, 1);
     605      if (state->charsize == 2)
     606          return sre_ucs2_match(state, pattern, 1);
     607      assert(state->charsize == 4);
     608      return sre_ucs4_match(state, pattern, 1);
     609  }
     610  
     611  LOCAL(Py_ssize_t)
     612  sre_search(SRE_STATE* state, SRE_CODE* pattern)
     613  {
     614      if (state->charsize == 1)
     615          return sre_ucs1_search(state, pattern);
     616      if (state->charsize == 2)
     617          return sre_ucs2_search(state, pattern);
     618      assert(state->charsize == 4);
     619      return sre_ucs4_search(state, pattern);
     620  }
     621  
     622  /*[clinic input]
     623  _sre.SRE_Pattern.match
     624  
     625      cls: defining_class
     626      /
     627      string: object
     628      pos: Py_ssize_t = 0
     629      endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
     630  
     631  Matches zero or more characters at the beginning of the string.
     632  [clinic start generated code]*/
     633  
     634  static PyObject *
     635  _sre_SRE_Pattern_match_impl(PatternObject *self, PyTypeObject *cls,
     636                              PyObject *string, Py_ssize_t pos,
     637                              Py_ssize_t endpos)
     638  /*[clinic end generated code: output=ec6208ea58a0cca0 input=4bdb9c3e564d13ac]*/
     639  {
     640      _sremodulestate *module_state = get_sre_module_state_by_class(cls);
     641      SRE_STATE state;
     642      Py_ssize_t status;
     643      PyObject *match;
     644  
     645      if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
     646          return NULL;
     647  
     648      state.ptr = state.start;
     649  
     650      TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
     651  
     652      status = sre_match(&state, PatternObject_GetCode(self));
     653  
     654      TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
     655      if (PyErr_Occurred()) {
     656          state_fini(&state);
     657          return NULL;
     658      }
     659  
     660      match = pattern_new_match(module_state, self, &state, status);
     661      state_fini(&state);
     662      return match;
     663  }
     664  
     665  /*[clinic input]
     666  _sre.SRE_Pattern.fullmatch
     667  
     668      cls: defining_class
     669      /
     670      string: object
     671      pos: Py_ssize_t = 0
     672      endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
     673  
     674  Matches against all of the string.
     675  [clinic start generated code]*/
     676  
     677  static PyObject *
     678  _sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyTypeObject *cls,
     679                                  PyObject *string, Py_ssize_t pos,
     680                                  Py_ssize_t endpos)
     681  /*[clinic end generated code: output=625b75b027ef94da input=50981172ab0fcfdd]*/
     682  {
     683      _sremodulestate *module_state = get_sre_module_state_by_class(cls);
     684      SRE_STATE state;
     685      Py_ssize_t status;
     686      PyObject *match;
     687  
     688      if (!state_init(&state, self, string, pos, endpos))
     689          return NULL;
     690  
     691      state.ptr = state.start;
     692  
     693      TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
     694  
     695      state.match_all = 1;
     696      status = sre_match(&state, PatternObject_GetCode(self));
     697  
     698      TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
     699      if (PyErr_Occurred()) {
     700          state_fini(&state);
     701          return NULL;
     702      }
     703  
     704      match = pattern_new_match(module_state, self, &state, status);
     705      state_fini(&state);
     706      return match;
     707  }
     708  
     709  /*[clinic input]
     710  _sre.SRE_Pattern.search
     711  
     712      cls: defining_class
     713      /
     714      string: object
     715      pos: Py_ssize_t = 0
     716      endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
     717  
     718  Scan through string looking for a match, and return a corresponding match object instance.
     719  
     720  Return None if no position in the string matches.
     721  [clinic start generated code]*/
     722  
     723  static PyObject *
     724  _sre_SRE_Pattern_search_impl(PatternObject *self, PyTypeObject *cls,
     725                               PyObject *string, Py_ssize_t pos,
     726                               Py_ssize_t endpos)
     727  /*[clinic end generated code: output=bd7f2d9d583e1463 input=afa9afb66a74a4b3]*/
     728  {
     729      _sremodulestate *module_state = get_sre_module_state_by_class(cls);
     730      SRE_STATE state;
     731      Py_ssize_t status;
     732      PyObject *match;
     733  
     734      if (!state_init(&state, self, string, pos, endpos))
     735          return NULL;
     736  
     737      TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
     738  
     739      status = sre_search(&state, PatternObject_GetCode(self));
     740  
     741      TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
     742  
     743      if (PyErr_Occurred()) {
     744          state_fini(&state);
     745          return NULL;
     746      }
     747  
     748      match = pattern_new_match(module_state, self, &state, status);
     749      state_fini(&state);
     750      return match;
     751  }
     752  
     753  /*[clinic input]
     754  _sre.SRE_Pattern.findall
     755  
     756      string: object
     757      pos: Py_ssize_t = 0
     758      endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
     759  
     760  Return a list of all non-overlapping matches of pattern in string.
     761  [clinic start generated code]*/
     762  
     763  static PyObject *
     764  _sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
     765                                Py_ssize_t pos, Py_ssize_t endpos)
     766  /*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/
     767  {
     768      SRE_STATE state;
     769      PyObject* list;
     770      Py_ssize_t status;
     771      Py_ssize_t i, b, e;
     772  
     773      if (!state_init(&state, self, string, pos, endpos))
     774          return NULL;
     775  
     776      list = PyList_New(0);
     777      if (!list) {
     778          state_fini(&state);
     779          return NULL;
     780      }
     781  
     782      while (state.start <= state.end) {
     783  
     784          PyObject* item;
     785  
     786          state_reset(&state);
     787  
     788          state.ptr = state.start;
     789  
     790          status = sre_search(&state, PatternObject_GetCode(self));
     791          if (PyErr_Occurred())
     792              goto error;
     793  
     794          if (status <= 0) {
     795              if (status == 0)
     796                  break;
     797              pattern_error(status);
     798              goto error;
     799          }
     800  
     801          /* don't bother to build a match object */
     802          switch (self->groups) {
     803          case 0:
     804              b = STATE_OFFSET(&state, state.start);
     805              e = STATE_OFFSET(&state, state.ptr);
     806              item = getslice(state.isbytes, state.beginning,
     807                              string, b, e);
     808              if (!item)
     809                  goto error;
     810              break;
     811          case 1:
     812              item = state_getslice(&state, 1, string, 1);
     813              if (!item)
     814                  goto error;
     815              break;
     816          default:
     817              item = PyTuple_New(self->groups);
     818              if (!item)
     819                  goto error;
     820              for (i = 0; i < self->groups; i++) {
     821                  PyObject* o = state_getslice(&state, i+1, string, 1);
     822                  if (!o) {
     823                      Py_DECREF(item);
     824                      goto error;
     825                  }
     826                  PyTuple_SET_ITEM(item, i, o);
     827              }
     828              break;
     829          }
     830  
     831          status = PyList_Append(list, item);
     832          Py_DECREF(item);
     833          if (status < 0)
     834              goto error;
     835  
     836          state.must_advance = (state.ptr == state.start);
     837          state.start = state.ptr;
     838      }
     839  
     840      state_fini(&state);
     841      return list;
     842  
     843  error:
     844      Py_DECREF(list);
     845      state_fini(&state);
     846      return NULL;
     847  
     848  }
     849  
     850  /*[clinic input]
     851  _sre.SRE_Pattern.finditer
     852  
     853      cls: defining_class
     854      /
     855      string: object
     856      pos: Py_ssize_t = 0
     857      endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
     858  
     859  Return an iterator over all non-overlapping matches for the RE pattern in string.
     860  
     861  For each match, the iterator returns a match object.
     862  [clinic start generated code]*/
     863  
     864  static PyObject *
     865  _sre_SRE_Pattern_finditer_impl(PatternObject *self, PyTypeObject *cls,
     866                                 PyObject *string, Py_ssize_t pos,
     867                                 Py_ssize_t endpos)
     868  /*[clinic end generated code: output=1791dbf3618ade56 input=812e332a4848cbaf]*/
     869  {
     870      _sremodulestate *module_state = get_sre_module_state_by_class(cls);
     871      PyObject* scanner;
     872      PyObject* search;
     873      PyObject* iterator;
     874  
     875      scanner = pattern_scanner(module_state, self, string, pos, endpos);
     876      if (!scanner)
     877          return NULL;
     878  
     879      search = PyObject_GetAttrString(scanner, "search");
     880      Py_DECREF(scanner);
     881      if (!search)
     882          return NULL;
     883  
     884      iterator = PyCallIter_New(search, Py_None);
     885      Py_DECREF(search);
     886  
     887      return iterator;
     888  }
     889  
     890  /*[clinic input]
     891  _sre.SRE_Pattern.scanner
     892  
     893      cls: defining_class
     894      /
     895      string: object
     896      pos: Py_ssize_t = 0
     897      endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
     898  
     899  [clinic start generated code]*/
     900  
     901  static PyObject *
     902  _sre_SRE_Pattern_scanner_impl(PatternObject *self, PyTypeObject *cls,
     903                                PyObject *string, Py_ssize_t pos,
     904                                Py_ssize_t endpos)
     905  /*[clinic end generated code: output=f70cd506112f1bd9 input=2e487e5151bcee4c]*/
     906  {
     907      _sremodulestate *module_state = get_sre_module_state_by_class(cls);
     908  
     909      return pattern_scanner(module_state, self, string, pos, endpos);
     910  }
     911  
     912  /*[clinic input]
     913  _sre.SRE_Pattern.split
     914  
     915      string: object
     916      maxsplit: Py_ssize_t = 0
     917  
     918  Split string by the occurrences of pattern.
     919  [clinic start generated code]*/
     920  
     921  static PyObject *
     922  _sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
     923                              Py_ssize_t maxsplit)
     924  /*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/
     925  {
     926      SRE_STATE state;
     927      PyObject* list;
     928      PyObject* item;
     929      Py_ssize_t status;
     930      Py_ssize_t n;
     931      Py_ssize_t i;
     932      const void* last;
     933  
     934      assert(self->codesize != 0);
     935  
     936      if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
     937          return NULL;
     938  
     939      list = PyList_New(0);
     940      if (!list) {
     941          state_fini(&state);
     942          return NULL;
     943      }
     944  
     945      n = 0;
     946      last = state.start;
     947  
     948      while (!maxsplit || n < maxsplit) {
     949  
     950          state_reset(&state);
     951  
     952          state.ptr = state.start;
     953  
     954          status = sre_search(&state, PatternObject_GetCode(self));
     955          if (PyErr_Occurred())
     956              goto error;
     957  
     958          if (status <= 0) {
     959              if (status == 0)
     960                  break;
     961              pattern_error(status);
     962              goto error;
     963          }
     964  
     965          /* get segment before this match */
     966          item = getslice(state.isbytes, state.beginning,
     967              string, STATE_OFFSET(&state, last),
     968              STATE_OFFSET(&state, state.start)
     969              );
     970          if (!item)
     971              goto error;
     972          status = PyList_Append(list, item);
     973          Py_DECREF(item);
     974          if (status < 0)
     975              goto error;
     976  
     977          /* add groups (if any) */
     978          for (i = 0; i < self->groups; i++) {
     979              item = state_getslice(&state, i+1, string, 0);
     980              if (!item)
     981                  goto error;
     982              status = PyList_Append(list, item);
     983              Py_DECREF(item);
     984              if (status < 0)
     985                  goto error;
     986          }
     987  
     988          n = n + 1;
     989          state.must_advance = (state.ptr == state.start);
     990          last = state.start = state.ptr;
     991  
     992      }
     993  
     994      /* get segment following last match (even if empty) */
     995      item = getslice(state.isbytes, state.beginning,
     996          string, STATE_OFFSET(&state, last), state.endpos
     997          );
     998      if (!item)
     999          goto error;
    1000      status = PyList_Append(list, item);
    1001      Py_DECREF(item);
    1002      if (status < 0)
    1003          goto error;
    1004  
    1005      state_fini(&state);
    1006      return list;
    1007  
    1008  error:
    1009      Py_DECREF(list);
    1010      state_fini(&state);
    1011      return NULL;
    1012  
    1013  }
    1014  
    1015  static PyObject *
    1016  compile_template(_sremodulestate *module_state,
    1017                   PatternObject *pattern, PyObject *template)
    1018  {
    1019      /* delegate to Python code */
    1020      PyObject *func = module_state->compile_template;
    1021      if (func == NULL) {
    1022          func = _PyImport_GetModuleAttrString("re", "_compile_template");
    1023          if (func == NULL) {
    1024              return NULL;
    1025          }
    1026          Py_XSETREF(module_state->compile_template, func);
    1027      }
    1028  
    1029      PyObject *args[] = {(PyObject *)pattern, template};
    1030      PyObject *result = PyObject_Vectorcall(func, args, 2, NULL);
    1031  
    1032      if (result == NULL && PyErr_ExceptionMatches(PyExc_TypeError)) {
    1033          /* If the replacement string is unhashable (e.g. bytearray),
    1034           * convert it to the basic type (str or bytes) and repeat. */
    1035          if (PyUnicode_Check(template) && !PyUnicode_CheckExact(template)) {
    1036              PyErr_Clear();
    1037              template = _PyUnicode_Copy(template);
    1038          }
    1039          else if (PyObject_CheckBuffer(template) && !PyBytes_CheckExact(template)) {
    1040              PyErr_Clear();
    1041              template = PyBytes_FromObject(template);
    1042          }
    1043          else {
    1044              return NULL;
    1045          }
    1046          if (template == NULL) {
    1047              return NULL;
    1048          }
    1049          args[1] = template;
    1050          result = PyObject_Vectorcall(func, args, 2, NULL);
    1051          Py_DECREF(template);
    1052      }
    1053  
    1054      if (result != NULL && Py_TYPE(result) != module_state->Template_Type) {
    1055          PyErr_Format(PyExc_RuntimeError,
    1056                      "the result of compiling a replacement string is %.200s",
    1057                      Py_TYPE(result)->tp_name);
    1058          Py_DECREF(result);
    1059          return NULL;
    1060      }
    1061      return result;
    1062  }
    1063  
    1064  static PyObject *expand_template(TemplateObject *, MatchObject *); /* Forward */
    1065  
    1066  static PyObject*
    1067  pattern_subx(_sremodulestate* module_state,
    1068               PatternObject* self,
    1069               PyObject* ptemplate,
    1070               PyObject* string,
    1071               Py_ssize_t count,
    1072               Py_ssize_t subn)
    1073  {
    1074      SRE_STATE state;
    1075      PyObject* list;
    1076      PyObject* joiner;
    1077      PyObject* item;
    1078      PyObject* filter;
    1079      PyObject* match;
    1080      const void* ptr;
    1081      Py_ssize_t status;
    1082      Py_ssize_t n;
    1083      Py_ssize_t i, b, e;
    1084      int isbytes, charsize;
    1085      enum {LITERAL, TEMPLATE, CALLABLE} filter_type;
    1086      Py_buffer view;
    1087  
    1088      if (PyCallable_Check(ptemplate)) {
    1089          /* sub/subn takes either a function or a template */
    1090          filter = Py_NewRef(ptemplate);
    1091          filter_type = CALLABLE;
    1092      } else {
    1093          /* if not callable, check if it's a literal string */
    1094          int literal;
    1095          view.buf = NULL;
    1096          ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
    1097          if (ptr) {
    1098              if (charsize == 1)
    1099                  literal = memchr(ptr, '\\', n) == NULL;
    1100              else
    1101                  literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
    1102          } else {
    1103              PyErr_Clear();
    1104              literal = 0;
    1105          }
    1106          if (view.buf)
    1107              PyBuffer_Release(&view);
    1108          if (literal) {
    1109              filter = Py_NewRef(ptemplate);
    1110              filter_type = LITERAL;
    1111          } else {
    1112              /* not a literal; hand it over to the template compiler */
    1113              filter = compile_template(module_state, self, ptemplate);
    1114              if (!filter)
    1115                  return NULL;
    1116  
    1117              assert(Py_TYPE(filter) == module_state->Template_Type);
    1118              if (Py_SIZE(filter) == 0) {
    1119                  Py_SETREF(filter,
    1120                            Py_NewRef(((TemplateObject *)filter)->literal));
    1121                  filter_type = LITERAL;
    1122              }
    1123              else {
    1124                  filter_type = TEMPLATE;
    1125              }
    1126          }
    1127      }
    1128  
    1129      if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
    1130          Py_DECREF(filter);
    1131          return NULL;
    1132      }
    1133  
    1134      list = PyList_New(0);
    1135      if (!list) {
    1136          Py_DECREF(filter);
    1137          state_fini(&state);
    1138          return NULL;
    1139      }
    1140  
    1141      n = i = 0;
    1142  
    1143      while (!count || n < count) {
    1144  
    1145          state_reset(&state);
    1146  
    1147          state.ptr = state.start;
    1148  
    1149          status = sre_search(&state, PatternObject_GetCode(self));
    1150          if (PyErr_Occurred())
    1151              goto error;
    1152  
    1153          if (status <= 0) {
    1154              if (status == 0)
    1155                  break;
    1156              pattern_error(status);
    1157              goto error;
    1158          }
    1159  
    1160          b = STATE_OFFSET(&state, state.start);
    1161          e = STATE_OFFSET(&state, state.ptr);
    1162  
    1163          if (i < b) {
    1164              /* get segment before this match */
    1165              item = getslice(state.isbytes, state.beginning,
    1166                  string, i, b);
    1167              if (!item)
    1168                  goto error;
    1169              status = PyList_Append(list, item);
    1170              Py_DECREF(item);
    1171              if (status < 0)
    1172                  goto error;
    1173  
    1174          }
    1175  
    1176          if (filter_type != LITERAL) {
    1177              /* pass match object through filter */
    1178              match = pattern_new_match(module_state, self, &state, 1);
    1179              if (!match)
    1180                  goto error;
    1181              if (filter_type == TEMPLATE) {
    1182                  item = expand_template((TemplateObject *)filter,
    1183                                         (MatchObject *)match);
    1184              }
    1185              else {
    1186                  assert(filter_type == CALLABLE);
    1187                  item = PyObject_CallOneArg(filter, match);
    1188              }
    1189              Py_DECREF(match);
    1190              if (!item)
    1191                  goto error;
    1192          } else {
    1193              /* filter is literal string */
    1194              item = Py_NewRef(filter);
    1195          }
    1196  
    1197          /* add to list */
    1198          if (item != Py_None) {
    1199              status = PyList_Append(list, item);
    1200              Py_DECREF(item);
    1201              if (status < 0)
    1202                  goto error;
    1203          }
    1204  
    1205          i = e;
    1206          n = n + 1;
    1207          state.must_advance = (state.ptr == state.start);
    1208          state.start = state.ptr;
    1209      }
    1210  
    1211      /* get segment following last match */
    1212      if (i < state.endpos) {
    1213          item = getslice(state.isbytes, state.beginning,
    1214                          string, i, state.endpos);
    1215          if (!item)
    1216              goto error;
    1217          status = PyList_Append(list, item);
    1218          Py_DECREF(item);
    1219          if (status < 0)
    1220              goto error;
    1221      }
    1222  
    1223      state_fini(&state);
    1224  
    1225      Py_DECREF(filter);
    1226  
    1227      /* convert list to single string (also removes list) */
    1228      joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
    1229      if (!joiner) {
    1230          Py_DECREF(list);
    1231          return NULL;
    1232      }
    1233      if (PyList_GET_SIZE(list) == 0) {
    1234          Py_DECREF(list);
    1235          item = joiner;
    1236      }
    1237      else {
    1238          if (state.isbytes)
    1239              item = _PyBytes_Join(joiner, list);
    1240          else
    1241              item = PyUnicode_Join(joiner, list);
    1242          Py_DECREF(joiner);
    1243          Py_DECREF(list);
    1244          if (!item)
    1245              return NULL;
    1246      }
    1247  
    1248      if (subn)
    1249          return Py_BuildValue("Nn", item, n);
    1250  
    1251      return item;
    1252  
    1253  error:
    1254      Py_DECREF(list);
    1255      state_fini(&state);
    1256      Py_DECREF(filter);
    1257      return NULL;
    1258  
    1259  }
    1260  
    1261  /*[clinic input]
    1262  _sre.SRE_Pattern.sub
    1263  
    1264      cls: defining_class
    1265      /
    1266      repl: object
    1267      string: object
    1268      count: Py_ssize_t = 0
    1269  
    1270  Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
    1271  [clinic start generated code]*/
    1272  
    1273  static PyObject *
    1274  _sre_SRE_Pattern_sub_impl(PatternObject *self, PyTypeObject *cls,
    1275                            PyObject *repl, PyObject *string, Py_ssize_t count)
    1276  /*[clinic end generated code: output=4be141ab04bca60d input=d8d1d4ac2311a07c]*/
    1277  {
    1278      _sremodulestate *module_state = get_sre_module_state_by_class(cls);
    1279  
    1280      return pattern_subx(module_state, self, repl, string, count, 0);
    1281  }
    1282  
    1283  /*[clinic input]
    1284  _sre.SRE_Pattern.subn
    1285  
    1286      cls: defining_class
    1287      /
    1288      repl: object
    1289      string: object
    1290      count: Py_ssize_t = 0
    1291  
    1292  Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
    1293  [clinic start generated code]*/
    1294  
    1295  static PyObject *
    1296  _sre_SRE_Pattern_subn_impl(PatternObject *self, PyTypeObject *cls,
    1297                             PyObject *repl, PyObject *string,
    1298                             Py_ssize_t count)
    1299  /*[clinic end generated code: output=da02fd85258b1e1f input=8b78a65b8302e58d]*/
    1300  {
    1301      _sremodulestate *module_state = get_sre_module_state_by_class(cls);
    1302  
    1303      return pattern_subx(module_state, self, repl, string, count, 1);
    1304  }
    1305  
    1306  /*[clinic input]
    1307  _sre.SRE_Pattern.__copy__
    1308  
    1309  [clinic start generated code]*/
    1310  
    1311  static PyObject *
    1312  _sre_SRE_Pattern___copy___impl(PatternObject *self)
    1313  /*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
    1314  {
    1315      return Py_NewRef(self);
    1316  }
    1317  
    1318  /*[clinic input]
    1319  _sre.SRE_Pattern.__deepcopy__
    1320  
    1321      memo: object
    1322      /
    1323  
    1324  [clinic start generated code]*/
    1325  
    1326  static PyObject *
    1327  _sre_SRE_Pattern___deepcopy__(PatternObject *self, PyObject *memo)
    1328  /*[clinic end generated code: output=2ad25679c1f1204a input=a465b1602f997bed]*/
    1329  {
    1330      return Py_NewRef(self);
    1331  }
    1332  
    1333  static PyObject *
    1334  pattern_repr(PatternObject *obj)
    1335  {
    1336      static const struct {
    1337          const char *name;
    1338          int value;
    1339      } flag_names[] = {
    1340          {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
    1341          {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
    1342          {"re.LOCALE", SRE_FLAG_LOCALE},
    1343          {"re.MULTILINE", SRE_FLAG_MULTILINE},
    1344          {"re.DOTALL", SRE_FLAG_DOTALL},
    1345          {"re.UNICODE", SRE_FLAG_UNICODE},
    1346          {"re.VERBOSE", SRE_FLAG_VERBOSE},
    1347          {"re.DEBUG", SRE_FLAG_DEBUG},
    1348          {"re.ASCII", SRE_FLAG_ASCII},
    1349      };
    1350      PyObject *result = NULL;
    1351      PyObject *flag_items;
    1352      size_t i;
    1353      int flags = obj->flags;
    1354  
    1355      /* Omit re.UNICODE for valid string patterns. */
    1356      if (obj->isbytes == 0 &&
    1357          (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
    1358           SRE_FLAG_UNICODE)
    1359          flags &= ~SRE_FLAG_UNICODE;
    1360  
    1361      flag_items = PyList_New(0);
    1362      if (!flag_items)
    1363          return NULL;
    1364  
    1365      for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
    1366          if (flags & flag_names[i].value) {
    1367              PyObject *item = PyUnicode_FromString(flag_names[i].name);
    1368              if (!item)
    1369                  goto done;
    1370  
    1371              if (PyList_Append(flag_items, item) < 0) {
    1372                  Py_DECREF(item);
    1373                  goto done;
    1374              }
    1375              Py_DECREF(item);
    1376              flags &= ~flag_names[i].value;
    1377          }
    1378      }
    1379      if (flags) {
    1380          PyObject *item = PyUnicode_FromFormat("0x%x", flags);
    1381          if (!item)
    1382              goto done;
    1383  
    1384          if (PyList_Append(flag_items, item) < 0) {
    1385              Py_DECREF(item);
    1386              goto done;
    1387          }
    1388          Py_DECREF(item);
    1389      }
    1390  
    1391      if (PyList_Size(flag_items) > 0) {
    1392          PyObject *flags_result;
    1393          PyObject *sep = PyUnicode_FromString("|");
    1394          if (!sep)
    1395              goto done;
    1396          flags_result = PyUnicode_Join(sep, flag_items);
    1397          Py_DECREF(sep);
    1398          if (!flags_result)
    1399              goto done;
    1400          result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
    1401                                        obj->pattern, flags_result);
    1402          Py_DECREF(flags_result);
    1403      }
    1404      else {
    1405          result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
    1406      }
    1407  
    1408  done:
    1409      Py_DECREF(flag_items);
    1410      return result;
    1411  }
    1412  
    1413  PyDoc_STRVAR(pattern_doc, "Compiled regular expression object.");
    1414  
    1415  /* PatternObject's 'groupindex' method. */
    1416  static PyObject *
    1417  pattern_groupindex(PatternObject *self, void *Py_UNUSED(ignored))
    1418  {
    1419      if (self->groupindex == NULL)
    1420          return PyDict_New();
    1421      return PyDictProxy_New(self->groupindex);
    1422  }
    1423  
    1424  static int _validate(PatternObject *self); /* Forward */
    1425  
    1426  /*[clinic input]
    1427  _sre.compile
    1428  
    1429      pattern: object
    1430      flags: int
    1431      code: object(subclass_of='&PyList_Type')
    1432      groups: Py_ssize_t
    1433      groupindex: object(subclass_of='&PyDict_Type')
    1434      indexgroup: object(subclass_of='&PyTuple_Type')
    1435  
    1436  [clinic start generated code]*/
    1437  
    1438  static PyObject *
    1439  _sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
    1440                    PyObject *code, Py_ssize_t groups, PyObject *groupindex,
    1441                    PyObject *indexgroup)
    1442  /*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/
    1443  {
    1444      /* "compile" pattern descriptor to pattern object */
    1445  
    1446      _sremodulestate *module_state = get_sre_module_state(module);
    1447      PatternObject* self;
    1448      Py_ssize_t i, n;
    1449  
    1450      n = PyList_GET_SIZE(code);
    1451      /* coverity[ampersand_in_size] */
    1452      self = PyObject_GC_NewVar(PatternObject, module_state->Pattern_Type, n);
    1453      if (!self)
    1454          return NULL;
    1455      self->weakreflist = NULL;
    1456      self->pattern = NULL;
    1457      self->groupindex = NULL;
    1458      self->indexgroup = NULL;
    1459  
    1460      self->codesize = n;
    1461  
    1462      for (i = 0; i < n; i++) {
    1463          PyObject *o = PyList_GET_ITEM(code, i);
    1464          unsigned long value = PyLong_AsUnsignedLong(o);
    1465          self->code[i] = (SRE_CODE) value;
    1466          if ((unsigned long) self->code[i] != value) {
    1467              PyErr_SetString(PyExc_OverflowError,
    1468                              "regular expression code size limit exceeded");
    1469              break;
    1470          }
    1471      }
    1472      PyObject_GC_Track(self);
    1473  
    1474      if (PyErr_Occurred()) {
    1475          Py_DECREF(self);
    1476          return NULL;
    1477      }
    1478  
    1479      if (pattern == Py_None) {
    1480          self->isbytes = -1;
    1481      }
    1482      else {
    1483          Py_ssize_t p_length;
    1484          int charsize;
    1485          Py_buffer view;
    1486          view.buf = NULL;
    1487          if (!getstring(pattern, &p_length, &self->isbytes,
    1488                         &charsize, &view)) {
    1489              Py_DECREF(self);
    1490              return NULL;
    1491          }
    1492          if (view.buf)
    1493              PyBuffer_Release(&view);
    1494      }
    1495  
    1496      self->pattern = Py_NewRef(pattern);
    1497  
    1498      self->flags = flags;
    1499  
    1500      self->groups = groups;
    1501  
    1502      if (PyDict_GET_SIZE(groupindex) > 0) {
    1503          self->groupindex = Py_NewRef(groupindex);
    1504          if (PyTuple_GET_SIZE(indexgroup) > 0) {
    1505              self->indexgroup = Py_NewRef(indexgroup);
    1506          }
    1507      }
    1508  
    1509      if (!_validate(self)) {
    1510          Py_DECREF(self);
    1511          return NULL;
    1512      }
    1513  
    1514      return (PyObject*) self;
    1515  }
    1516  
    1517  /*[clinic input]
    1518  _sre.template
    1519  
    1520      pattern: object
    1521      template: object(subclass_of="&PyList_Type")
    1522          A list containing interleaved literal strings (str or bytes) and group
    1523          indices (int), as returned by re._parser.parse_template():
    1524              [literal1, group1, ..., literalN, groupN]
    1525      /
    1526  
    1527  [clinic start generated code]*/
    1528  
    1529  static PyObject *
    1530  _sre_template_impl(PyObject *module, PyObject *pattern, PyObject *template)
    1531  /*[clinic end generated code: output=d51290e596ebca86 input=af55380b27f02942]*/
    1532  {
    1533      /* template is a list containing interleaved literal strings (str or bytes)
    1534       * and group indices (int), as returned by _parser.parse_template:
    1535       * [literal1, group1, literal2, ..., literalN].
    1536       */
    1537      _sremodulestate *module_state = get_sre_module_state(module);
    1538      TemplateObject *self = NULL;
    1539      Py_ssize_t n = PyList_GET_SIZE(template);
    1540      if ((n & 1) == 0 || n < 1) {
    1541          goto bad_template;
    1542      }
    1543      n /= 2;
    1544      self = PyObject_GC_NewVar(TemplateObject, module_state->Template_Type, n);
    1545      if (!self)
    1546          return NULL;
    1547      self->chunks = 1 + 2*n;
    1548      self->literal = Py_NewRef(PyList_GET_ITEM(template, 0));
    1549      for (Py_ssize_t i = 0; i < n; i++) {
    1550          Py_ssize_t index = PyLong_AsSsize_t(PyList_GET_ITEM(template, 2*i+1));
    1551          if (index == -1 && PyErr_Occurred()) {
    1552              Py_SET_SIZE(self, i);
    1553              Py_DECREF(self);
    1554              return NULL;
    1555          }
    1556          if (index < 0) {
    1557              Py_SET_SIZE(self, i);
    1558              goto bad_template;
    1559          }
    1560          self->items[i].index = index;
    1561  
    1562          PyObject *literal = PyList_GET_ITEM(template, 2*i+2);
    1563          // Skip empty literals.
    1564          if ((PyUnicode_Check(literal) && !PyUnicode_GET_LENGTH(literal)) ||
    1565              (PyBytes_Check(literal) && !PyBytes_GET_SIZE(literal)))
    1566          {
    1567              literal = NULL;
    1568              self->chunks--;
    1569          }
    1570          self->items[i].literal = Py_XNewRef(literal);
    1571      }
    1572      return (PyObject*) self;
    1573  
    1574  bad_template:
    1575      PyErr_SetString(PyExc_TypeError, "invalid template");
    1576      Py_XDECREF(self);
    1577      return NULL;
    1578  }
    1579  
    1580  /* -------------------------------------------------------------------- */
    1581  /* Code validation */
    1582  
/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
   for conformance with the code patterns generated there.
    1586  
    1587     The nice thing about the generated code is that it is position-independent:
    1588     all jumps are relative jumps forward.  Also, jumps don't cross each other:
    1589     the target of a later jump is always earlier than the target of an earlier
    1590     jump.  IOW, this is okay:
    1591  
    1592     J---------J-------T--------T
    1593      \         \_____/        /
    1594       \______________________/
    1595  
    1596     but this is not:
    1597  
    1598     J---------J-------T--------T
    1599      \_________\_____/        /
    1600                 \____________/
    1601  
    1602     It also helps that SRE_CODE is always an unsigned type.
    1603  */
    1604  
    1605  /* Defining this one enables tracing of the validator */
    1606  #undef VVERBOSE
    1607  
    1608  /* Trace macro for the validator */
    1609  #if defined(VVERBOSE)
    1610  #define VTRACE(v) printf v
    1611  #else
    1612  #define VTRACE(v) do {} while(0)  /* do nothing */
    1613  #endif
    1614  
    1615  /* Report failure */
    1616  #define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return -1; } while (0)
    1617  
    1618  /* Extract opcode, argument, or skip count from code array */
    1619  #define GET_OP                                          \
    1620      do {                                                \
    1621          VTRACE(("%p: ", code));                         \
    1622          if (code >= end) FAIL;                          \
    1623          op = *code++;                                   \
    1624          VTRACE(("%lu (op)\n", (unsigned long)op));      \
    1625      } while (0)
    1626  #define GET_ARG                                         \
    1627      do {                                                \
    1628          VTRACE(("%p= ", code));                         \
    1629          if (code >= end) FAIL;                          \
    1630          arg = *code++;                                  \
    1631          VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    1632      } while (0)
    1633  #define GET_SKIP_ADJ(adj)                               \
    1634      do {                                                \
    1635          VTRACE(("%p= ", code));                         \
    1636          if (code >= end) FAIL;                          \
    1637          skip = *code;                                   \
    1638          VTRACE(("%lu (skip to %p)\n",                   \
    1639                 (unsigned long)skip, code+skip));        \
    1640          if (skip-adj > (uintptr_t)(end - code))         \
    1641              FAIL;                                       \
    1642          code++;                                         \
    1643      } while (0)
    1644  #define GET_SKIP GET_SKIP_ADJ(0)
    1645  
    1646  static int
    1647  _validate_charset(SRE_CODE *code, SRE_CODE *end)
    1648  {
    1649      /* Some variables are manipulated by the macros above */
    1650      SRE_CODE op;
    1651      SRE_CODE arg;
    1652      SRE_CODE offset;
    1653      int i;
    1654  
    1655      while (code < end) {
    1656          GET_OP;
    1657          switch (op) {
    1658  
    1659          case SRE_OP_NEGATE:
    1660              break;
    1661  
    1662          case SRE_OP_LITERAL:
    1663              GET_ARG;
    1664              break;
    1665  
    1666          case SRE_OP_RANGE:
    1667          case SRE_OP_RANGE_UNI_IGNORE:
    1668              GET_ARG;
    1669              GET_ARG;
    1670              break;
    1671  
    1672          case SRE_OP_CHARSET:
    1673              offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
    1674              if (offset > (uintptr_t)(end - code))
    1675                  FAIL;
    1676              code += offset;
    1677              break;
    1678  
    1679          case SRE_OP_BIGCHARSET:
    1680              GET_ARG; /* Number of blocks */
    1681              offset = 256/sizeof(SRE_CODE); /* 256-byte table */
    1682              if (offset > (uintptr_t)(end - code))
    1683                  FAIL;
    1684              /* Make sure that each byte points to a valid block */
    1685              for (i = 0; i < 256; i++) {
    1686                  if (((unsigned char *)code)[i] >= arg)
    1687                      FAIL;
    1688              }
    1689              code += offset;
    1690              offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
    1691              if (offset > (uintptr_t)(end - code))
    1692                  FAIL;
    1693              code += offset;
    1694              break;
    1695  
    1696          case SRE_OP_CATEGORY:
    1697              GET_ARG;
    1698              switch (arg) {
    1699              case SRE_CATEGORY_DIGIT:
    1700              case SRE_CATEGORY_NOT_DIGIT:
    1701              case SRE_CATEGORY_SPACE:
    1702              case SRE_CATEGORY_NOT_SPACE:
    1703              case SRE_CATEGORY_WORD:
    1704              case SRE_CATEGORY_NOT_WORD:
    1705              case SRE_CATEGORY_LINEBREAK:
    1706              case SRE_CATEGORY_NOT_LINEBREAK:
    1707              case SRE_CATEGORY_LOC_WORD:
    1708              case SRE_CATEGORY_LOC_NOT_WORD:
    1709              case SRE_CATEGORY_UNI_DIGIT:
    1710              case SRE_CATEGORY_UNI_NOT_DIGIT:
    1711              case SRE_CATEGORY_UNI_SPACE:
    1712              case SRE_CATEGORY_UNI_NOT_SPACE:
    1713              case SRE_CATEGORY_UNI_WORD:
    1714              case SRE_CATEGORY_UNI_NOT_WORD:
    1715              case SRE_CATEGORY_UNI_LINEBREAK:
    1716              case SRE_CATEGORY_UNI_NOT_LINEBREAK:
    1717                  break;
    1718              default:
    1719                  FAIL;
    1720              }
    1721              break;
    1722  
    1723          default:
    1724              FAIL;
    1725  
    1726          }
    1727      }
    1728  
    1729      return 0;
    1730  }
    1731  
/* Validate the opcode sequence in [code, end).

   code    - first word of the sequence to check
   end     - one past its last word
   groups  - group count of the pattern; it bounds the arguments of MARK
             and of the GROUPREF family below

   Constructs that carry a skip word (IN, BRANCH, the repeats, assertions,
   ...) are checked by validating the sub-range that the skip delimits,
   recursively where the body is itself an opcode sequence.

   Returns 0 on success, -1 on failure, and 1 if the last op is JUMP. */
static int
_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
{
    /* Some variables are manipulated by the macros above */
    SRE_CODE op;
    SRE_CODE arg;
    SRE_CODE skip;

    VTRACE(("code=%p, end=%p\n", code, end));

    /* Callers derive 'end' from a skip word minus a fixed overhead; an
       'end' that lies before 'code' means the skip was too small for the
       construct being validated. */
    if (code > end)
        FAIL;

    while (code < end) {
        GET_OP;
        switch (op) {

        case SRE_OP_MARK:
            /* We don't check whether marks are properly nested; the
               sre_match() code is robust even if they don't, and the worst
               you can get is nonsensical match results. */
            GET_ARG;
            if (arg > 2 * (size_t)groups + 1) {
                VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
                FAIL;
            }
            break;

        case SRE_OP_LITERAL:
        case SRE_OP_NOT_LITERAL:
        case SRE_OP_LITERAL_IGNORE:
        case SRE_OP_NOT_LITERAL_IGNORE:
        case SRE_OP_LITERAL_UNI_IGNORE:
        case SRE_OP_NOT_LITERAL_UNI_IGNORE:
        case SRE_OP_LITERAL_LOC_IGNORE:
        case SRE_OP_NOT_LITERAL_LOC_IGNORE:
            GET_ARG;
            /* The arg is just a character, nothing to check */
            break;

        case SRE_OP_SUCCESS:
        case SRE_OP_FAILURE:
            /* Nothing to check; these normally end the matching process */
            break;

        case SRE_OP_AT:
            GET_ARG;
            /* Only the known position codes are accepted */
            switch (arg) {
            case SRE_AT_BEGINNING:
            case SRE_AT_BEGINNING_STRING:
            case SRE_AT_BEGINNING_LINE:
            case SRE_AT_END:
            case SRE_AT_END_LINE:
            case SRE_AT_END_STRING:
            case SRE_AT_BOUNDARY:
            case SRE_AT_NON_BOUNDARY:
            case SRE_AT_LOC_BOUNDARY:
            case SRE_AT_LOC_NON_BOUNDARY:
            case SRE_AT_UNI_BOUNDARY:
            case SRE_AT_UNI_NON_BOUNDARY:
                break;
            default:
                FAIL;
            }
            break;

        case SRE_OP_ANY:
        case SRE_OP_ANY_ALL:
            /* These have no operands */
            break;

        case SRE_OP_IN:
        case SRE_OP_IN_IGNORE:
        case SRE_OP_IN_UNI_IGNORE:
        case SRE_OP_IN_LOC_IGNORE:
            GET_SKIP;
            /* Stop 1 before the end; we check the FAILURE below */
            if (_validate_charset(code, code+skip-2))
                FAIL;
            if (code[skip-2] != SRE_OP_FAILURE)
                FAIL;
            code += skip-1;
            break;

        case SRE_OP_INFO:
            {
                /* A minimal info field is
                   <INFO> <1=skip> <2=flags> <3=min> <4=max>;
                   If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
                   more follows. */
                SRE_CODE flags, i;
                SRE_CODE *newcode;
                GET_SKIP;
                /* newcode: where the skip word says the INFO block ends */
                newcode = code+skip-1;
                GET_ARG; flags = arg;
                GET_ARG;
                GET_ARG;
                /* Check that only valid flags are present */
                if ((flags & ~(SRE_INFO_PREFIX |
                               SRE_INFO_LITERAL |
                               SRE_INFO_CHARSET)) != 0)
                    FAIL;
                /* PREFIX and CHARSET are mutually exclusive */
                if ((flags & SRE_INFO_PREFIX) &&
                    (flags & SRE_INFO_CHARSET))
                    FAIL;
                /* LITERAL implies PREFIX */
                if ((flags & SRE_INFO_LITERAL) &&
                    !(flags & SRE_INFO_PREFIX))
                    FAIL;
                /* Validate the prefix */
                if (flags & SRE_INFO_PREFIX) {
                    SRE_CODE prefix_len;
                    GET_ARG; prefix_len = arg;
                    GET_ARG;
                    /* Here comes the prefix string */
                    if (prefix_len > (uintptr_t)(newcode - code))
                        FAIL;
                    code += prefix_len;
                    /* And here comes the overlap table */
                    if (prefix_len > (uintptr_t)(newcode - code))
                        FAIL;
                    /* Each overlap value should be < prefix_len */
                    for (i = 0; i < prefix_len; i++) {
                        if (code[i] >= prefix_len)
                            FAIL;
                    }
                    code += prefix_len;
                }
                /* Validate the charset */
                if (flags & SRE_INFO_CHARSET) {
                    if (_validate_charset(code, newcode-1))
                        FAIL;
                    if (newcode[-1] != SRE_OP_FAILURE)
                        FAIL;
                    code = newcode;
                }
                /* Without a charset, everything up to newcode must already
                   have been consumed */
                else if (code != newcode) {
                  VTRACE(("code=%p, newcode=%p\n", code, newcode));
                    FAIL;
                }
            }
            break;

        case SRE_OP_BRANCH:
            {
                SRE_CODE *target = NULL;
                for (;;) {
                    GET_SKIP;
                    /* A zero skip ends the list of alternatives */
                    if (skip == 0)
                        break;
                    /* Stop 2 before the end; we check the JUMP below */
                    if (_validate_inner(code, code+skip-3, groups))
                        FAIL;
                    code += skip-3;
                    /* Check that it ends with a JUMP, and that each JUMP
                       has the same target */
                    GET_OP;
                    if (op != SRE_OP_JUMP)
                        FAIL;
                    GET_SKIP;
                    if (target == NULL)
                        target = code+skip-1;
                    else if (code+skip-1 != target)
                        FAIL;
                }
                /* The shared JUMP target must be the word right after the
                   terminating zero skip */
                if (code != target)
                    FAIL;
            }
            break;

        case SRE_OP_REPEAT_ONE:
        case SRE_OP_MIN_REPEAT_ONE:
        case SRE_OP_POSSESSIVE_REPEAT_ONE:
            {
                SRE_CODE min, max;
                GET_SKIP;
                GET_ARG; min = arg;
                GET_ARG; max = arg;
                if (min > max)
                    FAIL;
                if (max > SRE_MAXREPEAT)
                    FAIL;
                /* The repeated item ends just before a SUCCESS word */
                if (_validate_inner(code, code+skip-4, groups))
                    FAIL;
                code += skip-4;
                GET_OP;
                if (op != SRE_OP_SUCCESS)
                    FAIL;
            }
            break;

        case SRE_OP_REPEAT:
        case SRE_OP_POSSESSIVE_REPEAT:
            {
                /* op1 remembers which repeat this is; 'op' is overwritten
                   by the GET_OP that reads the closing opcode below */
                SRE_CODE op1 = op, min, max;
                GET_SKIP;
                GET_ARG; min = arg;
                GET_ARG; max = arg;
                if (min > max)
                    FAIL;
                if (max > SRE_MAXREPEAT)
                    FAIL;
                if (_validate_inner(code, code+skip-3, groups))
                    FAIL;
                code += skip-3;
                GET_OP;
                /* POSSESSIVE_REPEAT closes with SUCCESS, a plain REPEAT
                   with MAX_UNTIL or MIN_UNTIL */
                if (op1 == SRE_OP_POSSESSIVE_REPEAT) {
                    if (op != SRE_OP_SUCCESS)
                        FAIL;
                }
                else {
                    if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
                        FAIL;
                }
            }
            break;

        case SRE_OP_ATOMIC_GROUP:
            {
                GET_SKIP;
                if (_validate_inner(code, code+skip-2, groups))
                    FAIL;
                code += skip-2;
                GET_OP;
                if (op != SRE_OP_SUCCESS)
                    FAIL;
            }
            break;

        case SRE_OP_GROUPREF:
        case SRE_OP_GROUPREF_IGNORE:
        case SRE_OP_GROUPREF_UNI_IGNORE:
        case SRE_OP_GROUPREF_LOC_IGNORE:
            GET_ARG;
            /* The referenced group number must be below the group count */
            if (arg >= (size_t)groups)
                FAIL;
            break;

        case SRE_OP_GROUPREF_EXISTS:
            /* The regex syntax for this is: '(?(group)then|else)', where
               'group' is either an integer group number or a group name,
               'then' and 'else' are sub-regexes, and 'else' is optional. */
            GET_ARG;
            if (arg >= (size_t)groups)
                FAIL;
            GET_SKIP_ADJ(1);
            code--; /* The skip is relative to the first arg! */
            /* There are two possibilities here: if there is both a 'then'
               part and an 'else' part, the generated code looks like:

               GROUPREF_EXISTS
               <group>
               <skipyes>
               ...then part...
               JUMP
               <skipno>
               (<skipyes> jumps here)
               ...else part...
               (<skipno> jumps here)

               If there is only a 'then' part, it looks like:

               GROUPREF_EXISTS
               <group>
               <skip>
               ...then part...
               (<skip> jumps here)

               There is no direct way to decide which it is, and we don't want
               to allow arbitrary jumps anywhere in the code; so we just look
               for a JUMP opcode preceding our skip target.
            */
            VTRACE(("then part:\n"));
            /* rc == 1 means the 'then' part ended with a JUMP, i.e. an
               'else' part follows */
            int rc = _validate_inner(code+1, code+skip-1, groups);
            if (rc == 1) {
                VTRACE(("else part:\n"));
                code += skip-2; /* Position after JUMP, at <skipno> */
                GET_SKIP;
                rc = _validate_inner(code, code+skip-1, groups);
            }
            if (rc)
                FAIL;
            code += skip-1;
            break;

        case SRE_OP_ASSERT:
        case SRE_OP_ASSERT_NOT:
            GET_SKIP;
            GET_ARG; /* 0 for lookahead, width for lookbehind */
            code--; /* Back up over arg to simplify math below */
            if (arg & 0x80000000)
                FAIL; /* Width too large */
            /* Stop 1 before the end; we check the SUCCESS below */
            if (_validate_inner(code+1, code+skip-2, groups))
                FAIL;
            code += skip-2;
            GET_OP;
            if (op != SRE_OP_SUCCESS)
                FAIL;
            break;

        case SRE_OP_JUMP:
            /* Accepted only when exactly one more word (presumably the
               JUMP's skip) remains before 'end'; the return value 1 is what
               the GROUPREF_EXISTS case above tests for */
            if (code + 1 != end)
                FAIL;
            VTRACE(("JUMP: %d\n", __LINE__));
            return 1;

        default:
            FAIL;

        }
    }

    VTRACE(("okay\n"));
    return 0;
}
    2050  
    2051  static int
    2052  _validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
    2053  {
    2054      if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
    2055          code >= end || end[-1] != SRE_OP_SUCCESS)
    2056          FAIL;
    2057      return _validate_inner(code, end-1, groups);
    2058  }
    2059  
    2060  static int
    2061  _validate(PatternObject *self)
    2062  {
    2063      if (_validate_outer(self->code, self->code+self->codesize, self->groups))
    2064      {
    2065          PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
    2066          return 0;
    2067      }
    2068      else
    2069          VTRACE(("Success!\n"));
    2070      return 1;
    2071  }
    2072  
    2073  /* -------------------------------------------------------------------- */
    2074  /* match methods */
    2075  
/* GC traversal for match objects: hands the object's type and the string,
   regs and pattern members to 'visit'.  Returns 0 once all of them have
   been visited.  (Presumably installed as the type's tp_traverse slot; the
   slot table is not part of this section.) */
static int
match_traverse(MatchObject *self, visitproc visit, void *arg)
{
    /* Py_VISIT refers to the parameters 'visit' and 'arg' by name */
    Py_VISIT(Py_TYPE(self));
    Py_VISIT(self->string);
    Py_VISIT(self->regs);
    Py_VISIT(self->pattern);
    return 0;
}
    2085  
/* Release the references a match object holds in its string, regs and
   pattern members.  Always returns 0.  Also called from match_dealloc(). */
static int
match_clear(MatchObject *self)
{
    Py_CLEAR(self->string);
    Py_CLEAR(self->regs);
    Py_CLEAR(self->pattern);
    return 0;
}
    2094  
    2095  static void
    2096  match_dealloc(MatchObject* self)
    2097  {
    2098      PyTypeObject *tp = Py_TYPE(self);
    2099  
    2100      PyObject_GC_UnTrack(self);
    2101      (void)match_clear(self);
    2102      tp->tp_free(self);
    2103      Py_DECREF(tp);
    2104  }
    2105  
    2106  static PyObject*
    2107  match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
    2108  {
    2109      Py_ssize_t length;
    2110      int isbytes, charsize;
    2111      Py_buffer view;
    2112      PyObject *result;
    2113      const void* ptr;
    2114      Py_ssize_t i, j;
    2115  
    2116      assert(0 <= index && index < self->groups);
    2117      index *= 2;
    2118  
    2119      if (self->string == Py_None || self->mark[index] < 0) {
    2120          /* return default value if the string or group is undefined */
    2121          return Py_NewRef(def);
    2122      }
    2123  
    2124      ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
    2125      if (ptr == NULL)
    2126          return NULL;
    2127  
    2128      i = self->mark[index];
    2129      j = self->mark[index+1];
    2130      i = Py_MIN(i, length);
    2131      j = Py_MIN(j, length);
    2132      result = getslice(isbytes, ptr, self->string, i, j);
    2133      if (isbytes && view.buf != NULL)
    2134          PyBuffer_Release(&view);
    2135      return result;
    2136  }
    2137  
    2138  static Py_ssize_t
    2139  match_getindex(MatchObject* self, PyObject* index)
    2140  {
    2141      Py_ssize_t i;
    2142  
    2143      if (index == NULL)
    2144          /* Default value */
    2145          return 0;
    2146  
    2147      if (PyIndex_Check(index)) {
    2148          i = PyNumber_AsSsize_t(index, NULL);
    2149      }
    2150      else {
    2151          i = -1;
    2152  
    2153          if (self->pattern->groupindex) {
    2154              index = PyDict_GetItemWithError(self->pattern->groupindex, index);
    2155              if (index && PyLong_Check(index)) {
    2156                  i = PyLong_AsSsize_t(index);
    2157              }
    2158          }
    2159      }
    2160      if (i < 0 || i >= self->groups) {
    2161          /* raise IndexError if we were given a bad group number */
    2162          if (!PyErr_Occurred()) {
    2163              PyErr_SetString(PyExc_IndexError, "no such group");
    2164          }
    2165          return -1;
    2166      }
    2167  
    2168      return i;
    2169  }
    2170  
    2171  static PyObject*
    2172  match_getslice(MatchObject* self, PyObject* index, PyObject* def)
    2173  {
    2174      Py_ssize_t i = match_getindex(self, index);
    2175  
    2176      if (i < 0) {
    2177          return NULL;
    2178      }
    2179  
    2180      return match_getslice_by_index(self, i, def);
    2181  }
    2182  
    2183  /*[clinic input]
    2184  _sre.SRE_Match.expand
    2185  
    2186      template: object
    2187  
    2188  Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
    2189  [clinic start generated code]*/
    2190  
    2191  static PyObject *
    2192  _sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
    2193  /*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
    2194  {
    2195      _sremodulestate *module_state = get_sre_module_state_by_class(Py_TYPE(self));
    2196      PyObject *filter = compile_template(module_state, self->pattern, template);
    2197      if (filter == NULL) {
    2198          return NULL;
    2199      }
    2200      PyObject *result = expand_template((TemplateObject *)filter, self);
    2201      Py_DECREF(filter);
    2202      return result;
    2203  }
    2204  
    2205  static PyObject*
    2206  match_group(MatchObject* self, PyObject* args)
    2207  {
    2208      PyObject* result;
    2209      Py_ssize_t i, size;
    2210  
    2211      size = PyTuple_GET_SIZE(args);
    2212  
    2213      switch (size) {
    2214      case 0:
    2215          result = match_getslice(self, _PyLong_GetZero(), Py_None);
    2216          break;
    2217      case 1:
    2218          result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
    2219          break;
    2220      default:
    2221          /* fetch multiple items */
    2222          result = PyTuple_New(size);
    2223          if (!result)
    2224              return NULL;
    2225          for (i = 0; i < size; i++) {
    2226              PyObject* item = match_getslice(
    2227                  self, PyTuple_GET_ITEM(args, i), Py_None
    2228                  );
    2229              if (!item) {
    2230                  Py_DECREF(result);
    2231                  return NULL;
    2232              }
    2233              PyTuple_SET_ITEM(result, i, item);
    2234          }
    2235          break;
    2236      }
    2237      return result;
    2238  }
    2239  
    2240  static PyObject*
    2241  match_getitem(MatchObject* self, PyObject* name)
    2242  {
    2243      return match_getslice(self, name, Py_None);
    2244  }
    2245  
    2246  /*[clinic input]
    2247  _sre.SRE_Match.groups
    2248  
    2249      default: object = None
    2250          Is used for groups that did not participate in the match.
    2251  
    2252  Return a tuple containing all the subgroups of the match, from 1.
    2253  [clinic start generated code]*/
    2254  
    2255  static PyObject *
    2256  _sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
    2257  /*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
    2258  {
    2259      PyObject* result;
    2260      Py_ssize_t index;
    2261  
    2262      result = PyTuple_New(self->groups-1);
    2263      if (!result)
    2264          return NULL;
    2265  
    2266      for (index = 1; index < self->groups; index++) {
    2267          PyObject* item;
    2268          item = match_getslice_by_index(self, index, default_value);
    2269          if (!item) {
    2270              Py_DECREF(result);
    2271              return NULL;
    2272          }
    2273          PyTuple_SET_ITEM(result, index-1, item);
    2274      }
    2275  
    2276      return result;
    2277  }
    2278  
    2279  /*[clinic input]
    2280  _sre.SRE_Match.groupdict
    2281  
    2282      default: object = None
    2283          Is used for groups that did not participate in the match.
    2284  
    2285  Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
    2286  [clinic start generated code]*/
    2287  
    2288  static PyObject *
    2289  _sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
    2290  /*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
    2291  {
    2292      PyObject *result;
    2293      PyObject *key;
    2294      PyObject *value;
    2295      Py_ssize_t pos = 0;
    2296      Py_hash_t hash;
    2297  
    2298      result = PyDict_New();
    2299      if (!result || !self->pattern->groupindex)
    2300          return result;
    2301  
    2302      while (_PyDict_Next(self->pattern->groupindex, &pos, &key, &value, &hash)) {
    2303          int status;
    2304          Py_INCREF(key);
    2305          value = match_getslice(self, key, default_value);
    2306          if (!value) {
    2307              Py_DECREF(key);
    2308              goto failed;
    2309          }
    2310          status = _PyDict_SetItem_KnownHash(result, key, value, hash);
    2311          Py_DECREF(value);
    2312          Py_DECREF(key);
    2313          if (status < 0)
    2314              goto failed;
    2315      }
    2316  
    2317      return result;
    2318  
    2319  failed:
    2320      Py_DECREF(result);
    2321      return NULL;
    2322  }
    2323  
    2324  /*[clinic input]
    2325  _sre.SRE_Match.start -> Py_ssize_t
    2326  
    2327      group: object(c_default="NULL") = 0
    2328      /
    2329  
    2330  Return index of the start of the substring matched by group.
    2331  [clinic start generated code]*/
    2332  
    2333  static Py_ssize_t
    2334  _sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
    2335  /*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
    2336  {
    2337      Py_ssize_t index = match_getindex(self, group);
    2338  
    2339      if (index < 0) {
    2340          return -1;
    2341      }
    2342  
    2343      /* mark is -1 if group is undefined */
    2344      return self->mark[index*2];
    2345  }
    2346  
    2347  /*[clinic input]
    2348  _sre.SRE_Match.end -> Py_ssize_t
    2349  
    2350      group: object(c_default="NULL") = 0
    2351      /
    2352  
    2353  Return index of the end of the substring matched by group.
    2354  [clinic start generated code]*/
    2355  
    2356  static Py_ssize_t
    2357  _sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
    2358  /*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
    2359  {
    2360      Py_ssize_t index = match_getindex(self, group);
    2361  
    2362      if (index < 0) {
    2363          return -1;
    2364      }
    2365  
    2366      /* mark is -1 if group is undefined */
    2367      return self->mark[index*2+1];
    2368  }
    2369  
    2370  LOCAL(PyObject*)
    2371  _pair(Py_ssize_t i1, Py_ssize_t i2)
    2372  {
    2373      PyObject* pair;
    2374      PyObject* item;
    2375  
    2376      pair = PyTuple_New(2);
    2377      if (!pair)
    2378          return NULL;
    2379  
    2380      item = PyLong_FromSsize_t(i1);
    2381      if (!item)
    2382          goto error;
    2383      PyTuple_SET_ITEM(pair, 0, item);
    2384  
    2385      item = PyLong_FromSsize_t(i2);
    2386      if (!item)
    2387          goto error;
    2388      PyTuple_SET_ITEM(pair, 1, item);
    2389  
    2390      return pair;
    2391  
    2392    error:
    2393      Py_DECREF(pair);
    2394      return NULL;
    2395  }
    2396  
    2397  /*[clinic input]
    2398  _sre.SRE_Match.span
    2399  
    2400      group: object(c_default="NULL") = 0
    2401      /
    2402  
    2403  For match object m, return the 2-tuple (m.start(group), m.end(group)).
    2404  [clinic start generated code]*/
    2405  
    2406  static PyObject *
    2407  _sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
    2408  /*[clinic end generated code: output=f02ae40594d14fe6 input=8fa6014e982d71d4]*/
    2409  {
    2410      Py_ssize_t index = match_getindex(self, group);
    2411  
    2412      if (index < 0) {
    2413          return NULL;
    2414      }
    2415  
    2416      /* marks are -1 if group is undefined */
    2417      return _pair(self->mark[index*2], self->mark[index*2+1]);
    2418  }
    2419  
    2420  static PyObject*
    2421  match_regs(MatchObject* self)
    2422  {
    2423      PyObject* regs;
    2424      PyObject* item;
    2425      Py_ssize_t index;
    2426  
    2427      regs = PyTuple_New(self->groups);
    2428      if (!regs)
    2429          return NULL;
    2430  
    2431      for (index = 0; index < self->groups; index++) {
    2432          item = _pair(self->mark[index*2], self->mark[index*2+1]);
    2433          if (!item) {
    2434              Py_DECREF(regs);
    2435              return NULL;
    2436          }
    2437          PyTuple_SET_ITEM(regs, index, item);
    2438      }
    2439  
    2440      self->regs = Py_NewRef(regs);
    2441  
    2442      return regs;
    2443  }
    2444  
    2445  /*[clinic input]
    2446  _sre.SRE_Match.__copy__
    2447  
    2448  [clinic start generated code]*/
    2449  
    2450  static PyObject *
    2451  _sre_SRE_Match___copy___impl(MatchObject *self)
    2452  /*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
    2453  {
    2454      return Py_NewRef(self);
    2455  }
    2456  
    2457  /*[clinic input]
    2458  _sre.SRE_Match.__deepcopy__
    2459  
    2460      memo: object
    2461      /
    2462  
    2463  [clinic start generated code]*/
    2464  
    2465  static PyObject *
    2466  _sre_SRE_Match___deepcopy__(MatchObject *self, PyObject *memo)
    2467  /*[clinic end generated code: output=ba7cb46d655e4ee2 input=779d12a31c2c325e]*/
    2468  {
    2469      return Py_NewRef(self);
    2470  }
    2471  
/* Docstring text describing match objects in general. */
PyDoc_STRVAR(match_doc,
"The result of re.match() and re.search().\n\
Match objects always have a boolean value of True.");

/* Docstring text for the group() method; see match_group(). */
PyDoc_STRVAR(match_group_doc,
"group([group1, ...]) -> str or tuple.\n\
    Return subgroup(s) of the match by indices or names.\n\
    For 0 returns the entire match.");
    2480  
    2481  static PyObject *
    2482  match_lastindex_get(MatchObject *self, void *Py_UNUSED(ignored))
    2483  {
    2484      if (self->lastindex >= 0)
    2485          return PyLong_FromSsize_t(self->lastindex);
    2486      Py_RETURN_NONE;
    2487  }
    2488  
    2489  static PyObject *
    2490  match_lastgroup_get(MatchObject *self, void *Py_UNUSED(ignored))
    2491  {
    2492      if (self->pattern->indexgroup &&
    2493          self->lastindex >= 0 &&
    2494          self->lastindex < PyTuple_GET_SIZE(self->pattern->indexgroup))
    2495      {
    2496          PyObject *result = PyTuple_GET_ITEM(self->pattern->indexgroup,
    2497                                              self->lastindex);
    2498          return Py_NewRef(result);
    2499      }
    2500      Py_RETURN_NONE;
    2501  }
    2502  
    2503  static PyObject *
    2504  match_regs_get(MatchObject *self, void *Py_UNUSED(ignored))
    2505  {
    2506      if (self->regs) {
    2507          return Py_NewRef(self->regs);
    2508      } else
    2509          return match_regs(self);
    2510  }
    2511  
    2512  static PyObject *
    2513  match_repr(MatchObject *self)
    2514  {
    2515      PyObject *result;
    2516      PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
    2517      if (group0 == NULL)
    2518          return NULL;
    2519      result = PyUnicode_FromFormat(
    2520              "<%s object; span=(%zd, %zd), match=%.50R>",
    2521              Py_TYPE(self)->tp_name,
    2522              self->mark[0], self->mark[1], group0);
    2523      Py_DECREF(group0);
    2524      return result;
    2525  }
    2526  
    2527  
    2528  static PyObject*
    2529  pattern_new_match(_sremodulestate* module_state,
    2530                    PatternObject* pattern,
    2531                    SRE_STATE* state,
    2532                    Py_ssize_t status)
    2533  {
    2534      /* create match object (from state object) */
    2535  
    2536      MatchObject* match;
    2537      Py_ssize_t i, j;
    2538      char* base;
    2539      int n;
    2540  
    2541      if (status > 0) {
    2542  
    2543          /* create match object (with room for extra group marks) */
    2544          /* coverity[ampersand_in_size] */
    2545          match = PyObject_GC_NewVar(MatchObject,
    2546                                     module_state->Match_Type,
    2547                                     2*(pattern->groups+1));
    2548          if (!match)
    2549              return NULL;
    2550  
    2551          match->pattern = (PatternObject*)Py_NewRef(pattern);
    2552  
    2553          match->string = Py_NewRef(state->string);
    2554  
    2555          match->regs = NULL;
    2556          match->groups = pattern->groups+1;
    2557  
    2558          /* fill in group slices */
    2559  
    2560          base = (char*) state->beginning;
    2561          n = state->charsize;
    2562  
    2563          match->mark[0] = ((char*) state->start - base) / n;
    2564          match->mark[1] = ((char*) state->ptr - base) / n;
    2565  
    2566          for (i = j = 0; i < pattern->groups; i++, j+=2)
    2567              if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
    2568                  match->mark[j+2] = ((char*) state->mark[j] - base) / n;
    2569                  match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
    2570  
    2571                  /* check wrong span */
    2572                  if (match->mark[j+2] > match->mark[j+3]) {
    2573                      PyErr_SetString(PyExc_SystemError,
    2574                                      "The span of capturing group is wrong,"
    2575                                      " please report a bug for the re module.");
    2576                      Py_DECREF(match);
    2577                      return NULL;
    2578                  }
    2579              } else
    2580                  match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
    2581  
    2582          match->pos = state->pos;
    2583          match->endpos = state->endpos;
    2584  
    2585          match->lastindex = state->lastindex;
    2586  
    2587          PyObject_GC_Track(match);
    2588          return (PyObject*) match;
    2589  
    2590      } else if (status == 0) {
    2591  
    2592          /* no match */
    2593          Py_RETURN_NONE;
    2594  
    2595      }
    2596  
    2597      /* internal error */
    2598      pattern_error(status);
    2599      return NULL;
    2600  }
    2601  
    2602  
    2603  /* -------------------------------------------------------------------- */
    2604  /* scanner methods (experimental) */
    2605  
/* GC traversal for scanner objects: reports the (heap) type and the
   referenced pattern to the collector. */
static int
scanner_traverse(ScannerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(Py_TYPE(self));
    Py_VISIT(self->pattern);
    return 0;
}
    2613  
/* GC clear for scanner objects: drops the reference to the pattern.
   Always returns 0. */
static int
scanner_clear(ScannerObject *self)
{
    Py_CLEAR(self->pattern);
    return 0;
}
    2620  
    2621  static void
    2622  scanner_dealloc(ScannerObject* self)
    2623  {
    2624      PyTypeObject *tp = Py_TYPE(self);
    2625  
    2626      PyObject_GC_UnTrack(self);
    2627      state_fini(&self->state);
    2628      (void)scanner_clear(self);
    2629      tp->tp_free(self);
    2630      Py_DECREF(tp);
    2631  }
    2632  
    2633  static int
    2634  scanner_begin(ScannerObject* self)
    2635  {
    2636      if (self->executing) {
    2637          PyErr_SetString(PyExc_ValueError,
    2638                          "regular expression scanner already executing");
    2639          return 0;
    2640      }
    2641      self->executing = 1;
    2642      return 1;
    2643  }
    2644  
/* Counterpart of scanner_begin(): clears the "executing" flag.  Must only
   be called while the flag is set (checked by the assert). */
static void
scanner_end(ScannerObject* self)
{
    assert(self->executing);
    self->executing = 0;
}
    2651  
    2652  /*[clinic input]
    2653  _sre.SRE_Scanner.match
    2654  
    2655      cls: defining_class
    2656      /
    2657  
    2658  [clinic start generated code]*/
    2659  
    2660  static PyObject *
    2661  _sre_SRE_Scanner_match_impl(ScannerObject *self, PyTypeObject *cls)
    2662  /*[clinic end generated code: output=6e22c149dc0f0325 input=b5146e1f30278cb7]*/
    2663  {
    2664      _sremodulestate *module_state = get_sre_module_state_by_class(cls);
    2665      SRE_STATE* state = &self->state;
    2666      PyObject* match;
    2667      Py_ssize_t status;
    2668  
    2669      if (!scanner_begin(self)) {
    2670          return NULL;
    2671      }
    2672      if (state->start == NULL) {
    2673          scanner_end(self);
    2674          Py_RETURN_NONE;
    2675      }
    2676  
    2677      state_reset(state);
    2678  
    2679      state->ptr = state->start;
    2680  
    2681      status = sre_match(state, PatternObject_GetCode(self->pattern));
    2682      if (PyErr_Occurred()) {
    2683          scanner_end(self);
    2684          return NULL;
    2685      }
    2686  
    2687      match = pattern_new_match(module_state, (PatternObject*) self->pattern,
    2688                                state, status);
    2689  
    2690      if (status == 0)
    2691          state->start = NULL;
    2692      else {
    2693          state->must_advance = (state->ptr == state->start);
    2694          state->start = state->ptr;
    2695      }
    2696  
    2697      scanner_end(self);
    2698      return match;
    2699  }
    2700  
    2701  
    2702  /*[clinic input]
    2703  _sre.SRE_Scanner.search
    2704  
    2705      cls: defining_class
    2706      /
    2707  
    2708  [clinic start generated code]*/
    2709  
    2710  static PyObject *
    2711  _sre_SRE_Scanner_search_impl(ScannerObject *self, PyTypeObject *cls)
    2712  /*[clinic end generated code: output=23e8fc78013f9161 input=056c2d37171d0bf2]*/
    2713  {
    2714      _sremodulestate *module_state = get_sre_module_state_by_class(cls);
    2715      SRE_STATE* state = &self->state;
    2716      PyObject* match;
    2717      Py_ssize_t status;
    2718  
    2719      if (!scanner_begin(self)) {
    2720          return NULL;
    2721      }
    2722      if (state->start == NULL) {
    2723          scanner_end(self);
    2724          Py_RETURN_NONE;
    2725      }
    2726  
    2727      state_reset(state);
    2728  
    2729      state->ptr = state->start;
    2730  
    2731      status = sre_search(state, PatternObject_GetCode(self->pattern));
    2732      if (PyErr_Occurred()) {
    2733          scanner_end(self);
    2734          return NULL;
    2735      }
    2736  
    2737      match = pattern_new_match(module_state, (PatternObject*) self->pattern,
    2738                                state, status);
    2739  
    2740      if (status == 0)
    2741          state->start = NULL;
    2742      else {
    2743          state->must_advance = (state->ptr == state->start);
    2744          state->start = state->ptr;
    2745      }
    2746  
    2747      scanner_end(self);
    2748      return match;
    2749  }
    2750  
    2751  static PyObject *
    2752  pattern_scanner(_sremodulestate *module_state,
    2753                  PatternObject *self,
    2754                  PyObject *string,
    2755                  Py_ssize_t pos,
    2756                  Py_ssize_t endpos)
    2757  {
    2758      ScannerObject* scanner;
    2759  
    2760      /* create scanner object */
    2761      scanner = PyObject_GC_New(ScannerObject, module_state->Scanner_Type);
    2762      if (!scanner)
    2763          return NULL;
    2764      scanner->pattern = NULL;
    2765      scanner->executing = 0;
    2766  
    2767      /* create search state object */
    2768      if (!state_init(&scanner->state, self, string, pos, endpos)) {
    2769          Py_DECREF(scanner);
    2770          return NULL;
    2771      }
    2772  
    2773      scanner->pattern = Py_NewRef(self);
    2774  
    2775      PyObject_GC_Track(scanner);
    2776      return (PyObject*) scanner;
    2777  }
    2778  
    2779  /* -------------------------------------------------------------------- */
    2780  /* template methods */
    2781  
    2782  static int
    2783  template_traverse(TemplateObject *self, visitproc visit, void *arg)
    2784  {
    2785      Py_VISIT(Py_TYPE(self));
    2786      Py_VISIT(self->literal);
    2787      for (Py_ssize_t i = 0, n = Py_SIZE(self); i < n; i++) {
    2788          Py_VISIT(self->items[i].literal);
    2789      }
    2790      return 0;
    2791  }
    2792  
    2793  static int
    2794  template_clear(TemplateObject *self)
    2795  {
    2796      Py_CLEAR(self->literal);
    2797      for (Py_ssize_t i = 0, n = Py_SIZE(self); i < n; i++) {
    2798          Py_CLEAR(self->items[i].literal);
    2799      }
    2800      return 0;
    2801  }
    2802  
    2803  static void
    2804  template_dealloc(TemplateObject *self)
    2805  {
    2806      PyTypeObject *tp = Py_TYPE(self);
    2807  
    2808      PyObject_GC_UnTrack(self);
    2809      (void)template_clear(self);
    2810      tp->tp_free(self);
    2811      Py_DECREF(tp);
    2812  }
    2813  
    2814  static PyObject *
    2815  expand_template(TemplateObject *self, MatchObject *match)
    2816  {
    2817      if (Py_SIZE(self) == 0) {
    2818          return Py_NewRef(self->literal);
    2819      }
    2820  
    2821      PyObject *result = NULL;
    2822      Py_ssize_t count = 0;  // the number of non-empty chunks
    2823      /* For small number of strings use a buffer allocated on the stack,
    2824       * otherwise use a list object. */
    2825      PyObject *buffer[10];
    2826      PyObject **out = buffer;
    2827      PyObject *list = NULL;
    2828      if (self->chunks > (int)Py_ARRAY_LENGTH(buffer) ||
    2829          !PyUnicode_Check(self->literal))
    2830      {
    2831          list = PyList_New(self->chunks);
    2832          if (!list) {
    2833              return NULL;
    2834          }
    2835          out = &PyList_GET_ITEM(list, 0);
    2836      }
    2837  
    2838      out[count++] = Py_NewRef(self->literal);
    2839      for (Py_ssize_t i = 0; i < Py_SIZE(self); i++) {
    2840          Py_ssize_t index = self->items[i].index;
    2841          if (index >= match->groups) {
    2842              PyErr_SetString(PyExc_IndexError, "no such group");
    2843              goto cleanup;
    2844          }
    2845          PyObject *item = match_getslice_by_index(match, index, Py_None);
    2846          if (item == NULL) {
    2847              goto cleanup;
    2848          }
    2849          if (item != Py_None) {
    2850              out[count++] = Py_NewRef(item);
    2851          }
    2852          Py_DECREF(item);
    2853  
    2854          PyObject *literal = self->items[i].literal;
    2855          if (literal != NULL) {
    2856              out[count++] = Py_NewRef(literal);
    2857          }
    2858      }
    2859  
    2860      if (PyUnicode_Check(self->literal)) {
    2861          result = _PyUnicode_JoinArray(&_Py_STR(empty), out, count);
    2862      }
    2863      else {
    2864          Py_SET_SIZE(list, count);
    2865          result = _PyBytes_Join((PyObject *)&_Py_SINGLETON(bytes_empty), list);
    2866      }
    2867  
    2868  cleanup:
    2869      if (list) {
    2870          Py_DECREF(list);
    2871      }
    2872      else {
    2873          for (Py_ssize_t i = 0; i < count; i++) {
    2874              Py_DECREF(out[i]);
    2875          }
    2876      }
    2877      return result;
    2878  }
    2879  
    2880  
    2881  static Py_hash_t
    2882  pattern_hash(PatternObject *self)
    2883  {
    2884      Py_hash_t hash, hash2;
    2885  
    2886      hash = PyObject_Hash(self->pattern);
    2887      if (hash == -1) {
    2888          return -1;
    2889      }
    2890  
    2891      hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
    2892      hash ^= hash2;
    2893  
    2894      hash ^= self->flags;
    2895      hash ^= self->isbytes;
    2896      hash ^= self->codesize;
    2897  
    2898      if (hash == -1) {
    2899          hash = -2;
    2900      }
    2901      return hash;
    2902  }
    2903  
    2904  static PyObject*
    2905  pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
    2906  {
    2907      PyTypeObject *tp = Py_TYPE(lefto);
    2908      _sremodulestate *module_state = get_sre_module_state_by_class(tp);
    2909      PatternObject *left, *right;
    2910      int cmp;
    2911  
    2912      if (op != Py_EQ && op != Py_NE) {
    2913          Py_RETURN_NOTIMPLEMENTED;
    2914      }
    2915  
    2916      if (!Py_IS_TYPE(righto, module_state->Pattern_Type))
    2917      {
    2918          Py_RETURN_NOTIMPLEMENTED;
    2919      }
    2920  
    2921      if (lefto == righto) {
    2922          /* a pattern is equal to itself */
    2923          return PyBool_FromLong(op == Py_EQ);
    2924      }
    2925  
    2926      left = (PatternObject *)lefto;
    2927      right = (PatternObject *)righto;
    2928  
    2929      cmp = (left->flags == right->flags
    2930             && left->isbytes == right->isbytes
    2931             && left->codesize == right->codesize);
    2932      if (cmp) {
    2933          /* Compare the code and the pattern because the same pattern can
    2934             produce different codes depending on the locale used to compile the
    2935             pattern when the re.LOCALE flag is used. Don't compare groups,
    2936             indexgroup nor groupindex: they are derivated from the pattern. */
    2937          cmp = (memcmp(left->code, right->code,
    2938                        sizeof(left->code[0]) * left->codesize) == 0);
    2939      }
    2940      if (cmp) {
    2941          cmp = PyObject_RichCompareBool(left->pattern, right->pattern,
    2942                                         Py_EQ);
    2943          if (cmp < 0) {
    2944              return NULL;
    2945          }
    2946      }
    2947      if (op == Py_NE) {
    2948          cmp = !cmp;
    2949      }
    2950      return PyBool_FromLong(cmp);
    2951  }
    2952  
    2953  #include "clinic/sre.c.h"
    2954  
/* Method table for re.Pattern.  The *_METHODDEF entries come from the
   Argument Clinic output included above (clinic/sre.c.h). */
static PyMethodDef pattern_methods[] = {
    _SRE_SRE_PATTERN_MATCH_METHODDEF
    _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
    _SRE_SRE_PATTERN_SEARCH_METHODDEF
    _SRE_SRE_PATTERN_SUB_METHODDEF
    _SRE_SRE_PATTERN_SUBN_METHODDEF
    _SRE_SRE_PATTERN_FINDALL_METHODDEF
    _SRE_SRE_PATTERN_SPLIT_METHODDEF
    _SRE_SRE_PATTERN_FINDITER_METHODDEF
    _SRE_SRE_PATTERN_SCANNER_METHODDEF
    _SRE_SRE_PATTERN___COPY___METHODDEF
    _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
    /* generic-alias support, e.g. re.Pattern[str] */
    {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS,
     PyDoc_STR("See PEP 585")},
    {NULL, NULL}  /* Sentinel */
};
    2971  
    2972  static PyGetSetDef pattern_getset[] = {
    2973      {"groupindex", (getter)pattern_groupindex, (setter)NULL,
    2974        "A dictionary mapping group names to group numbers."},
    2975      {NULL}  /* Sentinel */
    2976  };
    2977  
    2978  #define PAT_OFF(x) offsetof(PatternObject, x)
    2979  static PyMemberDef pattern_members[] = {
    2980      {"pattern",    T_OBJECT,    PAT_OFF(pattern),       READONLY,
    2981       "The pattern string from which the RE object was compiled."},
    2982      {"flags",      T_INT,       PAT_OFF(flags),         READONLY,
    2983       "The regex matching flags."},
    2984      {"groups",     T_PYSSIZET,  PAT_OFF(groups),        READONLY,
    2985       "The number of capturing groups in the pattern."},
    2986      {"__weaklistoffset__", T_PYSSIZET, offsetof(PatternObject, weakreflist), READONLY},
    2987      {NULL}  /* Sentinel */
    2988  };
    2989  
/* Slot table for the re.Pattern heap type (referenced by pattern_spec). */
static PyType_Slot pattern_slots[] = {
    {Py_tp_dealloc, (destructor)pattern_dealloc},
    {Py_tp_repr, (reprfunc)pattern_repr},
    {Py_tp_hash, (hashfunc)pattern_hash},
    {Py_tp_doc, (void *)pattern_doc},
    {Py_tp_richcompare, pattern_richcompare},
    {Py_tp_methods, pattern_methods},
    {Py_tp_members, pattern_members},
    {Py_tp_getset, pattern_getset},
    /* cyclic GC support */
    {Py_tp_traverse, pattern_traverse},
    {Py_tp_clear, pattern_clear},
    {0, NULL},  /* Sentinel */
};
    3003  
/* Type specification for re.Pattern.  Instances are variable-sized with
   one SRE_CODE per item; the type is immutable, GC-enabled and cannot be
   instantiated directly from Python. */
static PyType_Spec pattern_spec = {
    .name = "re.Pattern",
    .basicsize = sizeof(PatternObject),
    .itemsize = sizeof(SRE_CODE),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
    .slots = pattern_slots,
};
    3012  
/* Method table for re.Match.  "group" is declared by hand because it takes
   a variable number of positional arguments (METH_VARARGS); the
   *_METHODDEF entries come from the Argument Clinic output. */
static PyMethodDef match_methods[] = {
    {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
    _SRE_SRE_MATCH_START_METHODDEF
    _SRE_SRE_MATCH_END_METHODDEF
    _SRE_SRE_MATCH_SPAN_METHODDEF
    _SRE_SRE_MATCH_GROUPS_METHODDEF
    _SRE_SRE_MATCH_GROUPDICT_METHODDEF
    _SRE_SRE_MATCH_EXPAND_METHODDEF
    _SRE_SRE_MATCH___COPY___METHODDEF
    _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
    /* generic-alias support, e.g. re.Match[str] */
    {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS,
     PyDoc_STR("See PEP 585")},
    {NULL, NULL}  /* Sentinel */
};
    3027  
    3028  static PyGetSetDef match_getset[] = {
    3029      {"lastindex", (getter)match_lastindex_get, (setter)NULL,
    3030       "The integer index of the last matched capturing group."},
    3031      {"lastgroup", (getter)match_lastgroup_get, (setter)NULL,
    3032       "The name of the last matched capturing group."},
    3033      {"regs",      (getter)match_regs_get,      (setter)NULL},
    3034      {NULL}
    3035  };
    3036  
    3037  #define MATCH_OFF(x) offsetof(MatchObject, x)
    3038  static PyMemberDef match_members[] = {
    3039      {"string",  T_OBJECT,   MATCH_OFF(string),  READONLY,
    3040       "The string passed to match() or search()."},
    3041      {"re",      T_OBJECT,   MATCH_OFF(pattern), READONLY,
    3042       "The regular expression object."},
    3043      {"pos",     T_PYSSIZET, MATCH_OFF(pos),     READONLY,
    3044       "The index into the string at which the RE engine started looking for a match."},
    3045      {"endpos",  T_PYSSIZET, MATCH_OFF(endpos),  READONLY,
    3046       "The index into the string beyond which the RE engine will not go."},
    3047      {NULL}
    3048  };
    3049  
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */

/* Slot table for the re.Match heap type (referenced by match_spec). */
static PyType_Slot match_slots[] = {
    {Py_tp_dealloc, match_dealloc},
    {Py_tp_repr, match_repr},
    {Py_tp_doc, (void *)match_doc},
    {Py_tp_methods, match_methods},
    {Py_tp_members, match_members},
    {Py_tp_getset, match_getset},
    /* cyclic GC support */
    {Py_tp_traverse, match_traverse},
    {Py_tp_clear, match_clear},

    /* As mapping.
     *
     * Match objects do not support length or assignment, but do support
     * __getitem__.
     */
    {Py_mp_subscript, match_getitem},

    {0, NULL},  /* Sentinel */
};
    3071  
/* Type specification for re.Match.  Instances are variable-sized with one
   Py_ssize_t per item (the group marks); the type is immutable, GC-enabled
   and cannot be instantiated directly from Python. */
static PyType_Spec match_spec = {
    .name = "re.Match",
    .basicsize = sizeof(MatchObject),
    .itemsize = sizeof(Py_ssize_t),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
    .slots = match_slots,
};
    3080  
/* Method table for scanner objects: match() and search(), both generated
   by Argument Clinic. */
static PyMethodDef scanner_methods[] = {
    _SRE_SRE_SCANNER_MATCH_METHODDEF
    _SRE_SRE_SCANNER_SEARCH_METHODDEF
    {NULL, NULL}  /* Sentinel */
};
    3086  
    3087  #define SCAN_OFF(x) offsetof(ScannerObject, x)
    3088  static PyMemberDef scanner_members[] = {
    3089      {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
    3090      {NULL}  /* Sentinel */
    3091  };
    3092  
/* Slot table for the scanner heap type (referenced by scanner_spec). */
static PyType_Slot scanner_slots[] = {
    {Py_tp_dealloc, scanner_dealloc},
    {Py_tp_methods, scanner_methods},
    {Py_tp_members, scanner_members},
    /* cyclic GC support */
    {Py_tp_traverse, scanner_traverse},
    {Py_tp_clear, scanner_clear},
    {0, NULL},  /* Sentinel */
};
    3101  
/* Type specification for scanner objects.  Fixed-size instances (no
   itemsize); the type is immutable, GC-enabled and cannot be instantiated
   directly from Python. */
static PyType_Spec scanner_spec = {
    .name = "_sre.SRE_Scanner",
    .basicsize = sizeof(ScannerObject),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
    .slots = scanner_slots,
};
    3109  
/* Slot table for the template heap type (referenced by template_spec).
   Only deallocation and cyclic GC support are provided; the type exposes
   no methods or members. */
static PyType_Slot template_slots[] = {
    {Py_tp_dealloc, template_dealloc},
    {Py_tp_traverse, template_traverse},
    {Py_tp_clear, template_clear},
    {0, NULL},  /* Sentinel */
};
    3116  
/* Type specification for template objects.  Instances are variable-sized
   with one `items[]` element per item; the type is immutable, GC-enabled
   and cannot be instantiated directly from Python. */
static PyType_Spec template_spec = {
    .name = "_sre.SRE_Template",
    .basicsize = sizeof(TemplateObject),
    /* size of one element of the trailing items[] array */
    .itemsize = sizeof(((TemplateObject *)0)->items[0]),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
    .slots = template_slots,
};
    3125  
/* Module-level functions of _sre (all generated by Argument Clinic). */
static PyMethodDef _functions[] = {
    _SRE_COMPILE_METHODDEF
    _SRE_TEMPLATE_METHODDEF
    _SRE_GETCODESIZE_METHODDEF
    _SRE_ASCII_ISCASED_METHODDEF
    _SRE_UNICODE_ISCASED_METHODDEF
    _SRE_ASCII_TOLOWER_METHODDEF
    _SRE_UNICODE_TOLOWER_METHODDEF
    {NULL, NULL}  /* Sentinel */
};
    3136  
    3137  static int
    3138  sre_traverse(PyObject *module, visitproc visit, void *arg)
    3139  {
    3140      _sremodulestate *state = get_sre_module_state(module);
    3141  
    3142      Py_VISIT(state->Pattern_Type);
    3143      Py_VISIT(state->Match_Type);
    3144      Py_VISIT(state->Scanner_Type);
    3145      Py_VISIT(state->Template_Type);
    3146      Py_VISIT(state->compile_template);
    3147  
    3148      return 0;
    3149  }
    3150  
    3151  static int
    3152  sre_clear(PyObject *module)
    3153  {
    3154      _sremodulestate *state = get_sre_module_state(module);
    3155  
    3156      Py_CLEAR(state->Pattern_Type);
    3157      Py_CLEAR(state->Match_Type);
    3158      Py_CLEAR(state->Scanner_Type);
    3159      Py_CLEAR(state->Template_Type);
    3160      Py_CLEAR(state->compile_template);
    3161  
    3162      return 0;
    3163  }
    3164  
/* m_free for the _sre module: releases the module state by reusing
   sre_clear(). */
static void
sre_free(void *module)
{
    sre_clear((PyObject *)module);
}
    3170  
/* Create a heap type from `spec`, bound to module `m`, and store it in
   `type`.  On failure jumps to the `error` label, which the enclosing
   function must provide. */
#define CREATE_TYPE(m, type, spec)                                  \
do {                                                                \
    type = (PyTypeObject *)PyType_FromModuleAndSpec(m, spec, NULL); \
    if (type == NULL) {                                             \
        goto error;                                                 \
    }                                                               \
} while (0)
    3178  
/* Add `value` (converted with PyLong_FromUnsignedLong) to `module` under
   `name`.  The temporary int object is released whether or not the add
   succeeds.  On failure jumps to the `error` label, which the enclosing
   function must provide. */
#define ADD_ULONG_CONSTANT(module, name, value)           \
    do {                                                  \
        PyObject *o = PyLong_FromUnsignedLong(value);     \
        if (!o)                                           \
            goto error;                                   \
        int res = PyModule_AddObjectRef(module, name, o); \
        Py_DECREF(o);                                     \
        if (res < 0) {                                    \
            goto error;                                   \
        }                                                 \
} while (0)
    3190  
    3191  static int
    3192  sre_exec(PyObject *m)
    3193  {
    3194      _sremodulestate *state;
    3195  
    3196      /* Create heap types */
    3197      state = get_sre_module_state(m);
    3198      CREATE_TYPE(m, state->Pattern_Type, &pattern_spec);
    3199      CREATE_TYPE(m, state->Match_Type, &match_spec);
    3200      CREATE_TYPE(m, state->Scanner_Type, &scanner_spec);
    3201      CREATE_TYPE(m, state->Template_Type, &template_spec);
    3202  
    3203      if (PyModule_AddIntConstant(m, "MAGIC", SRE_MAGIC) < 0) {
    3204          goto error;
    3205      }
    3206  
    3207      if (PyModule_AddIntConstant(m, "CODESIZE", sizeof(SRE_CODE)) < 0) {
    3208          goto error;
    3209      }
    3210  
    3211      ADD_ULONG_CONSTANT(m, "MAXREPEAT", SRE_MAXREPEAT);
    3212      ADD_ULONG_CONSTANT(m, "MAXGROUPS", SRE_MAXGROUPS);
    3213  
    3214      if (PyModule_AddStringConstant(m, "copyright", copyright) < 0) {
    3215          goto error;
    3216      }
    3217  
    3218      return 0;
    3219  
    3220  error:
    3221      return -1;
    3222  }
    3223  
/* Multi-phase initialization slots: sre_exec populates the module, and the
   module declares support for interpreters with their own GIL. */
static PyModuleDef_Slot sre_slots[] = {
    {Py_mod_exec, sre_exec},
    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
    {0, NULL},  /* Sentinel */
};
    3229  
/* Module definition for _sre.  Per-module state is an _sremodulestate,
   managed through the traverse/clear/free hooks below. */
static struct PyModuleDef sremodule = {
    .m_base = PyModuleDef_HEAD_INIT,
    .m_name = "_sre",
    .m_size = sizeof(_sremodulestate),
    .m_methods = _functions,
    .m_slots = sre_slots,
    .m_traverse = sre_traverse,
    .m_free = sre_free,
    .m_clear = sre_clear,
};
    3240  
/* Module entry point: hands the definition to PyModuleDef_Init(); the
   actual setup happens in the Py_mod_exec slot (sre_exec). */
PyMODINIT_FUNC
PyInit__sre(void)
{
    return PyModuleDef_Init(&sremodule);
}
    3246  
    3247  /* vim:ts=4:sw=4:et
    3248  */