(root)/
Python-3.12.0/
Modules/
_sre/
sre_lib.h
       1  /*
       2   * Secret Labs' Regular Expression Engine
       3   *
       4   * regular expression matching engine
       5   *
       6   * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
       7   *
       8   * See the sre.c file for information on usage and redistribution.
       9   */
      10  
      11  /* String matching engine */
      12  
      13  /* This file is included three times, with different character settings */
      14  
      15  LOCAL(int)
      16  SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
      17  {
      18      /* check if pointer is at given position */
      19  
      20      Py_ssize_t thisp, thatp;
      21  
      22      switch (at) {
      23  
      24      case SRE_AT_BEGINNING:
      25      case SRE_AT_BEGINNING_STRING:
      26          return ((void*) ptr == state->beginning);
      27  
      28      case SRE_AT_BEGINNING_LINE:
      29          return ((void*) ptr == state->beginning ||
      30                  SRE_IS_LINEBREAK((int) ptr[-1]));
      31  
      32      case SRE_AT_END:
      33          return (((SRE_CHAR *)state->end - ptr == 1 &&
      34                   SRE_IS_LINEBREAK((int) ptr[0])) ||
      35                  ((void*) ptr == state->end));
      36  
      37      case SRE_AT_END_LINE:
      38          return ((void*) ptr == state->end ||
      39                  SRE_IS_LINEBREAK((int) ptr[0]));
      40  
      41      case SRE_AT_END_STRING:
      42          return ((void*) ptr == state->end);
      43  
      44      case SRE_AT_BOUNDARY:
      45          if (state->beginning == state->end)
      46              return 0;
      47          thatp = ((void*) ptr > state->beginning) ?
      48              SRE_IS_WORD((int) ptr[-1]) : 0;
      49          thisp = ((void*) ptr < state->end) ?
      50              SRE_IS_WORD((int) ptr[0]) : 0;
      51          return thisp != thatp;
      52  
      53      case SRE_AT_NON_BOUNDARY:
      54          if (state->beginning == state->end)
      55              return 0;
      56          thatp = ((void*) ptr > state->beginning) ?
      57              SRE_IS_WORD((int) ptr[-1]) : 0;
      58          thisp = ((void*) ptr < state->end) ?
      59              SRE_IS_WORD((int) ptr[0]) : 0;
      60          return thisp == thatp;
      61  
      62      case SRE_AT_LOC_BOUNDARY:
      63          if (state->beginning == state->end)
      64              return 0;
      65          thatp = ((void*) ptr > state->beginning) ?
      66              SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
      67          thisp = ((void*) ptr < state->end) ?
      68              SRE_LOC_IS_WORD((int) ptr[0]) : 0;
      69          return thisp != thatp;
      70  
      71      case SRE_AT_LOC_NON_BOUNDARY:
      72          if (state->beginning == state->end)
      73              return 0;
      74          thatp = ((void*) ptr > state->beginning) ?
      75              SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
      76          thisp = ((void*) ptr < state->end) ?
      77              SRE_LOC_IS_WORD((int) ptr[0]) : 0;
      78          return thisp == thatp;
      79  
      80      case SRE_AT_UNI_BOUNDARY:
      81          if (state->beginning == state->end)
      82              return 0;
      83          thatp = ((void*) ptr > state->beginning) ?
      84              SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
      85          thisp = ((void*) ptr < state->end) ?
      86              SRE_UNI_IS_WORD((int) ptr[0]) : 0;
      87          return thisp != thatp;
      88  
      89      case SRE_AT_UNI_NON_BOUNDARY:
      90          if (state->beginning == state->end)
      91              return 0;
      92          thatp = ((void*) ptr > state->beginning) ?
      93              SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
      94          thisp = ((void*) ptr < state->end) ?
      95              SRE_UNI_IS_WORD((int) ptr[0]) : 0;
      96          return thisp == thatp;
      97  
      98      }
      99  
     100      return 0;
     101  }
     102  
     103  LOCAL(int)
     104  SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
     105  {
     106      /* check if character is a member of the given set */
     107  
     108      int ok = 1;
     109  
     110      for (;;) {
     111          switch (*set++) {
     112  
     113          case SRE_OP_FAILURE:
     114              return !ok;
     115  
     116          case SRE_OP_LITERAL:
     117              /* <LITERAL> <code> */
     118              if (ch == set[0])
     119                  return ok;
     120              set++;
     121              break;
     122  
     123          case SRE_OP_CATEGORY:
     124              /* <CATEGORY> <code> */
     125              if (sre_category(set[0], (int) ch))
     126                  return ok;
     127              set++;
     128              break;
     129  
     130          case SRE_OP_CHARSET:
     131              /* <CHARSET> <bitmap> */
     132              if (ch < 256 &&
     133                  (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
     134                  return ok;
     135              set += 256/SRE_CODE_BITS;
     136              break;
     137  
     138          case SRE_OP_RANGE:
     139              /* <RANGE> <lower> <upper> */
     140              if (set[0] <= ch && ch <= set[1])
     141                  return ok;
     142              set += 2;
     143              break;
     144  
     145          case SRE_OP_RANGE_UNI_IGNORE:
     146              /* <RANGE_UNI_IGNORE> <lower> <upper> */
     147          {
     148              SRE_CODE uch;
     149              /* ch is already lower cased */
     150              if (set[0] <= ch && ch <= set[1])
     151                  return ok;
     152              uch = sre_upper_unicode(ch);
     153              if (set[0] <= uch && uch <= set[1])
     154                  return ok;
     155              set += 2;
     156              break;
     157          }
     158  
     159          case SRE_OP_NEGATE:
     160              ok = !ok;
     161              break;
     162  
     163          case SRE_OP_BIGCHARSET:
     164              /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
     165          {
     166              Py_ssize_t count, block;
     167              count = *(set++);
     168  
     169              if (ch < 0x10000u)
     170                  block = ((unsigned char*)set)[ch >> 8];
     171              else
     172                  block = -1;
     173              set += 256/sizeof(SRE_CODE);
     174              if (block >=0 &&
     175                  (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
     176                      (1u << (ch & (SRE_CODE_BITS-1)))))
     177                  return ok;
     178              set += count * (256/SRE_CODE_BITS);
     179              break;
     180          }
     181  
     182          default:
     183              /* internal error -- there's not much we can do about it
     184                 here, so let's just pretend it didn't match... */
     185              return 0;
     186          }
     187      }
     188  }
     189  
     190  LOCAL(int)
     191  SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
     192  {
     193      SRE_CODE lo, up;
     194      lo = sre_lower_locale(ch);
     195      if (SRE(charset)(state, set, lo))
     196         return 1;
     197  
     198      up = sre_upper_locale(ch);
     199      return up != lo && SRE(charset)(state, set, up);
     200  }
     201  
     202  LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel);
     203  
     204  LOCAL(Py_ssize_t)
     205  SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
     206  {
     207      SRE_CODE chr;
     208      SRE_CHAR c;
     209      const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
     210      const SRE_CHAR* end = (const SRE_CHAR *)state->end;
     211      Py_ssize_t i;
     212  
     213      /* adjust end */
     214      if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
     215          end = ptr + maxcount;
     216  
     217      switch (pattern[0]) {
     218  
     219      case SRE_OP_IN:
     220          /* repeated set */
     221          TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
     222          while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
     223              ptr++;
     224          break;
     225  
     226      case SRE_OP_ANY:
     227          /* repeated dot wildcard. */
     228          TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
     229          while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
     230              ptr++;
     231          break;
     232  
     233      case SRE_OP_ANY_ALL:
     234          /* repeated dot wildcard.  skip to the end of the target
     235             string, and backtrack from there */
     236          TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
     237          ptr = end;
     238          break;
     239  
     240      case SRE_OP_LITERAL:
     241          /* repeated literal */
     242          chr = pattern[1];
     243          TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
     244          c = (SRE_CHAR) chr;
     245  #if SIZEOF_SRE_CHAR < 4
     246          if ((SRE_CODE) c != chr)
     247              ; /* literal can't match: doesn't fit in char width */
     248          else
     249  #endif
     250          while (ptr < end && *ptr == c)
     251              ptr++;
     252          break;
     253  
     254      case SRE_OP_LITERAL_IGNORE:
     255          /* repeated literal */
     256          chr = pattern[1];
     257          TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
     258          while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
     259              ptr++;
     260          break;
     261  
     262      case SRE_OP_LITERAL_UNI_IGNORE:
     263          /* repeated literal */
     264          chr = pattern[1];
     265          TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
     266          while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
     267              ptr++;
     268          break;
     269  
     270      case SRE_OP_LITERAL_LOC_IGNORE:
     271          /* repeated literal */
     272          chr = pattern[1];
     273          TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
     274          while (ptr < end && char_loc_ignore(chr, *ptr))
     275              ptr++;
     276          break;
     277  
     278      case SRE_OP_NOT_LITERAL:
     279          /* repeated non-literal */
     280          chr = pattern[1];
     281          TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
     282          c = (SRE_CHAR) chr;
     283  #if SIZEOF_SRE_CHAR < 4
     284          if ((SRE_CODE) c != chr)
     285              ptr = end; /* literal can't match: doesn't fit in char width */
     286          else
     287  #endif
     288          while (ptr < end && *ptr != c)
     289              ptr++;
     290          break;
     291  
     292      case SRE_OP_NOT_LITERAL_IGNORE:
     293          /* repeated non-literal */
     294          chr = pattern[1];
     295          TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
     296          while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
     297              ptr++;
     298          break;
     299  
     300      case SRE_OP_NOT_LITERAL_UNI_IGNORE:
     301          /* repeated non-literal */
     302          chr = pattern[1];
     303          TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
     304          while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
     305              ptr++;
     306          break;
     307  
     308      case SRE_OP_NOT_LITERAL_LOC_IGNORE:
     309          /* repeated non-literal */
     310          chr = pattern[1];
     311          TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
     312          while (ptr < end && !char_loc_ignore(chr, *ptr))
     313              ptr++;
     314          break;
     315  
     316      default:
     317          /* repeated single character pattern */
     318          TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
     319          while ((SRE_CHAR*) state->ptr < end) {
     320              i = SRE(match)(state, pattern, 0);
     321              if (i < 0)
     322                  return i;
     323              if (!i)
     324                  break;
     325          }
     326          TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
     327                 (SRE_CHAR*) state->ptr - ptr));
     328          return (SRE_CHAR*) state->ptr - ptr;
     329      }
     330  
     331      TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
     332             ptr - (SRE_CHAR*) state->ptr));
     333      return ptr - (SRE_CHAR*) state->ptr;
     334  }
     335  
     336  /* The macros below should be used to protect recursive SRE(match)()
     337   * calls that *failed* and do *not* return immediately (IOW, those
     338   * that will backtrack). Explaining:
     339   *
     340   * - Recursive SRE(match)() returned true: that's usually a success
     341   *   (besides atypical cases like ASSERT_NOT), therefore there's no
     342   *   reason to restore lastmark;
     343   *
     344   * - Recursive SRE(match)() returned false but the current SRE(match)()
     345   *   is returning to the caller: If the current SRE(match)() is the
     346   *   top function of the recursion, returning false will be a matching
     347   *   failure, and it doesn't matter where lastmark is pointing to.
     348   *   If it's *not* the top function, it will be a recursive SRE(match)()
     349   *   failure by itself, and the calling SRE(match)() will have to deal
     350   *   with the failure by the same rules explained here (it will restore
     351   *   lastmark by itself if necessary);
     352   *
     353   * - Recursive SRE(match)() returned false, and will continue the
     354   *   outside 'for' loop: must be protected when breaking, since the next
     355   *   OP could potentially depend on lastmark;
     356   *
     357   * - Recursive SRE(match)() returned false, and will be called again
     358   *   inside a local for/while loop: must be protected between each
     359   *   loop iteration, since the recursive SRE(match)() could do anything,
     360   *   and could potentially depend on lastmark.
     361   *
     362   * For more information, check the discussion at SF patch #712900.
     363   */
     364  #define LASTMARK_SAVE()     \
     365      do { \
     366          ctx->lastmark = state->lastmark; \
     367          ctx->lastindex = state->lastindex; \
     368      } while (0)
     369  #define LASTMARK_RESTORE()  \
     370      do { \
     371          state->lastmark = ctx->lastmark; \
     372          state->lastindex = ctx->lastindex; \
     373      } while (0)
     374  
     375  #define RETURN_ERROR(i) do { return i; } while(0)
     376  #define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
     377  #define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
     378  
     379  #define RETURN_ON_ERROR(i) \
     380      do { if (i < 0) RETURN_ERROR(i); } while (0)
     381  #define RETURN_ON_SUCCESS(i) \
     382      do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
     383  #define RETURN_ON_FAILURE(i) \
     384      do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
     385  
     386  #define DATA_STACK_ALLOC(state, type, ptr) \
     387  do { \
     388      alloc_pos = state->data_stack_base; \
     389      TRACE(("allocating %s in %zd (%zd)\n", \
     390             Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
     391      if (sizeof(type) > state->data_stack_size - alloc_pos) { \
     392          int j = data_stack_grow(state, sizeof(type)); \
     393          if (j < 0) return j; \
     394          if (ctx_pos != -1) \
     395              DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
     396      } \
     397      ptr = (type*)(state->data_stack+alloc_pos); \
     398      state->data_stack_base += sizeof(type); \
     399  } while (0)
     400  
     401  #define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
     402  do { \
     403      TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
     404      ptr = (type*)(state->data_stack+pos); \
     405  } while (0)
     406  
     407  #define DATA_STACK_PUSH(state, data, size) \
     408  do { \
     409      TRACE(("copy data in %p to %zd (%zd)\n", \
     410             data, state->data_stack_base, size)); \
     411      if (size > state->data_stack_size - state->data_stack_base) { \
     412          int j = data_stack_grow(state, size); \
     413          if (j < 0) return j; \
     414          if (ctx_pos != -1) \
     415              DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
     416      } \
     417      memcpy(state->data_stack+state->data_stack_base, data, size); \
     418      state->data_stack_base += size; \
     419  } while (0)
     420  
     421  /* We add an explicit cast to memcpy here because MSVC has a bug when
     422     compiling C code where it believes that `const void**` cannot be
     423     safely casted to `void*`, see bpo-39943 for details. */
     424  #define DATA_STACK_POP(state, data, size, discard) \
     425  do { \
     426      TRACE(("copy data to %p from %zd (%zd)\n", \
     427             data, state->data_stack_base-size, size)); \
     428      memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \
     429      if (discard) \
     430          state->data_stack_base -= size; \
     431  } while (0)
     432  
     433  #define DATA_STACK_POP_DISCARD(state, size) \
     434  do { \
     435      TRACE(("discard data from %zd (%zd)\n", \
     436             state->data_stack_base-size, size)); \
     437      state->data_stack_base -= size; \
     438  } while(0)
     439  
     440  #define DATA_PUSH(x) \
     441      DATA_STACK_PUSH(state, (x), sizeof(*(x)))
     442  #define DATA_POP(x) \
     443      DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
     444  #define DATA_POP_DISCARD(x) \
     445      DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
     446  #define DATA_ALLOC(t,p) \
     447      DATA_STACK_ALLOC(state, t, p)
     448  #define DATA_LOOKUP_AT(t,p,pos) \
     449      DATA_STACK_LOOKUP_AT(state,t,p,pos)
     450  
     451  #define MARK_PUSH(lastmark) \
     452      do if (lastmark >= 0) { \
     453          size_t _marks_size = (lastmark+1) * sizeof(void*); \
     454          DATA_STACK_PUSH(state, state->mark, _marks_size); \
     455      } while (0)
     456  #define MARK_POP(lastmark) \
     457      do if (lastmark >= 0) { \
     458          size_t _marks_size = (lastmark+1) * sizeof(void*); \
     459          DATA_STACK_POP(state, state->mark, _marks_size, 1); \
     460      } while (0)
     461  #define MARK_POP_KEEP(lastmark) \
     462      do if (lastmark >= 0) { \
     463          size_t _marks_size = (lastmark+1) * sizeof(void*); \
     464          DATA_STACK_POP(state, state->mark, _marks_size, 0); \
     465      } while (0)
     466  #define MARK_POP_DISCARD(lastmark) \
     467      do if (lastmark >= 0) { \
     468          size_t _marks_size = (lastmark+1) * sizeof(void*); \
     469          DATA_STACK_POP_DISCARD(state, _marks_size); \
     470      } while (0)
     471  
     472  #define JUMP_NONE            0
     473  #define JUMP_MAX_UNTIL_1     1
     474  #define JUMP_MAX_UNTIL_2     2
     475  #define JUMP_MAX_UNTIL_3     3
     476  #define JUMP_MIN_UNTIL_1     4
     477  #define JUMP_MIN_UNTIL_2     5
     478  #define JUMP_MIN_UNTIL_3     6
     479  #define JUMP_REPEAT          7
     480  #define JUMP_REPEAT_ONE_1    8
     481  #define JUMP_REPEAT_ONE_2    9
     482  #define JUMP_MIN_REPEAT_ONE  10
     483  #define JUMP_BRANCH          11
     484  #define JUMP_ASSERT          12
     485  #define JUMP_ASSERT_NOT      13
     486  #define JUMP_POSS_REPEAT_1   14
     487  #define JUMP_POSS_REPEAT_2   15
     488  #define JUMP_ATOMIC_GROUP    16
     489  
     490  #define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
     491      ctx->pattern = pattern; \
     492      ctx->ptr = ptr; \
     493      DATA_ALLOC(SRE(match_context), nextctx); \
     494      nextctx->pattern = nextpattern; \
     495      nextctx->toplevel = toplevel_; \
     496      nextctx->jump = jumpvalue; \
     497      nextctx->last_ctx_pos = ctx_pos; \
     498      pattern = nextpattern; \
     499      ctx_pos = alloc_pos; \
     500      ctx = nextctx; \
     501      goto entrance; \
     502      jumplabel: \
     503      pattern = ctx->pattern; \
     504      ptr = ctx->ptr;
     505  
     506  #define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
     507      DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
     508  
     509  #define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
     510      DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
     511  
     512  typedef struct {
     513      Py_ssize_t count;
     514      union {
     515          SRE_CODE chr;
     516          SRE_REPEAT* rep;
     517      } u;
     518      int lastmark;
     519      int lastindex;
     520      const SRE_CODE* pattern;
     521      const SRE_CHAR* ptr;
     522      int toplevel;
     523      int jump;
     524      Py_ssize_t last_ctx_pos;
     525  } SRE(match_context);
     526  
     527  #define MAYBE_CHECK_SIGNALS                                        \
     528      do {                                                           \
     529          if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
     530              RETURN_ERROR(SRE_ERROR_INTERRUPTED);                   \
     531          }                                                          \
     532      } while (0)
     533  
     534  #ifdef HAVE_COMPUTED_GOTOS
     535      #ifndef USE_COMPUTED_GOTOS
     536      #define USE_COMPUTED_GOTOS 1
     537      #endif
     538  #elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
     539      #error "Computed gotos are not supported on this compiler."
     540  #else
     541      #undef USE_COMPUTED_GOTOS
     542      #define USE_COMPUTED_GOTOS 0
     543  #endif
     544  
     545  #if USE_COMPUTED_GOTOS
     546      #define TARGET(OP) TARGET_ ## OP
     547      #define DISPATCH                       \
     548          do {                               \
     549              MAYBE_CHECK_SIGNALS;           \
     550              goto *sre_targets[*pattern++]; \
     551          } while (0)
     552  #else
     553      #define TARGET(OP) case OP
     554      #define DISPATCH goto dispatch
     555  #endif
     556  
     557  /* check if string matches the given pattern.  returns <0 for
     558     error, 0 for failure, and 1 for success */
     559  LOCAL(Py_ssize_t)
     560  SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
     561  {
     562      const SRE_CHAR* end = (const SRE_CHAR *)state->end;
     563      Py_ssize_t alloc_pos, ctx_pos = -1;
     564      Py_ssize_t ret = 0;
     565      int jump;
     566      unsigned int sigcount=0;
     567  
     568      SRE(match_context)* ctx;
     569      SRE(match_context)* nextctx;
     570  
     571      TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
     572  
     573      DATA_ALLOC(SRE(match_context), ctx);
     574      ctx->last_ctx_pos = -1;
     575      ctx->jump = JUMP_NONE;
     576      ctx->toplevel = toplevel;
     577      ctx_pos = alloc_pos;
     578  
     579  #if USE_COMPUTED_GOTOS
     580  #include "sre_targets.h"
     581  #endif
     582  
     583  entrance:
     584  
     585      ;  // Fashion statement.
     586      const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
     587  
     588      if (pattern[0] == SRE_OP_INFO) {
     589          /* optimization info block */
     590          /* <INFO> <1=skip> <2=flags> <3=min> ... */
     591          if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
     592              TRACE(("reject (got %zd chars, need %zd)\n",
     593                     end - ptr, (Py_ssize_t) pattern[3]));
     594              RETURN_FAILURE;
     595          }
     596          pattern += pattern[1] + 1;
     597      }
     598  
     599  #if USE_COMPUTED_GOTOS
     600      DISPATCH;
     601  #else
     602  dispatch:
     603      MAYBE_CHECK_SIGNALS;
     604      switch (*pattern++)
     605  #endif
     606      {
     607  
     608          TARGET(SRE_OP_MARK):
     609              /* set mark */
     610              /* <MARK> <gid> */
     611              TRACE(("|%p|%p|MARK %d\n", pattern,
     612                     ptr, pattern[0]));
     613              {
     614                  int i = pattern[0];
     615                  if (i & 1)
     616                      state->lastindex = i/2 + 1;
     617                  if (i > state->lastmark) {
     618                      /* state->lastmark is the highest valid index in the
     619                         state->mark array.  If it is increased by more than 1,
     620                         the intervening marks must be set to NULL to signal
     621                         that these marks have not been encountered. */
     622                      int j = state->lastmark + 1;
     623                      while (j < i)
     624                          state->mark[j++] = NULL;
     625                      state->lastmark = i;
     626                  }
     627                  state->mark[i] = ptr;
     628              }
     629              pattern++;
     630              DISPATCH;
     631  
     632          TARGET(SRE_OP_LITERAL):
     633              /* match literal string */
     634              /* <LITERAL> <code> */
     635              TRACE(("|%p|%p|LITERAL %d\n", pattern,
     636                     ptr, *pattern));
     637              if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
     638                  RETURN_FAILURE;
     639              pattern++;
     640              ptr++;
     641              DISPATCH;
     642  
     643          TARGET(SRE_OP_NOT_LITERAL):
     644              /* match anything that is not literal character */
     645              /* <NOT_LITERAL> <code> */
     646              TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
     647                     ptr, *pattern));
     648              if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
     649                  RETURN_FAILURE;
     650              pattern++;
     651              ptr++;
     652              DISPATCH;
     653  
     654          TARGET(SRE_OP_SUCCESS):
     655              /* end of pattern */
     656              TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
     657              if (ctx->toplevel &&
     658                  ((state->match_all && ptr != state->end) ||
     659                   (state->must_advance && ptr == state->start)))
     660              {
     661                  RETURN_FAILURE;
     662              }
     663              state->ptr = ptr;
     664              RETURN_SUCCESS;
     665  
     666          TARGET(SRE_OP_AT):
     667              /* match at given position */
     668              /* <AT> <code> */
     669              TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
     670              if (!SRE(at)(state, ptr, *pattern))
     671                  RETURN_FAILURE;
     672              pattern++;
     673              DISPATCH;
     674  
     675          TARGET(SRE_OP_CATEGORY):
     676              /* match at given category */
     677              /* <CATEGORY> <code> */
     678              TRACE(("|%p|%p|CATEGORY %d\n", pattern,
     679                     ptr, *pattern));
     680              if (ptr >= end || !sre_category(pattern[0], ptr[0]))
     681                  RETURN_FAILURE;
     682              pattern++;
     683              ptr++;
     684              DISPATCH;
     685  
     686          TARGET(SRE_OP_ANY):
     687              /* match anything (except a newline) */
     688              /* <ANY> */
     689              TRACE(("|%p|%p|ANY\n", pattern, ptr));
     690              if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
     691                  RETURN_FAILURE;
     692              ptr++;
     693              DISPATCH;
     694  
     695          TARGET(SRE_OP_ANY_ALL):
     696              /* match anything */
     697              /* <ANY_ALL> */
     698              TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
     699              if (ptr >= end)
     700                  RETURN_FAILURE;
     701              ptr++;
     702              DISPATCH;
     703  
     704          TARGET(SRE_OP_IN):
     705              /* match set member (or non_member) */
     706              /* <IN> <skip> <set> */
     707              TRACE(("|%p|%p|IN\n", pattern, ptr));
     708              if (ptr >= end ||
     709                  !SRE(charset)(state, pattern + 1, *ptr))
     710                  RETURN_FAILURE;
     711              pattern += pattern[0];
     712              ptr++;
     713              DISPATCH;
     714  
     715          TARGET(SRE_OP_LITERAL_IGNORE):
     716              TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
     717                     pattern, ptr, pattern[0]));
     718              if (ptr >= end ||
     719                  sre_lower_ascii(*ptr) != *pattern)
     720                  RETURN_FAILURE;
     721              pattern++;
     722              ptr++;
     723              DISPATCH;
     724  
     725          TARGET(SRE_OP_LITERAL_UNI_IGNORE):
     726              TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
     727                     pattern, ptr, pattern[0]));
     728              if (ptr >= end ||
     729                  sre_lower_unicode(*ptr) != *pattern)
     730                  RETURN_FAILURE;
     731              pattern++;
     732              ptr++;
     733              DISPATCH;
     734  
     735          TARGET(SRE_OP_LITERAL_LOC_IGNORE):
     736              TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
     737                     pattern, ptr, pattern[0]));
     738              if (ptr >= end
     739                  || !char_loc_ignore(*pattern, *ptr))
     740                  RETURN_FAILURE;
     741              pattern++;
     742              ptr++;
     743              DISPATCH;
     744  
     745          TARGET(SRE_OP_NOT_LITERAL_IGNORE):
     746              TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
     747                     pattern, ptr, *pattern));
     748              if (ptr >= end ||
     749                  sre_lower_ascii(*ptr) == *pattern)
     750                  RETURN_FAILURE;
     751              pattern++;
     752              ptr++;
     753              DISPATCH;
     754  
     755          TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
     756              TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
     757                     pattern, ptr, *pattern));
     758              if (ptr >= end ||
     759                  sre_lower_unicode(*ptr) == *pattern)
     760                  RETURN_FAILURE;
     761              pattern++;
     762              ptr++;
     763              DISPATCH;
     764  
     765          TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
     766              TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
     767                     pattern, ptr, *pattern));
     768              if (ptr >= end
     769                  || char_loc_ignore(*pattern, *ptr))
     770                  RETURN_FAILURE;
     771              pattern++;
     772              ptr++;
     773              DISPATCH;
     774  
     775          TARGET(SRE_OP_IN_IGNORE):
     776              TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
     777              if (ptr >= end
     778                  || !SRE(charset)(state, pattern+1,
     779                                   (SRE_CODE)sre_lower_ascii(*ptr)))
     780                  RETURN_FAILURE;
     781              pattern += pattern[0];
     782              ptr++;
     783              DISPATCH;
     784  
     785          TARGET(SRE_OP_IN_UNI_IGNORE):
     786              TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
     787              if (ptr >= end
     788                  || !SRE(charset)(state, pattern+1,
     789                                   (SRE_CODE)sre_lower_unicode(*ptr)))
     790                  RETURN_FAILURE;
     791              pattern += pattern[0];
     792              ptr++;
     793              DISPATCH;
     794  
     795          TARGET(SRE_OP_IN_LOC_IGNORE):
     796              TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
     797              if (ptr >= end
     798                  || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
     799                  RETURN_FAILURE;
     800              pattern += pattern[0];
     801              ptr++;
     802              DISPATCH;
     803  
     804          TARGET(SRE_OP_JUMP):
     805          TARGET(SRE_OP_INFO):
     806              /* jump forward */
     807              /* <JUMP> <offset> */
     808              TRACE(("|%p|%p|JUMP %d\n", pattern,
     809                     ptr, pattern[0]));
     810              pattern += pattern[0];
     811              DISPATCH;
     812  
     813          TARGET(SRE_OP_BRANCH):
     814              /* alternation */
     815              /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
     816              TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
     817              LASTMARK_SAVE();
     818              if (state->repeat)
     819                  MARK_PUSH(ctx->lastmark);
     820              for (; pattern[0]; pattern += pattern[0]) {
     821                  if (pattern[1] == SRE_OP_LITERAL &&
     822                      (ptr >= end ||
     823                       (SRE_CODE) *ptr != pattern[2]))
     824                      continue;
     825                  if (pattern[1] == SRE_OP_IN &&
     826                      (ptr >= end ||
     827                       !SRE(charset)(state, pattern + 3,
     828                                     (SRE_CODE) *ptr)))
     829                      continue;
     830                  state->ptr = ptr;
     831                  DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
     832                  if (ret) {
     833                      if (state->repeat)
     834                          MARK_POP_DISCARD(ctx->lastmark);
     835                      RETURN_ON_ERROR(ret);
     836                      RETURN_SUCCESS;
     837                  }
     838                  if (state->repeat)
     839                      MARK_POP_KEEP(ctx->lastmark);
     840                  LASTMARK_RESTORE();
     841              }
     842              if (state->repeat)
     843                  MARK_POP_DISCARD(ctx->lastmark);
     844              RETURN_FAILURE;
     845  
     846          TARGET(SRE_OP_REPEAT_ONE):
     847              /* match repeated sequence (maximizing regexp) */
     848  
     849              /* this operator only works if the repeated item is
     850                 exactly one character wide, and we're not already
     851                 collecting backtracking points.  for other cases,
     852                 use the MAX_REPEAT operator */
     853  
     854              /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
     855  
     856              TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
     857                     pattern[1], pattern[2]));
     858  
     859              if ((Py_ssize_t) pattern[1] > end - ptr)
     860                  RETURN_FAILURE; /* cannot match */
     861  
     862              state->ptr = ptr;
     863  
     864              ret = SRE(count)(state, pattern+3, pattern[2]);
     865              RETURN_ON_ERROR(ret);
     866              DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
     867              ctx->count = ret;
     868              ptr += ctx->count;
     869  
     870              /* when we arrive here, count contains the number of
     871                 matches, and ptr points to the tail of the target
     872                 string.  check if the rest of the pattern matches,
     873                 and backtrack if not. */
     874  
     875              if (ctx->count < (Py_ssize_t) pattern[1])
     876                  RETURN_FAILURE;
     877  
     878              if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
     879                  ptr == state->end &&
     880                  !(ctx->toplevel && state->must_advance && ptr == state->start))
     881              {
     882                  /* tail is empty.  we're finished */
     883                  state->ptr = ptr;
     884                  RETURN_SUCCESS;
     885              }
     886  
     887              LASTMARK_SAVE();
     888              if (state->repeat)
     889                  MARK_PUSH(ctx->lastmark);
     890  
     891              if (pattern[pattern[0]] == SRE_OP_LITERAL) {
     892                  /* tail starts with a literal. skip positions where
     893                     the rest of the pattern cannot possibly match */
     894                  ctx->u.chr = pattern[pattern[0]+1];
     895                  for (;;) {
     896                      while (ctx->count >= (Py_ssize_t) pattern[1] &&
     897                             (ptr >= end || *ptr != ctx->u.chr)) {
     898                          ptr--;
     899                          ctx->count--;
     900                      }
     901                      if (ctx->count < (Py_ssize_t) pattern[1])
     902                          break;
     903                      state->ptr = ptr;
     904                      DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
     905                              pattern+pattern[0]);
     906                      if (ret) {
     907                          if (state->repeat)
     908                              MARK_POP_DISCARD(ctx->lastmark);
     909                          RETURN_ON_ERROR(ret);
     910                          RETURN_SUCCESS;
     911                      }
     912                      if (state->repeat)
     913                          MARK_POP_KEEP(ctx->lastmark);
     914                      LASTMARK_RESTORE();
     915  
     916                      ptr--;
     917                      ctx->count--;
     918                  }
     919                  if (state->repeat)
     920                      MARK_POP_DISCARD(ctx->lastmark);
     921              } else {
     922                  /* general case */
     923                  while (ctx->count >= (Py_ssize_t) pattern[1]) {
     924                      state->ptr = ptr;
     925                      DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
     926                              pattern+pattern[0]);
     927                      if (ret) {
     928                          if (state->repeat)
     929                              MARK_POP_DISCARD(ctx->lastmark);
     930                          RETURN_ON_ERROR(ret);
     931                          RETURN_SUCCESS;
     932                      }
     933                      if (state->repeat)
     934                          MARK_POP_KEEP(ctx->lastmark);
     935                      LASTMARK_RESTORE();
     936  
     937                      ptr--;
     938                      ctx->count--;
     939                  }
     940                  if (state->repeat)
     941                      MARK_POP_DISCARD(ctx->lastmark);
     942              }
     943              RETURN_FAILURE;
     944  
     945          TARGET(SRE_OP_MIN_REPEAT_ONE):
     946              /* match repeated sequence (minimizing regexp) */
     947  
     948              /* this operator only works if the repeated item is
     949                 exactly one character wide, and we're not already
     950                 collecting backtracking points.  for other cases,
     951                 use the MIN_REPEAT operator */
     952  
     953              /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
     954  
     955              TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
     956                     pattern[1], pattern[2]));
     957  
     958              if ((Py_ssize_t) pattern[1] > end - ptr)
     959                  RETURN_FAILURE; /* cannot match */
     960  
     961              state->ptr = ptr;
     962  
     963              if (pattern[1] == 0)
     964                  ctx->count = 0;
     965              else {
     966                  /* count using pattern min as the maximum */
     967                  ret = SRE(count)(state, pattern+3, pattern[1]);
     968                  RETURN_ON_ERROR(ret);
     969                  DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
     970                  if (ret < (Py_ssize_t) pattern[1])
     971                      /* didn't match minimum number of times */
     972                      RETURN_FAILURE;
     973                  /* advance past minimum matches of repeat */
     974                  ctx->count = ret;
     975                  ptr += ctx->count;
     976              }
     977  
     978              if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
     979                  !(ctx->toplevel &&
     980                    ((state->match_all && ptr != state->end) ||
     981                     (state->must_advance && ptr == state->start))))
     982              {
     983                  /* tail is empty.  we're finished */
     984                  state->ptr = ptr;
     985                  RETURN_SUCCESS;
     986  
     987              } else {
     988                  /* general case */
     989                  LASTMARK_SAVE();
     990                  if (state->repeat)
     991                      MARK_PUSH(ctx->lastmark);
     992  
     993                  while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
     994                         || ctx->count <= (Py_ssize_t)pattern[2]) {
     995                      state->ptr = ptr;
     996                      DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
     997                              pattern+pattern[0]);
     998                      if (ret) {
     999                          if (state->repeat)
    1000                              MARK_POP_DISCARD(ctx->lastmark);
    1001                          RETURN_ON_ERROR(ret);
    1002                          RETURN_SUCCESS;
    1003                      }
    1004                      if (state->repeat)
    1005                          MARK_POP_KEEP(ctx->lastmark);
    1006                      LASTMARK_RESTORE();
    1007  
    1008                      state->ptr = ptr;
    1009                      ret = SRE(count)(state, pattern+3, 1);
    1010                      RETURN_ON_ERROR(ret);
    1011                      DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
    1012                      if (ret == 0)
    1013                          break;
    1014                      assert(ret == 1);
    1015                      ptr++;
    1016                      ctx->count++;
    1017                  }
    1018                  if (state->repeat)
    1019                      MARK_POP_DISCARD(ctx->lastmark);
    1020              }
    1021              RETURN_FAILURE;
    1022  
    1023          TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
    1024              /* match repeated sequence (maximizing regexp) without
    1025                 backtracking */
    1026  
    1027              /* this operator only works if the repeated item is
    1028                 exactly one character wide, and we're not already
    1029                 collecting backtracking points.  for other cases,
    1030                 use the MAX_REPEAT operator */
    1031  
    1032              /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
    1033                 tail */
    1034  
    1035              TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
    1036                     ptr, pattern[1], pattern[2]));
    1037  
    1038              if (ptr + pattern[1] > end) {
    1039                  RETURN_FAILURE; /* cannot match */
    1040              }
    1041  
    1042              state->ptr = ptr;
    1043  
    1044              ret = SRE(count)(state, pattern + 3, pattern[2]);
    1045              RETURN_ON_ERROR(ret);
    1046              DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
    1047              ctx->count = ret;
    1048              ptr += ctx->count;
    1049  
    1050              /* when we arrive here, count contains the number of
    1051                 matches, and ptr points to the tail of the target
    1052                 string.  check if the rest of the pattern matches,
    1053                 and fail if not. */
    1054  
    1055              /* Test for not enough repetitions in match */
    1056              if (ctx->count < (Py_ssize_t) pattern[1]) {
    1057                  RETURN_FAILURE;
    1058              }
    1059  
    1060              /* Update the pattern to point to the next op code */
    1061              pattern += pattern[0];
    1062  
    1063              /* Let the tail be evaluated separately and consider this
    1064                 match successful. */
    1065              if (*pattern == SRE_OP_SUCCESS &&
    1066                  ptr == state->end &&
    1067                  !(ctx->toplevel && state->must_advance && ptr == state->start))
    1068              {
    1069                  /* tail is empty.  we're finished */
    1070                  state->ptr = ptr;
    1071                  RETURN_SUCCESS;
    1072              }
    1073  
    1074              /* Attempt to match the rest of the string */
    1075              DISPATCH;
    1076  
    1077          TARGET(SRE_OP_REPEAT):
    1078              /* create repeat context.  all the hard work is done
    1079                 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
    1080              /* <REPEAT> <skip> <1=min> <2=max>
    1081                 <3=repeat_index> item <UNTIL> tail */
    1082              TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
    1083                     pattern[1], pattern[2]));
    1084  
    1085              /* install new repeat context */
    1086              /* TODO(https://github.com/python/cpython/issues/67877): Fix this
    1087               * potential memory leak. */
    1088              ctx->u.rep = (SRE_REPEAT*) PyObject_Malloc(sizeof(*ctx->u.rep));
    1089              if (!ctx->u.rep) {
    1090                  PyErr_NoMemory();
    1091                  RETURN_FAILURE;
    1092              }
    1093              ctx->u.rep->count = -1;
    1094              ctx->u.rep->pattern = pattern;
    1095              ctx->u.rep->prev = state->repeat;
    1096              ctx->u.rep->last_ptr = NULL;
    1097              state->repeat = ctx->u.rep;
    1098  
    1099              state->ptr = ptr;
    1100              DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
    1101              state->repeat = ctx->u.rep->prev;
    1102              PyObject_Free(ctx->u.rep);
    1103  
    1104              if (ret) {
    1105                  RETURN_ON_ERROR(ret);
    1106                  RETURN_SUCCESS;
    1107              }
    1108              RETURN_FAILURE;
    1109  
    1110          TARGET(SRE_OP_MAX_UNTIL):
    1111              /* maximizing repeat */
    1112              /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
    1113  
    1114              /* FIXME: we probably need to deal with zero-width
    1115                 matches in here... */
    1116  
    1117              ctx->u.rep = state->repeat;
    1118              if (!ctx->u.rep)
    1119                  RETURN_ERROR(SRE_ERROR_STATE);
    1120  
    1121              state->ptr = ptr;
    1122  
    1123              ctx->count = ctx->u.rep->count+1;
    1124  
    1125              TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
    1126                     ptr, ctx->count));
    1127  
    1128              if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
    1129                  /* not enough matches */
    1130                  ctx->u.rep->count = ctx->count;
    1131                  DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
    1132                          ctx->u.rep->pattern+3);
    1133                  if (ret) {
    1134                      RETURN_ON_ERROR(ret);
    1135                      RETURN_SUCCESS;
    1136                  }
    1137                  ctx->u.rep->count = ctx->count-1;
    1138                  state->ptr = ptr;
    1139                  RETURN_FAILURE;
    1140              }
    1141  
    1142              if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
    1143                  ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
    1144                  state->ptr != ctx->u.rep->last_ptr) {
    1145                  /* we may have enough matches, but if we can
    1146                     match another item, do so */
    1147                  ctx->u.rep->count = ctx->count;
    1148                  LASTMARK_SAVE();
    1149                  MARK_PUSH(ctx->lastmark);
    1150                  /* zero-width match protection */
    1151                  DATA_PUSH(&ctx->u.rep->last_ptr);
    1152                  ctx->u.rep->last_ptr = state->ptr;
    1153                  DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
    1154                          ctx->u.rep->pattern+3);
    1155                  DATA_POP(&ctx->u.rep->last_ptr);
    1156                  if (ret) {
    1157                      MARK_POP_DISCARD(ctx->lastmark);
    1158                      RETURN_ON_ERROR(ret);
    1159                      RETURN_SUCCESS;
    1160                  }
    1161                  MARK_POP(ctx->lastmark);
    1162                  LASTMARK_RESTORE();
    1163                  ctx->u.rep->count = ctx->count-1;
    1164                  state->ptr = ptr;
    1165              }
    1166  
    1167              /* cannot match more repeated items here.  make sure the
    1168                 tail matches */
    1169              state->repeat = ctx->u.rep->prev;
    1170              DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
    1171              state->repeat = ctx->u.rep; // restore repeat before return
    1172  
    1173              RETURN_ON_SUCCESS(ret);
    1174              state->ptr = ptr;
    1175              RETURN_FAILURE;
    1176  
    1177          TARGET(SRE_OP_MIN_UNTIL):
    1178              /* minimizing repeat */
    1179              /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
    1180  
    1181              ctx->u.rep = state->repeat;
    1182              if (!ctx->u.rep)
    1183                  RETURN_ERROR(SRE_ERROR_STATE);
    1184  
    1185              state->ptr = ptr;
    1186  
    1187              ctx->count = ctx->u.rep->count+1;
    1188  
    1189              TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
    1190                     ptr, ctx->count, ctx->u.rep->pattern));
    1191  
    1192              if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
    1193                  /* not enough matches */
    1194                  ctx->u.rep->count = ctx->count;
    1195                  DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
    1196                          ctx->u.rep->pattern+3);
    1197                  if (ret) {
    1198                      RETURN_ON_ERROR(ret);
    1199                      RETURN_SUCCESS;
    1200                  }
    1201                  ctx->u.rep->count = ctx->count-1;
    1202                  state->ptr = ptr;
    1203                  RETURN_FAILURE;
    1204              }
    1205  
    1206              /* see if the tail matches */
    1207              state->repeat = ctx->u.rep->prev;
    1208  
    1209              LASTMARK_SAVE();
    1210              if (state->repeat)
    1211                  MARK_PUSH(ctx->lastmark);
    1212  
    1213              DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
    1214              SRE_REPEAT *repeat_of_tail = state->repeat;
    1215              state->repeat = ctx->u.rep; // restore repeat before return
    1216  
    1217              if (ret) {
    1218                  if (repeat_of_tail)
    1219                      MARK_POP_DISCARD(ctx->lastmark);
    1220                  RETURN_ON_ERROR(ret);
    1221                  RETURN_SUCCESS;
    1222              }
    1223              if (repeat_of_tail)
    1224                  MARK_POP(ctx->lastmark);
    1225              LASTMARK_RESTORE();
    1226  
    1227              state->ptr = ptr;
    1228  
    1229              if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
    1230                  && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
    1231                  state->ptr == ctx->u.rep->last_ptr)
    1232                  RETURN_FAILURE;
    1233  
    1234              ctx->u.rep->count = ctx->count;
    1235              /* zero-width match protection */
    1236              DATA_PUSH(&ctx->u.rep->last_ptr);
    1237              ctx->u.rep->last_ptr = state->ptr;
    1238              DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
    1239                      ctx->u.rep->pattern+3);
    1240              DATA_POP(&ctx->u.rep->last_ptr);
    1241              if (ret) {
    1242                  RETURN_ON_ERROR(ret);
    1243                  RETURN_SUCCESS;
    1244              }
    1245              ctx->u.rep->count = ctx->count-1;
    1246              state->ptr = ptr;
    1247              RETURN_FAILURE;
    1248  
    1249          TARGET(SRE_OP_POSSESSIVE_REPEAT):
    1250              /* create possessive repeat contexts. */
    1251              /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
    1252                 <SUCCESS> tail */
    1253              TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
    1254                     ptr, pattern[1], pattern[2]));
    1255  
    1256              /* Set the global Input pointer to this context's Input
    1257                 pointer */
    1258              state->ptr = ptr;
    1259  
    1260              /* Initialize Count to 0 */
    1261              ctx->count = 0;
    1262  
    1263              /* Check for minimum required matches. */
    1264              while (ctx->count < (Py_ssize_t)pattern[1]) {
    1265                  /* not enough matches */
    1266                  DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
    1267                           &pattern[3]);
    1268                  if (ret) {
    1269                      RETURN_ON_ERROR(ret);
    1270                      ctx->count++;
    1271                  }
    1272                  else {
    1273                      state->ptr = ptr;
    1274                      RETURN_FAILURE;
    1275                  }
    1276              }
    1277  
    1278              /* Clear the context's Input stream pointer so that it
    1279                 doesn't match the global state so that the while loop can
    1280                 be entered. */
    1281              ptr = NULL;
    1282  
    1283              /* Keep trying to parse the <pattern> sub-pattern until the
    1284                 end is reached, creating a new context each time. */
    1285              while ((ctx->count < (Py_ssize_t)pattern[2] ||
    1286                      (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
    1287                     state->ptr != ptr) {
    1288                  /* Save the Capture Group Marker state into the current
    1289                     Context and back up the current highest number
    1290                     Capture Group marker. */
    1291                  LASTMARK_SAVE();
    1292                  MARK_PUSH(ctx->lastmark);
    1293  
    1294                  /* zero-width match protection */
    1295                  /* Set the context's Input Stream pointer to be the
    1296                     current Input Stream pointer from the global
    1297                     state.  When the loop reaches the next iteration,
    1298                     the context will then store the last known good
    1299                     position with the global state holding the Input
    1300                     Input Stream position that has been updated with
    1301                     the most recent match.  Thus, if state's Input
    1302                     stream remains the same as the one stored in the
    1303                     current Context, we know we have successfully
    1304                     matched an empty string and that all subsequent
    1305                     matches will also be the empty string until the
    1306                     maximum number of matches are counted, and because
    1307                     of this, we could immediately stop at that point and
    1308                     consider this match successful. */
    1309                  ptr = state->ptr;
    1310  
    1311                  /* We have not reached the maximin matches, so try to
    1312                     match once more. */
    1313                  DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
    1314                           &pattern[3]);
    1315  
    1316                  /* Check to see if the last attempted match
    1317                     succeeded. */
    1318                  if (ret) {
    1319                      /* Drop the saved highest number Capture Group
    1320                         marker saved above and use the newly updated
    1321                         value. */
    1322                      MARK_POP_DISCARD(ctx->lastmark);
    1323                      RETURN_ON_ERROR(ret);
    1324  
    1325                      /* Success, increment the count. */
    1326                      ctx->count++;
    1327                  }
    1328                  /* Last attempted match failed. */
    1329                  else {
    1330                      /* Restore the previously saved highest number
    1331                         Capture Group marker since the last iteration
    1332                         did not match, then restore that to the global
    1333                         state. */
    1334                      MARK_POP(ctx->lastmark);
    1335                      LASTMARK_RESTORE();
    1336  
    1337                      /* Restore the global Input Stream pointer
    1338                         since it can change after jumps. */
    1339                      state->ptr = ptr;
    1340  
    1341                      /* We have sufficient matches, so exit loop. */
    1342                      break;
    1343                  }
    1344              }
    1345  
    1346              /* Evaluate Tail */
    1347              /* Jump to end of pattern indicated by skip, and then skip
    1348                 the SUCCESS op code that follows it. */
    1349              pattern += pattern[0] + 1;
    1350              ptr = state->ptr;
    1351              DISPATCH;
    1352  
    1353          TARGET(SRE_OP_ATOMIC_GROUP):
    1354              /* Atomic Group Sub Pattern */
    1355              /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
    1356              TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
    1357  
    1358              /* Set the global Input pointer to this context's Input
    1359                 pointer */
    1360              state->ptr = ptr;
    1361  
    1362              /* Evaluate the Atomic Group in a new context, terminating
    1363                 when the end of the group, represented by a SUCCESS op
    1364                 code, is reached. */
    1365              /* Group Pattern begins at an offset of 1 code. */
    1366              DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
    1367                       &pattern[1]);
    1368  
    1369              /* Test Exit Condition */
    1370              RETURN_ON_ERROR(ret);
    1371  
    1372              if (ret == 0) {
    1373                  /* Atomic Group failed to Match. */
    1374                  state->ptr = ptr;
    1375                  RETURN_FAILURE;
    1376              }
    1377  
    1378              /* Evaluate Tail */
    1379              /* Jump to end of pattern indicated by skip, and then skip
    1380                 the SUCCESS op code that follows it. */
    1381              pattern += pattern[0];
    1382              ptr = state->ptr;
    1383              DISPATCH;
    1384  
    1385          TARGET(SRE_OP_GROUPREF):
    1386              /* match backreference */
    1387              TRACE(("|%p|%p|GROUPREF %d\n", pattern,
    1388                     ptr, pattern[0]));
    1389              {
    1390                  int groupref = pattern[0] * 2;
    1391                  if (groupref >= state->lastmark) {
    1392                      RETURN_FAILURE;
    1393                  } else {
    1394                      SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
    1395                      SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
    1396                      if (!p || !e || e < p)
    1397                          RETURN_FAILURE;
    1398                      while (p < e) {
    1399                          if (ptr >= end || *ptr != *p)
    1400                              RETURN_FAILURE;
    1401                          p++;
    1402                          ptr++;
    1403                      }
    1404                  }
    1405              }
    1406              pattern++;
    1407              DISPATCH;
    1408  
    1409          TARGET(SRE_OP_GROUPREF_IGNORE):
    1410              /* match backreference */
    1411              TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
    1412                     ptr, pattern[0]));
    1413              {
    1414                  int groupref = pattern[0] * 2;
    1415                  if (groupref >= state->lastmark) {
    1416                      RETURN_FAILURE;
    1417                  } else {
    1418                      SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
    1419                      SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
    1420                      if (!p || !e || e < p)
    1421                          RETURN_FAILURE;
    1422                      while (p < e) {
    1423                          if (ptr >= end ||
    1424                              sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
    1425                              RETURN_FAILURE;
    1426                          p++;
    1427                          ptr++;
    1428                      }
    1429                  }
    1430              }
    1431              pattern++;
    1432              DISPATCH;
    1433  
    1434          TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
    1435              /* match backreference */
    1436              TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
    1437                     ptr, pattern[0]));
    1438              {
    1439                  int groupref = pattern[0] * 2;
    1440                  if (groupref >= state->lastmark) {
    1441                      RETURN_FAILURE;
    1442                  } else {
    1443                      SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
    1444                      SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
    1445                      if (!p || !e || e < p)
    1446                          RETURN_FAILURE;
    1447                      while (p < e) {
    1448                          if (ptr >= end ||
    1449                              sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
    1450                              RETURN_FAILURE;
    1451                          p++;
    1452                          ptr++;
    1453                      }
    1454                  }
    1455              }
    1456              pattern++;
    1457              DISPATCH;
    1458  
    1459          TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
    1460              /* match backreference */
    1461              TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
    1462                     ptr, pattern[0]));
    1463              {
    1464                  int groupref = pattern[0] * 2;
    1465                  if (groupref >= state->lastmark) {
    1466                      RETURN_FAILURE;
    1467                  } else {
    1468                      SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
    1469                      SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
    1470                      if (!p || !e || e < p)
    1471                          RETURN_FAILURE;
    1472                      while (p < e) {
    1473                          if (ptr >= end ||
    1474                              sre_lower_locale(*ptr) != sre_lower_locale(*p))
    1475                              RETURN_FAILURE;
    1476                          p++;
    1477                          ptr++;
    1478                      }
    1479                  }
    1480              }
    1481              pattern++;
    1482              DISPATCH;
    1483  
    1484          TARGET(SRE_OP_GROUPREF_EXISTS):
    1485              TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
    1486                     ptr, pattern[0]));
    1487              /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
    1488              {
    1489                  int groupref = pattern[0] * 2;
    1490                  if (groupref >= state->lastmark) {
    1491                      pattern += pattern[1];
    1492                      DISPATCH;
    1493                  } else {
    1494                      SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
    1495                      SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
    1496                      if (!p || !e || e < p) {
    1497                          pattern += pattern[1];
    1498                          DISPATCH;
    1499                      }
    1500                  }
    1501              }
    1502              pattern += 2;
    1503              DISPATCH;
    1504  
    1505          TARGET(SRE_OP_ASSERT):
    1506              /* assert subpattern */
    1507              /* <ASSERT> <skip> <back> <pattern> */
    1508              TRACE(("|%p|%p|ASSERT %d\n", pattern,
    1509                     ptr, pattern[1]));
    1510              if (ptr - (SRE_CHAR *)state->beginning < (Py_ssize_t)pattern[1])
    1511                  RETURN_FAILURE;
    1512              state->ptr = ptr - pattern[1];
    1513              DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
    1514              RETURN_ON_FAILURE(ret);
    1515              pattern += pattern[0];
    1516              DISPATCH;
    1517  
    1518          TARGET(SRE_OP_ASSERT_NOT):
    1519              /* assert not subpattern */
    1520              /* <ASSERT_NOT> <skip> <back> <pattern> */
    1521              TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
    1522                     ptr, pattern[1]));
    1523              if (ptr - (SRE_CHAR *)state->beginning >= (Py_ssize_t)pattern[1]) {
    1524                  state->ptr = ptr - pattern[1];
    1525                  LASTMARK_SAVE();
    1526                  if (state->repeat)
    1527                      MARK_PUSH(ctx->lastmark);
    1528  
    1529                  DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
    1530                  if (ret) {
    1531                      if (state->repeat)
    1532                          MARK_POP_DISCARD(ctx->lastmark);
    1533                      RETURN_ON_ERROR(ret);
    1534                      RETURN_FAILURE;
    1535                  }
    1536                  if (state->repeat)
    1537                      MARK_POP(ctx->lastmark);
    1538                  LASTMARK_RESTORE();
    1539              }
    1540              pattern += pattern[0];
    1541              DISPATCH;
    1542  
    1543          TARGET(SRE_OP_FAILURE):
    1544              /* immediate failure */
    1545              TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
    1546              RETURN_FAILURE;
    1547  
    1548  #if !USE_COMPUTED_GOTOS
    1549          default:
    1550  #endif
    1551          // Also any unused opcodes:
    1552          TARGET(SRE_OP_RANGE_UNI_IGNORE):
    1553          TARGET(SRE_OP_SUBPATTERN):
    1554          TARGET(SRE_OP_RANGE):
    1555          TARGET(SRE_OP_NEGATE):
    1556          TARGET(SRE_OP_BIGCHARSET):
    1557          TARGET(SRE_OP_CHARSET):
    1558              TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
    1559                     pattern[-1]));
    1560              RETURN_ERROR(SRE_ERROR_ILLEGAL);
    1561  
    1562      }
    1563  
    1564  exit:
    1565      ctx_pos = ctx->last_ctx_pos;
    1566      jump = ctx->jump;
    1567      DATA_POP_DISCARD(ctx);
    1568      if (ctx_pos == -1)
    1569          return ret;
    1570      DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
    1571  
    1572      switch (jump) {
    1573          case JUMP_MAX_UNTIL_2:
    1574              TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
    1575              goto jump_max_until_2;
    1576          case JUMP_MAX_UNTIL_3:
    1577              TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
    1578              goto jump_max_until_3;
    1579          case JUMP_MIN_UNTIL_2:
    1580              TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
    1581              goto jump_min_until_2;
    1582          case JUMP_MIN_UNTIL_3:
    1583              TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
    1584              goto jump_min_until_3;
    1585          case JUMP_BRANCH:
    1586              TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
    1587              goto jump_branch;
    1588          case JUMP_MAX_UNTIL_1:
    1589              TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
    1590              goto jump_max_until_1;
    1591          case JUMP_MIN_UNTIL_1:
    1592              TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
    1593              goto jump_min_until_1;
    1594          case JUMP_POSS_REPEAT_1:
    1595              TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
    1596              goto jump_poss_repeat_1;
    1597          case JUMP_POSS_REPEAT_2:
    1598              TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
    1599              goto jump_poss_repeat_2;
    1600          case JUMP_REPEAT:
    1601              TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
    1602              goto jump_repeat;
    1603          case JUMP_REPEAT_ONE_1:
    1604              TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
    1605              goto jump_repeat_one_1;
    1606          case JUMP_REPEAT_ONE_2:
    1607              TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
    1608              goto jump_repeat_one_2;
    1609          case JUMP_MIN_REPEAT_ONE:
    1610              TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
    1611              goto jump_min_repeat_one;
    1612          case JUMP_ATOMIC_GROUP:
    1613              TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
    1614              goto jump_atomic_group;
    1615          case JUMP_ASSERT:
    1616              TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
    1617              goto jump_assert;
    1618          case JUMP_ASSERT_NOT:
    1619              TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
    1620              goto jump_assert_not;
    1621          case JUMP_NONE:
    1622              TRACE(("|%p|%p|RETURN %zd\n", pattern,
    1623                     ptr, ret));
    1624              break;
    1625      }
    1626  
    1627      return ret; /* should never get here */
    1628  }
    1629  
    1630  /* need to reset capturing groups between two SRE(match) callings in loops */
    1631  #define RESET_CAPTURE_GROUP() \
    1632      do { state->lastmark = state->lastindex = -1; } while (0)
    1633  
    1634  LOCAL(Py_ssize_t)
    1635  SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
    1636  {
    1637      SRE_CHAR* ptr = (SRE_CHAR *)state->start;
    1638      SRE_CHAR* end = (SRE_CHAR *)state->end;
    1639      Py_ssize_t status = 0;
    1640      Py_ssize_t prefix_len = 0;
    1641      Py_ssize_t prefix_skip = 0;
    1642      SRE_CODE* prefix = NULL;
    1643      SRE_CODE* charset = NULL;
    1644      SRE_CODE* overlap = NULL;
    1645      int flags = 0;
    1646  
    1647      if (ptr > end)
    1648          return 0;
    1649  
    1650      if (pattern[0] == SRE_OP_INFO) {
    1651          /* optimization info block */
    1652          /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
    1653  
    1654          flags = pattern[2];
    1655  
    1656          if (pattern[3] && end - ptr < (Py_ssize_t)pattern[3]) {
    1657              TRACE(("reject (got %u chars, need %u)\n",
    1658                     (unsigned int)(end - ptr), pattern[3]));
    1659              return 0;
    1660          }
    1661          if (pattern[3] > 1) {
    1662              /* adjust end point (but make sure we leave at least one
    1663                 character in there, so literal search will work) */
    1664              end -= pattern[3] - 1;
    1665              if (end <= ptr)
    1666                  end = ptr;
    1667          }
    1668  
    1669          if (flags & SRE_INFO_PREFIX) {
    1670              /* pattern starts with a known prefix */
    1671              /* <length> <skip> <prefix data> <overlap data> */
    1672              prefix_len = pattern[5];
    1673              prefix_skip = pattern[6];
    1674              prefix = pattern + 7;
    1675              overlap = prefix + prefix_len - 1;
    1676          } else if (flags & SRE_INFO_CHARSET)
    1677              /* pattern starts with a character from a known set */
    1678              /* <charset> */
    1679              charset = pattern + 5;
    1680  
    1681          pattern += 1 + pattern[1];
    1682      }
    1683  
    1684      TRACE(("prefix = %p %zd %zd\n",
    1685             prefix, prefix_len, prefix_skip));
    1686      TRACE(("charset = %p\n", charset));
    1687  
    1688      if (prefix_len == 1) {
    1689          /* pattern starts with a literal character */
    1690          SRE_CHAR c = (SRE_CHAR) prefix[0];
    1691  #if SIZEOF_SRE_CHAR < 4
    1692          if ((SRE_CODE) c != prefix[0])
    1693              return 0; /* literal can't match: doesn't fit in char width */
    1694  #endif
    1695          end = (SRE_CHAR *)state->end;
    1696          state->must_advance = 0;
    1697          while (ptr < end) {
    1698              while (*ptr != c) {
    1699                  if (++ptr >= end)
    1700                      return 0;
    1701              }
    1702              TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
    1703              state->start = ptr;
    1704              state->ptr = ptr + prefix_skip;
    1705              if (flags & SRE_INFO_LITERAL)
    1706                  return 1; /* we got all of it */
    1707              status = SRE(match)(state, pattern + 2*prefix_skip, 0);
    1708              if (status != 0)
    1709                  return status;
    1710              ++ptr;
    1711              RESET_CAPTURE_GROUP();
    1712          }
    1713          return 0;
    1714      }
    1715  
    1716      if (prefix_len > 1) {
    1717          /* pattern starts with a known prefix.  use the overlap
    1718             table to skip forward as fast as we possibly can */
    1719          Py_ssize_t i = 0;
    1720  
    1721          end = (SRE_CHAR *)state->end;
    1722          if (prefix_len > end - ptr)
    1723              return 0;
    1724  #if SIZEOF_SRE_CHAR < 4
    1725          for (i = 0; i < prefix_len; i++)
    1726              if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
    1727                  return 0; /* literal can't match: doesn't fit in char width */
    1728  #endif
    1729          while (ptr < end) {
    1730              SRE_CHAR c = (SRE_CHAR) prefix[0];
    1731              while (*ptr++ != c) {
    1732                  if (ptr >= end)
    1733                      return 0;
    1734              }
    1735              if (ptr >= end)
    1736                  return 0;
    1737  
    1738              i = 1;
    1739              state->must_advance = 0;
    1740              do {
    1741                  if (*ptr == (SRE_CHAR) prefix[i]) {
    1742                      if (++i != prefix_len) {
    1743                          if (++ptr >= end)
    1744                              return 0;
    1745                          continue;
    1746                      }
    1747                      /* found a potential match */
    1748                      TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
    1749                      state->start = ptr - (prefix_len - 1);
    1750                      state->ptr = ptr - (prefix_len - prefix_skip - 1);
    1751                      if (flags & SRE_INFO_LITERAL)
    1752                          return 1; /* we got all of it */
    1753                      status = SRE(match)(state, pattern + 2*prefix_skip, 0);
    1754                      if (status != 0)
    1755                          return status;
    1756                      /* close but no cigar -- try again */
    1757                      if (++ptr >= end)
    1758                          return 0;
    1759                      RESET_CAPTURE_GROUP();
    1760                  }
    1761                  i = overlap[i];
    1762              } while (i != 0);
    1763          }
    1764          return 0;
    1765      }
    1766  
    1767      if (charset) {
    1768          /* pattern starts with a character from a known set */
    1769          end = (SRE_CHAR *)state->end;
    1770          state->must_advance = 0;
    1771          for (;;) {
    1772              while (ptr < end && !SRE(charset)(state, charset, *ptr))
    1773                  ptr++;
    1774              if (ptr >= end)
    1775                  return 0;
    1776              TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
    1777              state->start = ptr;
    1778              state->ptr = ptr;
    1779              status = SRE(match)(state, pattern, 0);
    1780              if (status != 0)
    1781                  break;
    1782              ptr++;
    1783              RESET_CAPTURE_GROUP();
    1784          }
    1785      } else {
    1786          /* general case */
    1787          assert(ptr <= end);
    1788          TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
    1789          state->start = state->ptr = ptr;
    1790          status = SRE(match)(state, pattern, 1);
    1791          state->must_advance = 0;
    1792          if (status == 0 && pattern[0] == SRE_OP_AT &&
    1793              (pattern[1] == SRE_AT_BEGINNING ||
    1794               pattern[1] == SRE_AT_BEGINNING_STRING))
    1795          {
    1796              state->start = state->ptr = ptr = end;
    1797              return 0;
    1798          }
    1799          while (status == 0 && ptr < end) {
    1800              ptr++;
    1801              RESET_CAPTURE_GROUP();
    1802              TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
    1803              state->start = state->ptr = ptr;
    1804              status = SRE(match)(state, pattern, 0);
    1805          }
    1806      }
    1807  
    1808      return status;
    1809  }
    1810  
    1811  #undef SRE_CHAR
    1812  #undef SIZEOF_SRE_CHAR
    1813  #undef SRE
    1814  
    1815  /* vim:ts=4:sw=4:et
    1816  */