(root)/
Python-3.12.0/
Objects/
stringlib/
fastsearch.h
       1  /* stringlib: fastsearch implementation */
       2  
       3  #define STRINGLIB_FASTSEARCH_H
       4  
       5  /* fast search/count implementation, based on a mix between boyer-
       6     moore and horspool, with a few more bells and whistles on the top.
       7     for some more background, see:
       8     https://web.archive.org/web/20201107074620/http://effbot.org/zone/stringlib.htm */
       9  
      10  /* note: fastsearch may access s[n], which isn't a problem when using
      11     Python's ordinary string types, but may cause problems if you're
      12     using this code in other contexts.  also, the count mode returns -1
      13     if there cannot possibly be a match in the target string, and 0 if
      14     it has actually checked for matches, but didn't find any.  callers
      15     beware! */
      16  
      17  /* If the strings are long enough, use Crochemore and Perrin's Two-Way
      18     algorithm, which has worst-case O(n) runtime and best-case O(n/k).
      19     Also compute a table of shifts to achieve O(n/k) in more cases,
      20     and often (data dependent) deduce larger shifts than pure C&P can
      21     deduce. See stringlib_find_two_way_notes.txt in this folder for a
      22     detailed explanation. */
      23  
      24  #define FAST_COUNT 0
      25  #define FAST_SEARCH 1
      26  #define FAST_RSEARCH 2
      27  
      28  #if LONG_BIT >= 128
      29  #define STRINGLIB_BLOOM_WIDTH 128
      30  #elif LONG_BIT >= 64
      31  #define STRINGLIB_BLOOM_WIDTH 64
      32  #elif LONG_BIT >= 32
      33  #define STRINGLIB_BLOOM_WIDTH 32
      34  #else
      35  #error "LONG_BIT is smaller than 32"
      36  #endif
      37  
      38  #define STRINGLIB_BLOOM_ADD(mask, ch) \
      39      ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
      40  #define STRINGLIB_BLOOM(mask, ch)     \
      41      ((mask &  (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
      42  
      43  #ifdef STRINGLIB_FAST_MEMCHR
      44  #  define MEMCHR_CUT_OFF 15
      45  #else
      46  #  define MEMCHR_CUT_OFF 40
      47  #endif
      48  
      49  Py_LOCAL_INLINE(Py_ssize_t)
      50  STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
      51  {
      52      const STRINGLIB_CHAR *p, *e;
      53  
      54      p = s;
      55      e = s + n;
      56      if (n > MEMCHR_CUT_OFF) {
      57  #ifdef STRINGLIB_FAST_MEMCHR
      58          p = STRINGLIB_FAST_MEMCHR(s, ch, n);
      59          if (p != NULL)
      60              return (p - s);
      61          return -1;
      62  #else
      63          /* use memchr if we can choose a needle without too many likely
      64             false positives */
      65          const STRINGLIB_CHAR *s1, *e1;
      66          unsigned char needle = ch & 0xff;
      67          /* If looking for a multiple of 256, we'd have too
      68             many false positives looking for the '\0' byte in UCS2
      69             and UCS4 representations. */
      70          if (needle != 0) {
      71              do {
      72                  void *candidate = memchr(p, needle,
      73                                           (e - p) * sizeof(STRINGLIB_CHAR));
      74                  if (candidate == NULL)
      75                      return -1;
      76                  s1 = p;
      77                  p = (const STRINGLIB_CHAR *)
      78                          _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
      79                  if (*p == ch)
      80                      return (p - s);
      81                  /* False positive */
      82                  p++;
      83                  if (p - s1 > MEMCHR_CUT_OFF)
      84                      continue;
      85                  if (e - p <= MEMCHR_CUT_OFF)
      86                      break;
      87                  e1 = p + MEMCHR_CUT_OFF;
      88                  while (p != e1) {
      89                      if (*p == ch)
      90                          return (p - s);
      91                      p++;
      92                  }
      93              }
      94              while (e - p > MEMCHR_CUT_OFF);
      95          }
      96  #endif
      97      }
      98      while (p < e) {
      99          if (*p == ch)
     100              return (p - s);
     101          p++;
     102      }
     103      return -1;
     104  }
     105  
     106  #undef MEMCHR_CUT_OFF
     107  
     108  #if STRINGLIB_SIZEOF_CHAR == 1
     109  #  define MEMRCHR_CUT_OFF 15
     110  #else
     111  #  define MEMRCHR_CUT_OFF 40
     112  #endif
     113  
     114  
     115  Py_LOCAL_INLINE(Py_ssize_t)
     116  STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
     117  {
     118      const STRINGLIB_CHAR *p;
     119  #ifdef HAVE_MEMRCHR
     120      /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
     121         doesn't seem as optimized as memchr(), but is still quite
     122         faster than our hand-written loop below. There is no wmemrchr
     123         for 4-byte chars. */
     124  
     125      if (n > MEMRCHR_CUT_OFF) {
     126  #if STRINGLIB_SIZEOF_CHAR == 1
     127          p = memrchr(s, ch, n);
     128          if (p != NULL)
     129              return (p - s);
     130          return -1;
     131  #else
     132          /* use memrchr if we can choose a needle without too many likely
     133             false positives */
     134          const STRINGLIB_CHAR *s1;
     135          Py_ssize_t n1;
     136          unsigned char needle = ch & 0xff;
     137          /* If looking for a multiple of 256, we'd have too
     138             many false positives looking for the '\0' byte in UCS2
     139             and UCS4 representations. */
     140          if (needle != 0) {
     141              do {
     142                  void *candidate = memrchr(s, needle,
     143                                            n * sizeof(STRINGLIB_CHAR));
     144                  if (candidate == NULL)
     145                      return -1;
     146                  n1 = n;
     147                  p = (const STRINGLIB_CHAR *)
     148                          _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
     149                  n = p - s;
     150                  if (*p == ch)
     151                      return n;
     152                  /* False positive */
     153                  if (n1 - n > MEMRCHR_CUT_OFF)
     154                      continue;
     155                  if (n <= MEMRCHR_CUT_OFF)
     156                      break;
     157                  s1 = p - MEMRCHR_CUT_OFF;
     158                  while (p > s1) {
     159                      p--;
     160                      if (*p == ch)
     161                          return (p - s);
     162                  }
     163                  n = p - s;
     164              }
     165              while (n > MEMRCHR_CUT_OFF);
     166          }
     167  #endif
     168      }
     169  #endif  /* HAVE_MEMRCHR */
     170      p = s + n;
     171      while (p > s) {
     172          p--;
     173          if (*p == ch)
     174              return (p - s);
     175      }
     176      return -1;
     177  }
     178  
     179  #undef MEMRCHR_CUT_OFF
     180  
     181  /* Change to a 1 to see logging comments walk through the algorithm. */
     182  #if 0 && STRINGLIB_SIZEOF_CHAR == 1
     183  # define LOG(...) printf(__VA_ARGS__)
     184  # define LOG_STRING(s, n) printf("\"%.*s\"", (int)(n), s)
     185  # define LOG_LINEUP() do {                                         \
     186      LOG("> "); LOG_STRING(haystack, len_haystack); LOG("\n> ");    \
     187      LOG("%*s",(int)(window_last - haystack + 1 - len_needle), ""); \
     188      LOG_STRING(needle, len_needle); LOG("\n");                     \
     189  } while(0)
     190  #else
     191  # define LOG(...)
     192  # define LOG_STRING(s, n)
     193  # define LOG_LINEUP()
     194  #endif
     195  
     196  Py_LOCAL_INLINE(Py_ssize_t)
     197  STRINGLIB(_lex_search)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
     198                         Py_ssize_t *return_period, int invert_alphabet)
     199  {
     200      /* Do a lexicographic search. Essentially this:
     201             >>> max(needle[i:] for i in range(len(needle)+1))
     202         Also find the period of the right half.   */
     203      Py_ssize_t max_suffix = 0;
     204      Py_ssize_t candidate = 1;
     205      Py_ssize_t k = 0;
     206      // The period of the right half.
     207      Py_ssize_t period = 1;
     208  
     209      while (candidate + k < len_needle) {
     210          // each loop increases candidate + k + max_suffix
     211          STRINGLIB_CHAR a = needle[candidate + k];
     212          STRINGLIB_CHAR b = needle[max_suffix + k];
     213          // check if the suffix at candidate is better than max_suffix
     214          if (invert_alphabet ? (b < a) : (a < b)) {
     215              // Fell short of max_suffix.
     216              // The next k + 1 characters are non-increasing
     217              // from candidate, so they won't start a maximal suffix.
     218              candidate += k + 1;
     219              k = 0;
     220              // We've ruled out any period smaller than what's
     221              // been scanned since max_suffix.
     222              period = candidate - max_suffix;
     223          }
     224          else if (a == b) {
     225              if (k + 1 != period) {
     226                  // Keep scanning the equal strings
     227                  k++;
     228              }
     229              else {
     230                  // Matched a whole period.
     231                  // Start matching the next period.
     232                  candidate += period;
     233                  k = 0;
     234              }
     235          }
     236          else {
     237              // Did better than max_suffix, so replace it.
     238              max_suffix = candidate;
     239              candidate++;
     240              k = 0;
     241              period = 1;
     242          }
     243      }
     244      *return_period = period;
     245      return max_suffix;
     246  }
     247  
     248  Py_LOCAL_INLINE(Py_ssize_t)
     249  STRINGLIB(_factorize)(const STRINGLIB_CHAR *needle,
     250                        Py_ssize_t len_needle,
     251                        Py_ssize_t *return_period)
     252  {
     253      /* Do a "critical factorization", making it so that:
     254         >>> needle = (left := needle[:cut]) + (right := needle[cut:])
     255         where the "local period" of the cut is maximal.
     256  
     257         The local period of the cut is the minimal length of a string w
     258         such that (left endswith w or w endswith left)
     259         and (right startswith w or w startswith left).
     260  
     261         The Critical Factorization Theorem says that this maximal local
     262         period is the global period of the string.
     263  
     264         Crochemore and Perrin (1991) show that this cut can be computed
     265         as the later of two cuts: one that gives a lexicographically
     266         maximal right half, and one that gives the same with the
     267         with respect to a reversed alphabet-ordering.
     268  
     269         This is what we want to happen:
     270             >>> x = "GCAGAGAG"
     271             >>> cut, period = factorize(x)
     272             >>> x[:cut], (right := x[cut:])
     273             ('GC', 'AGAGAG')
     274             >>> period  # right half period
     275             2
     276             >>> right[period:] == right[:-period]
     277             True
     278  
     279         This is how the local period lines up in the above example:
     280                  GC | AGAGAG
     281             AGAGAGC = AGAGAGC
     282         The length of this minimal repetition is 7, which is indeed the
     283         period of the original string. */
     284  
     285      Py_ssize_t cut1, period1, cut2, period2, cut, period;
     286      cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0);
     287      cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1);
     288  
     289      // Take the later cut.
     290      if (cut1 > cut2) {
     291          period = period1;
     292          cut = cut1;
     293      }
     294      else {
     295          period = period2;
     296          cut = cut2;
     297      }
     298  
     299      LOG("split: "); LOG_STRING(needle, cut);
     300      LOG(" + "); LOG_STRING(needle + cut, len_needle - cut);
     301      LOG("\n");
     302  
     303      *return_period = period;
     304      return cut;
     305  }
     306  
     307  
     308  #define SHIFT_TYPE uint8_t
     309  #define MAX_SHIFT UINT8_MAX
     310  
     311  #define TABLE_SIZE_BITS 6u
     312  #define TABLE_SIZE (1U << TABLE_SIZE_BITS)
     313  #define TABLE_MASK (TABLE_SIZE - 1U)
     314  
     315  typedef struct STRINGLIB(_pre) {
     316      const STRINGLIB_CHAR *needle;
     317      Py_ssize_t len_needle;
     318      Py_ssize_t cut;
     319      Py_ssize_t period;
     320      Py_ssize_t gap;
     321      int is_periodic;
     322      SHIFT_TYPE table[TABLE_SIZE];
     323  } STRINGLIB(prework);
     324  
     325  
     326  static void
     327  STRINGLIB(_preprocess)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
     328                         STRINGLIB(prework) *p)
     329  {
     330      p->needle = needle;
     331      p->len_needle = len_needle;
     332      p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period));
     333      assert(p->period + p->cut <= len_needle);
     334      p->is_periodic = (0 == memcmp(needle,
     335                                    needle + p->period,
     336                                    p->cut * STRINGLIB_SIZEOF_CHAR));
     337      if (p->is_periodic) {
     338          assert(p->cut <= len_needle/2);
     339          assert(p->cut < p->period);
     340          p->gap = 0; // unused
     341      }
     342      else {
     343          // A lower bound on the period
     344          p->period = Py_MAX(p->cut, len_needle - p->cut) + 1;
     345          // The gap between the last character and the previous
     346          // occurrence of an equivalent character (modulo TABLE_SIZE)
     347          p->gap = len_needle;
     348          STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK;
     349          for (Py_ssize_t i = len_needle - 2; i >= 0; i--) {
     350              STRINGLIB_CHAR x = needle[i] & TABLE_MASK;
     351              if (x == last) {
     352                  p->gap = len_needle - 1 - i;
     353                  break;
     354              }
     355          }
     356      }
     357      // Fill up a compressed Boyer-Moore "Bad Character" table
     358      Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT);
     359      for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; i++) {
     360          p->table[i] = Py_SAFE_DOWNCAST(not_found_shift,
     361                                         Py_ssize_t, SHIFT_TYPE);
     362      }
     363      for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; i++) {
     364          SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i,
     365                                              Py_ssize_t, SHIFT_TYPE);
     366          p->table[needle[i] & TABLE_MASK] = shift;
     367      }
     368  }
     369  
     370  static Py_ssize_t
     371  STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
     372                      STRINGLIB(prework) *p)
     373  {
     374      // Crochemore and Perrin's (1991) Two-Way algorithm.
     375      // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
     376      const Py_ssize_t len_needle = p->len_needle;
     377      const Py_ssize_t cut = p->cut;
     378      Py_ssize_t period = p->period;
     379      const STRINGLIB_CHAR *const needle = p->needle;
     380      const STRINGLIB_CHAR *window_last = haystack + len_needle - 1;
     381      const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack;
     382      SHIFT_TYPE *table = p->table;
     383      const STRINGLIB_CHAR *window;
     384      LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack);
     385  
     386      if (p->is_periodic) {
     387          LOG("Needle is periodic.\n");
     388          Py_ssize_t memory = 0;
     389        periodicwindowloop:
     390          while (window_last < haystack_end) {
     391              assert(memory == 0);
     392              for (;;) {
     393                  LOG_LINEUP();
     394                  Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
     395                  window_last += shift;
     396                  if (shift == 0) {
     397                      break;
     398                  }
     399                  if (window_last >= haystack_end) {
     400                      return -1;
     401                  }
     402                  LOG("Horspool skip\n");
     403              }
     404            no_shift:
     405              window = window_last - len_needle + 1;
     406              assert((window[len_needle - 1] & TABLE_MASK) ==
     407                     (needle[len_needle - 1] & TABLE_MASK));
     408              Py_ssize_t i = Py_MAX(cut, memory);
     409              for (; i < len_needle; i++) {
     410                  if (needle[i] != window[i]) {
     411                      LOG("Right half does not match.\n");
     412                      window_last += i - cut + 1;
     413                      memory = 0;
     414                      goto periodicwindowloop;
     415                  }
     416              }
     417              for (i = memory; i < cut; i++) {
     418                  if (needle[i] != window[i]) {
     419                      LOG("Left half does not match.\n");
     420                      window_last += period;
     421                      memory = len_needle - period;
     422                      if (window_last >= haystack_end) {
     423                          return -1;
     424                      }
     425                      Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
     426                      if (shift) {
     427                          // A mismatch has been identified to the right
     428                          // of where i will next start, so we can jump
     429                          // at least as far as if the mismatch occurred
     430                          // on the first comparison.
     431                          Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1;
     432                          LOG("Skip with Memory.\n");
     433                          memory = 0;
     434                          window_last += Py_MAX(shift, mem_jump);
     435                          goto periodicwindowloop;
     436                      }
     437                      goto no_shift;
     438                  }
     439              }
     440              LOG("Found a match!\n");
     441              return window - haystack;
     442          }
     443      }
     444      else {
     445          Py_ssize_t gap = p->gap;
     446          period = Py_MAX(gap, period);
     447          LOG("Needle is not periodic.\n");
     448          Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap);
     449        windowloop:
     450          while (window_last < haystack_end) {
     451              for (;;) {
     452                  LOG_LINEUP();
     453                  Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
     454                  window_last += shift;
     455                  if (shift == 0) {
     456                      break;
     457                  }
     458                  if (window_last >= haystack_end) {
     459                      return -1;
     460                  }
     461                  LOG("Horspool skip\n");
     462              }
     463              window = window_last - len_needle + 1;
     464              assert((window[len_needle - 1] & TABLE_MASK) ==
     465                     (needle[len_needle - 1] & TABLE_MASK));
     466              for (Py_ssize_t i = cut; i < gap_jump_end; i++) {
     467                  if (needle[i] != window[i]) {
     468                      LOG("Early right half mismatch: jump by gap.\n");
     469                      assert(gap >= i - cut + 1);
     470                      window_last += gap;
     471                      goto windowloop;
     472                  }
     473              }
     474              for (Py_ssize_t i = gap_jump_end; i < len_needle; i++) {
     475                  if (needle[i] != window[i]) {
     476                      LOG("Late right half mismatch.\n");
     477                      assert(i - cut + 1 > gap);
     478                      window_last += i - cut + 1;
     479                      goto windowloop;
     480                  }
     481              }
     482              for (Py_ssize_t i = 0; i < cut; i++) {
     483                  if (needle[i] != window[i]) {
     484                      LOG("Left half does not match.\n");
     485                      window_last += period;
     486                      goto windowloop;
     487                  }
     488              }
     489              LOG("Found a match!\n");
     490              return window - haystack;
     491          }
     492      }
     493      LOG("Not found. Returning -1.\n");
     494      return -1;
     495  }
     496  
     497  
     498  static Py_ssize_t
     499  STRINGLIB(_two_way_find)(const STRINGLIB_CHAR *haystack,
     500                           Py_ssize_t len_haystack,
     501                           const STRINGLIB_CHAR *needle,
     502                           Py_ssize_t len_needle)
     503  {
     504      LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack);
     505      STRINGLIB(prework) p;
     506      STRINGLIB(_preprocess)(needle, len_needle, &p);
     507      return STRINGLIB(_two_way)(haystack, len_haystack, &p);
     508  }
     509  
     510  
     511  static Py_ssize_t
     512  STRINGLIB(_two_way_count)(const STRINGLIB_CHAR *haystack,
     513                            Py_ssize_t len_haystack,
     514                            const STRINGLIB_CHAR *needle,
     515                            Py_ssize_t len_needle,
     516                            Py_ssize_t maxcount)
     517  {
     518      LOG("###### Counting \"%s\" in \"%s\".\n", needle, haystack);
     519      STRINGLIB(prework) p;
     520      STRINGLIB(_preprocess)(needle, len_needle, &p);
     521      Py_ssize_t index = 0, count = 0;
     522      while (1) {
     523          Py_ssize_t result;
     524          result = STRINGLIB(_two_way)(haystack + index,
     525                                       len_haystack - index, &p);
     526          if (result == -1) {
     527              return count;
     528          }
     529          count++;
     530          if (count == maxcount) {
     531              return maxcount;
     532          }
     533          index += result + len_needle;
     534      }
     535      return count;
     536  }
     537  
     538  #undef SHIFT_TYPE
     539  #undef NOT_FOUND
     540  #undef SHIFT_OVERFLOW
     541  #undef TABLE_SIZE_BITS
     542  #undef TABLE_SIZE
     543  #undef TABLE_MASK
     544  
     545  #undef LOG
     546  #undef LOG_STRING
     547  #undef LOG_LINEUP
     548  
     549  static inline Py_ssize_t
     550  STRINGLIB(default_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
     551                          const STRINGLIB_CHAR* p, Py_ssize_t m,
     552                          Py_ssize_t maxcount, int mode)
     553  {
     554      const Py_ssize_t w = n - m;
     555      Py_ssize_t mlast = m - 1, count = 0;
     556      Py_ssize_t gap = mlast;
     557      const STRINGLIB_CHAR last = p[mlast];
     558      const STRINGLIB_CHAR *const ss = &s[mlast];
     559  
     560      unsigned long mask = 0;
     561      for (Py_ssize_t i = 0; i < mlast; i++) {
     562          STRINGLIB_BLOOM_ADD(mask, p[i]);
     563          if (p[i] == last) {
     564              gap = mlast - i - 1;
     565          }
     566      }
     567      STRINGLIB_BLOOM_ADD(mask, last);
     568  
     569      for (Py_ssize_t i = 0; i <= w; i++) {
     570          if (ss[i] == last) {
     571              /* candidate match */
     572              Py_ssize_t j;
     573              for (j = 0; j < mlast; j++) {
     574                  if (s[i+j] != p[j]) {
     575                      break;
     576                  }
     577              }
     578              if (j == mlast) {
     579                  /* got a match! */
     580                  if (mode != FAST_COUNT) {
     581                      return i;
     582                  }
     583                  count++;
     584                  if (count == maxcount) {
     585                      return maxcount;
     586                  }
     587                  i = i + mlast;
     588                  continue;
     589              }
     590              /* miss: check if next character is part of pattern */
     591              if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
     592                  i = i + m;
     593              }
     594              else {
     595                  i = i + gap;
     596              }
     597          }
     598          else {
     599              /* skip: check if next character is part of pattern */
     600              if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
     601                  i = i + m;
     602              }
     603          }
     604      }
     605      return mode == FAST_COUNT ? count : -1;
     606  }
     607  
     608  
     609  static Py_ssize_t
     610  STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
     611                           const STRINGLIB_CHAR* p, Py_ssize_t m,
     612                           Py_ssize_t maxcount, int mode)
     613  {
     614      const Py_ssize_t w = n - m;
     615      Py_ssize_t mlast = m - 1, count = 0;
     616      Py_ssize_t gap = mlast;
     617      Py_ssize_t hits = 0, res;
     618      const STRINGLIB_CHAR last = p[mlast];
     619      const STRINGLIB_CHAR *const ss = &s[mlast];
     620  
     621      unsigned long mask = 0;
     622      for (Py_ssize_t i = 0; i < mlast; i++) {
     623          STRINGLIB_BLOOM_ADD(mask, p[i]);
     624          if (p[i] == last) {
     625              gap = mlast - i - 1;
     626          }
     627      }
     628      STRINGLIB_BLOOM_ADD(mask, last);
     629  
     630      for (Py_ssize_t i = 0; i <= w; i++) {
     631          if (ss[i] == last) {
     632              /* candidate match */
     633              Py_ssize_t j;
     634              for (j = 0; j < mlast; j++) {
     635                  if (s[i+j] != p[j]) {
     636                      break;
     637                  }
     638              }
     639              if (j == mlast) {
     640                  /* got a match! */
     641                  if (mode != FAST_COUNT) {
     642                      return i;
     643                  }
     644                  count++;
     645                  if (count == maxcount) {
     646                      return maxcount;
     647                  }
     648                  i = i + mlast;
     649                  continue;
     650              }
     651              hits += j + 1;
     652              if (hits > m / 4 && w - i > 2000) {
     653                  if (mode == FAST_SEARCH) {
     654                      res = STRINGLIB(_two_way_find)(s + i, n - i, p, m);
     655                      return res == -1 ? -1 : res + i;
     656                  }
     657                  else {
     658                      res = STRINGLIB(_two_way_count)(s + i, n - i, p, m,
     659                                                      maxcount - count);
     660                      return res + count;
     661                  }
     662              }
     663              /* miss: check if next character is part of pattern */
     664              if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
     665                  i = i + m;
     666              }
     667              else {
     668                  i = i + gap;
     669              }
     670          }
     671          else {
     672              /* skip: check if next character is part of pattern */
     673              if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
     674                  i = i + m;
     675              }
     676          }
     677      }
     678      return mode == FAST_COUNT ? count : -1;
     679  }
     680  
     681  
     682  static Py_ssize_t
     683  STRINGLIB(default_rfind)(const STRINGLIB_CHAR* s, Py_ssize_t n,
     684                           const STRINGLIB_CHAR* p, Py_ssize_t m,
     685                           Py_ssize_t maxcount, int mode)
     686  {
     687      /* create compressed boyer-moore delta 1 table */
     688      unsigned long mask = 0;
     689      Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m;
     690  
     691      /* process pattern[0] outside the loop */
     692      STRINGLIB_BLOOM_ADD(mask, p[0]);
     693      /* process pattern[:0:-1] */
     694      for (i = mlast; i > 0; i--) {
     695          STRINGLIB_BLOOM_ADD(mask, p[i]);
     696          if (p[i] == p[0]) {
     697              skip = i - 1;
     698          }
     699      }
     700  
     701      for (i = w; i >= 0; i--) {
     702          if (s[i] == p[0]) {
     703              /* candidate match */
     704              for (j = mlast; j > 0; j--) {
     705                  if (s[i+j] != p[j]) {
     706                      break;
     707                  }
     708              }
     709              if (j == 0) {
     710                  /* got a match! */
     711                  return i;
     712              }
     713              /* miss: check if previous character is part of pattern */
     714              if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
     715                  i = i - m;
     716              }
     717              else {
     718                  i = i - skip;
     719              }
     720          }
     721          else {
     722              /* skip: check if previous character is part of pattern */
     723              if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
     724                  i = i - m;
     725              }
     726          }
     727      }
     728      return -1;
     729  }
     730  
     731  
     732  static inline Py_ssize_t
     733  STRINGLIB(count_char)(const STRINGLIB_CHAR *s, Py_ssize_t n,
     734                        const STRINGLIB_CHAR p0, Py_ssize_t maxcount)
     735  {
     736      Py_ssize_t i, count = 0;
     737      for (i = 0; i < n; i++) {
     738          if (s[i] == p0) {
     739              count++;
     740              if (count == maxcount) {
     741                  return maxcount;
     742              }
     743          }
     744      }
     745      return count;
     746  }
     747  
     748  
     749  Py_LOCAL_INLINE(Py_ssize_t)
     750  FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
     751             const STRINGLIB_CHAR* p, Py_ssize_t m,
     752             Py_ssize_t maxcount, int mode)
     753  {
     754      if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
     755          return -1;
     756      }
     757  
     758      /* look for special cases */
     759      if (m <= 1) {
     760          if (m <= 0) {
     761              return -1;
     762          }
     763          /* use special case for 1-character strings */
     764          if (mode == FAST_SEARCH)
     765              return STRINGLIB(find_char)(s, n, p[0]);
     766          else if (mode == FAST_RSEARCH)
     767              return STRINGLIB(rfind_char)(s, n, p[0]);
     768          else {
     769              return STRINGLIB(count_char)(s, n, p[0], maxcount);
     770          }
     771      }
     772  
     773      if (mode != FAST_RSEARCH) {
     774          if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
     775              return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
     776          }
     777          else if ((m >> 2) * 3 < (n >> 2)) {
     778              /* 33% threshold, but don't overflow. */
     779              /* For larger problems where the needle isn't a huge
     780                 percentage of the size of the haystack, the relatively
     781                 expensive O(m) startup cost of the two-way algorithm
     782                 will surely pay off. */
     783              if (mode == FAST_SEARCH) {
     784                  return STRINGLIB(_two_way_find)(s, n, p, m);
     785              }
     786              else {
     787                  return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
     788              }
     789          }
     790          else {
     791              /* To ensure that we have good worst-case behavior,
     792                 here's an adaptive version of the algorithm, where if
     793                 we match O(m) characters without any matches of the
     794                 entire needle, then we predict that the startup cost of
     795                 the two-way algorithm will probably be worth it. */
     796              return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
     797          }
     798      }
     799      else {
     800          /* FAST_RSEARCH */
     801          return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
     802      }
     803  }
     804