(root)/
Python-3.11.7/
Objects/
stringlib/
fastsearch.h
       1  /* stringlib: fastsearch implementation */
       2  
       3  #define STRINGLIB_FASTSEARCH_H
       4  
       5  /* fast search/count implementation, based on a mix between boyer-
       6     moore and horspool, with a few more bells and whistles on the top.
       7     for some more background, see:
       8     https://web.archive.org/web/20201107074620/http://effbot.org/zone/stringlib.htm */
       9  
      10  /* note: fastsearch may access s[n], which isn't a problem when using
      11     Python's ordinary string types, but may cause problems if you're
      12     using this code in other contexts.  also, the count mode returns -1
      13     if there cannot possibly be a match in the target string, and 0 if
      14     it has actually checked for matches, but didn't find any.  callers
      15     beware! */
      16  
      17  /* If the strings are long enough, use Crochemore and Perrin's Two-Way
      18     algorithm, which has worst-case O(n) runtime and best-case O(n/k).
      19     Also compute a table of shifts to achieve O(n/k) in more cases,
      20     and often (data dependent) deduce larger shifts than pure C&P can
      21     deduce. */
      22  
      23  #define FAST_COUNT 0
      24  #define FAST_SEARCH 1
      25  #define FAST_RSEARCH 2
      26  
      27  #if LONG_BIT >= 128
      28  #define STRINGLIB_BLOOM_WIDTH 128
      29  #elif LONG_BIT >= 64
      30  #define STRINGLIB_BLOOM_WIDTH 64
      31  #elif LONG_BIT >= 32
      32  #define STRINGLIB_BLOOM_WIDTH 32
      33  #else
      34  #error "LONG_BIT is smaller than 32"
      35  #endif
      36  
      37  #define STRINGLIB_BLOOM_ADD(mask, ch) \
      38      ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
      39  #define STRINGLIB_BLOOM(mask, ch)     \
      40      ((mask &  (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
      41  
      42  #if STRINGLIB_SIZEOF_CHAR == 1
      43  #  define MEMCHR_CUT_OFF 15
      44  #else
      45  #  define MEMCHR_CUT_OFF 40
      46  #endif
      47  
      48  Py_LOCAL_INLINE(Py_ssize_t)
      49  STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
      50  {
      51      const STRINGLIB_CHAR *p, *e;
      52  
      53      p = s;
      54      e = s + n;
      55      if (n > MEMCHR_CUT_OFF) {
      56  #if STRINGLIB_SIZEOF_CHAR == 1
      57          p = memchr(s, ch, n);
      58          if (p != NULL)
      59              return (p - s);
      60          return -1;
      61  #else
      62          /* use memchr if we can choose a needle without too many likely
      63             false positives */
      64          const STRINGLIB_CHAR *s1, *e1;
      65          unsigned char needle = ch & 0xff;
      66          /* If looking for a multiple of 256, we'd have too
      67             many false positives looking for the '\0' byte in UCS2
      68             and UCS4 representations. */
      69          if (needle != 0) {
      70              do {
      71                  void *candidate = memchr(p, needle,
      72                                           (e - p) * sizeof(STRINGLIB_CHAR));
      73                  if (candidate == NULL)
      74                      return -1;
      75                  s1 = p;
      76                  p = (const STRINGLIB_CHAR *)
      77                          _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
      78                  if (*p == ch)
      79                      return (p - s);
      80                  /* False positive */
      81                  p++;
      82                  if (p - s1 > MEMCHR_CUT_OFF)
      83                      continue;
      84                  if (e - p <= MEMCHR_CUT_OFF)
      85                      break;
      86                  e1 = p + MEMCHR_CUT_OFF;
      87                  while (p != e1) {
      88                      if (*p == ch)
      89                          return (p - s);
      90                      p++;
      91                  }
      92              }
      93              while (e - p > MEMCHR_CUT_OFF);
      94          }
      95  #endif
      96      }
      97      while (p < e) {
      98          if (*p == ch)
      99              return (p - s);
     100          p++;
     101      }
     102      return -1;
     103  }
     104  
     105  Py_LOCAL_INLINE(Py_ssize_t)
     106  STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
     107  {
     108      const STRINGLIB_CHAR *p;
     109  #ifdef HAVE_MEMRCHR
     110      /* memrchr() is a GNU extension, available since glibc 2.1.91.
     111         it doesn't seem as optimized as memchr(), but is still quite
     112         faster than our hand-written loop below */
     113  
     114      if (n > MEMCHR_CUT_OFF) {
     115  #if STRINGLIB_SIZEOF_CHAR == 1
     116          p = memrchr(s, ch, n);
     117          if (p != NULL)
     118              return (p - s);
     119          return -1;
     120  #else
     121          /* use memrchr if we can choose a needle without too many likely
     122             false positives */
     123          const STRINGLIB_CHAR *s1;
     124          Py_ssize_t n1;
     125          unsigned char needle = ch & 0xff;
     126          /* If looking for a multiple of 256, we'd have too
     127             many false positives looking for the '\0' byte in UCS2
     128             and UCS4 representations. */
     129          if (needle != 0) {
     130              do {
     131                  void *candidate = memrchr(s, needle,
     132                                            n * sizeof(STRINGLIB_CHAR));
     133                  if (candidate == NULL)
     134                      return -1;
     135                  n1 = n;
     136                  p = (const STRINGLIB_CHAR *)
     137                          _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
     138                  n = p - s;
     139                  if (*p == ch)
     140                      return n;
     141                  /* False positive */
     142                  if (n1 - n > MEMCHR_CUT_OFF)
     143                      continue;
     144                  if (n <= MEMCHR_CUT_OFF)
     145                      break;
     146                  s1 = p - MEMCHR_CUT_OFF;
     147                  while (p > s1) {
     148                      p--;
     149                      if (*p == ch)
     150                          return (p - s);
     151                  }
     152                  n = p - s;
     153              }
     154              while (n > MEMCHR_CUT_OFF);
     155          }
     156  #endif
     157      }
     158  #endif  /* HAVE_MEMRCHR */
     159      p = s + n;
     160      while (p > s) {
     161          p--;
     162          if (*p == ch)
     163              return (p - s);
     164      }
     165      return -1;
     166  }
     167  
     168  #undef MEMCHR_CUT_OFF
     169  
     170  /* Change to a 1 to see logging comments walk through the algorithm. */
     171  #if 0 && STRINGLIB_SIZEOF_CHAR == 1
     172  # define LOG(...) printf(__VA_ARGS__)
     173  # define LOG_STRING(s, n) printf("\"%.*s\"", (int)(n), s)
     174  # define LOG_LINEUP() do {                                         \
     175      LOG("> "); LOG_STRING(haystack, len_haystack); LOG("\n> ");    \
     176      LOG("%*s",(int)(window_last - haystack + 1 - len_needle), ""); \
     177      LOG_STRING(needle, len_needle); LOG("\n");                     \
     178  } while(0)
     179  #else
     180  # define LOG(...)
     181  # define LOG_STRING(s, n)
     182  # define LOG_LINEUP()
     183  #endif
     184  
     185  Py_LOCAL_INLINE(Py_ssize_t)
     186  STRINGLIB(_lex_search)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
     187                         Py_ssize_t *return_period, int invert_alphabet)
     188  {
     189      /* Do a lexicographic search. Essentially this:
     190             >>> max(needle[i:] for i in range(len(needle)+1))
     191         Also find the period of the right half.   */
     192      Py_ssize_t max_suffix = 0;
     193      Py_ssize_t candidate = 1;
     194      Py_ssize_t k = 0;
     195      // The period of the right half.
     196      Py_ssize_t period = 1;
     197  
     198      while (candidate + k < len_needle) {
     199          // each loop increases candidate + k + max_suffix
     200          STRINGLIB_CHAR a = needle[candidate + k];
     201          STRINGLIB_CHAR b = needle[max_suffix + k];
     202          // check if the suffix at candidate is better than max_suffix
     203          if (invert_alphabet ? (b < a) : (a < b)) {
     204              // Fell short of max_suffix.
     205              // The next k + 1 characters are non-increasing
     206              // from candidate, so they won't start a maximal suffix.
     207              candidate += k + 1;
     208              k = 0;
     209              // We've ruled out any period smaller than what's
     210              // been scanned since max_suffix.
     211              period = candidate - max_suffix;
     212          }
     213          else if (a == b) {
     214              if (k + 1 != period) {
     215                  // Keep scanning the equal strings
     216                  k++;
     217              }
     218              else {
     219                  // Matched a whole period.
     220                  // Start matching the next period.
     221                  candidate += period;
     222                  k = 0;
     223              }
     224          }
     225          else {
     226              // Did better than max_suffix, so replace it.
     227              max_suffix = candidate;
     228              candidate++;
     229              k = 0;
     230              period = 1;
     231          }
     232      }
     233      *return_period = period;
     234      return max_suffix;
     235  }
     236  
     237  Py_LOCAL_INLINE(Py_ssize_t)
     238  STRINGLIB(_factorize)(const STRINGLIB_CHAR *needle,
     239                        Py_ssize_t len_needle,
     240                        Py_ssize_t *return_period)
     241  {
     242      /* Do a "critical factorization", making it so that:
     243         >>> needle = (left := needle[:cut]) + (right := needle[cut:])
     244         where the "local period" of the cut is maximal.
     245  
     246         The local period of the cut is the minimal length of a string w
     247         such that (left endswith w or w endswith left)
     248         and (right startswith w or w startswith left).
     249  
     250         The Critical Factorization Theorem says that this maximal local
     251         period is the global period of the string.
     252  
     253         Crochemore and Perrin (1991) show that this cut can be computed
     254         as the later of two cuts: one that gives a lexicographically
     255         maximal right half, and one that gives the same with the
     256         with respect to a reversed alphabet-ordering.
     257  
     258         This is what we want to happen:
     259             >>> x = "GCAGAGAG"
     260             >>> cut, period = factorize(x)
     261             >>> x[:cut], (right := x[cut:])
     262             ('GC', 'AGAGAG')
     263             >>> period  # right half period
     264             2
     265             >>> right[period:] == right[:-period]
     266             True
     267  
     268         This is how the local period lines up in the above example:
     269                  GC | AGAGAG
     270             AGAGAGC = AGAGAGC
     271         The length of this minimal repetition is 7, which is indeed the
     272         period of the original string. */
     273  
     274      Py_ssize_t cut1, period1, cut2, period2, cut, period;
     275      cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0);
     276      cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1);
     277  
     278      // Take the later cut.
     279      if (cut1 > cut2) {
     280          period = period1;
     281          cut = cut1;
     282      }
     283      else {
     284          period = period2;
     285          cut = cut2;
     286      }
     287  
     288      LOG("split: "); LOG_STRING(needle, cut);
     289      LOG(" + "); LOG_STRING(needle + cut, len_needle - cut);
     290      LOG("\n");
     291  
     292      *return_period = period;
     293      return cut;
     294  }
     295  
     296  
     297  #define SHIFT_TYPE uint8_t
     298  #define MAX_SHIFT UINT8_MAX
     299  
     300  #define TABLE_SIZE_BITS 6u
     301  #define TABLE_SIZE (1U << TABLE_SIZE_BITS)
     302  #define TABLE_MASK (TABLE_SIZE - 1U)
     303  
     304  typedef struct STRINGLIB(_pre) {
     305      const STRINGLIB_CHAR *needle;
     306      Py_ssize_t len_needle;
     307      Py_ssize_t cut;
     308      Py_ssize_t period;
     309      Py_ssize_t gap;
     310      int is_periodic;
     311      SHIFT_TYPE table[TABLE_SIZE];
     312  } STRINGLIB(prework);
     313  
     314  
     315  static void
     316  STRINGLIB(_preprocess)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
     317                         STRINGLIB(prework) *p)
     318  {
     319      p->needle = needle;
     320      p->len_needle = len_needle;
     321      p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period));
     322      assert(p->period + p->cut <= len_needle);
     323      p->is_periodic = (0 == memcmp(needle,
     324                                    needle + p->period,
     325                                    p->cut * STRINGLIB_SIZEOF_CHAR));
     326      if (p->is_periodic) {
     327          assert(p->cut <= len_needle/2);
     328          assert(p->cut < p->period);
     329          p->gap = 0; // unused
     330      }
     331      else {
     332          // A lower bound on the period
     333          p->period = Py_MAX(p->cut, len_needle - p->cut) + 1;
     334          // The gap between the last character and the previous
     335          // occurrence of an equivalent character (modulo TABLE_SIZE)
     336          p->gap = len_needle;
     337          STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK;
     338          for (Py_ssize_t i = len_needle - 2; i >= 0; i--) {
     339              STRINGLIB_CHAR x = needle[i] & TABLE_MASK;
     340              if (x == last) {
     341                  p->gap = len_needle - 1 - i;
     342                  break;
     343              }
     344          }
     345      }
     346      // Fill up a compressed Boyer-Moore "Bad Character" table
     347      Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT);
     348      for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; i++) {
     349          p->table[i] = Py_SAFE_DOWNCAST(not_found_shift,
     350                                         Py_ssize_t, SHIFT_TYPE);
     351      }
     352      for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; i++) {
     353          SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i,
     354                                              Py_ssize_t, SHIFT_TYPE);
     355          p->table[needle[i] & TABLE_MASK] = shift;
     356      }
     357  }
     358  
     359  static Py_ssize_t
     360  STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
     361                      STRINGLIB(prework) *p)
     362  {
     363      // Crochemore and Perrin's (1991) Two-Way algorithm.
     364      // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
     365      const Py_ssize_t len_needle = p->len_needle;
     366      const Py_ssize_t cut = p->cut;
     367      Py_ssize_t period = p->period;
     368      const STRINGLIB_CHAR *const needle = p->needle;
     369      const STRINGLIB_CHAR *window_last = haystack + len_needle - 1;
     370      const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack;
     371      SHIFT_TYPE *table = p->table;
     372      const STRINGLIB_CHAR *window;
     373      LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack);
     374  
     375      if (p->is_periodic) {
     376          LOG("Needle is periodic.\n");
     377          Py_ssize_t memory = 0;
     378        periodicwindowloop:
     379          while (window_last < haystack_end) {
     380              assert(memory == 0);
     381              for (;;) {
     382                  LOG_LINEUP();
     383                  Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
     384                  window_last += shift;
     385                  if (shift == 0) {
     386                      break;
     387                  }
     388                  if (window_last >= haystack_end) {
     389                      return -1;
     390                  }
     391                  LOG("Horspool skip");
     392              }
     393            no_shift:
     394              window = window_last - len_needle + 1;
     395              assert((window[len_needle - 1] & TABLE_MASK) ==
     396                     (needle[len_needle - 1] & TABLE_MASK));
     397              Py_ssize_t i = Py_MAX(cut, memory);
     398              for (; i < len_needle; i++) {
     399                  if (needle[i] != window[i]) {
     400                      LOG("Right half does not match.\n");
     401                      window_last += i - cut + 1;
     402                      memory = 0;
     403                      goto periodicwindowloop;
     404                  }
     405              }
     406              for (i = memory; i < cut; i++) {
     407                  if (needle[i] != window[i]) {
     408                      LOG("Left half does not match.\n");
     409                      window_last += period;
     410                      memory = len_needle - period;
     411                      if (window_last >= haystack_end) {
     412                          return -1;
     413                      }
     414                      Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
     415                      if (shift) {
     416                          // A mismatch has been identified to the right
     417                          // of where i will next start, so we can jump
     418                          // at least as far as if the mismatch occurred
     419                          // on the first comparison.
     420                          Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1;
     421                          LOG("Skip with Memory.\n");
     422                          memory = 0;
     423                          window_last += Py_MAX(shift, mem_jump);
     424                          goto periodicwindowloop;
     425                      }
     426                      goto no_shift;
     427                  }
     428              }
     429              LOG("Found a match!\n");
     430              return window - haystack;
     431          }
     432      }
     433      else {
     434          Py_ssize_t gap = p->gap;
     435          period = Py_MAX(gap, period);
     436          LOG("Needle is not periodic.\n");
     437          Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap);
     438        windowloop:
     439          while (window_last < haystack_end) {
     440              for (;;) {
     441                  LOG_LINEUP();
     442                  Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
     443                  window_last += shift;
     444                  if (shift == 0) {
     445                      break;
     446                  }
     447                  if (window_last >= haystack_end) {
     448                      return -1;
     449                  }
     450                  LOG("Horspool skip");
     451              }
     452              window = window_last - len_needle + 1;
     453              assert((window[len_needle - 1] & TABLE_MASK) ==
     454                     (needle[len_needle - 1] & TABLE_MASK));
     455              for (Py_ssize_t i = cut; i < gap_jump_end; i++) {
     456                  if (needle[i] != window[i]) {
     457                      LOG("Early right half mismatch: jump by gap.\n");
     458                      assert(gap >= i - cut + 1);
     459                      window_last += gap;
     460                      goto windowloop;
     461                  }
     462              }
     463              for (Py_ssize_t i = gap_jump_end; i < len_needle; i++) {
     464                  if (needle[i] != window[i]) {
     465                      LOG("Late right half mismatch.\n");
     466                      assert(i - cut + 1 > gap);
     467                      window_last += i - cut + 1;
     468                      goto windowloop;
     469                  }
     470              }
     471              for (Py_ssize_t i = 0; i < cut; i++) {
     472                  if (needle[i] != window[i]) {
     473                      LOG("Left half does not match.\n");
     474                      window_last += period;
     475                      goto windowloop;
     476                  }
     477              }
     478              LOG("Found a match!\n");
     479              return window - haystack;
     480          }
     481      }
     482      LOG("Not found. Returning -1.\n");
     483      return -1;
     484  }
     485  
     486  
     487  static Py_ssize_t
     488  STRINGLIB(_two_way_find)(const STRINGLIB_CHAR *haystack,
     489                           Py_ssize_t len_haystack,
     490                           const STRINGLIB_CHAR *needle,
     491                           Py_ssize_t len_needle)
     492  {
     493      LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack);
     494      STRINGLIB(prework) p;
     495      STRINGLIB(_preprocess)(needle, len_needle, &p);
     496      return STRINGLIB(_two_way)(haystack, len_haystack, &p);
     497  }
     498  
     499  
     500  static Py_ssize_t
     501  STRINGLIB(_two_way_count)(const STRINGLIB_CHAR *haystack,
     502                            Py_ssize_t len_haystack,
     503                            const STRINGLIB_CHAR *needle,
     504                            Py_ssize_t len_needle,
     505                            Py_ssize_t maxcount)
     506  {
     507      LOG("###### Counting \"%s\" in \"%s\".\n", needle, haystack);
     508      STRINGLIB(prework) p;
     509      STRINGLIB(_preprocess)(needle, len_needle, &p);
     510      Py_ssize_t index = 0, count = 0;
     511      while (1) {
     512          Py_ssize_t result;
     513          result = STRINGLIB(_two_way)(haystack + index,
     514                                       len_haystack - index, &p);
     515          if (result == -1) {
     516              return count;
     517          }
     518          count++;
     519          if (count == maxcount) {
     520              return maxcount;
     521          }
     522          index += result + len_needle;
     523      }
     524      return count;
     525  }
     526  
     527  #undef SHIFT_TYPE
     528  #undef NOT_FOUND
     529  #undef SHIFT_OVERFLOW
     530  #undef TABLE_SIZE_BITS
     531  #undef TABLE_SIZE
     532  #undef TABLE_MASK
     533  
     534  #undef LOG
     535  #undef LOG_STRING
     536  #undef LOG_LINEUP
     537  
     538  static inline Py_ssize_t
     539  STRINGLIB(default_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
     540                          const STRINGLIB_CHAR* p, Py_ssize_t m,
     541                          Py_ssize_t maxcount, int mode)
     542  {
     543      const Py_ssize_t w = n - m;
     544      Py_ssize_t mlast = m - 1, count = 0;
     545      Py_ssize_t gap = mlast;
     546      const STRINGLIB_CHAR last = p[mlast];
     547      const STRINGLIB_CHAR *const ss = &s[mlast];
     548  
     549      unsigned long mask = 0;
     550      for (Py_ssize_t i = 0; i < mlast; i++) {
     551          STRINGLIB_BLOOM_ADD(mask, p[i]);
     552          if (p[i] == last) {
     553              gap = mlast - i - 1;
     554          }
     555      }
     556      STRINGLIB_BLOOM_ADD(mask, last);
     557  
     558      for (Py_ssize_t i = 0; i <= w; i++) {
     559          if (ss[i] == last) {
     560              /* candidate match */
     561              Py_ssize_t j;
     562              for (j = 0; j < mlast; j++) {
     563                  if (s[i+j] != p[j]) {
     564                      break;
     565                  }
     566              }
     567              if (j == mlast) {
     568                  /* got a match! */
     569                  if (mode != FAST_COUNT) {
     570                      return i;
     571                  }
     572                  count++;
     573                  if (count == maxcount) {
     574                      return maxcount;
     575                  }
     576                  i = i + mlast;
     577                  continue;
     578              }
     579              /* miss: check if next character is part of pattern */
     580              if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
     581                  i = i + m;
     582              }
     583              else {
     584                  i = i + gap;
     585              }
     586          }
     587          else {
     588              /* skip: check if next character is part of pattern */
     589              if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
     590                  i = i + m;
     591              }
     592          }
     593      }
     594      return mode == FAST_COUNT ? count : -1;
     595  }
     596  
     597  
     598  static Py_ssize_t
     599  STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
     600                           const STRINGLIB_CHAR* p, Py_ssize_t m,
     601                           Py_ssize_t maxcount, int mode)
     602  {
     603      const Py_ssize_t w = n - m;
     604      Py_ssize_t mlast = m - 1, count = 0;
     605      Py_ssize_t gap = mlast;
     606      Py_ssize_t hits = 0, res;
     607      const STRINGLIB_CHAR last = p[mlast];
     608      const STRINGLIB_CHAR *const ss = &s[mlast];
     609  
     610      unsigned long mask = 0;
     611      for (Py_ssize_t i = 0; i < mlast; i++) {
     612          STRINGLIB_BLOOM_ADD(mask, p[i]);
     613          if (p[i] == last) {
     614              gap = mlast - i - 1;
     615          }
     616      }
     617      STRINGLIB_BLOOM_ADD(mask, last);
     618  
     619      for (Py_ssize_t i = 0; i <= w; i++) {
     620          if (ss[i] == last) {
     621              /* candidate match */
     622              Py_ssize_t j;
     623              for (j = 0; j < mlast; j++) {
     624                  if (s[i+j] != p[j]) {
     625                      break;
     626                  }
     627              }
     628              if (j == mlast) {
     629                  /* got a match! */
     630                  if (mode != FAST_COUNT) {
     631                      return i;
     632                  }
     633                  count++;
     634                  if (count == maxcount) {
     635                      return maxcount;
     636                  }
     637                  i = i + mlast;
     638                  continue;
     639              }
     640              hits += j + 1;
     641              if (hits > m / 4 && w - i > 2000) {
     642                  if (mode == FAST_SEARCH) {
     643                      res = STRINGLIB(_two_way_find)(s + i, n - i, p, m);
     644                      return res == -1 ? -1 : res + i;
     645                  }
     646                  else {
     647                      res = STRINGLIB(_two_way_count)(s + i, n - i, p, m,
     648                                                      maxcount - count);
     649                      return res + count;
     650                  }
     651              }
     652              /* miss: check if next character is part of pattern */
     653              if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
     654                  i = i + m;
     655              }
     656              else {
     657                  i = i + gap;
     658              }
     659          }
     660          else {
     661              /* skip: check if next character is part of pattern */
     662              if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
     663                  i = i + m;
     664              }
     665          }
     666      }
     667      return mode == FAST_COUNT ? count : -1;
     668  }
     669  
     670  
     671  static Py_ssize_t
     672  STRINGLIB(default_rfind)(const STRINGLIB_CHAR* s, Py_ssize_t n,
     673                           const STRINGLIB_CHAR* p, Py_ssize_t m,
     674                           Py_ssize_t maxcount, int mode)
     675  {
     676      /* create compressed boyer-moore delta 1 table */
     677      unsigned long mask = 0;
     678      Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m;
     679  
     680      /* process pattern[0] outside the loop */
     681      STRINGLIB_BLOOM_ADD(mask, p[0]);
     682      /* process pattern[:0:-1] */
     683      for (i = mlast; i > 0; i--) {
     684          STRINGLIB_BLOOM_ADD(mask, p[i]);
     685          if (p[i] == p[0]) {
     686              skip = i - 1;
     687          }
     688      }
     689  
     690      for (i = w; i >= 0; i--) {
     691          if (s[i] == p[0]) {
     692              /* candidate match */
     693              for (j = mlast; j > 0; j--) {
     694                  if (s[i+j] != p[j]) {
     695                      break;
     696                  }
     697              }
     698              if (j == 0) {
     699                  /* got a match! */
     700                  return i;
     701              }
     702              /* miss: check if previous character is part of pattern */
     703              if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
     704                  i = i - m;
     705              }
     706              else {
     707                  i = i - skip;
     708              }
     709          }
     710          else {
     711              /* skip: check if previous character is part of pattern */
     712              if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
     713                  i = i - m;
     714              }
     715          }
     716      }
     717      return -1;
     718  }
     719  
     720  
     721  static inline Py_ssize_t
     722  STRINGLIB(count_char)(const STRINGLIB_CHAR *s, Py_ssize_t n,
     723                        const STRINGLIB_CHAR p0, Py_ssize_t maxcount)
     724  {
     725      Py_ssize_t i, count = 0;
     726      for (i = 0; i < n; i++) {
     727          if (s[i] == p0) {
     728              count++;
     729              if (count == maxcount) {
     730                  return maxcount;
     731              }
     732          }
     733      }
     734      return count;
     735  }
     736  
     737  
     738  Py_LOCAL_INLINE(Py_ssize_t)
     739  FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
     740             const STRINGLIB_CHAR* p, Py_ssize_t m,
     741             Py_ssize_t maxcount, int mode)
     742  {
     743      if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
     744          return -1;
     745      }
     746  
     747      /* look for special cases */
     748      if (m <= 1) {
     749          if (m <= 0) {
     750              return -1;
     751          }
     752          /* use special case for 1-character strings */
     753          if (mode == FAST_SEARCH)
     754              return STRINGLIB(find_char)(s, n, p[0]);
     755          else if (mode == FAST_RSEARCH)
     756              return STRINGLIB(rfind_char)(s, n, p[0]);
     757          else {
     758              return STRINGLIB(count_char)(s, n, p[0], maxcount);
     759          }
     760      }
     761  
     762      if (mode != FAST_RSEARCH) {
     763          if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
     764              return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
     765          }
     766          else if ((m >> 2) * 3 < (n >> 2)) {
     767              /* 33% threshold, but don't overflow. */
     768              /* For larger problems where the needle isn't a huge
     769                 percentage of the size of the haystack, the relatively
     770                 expensive O(m) startup cost of the two-way algorithm
     771                 will surely pay off. */
     772              if (mode == FAST_SEARCH) {
     773                  return STRINGLIB(_two_way_find)(s, n, p, m);
     774              }
     775              else {
     776                  return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
     777              }
     778          }
     779          else {
     780              /* To ensure that we have good worst-case behavior,
     781                 here's an adaptive version of the algorithm, where if
     782                 we match O(m) characters without any matches of the
     783                 entire needle, then we predict that the startup cost of
     784                 the two-way algorithm will probably be worth it. */
     785              return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
     786          }
     787      }
     788      else {
     789          /* FAST_RSEARCH */
     790          return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
     791      }
     792  }
     793