(root)/
grep-3.11/
src/
kwset.c
       1  /* kwset.c - search for any of a set of keywords.
       2     Copyright (C) 1989, 1998, 2000, 2005, 2007, 2009-2023 Free Software
       3     Foundation, Inc.
       4  
       5     This program is free software; you can redistribute it and/or modify
       6     it under the terms of the GNU General Public License as published by
       7     the Free Software Foundation; either version 3, or (at your option)
       8     any later version.
       9  
      10     This program is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13     GNU General Public License for more details.
      14  
      15     You should have received a copy of the GNU General Public License
      16     along with this program; if not, write to the Free Software
      17     Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
      18     02110-1301, USA.  */
      19  
      20  /* Written August 1989 by Mike Haertel.  */
      21  
      22  /* For more on the Aho-Corasick and Boyer-Moore algorithms,
      23     as well as other algorithms that might help improve performance,
      24     see the grep manual's "Performance" chapter.  */
      25  
      26  #include <config.h>
      27  
      28  #include "kwset.h"
      29  
      30  #include <stdckdint.h>
      31  #include <stdint.h>
      32  #include <sys/types.h>
      33  
      34  #include "system.h"
      35  #include "memchr2.h"
      36  #include "obstack.h"
      37  #include "xalloc.h"
      38  #include "verify.h"
      39  
      40  #define obstack_chunk_alloc xmalloc
      41  #define obstack_chunk_free free
      42  
      43  static unsigned char
      44  U (char ch)
      45  {
      46    return to_uchar (ch);
      47  }
      48  
      49  /* Balanced tree of edges and labels leaving a given trie node.  */
      50  struct tree
      51  {
      52    struct tree *llink;		/* Left link; MUST be first field.  */
      53    struct tree *rlink;		/* Right link (to larger labels).  */
      54    struct trie *trie;		/* Trie node pointed to by this edge.  */
      55    unsigned char label;		/* Label on this edge.  */
      56    char balance;			/* Difference in depths of subtrees.  */
      57  };
      58  
      59  /* Node of a trie representing a set of keywords.  */
      60  struct trie
      61  {
      62    /* If an accepting node, this is either 2*W + 1 where W is the word
      63       index, or is -1 if Aho-Corasick is in use and FAIL
      64       specifies where to look for more info.  If not an accepting node,
      65       this is zero.  */
      66    ptrdiff_t accepting;
      67  
      68    struct tree *links;		/* Tree of edges leaving this node.  */
      69    struct trie *parent;		/* Parent of this node.  */
      70    struct trie *next;		/* List of all trie nodes in level order.  */
      71    struct trie *fail;		/* Aho-Corasick failure function.  */
      72    idx_t depth;			/* Depth of this node from the root.  */
      73    idx_t shift;			/* Shift function for search failures.  */
      74    idx_t maxshift;		/* Max shift of self and descendants.  */
      75  };
      76  
      77  /* Structure returned opaquely to the caller, containing everything.  */
      78  struct kwset
      79  {
      80    struct obstack obstack;	/* Obstack for node allocation.  */
      81    idx_t words;			/* Number of words in the trie.  */
      82    struct trie *trie;		/* The trie itself.  */
      83    idx_t mind;			/* Minimum depth of an accepting node.  */
      84    unsigned char delta[NCHAR];	/* Delta table for rapid search.  */
      85    struct trie *next[NCHAR];	/* Table of children of the root.  */
      86    char *target;			/* Target string if there's only one.  */
      87    idx_t *shift;			/* Used in Boyer-Moore search for one
      88                                     string.  */
      89    char const *trans;		/* Character translation table.  */
      90  
      91    /* This helps to match a terminal byte, which is the first byte
      92       for Aho-Corasick, and the last byte for Boyer-More.  If all the
      93       patterns have the same terminal byte (after translation via TRANS
      94       if TRANS is nonnull), then this is that byte as an unsigned char.
      95       Otherwise this is -1 if there is disagreement among the strings
      96       about terminal bytes, and -2 if there are no terminal bytes and
      97       no disagreement because all the patterns are empty.  */
      98    int gc1;
      99  
     100    /* This helps to match a terminal byte.  If 0 <= GC1HELP, B is
     101       terminal when B == GC1 || B == GC1HELP (note that GC1 == GCHELP
     102       is common here).  This is typically faster than evaluating
     103       to_uchar (TRANS[B]) == GC1.  */
     104    int gc1help;
     105  
     106    /* If the string has two or more bytes, this is the penultimate byte,
     107       after translation via TRANS if TRANS is nonnull.  This variable
     108       is used only by Boyer-Moore.  */
     109    char gc2;
     110  
     111    /* kwsexec implementation.  */
     112    ptrdiff_t (*kwsexec) (kwset_t, char const *, idx_t, struct kwsmatch *, bool);
     113  };
     114  
     115  /* Use TRANS to transliterate C.  A null TRANS does no transliteration.  */
     116  static inline char
     117  tr (char const *trans, char c)
     118  {
     119    return trans ? trans[U(c)] : c;
     120  }
     121  
     122  static ptrdiff_t acexec (kwset_t, char const *, idx_t,
     123                           struct kwsmatch *, bool);
     124  static ptrdiff_t bmexec (kwset_t, char const *, idx_t,
     125                           struct kwsmatch *, bool);
     126  
     127  /* Return a newly allocated keyword set.  A nonnull TRANS specifies a
     128     table of character translations to be applied to all pattern and
     129     search text.  */
     130  kwset_t
     131  kwsalloc (char const *trans)
     132  {
     133    struct kwset *kwset = xmalloc (sizeof *kwset);
     134  
     135    obstack_init (&kwset->obstack);
     136    kwset->words = 0;
     137    kwset->trie = obstack_alloc (&kwset->obstack, sizeof *kwset->trie);
     138    kwset->trie->accepting = 0;
     139    kwset->trie->links = NULL;
     140    kwset->trie->parent = NULL;
     141    kwset->trie->next = NULL;
     142    kwset->trie->fail = NULL;
     143    kwset->trie->depth = 0;
     144    kwset->trie->shift = 0;
     145    kwset->mind = IDX_MAX;
     146    kwset->target = NULL;
     147    kwset->trans = trans;
     148    kwset->kwsexec = acexec;
     149  
     150    return kwset;
     151  }
     152  
     153  /* This upper bound is valid for CHAR_BIT >= 4 and
     154     exact for CHAR_BIT in { 4..11, 13, 15, 17, 19 }.  */
     155  enum { DEPTH_SIZE = CHAR_BIT + CHAR_BIT / 2 };
     156  
     157  /* Add the given string to the contents of the keyword set.  */
     158  void
     159  kwsincr (kwset_t kwset, char const *text, idx_t len)
     160  {
     161    assume (0 <= len);
     162    struct trie *trie = kwset->trie;
     163    char const *trans = kwset->trans;
     164    bool reverse = kwset->kwsexec == bmexec;
     165  
     166    if (reverse)
     167      text += len;
     168  
     169    /* Descend the trie (built of keywords) character-by-character,
     170       installing new nodes when necessary.  */
     171    while (len--)
     172      {
     173        unsigned char uc = reverse ? *--text : *text++;
     174        unsigned char label = trans ? trans[uc] : uc;
     175  
     176        /* Descend the tree of outgoing links for this trie node,
     177           looking for the current character and keeping track
     178           of the path followed.  */
     179        struct tree *cur = trie->links;
     180        struct tree *links[DEPTH_SIZE];
     181        enum { L, R } dirs[DEPTH_SIZE];
     182        links[0] = (struct tree *) &trie->links;
     183        dirs[0] = L;
     184        idx_t depth = 1;
     185  
     186        while (cur && label != cur->label)
     187          {
     188            links[depth] = cur;
     189            if (label < cur->label)
     190              dirs[depth++] = L, cur = cur->llink;
     191            else
     192              dirs[depth++] = R, cur = cur->rlink;
     193          }
     194  
     195        /* The current character doesn't have an outgoing link at
     196           this trie node, so build a new trie node and install
     197           a link in the current trie node's tree.  */
     198        if (!cur)
     199          {
     200            cur = obstack_alloc (&kwset->obstack, sizeof *cur);
     201            cur->llink = NULL;
     202            cur->rlink = NULL;
     203            cur->trie = obstack_alloc (&kwset->obstack, sizeof *cur->trie);
     204            cur->trie->accepting = 0;
     205            cur->trie->links = NULL;
     206            cur->trie->parent = trie;
     207            cur->trie->next = NULL;
     208            cur->trie->fail = NULL;
     209            cur->trie->depth = trie->depth + 1;
     210            cur->trie->shift = 0;
     211            cur->label = label;
     212            cur->balance = 0;
     213  
     214            /* Install the new tree node in its parent.  */
     215            if (dirs[--depth] == L)
     216              links[depth]->llink = cur;
     217            else
     218              links[depth]->rlink = cur;
     219  
     220            /* Back up the tree fixing the balance flags.  */
     221            while (depth && !links[depth]->balance)
     222              {
     223                if (dirs[depth] == L)
     224                  --links[depth]->balance;
     225                else
     226                  ++links[depth]->balance;
     227                --depth;
     228              }
     229  
     230            /* Rebalance the tree by pointer rotations if necessary.  */
     231            if (depth && ((dirs[depth] == L && --links[depth]->balance)
     232                          || (dirs[depth] == R && ++links[depth]->balance)))
     233              {
     234                struct tree *t, *r, *l, *rl, *lr;
     235  
     236                switch (links[depth]->balance)
     237                  {
     238                  case (char) -2:
     239                    switch (dirs[depth + 1])
     240                      {
     241                      case L:
     242                        r = links[depth], t = r->llink, rl = t->rlink;
     243                        t->rlink = r, r->llink = rl;
     244                        t->balance = r->balance = 0;
     245                        break;
     246                      case R:
     247                        r = links[depth], l = r->llink, t = l->rlink;
     248                        rl = t->rlink, lr = t->llink;
     249                        t->llink = l, l->rlink = lr, t->rlink = r, r->llink = rl;
     250                        l->balance = t->balance != 1 ? 0 : -1;
     251                        r->balance = t->balance != (char) -1 ? 0 : 1;
     252                        t->balance = 0;
     253                        break;
     254                      default:
     255                        abort ();
     256                      }
     257                    break;
     258                  case 2:
     259                    switch (dirs[depth + 1])
     260                      {
     261                      case R:
     262                        l = links[depth], t = l->rlink, lr = t->llink;
     263                        t->llink = l, l->rlink = lr;
     264                        t->balance = l->balance = 0;
     265                        break;
     266                      case L:
     267                        l = links[depth], r = l->rlink, t = r->llink;
     268                        lr = t->llink, rl = t->rlink;
     269                        t->llink = l, l->rlink = lr, t->rlink = r, r->llink = rl;
     270                        l->balance = t->balance != 1 ? 0 : -1;
     271                        r->balance = t->balance != (char) -1 ? 0 : 1;
     272                        t->balance = 0;
     273                        break;
     274                      default:
     275                        abort ();
     276                      }
     277                    break;
     278                  default:
     279                    abort ();
     280                  }
     281  
     282                if (dirs[depth - 1] == L)
     283                  links[depth - 1]->llink = t;
     284                else
     285                  links[depth - 1]->rlink = t;
     286              }
     287          }
     288  
     289        trie = cur->trie;
     290      }
     291  
     292    /* Mark the node finally reached as accepting, encoding the
     293       index number of this word in the keyword set so far.  */
     294    if (!trie->accepting)
     295      trie->accepting = 2 * kwset->words + 1;
     296    ++kwset->words;
     297  
     298    /* Keep track of the longest and shortest string of the keyword set.  */
     299    if (trie->depth < kwset->mind)
     300      kwset->mind = trie->depth;
     301  }
     302  
     303  idx_t
     304  kwswords (kwset_t kwset)
     305  {
     306    return kwset->words;
     307  }
     308  
     309  /* Enqueue the trie nodes referenced from the given tree in the
     310     given queue.  */
     311  static void
     312  enqueue (struct tree *tree, struct trie **last)
     313  {
     314    if (!tree)
     315      return;
     316    enqueue (tree->llink, last);
     317    enqueue (tree->rlink, last);
     318    (*last) = (*last)->next = tree->trie;
     319  }
     320  
     321  /* Compute the Aho-Corasick failure function for the trie nodes referenced
     322     from the given tree, given the failure function for their parent as
     323     well as a last resort failure node.  */
     324  static void
     325  treefails (struct tree const *tree, struct trie const *fail,
     326             struct trie *recourse, bool reverse)
     327  {
     328    struct tree *cur;
     329  
     330    if (!tree)
     331      return;
     332  
     333    treefails (tree->llink, fail, recourse, reverse);
     334    treefails (tree->rlink, fail, recourse, reverse);
     335  
     336    /* Find, in the chain of fails going back to the root, the first
     337       node that has a descendant on the current label.  */
     338    while (fail)
     339      {
     340        cur = fail->links;
     341        while (cur && tree->label != cur->label)
     342          if (tree->label < cur->label)
     343            cur = cur->llink;
     344          else
     345            cur = cur->rlink;
     346        if (cur)
     347          {
     348            tree->trie->fail = cur->trie;
     349            if (!reverse && cur->trie->accepting && !tree->trie->accepting)
     350              tree->trie->accepting = -1;
     351            return;
     352          }
     353        fail = fail->fail;
     354      }
     355  
     356    tree->trie->fail = recourse;
     357  }
     358  
     359  /* Set delta entries for the links of the given tree such that
     360     the preexisting delta value is larger than the current depth.  */
     361  static void
     362  treedelta (struct tree const *tree, idx_t depth, unsigned char delta[])
     363  {
     364    if (!tree)
     365      return;
     366    treedelta (tree->llink, depth, delta);
     367    treedelta (tree->rlink, depth, delta);
     368    if (depth < delta[tree->label])
     369      delta[tree->label] = depth;
     370  }
     371  
     372  /* Return true if A has every label in B.  */
     373  static bool _GL_ATTRIBUTE_PURE
     374  hasevery (struct tree const *a, struct tree const *b)
     375  {
     376    if (!b)
     377      return true;
     378    if (!hasevery (a, b->llink))
     379      return false;
     380    if (!hasevery (a, b->rlink))
     381      return false;
     382    while (a && b->label != a->label)
     383      if (b->label < a->label)
     384        a = a->llink;
     385      else
     386        a = a->rlink;
     387    return !!a;
     388  }
     389  
     390  /* Compute a vector, indexed by character code, of the trie nodes
     391     referenced from the given tree.  */
     392  static void
     393  treenext (struct tree const *tree, struct trie *next[])
     394  {
     395    if (!tree)
     396      return;
     397    treenext (tree->llink, next);
     398    treenext (tree->rlink, next);
     399    next[tree->label] = tree->trie;
     400  }
     401  
     402  /* Prepare a built keyword set for use.  */
     403  void
     404  kwsprep (kwset_t kwset)
     405  {
     406    char const *trans = kwset->trans;
     407    unsigned char deltabuf[NCHAR];
     408    unsigned char *delta = trans ? deltabuf : kwset->delta;
     409    struct trie *curr, *last;
     410  
     411    /* Use Boyer-Moore if just one pattern, Aho-Corasick otherwise.  */
     412    bool reverse = kwset->words == 1;
     413  
     414    if (reverse)
     415      {
     416        kwset_t new_kwset;
     417  
     418        /* Enqueue the immediate descendants in the level order queue.  */
     419        for (curr = last = kwset->trie; curr; curr = curr->next)
     420          enqueue (curr->links, &last);
     421  
     422        /* Looking for just one string.  Extract it from the trie.  */
     423        kwset->target = obstack_alloc (&kwset->obstack, kwset->mind);
     424        curr = kwset->trie;
     425        for (idx_t i = 0; i < kwset->mind; i++)
     426          {
     427            kwset->target[i] = curr->links->label;
     428            curr = curr->next;
     429          }
     430  
     431        new_kwset = kwsalloc (kwset->trans);
     432        new_kwset->kwsexec = bmexec;
     433        kwsincr (new_kwset, kwset->target, kwset->mind);
     434        obstack_free (&kwset->obstack, NULL);
     435        *kwset = *new_kwset;
     436        free (new_kwset);
     437      }
     438  
     439    /* Initial values for the delta table; will be changed later.  The
     440       delta entry for a given character is the smallest depth of any
     441       node at which an outgoing edge is labeled by that character.  */
     442    memset (delta, MIN (kwset->mind, UCHAR_MAX), sizeof deltabuf);
     443  
     444    /* Traverse the nodes of the trie in level order, simultaneously
     445       computing the delta table, failure function, and shift function.  */
     446    for (curr = last = kwset->trie; curr; curr = curr->next)
     447      {
     448        /* Enqueue the immediate descendants in the level order queue.  */
     449        enqueue (curr->links, &last);
     450  
     451        /* Update the delta table for the descendants of this node.  */
     452        treedelta (curr->links, curr->depth, delta);
     453  
     454        /* Compute the failure function for the descendants of this node.  */
     455        treefails (curr->links, curr->fail, kwset->trie, reverse);
     456  
     457        if (reverse)
     458          {
     459            curr->shift = kwset->mind;
     460            curr->maxshift = kwset->mind;
     461  
     462            /* Update the shifts at each node in the current node's chain
     463               of fails back to the root.  */
     464            struct trie *fail;
     465            for (fail = curr->fail; fail; fail = fail->fail)
     466              {
     467                /* If the current node has some outgoing edge that the fail
     468                   doesn't, then the shift at the fail should be no larger
     469                   than the difference of their depths.  */
     470                if (!hasevery (fail->links, curr->links))
     471                  if (curr->depth - fail->depth < fail->shift)
     472                    fail->shift = curr->depth - fail->depth;
     473  
     474                /* If the current node is accepting then the shift at the
     475                   fail and its descendants should be no larger than the
     476                   difference of their depths.  */
     477                if (curr->accepting && fail->maxshift > curr->depth - fail->depth)
     478                  fail->maxshift = curr->depth - fail->depth;
     479              }
     480          }
     481      }
     482  
     483    if (reverse)
     484      {
     485        /* Traverse the trie in level order again, fixing up all nodes whose
     486           shift exceeds their inherited maxshift.  */
     487        for (curr = kwset->trie->next; curr; curr = curr->next)
     488          {
     489            if (curr->maxshift > curr->parent->maxshift)
     490              curr->maxshift = curr->parent->maxshift;
     491            if (curr->shift > curr->maxshift)
     492              curr->shift = curr->maxshift;
     493          }
     494      }
     495  
     496    /* Create a vector, indexed by character code, of the outgoing links
     497       from the root node.  Accumulate GC1 and GC1HELP.  */
     498    struct trie *nextbuf[NCHAR];
     499    struct trie **next = trans ? nextbuf : kwset->next;
     500    memset (next, 0, sizeof nextbuf);
     501    treenext (kwset->trie->links, next);
     502    int gc1 = -2;
     503    int gc1help = -1;
     504    for (int i = 0; i < NCHAR; i++)
     505      {
     506        int ti = i;
     507        if (trans)
     508          {
     509            ti = U(trans[i]);
     510            kwset->next[i] = next[ti];
     511          }
     512        if (kwset->next[i])
     513          {
     514            if (gc1 < -1)
     515              {
     516                gc1 = ti;
     517                gc1help = i;
     518              }
     519            else if (gc1 == ti)
     520              gc1help = gc1help == ti ? i : -1;
     521            else if (i == ti && gc1 == gc1help)
     522              gc1help = i;
     523            else
     524              gc1 = -1;
     525          }
     526      }
     527    kwset->gc1 = gc1;
     528    kwset->gc1help = gc1help;
     529  
     530    if (reverse)
     531      {
     532        /* Looking for just one string.  Extract it from the trie.  */
     533        kwset->target = obstack_alloc (&kwset->obstack, kwset->mind);
     534        curr = kwset->trie;
     535        for (idx_t i = kwset->mind; 0 < i; i--)
     536          {
     537            kwset->target[i - 1] = curr->links->label;
     538            curr = curr->next;
     539          }
     540  
     541        if (kwset->mind > 1)
     542          {
     543            /* Looking for the delta2 shift that might be made after a
     544               backwards match has failed.  Extract it from the trie.  */
     545            kwset->shift
     546              = obstack_alloc (&kwset->obstack,
     547                               sizeof *kwset->shift * (kwset->mind - 1));
     548            curr = kwset->trie->next;
     549            for (idx_t i = 0; i < kwset->mind - 1; i++)
     550              {
     551                kwset->shift[i] = curr->shift;
     552                curr = curr->next;
     553              }
     554  
     555            /* The penultimate byte.  */
     556            kwset->gc2 = tr (trans, kwset->target[kwset->mind - 2]);
     557          }
     558      }
     559  
     560    /* Fix things up for any translation table.  */
     561    if (trans)
     562      for (int i = 0; i < NCHAR; ++i)
     563        kwset->delta[i] = delta[U(trans[i])];
     564  }
     565  
     566  /* Delta2 portion of a Boyer-Moore search.  *TP is the string text
     567     pointer; it is updated in place.  EP is the end of the string text,
     568     and SP the end of the pattern.  LEN is the pattern length; it must
     569     be at least 2.  TRANS, if nonnull, is the input translation table.
     570     GC1 and GC2 are the last and second-from last bytes of the pattern,
     571     transliterated by TRANS; the caller precomputes them for
     572     efficiency.  If D1 is nonnull, it is a delta1 table for shifting *TP
     573     when failing.  KWSET->shift says how much to shift.  */
     574  static inline bool
     575  bm_delta2_search (char const **tpp, char const *ep, char const *sp,
     576                    idx_t len,
     577                    char const *trans, char gc1, char gc2,
     578                    unsigned char const *d1, kwset_t kwset)
     579  {
     580    char const *tp = *tpp;
     581    idx_t d = len, skip = 0;
     582  
     583    while (true)
     584      {
     585        idx_t i = 2;
     586        if (tr (trans, tp[-2]) == gc2)
     587          {
     588            while (++i <= d)
     589              if (tr (trans, tp[-i]) != tr (trans, sp[-i]))
     590                break;
     591            if (i > d)
     592              {
     593                for (i = d + skip + 1; i <= len; ++i)
     594                  if (tr (trans, tp[-i]) != tr (trans, sp[-i]))
     595                    break;
     596                if (i > len)
     597                  {
     598                    *tpp = tp - len;
     599                    return true;
     600                  }
     601              }
     602          }
     603  
     604        tp += d = kwset->shift[i - 2];
     605        if (tp > ep)
     606          break;
     607        if (tr (trans, tp[-1]) != gc1)
     608          {
     609            if (d1)
     610              tp += d1[U(tp[-1])];
     611            break;
     612          }
     613        skip = i - 1;
     614      }
     615  
     616    *tpp = tp;
     617    return false;
     618  }
     619  
     620  /* Return the address of the first byte in the buffer S (of size N)
     621     that matches the terminal byte specified by KWSET, or NULL if there
     622     is no match.  KWSET->gc1 should be nonnegative.  */
     623  static char const *
     624  memchr_kwset (char const *s, idx_t n, kwset_t kwset)
     625  {
     626    char const *slim = s + n;
     627    if (kwset->gc1help < 0)
     628      {
     629        for (; s < slim; s++)
     630          if (kwset->next[U(*s)])
     631            return s;
     632      }
     633    else
     634      {
     635        int small_heuristic = 2;
     636        idx_t small_bytes = small_heuristic * sizeof (unsigned long int);
     637        while (s < slim)
     638          {
     639            if (kwset->next[U(*s)])
     640              return s;
     641            s++;
     642            if ((uintptr_t) s % small_bytes == 0)
     643              return memchr2 (s, kwset->gc1, kwset->gc1help, slim - s);
     644          }
     645      }
     646    return NULL;
     647  }
     648  
     649  /* Fast Boyer-Moore search (inlinable version).  */
     650  static inline ptrdiff_t _GL_ATTRIBUTE_PURE
     651  bmexec_trans (kwset_t kwset, char const *text, idx_t size)
     652  {
     653    assume (0 <= size);
     654    unsigned char const *d1;
     655    char const *ep, *sp, *tp;
     656    int d;
     657    idx_t len = kwset->mind;
     658    char const *trans = kwset->trans;
     659  
     660    if (len == 0)
     661      return 0;
     662    if (len > size)
     663      return -1;
     664    if (len == 1)
     665      {
     666        tp = memchr_kwset (text, size, kwset);
     667        return tp ? tp - text : -1;
     668      }
     669  
     670    d1 = kwset->delta;
     671    sp = kwset->target + len;
     672    tp = text + len;
     673    char gc1 = kwset->gc1;
     674    char gc2 = kwset->gc2;
     675  
     676    /* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2).  */
     677    idx_t len12;
     678    if (!ckd_mul (&len12, len, 12) && len12 < size)
     679      /* 11 is not a bug, the initial offset happens only once.  */
     680      for (ep = text + size - 11 * len; tp <= ep; )
     681        {
     682          char const *tp0 = tp;
     683          d = d1[U(tp[-1])], tp += d;
     684          d = d1[U(tp[-1])], tp += d;
     685          if (d != 0)
     686            {
     687              d = d1[U(tp[-1])], tp += d;
     688              d = d1[U(tp[-1])], tp += d;
     689              d = d1[U(tp[-1])], tp += d;
     690              if (d != 0)
     691                {
     692                  d = d1[U(tp[-1])], tp += d;
     693                  d = d1[U(tp[-1])], tp += d;
     694                  d = d1[U(tp[-1])], tp += d;
     695                  if (d != 0)
     696                    {
     697                      d = d1[U(tp[-1])], tp += d;
     698                      d = d1[U(tp[-1])], tp += d;
     699  
     700                      /* As a heuristic, prefer memchr to seeking by
     701                         delta1 when the latter doesn't advance much.  */
     702                      int advance_heuristic = 16 * sizeof (long);
     703                      if (advance_heuristic <= tp - tp0)
     704                        continue;
     705                      tp--;
     706                      tp = memchr_kwset (tp, text + size - tp, kwset);
     707                      if (! tp)
     708                        return -1;
     709                      tp++;
     710                      if (ep <= tp)
     711                        break;
     712                    }
     713                }
     714            }
     715          if (bm_delta2_search (&tp, ep, sp, len, trans, gc1, gc2, d1, kwset))
     716            return tp - text;
     717        }
     718  
     719    /* Now only a few characters are left to search.  Carefully avoid
     720       ever producing an out-of-bounds pointer.  */
     721    ep = text + size;
     722    d = d1[U(tp[-1])];
     723    while (d <= ep - tp)
     724      {
     725        d = d1[U((tp += d)[-1])];
     726        if (d != 0)
     727          continue;
     728        if (bm_delta2_search (&tp, ep, sp, len, trans, gc1, gc2, NULL, kwset))
     729          return tp - text;
     730      }
     731  
     732    return -1;
     733  }
     734  
     735  /* Fast Boyer-Moore search.  */
     736  static ptrdiff_t
     737  bmexec (kwset_t kwset, char const *text, idx_t size,
     738          struct kwsmatch *kwsmatch, bool longest)
     739  {
     740    /* Help the compiler inline in two ways, depending on whether
     741       kwset->trans is null.  */
     742    ptrdiff_t ret = (IGNORE_DUPLICATE_BRANCH_WARNING
     743                     (kwset->trans
     744                      ? bmexec_trans (kwset, text, size)
     745                      : bmexec_trans (kwset, text, size)));
     746    kwsmatch->index = 0;
     747    kwsmatch->offset = ret;
     748    kwsmatch->size = kwset->mind;
     749    return ret;
     750  }
     751  
     752  /* Hairy multiple string search with the Aho-Corasick algorithm.
     753     (inlinable version)  */
     754  static inline ptrdiff_t
     755  acexec_trans (kwset_t kwset, char const *text, idx_t len,
     756                struct kwsmatch *kwsmatch, bool longest)
     757  {
     758    struct trie const *trie, *accept;
     759    char const *tp, *left, *lim;
     760    struct tree const *tree;
     761    char const *trans;
     762  
     763    /* Initialize register copies and look for easy ways out.  */
     764    if (len < kwset->mind)
     765      return -1;
     766    trans = kwset->trans;
     767    trie = kwset->trie;
     768    lim = text + len;
     769    tp = text;
     770  
     771    if (!trie->accepting)
     772      {
     773        unsigned char c;
     774        int gc1 = kwset->gc1;
     775  
     776        while (true)
     777          {
     778            if (gc1 < 0)
     779              {
     780                while (! (trie = kwset->next[c = tr (trans, *tp++)]))
     781                  if (tp >= lim)
     782                    return -1;
     783              }
     784            else
     785              {
     786                tp = memchr_kwset (tp, lim - tp, kwset);
     787                if (!tp)
     788                  return -1;
     789                c = tr (trans, *tp++);
     790                trie = kwset->next[c];
     791              }
     792  
     793            while (true)
     794              {
     795                if (trie->accepting)
     796                  goto match;
     797                if (tp >= lim)
     798                  return -1;
     799                c = tr (trans, *tp++);
     800  
     801                for (tree = trie->links; c != tree->label; )
     802                  {
     803                    tree = c < tree->label ? tree->llink : tree->rlink;
     804                    if (! tree)
     805                      {
     806                        trie = trie->fail;
     807                        if (!trie)
     808                          {
     809                            trie = kwset->next[c];
     810                            if (trie)
     811                              goto have_trie;
     812                            if (tp >= lim)
     813                              return -1;
     814                            goto next_c;
     815                          }
     816                        if (trie->accepting)
     817                          {
     818                            --tp;
     819                            goto match;
     820                          }
     821                        tree = trie->links;
     822                      }
     823                  }
     824                trie = tree->trie;
     825              have_trie:;
     826              }
     827          next_c:;
     828          }
     829      }
     830  
     831   match:
     832    accept = trie;
     833    while (accept->accepting < 0)
     834      accept = accept->fail;
     835    left = tp - accept->depth;
     836  
     837    /* Try left-most longest match.  */
     838    if (longest)
     839      {
     840        while (tp < lim)
     841          {
     842            struct trie const *accept1;
     843            char const *left1;
     844            unsigned char c = tr (trans, *tp++);
     845  
     846            do
     847              {
     848                tree = trie->links;
     849                while (tree && c != tree->label)
     850                  tree = c < tree->label ? tree->llink : tree->rlink;
     851              }
     852            while (!tree && (trie = trie->fail) && accept->depth <= trie->depth);
     853  
     854            if (!tree)
     855              break;
     856            trie = tree->trie;
     857            if (trie->accepting)
     858              {
     859                accept1 = trie;
     860                while (accept1->accepting < 0)
     861                  accept1 = accept1->fail;
     862                left1 = tp - accept1->depth;
     863                if (left1 <= left)
     864                  {
     865                    left = left1;
     866                    accept = accept1;
     867                  }
     868              }
     869          }
     870      }
     871  
     872    kwsmatch->index = accept->accepting >> 1;
     873    kwsmatch->offset = left - text;
     874    kwsmatch->size = accept->depth;
     875  
     876    return left - text;
     877  }
     878  
     879  /* Hairy multiple string search with Aho-Corasick algorithm.  */
     880  static ptrdiff_t
     881  acexec (kwset_t kwset, char const *text, idx_t size,
     882          struct kwsmatch *kwsmatch, bool longest)
     883  {
     884    assume (0 <= size);
     885    /* Help the compiler inline in two ways, depending on whether
     886       kwset->trans is null.  */
     887    return (IGNORE_DUPLICATE_BRANCH_WARNING
     888            (kwset->trans
     889             ? acexec_trans (kwset, text, size, kwsmatch, longest)
     890             : acexec_trans (kwset, text, size, kwsmatch, longest)));
     891  }
     892  
     893  /* Find the first instance of a KWSET member in TEXT, which has SIZE bytes.
     894     Return the offset (into TEXT) of the first byte of the matching substring,
     895     or -1 if no match is found.  Upon a match, store details in
     896     *KWSMATCH: index of matched keyword, start offset (same as the return
     897     value), and length.  If LONGEST, find the longest match; otherwise
     898     any match will do.  */
     899  ptrdiff_t
     900  kwsexec (kwset_t kwset, char const *text, idx_t size,
     901           struct kwsmatch *kwsmatch, bool longest)
     902  {
     903    return kwset->kwsexec (kwset, text, size, kwsmatch, longest);
     904  }
     905  
     906  /* Free the components of the given keyword set.  */
     907  void
     908  kwsfree (kwset_t kwset)
     909  {
     910    obstack_free (&kwset->obstack, NULL);
     911    free (kwset);
     912  }