(root)/
m4-1.4.19/
lib/
str-kmp.h
       1  /* Substring search in a NUL terminated string of UNIT elements,
       2     using the Knuth-Morris-Pratt algorithm.
       3     Copyright (C) 2005-2021 Free Software Foundation, Inc.
       4     Written by Bruno Haible <bruno@clisp.org>, 2005.
       5  
       6     This program is free software; you can redistribute it and/or modify
       7     it under the terms of the GNU General Public License as published by
       8     the Free Software Foundation; either version 3, or (at your option)
       9     any later version.
      10  
      11     This program is distributed in the hope that it will be useful,
      12     but WITHOUT ANY WARRANTY; without even the implied warranty of
      13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14     GNU General Public License for more details.
      15  
      16     You should have received a copy of the GNU General Public License
      17     along with this program; if not, see <https://www.gnu.org/licenses/>.  */
      18  
      19  /* Before including this file, you need to define:
      20       UNIT                    The element type of the needle and haystack.
      21       CANON_ELEMENT(c)        A macro that canonicalizes an element right after
      22                               it has been fetched from needle or haystack.
      23                               The argument is of type UNIT; the result must be
      24                               of type UNIT as well.  */
      25  
      26  /* Knuth-Morris-Pratt algorithm.
      27     See https://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm
      28     HAYSTACK is the NUL terminated string in which to search for.
      29     NEEDLE is the string to search for in HAYSTACK, consisting of NEEDLE_LEN
      30     units.
      31     Return a boolean indicating success:
      32     Return true and set *RESULTP if the search was completed.
      33     Return false if it was aborted because not enough memory was available.  */
      34  static bool
      35  knuth_morris_pratt (const UNIT *haystack,
      36                      const UNIT *needle, size_t needle_len,
      37                      const UNIT **resultp)
      38  {
      39    size_t m = needle_len;
      40  
      41    /* Allocate the table.  */
      42    size_t *table = (size_t *) nmalloca (m, sizeof (size_t));
      43    if (table == NULL)
      44      return false;
      45    /* Fill the table.
      46       For 0 < i < m:
      47         0 < table[i] <= i is defined such that
      48         forall 0 < x < table[i]: needle[x..i-1] != needle[0..i-1-x],
      49         and table[i] is as large as possible with this property.
      50       This implies:
      51       1) For 0 < i < m:
      52            If table[i] < i,
      53            needle[table[i]..i-1] = needle[0..i-1-table[i]].
      54       2) For 0 < i < m:
      55            rhaystack[0..i-1] == needle[0..i-1]
      56            and exists h, i <= h < m: rhaystack[h] != needle[h]
      57            implies
      58            forall 0 <= x < table[i]: rhaystack[x..x+m-1] != needle[0..m-1].
      59       table[0] remains uninitialized.  */
      60    {
      61      size_t i, j;
      62  
      63      /* i = 1: Nothing to verify for x = 0.  */
      64      table[1] = 1;
      65      j = 0;
      66  
      67      for (i = 2; i < m; i++)
      68        {
      69          /* Here: j = i-1 - table[i-1].
      70             The inequality needle[x..i-1] != needle[0..i-1-x] is known to hold
      71             for x < table[i-1], by induction.
      72             Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1].  */
      73          UNIT b = CANON_ELEMENT (needle[i - 1]);
      74  
      75          for (;;)
      76            {
      77              /* Invariants: The inequality needle[x..i-1] != needle[0..i-1-x]
      78                 is known to hold for x < i-1-j.
      79                 Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1].  */
      80              if (b == CANON_ELEMENT (needle[j]))
      81                {
      82                  /* Set table[i] := i-1-j.  */
      83                  table[i] = i - ++j;
      84                  break;
      85                }
      86              /* The inequality needle[x..i-1] != needle[0..i-1-x] also holds
      87                 for x = i-1-j, because
      88                   needle[i-1] != needle[j] = needle[i-1-x].  */
      89              if (j == 0)
      90                {
      91                  /* The inequality holds for all possible x.  */
      92                  table[i] = i;
      93                  break;
      94                }
      95              /* The inequality needle[x..i-1] != needle[0..i-1-x] also holds
      96                 for i-1-j < x < i-1-j+table[j], because for these x:
      97                   needle[x..i-2]
      98                   = needle[x-(i-1-j)..j-1]
      99                   != needle[0..j-1-(x-(i-1-j))]  (by definition of table[j])
     100                      = needle[0..i-2-x],
     101                 hence needle[x..i-1] != needle[0..i-1-x].
     102                 Furthermore
     103                   needle[i-1-j+table[j]..i-2]
     104                   = needle[table[j]..j-1]
     105                   = needle[0..j-1-table[j]]  (by definition of table[j]).  */
     106              j = j - table[j];
     107            }
     108          /* Here: j = i - table[i].  */
     109        }
     110    }
     111  
     112    /* Search, using the table to accelerate the processing.  */
     113    {
     114      size_t j;
     115      const UNIT *rhaystack;
     116      const UNIT *phaystack;
     117  
     118      *resultp = NULL;
     119      j = 0;
     120      rhaystack = haystack;
     121      phaystack = haystack;
     122      /* Invariant: phaystack = rhaystack + j.  */
     123      while (*phaystack != 0)
     124        if (CANON_ELEMENT (needle[j]) == CANON_ELEMENT (*phaystack))
     125          {
     126            j++;
     127            phaystack++;
     128            if (j == m)
     129              {
     130                /* The entire needle has been found.  */
     131                *resultp = rhaystack;
     132                break;
     133              }
     134          }
     135        else if (j > 0)
     136          {
     137            /* Found a match of needle[0..j-1], mismatch at needle[j].  */
     138            rhaystack += table[j];
     139            j -= table[j];
     140          }
     141        else
     142          {
     143            /* Found a mismatch at needle[0] already.  */
     144            rhaystack++;
     145            phaystack++;
     146          }
     147    }
     148  
     149    freea (table);
     150    return true;
     151  }
     152  
     153  #undef CANON_ELEMENT