(root)/
glibc-2.38/
string/
memmem.c
       1  /* Copyright (C) 1991-2023 Free Software Foundation, Inc.
       2     This file is part of the GNU C Library.
       3  
       4     The GNU C Library is free software; you can redistribute it and/or
       5     modify it under the terms of the GNU Lesser General Public
       6     License as published by the Free Software Foundation; either
       7     version 2.1 of the License, or (at your option) any later version.
       8  
       9     The GNU C Library is distributed in the hope that it will be useful,
      10     but WITHOUT ANY WARRANTY; without even the implied warranty of
      11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      12     Lesser General Public License for more details.
      13  
      14     You should have received a copy of the GNU Lesser General Public
      15     License along with the GNU C Library; if not, see
      16     <https://www.gnu.org/licenses/>.  */
      17  
      18  #ifndef _LIBC
      19  # include <config.h>
      20  #endif
      21  
      22  #include <string.h>
      23  
/* When not building as part of the C library itself, define the function
   below under its public name.  */
#ifndef _LIBC
# define __memmem	memmem
#endif

/* Parameter macros defined before including "str-two-way.h".
   NOTE(review): how that header consumes them is not visible from this
   file; it is presumably also where two_way_long_needle, called by
   __memmem below, comes from -- confirm against str-two-way.h.  */

/* Type named RETURN_TYPE for the included code: a generic object pointer.  */
#define RETURN_TYPE void *
/* Evaluates to nonzero when J <= H_L - N_L.  The H argument is ignored.  */
#define AVAILABLE(h, h_l, j, n_l) ((j) <= (h_l) - (n_l))
/* Single-byte search primitive: memchr for byte C in the first N bytes
   at S, with the result converted to void *.  */
#define FASTSEARCH(S,C,N) (void*) memchr ((void *)(S), (C), (N))
#include "str-two-way.h"
      32  
/* Remove any macro definition of memmem that is in effect at this point.
   NOTE(review): presumably so that the alias declarations at the end of
   the file refer to a plain identifier -- confirm.  */
#undef memmem

/* Hash the character pair (p[-1], p[0]) so a small shift table can be used.
   All bits of p[0] are included, but not all bits from p[-1] (it is shifted
   left by 3 before the result is reduced modulo the table size).  So if two
   pairs have equal hashes and agree on p[-1], they agree on p[0] too.  Hash
   collisions are harmless and result in smaller shifts.

   The macro reads p[-1], so P must point at least one element past the
   start of its buffer, and it expands to a use of sizeof (shift), so an
   array named `shift' must be in scope wherever it is used.  */
#define hash2(p) (((size_t)(p)[0] - ((size_t)(p)[-1] << 3)) % sizeof (shift))
      40  
      41  /* Fast memmem algorithm with guaranteed linear-time performance.
      42     Small needles up to size 2 use a dedicated linear search.  Longer needles
      43     up to size 256 use a novel modified Horspool algorithm.  It hashes pairs
      44     of characters to quickly skip past mismatches.  The main search loop only
      45     exits if the last 2 characters match, avoiding unnecessary calls to memcmp
      46     and allowing for a larger skip if there is no match.  A self-adapting
      47     filtering check is used to quickly detect mismatches in long needles.
      48     By limiting the needle length to 256, the shift table can be reduced to 8
      49     bits per entry, lowering preprocessing overhead and minimizing cache effects.
      50     The limit also implies worst-case performance is linear.
      51     Needles larger than 256 characters use the linear-time Two-Way algorithm.  */
      52  void *
      53  __memmem (const void *haystack, size_t hs_len,
      54  	  const void *needle, size_t ne_len)
      55  {
      56    const unsigned char *hs = (const unsigned char *) haystack;
      57    const unsigned char *ne = (const unsigned char *) needle;
      58  
      59    if (ne_len == 0)
      60      return (void *) hs;
      61    if (ne_len == 1)
      62      return (void *) memchr (hs, ne[0], hs_len);
      63  
      64    /* Ensure haystack length is >= needle length.  */
      65    if (hs_len < ne_len)
      66      return NULL;
      67  
      68    const unsigned char *end = hs + hs_len - ne_len;
      69  
      70    if (ne_len == 2)
      71      {
      72        uint32_t nw = ne[0] << 16 | ne[1], hw = hs[0] << 16 | hs[1];
      73        for (hs++; hs <= end && hw != nw; )
      74  	hw = hw << 16 | *++hs;
      75        return hw == nw ? (void *)hs - 1 : NULL;
      76      }
      77  
      78    /* Use Two-Way algorithm for very long needles.  */
      79    if (__builtin_expect (ne_len > 256, 0))
      80      return two_way_long_needle (hs, hs_len, ne, ne_len);
      81  
      82    uint8_t shift[256];
      83    size_t tmp, shift1;
      84    size_t m1 = ne_len - 1;
      85    size_t offset = 0;
      86  
      87    memset (shift, 0, sizeof (shift));
      88    for (int i = 1; i < m1; i++)
      89      shift[hash2 (ne + i)] = i;
      90    /* Shift1 is the amount we can skip after matching the hash of the
      91       needle end but not the full needle.  */
      92    shift1 = m1 - shift[hash2 (ne + m1)];
      93    shift[hash2 (ne + m1)] = m1;
      94  
      95    for ( ; hs <= end; )
      96      {
      97        /* Skip past character pairs not in the needle.  */
      98        do
      99  	{
     100  	  hs += m1;
     101  	  tmp = shift[hash2 (hs)];
     102  	}
     103        while (tmp == 0 && hs <= end);
     104  
     105        /* If the match is not at the end of the needle, shift to the end
     106  	 and continue until we match the hash of the needle end.  */
     107        hs -= tmp;
     108        if (tmp < m1)
     109  	continue;
     110  
     111        /* Hash of the last 2 characters matches.  If the needle is long,
     112  	 try to quickly filter out mismatches.  */
     113        if (m1 < 15 || memcmp (hs + offset, ne + offset, 8) == 0)
     114  	{
     115  	  if (memcmp (hs, ne, m1) == 0)
     116  	    return (void *) hs;
     117  
     118  	  /* Adjust filter offset when it doesn't find the mismatch.  */
     119  	  offset = (offset >= 8 ? offset : m1) - 8;
     120  	}
     121  
     122        /* Skip based on matching the hash of the needle end.  */
     123        hs += shift1;
     124      }
     125    return NULL;
     126  }
/* Symbol bookkeeping using C library build macros that are defined
   elsewhere.  NOTE(review): presumably these mark __memmem as a hidden
   internal definition and publish memmem as a weak alias for it --
   confirm against the macro definitions.  */
libc_hidden_def (__memmem)
weak_alias (__memmem, memmem)
libc_hidden_weak (memmem)