(root)/
glibc-2.38/
benchtests/
bench-strstr.c
       1  /* Measure strstr functions.
       2     Copyright (C) 2013-2023 Free Software Foundation, Inc.
       3     This file is part of the GNU C Library.
       4  
       5     The GNU C Library is free software; you can redistribute it and/or
       6     modify it under the terms of the GNU Lesser General Public
       7     License as published by the Free Software Foundation; either
       8     version 2.1 of the License, or (at your option) any later version.
       9  
      10     The GNU C Library is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      13     Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public
      16     License along with the GNU C Library; if not, see
      17     <https://www.gnu.org/licenses/>.  */
      18  
      19  #define MIN_PAGE_SIZE 131072
      20  #define TEST_MAIN
      21  #define TEST_NAME "strstr"
      22  #include "bench-string.h"
      23  
      24  #include "json-lib.h"
      25  
      26  static const char input[] =
      27  "This manual is written with the assumption that you are at least "
      28  "somewhat familiar with the C programming language and basic programming "
      29  "concepts.  Specifically, familiarity with ISO standard C (*note ISO "
      30  "C::), rather than “traditional” pre-ISO C dialects, is assumed.\n"
      31  
      32  "   The GNU C Library includes several “header files”, each of which "
      33  "provides definitions and declarations for a group of related facilities; "
      34  "this information is used by the C compiler when processing your program. "
      35  "For example, the header file ‘stdio.h’ declares facilities for "
      36  "performing input and output, and the header file ‘string.h’ declares "
      37  "string processing utilities.  The organization of this manual generally "
      38  "follows the same division as the header files.\n"
      39  
      40  "   If you are reading this manual for the first time, you should read "
      41  "all of the introductory material and skim the remaining chapters.  There "
      42  "are a _lot_ of functions in the GNU C Library and it’s not realistic to "
      43  "expect that you will be able to remember exactly _how_ to use each and "
      44  "every one of them.  It’s more important to become generally familiar "
      45  "with the kinds of facilities that the library provides, so that when you "
      46  "are writing your programs you can recognize _when_ to make use of "
      47  "library functions, and _where_ in this manual you can find more specific "
      48  "information about them.\n";
      49  
      50  /* Simple yet efficient strstr - for needles < 32 bytes it is 2-4 times
      51     faster than the optimized twoway_strstr.  */
      52  static char *
      53  basic_strstr (const char *s1, const char *s2)
      54  {
      55    size_t i;
      56    int c = s2[0];
      57  
      58    if (c == 0)
      59      return (char*)s1;
      60  
      61    for ( ; s1[0] != '\0'; s1++)
      62      {
      63        if (s1[0] != c)
      64  	continue;
      65        for (i = 1; s2[i] != 0; i++)
      66  	if (s1[i] != s2[i])
      67  	  break;
      68        if (s2[i] == '\0')
      69  	return (char*)s1;
      70      }
      71  
      72    return NULL;
      73  }
      74  
      75  #define RETURN_TYPE char *
      76  #define AVAILABLE(h, h_l, j, n_l)			\
      77    (((j) + (n_l) <= (h_l)) \
      78     || ((h_l) += __strnlen ((void*)((h) + (h_l)), (n_l) + 512), \
      79         (j) + (n_l) <= (h_l)))
      80  #define CHECK_EOL (1)
      81  #define RET0_IF_0(a) if (!a) goto ret0
      82  #define FASTSEARCH(S,C,N) (void*) strchr ((void*)(S), (C))
      83  #define LONG_NEEDLE_THRESHOLD 32U
      84  #define __strnlen strnlen
      85  #include "string/str-two-way.h"
      86  
      87  /* Optimized Two-way implementation from GLIBC 2.29.  */
      88  static char *
      89  twoway_strstr (const char *haystack, const char *needle)
      90  {
      91    size_t needle_len; /* Length of NEEDLE.  */
      92    size_t haystack_len; /* Known minimum length of HAYSTACK.  */
      93  
      94    /* Handle empty NEEDLE special case.  */
      95    if (needle[0] == '\0')
      96      return (char *) haystack;
      97  
      98    /* Skip until we find the first matching char from NEEDLE.  */
      99    haystack = strchr (haystack, needle[0]);
     100    if (haystack == NULL || needle[1] == '\0')
     101      return (char *) haystack;
     102  
     103    /* Ensure HAYSTACK length is at least as long as NEEDLE length.
     104       Since a match may occur early on in a huge HAYSTACK, use strnlen
     105       and read ahead a few cachelines for improved performance.  */
     106    needle_len = strlen (needle);
     107    haystack_len = __strnlen (haystack, needle_len + 256);
     108    if (haystack_len < needle_len)
     109      return NULL;
     110  
     111    /* Check whether we have a match.  This improves performance since we avoid
     112       the initialization overhead of the two-way algorithm.  */
     113    if (memcmp (haystack, needle, needle_len) == 0)
     114      return (char *) haystack;
     115  
     116    /* Perform the search.  Abstract memory is considered to be an array
     117       of 'unsigned char' values, not an array of 'char' values.  See
     118       ISO C 99 section 6.2.6.1.  */
     119    if (needle_len < LONG_NEEDLE_THRESHOLD)
     120      return two_way_short_needle ((const unsigned char *) haystack,
     121  				  haystack_len,
     122  				 (const unsigned char *) needle, needle_len);
     123    return two_way_long_needle ((const unsigned char *) haystack, haystack_len,
     124  			      (const unsigned char *) needle, needle_len);
     125  }
     126  
     127  typedef char *(*proto_t) (const char *, const char *);
     128  
     129  IMPL (strstr, 1)
     130  IMPL (twoway_strstr, 0)
     131  IMPL (basic_strstr, 0)
     132  
     133  static void
     134  do_one_test (json_ctx_t *json_ctx, impl_t *impl, const char *s1,
     135  	     const char *s2, char *exp_result)
     136  {
     137    size_t i, iters = INNER_LOOP_ITERS_SMALL / 8;
     138    timing_t start, stop, cur;
     139    char *res;
     140  
     141    TIMING_NOW (start);
     142    for (i = 0; i < iters; ++i)
     143      res = CALL (impl, s1, s2);
     144    TIMING_NOW (stop);
     145  
     146    TIMING_DIFF (cur, start, stop);
     147  
     148    json_element_double (json_ctx, (double) cur / (double) iters);
     149  
     150    if (res != exp_result)
     151      {
     152        error (0, 0, "Wrong result in function %s %s %s", impl->name,
     153  	     (res == NULL) ? "(null)" : res,
     154  	     (exp_result == NULL) ? "(null)" : exp_result);
     155        ret = 1;
     156      }
     157  }
     158  
     159  static void
     160  do_test (json_ctx_t *json_ctx, size_t align1, size_t align2, size_t len1,
     161  	 size_t len2, int fail)
     162  {
     163    char *s1 = (char *) (buf1 + align1);
     164    char *s2 = (char *) (buf2 + align2);
     165  
     166    size_t size = sizeof (input) - 1;
     167    size_t pos = (len1 + len2) % size;
     168  
     169    char *ss2 = s2;
     170    for (size_t l = len2; l > 0; l = l > size ? l - size : 0)
     171      {
     172        size_t t = l > size ? size : l;
     173        if (pos + t <= size)
     174  	ss2 = mempcpy (ss2, input + pos, t);
     175        else
     176  	{
     177  	  ss2 = mempcpy (ss2, input + pos, size - pos);
     178  	  ss2 = mempcpy (ss2, input, t - (size - pos));
     179  	}
     180      }
     181    s2[len2] = '\0';
     182  
     183    char *ss1 = s1;
     184    for (size_t l = len1; l > 0; l = l > size ? l - size : 0)
     185      {
     186        size_t t = l > size ? size : l;
     187        memcpy (ss1, input, t);
     188        ss1 += t;
     189      }
     190  
     191    if (!fail)
     192      memcpy (s1 + len1 - len2, s2, len2);
     193    s1[len1] = '\0';
     194  
     195    /* Remove any accidental matches except for the last if !fail.  */
     196    for (ss1 = basic_strstr (s1, s2); ss1; ss1 = basic_strstr (ss1 + 1, s2))
     197      if (fail || ss1 != s1 + len1 - len2)
     198        ++ss1[len2 / 2];
     199  
     200    json_element_object_begin (json_ctx);
     201    json_attr_uint (json_ctx, "len_haystack", len1);
     202    json_attr_uint (json_ctx, "len_needle", len2);
     203    json_attr_uint (json_ctx, "align_haystack", align1);
     204    json_attr_uint (json_ctx, "align_needle", align2);
     205    json_attr_uint (json_ctx, "fail", fail);
     206  
     207    json_array_begin (json_ctx, "timings");
     208  
     209    FOR_EACH_IMPL (impl, 0)
     210      do_one_test (json_ctx, impl, s1, s2, fail ? NULL : s1 + len1 - len2);
     211  
     212    json_array_end (json_ctx);
     213    json_element_object_end (json_ctx);
     214  
     215  }
     216  
     217  /* Test needles which exhibit worst-case performance.  This shows that
     218     basic_strstr is quadratic and thus unsuitable for large needles.
     219     On the other hand Two-way and skip table implementations are linear with
     220     increasing needle sizes.  The slowest cases of the two implementations are
     221     within a factor of 2 on several different microarchitectures.  */
     222  
     223  static void
     224  test_hard_needle (json_ctx_t *json_ctx, size_t ne_len, size_t hs_len)
     225  {
     226    char *ne = (char *) buf1;
     227    char *hs = (char *) buf2;
     228  
     229    /* Hard needle for strstr algorithm using skip table.  This results in many
     230       memcmp calls comparing most of the needle.  */
     231    {
     232      memset (ne, 'a', ne_len);
     233      ne[ne_len] = '\0';
     234      ne[ne_len - 14] = 'b';
     235  
     236      memset (hs, 'a', hs_len);
     237      for (size_t i = ne_len; i <= hs_len; i += ne_len)
     238        {
     239  	hs[i - 5] = 'b';
     240  	hs[i - 62] = 'b';
     241        }
     242  
     243      json_element_object_begin (json_ctx);
     244      json_attr_uint (json_ctx, "len_haystack", hs_len);
     245      json_attr_uint (json_ctx, "len_needle", ne_len);
     246      json_attr_uint (json_ctx, "align_haystack", 0);
     247      json_attr_uint (json_ctx, "align_needle", 0);
     248      json_attr_uint (json_ctx, "fail", 1);
     249      json_attr_string (json_ctx, "desc", "Difficult skiptable(0)");
     250  
     251      json_array_begin (json_ctx, "timings");
     252  
     253      FOR_EACH_IMPL (impl, 0)
     254        do_one_test (json_ctx, impl, hs, ne, NULL);
     255  
     256      json_array_end (json_ctx);
     257      json_element_object_end (json_ctx);
     258    }
     259  
     260    /* 2nd hard needle for strstr algorithm using skip table.  This results in
     261       many memcmp calls comparing most of the needle.  */
     262    {
     263      memset (ne, 'a', ne_len);
     264      ne[ne_len] = '\0';
     265      ne[ne_len - 6] = 'b';
     266  
     267      memset (hs, 'a', hs_len);
     268      for (size_t i = ne_len; i <= hs_len; i += ne_len)
     269        {
     270  	hs[i - 5] = 'b';
     271  	hs[i - 6] = 'b';
     272        }
     273  
     274      json_element_object_begin (json_ctx);
     275      json_attr_uint (json_ctx, "len_haystack", hs_len);
     276      json_attr_uint (json_ctx, "len_needle", ne_len);
     277      json_attr_uint (json_ctx, "align_haystack", 0);
     278      json_attr_uint (json_ctx, "align_needle", 0);
     279      json_attr_uint (json_ctx, "fail", 1);
     280      json_attr_string (json_ctx, "desc", "Difficult skiptable(1)");
     281  
     282      json_array_begin (json_ctx, "timings");
     283  
     284      FOR_EACH_IMPL (impl, 0)
     285        do_one_test (json_ctx, impl, hs, ne, NULL);
     286  
     287      json_array_end (json_ctx);
     288      json_element_object_end (json_ctx);
     289    }
     290  
     291    /* Hard needle for Two-way algorithm - the random input causes a large number
     292       of branch mispredictions which significantly reduces performance on modern
     293       micro architectures.  */
     294    {
     295      for (int i = 0; i < hs_len; i++)
     296        hs[i] = (rand () & 255) > 155 ? 'a' : 'b';
     297      hs[hs_len] = 0;
     298  
     299      memset (ne, 'a', ne_len);
     300      ne[ne_len - 2] = 'b';
     301      ne[0] = 'b';
     302      ne[ne_len] = 0;
     303  
     304      json_element_object_begin (json_ctx);
     305      json_attr_uint (json_ctx, "len_haystack", hs_len);
     306      json_attr_uint (json_ctx, "len_needle", ne_len);
     307      json_attr_uint (json_ctx, "align_haystack", 0);
     308      json_attr_uint (json_ctx, "align_needle", 0);
     309      json_attr_uint (json_ctx, "fail", 1);
     310      json_attr_string (json_ctx, "desc", "Difficult 2-way");
     311  
     312      json_array_begin (json_ctx, "timings");
     313  
     314      FOR_EACH_IMPL (impl, 0)
     315        do_one_test (json_ctx, impl, hs, ne, NULL);
     316  
     317      json_array_end (json_ctx);
     318      json_element_object_end (json_ctx);
     319    }
     320  
     321    /* Hard needle for standard algorithm testing first few characters of
     322     * needle.  */
     323    {
     324      for (int i = 0; i < hs_len; i++)
     325        hs[i] = (rand () & 255) >= 128 ? 'a' : 'b';
     326      hs[hs_len] = 0;
     327  
     328      for (int i = 0; i < ne_len; i++)
     329        {
     330  	if (i % 3 == 0)
     331  	  ne[i] = 'a';
     332  	else if (i % 3 == 1)
     333  	  ne[i] = 'b';
     334  	else
     335  	  ne[i] = 'c';
     336        }
     337      ne[ne_len] = 0;
     338  
     339      json_element_object_begin (json_ctx);
     340      json_attr_uint (json_ctx, "len_haystack", hs_len);
     341      json_attr_uint (json_ctx, "len_needle", ne_len);
     342      json_attr_uint (json_ctx, "align_haystack", 0);
     343      json_attr_uint (json_ctx, "align_needle", 0);
     344      json_attr_uint (json_ctx, "fail", 1);
     345      json_attr_string (json_ctx, "desc", "Difficult testing first 2");
     346  
     347      json_array_begin (json_ctx, "timings");
     348  
     349      FOR_EACH_IMPL (impl, 0)
     350        do_one_test (json_ctx, impl, hs, ne, NULL);
     351  
     352      json_array_end (json_ctx);
     353      json_element_object_end (json_ctx);
     354    }
     355  }
     356  
     357  static int
     358  test_main (void)
     359  {
     360    json_ctx_t json_ctx;
     361    test_init ();
     362  
     363    json_init (&json_ctx, 0, stdout);
     364  
     365    json_document_begin (&json_ctx);
     366    json_attr_string (&json_ctx, "timing_type", TIMING_TYPE);
     367  
     368    json_attr_object_begin (&json_ctx, "functions");
     369    json_attr_object_begin (&json_ctx, TEST_NAME);
     370    json_attr_string (&json_ctx, "bench-variant", "");
     371  
     372    json_array_begin (&json_ctx, "ifuncs");
     373    FOR_EACH_IMPL (impl, 0)
     374      json_element_string (&json_ctx, impl->name);
     375    json_array_end (&json_ctx);
     376  
     377    json_array_begin (&json_ctx, "results");
     378  
     379    for (size_t hlen = 8; hlen <= 256;)
     380      for (size_t klen = 1; klen <= 16; klen++)
     381        {
     382  	do_test (&json_ctx, 1, 3, hlen, klen, 0);
     383  	do_test (&json_ctx, 0, 9, hlen, klen, 1);
     384  
     385  	do_test (&json_ctx, 1, 3, hlen + 1, klen, 0);
     386  	do_test (&json_ctx, 0, 9, hlen + 1, klen, 1);
     387  
     388  	do_test (&json_ctx, getpagesize () - 15, 9, hlen, klen, 1);
     389  	if (hlen < 64)
     390  	  {
     391  	    hlen += 8;
     392  	  }
     393  	else
     394  	  {
     395  	    hlen += 32;
     396  	  }
     397        }
     398  
     399    for (size_t hlen = 256; hlen <= 65536; hlen *= 2)
     400      for (size_t klen = 4; klen <= 256; klen *= 2)
     401        {
     402  	do_test (&json_ctx, 1, 11, hlen, klen, 0);
     403  	do_test (&json_ctx, 14, 5, hlen, klen, 1);
     404  
     405      do_test (&json_ctx, 1, 11, hlen + 1, klen + 1, 0);
     406      do_test (&json_ctx, 14, 5, hlen + 1, klen + 1, 1);
     407  
     408  	do_test (&json_ctx, 1, 11, hlen + 1, klen, 0);
     409  	do_test (&json_ctx, 14, 5, hlen + 1, klen, 1);
     410  
     411  	do_test (&json_ctx, getpagesize () - 15, 5, hlen + 1, klen, 1);
     412        }
     413  
     414    test_hard_needle (&json_ctx, 64, 65536);
     415    test_hard_needle (&json_ctx, 256, 65536);
     416    test_hard_needle (&json_ctx, 1024, 65536);
     417  
     418    json_array_end (&json_ctx);
     419    json_attr_object_end (&json_ctx);
     420    json_attr_object_end (&json_ctx);
     421    json_document_end (&json_ctx);
     422  
     423    return ret;
     424  }
     425  
     426  #include <support/test-driver.c>