(root)/
glibc-2.38/
posix/
tst-regex.c
       1  /* Copyright (C) 2001-2023 Free Software Foundation, Inc.
       2     This file is part of the GNU C Library.
       3  
       4     The GNU C Library is free software; you can redistribute it and/or
       5     modify it under the terms of the GNU Lesser General Public
       6     License as published by the Free Software Foundation; either
       7     version 2.1 of the License, or (at your option) any later version.
       8  
       9     The GNU C Library is distributed in the hope that it will be useful,
      10     but WITHOUT ANY WARRANTY; without even the implied warranty of
      11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      12     Lesser General Public License for more details.
      13  
      14     You should have received a copy of the GNU Lesser General Public
      15     License along with the GNU C Library; if not, see
      16     <https://www.gnu.org/licenses/>.  */
      17  
      18  #include <assert.h>
      19  #include <errno.h>
      20  #include <error.h>
      21  #include <fcntl.h>
      22  #include <getopt.h>
      23  #include <iconv.h>
      24  #include <locale.h>
      25  #include <mcheck.h>
      26  #include <stdint.h>
      27  #include <stdio.h>
      28  #include <stdlib.h>
      29  #include <string.h>
      30  #include <time.h>
      31  #include <unistd.h>
      32  #include <sys/stat.h>
      33  #include <sys/types.h>
      34  #include <regex.h>
      35  #include <support/support.h>
      36  
      37  
      38  #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
      39  static clockid_t cl;
      40  static int use_clock;
      41  #endif
      42  static iconv_t cd;
      43  static char *mem;
      44  static char *umem;
      45  static size_t memlen;
      46  static size_t umemlen;
      47  static int timing;
      48  
      49  static int test_expr (const char *expr, int expected, int expectedicase);
      50  static int run_test (const char *expr, const char *mem, size_t memlen,
      51  		     int icase, int expected);
      52  static int run_test_backwards (const char *expr, const char *mem,
      53  			       size_t memlen, int icase, int expected);
      54  
      55  
      56  static int
      57  do_test (void)
      58  {
      59    const char *file;
      60    int fd;
      61    struct stat st;
      62    int result = 0;
      63    char *inmem;
      64    char *outmem;
      65    size_t inlen;
      66    size_t outlen;
      67  
      68    mtrace ();
      69  
      70    /* Make the content of the file available in memory.  */
      71    file = "./tst-regex.input";
      72    fd = open (file, O_RDONLY);
      73    if (fd == -1)
      74      error (EXIT_FAILURE, errno, "cannot open %s", basename (file));
      75  
      76    if (fstat (fd, &st) != 0)
      77      error (EXIT_FAILURE, errno, "cannot stat %s", basename (file));
      78    memlen = st.st_size;
      79  
      80    mem = (char *) malloc (memlen + 1);
      81    if (mem == NULL)
      82      error (EXIT_FAILURE, errno, "while allocating buffer");
      83  
      84    if ((size_t) read (fd, mem, memlen) != memlen)
      85      error (EXIT_FAILURE, 0, "cannot read entire file");
      86    mem[memlen] = '\0';
      87  
      88    close (fd);
      89  
      90    /* We have to convert a few things from UTF-8 to Latin-1.  */
      91    cd = iconv_open ("ISO-8859-1", "UTF-8");
      92    if (cd == (iconv_t) -1)
      93      error (EXIT_FAILURE, errno, "cannot get conversion descriptor");
      94  
      95    /* For the second test we have to convert the file content to Latin-1.
      96       This cannot grow the data.  */
      97    umem = (char *) malloc (memlen + 1);
      98    if (umem == NULL)
      99      error (EXIT_FAILURE, errno, "while allocating buffer");
     100  
     101    inmem = mem;
     102    inlen = memlen;
     103    outmem = umem;
     104    outlen = memlen;
     105    iconv (cd, &inmem, &inlen, &outmem, &outlen);
     106    umemlen = outmem - umem;
     107    if (inlen != 0)
     108      error (EXIT_FAILURE, errno, "cannot convert buffer");
     109    umem[umemlen] = '\0';
     110  
     111  #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
     112  # if _POSIX_CPUTIME == 0
     113    if (sysconf (_SC_CPUTIME) < 0)
     114      use_clock = 0;
     115    else
     116  # endif
     117      /* See whether we can use the CPU clock.  */
     118      use_clock = clock_getcpuclockid (0, &cl) == 0;
     119  #endif
     120  
     121  #ifdef DEBUG
     122    re_set_syntax (RE_DEBUG);
     123  #endif
     124  
     125    /* Run the actual tests.  All tests are run in a single-byte and a
     126       multi-byte locale.  */
     127    result |= test_expr ("[äáàâéèêíìîñöóòôüúùû]", 4, 4);
     128    result |= test_expr ("G.ran", 2, 3);
     129    result |= test_expr ("G.\\{1\\}ran", 2, 3);
     130    result |= test_expr ("G.*ran", 3, 43);
     131    result |= test_expr ("[äáàâ]", 0, 0);
     132    result |= test_expr ("Uddeborg", 2, 2);
     133    result |= test_expr (".Uddeborg", 2, 2);
     134  
     135    /* Free the resources.  */
     136    free (umem);
     137    iconv_close (cd);
     138    free (mem);
     139  
     140    return result;
     141  }
     142  
     143  
     144  static int
     145  test_expr (const char *expr, int expected, int expectedicase)
     146  {
     147    int result = 0;
     148    char *inmem;
     149    char *outmem;
     150    size_t inlen;
     151    size_t outlen;
     152    char *uexpr;
     153  
     154    /* First test: search with basic C.UTF-8 locale.  */
     155    printf ("INFO: Testing C.UTF-8.\n");
     156    xsetlocale (LC_ALL, "C.UTF-8");
     157  
     158    printf ("\nTest \"%s\" with multi-byte locale\n", expr);
     159    result |= run_test (expr, mem, memlen, 0, expected);
     160    printf ("\nTest \"%s\" with multi-byte locale, case insensitive\n", expr);
     161    result |= run_test (expr, mem, memlen, 1, expectedicase);
     162    printf ("\nTest \"%s\" backwards with multi-byte locale\n", expr);
     163    result |= run_test_backwards (expr, mem, memlen, 0, expected);
     164    printf ("\nTest \"%s\" backwards with multi-byte locale, case insensitive\n",
     165  	  expr);
     166    result |= run_test_backwards (expr, mem, memlen, 1, expectedicase);
     167  
     168    /* Second test: search with an UTF-8 locale.  */
     169    printf ("INFO: Testing de_DE.UTF-8.\n");
     170    xsetlocale (LC_ALL, "de_DE.UTF-8");
     171  
     172    printf ("\nTest \"%s\" with multi-byte locale\n", expr);
     173    result |= run_test (expr, mem, memlen, 0, expected);
     174    printf ("\nTest \"%s\" with multi-byte locale, case insensitive\n", expr);
     175    result |= run_test (expr, mem, memlen, 1, expectedicase);
     176    printf ("\nTest \"%s\" backwards with multi-byte locale\n", expr);
     177    result |= run_test_backwards (expr, mem, memlen, 0, expected);
     178    printf ("\nTest \"%s\" backwards with multi-byte locale, case insensitive\n",
     179  	  expr);
     180    result |= run_test_backwards (expr, mem, memlen, 1, expectedicase);
     181  
     182    /* Second test: search with an ISO-8859-1 locale.  */
     183    printf ("INFO: Testing de_DE.ISO-8859-1.\n");
     184    xsetlocale (LC_ALL, "de_DE.ISO-8859-1");
     185  
     186    inmem = (char *) expr;
     187    inlen = strlen (expr);
     188    outlen = inlen;
     189    outmem = uexpr = alloca (outlen + 1);
     190    memset (outmem, '\0', outlen + 1);
     191    iconv (cd, &inmem, &inlen, &outmem, &outlen);
     192    if (inlen != 0)
     193      error (EXIT_FAILURE, errno, "cannot convert expression");
     194  
     195    /* Run the tests.  */
     196    printf ("\nTest \"%s\" with 8-bit locale\n", expr);
     197    result |= run_test (uexpr, umem, umemlen, 0, expected);
     198    printf ("\nTest \"%s\" with 8-bit locale, case insensitive\n", expr);
     199    result |= run_test (uexpr, umem, umemlen, 1, expectedicase);
     200    printf ("\nTest \"%s\" backwards with 8-bit locale\n", expr);
     201    result |= run_test_backwards (uexpr, umem, umemlen, 0, expected);
     202    printf ("\nTest \"%s\" backwards with 8-bit locale, case insensitive\n",
     203  	  expr);
     204    result |= run_test_backwards (uexpr, umem, umemlen, 1, expectedicase);
     205  
     206    return result;
     207  }
     208  
     209  
     210  static int
     211  run_test (const char *expr, const char *mem, size_t memlen, int icase,
     212  	  int expected)
     213  {
     214  #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
     215    struct timespec start;
     216    struct timespec finish;
     217  #endif
     218    regex_t re;
     219    int err;
     220    size_t offset;
     221    int cnt;
     222  
     223  #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
     224    if (use_clock && !timing)
     225      use_clock = clock_gettime (cl, &start) == 0;
     226  #endif
     227  
     228    err = regcomp (&re, expr, REG_NEWLINE | (icase ? REG_ICASE : 0));
     229    if (err != REG_NOERROR)
     230      {
     231        char buf[200];
     232        regerror (err, &re, buf, sizeof buf);
     233        error (EXIT_FAILURE, 0, "cannot compile expression: %s", buf);
     234      }
     235  
     236    cnt = 0;
     237    offset = 0;
     238    assert (mem[memlen] == '\0');
     239    while (offset < memlen)
     240      {
     241        regmatch_t ma[1];
     242        const char *sp;
     243        const char *ep;
     244  
     245        err = regexec (&re, mem + offset, 1, ma, 0);
     246        if (err == REG_NOMATCH)
     247  	break;
     248  
     249        if (err != REG_NOERROR)
     250  	{
     251  	  char buf[200];
     252  	  regerror (err, &re, buf, sizeof buf);
     253  	  error (EXIT_FAILURE, 0, "cannot use expression: %s", buf);
     254  	}
     255  
     256        assert (ma[0].rm_so >= 0);
     257        sp = mem + offset + ma[0].rm_so;
     258        while (sp > mem && sp[-1] != '\n')
     259  	--sp;
     260  
     261        ep = mem + offset + ma[0].rm_so;
     262        while (*ep != '\0' && *ep != '\n')
     263  	++ep;
     264  
     265        printf ("match %d: \"%.*s\"\n", ++cnt, (int) (ep - sp), sp);
     266  
     267        offset = ep + 1 - mem;
     268      }
     269  
     270    regfree (&re);
     271  
     272  #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
     273    if (use_clock && !timing)
     274      {
     275        use_clock = clock_gettime (cl, &finish) == 0;
     276        if (use_clock)
     277  	{
     278  	  if (finish.tv_nsec < start.tv_nsec)
     279  	    {
     280  	      finish.tv_nsec -= start.tv_nsec - 1000000000;
     281  	      finish.tv_sec -= 1 + start.tv_sec;
     282  	    }
     283  	  else
     284  	    {
     285  	      finish.tv_nsec -= start.tv_nsec;
     286  	      finish.tv_sec -= start.tv_sec;
     287  	    }
     288  
     289  	  printf ("elapsed time: %jd.%09jd sec\n",
     290  		  (intmax_t) finish.tv_sec, (intmax_t) finish.tv_nsec);
     291  	}
     292      }
     293  
     294    if (use_clock && timing)
     295      {
     296        struct timespec mintime = { .tv_sec = 24 * 60 * 60 };
     297  
     298        for (int i = 0; i < 10; ++i)
     299  	{
     300  	  offset = 0;
     301  	  use_clock = clock_gettime (cl, &start) == 0;
     302  
     303  	  if (!use_clock)
     304  	    continue;
     305  
     306  	  err = regcomp (&re, expr, REG_NEWLINE | (icase ? REG_ICASE : 0));
     307  	  if (err != REG_NOERROR)
     308  	    continue;
     309  
     310  	  while (offset < memlen)
     311  	    {
     312  	      regmatch_t ma[1];
     313  
     314  	      err = regexec (&re, mem + offset, 1, ma, 0);
     315  	      if (err != REG_NOERROR)
     316  		break;
     317  
     318  	      offset += ma[0].rm_eo;
     319  	    }
     320  
     321  	  regfree (&re);
     322  
     323  	  use_clock = clock_gettime (cl, &finish) == 0;
     324  	  if (use_clock)
     325  	    {
     326  	      if (finish.tv_nsec < start.tv_nsec)
     327  		{
     328  		  finish.tv_nsec -= start.tv_nsec - 1000000000;
     329  		  finish.tv_sec -= 1 + start.tv_sec;
     330  		}
     331  	      else
     332  		{
     333  		  finish.tv_nsec -= start.tv_nsec;
     334  		  finish.tv_sec -= start.tv_sec;
     335  		}
     336  	      if (finish.tv_sec < mintime.tv_sec
     337  		  || (finish.tv_sec == mintime.tv_sec
     338  		      && finish.tv_nsec < mintime.tv_nsec))
     339  		mintime = finish;
     340  	    }
     341  	}
     342        printf ("elapsed time: %jd.%09jd sec\n",
     343  	      (intmax_t) mintime.tv_sec, (intmax_t) mintime.tv_nsec);
     344      }
     345  #endif
     346  
     347    /* Return an error if the number of matches found is not match we
     348       expect.  */
     349    return cnt != expected;
     350  }
     351  
     352  
     353  static int
     354  run_test_backwards (const char *expr, const char *mem, size_t memlen,
     355  		    int icase, int expected)
     356  {
     357  #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
     358    struct timespec start;
     359    struct timespec finish;
     360  #endif
     361    struct re_pattern_buffer re;
     362    const char *err;
     363    size_t offset;
     364    int cnt;
     365  
     366  #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
     367    if (use_clock && !timing)
     368      use_clock = clock_gettime (cl, &start) == 0;
     369  #endif
     370  
     371    re_set_syntax ((RE_SYNTAX_POSIX_BASIC & ~RE_DOT_NEWLINE)
     372  		 | RE_HAT_LISTS_NOT_NEWLINE
     373  		 | (icase ? RE_ICASE : 0));
     374  
     375    memset (&re, 0, sizeof (re));
     376    re.fastmap = malloc (256);
     377    if (re.fastmap == NULL)
     378      error (EXIT_FAILURE, errno, "cannot allocate fastmap");
     379  
     380    err = re_compile_pattern (expr, strlen (expr), &re);
     381    if (err != NULL)
     382      error (EXIT_FAILURE, 0, "cannot compile expression: %s", err);
     383  
     384    if (re_compile_fastmap (&re))
     385      error (EXIT_FAILURE, 0, "couldn't compile fastmap");
     386  
     387    cnt = 0;
     388    offset = memlen;
     389    assert (mem[memlen] == '\0');
     390    while (offset <= memlen)
     391      {
     392        int start;
     393        const char *sp;
     394        const char *ep;
     395  
     396        start = re_search (&re, mem, memlen, offset, -offset, NULL);
     397        if (start == -1)
     398  	break;
     399  
     400        if (start == -2)
     401  	error (EXIT_FAILURE, 0, "internal error in re_search");
     402  
     403        sp = mem + start;
     404        while (sp > mem && sp[-1] != '\n')
     405  	--sp;
     406  
     407        ep = mem + start;
     408        while (*ep != '\0' && *ep != '\n')
     409  	++ep;
     410  
     411        printf ("match %d: \"%.*s\"\n", ++cnt, (int) (ep - sp), sp);
     412  
     413        offset = sp - 1 - mem;
     414      }
     415  
     416    regfree (&re);
     417  
     418  #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
     419    if (use_clock && !timing)
     420      {
     421        use_clock = clock_gettime (cl, &finish) == 0;
     422        if (use_clock)
     423  	{
     424  	  if (finish.tv_nsec < start.tv_nsec)
     425  	    {
     426  	      finish.tv_nsec -= start.tv_nsec - 1000000000;
     427  	      finish.tv_sec -= 1 + start.tv_sec;
     428  	    }
     429  	  else
     430  	    {
     431  	      finish.tv_nsec -= start.tv_nsec;
     432  	      finish.tv_sec -= start.tv_sec;
     433  	    }
     434  
     435  	  printf ("elapsed time: %jd.%09jd sec\n",
     436  		  (intmax_t) finish.tv_sec, (intmax_t) finish.tv_nsec);
     437  	}
     438      }
     439  
     440    if (use_clock && timing)
     441      {
     442        struct timespec mintime = { .tv_sec = 24 * 60 * 60 };
     443  
     444        for (int i = 0; i < 10; ++i)
     445  	{
     446  	  offset = memlen;
     447  	  use_clock = clock_gettime (cl, &start) == 0;
     448  
     449  	  if (!use_clock)
     450  	    continue;
     451  
     452  	  memset (&re, 0, sizeof (re));
     453  	  re.fastmap = malloc (256);
     454  	  if (re.fastmap == NULL)
     455  	    continue;
     456  
     457  	  err = re_compile_pattern (expr, strlen (expr), &re);
     458  	  if (err != NULL)
     459  	    continue;
     460  
     461  	  if (re_compile_fastmap (&re))
     462  	    {
     463  	      regfree (&re);
     464  	      continue;
     465  	    }
     466  
     467  	  while (offset <= memlen)
     468  	    {
     469  	      int start;
     470  	      const char *sp;
     471  
     472  	      start = re_search (&re, mem, memlen, offset, -offset, NULL);
     473  	      if (start < -1)
     474  		break;
     475  
     476  	      sp = mem + start;
     477  	      while (sp > mem && sp[-1] != '\n')
     478  		--sp;
     479  
     480  	      offset = sp - 1 - mem;
     481  	    }
     482  
     483  	  regfree (&re);
     484  
     485  	  use_clock = clock_gettime (cl, &finish) == 0;
     486  	  if (use_clock)
     487  	    {
     488  	      if (finish.tv_nsec < start.tv_nsec)
     489  		{
     490  		  finish.tv_nsec -= start.tv_nsec - 1000000000;
     491  		  finish.tv_sec -= 1 + start.tv_sec;
     492  		}
     493  	      else
     494  		{
     495  		  finish.tv_nsec -= start.tv_nsec;
     496  		  finish.tv_sec -= start.tv_sec;
     497  		}
     498  	      if (finish.tv_sec < mintime.tv_sec
     499  		  || (finish.tv_sec == mintime.tv_sec
     500  		      && finish.tv_nsec < mintime.tv_nsec))
     501  		mintime = finish;
     502  	    }
     503  	}
     504        printf ("elapsed time: %jd.%09jd sec\n",
     505  	      (intmax_t) mintime.tv_sec, (intmax_t) mintime.tv_nsec);
     506      }
     507  #endif
     508  
     509    /* Return an error if the number of matches found is not match we
     510       expect.  */
     511    return cnt != expected;
     512  }
     513  
/* Settings for the test skeleton included below.  The timeout is
   raised because a run with --timing repeats every search ten times
   and therefore needs more time.  */
#define TIMEOUT 50
/* Additional long option "--timing": it takes no argument and names
   the timing variable with the value 1.  Presumably the skeleton adds
   this entry to its getopt_long option table, so that the option sets
   timing to 1 -- the table itself is not visible here.  */
#define CMDLINE_OPTIONS \
   {"timing", no_argument, &timing, 1 },
/* The function the skeleton runs as the test.  */
#define TEST_FUNCTION do_test ()
#include "../test-skeleton.c"