1  /* Test compilation of truncated regular expressions.
       2     Copyright (C) 2018-2023 Free Software Foundation, Inc.
       3     This file is part of the GNU C Library.
       4  
       5     The GNU C Library is free software; you can redistribute it and/or
       6     modify it under the terms of the GNU Lesser General Public
       7     License as published by the Free Software Foundation; either
       8     version 2.1 of the License, or (at your option) any later version.
       9  
      10     The GNU C Library is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      13     Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public
      16     License along with the GNU C Library; if not, see
      17     <https://www.gnu.org/licenses/>.  */
      18  
/* This test constructs various patterns in an attempt to trigger
   buffer over-reads in the regular expression compiler, such as
   bug 23578.  */
      22  
      23  #include <array_length.h>
      24  #include <errno.h>
      25  #include <locale.h>
      26  #include <regex.h>
      27  #include <stdio.h>
      28  #include <stdlib.h>
      29  #include <string.h>
      30  #include <support/check.h>
      31  #include <support/next_to_fault.h>
      32  #include <support/support.h>
      33  #include <support/test-driver.h>
      34  #include <wchar.h>
      35  
/* Locales to test.  Each one is activated with setlocale (LC_ALL, ...)
   before patterns are generated; a locale that cannot be activated is
   recorded as a test failure and skipped.  The second dimension is
   sized for the longest name, "de_DE.ISO-8859-1" (16 bytes), plus the
   terminating null byte.  */
static const char locales[][17] =
  {
    "C",
    "C.UTF-8",
    "en_US.UTF-8",
    "de_DE.ISO-8859-1",
  };
      44  
/* Syntax options.  Will be combined with other flags: the test
   installs each entry in re_syntax_options twice, once unchanged and
   once with RE_ICASE added.  */
static const reg_syntax_t syntaxes[] =
  {
    RE_SYNTAX_EMACS,
    RE_SYNTAX_AWK,
    RE_SYNTAX_GNU_AWK,
    RE_SYNTAX_POSIX_AWK,
    RE_SYNTAX_GREP,
    RE_SYNTAX_EGREP,
    RE_SYNTAX_POSIX_EGREP,
    RE_SYNTAX_POSIX_BASIC,
    RE_SYNTAX_POSIX_EXTENDED,
    RE_SYNTAX_POSIX_MINIMAL_EXTENDED,
  };
      59  
/* Trailing characters placed after the initial character.  Each entry
   is appended to the multibyte encoding of the character under test,
   so that the pattern ends right after an opening bracket, a
   backslash, an opening parenthesis, or a combination of these (the
   empty entry leaves the character on its own).  The second dimension
   (4) is larger than the longest entry, which needs two bytes plus
   the terminating null byte.  */
static const char trailing_strings[][4] =
  {
    "",
    "[",
    "\\",
    "[\\",
    "(",
    "(\\",
    "\\(",
  };
      71  
      72  static int
      73  do_test (void)
      74  {
      75    /* Staging buffer for the constructed regular expression.  */
      76    char buffer[16];
      77  
      78    /* Allocation used to detect over-reading by the regular expression
      79       compiler.  */
      80    struct support_next_to_fault ntf
      81      = support_next_to_fault_allocate (sizeof (buffer));
      82  
      83    /* Arbitrary Unicode codepoint at which we stop generating
      84       characters.  We do not probe the whole range because that would
      85       take too long due to combinatorial explosion as the result of
      86       combination with other flags.  */
      87    static const wchar_t last_character = 0xfff;
      88  
      89    for (size_t locale_idx = 0; locale_idx < array_length (locales);
      90         ++ locale_idx)
      91      {
      92        if (setlocale (LC_ALL, locales[locale_idx]) == NULL)
      93          {
      94            support_record_failure ();
      95            printf ("error: setlocale (\"%s\"): %m", locales[locale_idx]);
      96            continue;
      97          }
      98        if (test_verbose > 0)
      99          printf ("info: testing locale \"%s\"\n", locales[locale_idx]);
     100  
     101        for (wchar_t wc = 0; wc <= last_character; ++wc)
     102          {
     103            char *after_wc;
     104            if (wc == 0)
     105              {
     106                /* wcrtomb treats L'\0' in a special way.  */
     107                *buffer = '\0';
     108                after_wc = &buffer[1];
     109              }
     110            else
     111              {
     112                mbstate_t ps = { };
     113                size_t ret = wcrtomb (buffer, wc, &ps);
     114                if (ret == (size_t) -1)
     115                  {
     116                    /* EILSEQ means that the target character set
     117                       cannot encode the character.  */
     118                    if (errno != EILSEQ)
     119                      {
     120                        support_record_failure ();
     121                        printf ("error: wcrtomb (0x%x) failed: %m\n",
     122                                (unsigned) wc);
     123                      }
     124                    continue;
     125                  }
     126                TEST_VERIFY_EXIT (ret != 0);
     127                after_wc = &buffer[ret];
     128              }
     129  
     130            for (size_t trailing_idx = 0;
     131                 trailing_idx < array_length (trailing_strings);
     132                 ++trailing_idx)
     133              {
     134                char *after_trailing
     135                  = stpcpy (after_wc, trailing_strings[trailing_idx]);
     136  
     137                for (int do_nul = 0; do_nul < 2; ++do_nul)
     138                  {
     139                    char *after_nul;
     140                    if (do_nul)
     141                      {
     142                        *after_trailing = '\0';
     143                        after_nul = &after_trailing[1];
     144                      }
     145                    else
     146                      after_nul = after_trailing;
     147  
     148                    size_t length = after_nul - buffer;
     149  
     150                    /* Make sure that the faulting region starts
     151                       after the used portion of the buffer.  */
     152                    char *ntf_start = ntf.buffer + sizeof (buffer) - length;
     153                    memcpy (ntf_start, buffer, length);
     154  
     155                    for (const reg_syntax_t *psyntax = syntaxes;
     156                         psyntax < array_end (syntaxes); ++psyntax)
     157                      for (int do_icase = 0; do_icase < 2; ++do_icase)
     158                        {
     159                          re_syntax_options = *psyntax;
     160                          if (do_icase)
     161                            re_syntax_options |= RE_ICASE;
     162  
     163                          regex_t reg;
     164                          memset (&reg, 0, sizeof (reg));
     165                          const char *msg = re_compile_pattern
     166                            (ntf_start, length, &reg);
     167                          if (msg != NULL)
     168                            {
     169                              if (test_verbose > 0)
     170                                {
     171                                  char *quoted = support_quote_blob
     172                                    (buffer, length);
     173                                  printf ("info: compilation failed for pattern"
     174                                          " \"%s\", syntax 0x%lx: %s\n",
     175                                          quoted, re_syntax_options, msg);
     176                                  free (quoted);
     177                                }
     178                            }
     179                          else
     180                            regfree (&reg);
     181                        }
     182                  }
     183              }
     184          }
     185      }
     186  
     187    support_next_to_fault_free (&ntf);
     188  
     189    return 0;
     190  }
     191  
     192  #include <support/test-driver.c>