(root)/
gettext-0.22.4/
gettext-tools/
libgrep/
m-fgrep.c
       1  /* Pattern Matcher for Fixed String search.
       2     Copyright (C) 1992, 1998, 2000, 2005-2006, 2010, 2013, 2020, 2023 Free Software Foundation, Inc.
       3  
       4     This program is free software: you can redistribute it and/or modify
       5     it under the terms of the GNU General Public License as published by
       6     the Free Software Foundation; either version 3 of the License, or
       7     (at your option) any later version.
       8  
       9     This program is distributed in the hope that it will be useful,
      10     but WITHOUT ANY WARRANTY; without even the implied warranty of
      11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12     GNU General Public License for more details.
      13  
      14     You should have received a copy of the GNU General Public License
      15     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      16  
      17  #ifdef HAVE_CONFIG_H
      18  # include <config.h>
      19  #endif
      20  
      21  /* Specification.  */
      22  #include "libgrep.h"
      23  
      24  #include <ctype.h>
      25  #include <limits.h>
      26  #include <stdbool.h>
      27  #include <stdlib.h>
      28  #include <string.h>
      29  #include <wchar.h>
      30  
      31  #include "error.h"
      32  #include "exitfail.h"
      33  #include "xalloc.h"
      34  #include "kwset.h"
      35  #include "gettext.h"
      36  #define _(str) gettext (str)
      37  
      38  #define TOLOWER(C) (isupper (C) ? tolower (C) : (C))
      39  #define IS_WORD_CONSTITUENT(C) (isalnum (C) || (C) == '_')
      40  
      41  #define NCHAR (UCHAR_MAX + 1)
      42  
      43  struct compiled_kwset {
      44    kwset_t kwset;
      45    char *trans;
      46    bool match_words;
      47    bool match_lines;
      48    char eolbyte;
      49  };
      50  
      51  static void
      52  kwsinit (struct compiled_kwset *ckwset,
      53           bool match_icase, bool match_words, bool match_lines, char eolbyte)
      54  {
      55    if (match_icase)
      56      {
      57        int i;
      58  
      59        ckwset->trans = XNMALLOC (NCHAR, char);
      60        for (i = 0; i < NCHAR; i++)
      61          ckwset->trans[i] = TOLOWER (i);
      62        ckwset->kwset = kwsalloc (ckwset->trans);
      63      }
      64    else
      65      {
      66        ckwset->trans = NULL;
      67        ckwset->kwset = kwsalloc (NULL);
      68      }
      69    if (ckwset->kwset == NULL)
      70      error (exit_failure, 0, _("memory exhausted"));
      71    ckwset->match_words = match_words;
      72    ckwset->match_lines = match_lines;
      73    ckwset->eolbyte = eolbyte;
      74  }
      75  
      76  static void *
      77  Fcompile (const char *pattern, size_t pattern_size,
      78            bool match_icase, bool match_words, bool match_lines,
      79            char eolbyte)
      80  {
      81    struct compiled_kwset *ckwset;
      82    const char *beg;
      83    const char *err;
      84  
      85    ckwset = XMALLOC (struct compiled_kwset);
      86    kwsinit (ckwset, match_icase, match_words, match_lines, eolbyte);
      87  
      88    beg = pattern;
      89    do
      90      {
      91        const char *lim;
      92  
      93        for (lim = beg; lim < pattern + pattern_size && *lim != '\n'; ++lim)
      94          ;
      95        if ((err = kwsincr (ckwset->kwset, beg, lim - beg)) != NULL)
      96          error (exit_failure, 0, "%s", err);
      97        if (lim < pattern + pattern_size)
      98          ++lim;
      99        beg = lim;
     100      }
     101    while (beg < pattern + pattern_size);
     102  
     103    if ((err = kwsprep (ckwset->kwset)) != NULL)
     104      error (exit_failure, 0, "%s", err);
     105    return ckwset;
     106  }
     107  
     108  /* This function allocate the array which correspond to "buf".
     109     Then this check multibyte string and mark on the positions which
     110     are not singlebyte character nor the first byte of a multibyte
     111     character.  Caller must free the array.  */
     112  static char*
     113  check_multibyte_string (const char *buf, size_t buf_size)
     114  {
     115    char *mb_properties;
     116    mbstate_t cur_state;
     117    int i;
     118  
     119    mb_properties = (char *) malloc (buf_size);
     120    if (mb_properties == NULL)
     121      error (exit_failure, 0, _("memory exhausted"));
     122  
     123    memset (&cur_state, 0, sizeof (mbstate_t));
     124    memset (mb_properties, 0, sizeof (char) * buf_size);
     125    for (i = 0; i < buf_size ;)
     126      {
     127        size_t mbclen;
     128        mbclen = mbrlen (buf + i, buf_size - i, &cur_state);
     129  
     130        if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
     131          {
     132            /* An invalid sequence, or a truncated multibyte character.
     133               We treat it as a singlebyte character.  */
     134            mbclen = 1;
     135          }
     136        mb_properties[i] = mbclen;
     137        i += mbclen;
     138      }
     139  
     140    return mb_properties;
     141  }
     142  
     143  static size_t
     144  Fexecute (const void *compiled_pattern, const char *buf, size_t buf_size,
     145            size_t *match_size, bool exact)
     146  {
     147    const struct compiled_kwset *ckwset =
     148      (const struct compiled_kwset *) compiled_pattern;
     149    char eol = ckwset->eolbyte;
     150    register const char *buflim = buf + buf_size;
     151    register const char *beg;
     152    register size_t len;
     153    char *mb_properties;
     154    if (MB_CUR_MAX > 1)
     155      mb_properties = check_multibyte_string (buf, buf_size);
     156    else
     157      mb_properties = NULL;
     158  
     159    for (beg = buf; beg <= buflim; ++beg)
     160      {
     161        struct kwsmatch kwsmatch;
     162        size_t offset = kwsexec (ckwset->kwset, beg, buflim - beg, &kwsmatch);
     163        if (offset == (size_t) -1)
     164          {
     165            free (mb_properties);
     166            return offset;
     167          }
     168        if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0)
     169          continue; /* It is a part of multibyte character.  */
     170        beg += offset;
     171        len = kwsmatch.size[0];
     172        if (exact)
     173          {
     174            *match_size = len;
     175            free (mb_properties);
     176            return beg - buf;
     177          }
     178        if (ckwset->match_lines)
     179          {
     180            if (beg > buf && beg[-1] != eol)
     181              continue;
     182            if (beg + len < buflim && beg[len] != eol)
     183              continue;
     184            goto success;
     185          }
     186        else if (ckwset->match_words)
     187          {
     188            register const char *curr;
     189            for (curr = beg; len; )
     190              {
     191                if (curr > buf && IS_WORD_CONSTITUENT ((unsigned char) curr[-1]))
     192                  break;
     193                if (curr + len < buflim
     194                    && IS_WORD_CONSTITUENT ((unsigned char) curr[len]))
     195                  {
     196                    offset = kwsexec (ckwset->kwset, beg, --len, &kwsmatch);
     197                    if (offset == (size_t) -1)
     198                      {
     199                        free (mb_properties);
     200                        return offset;
     201                      }
     202                    curr = beg + offset;
     203                    len = kwsmatch.size[0];
     204                  }
     205                else
     206                  goto success;
     207              }
     208          }
     209        else
     210          goto success;
     211      }
     212  
     213    free (mb_properties);
     214    return -1;
     215  
     216   success:
     217    {
     218      register const char *end;
     219  
     220      end = (const char *) memchr (beg + len, eol, buflim - (beg + len));
     221      if (end != NULL)
     222        end++;
     223      else
     224        end = buflim;
     225      while (buf < beg && beg[-1] != eol)
     226        --beg;
     227      *match_size = end - beg;
     228      free (mb_properties);
     229      return beg - buf;
     230    }
     231  }
     232  
     233  static void
     234  Ffree (void *compiled_pattern)
     235  {
     236    struct compiled_kwset *ckwset = (struct compiled_kwset *) compiled_pattern;
     237  
     238    free (ckwset->trans);
     239    free (ckwset);
     240  }
     241  
     242  matcher_t matcher_fgrep =
     243    {
     244      Fcompile,
     245      Fexecute,
     246      Ffree
     247    };
     248