(root)/
coreutils-9.4/
lib/
exclude.c
       1  /* exclude.c -- exclude file names
       2  
       3     Copyright (C) 1992-1994, 1997, 1999-2007, 2009-2023 Free Software
       4     Foundation, Inc.
       5  
       6     This program is free software: you can redistribute it and/or modify
       7     it under the terms of the GNU General Public License as published by
       8     the Free Software Foundation, either version 3 of the License, or
       9     (at your option) any later version.
      10  
      11     This program is distributed in the hope that it will be useful,
      12     but WITHOUT ANY WARRANTY; without even the implied warranty of
      13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14     GNU General Public License for more details.
      15  
      16     You should have received a copy of the GNU General Public License
      17     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      18  
      19  /* Written by Paul Eggert <eggert@twinsun.com>
      20     and Sergey Poznyakoff <gray@gnu.org>.
      21     Thanks to Phil Proudman <phil@proudman51.freeserve.co.uk>
      22     for improvement suggestions. */
      23  
      24  #include <config.h>
      25  
      26  #include <ctype.h>
      27  #include <errno.h>
      28  #include <regex.h>
      29  #include <stddef.h>
      30  #include <stdio.h>
      31  #include <stdlib.h>
      32  #include <string.h>
      33  #include <uchar.h>
      34  
      35  #include "exclude.h"
      36  #include "filename.h"
      37  #include "fnmatch.h"
      38  #include "hash.h"
      39  #include "mbuiter.h"
      40  #include "xalloc.h"
      41  
      42  #if GNULIB_EXCLUDE_SINGLE_THREAD
      43  # include "unlocked-io.h"
      44  #endif
      45  
      46  /* Non-GNU systems lack these options, so we don't need to check them.  */
      47  #ifndef FNM_CASEFOLD
      48  # define FNM_CASEFOLD 0
      49  #endif
      50  #ifndef FNM_EXTMATCH
      51  # define FNM_EXTMATCH 0
      52  #endif
      53  #ifndef FNM_LEADING_DIR
      54  # define FNM_LEADING_DIR 0
      55  #endif
      56  
      57  static_assert (((EXCLUDE_ANCHORED | EXCLUDE_INCLUDE | EXCLUDE_WILDCARDS)
      58                  & (FNM_PATHNAME | FNM_NOESCAPE | FNM_PERIOD | FNM_LEADING_DIR
      59                     | FNM_CASEFOLD | FNM_EXTMATCH))
      60                 == 0);
      61  
      62  
      63  /* Exclusion patterns are grouped into a singly-linked list of
      64     "exclusion segments".  Each segment represents a set of patterns
      65     that can be matches using the same algorithm.  Non-wildcard
      66     patterns are kept in hash tables, to speed up searches.  Wildcard
      67     patterns are stored as arrays of patterns. */
      68  
      69  
      70  /* An exclude pattern-options pair.  The options are fnmatch options
      71     ORed with EXCLUDE_* options.  */
      72  
      73  struct patopts
      74    {
      75      int options;
      76      union
      77      {
      78        char const *pattern;
      79        regex_t re;
      80      } v;
      81    };
      82  
      83  /* An array of pattern-options pairs.  */
      84  
      85  struct exclude_pattern
      86    {
      87      struct patopts *exclude;
      88      idx_t exclude_alloc;
      89      idx_t exclude_count;
      90    };
      91  
      92  enum exclude_type
      93    {
      94      exclude_hash,                    /* a hash table of excluded names */
      95      exclude_pattern                  /* an array of exclude patterns */
      96    };
      97  
      98  struct exclude_segment
      99    {
     100      struct exclude_segment *next;    /* next segment in list */
     101      enum exclude_type type;          /* type of this segment */
     102      int options;                     /* common options for this segment */
     103      union
     104      {
     105        Hash_table *table;             /* for type == exclude_hash */
     106        struct exclude_pattern pat;    /* for type == exclude_pattern */
     107      } v;
     108    };
     109  
     110  struct pattern_buffer
     111    {
     112      struct pattern_buffer *next;
     113      char *base;
     114    };
     115  
     116  /* The exclude structure keeps a singly-linked list of exclude segments,
     117     maintained in reverse order.  */
     118  struct exclude
     119    {
     120      struct exclude_segment *head;
     121      struct pattern_buffer *patbuf;
     122    };
     123  
     124  /* Register BUF in the pattern buffer list of EX.  ADD_FUNC (see
     125     add_exclude_file and add_exclude_fp below) can use this function
     126     if it modifies the pattern, to ensure the allocated memory will be
     127     properly reclaimed upon calling free_exclude. */
     128  void
     129  exclude_add_pattern_buffer (struct exclude *ex, char *buf)
     130  {
     131    struct pattern_buffer *pbuf = xmalloc (sizeof *pbuf);
     132    pbuf->base = buf;
     133    pbuf->next = ex->patbuf;
     134    ex->patbuf = pbuf;
     135  }
     136  
     137  /* Return true if STR has or may have wildcards, when matched with OPTIONS.
     138     Return false if STR definitely does not have wildcards.  */
     139  bool
     140  fnmatch_pattern_has_wildcards (const char *str, int options)
     141  {
     142    while (true)
     143      {
     144        switch (*str++)
     145          {
     146          case '.':
     147          case '{':
     148          case '}':
     149          case '(':
     150          case ')':
     151            if (options & EXCLUDE_REGEX)
     152              return true;
     153            break;
     154  
     155          case '\\':
     156            if (options & EXCLUDE_REGEX)
     157              continue;
     158            else
     159              str += ! (options & FNM_NOESCAPE) && *str;
     160            break;
     161  
     162          case '+': case '@': case '!':
     163            if (options & FNM_EXTMATCH && *str == '(')
     164              return true;
     165            break;
     166  
     167          case '?': case '*': case '[':
     168            return true;
     169  
     170          case '\0':
     171            return false;
     172          }
     173      }
     174  }
     175  
     176  static void
     177  unescape_pattern (char *str)
     178  {
     179    char const *q = str;
     180    do
     181      q += *q == '\\' && q[1];
     182    while ((*str++ = *q++));
     183  }
     184  
     185  /* Return a newly allocated and empty exclude list.  */
     186  
     187  struct exclude *
     188  new_exclude (void)
     189  {
     190    return xzalloc (sizeof *new_exclude ());
     191  }
     192  
     193  /* Calculate the hash of string.  */
     194  static size_t
     195  string_hasher (void const *data, size_t n_buckets)
     196  {
     197    return hash_string (data, n_buckets);
     198  }
     199  
     200  /* Ditto, for case-insensitive hashes */
     201  static size_t
     202  string_hasher_ci (void const *data, size_t n_buckets)
     203  {
     204    char const *p = data;
     205    size_t value = 0;
     206  
     207    mbui_iterator_t iter;
     208    for (mbui_init (iter, p); mbui_avail (iter); mbui_advance (iter))
     209      {
     210        mbchar_t m = mbui_cur (iter);
     211        char32_t wc;
     212  
     213        if (m.wc_valid)
     214          wc = c32tolower (m.wc);
     215        else
     216          wc = *m.ptr;
     217  
     218        value = value * 31 + wc;
     219      }
     220  
     221    return value % n_buckets;
     222  }
     223  
     224  /* compare two strings for equality */
     225  static bool
     226  string_compare (void const *data1, void const *data2)
     227  {
     228    return strcmp (data1, data2) == 0;
     229  }
     230  
     231  /* compare two strings for equality, case-insensitive */
     232  static bool
     233  string_compare_ci (void const *data1, void const *data2)
     234  {
     235    return mbscasecmp (data1, data2) == 0;
     236  }
     237  
     238  /* Create new exclude segment of given TYPE and OPTIONS, and attach it
     239     to the head of EX.  */
     240  static void
     241  new_exclude_segment (struct exclude *ex, enum exclude_type type, int options)
     242  {
     243    struct exclude_segment *sp = xmalloc (sizeof (struct exclude_segment));
     244    sp->type = type;
     245    sp->options = options;
     246    switch (type)
     247      {
     248      case exclude_pattern:
     249        sp->v.pat = (struct exclude_pattern) {0};
     250        break;
     251  
     252      case exclude_hash:
     253        sp->v.table = hash_initialize (0, nullptr,
     254                                       (options & FNM_CASEFOLD
     255                                        ? string_hasher_ci
     256                                        : string_hasher),
     257                                       (options & FNM_CASEFOLD
     258                                        ? string_compare_ci
     259                                        : string_compare),
     260                                       free);
     261        break;
     262      }
     263    sp->next = ex->head;
     264    ex->head = sp;
     265  }
     266  
     267  /* Free a single exclude segment */
     268  static void
     269  free_exclude_segment (struct exclude_segment *seg)
     270  {
     271    switch (seg->type)
     272      {
     273      case exclude_pattern:
     274        for (idx_t i = 0; i < seg->v.pat.exclude_count; i++)
     275          if (seg->v.pat.exclude[i].options & EXCLUDE_REGEX)
     276            regfree (&seg->v.pat.exclude[i].v.re);
     277        free (seg->v.pat.exclude);
     278        break;
     279  
     280      case exclude_hash:
     281        hash_free (seg->v.table);
     282        break;
     283      }
     284    free (seg);
     285  }
     286  
     287  /* Free the storage associated with an exclude list.  */
     288  void
     289  free_exclude (struct exclude *ex)
     290  {
     291    for (struct exclude_segment *seg = ex->head; seg; )
     292      {
     293        struct exclude_segment *next = seg->next;
     294        free_exclude_segment (seg);
     295        seg = next;
     296      }
     297  
     298    for (struct pattern_buffer *pbuf = ex->patbuf; pbuf; )
     299      {
     300        struct pattern_buffer *next = pbuf->next;
     301        free (pbuf->base);
     302        free (pbuf);
     303        pbuf = next;
     304      }
     305  
     306    free (ex);
     307  }
     308  
     309  /* Return zero if PATTERN matches F, obeying OPTIONS, except that
     310     (unlike fnmatch) wildcards are disabled in PATTERN.  */
     311  
     312  static int
     313  fnmatch_no_wildcards (char const *pattern, char const *f, int options)
     314  {
     315    if (! (options & FNM_LEADING_DIR))
     316      return ((options & FNM_CASEFOLD)
     317              ? mbscasecmp (pattern, f)
     318              : strcmp (pattern, f));
     319    else if (! (options & FNM_CASEFOLD))
     320      {
     321        idx_t patlen = strlen (pattern);
     322        int r = strncmp (pattern, f, patlen);
     323        if (! r)
     324          {
     325            r = f[patlen];
     326            if (r == '/')
     327              r = 0;
     328          }
     329        return r;
     330      }
     331    else
     332      {
     333        /* Walk through a copy of F, seeing whether P matches any prefix
     334           of F.
     335  
     336           FIXME: This is an O(N**2) algorithm; it should be O(N).
     337           Also, the copy should not be necessary.  However, fixing this
     338           will probably involve a change to the mbs* API.  */
     339  
     340        char *fcopy = xstrdup (f);
     341        int r;
     342        for (char *p = fcopy; ; *p++ = '/')
     343          {
     344            p = strchr (p, '/');
     345            if (p)
     346              *p = '\0';
     347            r = mbscasecmp (pattern, fcopy);
     348            if (!p || r <= 0)
     349              break;
     350          }
     351        free (fcopy);
     352        return r;
     353      }
     354  }
     355  
     356  bool
     357  exclude_fnmatch (char const *pattern, char const *f, int options)
     358  {
     359    int (*matcher) (char const *, char const *, int) =
     360      (options & EXCLUDE_WILDCARDS
     361       ? fnmatch
     362       : fnmatch_no_wildcards);
     363    bool matched = matcher (pattern, f, options) == 0;
     364  
     365    if (! (options & EXCLUDE_ANCHORED))
     366      for (char const *p = f; *p && ! matched; p++)
     367        if (*p == '/' && p[1] != '/')
     368          matched = matcher (pattern, p + 1, options) == 0;
     369  
     370    return matched;
     371  }
     372  
     373  static bool
     374  exclude_patopts (struct patopts const *opts, char const *f)
     375  {
     376    int options = opts->options;
     377  
     378    return (options & EXCLUDE_REGEX
     379            ? regexec (&opts->v.re, f, 0, nullptr, 0) == 0
     380            : exclude_fnmatch (opts->v.pattern, f, options));
     381  }
     382  
     383  /* Return true if the exclude_pattern segment SEG matches F.  */
     384  
     385  static bool
     386  file_pattern_matches (struct exclude_segment const *seg, char const *f)
     387  {
     388    idx_t exclude_count = seg->v.pat.exclude_count;
     389    struct patopts const *exclude = seg->v.pat.exclude;
     390  
     391    for (idx_t i = 0; i < exclude_count; i++)
     392      if (exclude_patopts (exclude + i, f))
     393        return true;
     394    return false;
     395  }
     396  
     397  /* Return true if the exclude_hash segment SEG matches F.
     398     BUFFER is an auxiliary storage of the same length as F (with nul
     399     terminator included) */
     400  static bool
     401  file_name_matches (struct exclude_segment const *seg, char const *f,
     402                     char *buffer)
     403  {
     404    int options = seg->options;
     405    Hash_table *table = seg->v.table;
     406  
     407    do
     408      {
     409        /* initialize the pattern */
     410        strcpy (buffer, f);
     411  
     412        while (true)
     413          {
     414            if (hash_lookup (table, buffer))
     415              return true;
     416            if (options & FNM_LEADING_DIR)
     417              {
     418                char *p = strrchr (buffer, '/');
     419                if (p)
     420                  {
     421                    *p = '\0';
     422                    continue;
     423                  }
     424              }
     425            break;
     426          }
     427  
     428        if (!(options & EXCLUDE_ANCHORED))
     429          {
     430            f = strchr (f, '/');
     431            if (f)
     432              f++;
     433          }
     434        else
     435          break;
     436      }
     437    while (f);
     438  
     439    return false;
     440  }
     441  
     442  /* Return true if EX excludes F.  */
     443  
     444  bool
     445  excluded_file_name (struct exclude const *ex, char const *f)
     446  {
     447    /* If no patterns are given, the default is to include.  */
     448    if (!ex->head)
     449      return false;
     450  
     451    bool invert = false;
     452    char *filename = nullptr;
     453  
     454    /* Scan through the segments, reporting the status of the first match.
     455       The segments are in reverse order, so this reports the status of
     456       the last match in the original option list.  */
     457    struct exclude_segment *seg;
     458    for (seg = ex->head; ; seg = seg->next)
     459      {
     460        if (seg->type == exclude_hash)
     461          {
     462            if (!filename)
     463              filename = xmalloc (strlen (f) + 1);
     464            if (file_name_matches (seg, f, filename))
     465              break;
     466          }
     467        else
     468          {
     469            if (file_pattern_matches (seg, f))
     470              break;
     471          }
     472  
     473        if (! seg->next)
     474          {
     475            /* If patterns are given but none match, the default is the
     476               opposite of the last segment (i.e., the first in the
     477               original option list).  For example, in the command
     478               'grep -r --exclude="a*" --include="*b" pat dir', the
     479               first option is --exclude so any file name matching
     480               neither a* nor *b is included.  */
     481            invert = true;
     482            break;
     483          }
     484      }
     485  
     486    free (filename);
     487    return invert ^ ! (seg->options & EXCLUDE_INCLUDE);
     488  }
     489  
     490  /* Append to EX the exclusion PATTERN with OPTIONS.  */
     491  
     492  void
     493  add_exclude (struct exclude *ex, char const *pattern, int options)
     494  {
     495    if ((options & (EXCLUDE_REGEX|EXCLUDE_WILDCARDS))
     496        && fnmatch_pattern_has_wildcards (pattern, options))
     497      {
     498        if (! (ex->head && ex->head->type == exclude_pattern
     499               && ((ex->head->options & EXCLUDE_INCLUDE)
     500                   == (options & EXCLUDE_INCLUDE))))
     501          new_exclude_segment (ex, exclude_pattern, options);
     502  
     503        struct exclude_pattern *pat = &ex->head->v.pat;
     504        if (pat->exclude_count == pat->exclude_alloc)
     505          pat->exclude = xpalloc (pat->exclude, &pat->exclude_alloc, 1, -1,
     506                                  sizeof *pat->exclude);
     507        struct patopts *patopts = &pat->exclude[pat->exclude_count++];
     508  
     509        patopts->options = options;
     510        if (options & EXCLUDE_REGEX)
     511          {
     512            int rc;
     513            int cflags = (REG_NOSUB | REG_EXTENDED
     514                          | (options & FNM_CASEFOLD ? REG_ICASE : 0));
     515  
     516            if (! (options & FNM_LEADING_DIR))
     517              rc = regcomp (&patopts->v.re, pattern, cflags);
     518            else
     519              for (idx_t len = strlen (pattern); ; len--)
     520                {
     521                  if (len == 0)
     522                    {
     523                      rc = 1;
     524                      break;
     525                    }
     526                  if (!ISSLASH (pattern[len - 1]))
     527                    {
     528                      static char const patsuffix[] = "(/.*)?";
     529                      char *tmp = ximalloc (len + sizeof patsuffix);
     530                      memcpy (tmp, pattern, len);
     531                      strcpy (tmp + len, patsuffix);
     532                      rc = regcomp (&patopts->v.re, tmp, cflags);
     533                      free (tmp);
     534                      break;
     535                    }
     536                }
     537  
     538            if (rc)
     539              {
     540                pat->exclude_count--;
     541                return;
     542              }
     543          }
     544        else
     545          {
     546            if (options & EXCLUDE_ALLOC)
     547              {
     548                char *dup = xstrdup (pattern);
     549                pattern = dup;
     550                exclude_add_pattern_buffer (ex, dup);
     551              }
     552            patopts->v.pattern = pattern;
     553          }
     554      }
     555    else
     556      {
     557        int exclude_hash_flags = (EXCLUDE_INCLUDE | EXCLUDE_ANCHORED
     558                                  | FNM_LEADING_DIR | FNM_CASEFOLD);
     559        if (! (ex->head && ex->head->type == exclude_hash
     560               && ((ex->head->options & exclude_hash_flags)
     561                   == (options & exclude_hash_flags))))
     562          new_exclude_segment (ex, exclude_hash, options);
     563  
     564        char *str = xstrdup (pattern);
     565        if ((options & (EXCLUDE_WILDCARDS | FNM_NOESCAPE)) == EXCLUDE_WILDCARDS)
     566          unescape_pattern (str);
     567        if (hash_insert (ex->head->v.table, str) != str)
     568          free (str);
     569      }
     570  }
     571  
     572  /* Use ADD_FUNC to append to EX the patterns in FILE_NAME, each with
     573     OPTIONS.  LINE_END terminates each pattern in the file.  If
     574     LINE_END is a space character, ignore trailing spaces and empty
     575     lines in FP.  Return -1 (setting errno) on failure, 0 on success.  */
     576  
     577  int
     578  add_exclude_fp (void (*add_func) (struct exclude *, char const *, int, void *),
     579                  struct exclude *ex, FILE *fp, int options,
     580                  char line_end, void *data)
     581  {
     582    char *buf = nullptr;
     583    idx_t buf_alloc = 0;
     584    idx_t buf_count = 0;
     585  
     586    for (int c; (c = getc (fp)) != EOF; )
     587      {
     588        if (buf_count == buf_alloc)
     589          buf = xpalloc (buf, &buf_alloc, 1, -1, 1);
     590        buf[buf_count++] = c;
     591      }
     592  
     593    int e = ferror (fp) ? errno : 0;
     594  
     595    buf = xirealloc (buf, buf_count + 1);
     596    buf[buf_count] = line_end;
     597    char const *lim = (buf + buf_count
     598                       + ! (buf_count == 0 || buf[buf_count - 1] == line_end));
     599  
     600    exclude_add_pattern_buffer (ex, buf);
     601  
     602    char *pattern = buf;
     603  
     604    for (char *p = buf; p < lim; p++)
     605      if (*p == line_end)
     606        {
     607          char *pattern_end = p;
     608  
     609          if (isspace ((unsigned char) line_end))
     610            {
     611              /* Assume that no multi-byte character has a trailing byte
     612                 that satisfies isspace, and that nobody cares about
     613                 trailing white space containing non-single-byte characters.
     614                 If either assumption turns out to be false, presumably
     615                 the code should be changed to scan forward through the
     616                 entire pattern, one multi-byte character at a time.  */
     617              for (; ; pattern_end--)
     618                if (pattern_end == pattern)
     619                  goto next_pattern;
     620                else if (! isspace ((unsigned char) pattern_end[-1]))
     621                  break;
     622            }
     623  
     624          *pattern_end = '\0';
     625          add_func (ex, pattern, options, data);
     626  
     627        next_pattern:
     628          pattern = p + 1;
     629        }
     630  
     631    errno = e;
     632    return e ? -1 : 0;
     633  }
     634  
     635  static void
     636  call_addfn (struct exclude *ex, char const *pattern, int options, void *data)
     637  {
     638    void (**addfnptr) (struct exclude *, char const *, int) = data;
     639    (*addfnptr) (ex, pattern, options);
     640  }
     641  
     642  int
     643  add_exclude_file (void (*add_func) (struct exclude *, char const *, int),
     644                    struct exclude *ex, char const *file_name, int options,
     645                    char line_end)
     646  {
     647    if (strcmp (file_name, "-") == 0)
     648      return add_exclude_fp (call_addfn, ex, stdin, options, line_end, &add_func);
     649  
     650    FILE *in = fopen (file_name, "re");
     651    if (!in)
     652      return -1;
     653    int rc = add_exclude_fp (call_addfn, ex, in, options, line_end, &add_func);
     654    int e = errno;
     655    if (fclose (in) < 0)
     656      return -1;
     657    errno = e;
     658    return rc;
     659  }