(root)/
tar-1.35/
gnu/
exclude.c
       1  /* exclude.c -- exclude file names
       2  
       3     Copyright (C) 1992-1994, 1997, 1999-2007, 2009-2023 Free Software
       4     Foundation, Inc.
       5  
       6     This program is free software: you can redistribute it and/or modify
       7     it under the terms of the GNU General Public License as published by
       8     the Free Software Foundation, either version 3 of the License, or
       9     (at your option) any later version.
      10  
      11     This program is distributed in the hope that it will be useful,
      12     but WITHOUT ANY WARRANTY; without even the implied warranty of
      13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14     GNU General Public License for more details.
      15  
      16     You should have received a copy of the GNU General Public License
      17     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      18  
      19  /* Written by Paul Eggert <eggert@twinsun.com>
      20     and Sergey Poznyakoff <gray@gnu.org>.
      21     Thanks to Phil Proudman <phil@proudman51.freeserve.co.uk>
      22     for improvement suggestions. */
      23  
      24  #include <config.h>
      25  
      26  #include <ctype.h>
      27  #include <errno.h>
      28  #include <stddef.h>
      29  #include <stdio.h>
      30  #include <stdlib.h>
      31  #include <string.h>
      32  #include <uchar.h>
      33  #include <regex.h>
      34  
      35  #include "exclude.h"
      36  #include "hash.h"
      37  #include "mbuiter.h"
      38  #include "fnmatch.h"
      39  #include "xalloc.h"
      40  #include "filename.h"
      41  
      42  #if GNULIB_EXCLUDE_SINGLE_THREAD
      43  # include "unlocked-io.h"
      44  #endif
      45  
      46  /* Non-GNU systems lack these options, so we don't need to check them.  */
      47  #ifndef FNM_CASEFOLD
      48  # define FNM_CASEFOLD 0
      49  #endif
      50  #ifndef FNM_EXTMATCH
      51  # define FNM_EXTMATCH 0
      52  #endif
      53  #ifndef FNM_LEADING_DIR
      54  # define FNM_LEADING_DIR 0
      55  #endif
      56  
      57  static_assert (((EXCLUDE_ANCHORED | EXCLUDE_INCLUDE | EXCLUDE_WILDCARDS)
      58                  & (FNM_PATHNAME | FNM_NOESCAPE | FNM_PERIOD | FNM_LEADING_DIR
      59                     | FNM_CASEFOLD | FNM_EXTMATCH))
      60                 == 0);
      61  
      62  
      63  /* Exclusion patterns are grouped into a singly-linked list of
      64     "exclusion segments".  Each segment represents a set of patterns
      65     that can be matches using the same algorithm.  Non-wildcard
      66     patterns are kept in hash tables, to speed up searches.  Wildcard
      67     patterns are stored as arrays of patterns. */
      68  
      69  
      70  /* An exclude pattern-options pair.  The options are fnmatch options
      71     ORed with EXCLUDE_* options.  */
      72  
      73  struct patopts
      74    {
      75      int options;
      76      union
      77      {
      78        char const *pattern;
      79        regex_t re;
      80      } v;
      81    };
      82  
      83  /* An array of pattern-options pairs.  */
      84  
      85  struct exclude_pattern
      86    {
      87      struct patopts *exclude;
      88      idx_t exclude_alloc;
      89      idx_t exclude_count;
      90    };
      91  
      92  enum exclude_type
      93    {
      94      exclude_hash,                    /* a hash table of excluded names */
      95      exclude_pattern                  /* an array of exclude patterns */
      96    };
      97  
      98  struct exclude_segment
      99    {
     100      struct exclude_segment *next;    /* next segment in list */
     101      enum exclude_type type;          /* type of this segment */
     102      int options;                     /* common options for this segment */
     103      union
     104      {
     105        Hash_table *table;             /* for type == exclude_hash */
     106        struct exclude_pattern pat;    /* for type == exclude_pattern */
     107      } v;
     108    };
     109  
     110  struct pattern_buffer
     111    {
     112      struct pattern_buffer *next;
     113      char *base;
     114    };
     115  
     116  /* The exclude structure keeps a singly-linked list of exclude segments,
     117     maintained in reverse order.  */
     118  struct exclude
     119    {
     120      struct exclude_segment *head;
     121      struct pattern_buffer *patbuf;
     122    };
     123  
     124  /* Register BUF in the pattern buffer list of EX.  ADD_FUNC (see
     125     add_exclude_file and add_exclude_fp below) can use this function
     126     if it modifies the pattern, to ensure the allocated memory will be
     127     properly reclaimed upon calling free_exclude. */
     128  void
     129  exclude_add_pattern_buffer (struct exclude *ex, char *buf)
     130  {
     131    struct pattern_buffer *pbuf = xmalloc (sizeof *pbuf);
     132    pbuf->base = buf;
     133    pbuf->next = ex->patbuf;
     134    ex->patbuf = pbuf;
     135  }
     136  
     137  /* Return true if STR has or may have wildcards, when matched with OPTIONS.
     138     Return false if STR definitely does not have wildcards.  */
     139  bool
     140  fnmatch_pattern_has_wildcards (const char *str, int options)
     141  {
     142    while (1)
     143      {
     144        switch (*str++)
     145          {
     146          case '.':
     147          case '{':
     148          case '}':
     149          case '(':
     150          case ')':
     151            if (options & EXCLUDE_REGEX)
     152              return true;
     153            break;
     154  
     155          case '\\':
     156            if (options & EXCLUDE_REGEX)
     157              continue;
     158            else
     159              str += ! (options & FNM_NOESCAPE) && *str;
     160            break;
     161  
     162          case '+': case '@': case '!':
     163            if (options & FNM_EXTMATCH && *str == '(')
     164              return true;
     165            break;
     166  
     167          case '?': case '*': case '[':
     168            return true;
     169  
     170          case '\0':
     171            return false;
     172          }
     173      }
     174  }
     175  
     176  static void
     177  unescape_pattern (char *str)
     178  {
     179    char const *q = str;
     180    do
     181      q += *q == '\\' && q[1];
     182    while ((*str++ = *q++));
     183  }
     184  
     185  /* Return a newly allocated and empty exclude list.  */
     186  
     187  struct exclude *
     188  new_exclude (void)
     189  {
     190    return xzalloc (sizeof *new_exclude ());
     191  }
     192  
     193  /* Calculate the hash of string.  */
     194  static size_t
     195  string_hasher (void const *data, size_t n_buckets)
     196  {
     197    char const *p = data;
     198    return hash_string (p, n_buckets);
     199  }
     200  
     201  /* Ditto, for case-insensitive hashes */
     202  static size_t
     203  string_hasher_ci (void const *data, size_t n_buckets)
     204  {
     205    char const *p = data;
     206    mbui_iterator_t iter;
     207    size_t value = 0;
     208  
     209    for (mbui_init (iter, p); mbui_avail (iter); mbui_advance (iter))
     210      {
     211        mbchar_t m = mbui_cur (iter);
     212        char32_t wc;
     213  
     214        if (m.wc_valid)
     215          wc = c32tolower (m.wc);
     216        else
     217          wc = *m.ptr;
     218  
     219        value = value * 31 + wc;
     220      }
     221  
     222    return value % n_buckets;
     223  }
     224  
     225  /* compare two strings for equality */
     226  static bool
     227  string_compare (void const *data1, void const *data2)
     228  {
     229    char const *p1 = data1;
     230    char const *p2 = data2;
     231    return strcmp (p1, p2) == 0;
     232  }
     233  
     234  /* compare two strings for equality, case-insensitive */
     235  static bool
     236  string_compare_ci (void const *data1, void const *data2)
     237  {
     238    char const *p1 = data1;
     239    char const *p2 = data2;
     240    return mbscasecmp (p1, p2) == 0;
     241  }
     242  
     243  static void
     244  string_free (void *data)
     245  {
     246    free (data);
     247  }
     248  
     249  /* Create new exclude segment of given TYPE and OPTIONS, and attach it
     250     to the head of EX.  */
     251  static void
     252  new_exclude_segment (struct exclude *ex, enum exclude_type type, int options)
     253  {
     254    struct exclude_segment *sp = xzalloc (sizeof (struct exclude_segment));
     255    sp->type = type;
     256    sp->options = options;
     257    switch (type)
     258      {
     259      case exclude_pattern:
     260        break;
     261  
     262      case exclude_hash:
     263        sp->v.table = hash_initialize (0, NULL,
     264                                       (options & FNM_CASEFOLD) ?
     265                                         string_hasher_ci
     266                                         : string_hasher,
     267                                       (options & FNM_CASEFOLD) ?
     268                                         string_compare_ci
     269                                         : string_compare,
     270                                       string_free);
     271        break;
     272      }
     273    sp->next = ex->head;
     274    ex->head = sp;
     275  }
     276  
     277  /* Free a single exclude segment */
     278  static void
     279  free_exclude_segment (struct exclude_segment *seg)
     280  {
     281    switch (seg->type)
     282      {
     283      case exclude_pattern:
     284        for (idx_t i = 0; i < seg->v.pat.exclude_count; i++)
     285          {
     286            if (seg->v.pat.exclude[i].options & EXCLUDE_REGEX)
     287              regfree (&seg->v.pat.exclude[i].v.re);
     288          }
     289        free (seg->v.pat.exclude);
     290        break;
     291  
     292      case exclude_hash:
     293        hash_free (seg->v.table);
     294        break;
     295      }
     296    free (seg);
     297  }
     298  
     299  /* Free the storage associated with an exclude list.  */
     300  void
     301  free_exclude (struct exclude *ex)
     302  {
     303    struct exclude_segment *seg;
     304    struct pattern_buffer *pbuf;
     305  
     306    for (seg = ex->head; seg; )
     307      {
     308        struct exclude_segment *next = seg->next;
     309        free_exclude_segment (seg);
     310        seg = next;
     311      }
     312  
     313    for (pbuf = ex->patbuf; pbuf; )
     314      {
     315        struct pattern_buffer *next = pbuf->next;
     316        free (pbuf->base);
     317        free (pbuf);
     318        pbuf = next;
     319      }
     320  
     321    free (ex);
     322  }
     323  
     324  /* Return zero if PATTERN matches F, obeying OPTIONS, except that
     325     (unlike fnmatch) wildcards are disabled in PATTERN.  */
     326  
     327  static int
     328  fnmatch_no_wildcards (char const *pattern, char const *f, int options)
     329  {
     330    if (! (options & FNM_LEADING_DIR))
     331      return ((options & FNM_CASEFOLD)
     332              ? mbscasecmp (pattern, f)
     333              : strcmp (pattern, f));
     334    else if (! (options & FNM_CASEFOLD))
     335      {
     336        size_t patlen = strlen (pattern);
     337        int r = strncmp (pattern, f, patlen);
     338        if (! r)
     339          {
     340            r = f[patlen];
     341            if (r == '/')
     342              r = 0;
     343          }
     344        return r;
     345      }
     346    else
     347      {
     348        /* Walk through a copy of F, seeing whether P matches any prefix
     349           of F.
     350  
     351           FIXME: This is an O(N**2) algorithm; it should be O(N).
     352           Also, the copy should not be necessary.  However, fixing this
     353           will probably involve a change to the mbs* API.  */
     354  
     355        char *fcopy = xstrdup (f);
     356        char *p;
     357        int r;
     358        for (p = fcopy; ; *p++ = '/')
     359          {
     360            p = strchr (p, '/');
     361            if (p)
     362              *p = '\0';
     363            r = mbscasecmp (pattern, fcopy);
     364            if (!p || r <= 0)
     365              break;
     366          }
     367        free (fcopy);
     368        return r;
     369      }
     370  }
     371  
     372  bool
     373  exclude_fnmatch (char const *pattern, char const *f, int options)
     374  {
     375    int (*matcher) (char const *, char const *, int) =
     376      (options & EXCLUDE_WILDCARDS
     377       ? fnmatch
     378       : fnmatch_no_wildcards);
     379    bool matched = ((*matcher) (pattern, f, options) == 0);
     380    char const *p;
     381  
     382    if (! (options & EXCLUDE_ANCHORED))
     383      for (p = f; *p && ! matched; p++)
     384        if (*p == '/' && p[1] != '/')
     385          matched = ((*matcher) (pattern, p + 1, options) == 0);
     386  
     387    return matched;
     388  }
     389  
     390  static bool
     391  exclude_patopts (struct patopts const *opts, char const *f)
     392  {
     393    int options = opts->options;
     394  
     395    return (options & EXCLUDE_REGEX)
     396            ? regexec (&opts->v.re, f, 0, NULL, 0) == 0
     397            : exclude_fnmatch (opts->v.pattern, f, options);
     398  }
     399  
     400  /* Return true if the exclude_pattern segment SEG matches F.  */
     401  
     402  static bool
     403  file_pattern_matches (struct exclude_segment const *seg, char const *f)
     404  {
     405    idx_t exclude_count = seg->v.pat.exclude_count;
     406    struct patopts const *exclude = seg->v.pat.exclude;
     407  
     408    for (idx_t i = 0; i < exclude_count; i++)
     409      {
     410        if (exclude_patopts (exclude + i, f))
     411          return true;
     412      }
     413    return false;
     414  }
     415  
     416  /* Return true if the exclude_hash segment SEG matches F.
     417     BUFFER is an auxiliary storage of the same length as F (with nul
     418     terminator included) */
     419  static bool
     420  file_name_matches (struct exclude_segment const *seg, char const *f,
     421                     char *buffer)
     422  {
     423    int options = seg->options;
     424    Hash_table *table = seg->v.table;
     425  
     426    do
     427      {
     428        /* initialize the pattern */
     429        strcpy (buffer, f);
     430  
     431        while (1)
     432          {
     433            if (hash_lookup (table, buffer))
     434              return true;
     435            if (options & FNM_LEADING_DIR)
     436              {
     437                char *p = strrchr (buffer, '/');
     438                if (p)
     439                  {
     440                    *p = 0;
     441                    continue;
     442                  }
     443              }
     444            break;
     445          }
     446  
     447        if (!(options & EXCLUDE_ANCHORED))
     448          {
     449            f = strchr (f, '/');
     450            if (f)
     451              f++;
     452          }
     453        else
     454          break;
     455      }
     456    while (f);
     457  
     458    return false;
     459  }
     460  
     461  /* Return true if EX excludes F.  */
     462  
     463  bool
     464  excluded_file_name (struct exclude const *ex, char const *f)
     465  {
     466    struct exclude_segment *seg;
     467    bool invert = false;
     468    char *filename = NULL;
     469  
     470    /* If no patterns are given, the default is to include.  */
     471    if (!ex->head)
     472      return false;
     473  
     474    /* Scan through the segments, reporting the status of the first match.
     475       The segments are in reverse order, so this reports the status of
     476       the last match in the original option list.  */
     477    for (seg = ex->head; ; seg = seg->next)
     478      {
     479        if (seg->type == exclude_hash)
     480          {
     481            if (!filename)
     482              filename = xmalloc (strlen (f) + 1);
     483            if (file_name_matches (seg, f, filename))
     484              break;
     485          }
     486        else
     487          {
     488            if (file_pattern_matches (seg, f))
     489              break;
     490          }
     491  
     492        if (! seg->next)
     493          {
     494            /* If patterns are given but none match, the default is the
     495               opposite of the last segment (i.e., the first in the
     496               original option list).  For example, in the command
     497               'grep -r --exclude="a*" --include="*b" pat dir', the
     498               first option is --exclude so any file name matching
     499               neither a* nor *b is included.  */
     500            invert = true;
     501            break;
     502          }
     503      }
     504  
     505    free (filename);
     506    return invert ^ ! (seg->options & EXCLUDE_INCLUDE);
     507  }
     508  
     509  /* Append to EX the exclusion PATTERN with OPTIONS.  */
     510  
     511  void
     512  add_exclude (struct exclude *ex, char const *pattern, int options)
     513  {
     514    struct exclude_segment *seg;
     515    struct exclude_pattern *pat;
     516    struct patopts *patopts;
     517  
     518    if ((options & (EXCLUDE_REGEX|EXCLUDE_WILDCARDS))
     519        && fnmatch_pattern_has_wildcards (pattern, options))
     520      {
     521        if (! (ex->head && ex->head->type == exclude_pattern
     522               && ((ex->head->options & EXCLUDE_INCLUDE)
     523                   == (options & EXCLUDE_INCLUDE))))
     524          new_exclude_segment (ex, exclude_pattern, options);
     525  
     526        seg = ex->head;
     527  
     528        pat = &seg->v.pat;
     529        if (pat->exclude_count == pat->exclude_alloc)
     530          pat->exclude = xpalloc (pat->exclude, &pat->exclude_alloc, 1, -1,
     531                                  sizeof *pat->exclude);
     532        patopts = &pat->exclude[pat->exclude_count++];
     533  
     534        patopts->options = options;
     535        if (options & EXCLUDE_REGEX)
     536          {
     537            int rc;
     538            int cflags = REG_NOSUB|REG_EXTENDED|
     539                         ((options & FNM_CASEFOLD) ? REG_ICASE : 0);
     540  
     541            if (options & FNM_LEADING_DIR)
     542              {
     543                char *tmp;
     544                idx_t len = strlen (pattern);
     545  
     546                while (len > 0 && ISSLASH (pattern[len-1]))
     547                  --len;
     548  
     549                if (len == 0)
     550                  rc = 1;
     551                else
     552                  {
     553                    tmp = ximalloc (len + 7);
     554                    memcpy (tmp, pattern, len);
     555                    strcpy (tmp + len, "(/.*)?");
     556                    rc = regcomp (&patopts->v.re, tmp, cflags);
     557                    free (tmp);
     558                  }
     559              }
     560            else
     561              rc = regcomp (&patopts->v.re, pattern, cflags);
     562  
     563            if (rc)
     564              {
     565                pat->exclude_count--;
     566                return;
     567              }
     568          }
     569        else
     570          {
     571            if (options & EXCLUDE_ALLOC)
     572              {
     573                pattern = xstrdup (pattern);
     574                exclude_add_pattern_buffer (ex, (char*) pattern);
     575              }
     576            patopts->v.pattern = pattern;
     577          }
     578      }
     579    else
     580      {
     581        char *str, *p;
     582        int exclude_hash_flags = (EXCLUDE_INCLUDE | EXCLUDE_ANCHORED
     583                                  | FNM_LEADING_DIR | FNM_CASEFOLD);
     584        if (! (ex->head && ex->head->type == exclude_hash
     585               && ((ex->head->options & exclude_hash_flags)
     586                   == (options & exclude_hash_flags))))
     587          new_exclude_segment (ex, exclude_hash, options);
     588        seg = ex->head;
     589  
     590        str = xstrdup (pattern);
     591        if ((options & (EXCLUDE_WILDCARDS | FNM_NOESCAPE)) == EXCLUDE_WILDCARDS)
     592          unescape_pattern (str);
     593        p = hash_insert (seg->v.table, str);
     594        if (p != str)
     595          free (str);
     596      }
     597  }
     598  
     599  /* Use ADD_FUNC to append to EX the patterns in FILE_NAME, each with
     600     OPTIONS.  LINE_END terminates each pattern in the file.  If
     601     LINE_END is a space character, ignore trailing spaces and empty
     602     lines in FP.  Return -1 (setting errno) on failure, 0 on success.  */
     603  
     604  int
     605  add_exclude_fp (void (*add_func) (struct exclude *, char const *, int, void *),
     606                  struct exclude *ex, FILE *fp, int options,
     607                  char line_end,
     608                  void *data)
     609  {
     610    char *buf = NULL;
     611    char *p;
     612    char *pattern;
     613    char const *lim;
     614    idx_t buf_alloc = 0;
     615    idx_t buf_count = 0;
     616    int c;
     617    int e = 0;
     618  
     619    while ((c = getc (fp)) != EOF)
     620      {
     621        if (buf_count == buf_alloc)
     622          buf = xpalloc (buf, &buf_alloc, 1, -1, 1);
     623        buf[buf_count++] = c;
     624      }
     625  
     626    if (ferror (fp))
     627      e = errno;
     628  
     629    buf = xirealloc (buf, buf_count + 1);
     630    buf[buf_count] = line_end;
     631    lim = buf + buf_count + ! (buf_count == 0 || buf[buf_count - 1] == line_end);
     632  
     633    exclude_add_pattern_buffer (ex, buf);
     634  
     635    pattern = buf;
     636  
     637    for (p = buf; p < lim; p++)
     638      if (*p == line_end)
     639        {
     640          char *pattern_end = p;
     641  
     642          if (isspace ((unsigned char) line_end))
     643            {
     644              for (; ; pattern_end--)
     645                if (pattern_end == pattern)
     646                  goto next_pattern;
     647                else if (! isspace ((unsigned char) pattern_end[-1]))
     648                  break;
     649            }
     650  
     651          *pattern_end = '\0';
     652          (*add_func) (ex, pattern, options, data);
     653  
     654        next_pattern:
     655          pattern = p + 1;
     656        }
     657  
     658    errno = e;
     659    return e ? -1 : 0;
     660  }
     661  
     662  static void
     663  call_addfn (struct exclude *ex, char const *pattern, int options, void *data)
     664  {
     665    void (**addfnptr) (struct exclude *, char const *, int) = data;
     666    (*addfnptr) (ex, pattern, options);
     667  }
     668  
     669  int
     670  add_exclude_file (void (*add_func) (struct exclude *, char const *, int),
     671                    struct exclude *ex, char const *file_name, int options,
     672                    char line_end)
     673  {
     674    if (strcmp (file_name, "-") == 0)
     675      return add_exclude_fp (call_addfn, ex, stdin, options, line_end, &add_func);
     676  
     677    FILE *in = fopen (file_name, "re");
     678    if (!in)
     679      return -1;
     680    int rc = add_exclude_fp (call_addfn, ex, in, options, line_end, &add_func);
     681    int e = errno;
     682    if (fclose (in) != 0)
     683      return -1;
     684    errno = e;
     685    return rc;
     686  }