1 /* exclude.c -- exclude file names
2
3 Copyright (C) 1992-1994, 1997, 1999-2007, 2009-2023 Free Software
4 Foundation, Inc.
5
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18
19 /* Written by Paul Eggert <eggert@twinsun.com>
20 and Sergey Poznyakoff <gray@gnu.org>.
21 Thanks to Phil Proudman <phil@proudman51.freeserve.co.uk>
22 for improvement suggestions. */
23
24 #include <config.h>
25
26 #include <ctype.h>
27 #include <errno.h>
28 #include <stddef.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <uchar.h>
33 #include <regex.h>
34
35 #include "exclude.h"
36 #include "hash.h"
37 #include "mbuiter.h"
38 #include "fnmatch.h"
39 #include "xalloc.h"
40 #include "filename.h"
41
42 #if GNULIB_EXCLUDE_SINGLE_THREAD
43 # include "unlocked-io.h"
44 #endif
45
46 /* Non-GNU systems lack these options, so we don't need to check them. */
47 #ifndef FNM_CASEFOLD
48 # define FNM_CASEFOLD 0
49 #endif
50 #ifndef FNM_EXTMATCH
51 # define FNM_EXTMATCH 0
52 #endif
53 #ifndef FNM_LEADING_DIR
54 # define FNM_LEADING_DIR 0
55 #endif
56
57 static_assert (((EXCLUDE_ANCHORED | EXCLUDE_INCLUDE | EXCLUDE_WILDCARDS)
58 & (FNM_PATHNAME | FNM_NOESCAPE | FNM_PERIOD | FNM_LEADING_DIR
59 | FNM_CASEFOLD | FNM_EXTMATCH))
60 == 0);
61
62
63 /* Exclusion patterns are grouped into a singly-linked list of
64 "exclusion segments". Each segment represents a set of patterns
65 that can be matches using the same algorithm. Non-wildcard
66 patterns are kept in hash tables, to speed up searches. Wildcard
67 patterns are stored as arrays of patterns. */
68
69
70 /* An exclude pattern-options pair. The options are fnmatch options
71 ORed with EXCLUDE_* options. */
72
73 struct patopts
74 {
75 int options;
76 union
77 {
78 char const *pattern;
79 regex_t re;
80 } v;
81 };
82
83 /* An array of pattern-options pairs. */
84
85 struct exclude_pattern
86 {
87 struct patopts *exclude;
88 idx_t exclude_alloc;
89 idx_t exclude_count;
90 };
91
92 enum exclude_type
93 {
94 exclude_hash, /* a hash table of excluded names */
95 exclude_pattern /* an array of exclude patterns */
96 };
97
98 struct exclude_segment
99 {
100 struct exclude_segment *next; /* next segment in list */
101 enum exclude_type type; /* type of this segment */
102 int options; /* common options for this segment */
103 union
104 {
105 Hash_table *table; /* for type == exclude_hash */
106 struct exclude_pattern pat; /* for type == exclude_pattern */
107 } v;
108 };
109
110 struct pattern_buffer
111 {
112 struct pattern_buffer *next;
113 char *base;
114 };
115
116 /* The exclude structure keeps a singly-linked list of exclude segments,
117 maintained in reverse order. */
118 struct exclude
119 {
120 struct exclude_segment *head;
121 struct pattern_buffer *patbuf;
122 };
123
124 /* Register BUF in the pattern buffer list of EX. ADD_FUNC (see
125 add_exclude_file and add_exclude_fp below) can use this function
126 if it modifies the pattern, to ensure the allocated memory will be
127 properly reclaimed upon calling free_exclude. */
128 void
129 exclude_add_pattern_buffer (struct exclude *ex, char *buf)
130 {
131 struct pattern_buffer *pbuf = xmalloc (sizeof *pbuf);
132 pbuf->base = buf;
133 pbuf->next = ex->patbuf;
134 ex->patbuf = pbuf;
135 }
136
137 /* Return true if STR has or may have wildcards, when matched with OPTIONS.
138 Return false if STR definitely does not have wildcards. */
139 bool
140 fnmatch_pattern_has_wildcards (const char *str, int options)
141 {
142 while (1)
143 {
144 switch (*str++)
145 {
146 case '.':
147 case '{':
148 case '}':
149 case '(':
150 case ')':
151 if (options & EXCLUDE_REGEX)
152 return true;
153 break;
154
155 case '\\':
156 if (options & EXCLUDE_REGEX)
157 continue;
158 else
159 str += ! (options & FNM_NOESCAPE) && *str;
160 break;
161
162 case '+': case '@': case '!':
163 if (options & FNM_EXTMATCH && *str == '(')
164 return true;
165 break;
166
167 case '?': case '*': case '[':
168 return true;
169
170 case '\0':
171 return false;
172 }
173 }
174 }
175
176 static void
177 unescape_pattern (char *str)
178 {
179 char const *q = str;
180 do
181 q += *q == '\\' && q[1];
182 while ((*str++ = *q++));
183 }
184
185 /* Return a newly allocated and empty exclude list. */
186
187 struct exclude *
188 new_exclude (void)
189 {
190 return xzalloc (sizeof *new_exclude ());
191 }
192
193 /* Calculate the hash of string. */
194 static size_t
195 string_hasher (void const *data, size_t n_buckets)
196 {
197 char const *p = data;
198 return hash_string (p, n_buckets);
199 }
200
201 /* Ditto, for case-insensitive hashes */
202 static size_t
203 string_hasher_ci (void const *data, size_t n_buckets)
204 {
205 char const *p = data;
206 mbui_iterator_t iter;
207 size_t value = 0;
208
209 for (mbui_init (iter, p); mbui_avail (iter); mbui_advance (iter))
210 {
211 mbchar_t m = mbui_cur (iter);
212 char32_t wc;
213
214 if (m.wc_valid)
215 wc = c32tolower (m.wc);
216 else
217 wc = *m.ptr;
218
219 value = value * 31 + wc;
220 }
221
222 return value % n_buckets;
223 }
224
225 /* compare two strings for equality */
226 static bool
227 string_compare (void const *data1, void const *data2)
228 {
229 char const *p1 = data1;
230 char const *p2 = data2;
231 return strcmp (p1, p2) == 0;
232 }
233
234 /* compare two strings for equality, case-insensitive */
235 static bool
236 string_compare_ci (void const *data1, void const *data2)
237 {
238 char const *p1 = data1;
239 char const *p2 = data2;
240 return mbscasecmp (p1, p2) == 0;
241 }
242
243 static void
244 string_free (void *data)
245 {
246 free (data);
247 }
248
249 /* Create new exclude segment of given TYPE and OPTIONS, and attach it
250 to the head of EX. */
251 static void
252 new_exclude_segment (struct exclude *ex, enum exclude_type type, int options)
253 {
254 struct exclude_segment *sp = xzalloc (sizeof (struct exclude_segment));
255 sp->type = type;
256 sp->options = options;
257 switch (type)
258 {
259 case exclude_pattern:
260 break;
261
262 case exclude_hash:
263 sp->v.table = hash_initialize (0, NULL,
264 (options & FNM_CASEFOLD) ?
265 string_hasher_ci
266 : string_hasher,
267 (options & FNM_CASEFOLD) ?
268 string_compare_ci
269 : string_compare,
270 string_free);
271 break;
272 }
273 sp->next = ex->head;
274 ex->head = sp;
275 }
276
277 /* Free a single exclude segment */
278 static void
279 free_exclude_segment (struct exclude_segment *seg)
280 {
281 switch (seg->type)
282 {
283 case exclude_pattern:
284 for (idx_t i = 0; i < seg->v.pat.exclude_count; i++)
285 {
286 if (seg->v.pat.exclude[i].options & EXCLUDE_REGEX)
287 regfree (&seg->v.pat.exclude[i].v.re);
288 }
289 free (seg->v.pat.exclude);
290 break;
291
292 case exclude_hash:
293 hash_free (seg->v.table);
294 break;
295 }
296 free (seg);
297 }
298
299 /* Free the storage associated with an exclude list. */
300 void
301 free_exclude (struct exclude *ex)
302 {
303 struct exclude_segment *seg;
304 struct pattern_buffer *pbuf;
305
306 for (seg = ex->head; seg; )
307 {
308 struct exclude_segment *next = seg->next;
309 free_exclude_segment (seg);
310 seg = next;
311 }
312
313 for (pbuf = ex->patbuf; pbuf; )
314 {
315 struct pattern_buffer *next = pbuf->next;
316 free (pbuf->base);
317 free (pbuf);
318 pbuf = next;
319 }
320
321 free (ex);
322 }
323
324 /* Return zero if PATTERN matches F, obeying OPTIONS, except that
325 (unlike fnmatch) wildcards are disabled in PATTERN. */
326
327 static int
328 fnmatch_no_wildcards (char const *pattern, char const *f, int options)
329 {
330 if (! (options & FNM_LEADING_DIR))
331 return ((options & FNM_CASEFOLD)
332 ? mbscasecmp (pattern, f)
333 : strcmp (pattern, f));
334 else if (! (options & FNM_CASEFOLD))
335 {
336 size_t patlen = strlen (pattern);
337 int r = strncmp (pattern, f, patlen);
338 if (! r)
339 {
340 r = f[patlen];
341 if (r == '/')
342 r = 0;
343 }
344 return r;
345 }
346 else
347 {
348 /* Walk through a copy of F, seeing whether P matches any prefix
349 of F.
350
351 FIXME: This is an O(N**2) algorithm; it should be O(N).
352 Also, the copy should not be necessary. However, fixing this
353 will probably involve a change to the mbs* API. */
354
355 char *fcopy = xstrdup (f);
356 char *p;
357 int r;
358 for (p = fcopy; ; *p++ = '/')
359 {
360 p = strchr (p, '/');
361 if (p)
362 *p = '\0';
363 r = mbscasecmp (pattern, fcopy);
364 if (!p || r <= 0)
365 break;
366 }
367 free (fcopy);
368 return r;
369 }
370 }
371
372 bool
373 exclude_fnmatch (char const *pattern, char const *f, int options)
374 {
375 int (*matcher) (char const *, char const *, int) =
376 (options & EXCLUDE_WILDCARDS
377 ? fnmatch
378 : fnmatch_no_wildcards);
379 bool matched = ((*matcher) (pattern, f, options) == 0);
380 char const *p;
381
382 if (! (options & EXCLUDE_ANCHORED))
383 for (p = f; *p && ! matched; p++)
384 if (*p == '/' && p[1] != '/')
385 matched = ((*matcher) (pattern, p + 1, options) == 0);
386
387 return matched;
388 }
389
390 static bool
391 exclude_patopts (struct patopts const *opts, char const *f)
392 {
393 int options = opts->options;
394
395 return (options & EXCLUDE_REGEX)
396 ? regexec (&opts->v.re, f, 0, NULL, 0) == 0
397 : exclude_fnmatch (opts->v.pattern, f, options);
398 }
399
400 /* Return true if the exclude_pattern segment SEG matches F. */
401
402 static bool
403 file_pattern_matches (struct exclude_segment const *seg, char const *f)
404 {
405 idx_t exclude_count = seg->v.pat.exclude_count;
406 struct patopts const *exclude = seg->v.pat.exclude;
407
408 for (idx_t i = 0; i < exclude_count; i++)
409 {
410 if (exclude_patopts (exclude + i, f))
411 return true;
412 }
413 return false;
414 }
415
416 /* Return true if the exclude_hash segment SEG matches F.
417 BUFFER is an auxiliary storage of the same length as F (with nul
418 terminator included) */
419 static bool
420 file_name_matches (struct exclude_segment const *seg, char const *f,
421 char *buffer)
422 {
423 int options = seg->options;
424 Hash_table *table = seg->v.table;
425
426 do
427 {
428 /* initialize the pattern */
429 strcpy (buffer, f);
430
431 while (1)
432 {
433 if (hash_lookup (table, buffer))
434 return true;
435 if (options & FNM_LEADING_DIR)
436 {
437 char *p = strrchr (buffer, '/');
438 if (p)
439 {
440 *p = 0;
441 continue;
442 }
443 }
444 break;
445 }
446
447 if (!(options & EXCLUDE_ANCHORED))
448 {
449 f = strchr (f, '/');
450 if (f)
451 f++;
452 }
453 else
454 break;
455 }
456 while (f);
457
458 return false;
459 }
460
461 /* Return true if EX excludes F. */
462
463 bool
464 excluded_file_name (struct exclude const *ex, char const *f)
465 {
466 struct exclude_segment *seg;
467 bool invert = false;
468 char *filename = NULL;
469
470 /* If no patterns are given, the default is to include. */
471 if (!ex->head)
472 return false;
473
474 /* Scan through the segments, reporting the status of the first match.
475 The segments are in reverse order, so this reports the status of
476 the last match in the original option list. */
477 for (seg = ex->head; ; seg = seg->next)
478 {
479 if (seg->type == exclude_hash)
480 {
481 if (!filename)
482 filename = xmalloc (strlen (f) + 1);
483 if (file_name_matches (seg, f, filename))
484 break;
485 }
486 else
487 {
488 if (file_pattern_matches (seg, f))
489 break;
490 }
491
492 if (! seg->next)
493 {
494 /* If patterns are given but none match, the default is the
495 opposite of the last segment (i.e., the first in the
496 original option list). For example, in the command
497 'grep -r --exclude="a*" --include="*b" pat dir', the
498 first option is --exclude so any file name matching
499 neither a* nor *b is included. */
500 invert = true;
501 break;
502 }
503 }
504
505 free (filename);
506 return invert ^ ! (seg->options & EXCLUDE_INCLUDE);
507 }
508
509 /* Append to EX the exclusion PATTERN with OPTIONS. */
510
511 void
512 add_exclude (struct exclude *ex, char const *pattern, int options)
513 {
514 struct exclude_segment *seg;
515 struct exclude_pattern *pat;
516 struct patopts *patopts;
517
518 if ((options & (EXCLUDE_REGEX|EXCLUDE_WILDCARDS))
519 && fnmatch_pattern_has_wildcards (pattern, options))
520 {
521 if (! (ex->head && ex->head->type == exclude_pattern
522 && ((ex->head->options & EXCLUDE_INCLUDE)
523 == (options & EXCLUDE_INCLUDE))))
524 new_exclude_segment (ex, exclude_pattern, options);
525
526 seg = ex->head;
527
528 pat = &seg->v.pat;
529 if (pat->exclude_count == pat->exclude_alloc)
530 pat->exclude = xpalloc (pat->exclude, &pat->exclude_alloc, 1, -1,
531 sizeof *pat->exclude);
532 patopts = &pat->exclude[pat->exclude_count++];
533
534 patopts->options = options;
535 if (options & EXCLUDE_REGEX)
536 {
537 int rc;
538 int cflags = REG_NOSUB|REG_EXTENDED|
539 ((options & FNM_CASEFOLD) ? REG_ICASE : 0);
540
541 if (options & FNM_LEADING_DIR)
542 {
543 char *tmp;
544 idx_t len = strlen (pattern);
545
546 while (len > 0 && ISSLASH (pattern[len-1]))
547 --len;
548
549 if (len == 0)
550 rc = 1;
551 else
552 {
553 tmp = ximalloc (len + 7);
554 memcpy (tmp, pattern, len);
555 strcpy (tmp + len, "(/.*)?");
556 rc = regcomp (&patopts->v.re, tmp, cflags);
557 free (tmp);
558 }
559 }
560 else
561 rc = regcomp (&patopts->v.re, pattern, cflags);
562
563 if (rc)
564 {
565 pat->exclude_count--;
566 return;
567 }
568 }
569 else
570 {
571 if (options & EXCLUDE_ALLOC)
572 {
573 pattern = xstrdup (pattern);
574 exclude_add_pattern_buffer (ex, (char*) pattern);
575 }
576 patopts->v.pattern = pattern;
577 }
578 }
579 else
580 {
581 char *str, *p;
582 int exclude_hash_flags = (EXCLUDE_INCLUDE | EXCLUDE_ANCHORED
583 | FNM_LEADING_DIR | FNM_CASEFOLD);
584 if (! (ex->head && ex->head->type == exclude_hash
585 && ((ex->head->options & exclude_hash_flags)
586 == (options & exclude_hash_flags))))
587 new_exclude_segment (ex, exclude_hash, options);
588 seg = ex->head;
589
590 str = xstrdup (pattern);
591 if ((options & (EXCLUDE_WILDCARDS | FNM_NOESCAPE)) == EXCLUDE_WILDCARDS)
592 unescape_pattern (str);
593 p = hash_insert (seg->v.table, str);
594 if (p != str)
595 free (str);
596 }
597 }
598
599 /* Use ADD_FUNC to append to EX the patterns in FILE_NAME, each with
600 OPTIONS. LINE_END terminates each pattern in the file. If
601 LINE_END is a space character, ignore trailing spaces and empty
602 lines in FP. Return -1 (setting errno) on failure, 0 on success. */
603
604 int
605 add_exclude_fp (void (*add_func) (struct exclude *, char const *, int, void *),
606 struct exclude *ex, FILE *fp, int options,
607 char line_end,
608 void *data)
609 {
610 char *buf = NULL;
611 char *p;
612 char *pattern;
613 char const *lim;
614 idx_t buf_alloc = 0;
615 idx_t buf_count = 0;
616 int c;
617 int e = 0;
618
619 while ((c = getc (fp)) != EOF)
620 {
621 if (buf_count == buf_alloc)
622 buf = xpalloc (buf, &buf_alloc, 1, -1, 1);
623 buf[buf_count++] = c;
624 }
625
626 if (ferror (fp))
627 e = errno;
628
629 buf = xirealloc (buf, buf_count + 1);
630 buf[buf_count] = line_end;
631 lim = buf + buf_count + ! (buf_count == 0 || buf[buf_count - 1] == line_end);
632
633 exclude_add_pattern_buffer (ex, buf);
634
635 pattern = buf;
636
637 for (p = buf; p < lim; p++)
638 if (*p == line_end)
639 {
640 char *pattern_end = p;
641
642 if (isspace ((unsigned char) line_end))
643 {
644 for (; ; pattern_end--)
645 if (pattern_end == pattern)
646 goto next_pattern;
647 else if (! isspace ((unsigned char) pattern_end[-1]))
648 break;
649 }
650
651 *pattern_end = '\0';
652 (*add_func) (ex, pattern, options, data);
653
654 next_pattern:
655 pattern = p + 1;
656 }
657
658 errno = e;
659 return e ? -1 : 0;
660 }
661
662 static void
663 call_addfn (struct exclude *ex, char const *pattern, int options, void *data)
664 {
665 void (**addfnptr) (struct exclude *, char const *, int) = data;
666 (*addfnptr) (ex, pattern, options);
667 }
668
669 int
670 add_exclude_file (void (*add_func) (struct exclude *, char const *, int),
671 struct exclude *ex, char const *file_name, int options,
672 char line_end)
673 {
674 if (strcmp (file_name, "-") == 0)
675 return add_exclude_fp (call_addfn, ex, stdin, options, line_end, &add_func);
676
677 FILE *in = fopen (file_name, "re");
678 if (!in)
679 return -1;
680 int rc = add_exclude_fp (call_addfn, ex, in, options, line_end, &add_func);
681 int e = errno;
682 if (fclose (in) != 0)
683 return -1;
684 errno = e;
685 return rc;
686 }