/* search.h - declarations for the searching subroutines using dfa, kwset and regex for grep.
2 Copyright 1992, 1998, 2000, 2007, 2009-2023 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
17 02110-1301, USA. */
18
19 #ifndef GREP_SEARCH_H
20 #define GREP_SEARCH_H 1
21
22 #include <config.h>
23
24 #include <sys/types.h>
25 #include <stdint.h>
26 #include <wchar.h>
27 #include <wctype.h>
28 #include <regex.h>
29
30 #include "system.h"
31 #include "grep.h"
32 #include "dfa.h"
33 #include "kwset.h"
34 #include "xalloc.h"
35 #include "localeinfo.h"
36
37 _GL_INLINE_HEADER_BEGIN
38 #ifndef SEARCH_INLINE
39 # define SEARCH_INLINE _GL_INLINE
40 #endif
41
/* This must be a signed type.  Each value is the difference in the size
   of a character (in bytes) induced by converting to lower case.
   The vast majority of values are 0, but a few are 1 or -1, so
   technically, two bits may be sufficient.

   The type is spelled 'signed char' rather than plain 'char' because
   the signedness of plain 'char' is implementation-defined, and a
   value of -1 must be representable.  */
typedef signed char mb_len_map_t;
47
/* searchutils.c */
/* Declarations only; per the heading above, the definitions are expected
   in searchutils.c.

   wordchars_size, wordchar_next and wordchar_prev are all marked
   _GL_ATTRIBUTE_PURE.  NOTE(review): presumably gnulib's wrapper for
   the compiler's "pure" function attribute -- confirm against gnulib.

   mb_goback carries no such marking and takes a 'char const **' and an
   'idx_t *', which look like in/out parameters -- TODO confirm against
   the definition.  */
extern void wordinit (void);
extern kwset_t kwsinit (bool);
extern idx_t wordchars_size (char const *, char const *) _GL_ATTRIBUTE_PURE;
extern idx_t wordchar_next (char const *, char const *) _GL_ATTRIBUTE_PURE;
extern idx_t wordchar_prev (char const *, char const *, char const *)
_GL_ATTRIBUTE_PURE;
extern ptrdiff_t mb_goback (char const **, idx_t *, char const *, char const *);
56
/* dfasearch.c */
/* Compile/execute pair.  The signatures are identical to those of the
   Fcompile/Fexecute and Pcompile/Pexecute pairs declared in this header.
   GEAcompile returns an untyped 'void *', and EGexecute takes a 'void *'
   as its first argument.  NOTE(review): presumably that argument is the
   value GEAcompile returned -- confirm in dfasearch.c.  */
extern void *GEAcompile (char *, idx_t, reg_syntax_t, bool);
extern ptrdiff_t EGexecute (void *, char const *, idx_t, idx_t *, char const *);
60
/* kwsearch.c */
/* Compile/execute pair.  The signatures are identical to those of the
   GEAcompile/EGexecute and Pcompile/Pexecute pairs declared in this
   header.  Fcompile returns an untyped 'void *', and Fexecute takes a
   'void *' as its first argument.  NOTE(review): presumably that
   argument is the value Fcompile returned -- confirm in kwsearch.c.  */
extern void *Fcompile (char *, idx_t, reg_syntax_t, bool);
extern ptrdiff_t Fexecute (void *, char const *, idx_t, idx_t *, char const *);
64
/* pcresearch.c */
/* Compile/execute pair.  The signatures are identical to those of the
   GEAcompile/EGexecute and Fcompile/Fexecute pairs declared in this
   header.  Pcompile returns an untyped 'void *', and Pexecute takes a
   'void *' as its first argument.  NOTE(review): presumably that
   argument is the value Pcompile returned -- confirm in pcresearch.c.
   Pprint_version takes no arguments and returns nothing.  */
extern void *Pcompile (char *, idx_t, reg_syntax_t, bool);
extern ptrdiff_t Pexecute (void *, char const *, idx_t, idx_t *, char const *);
extern void Pprint_version (void);
69
/* grep.c */
/* The object 'localeinfo' is declared here and defined elsewhere (per
   the heading above, in grep.c).  Its 'sbclen' member is indexed by
   mb_clen later in this header.

   fgrep_to_grep_pattern takes a 'char **' and an 'idx_t *'.
   NOTE(review): the pointer arguments suggest it rewrites a pattern and
   its length in place -- confirm against the definition.  */
extern struct localeinfo localeinfo;
extern void fgrep_to_grep_pattern (char **, idx_t *);
73
74 /* Return the number of bytes in the character at the start of S, which
75 is of size N. N must be positive. MBS is the conversion state.
76 This acts like mbrlen, except it returns -1 and -2 instead of
77 (size_t) -1 and (size_t) -2. */
78 SEARCH_INLINE ptrdiff_t
79 imbrlen (char const *s, idx_t n, mbstate_t *mbs)
80 {
81 size_t len = mbrlen (s, n, mbs);
82
83 /* Convert result to ptrdiff_t portably, even on oddball platforms.
84 When optimizing, this typically uses no machine instructions. */
85 if (len <= MB_LEN_MAX)
86 return len;
87 ptrdiff_t neglen = -len;
88 return -neglen;
89 }
90
91 /* Return the number of bytes in the character at the start of S, which
92 is of size N. N must be positive. MBS is the conversion state.
93 This acts like mbrlen, except it returns 1 when mbrlen would return 0,
94 it returns -1 and -2 instead of (size_t) -1 and (size_t) -2,
95 and it is typically faster because of the cache. */
96 SEARCH_INLINE ptrdiff_t
97 mb_clen (char const *s, idx_t n, mbstate_t *mbs)
98 {
99 signed char len = localeinfo.sbclen[to_uchar (*s)];
100 return len == -2 ? imbrlen (s, n, mbs) : len;
101 }
102
/* Takes no arguments and returns a 'char const *'.  NOTE(review):
   presumably the name of the input currently being processed, defined
   outside this header -- confirm against the definition.  */
extern char const *input_filename (void);
104
105 _GL_INLINE_HEADER_END
106
107 #endif /* GREP_SEARCH_H */