1 /* Pattern Matcher for Fixed String search.
2 Copyright (C) 1992, 1998, 2000, 2005-2006, 2010, 2013, 2020, 2023 Free Software Foundation, Inc.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
16
17 #ifdef HAVE_CONFIG_H
18 # include <config.h>
19 #endif
20
21 /* Specification. */
22 #include "libgrep.h"
23
24 #include <ctype.h>
25 #include <limits.h>
26 #include <stdbool.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <wchar.h>
30
31 #include "error.h"
32 #include "exitfail.h"
33 #include "xalloc.h"
34 #include "kwset.h"
35 #include "gettext.h"
36 #define _(str) gettext (str)
37
/* Yield the lower-case equivalent of C if C is an upper-case letter in the
   current locale, otherwise C unchanged.  C is evaluated more than once,
   so it must be free of side effects, and it must be a value acceptable to
   the <ctype.h> functions (representable as unsigned char, or EOF).  */
#define TOLOWER(C) (isupper (C) ? tolower (C) : (C))
/* True if C can be part of a word: an alphanumeric character or '_'.
   C is evaluated twice and is subject to the same <ctype.h> restrictions;
   callers pass it cast to unsigned char.  */
#define IS_WORD_CONSTITUENT(C) (isalnum (C) || (C) == '_')

/* Number of distinct unsigned char values; the size of the translation
   table built by kwsinit.  */
#define NCHAR (UCHAR_MAX + 1)
42
/* A compiled fixed-string pattern together with the matching options it
   was compiled with.  Created by Fcompile, consumed by Fexecute, released
   by Ffree.  */
struct compiled_kwset {
  /* Keyword set obtained from kwsalloc and filled by kwsincr/kwsprep.  */
  kwset_t kwset;
  /* NCHAR-entry lower-casing table handed to kwsalloc when matching
     case-insensitively; NULL for case-sensitive matching.  */
  char *trans;
  /* Accept only matches delimited by non-word characters.  */
  bool match_words;
  /* Accept only matches that span an entire line.  */
  bool match_lines;
  /* The byte that terminates a line in the searched buffer.  */
  char eolbyte;
};
50
51 static void
52 kwsinit (struct compiled_kwset *ckwset,
53 bool match_icase, bool match_words, bool match_lines, char eolbyte)
54 {
55 if (match_icase)
56 {
57 int i;
58
59 ckwset->trans = XNMALLOC (NCHAR, char);
60 for (i = 0; i < NCHAR; i++)
61 ckwset->trans[i] = TOLOWER (i);
62 ckwset->kwset = kwsalloc (ckwset->trans);
63 }
64 else
65 {
66 ckwset->trans = NULL;
67 ckwset->kwset = kwsalloc (NULL);
68 }
69 if (ckwset->kwset == NULL)
70 error (exit_failure, 0, _("memory exhausted"));
71 ckwset->match_words = match_words;
72 ckwset->match_lines = match_lines;
73 ckwset->eolbyte = eolbyte;
74 }
75
76 static void *
77 Fcompile (const char *pattern, size_t pattern_size,
78 bool match_icase, bool match_words, bool match_lines,
79 char eolbyte)
80 {
81 struct compiled_kwset *ckwset;
82 const char *beg;
83 const char *err;
84
85 ckwset = XMALLOC (struct compiled_kwset);
86 kwsinit (ckwset, match_icase, match_words, match_lines, eolbyte);
87
88 beg = pattern;
89 do
90 {
91 const char *lim;
92
93 for (lim = beg; lim < pattern + pattern_size && *lim != '\n'; ++lim)
94 ;
95 if ((err = kwsincr (ckwset->kwset, beg, lim - beg)) != NULL)
96 error (exit_failure, 0, "%s", err);
97 if (lim < pattern + pattern_size)
98 ++lim;
99 beg = lim;
100 }
101 while (beg < pattern + pattern_size);
102
103 if ((err = kwsprep (ckwset->kwset)) != NULL)
104 error (exit_failure, 0, "%s", err);
105 return ckwset;
106 }
107
108 /* This function allocate the array which correspond to "buf".
109 Then this check multibyte string and mark on the positions which
110 are not singlebyte character nor the first byte of a multibyte
111 character. Caller must free the array. */
112 static char*
113 check_multibyte_string (const char *buf, size_t buf_size)
114 {
115 char *mb_properties;
116 mbstate_t cur_state;
117 int i;
118
119 mb_properties = (char *) malloc (buf_size);
120 if (mb_properties == NULL)
121 error (exit_failure, 0, _("memory exhausted"));
122
123 memset (&cur_state, 0, sizeof (mbstate_t));
124 memset (mb_properties, 0, sizeof (char) * buf_size);
125 for (i = 0; i < buf_size ;)
126 {
127 size_t mbclen;
128 mbclen = mbrlen (buf + i, buf_size - i, &cur_state);
129
130 if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
131 {
132 /* An invalid sequence, or a truncated multibyte character.
133 We treat it as a singlebyte character. */
134 mbclen = 1;
135 }
136 mb_properties[i] = mbclen;
137 i += mbclen;
138 }
139
140 return mb_properties;
141 }
142
143 static size_t
144 Fexecute (const void *compiled_pattern, const char *buf, size_t buf_size,
145 size_t *match_size, bool exact)
146 {
147 const struct compiled_kwset *ckwset =
148 (const struct compiled_kwset *) compiled_pattern;
149 char eol = ckwset->eolbyte;
150 register const char *buflim = buf + buf_size;
151 register const char *beg;
152 register size_t len;
153 char *mb_properties;
154 if (MB_CUR_MAX > 1)
155 mb_properties = check_multibyte_string (buf, buf_size);
156 else
157 mb_properties = NULL;
158
159 for (beg = buf; beg <= buflim; ++beg)
160 {
161 struct kwsmatch kwsmatch;
162 size_t offset = kwsexec (ckwset->kwset, beg, buflim - beg, &kwsmatch);
163 if (offset == (size_t) -1)
164 {
165 free (mb_properties);
166 return offset;
167 }
168 if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0)
169 continue; /* It is a part of multibyte character. */
170 beg += offset;
171 len = kwsmatch.size[0];
172 if (exact)
173 {
174 *match_size = len;
175 free (mb_properties);
176 return beg - buf;
177 }
178 if (ckwset->match_lines)
179 {
180 if (beg > buf && beg[-1] != eol)
181 continue;
182 if (beg + len < buflim && beg[len] != eol)
183 continue;
184 goto success;
185 }
186 else if (ckwset->match_words)
187 {
188 register const char *curr;
189 for (curr = beg; len; )
190 {
191 if (curr > buf && IS_WORD_CONSTITUENT ((unsigned char) curr[-1]))
192 break;
193 if (curr + len < buflim
194 && IS_WORD_CONSTITUENT ((unsigned char) curr[len]))
195 {
196 offset = kwsexec (ckwset->kwset, beg, --len, &kwsmatch);
197 if (offset == (size_t) -1)
198 {
199 free (mb_properties);
200 return offset;
201 }
202 curr = beg + offset;
203 len = kwsmatch.size[0];
204 }
205 else
206 goto success;
207 }
208 }
209 else
210 goto success;
211 }
212
213 free (mb_properties);
214 return -1;
215
216 success:
217 {
218 register const char *end;
219
220 end = (const char *) memchr (beg + len, eol, buflim - (beg + len));
221 if (end != NULL)
222 end++;
223 else
224 end = buflim;
225 while (buf < beg && beg[-1] != eol)
226 --beg;
227 *match_size = end - beg;
228 free (mb_properties);
229 return beg - buf;
230 }
231 }
232
233 static void
234 Ffree (void *compiled_pattern)
235 {
236 struct compiled_kwset *ckwset = (struct compiled_kwset *) compiled_pattern;
237
238 free (ckwset->trans);
239 free (ckwset);
240 }
241
/* The fixed-string matcher, exported to the rest of the program.
   The initializer is positional: it supplies this file's compile, execute
   and free functions in that order.
   NOTE(review): this relies on matcher_t (declared in libgrep.h) listing
   its members in the same order -- confirm against the header.  */
matcher_t matcher_fgrep =
  {
    Fcompile,
    Fexecute,
    Ffree
  };
248