1 /* frcode -- front-compress a sorted list
2 Copyright (C) 1994-2022 Free Software Foundation, Inc.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18 /* Usage: frcode < sorted-list > compressed-list
19
20 Uses front compression (also known as incremental encoding);
21 see ";login:", March 1983, p. 8.
22
23 The input is a sorted list of NUL-terminated strings (or
24 newline-terminated if the -0 option is not given).
25
26 The output entries are in the same order as the input; each entry
27 consists of a signed offset-differential count byte (the additional
28 number of characters of prefix of the preceding entry to use beyond
29 the number that the preceding entry is using of its predecessor),
30 followed by a null-terminated ASCII remainder.
31
32 If the offset-differential count is larger than can be stored
33 in a byte (+/-127), the byte has the value LOCATEDB_ESCAPE
34 and the count follows in a 2-byte word, with the high byte first
35 (network byte order).
36
37 Example:
38
39 Input, with NULs changed to newlines:
40 /usr/src
41 /usr/src/cmd/aardvark.c
42 /usr/src/cmd/armadillo.c
43 /usr/tmp/zoo
44
45 Length of the longest prefix of the preceding entry to share:
46 0 /usr/src
47 8 /cmd/aardvark.c
48 14 rmadillo.c
49 5 tmp/zoo
50
51 Output, with NULs changed to newlines and count bytes made printable:
52 0 LOCATE02
53 0 /usr/src
54 8 /cmd/aardvark.c
55 6 rmadillo.c
56 -9 tmp/zoo
57
58 (6 = 14 - 8, and -9 = 5 - 14)
59
60 Written by James A. Woods <jwoods@adobe.com>.
61 Modified by David MacKenzie <djm@gnu.org>.
62 Modified by James Youngman <jay@gnu.org>.
63 */
64
65 /* config.h must be included first. */
66 #include <config.h>
67
68 /* system headers. */
69 #include <assert.h>
70 #include <errno.h>
71 #include <getopt.h>
72 #include <limits.h>
73 #include <stdbool.h>
74 #include <stdio.h>
75 #include <stdlib.h>
76 #include <string.h>
77 #include <sys/types.h>
78
79 /* gnulib headers. */
80 #include "closeout.h"
81 #include "error.h"
82 #include "progname.h"
83 #include "xalloc.h"
84
85 /* find headers. */
86 #include "system.h"
87 #include "bugreports.h"
88 #include "die.h"
89 #include "findutils-version.h"
90 #include "gcc-function-attributes.h"
91 #include "locatedb.h"
92
93
/* Write the low 16 bits of C to FP as two bytes, high byte first
 * (network byte order).
 *
 * C must lie in the range SHRT_MIN..SHRT_MAX; this is asserted.
 * Negative values are written as their 16-bit two's complement
 * bit pattern.
 *
 * Return true (nonzero) iff both bytes were written successfully.
 */
static int
put_short (int c, FILE *fp)
{
  unsigned int bits;

  assert (c <= SHRT_MAX);
  assert (c >= SHRT_MIN);

  /* Conversion of a negative int to unsigned is fully defined
   * (reduction modulo UINT_MAX + 1), so this yields the two's
   * complement bit pattern without relying on the
   * implementation-defined result of right-shifting a negative value.
   */
  bits = (unsigned int) c & 0xFFFFu;

  return (putc ((int) (bits >> 8), fp) != EOF)
    && (putc ((int) (bits & 0xFFu), fp) != EOF);
}
108
/* Count how many leading characters strings S1 and S2 have in common.
 *
 * The scan stops at the first differing character or at the end of S1.
 * The result is capped at INT_MAX - 1 so that it always fits in the
 * int return type.
 */
static int
prefix_length (char *s1, char *s2)
{
  int shared = 0;

  while (s1[shared] == s2[shared] && s1[shared] != '\0')
    {
      /* Refuse to count past what the return type can represent. */
      if (shared == INT_MAX - 1)
        break;
      shared++;
    }
  return shared;
}
126
/* Long command-line options understood by getopt_long ().  Each entry
 * takes no argument and maps onto the short option character that
 * main () switches on.  The all-zero entry terminates the table.
 */
static struct option const longopts[] =
{
  { .name = "help",    .has_arg = no_argument, .flag = NULL, .val = 'h' },
  { .name = "version", .has_arg = no_argument, .flag = NULL, .val = 'v' },
  { .name = "null",    .has_arg = no_argument, .flag = NULL, .val = '0' },
  { .name = NULL,      .has_arg = no_argument, .flag = NULL, .val = 0 }
};

/* Declared for use by this program; the definition is not in this file. */
extern char *version_string;
136
137 static void _GL_ATTRIBUTE_NORETURN
138 usage (int status)
139 {
140 if (status != EXIT_SUCCESS)
141 {
142 fprintf (stderr, _("Try '%s --help' for more information.\n"), program_name);
143 exit (status);
144 }
145
146 fprintf (stdout,
147 _("Usage: %s [-0 | --null] [--version] [--help]\n"),
148 program_name);
149
150 explain_how_to_report_bugs (stdout, program_name);
151 exit (status);
152 }
153
/* Parse S as a decimal security level and return its value.
 *
 * The whole of S must be a valid decimal integer representable as a
 * long.  If S contains no digits, is out of range for strtol (), or
 * has trailing characters after the number, a diagnostic is issued
 * via die () and the program exits with EXIT_FAILURE.
 */
static long
get_seclevel (char *s)
{
  long result;
  char *p;

  /* Reset errno in order to be able to distinguish LONG_MAX/LONG_MIN
   * from values which are actually out of range.
   */
  errno = 0;

  result = strtol (s, &p, 10);
  if (p == s)
    {
      /* strtol () consumed nothing.  Compare against our own argument
       * rather than the global optarg, so that the check is correct
       * for any caller.
       */
      die (EXIT_FAILURE, 0,
           _("You need to specify a security level as a decimal integer."));
      /*NOTREACHED*/
      return -1;
    }
  else if ((LONG_MIN == result || LONG_MAX == result) && errno)
    {
      die (EXIT_FAILURE, 0,
           _("Security level %s is outside the convertible range."), s);
      /*NOTREACHED*/
      return -1;
    }
  else if (*p)
    {
      /* Some suffix exists */
      die (EXIT_FAILURE, 0,
           _("Security level %s has unexpected suffix %s."), s, p);
      /*NOTREACHED*/
      return -1;
    }
  else
    {
      return result;
    }
}
194
/* Report a failed write and terminate the program with EXIT_FAILURE.
 * The diagnostic text matches the one closeout () would print.
 */
static void
outerr (void)
{
  int write_errno = errno;

  die (EXIT_FAILURE, write_errno, _("write error"));
}
201
202 int
203 main (int argc, char **argv)
204 {
205 char *path; /* The current input entry. */
206 char *oldpath; /* The previous input entry. */
207 size_t pathsize, oldpathsize; /* Amounts allocated for them. */
208 int count, oldcount, diffcount; /* Their prefix lengths & the difference. */
209 int line_len; /* Length of input line. */
210 int delimiter = '\n';
211 int optc;
212 int slocate_compat = 0;
213 long slocate_seclevel = 0L;
214
215 if (argv[0])
216 set_program_name (argv[0]);
217 else
218 set_program_name ("frcode");
219
220 if (atexit (close_stdout))
221 {
222 die (EXIT_FAILURE, errno, _("The atexit library function failed"));
223 }
224
225 pathsize = oldpathsize = 1026; /* Increased as necessary by getline. */
226 path = xmalloc (pathsize);
227 oldpath = xmalloc (oldpathsize);
228
229 oldpath[0] = 0;
230 oldcount = 0;
231
232
233 while ((optc = getopt_long (argc, argv, "hv0S:", longopts, (int *) 0)) != -1)
234 switch (optc)
235 {
236 case '0':
237 delimiter = 0;
238 break;
239
240 case 'S':
241 slocate_compat = 1;
242 slocate_seclevel = get_seclevel (optarg);
243 if (slocate_seclevel < 0 || slocate_seclevel > 1)
244 {
245 die (EXIT_FAILURE, 0,
246 _("slocate security level %ld is unsupported."),
247 slocate_seclevel);
248 }
249 break;
250
251 case 'h':
252 usage (EXIT_SUCCESS);
253
254 case 'v':
255 display_findutils_version ("frcode");
256 return 0;
257
258 default:
259 usage (EXIT_FAILURE);
260 }
261
262 /* We expect to have no arguments. */
263 if (optind != argc)
264 {
265 error (0, 0, _("no argument expected."));
266 usage (EXIT_FAILURE);
267 }
268
269
270 if (slocate_compat)
271 {
272 fputc (slocate_seclevel ? '1' : '0', stdout);
273 fputc (0, stdout);
274
275 }
276 else
277 {
278 /* GNU LOCATE02 format */
279 if (fwrite (LOCATEDB_MAGIC, 1, sizeof (LOCATEDB_MAGIC), stdout)
280 != sizeof (LOCATEDB_MAGIC))
281 {
282 die (EXIT_FAILURE, errno, _("Failed to write to standard output"));
283 }
284 }
285
286
287 while ((line_len = getdelim (&path, &pathsize, delimiter, stdin)) > 0)
288 {
289 if (path[line_len - 1] != delimiter)
290 {
291 error (0, 0, _("The input file should end with the delimiter"));
292 }
293 else
294 {
295 path[line_len - 1] = '\0'; /* FIXME temporary: nuke the delimiter. */
296 }
297
298 count = prefix_length (oldpath, path);
299 diffcount = count - oldcount;
300 if ( (diffcount > SHRT_MAX) || (diffcount < SHRT_MIN) )
301 {
302 /* We do this to prevent overflow of the value we
303 * write with put_short ()
304 */
305 count = 0;
306 diffcount = (-oldcount);
307 }
308 oldcount = count;
309
310 if (slocate_compat)
311 {
312 /* Emit no count for the first pathname. */
313 slocate_compat = 0;
314 }
315 else
316 {
317 /* If the difference is small, it fits in one byte;
318 otherwise, two bytes plus a marker noting that fact. */
319 if (diffcount < LOCATEDB_ONEBYTE_MIN
320 || diffcount > LOCATEDB_ONEBYTE_MAX)
321 {
322 if (EOF == putc (LOCATEDB_ESCAPE, stdout))
323 outerr ();
324 if (!put_short (diffcount, stdout))
325 outerr ();
326 }
327 else
328 {
329 if (EOF == putc (diffcount, stdout))
330 outerr ();
331 }
332 }
333
334 if ( (EOF == fputs (path + count, stdout))
335 || (EOF == putc ('\0', stdout)))
336 {
337 outerr ();
338 }
339
340 if (1)
341 {
342 /* Swap path and oldpath and their sizes. */
343 char *tmppath = oldpath;
344 size_t tmppathsize = oldpathsize;
345 oldpath = path;
346 oldpathsize = pathsize;
347 path = tmppath;
348 pathsize = tmppathsize;
349 }
350 }
351
352 free (path);
353 free (oldpath);
354
355 return 0;
356 }