1 /*
2 * db_lookup.c: low level database interface routines for man.
3 *
4 * Copyright (C) 1994, 1995 Graeme W. Wilford. (Wilf.)
5 * Copyright (C) 2001, 2002, 2003, 2006, 2007, 2008, 2009, 2012
6 * Colin Watson.
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Library General Public License for more details.
17 *
18 * You should have received a copy of the GNU Library General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 *
22 * Mon Aug 8 20:35:30 BST 1994 Wilf. (G.Wilford@ee.surrey.ac.uk)
23 */
24
25 #ifdef HAVE_CONFIG_H
26 # include "config.h"
27 #endif /* HAVE_CONFIG_H */
28
29 #include <assert.h>
30 #include <stdbool.h>
31 #include <stdio.h>
32 #include <errno.h>
33 #include <ctype.h>
34 #include <string.h>
35 #include <stdlib.h>
36 #include <unistd.h>
37
38 #include "attribute.h"
39 #include "error.h"
40 #include "fnmatch.h"
41 #include "gl_array_list.h"
42 #include "gl_xlist.h"
43 #include "regex.h"
44 #include "xalloc.h"
45 #include "xvasprintf.h"
46
47 #include "gettext.h"
48 #define _(String) gettext (String)
49
50 #include "manconfig.h"
51
52 #include "debug.h"
53 #include "fatal.h"
54 #include "filenames.h"
55 #include "glcontainers.h"
56 #include "wordfnmatch.h"
57 #include "xregcomp.h"
58
59 #include "mydbm.h"
60 #include "db_storage.h"
61
62 /* If using ndbm or BTREE, copy the static storage before doing anything
63 * interesting with it. If using gdbm, firstkey and nextkey need to copy the
64 * storage because our ordered wrappers keep an effectively static copy.
65 */
66 datum copy_datum (datum dat)
67 {
68 if (MYDBM_DPTR (dat)) {
69 MYDBM_SET_DPTR (dat, memcpy (xmalloc (MYDBM_DSIZE (dat) + 1),
70 MYDBM_DPTR (dat),
71 MYDBM_DSIZE (dat)));
72 MYDBM_DPTR (dat)[MYDBM_DSIZE (dat)] = '\0';
73 }
74 return dat;
75 }
76
/* gdbm does locking itself, so this helper is only built for the other
 * back ends.
 *
 * Report that the index cache FILENAME could not be locked.  The current
 * value of errno is passed to error() along with the message.
 */
#if defined(NDBM) || defined(BTREE)
void gripe_lock (const char *filename)
{
	error (0, errno,
	       _("can't lock index cache %s"),
	       filename);
}
#endif /* NDBM || BTREE */
84
85 /* issue fatal message, then exit */
86 _Noreturn void gripe_corrupt_data (MYDBM_FILE dbf)
87 {
88 fatal (0, _("index cache %s corrupt"), dbf->name);
89 }
90
91 /* deal with situation where we cannot replace a key */
92 _Noreturn void gripe_replace_key (MYDBM_FILE dbf, const char *data)
93 {
94 error (0, 0, _("cannot replace key %s"), data);
95 gripe_corrupt_data (dbf);
96 }
97
/* Return a newly allocated copy of STR, or NULL if STR is the "-"
 * placeholder that stands for an unset field.
 */
static char *copy_if_set (const char *str)
{
	return STREQ (str, "-") ? NULL : xstrdup (str);
}
105
106 const char * ATTRIBUTE_CONST dash_if_unset (const char *str)
107 {
108 if (str)
109 return str;
110 else
111 return "-";
112 }
113
114 /* Just print out what would be stored in the db */
115 void dbprintf (const struct mandata *info)
116 {
117 debug ("name: %s\n"
118 "sec. ext: %s\n"
119 "section: %s\n"
120 "comp. ext: %s\n"
121 "id: %c\n"
122 "mtime: %ld.%09ld\n"
123 "pointer: %s\n"
124 "filter: %s\n"
125 "whatis: %s\n\n",
126 dash_if_unset (info->name),
127 info->ext, info->sec, info->comp,
128 info->id, (long) info->mtime.tv_sec, (long) info->mtime.tv_nsec,
129 info->pointer, info->filter, info->whatis);
130 }
131
132 /* Form a multi-style key from page and extension info. The page should
133 * *not* be name_to_key()'d - that should only happen to the parent.
134 */
135 datum make_multi_key (const char *page, const char *ext)
136 {
137 datum key;
138 char *value;
139
140 value = xasprintf ("%s\t%s", page, ext);
141 assert (value);
142 memset (&key, 0, sizeof key);
143 MYDBM_SET (key, value);
144 return key;
145 }
146
/* Get the key that should be used for a given name: a copy of NAME with
 * every character passed through tolower.  The caller is responsible for
 * freeing the return value.
 */
char *name_to_key (const char *name)
{
	const size_t len = strlen (name);
	char *key = xmalloc (len + 1);
	size_t i;

	for (i = 0; i < len; i++)
		key[i] = CTYPE (tolower, name[i]);
	key[len] = '\0';

	return key;
}
160
161 /* return char ptr array to the data's fields */
162 static char **split_data (MYDBM_FILE dbf, char *content, char *start[])
163 {
164 int count;
165
166 /* initialise pointers to first N-1 fields */
167 for (count = 0; count < FIELDS - 1 ; count++) {
168 start[count] = strsep (&content, "\t");
169 if (!start[count]) {
170 error (0, 0,
171 ngettext ("only %d field in content",
172 "only %d fields in content", count),
173 count);
174 gripe_corrupt_data (dbf);
175 }
176 }
177
178 /* initialise pointer to Nth field (whatis) */
179 start[FIELDS - 1] = content;
180 if (!start[FIELDS - 1]) {
181 error (0, 0,
182 ngettext ("only %d field in content",
183 "only %d fields in content", FIELDS - 1),
184 FIELDS - 1);
185 gripe_corrupt_data (dbf);
186 }
187
188 return start;
189 }
190
191 /* Parse the db-returned data and put it into a mandata format */
192 struct mandata *split_content (MYDBM_FILE dbf, char *cont_ptr)
193 {
194 struct mandata *info;
195 char *start[FIELDS];
196 char **data;
197
198 data = split_data (dbf, cont_ptr, start);
199
200 info = XZALLOC (struct mandata);
201 info->name = copy_if_set (*(data++));
202 info->ext = xstrdup (*(data++));
203 info->sec = xstrdup (*(data++));
204 info->mtime.tv_sec = (time_t) atol (*(data++));
205 info->mtime.tv_nsec = atol (*(data++));
206 info->id = **(data++); /* single char id */
207 info->pointer = xstrdup (*(data++));
208 info->filter = xstrdup (*(data++));
209 info->comp = xstrdup (*(data++));
210 info->whatis = xstrdup (*(data));
211 return info;
212 }
213
214 bool ATTRIBUTE_PURE name_ext_equals (const void *elt1, const void *elt2)
215 {
216 const struct name_ext *ref1 = elt1, *ref2 = elt2;
217 return STREQ (ref1->name, ref2->name) && STREQ (ref1->ext, ref2->ext);
218 }
219
220 int ATTRIBUTE_PURE name_ext_compare (const void *elt1, const void *elt2)
221 {
222 const struct name_ext *ref1 = elt1, *ref2 = elt2;
223 int name_cmp = strcmp (ref1->name, ref2->name);
224 if (name_cmp)
225 return name_cmp;
226 return strcmp (ref1->ext, ref2->ext);
227 }
228
229 /* Extract all of the names/extensions associated with this key. Each case
230 * variant of a name will be returned separately.
231 *
232 * This returns a newly-allocated list of struct name_ext, which the caller
233 * is expected to free.
234 */
235 gl_list_t list_extensions (char *data)
236 {
237 gl_list_t list = gl_list_create_empty (GL_ARRAY_LIST, name_ext_equals,
238 NULL, plain_free, true);
239 char *name;
240
241 while ((name = strsep (&data, "\t")) != NULL) {
242 char *ext;
243 struct name_ext *name_ext;
244
245 ext = strsep (&data, "\t");
246 if (!ext)
247 break;
248
249 name_ext = XMALLOC (struct name_ext);
250 /* Don't copy these; they will point into the given string. */
251 name_ext->name = name;
252 name_ext->ext = ext;
253 gl_sortedlist_add (list, name_ext_compare, name_ext);
254 }
255
256 debug ("found %zu names/extensions\n", gl_list_size (list));
257 return list;
258 }
259
/* Flags for dblookup ().  These should be bitwise-ored together.
 *
 * ALL         no bits set: a requested section only has to match the
 *             start of a page's extension.
 * EXACT       a requested section must equal the page's extension.
 * MATCH_CASE  the stored page name must equal the requested name exactly.
 */
#define ALL		0
#define EXACT		(1 << 0)
#define MATCH_CASE	(1 << 1)
264
265 /*
266 There are three possibilities on lookup:
267
268 1) No data exists, lookup will fail, zero-length list will be returned.
269 2) One data item exists. Item is returned as first in list of structures.
270 3) Many items exist. They are all returned, in a multiple structure list.
271 */
272 static gl_list_t dblookup (MYDBM_FILE dbf, const char *page,
273 const char *section, int flags)
274 {
275 gl_list_t infos;
276 struct mandata *info = NULL;
277 datum key, cont;
278
279 infos = gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL,
280 (gl_listelement_dispose_fn)
281 free_mandata_struct,
282 true);
283
284 memset (&key, 0, sizeof key);
285 memset (&cont, 0, sizeof cont);
286
287 MYDBM_SET (key, name_to_key (page));
288 cont = MYDBM_FETCH (dbf, key);
289 MYDBM_FREE_DPTR (key);
290
291 if (MYDBM_DPTR (cont) == NULL) /* No entries at all */
292 ;
293 else if (*MYDBM_DPTR (cont) != '\t') { /* Just one entry */
294 bool matches = false;
295
296 info = split_content (dbf, MYDBM_DPTR (cont));
297 if (!info->name)
298 info->name = xstrdup (page);
299 if (!(flags & MATCH_CASE) || STREQ (info->name, page)) {
300 if (section == NULL)
301 matches = true;
302 else if (flags & EXACT) {
303 if (STREQ (section, info->ext))
304 matches = true;
305 } else {
306 if (STRNEQ (section, info->ext,
307 strlen (section)))
308 matches = true;
309 }
310 }
311 if (matches)
312 gl_list_add_last (infos, info);
313 else
314 free_mandata_struct (info);
315 } else { /* Multiple entries */
316 gl_list_t refs;
317 struct name_ext *ref;
318
319 /* Extract all of the case-variant-names/extensions
320 * associated with this key.
321 */
322
323 refs = list_extensions (MYDBM_DPTR (cont) + 1);
324
325 /* Make the multi keys and look them up */
326
327 GL_LIST_FOREACH (refs, ref) {
328 datum multi_cont;
329
330 memset (&multi_cont, 0, sizeof multi_cont);
331
332 /* Decide whether this part of a multi key is
333 * suitable.
334 */
335
336 if ((flags & MATCH_CASE) && !STREQ (ref->name, page))
337 continue;
338
339 if (section != NULL) {
340 if (flags & EXACT) {
341 if (!STREQ (section, ref->ext))
342 continue;
343 } else {
344 if (!STRNEQ (section, ref->ext,
345 strlen (section)))
346 continue;
347 }
348 }
349
350 /* So the key is suitable ... */
351 key = make_multi_key (ref->name, ref->ext);
352 debug ("multi key lookup (%s)\n", MYDBM_DPTR (key));
353 multi_cont = MYDBM_FETCH (dbf, key);
354 if (MYDBM_DPTR (multi_cont) == NULL) {
355 error (0, 0, _("bad fetch on multi key %s"),
356 MYDBM_DPTR (key));
357 gripe_corrupt_data (dbf);
358 }
359 MYDBM_FREE_DPTR (key);
360
361 /* Allocate info struct and add it to the list. */
362 info = split_content (dbf, MYDBM_DPTR (multi_cont));
363 if (!info->name)
364 info->name = xstrdup (ref->name);
365 gl_list_add_last (infos, info);
366 }
367
368 gl_list_free (refs);
369 }
370 MYDBM_FREE_DPTR (cont);
371
372 return infos;
373 }
374
375 gl_list_t dblookup_all (MYDBM_FILE dbf, const char *page,
376 const char *section, bool match_case)
377 {
378 return dblookup (dbf, page, section,
379 ALL | (match_case ? MATCH_CASE : 0));
380 }
381
382 struct mandata *dblookup_exact (MYDBM_FILE dbf, const char *page,
383 const char *section, bool match_case)
384 {
385 gl_list_t infos = dblookup (dbf, page, section,
386 EXACT | (match_case ? MATCH_CASE : 0));
387 struct mandata *info = NULL;
388
389 if (gl_list_size (infos)) {
390 /* Return the first item and free the rest of the list. */
391 info = (struct mandata *) gl_list_get_at (infos, 0);
392 gl_list_set_at (infos, 0, NULL); /* steal memory */
393 }
394 gl_list_free (infos);
395 return info;
396 }
397
398 gl_list_t dblookup_pattern (MYDBM_FILE dbf, const char *pattern,
399 const char *section, bool match_case,
400 bool pattern_regex, bool try_descriptions)
401 {
402 gl_list_t infos;
403 datum key, cont;
404 regex_t preg;
405
406 infos = gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL,
407 (gl_listelement_dispose_fn)
408 free_mandata_struct,
409 true);
410
411 if (pattern_regex)
412 xregcomp (&preg, pattern,
413 REG_EXTENDED | REG_NOSUB |
414 (match_case ? 0 : REG_ICASE));
415
416 #ifndef BTREE
417 datum nextkey;
418
419 key = MYDBM_FIRSTKEY (dbf);
420 while (MYDBM_DPTR (key)) {
421 cont = MYDBM_FETCH (dbf, key);
422 #else /* BTREE */
423 int end;
424
425 end = man_btree_nextkeydata (dbf, &key, &cont);
426 while (!end) {
427 #endif /* !BTREE */
428 struct mandata *info = NULL;
429 char *tab;
430 bool got_match;
431
432 if (!MYDBM_DPTR (cont))
433 {
434 debug ("key was %s\n", MYDBM_DPTR (key));
435 fatal (0,
436 _("Database %s corrupted; rebuild with "
437 "mandb --create"),
438 dbf->name);
439 }
440
441 if (*MYDBM_DPTR (key) == '$')
442 goto nextpage;
443
444 #pragma GCC diagnostic push
445 #if GNUC_PREREQ(10,0)
446 # pragma GCC diagnostic ignored "-Wanalyzer-use-after-free"
447 #endif
448 if (*MYDBM_DPTR (cont) == '\t')
449 goto nextpage;
450 #pragma GCC diagnostic pop
451
452 /* a real page */
453
454 info = split_content (dbf, MYDBM_DPTR (cont));
455
456 /* If there's a section given, does it match either the
457 * section or extension of this page?
458 */
459 if (section &&
460 (!STREQ (section, info->sec) &&
461 !STREQ (section, info->ext)))
462 goto nextpage;
463
464 tab = strrchr (MYDBM_DPTR (key), '\t');
465 if (tab)
466 *tab = '\0';
467
468 if (!info->name)
469 info->name = xstrdup (MYDBM_DPTR (key));
470
471 if (pattern_regex)
472 got_match = (regexec (&preg, info->name,
473 0, NULL, 0) == 0);
474 else
475 got_match = fnmatch (pattern, info->name,
476 match_case ? 0
477 : FNM_CASEFOLD) == 0;
478 if (try_descriptions && !got_match && info->whatis) {
479 if (pattern_regex)
480 got_match = (regexec (&preg, info->whatis,
481 0, NULL, 0) == 0);
482 else
483 got_match = word_fnmatch (pattern,
484 info->whatis);
485 }
486 if (!got_match)
487 goto nextpage_tab;
488
489 gl_list_add_last (infos, info);
490 info = NULL; /* avoid freeing later */
491
492 nextpage_tab:
493 if (tab)
494 *tab = '\t';
495 nextpage:
496 #ifndef BTREE
497 nextkey = MYDBM_NEXTKEY (dbf, key);
498 MYDBM_FREE_DPTR (cont);
499 MYDBM_FREE_DPTR (key);
500 key = nextkey;
501 #else /* BTREE */
502 MYDBM_FREE_DPTR (cont);
503 MYDBM_FREE_DPTR (key);
504 end = man_btree_nextkeydata (dbf, &key, &cont);
505 #endif /* !BTREE */
506 free_mandata_struct (info);
507 }
508
509 if (pattern_regex)
510 regfree (&preg);
511
512 return infos;
513 }