1 /* -*- mode: C; c-file-style: "gnu" -*- */
2 /* xdgmimeglob.c: Private file. Datastructure for storing the globs.
3 *
4 * More info can be found at http://www.freedesktop.org/standards/
5 *
6 * Copyright (C) 2003 Red Hat, Inc.
7 * Copyright (C) 2003 Jonathan Blandford <jrb@alum.mit.edu>
8 *
9 * SPDX-License-Identifier: LGPL-2.1-or-later or AFL-2.0
10 */
11
12 #ifdef HAVE_CONFIG_H
13 #include <config.h>
14 #endif
15
16 #include "xdgmimeglob.h"
17 #include "xdgmimeint.h"
18 #include <stdlib.h>
19 #include <stdio.h>
20 #include <assert.h>
21 #include <string.h>
22 #include <fnmatch.h>
23
24 #ifndef MAX
25 #define MAX(a,b) ((a) > (b) ? (a) : (b))
26 #endif
27
28 #ifndef FALSE
29 #define FALSE (0)
30 #endif
31
32 #ifndef TRUE
33 #define TRUE (!FALSE)
34 #endif
35
36 typedef struct XdgGlobHashNode XdgGlobHashNode;
37 typedef struct XdgGlobList XdgGlobList;
38
39 struct XdgGlobHashNode
40 {
41 xdg_unichar_t character;
42 const char *mime_type;
43 int weight;
44 int case_sensitive;
45 XdgGlobHashNode *next;
46 XdgGlobHashNode *child;
47 };
48 struct XdgGlobList
49 {
50 const char *data;
51 const char *mime_type;
52 int weight;
53 int case_sensitive;
54 XdgGlobList *next;
55 };
56
57 struct XdgGlobHash
58 {
59 XdgGlobList *literal_list;
60 XdgGlobHashNode *simple_node;
61 XdgGlobList *full_list;
62 };
63
64
65 /* XdgGlobList
66 */
67 static XdgGlobList *
68 _xdg_glob_list_new (void)
69 {
70 XdgGlobList *new_element;
71
72 new_element = calloc (1, sizeof (XdgGlobList));
73
74 return new_element;
75 }
76
77 /* Frees glob_list and all of its children */
78 static void
79 _xdg_glob_list_free (XdgGlobList *glob_list)
80 {
81 XdgGlobList *ptr, *next;
82
83 ptr = glob_list;
84
85 while (ptr != NULL)
86 {
87 next = ptr->next;
88
89 if (ptr->data)
90 free ((void *) ptr->data);
91 if (ptr->mime_type)
92 free ((void *) ptr->mime_type);
93 free (ptr);
94
95 ptr = next;
96 }
97 }
98
99 static XdgGlobList *
100 _xdg_glob_list_append (XdgGlobList *glob_list,
101 void *data,
102 const char *mime_type,
103 int weight,
104 int case_sensitive)
105 {
106 XdgGlobList *new_element;
107 XdgGlobList *tmp_element;
108
109 tmp_element = glob_list;
110 while (tmp_element != NULL)
111 {
112 if (strcmp (tmp_element->data, data) == 0 &&
113 strcmp (tmp_element->mime_type, mime_type) == 0)
114 return glob_list;
115
116 tmp_element = tmp_element->next;
117 }
118
119 new_element = _xdg_glob_list_new ();
120 new_element->data = data;
121 new_element->mime_type = mime_type;
122 new_element->weight = weight;
123 new_element->case_sensitive = case_sensitive;
124 if (glob_list == NULL)
125 return new_element;
126
127 tmp_element = glob_list;
128 while (tmp_element->next != NULL)
129 tmp_element = tmp_element->next;
130
131 tmp_element->next = new_element;
132
133 return glob_list;
134 }
135
136 /* XdgGlobHashNode
137 */
138
139 static XdgGlobHashNode *
140 _xdg_glob_hash_node_new (void)
141 {
142 XdgGlobHashNode *glob_hash_node;
143
144 glob_hash_node = calloc (1, sizeof (XdgGlobHashNode));
145
146 return glob_hash_node;
147 }
148
149 static void
150 _xdg_glob_hash_node_dump (XdgGlobHashNode *glob_hash_node,
151 int depth)
152 {
153 int i;
154 for (i = 0; i < depth; i++)
155 printf (" ");
156
157 printf ("%c", (char)glob_hash_node->character);
158 if (glob_hash_node->mime_type)
159 printf (" - %s %d\n", glob_hash_node->mime_type, glob_hash_node->weight);
160 else
161 printf ("\n");
162 if (glob_hash_node->child)
163 _xdg_glob_hash_node_dump (glob_hash_node->child, depth + 1);
164 if (glob_hash_node->next)
165 _xdg_glob_hash_node_dump (glob_hash_node->next, depth);
166 }
167
168 static XdgGlobHashNode *
169 _xdg_glob_hash_insert_ucs4 (XdgGlobHashNode *glob_hash_node,
170 xdg_unichar_t *text,
171 const char *mime_type,
172 int weight,
173 int case_sensitive)
174 {
175 XdgGlobHashNode *node;
176 xdg_unichar_t character;
177
178 character = text[0];
179
180 if ((glob_hash_node == NULL) ||
181 (character < glob_hash_node->character))
182 {
183 node = _xdg_glob_hash_node_new ();
184 node->character = character;
185 node->next = glob_hash_node;
186 glob_hash_node = node;
187 }
188 else if (character == glob_hash_node->character)
189 {
190 node = glob_hash_node;
191 }
192 else
193 {
194 XdgGlobHashNode *prev_node;
195 int found_node = FALSE;
196
197 /* Look for the first character of text in glob_hash_node, and insert it if we
198 * have to.*/
199 prev_node = glob_hash_node;
200 node = prev_node->next;
201
202 while (node != NULL)
203 {
204 if (character < node->character)
205 {
206 node = _xdg_glob_hash_node_new ();
207 node->character = character;
208 node->next = prev_node->next;
209 prev_node->next = node;
210
211 found_node = TRUE;
212 break;
213 }
214 else if (character == node->character)
215 {
216 found_node = TRUE;
217 break;
218 }
219 prev_node = node;
220 node = node->next;
221 }
222
223 if (! found_node)
224 {
225 node = _xdg_glob_hash_node_new ();
226 node->character = character;
227 node->next = prev_node->next;
228 prev_node->next = node;
229 }
230 }
231
232 text++;
233 if (*text == 0)
234 {
235 if (node->mime_type)
236 {
237 if (strcmp (node->mime_type, mime_type) != 0)
238 {
239 XdgGlobHashNode *child;
240 int found_node = FALSE;
241
242 child = node->child;
243 while (child && child->character == 0)
244 {
245 if (strcmp (child->mime_type, mime_type) == 0)
246 {
247 found_node = TRUE;
248 break;
249 }
250 child = child->next;
251 }
252
253 if (!found_node)
254 {
255 child = _xdg_glob_hash_node_new ();
256 child->character = 0;
257 child->mime_type = strdup (mime_type);
258 child->weight = weight;
259 child->case_sensitive = case_sensitive;
260 child->child = NULL;
261 child->next = node->child;
262 node->child = child;
263 }
264 }
265 }
266 else
267 {
268 node->mime_type = strdup (mime_type);
269 node->weight = weight;
270 node->case_sensitive = case_sensitive;
271 }
272 }
273 else
274 {
275 node->child = _xdg_glob_hash_insert_ucs4 (node->child, text, mime_type, weight, case_sensitive);
276 }
277 return glob_hash_node;
278 }
279
280 /* glob must be valid UTF-8 */
281 static XdgGlobHashNode *
282 _xdg_glob_hash_insert_text (XdgGlobHashNode *glob_hash_node,
283 const char *text,
284 const char *mime_type,
285 int weight,
286 int case_sensitive)
287 {
288 XdgGlobHashNode *node;
289 xdg_unichar_t *unitext;
290 int len;
291
292 unitext = _xdg_convert_to_ucs4 (text, &len);
293 _xdg_reverse_ucs4 (unitext, len);
294 node = _xdg_glob_hash_insert_ucs4 (glob_hash_node, unitext, mime_type, weight, case_sensitive);
295 free (unitext);
296 return node;
297 }
298
299 typedef struct {
300 const char *mime;
301 int weight;
302 } MimeWeight;
303
304 static int
305 _xdg_glob_hash_node_lookup_file_name (XdgGlobHashNode *glob_hash_node,
306 const char *file_name,
307 int len,
308 int case_sensitive_check,
309 MimeWeight mime_types[],
310 int n_mime_types)
311 {
312 int n;
313 XdgGlobHashNode *node;
314 xdg_unichar_t character;
315
316 if (glob_hash_node == NULL)
317 return 0;
318
319 character = file_name[len - 1];
320
321 for (node = glob_hash_node; node && character >= node->character; node = node->next)
322 {
323 if (character == node->character)
324 {
325 len--;
326 n = 0;
327 if (len > 0)
328 {
329 n = _xdg_glob_hash_node_lookup_file_name (node->child,
330 file_name,
331 len,
332 case_sensitive_check,
333 mime_types,
334 n_mime_types);
335 }
336 if (n == 0)
337 {
338 if (node->mime_type &&
339 (case_sensitive_check ||
340 !node->case_sensitive))
341 {
342 mime_types[n].mime = node->mime_type;
343 mime_types[n].weight = node->weight;
344 n++;
345 }
346 node = node->child;
347 while (n < n_mime_types && node && node->character == 0)
348 {
349 if (node->mime_type &&
350 (case_sensitive_check ||
351 !node->case_sensitive))
352 {
353 mime_types[n].mime = node->mime_type;
354 mime_types[n].weight = node->weight;
355 n++;
356 }
357 node = node->next;
358 }
359 }
360 return n;
361 }
362 }
363
364 return 0;
365 }
366
367 static int compare_mime_weight (const void *a, const void *b)
368 {
369 const MimeWeight *aa = (const MimeWeight *)a;
370 const MimeWeight *bb = (const MimeWeight *)b;
371
372 return bb->weight - aa->weight;
373 }
374
375 #define ISUPPER(c) ((c) >= 'A' && (c) <= 'Z')
376 static char *
377 ascii_tolower (const char *str)
378 {
379 char *p, *lower;
380
381 lower = strdup (str);
382 p = lower;
383 while (*p != 0)
384 {
385 char c = *p;
386 *p++ = ISUPPER (c) ? c - 'A' + 'a' : c;
387 }
388 return lower;
389 }
390
391 static int
392 filter_out_dupes (MimeWeight mimes[], int n_mimes)
393 {
394 int last;
395 int i, j;
396
397 last = n_mimes;
398
399 for (i = 0; i < last; i++)
400 {
401 j = i + 1;
402 while (j < last)
403 {
404 if (strcmp (mimes[i].mime, mimes[j].mime) == 0)
405 {
406 mimes[i].weight = MAX (mimes[i].weight, mimes[j].weight);
407 last--;
408 mimes[j].mime = mimes[last].mime;
409 mimes[j].weight = mimes[last].weight;
410 }
411 else
412 j++;
413 }
414 }
415
416 return last;
417 }
418
419 int
420 _xdg_glob_hash_lookup_file_name (XdgGlobHash *glob_hash,
421 const char *file_name,
422 const char *mime_types[],
423 int n_mime_types)
424 {
425 XdgGlobList *list;
426 int i, n;
427 MimeWeight mimes[10];
428 int n_mimes = 10;
429 int len;
430 char *lower_case;
431
432 /* First, check the literals */
433
434 assert (file_name != NULL && n_mime_types > 0);
435
436 n = 0;
437
438 lower_case = ascii_tolower (file_name);
439
440 for (list = glob_hash->literal_list; list; list = list->next)
441 {
442 if (strcmp ((const char *)list->data, file_name) == 0)
443 {
444 mime_types[0] = list->mime_type;
445 free (lower_case);
446 return 1;
447 }
448 }
449
450 for (list = glob_hash->literal_list; list; list = list->next)
451 {
452 if (!list->case_sensitive &&
453 strcmp ((const char *)list->data, lower_case) == 0)
454 {
455 mime_types[0] = list->mime_type;
456 free (lower_case);
457 return 1;
458 }
459 }
460
461
462 len = strlen (file_name);
463 n = _xdg_glob_hash_node_lookup_file_name (glob_hash->simple_node, lower_case, len, FALSE,
464 mimes, n_mimes);
465 if (n < 2)
466 n += _xdg_glob_hash_node_lookup_file_name (glob_hash->simple_node, file_name, len, TRUE,
467 mimes + n, n_mimes - n);
468
469 if (n < 2)
470 {
471 for (list = glob_hash->full_list; list && n < n_mime_types; list = list->next)
472 {
473 if (fnmatch ((const char *)list->data, file_name, 0) == 0)
474 {
475 mimes[n].mime = list->mime_type;
476 mimes[n].weight = list->weight;
477 n++;
478 }
479 }
480 }
481 free (lower_case);
482
483 n = filter_out_dupes (mimes, n);
484
485 qsort (mimes, n, sizeof (MimeWeight), compare_mime_weight);
486
487 if (n_mime_types < n)
488 n = n_mime_types;
489
490 for (i = 0; i < n; i++)
491 mime_types[i] = mimes[i].mime;
492
493 return n;
494 }
495
496
497
498 /* XdgGlobHash
499 */
500
501 XdgGlobHash *
502 _xdg_glob_hash_new (void)
503 {
504 XdgGlobHash *glob_hash;
505
506 glob_hash = calloc (1, sizeof (XdgGlobHash));
507
508 return glob_hash;
509 }
510
511
512 static void
513 _xdg_glob_hash_free_nodes (XdgGlobHashNode *node)
514 {
515 if (node)
516 {
517 if (node->child)
518 _xdg_glob_hash_free_nodes (node->child);
519 if (node->next)
520 _xdg_glob_hash_free_nodes (node->next);
521 if (node->mime_type)
522 free ((void *) node->mime_type);
523 free (node);
524 }
525 }
526
527 void
528 _xdg_glob_hash_free (XdgGlobHash *glob_hash)
529 {
530 _xdg_glob_list_free (glob_hash->literal_list);
531 _xdg_glob_list_free (glob_hash->full_list);
532 _xdg_glob_hash_free_nodes (glob_hash->simple_node);
533 free (glob_hash);
534 }
535
536 XdgGlobType
537 _xdg_glob_determine_type (const char *glob)
538 {
539 const char *ptr;
540 int maybe_in_simple_glob = FALSE;
541 int first_char = TRUE;
542
543 ptr = glob;
544
545 while (*ptr != '\0')
546 {
547 if (*ptr == '*' && first_char)
548 maybe_in_simple_glob = TRUE;
549 else if (*ptr == '\\' || *ptr == '[' || *ptr == '?' || *ptr == '*')
550 return XDG_GLOB_FULL;
551
552 first_char = FALSE;
553 ptr = _xdg_utf8_next_char (ptr);
554 }
555 if (maybe_in_simple_glob)
556 return XDG_GLOB_SIMPLE;
557 else
558 return XDG_GLOB_LITERAL;
559 }
560
561 /* glob must be valid UTF-8 */
562 void
563 _xdg_glob_hash_append_glob (XdgGlobHash *glob_hash,
564 const char *glob,
565 const char *mime_type,
566 int weight,
567 int case_sensitive)
568 {
569 XdgGlobType type;
570
571 assert (glob_hash != NULL);
572 assert (glob != NULL);
573
574 type = _xdg_glob_determine_type (glob);
575
576 switch (type)
577 {
578 case XDG_GLOB_LITERAL:
579 glob_hash->literal_list = _xdg_glob_list_append (glob_hash->literal_list, strdup (glob), strdup (mime_type), weight, case_sensitive);
580 break;
581 case XDG_GLOB_SIMPLE:
582 glob_hash->simple_node = _xdg_glob_hash_insert_text (glob_hash->simple_node, glob + 1, mime_type, weight, case_sensitive);
583 break;
584 case XDG_GLOB_FULL:
585 glob_hash->full_list = _xdg_glob_list_append (glob_hash->full_list, strdup (glob), strdup (mime_type), weight, case_sensitive);
586 break;
587 }
588 }
589
590 void
591 _xdg_glob_hash_dump (XdgGlobHash *glob_hash)
592 {
593 XdgGlobList *list;
594 printf ("LITERAL STRINGS\n");
595 if (!glob_hash || glob_hash->literal_list == NULL)
596 {
597 printf (" None\n");
598 }
599 else
600 {
601 for (list = glob_hash->literal_list; list; list = list->next)
602 printf (" %s - %s %d\n", (char *)list->data, list->mime_type, list->weight);
603 }
604 printf ("\nSIMPLE GLOBS\n");
605 if (!glob_hash || glob_hash->simple_node == NULL)
606 {
607 printf (" None\n");
608 }
609 else
610 {
611 _xdg_glob_hash_node_dump (glob_hash->simple_node, 4);
612 }
613
614 printf ("\nFULL GLOBS\n");
615 if (!glob_hash || glob_hash->full_list == NULL)
616 {
617 printf (" None\n");
618 }
619 else
620 {
621 for (list = glob_hash->full_list; list; list = list->next)
622 printf (" %s - %s %d\n", (char *)list->data, list->mime_type, list->weight);
623 }
624 }
625
626
627 void
628 _xdg_mime_glob_read_from_file (XdgGlobHash *glob_hash,
629 const char *file_name,
630 int version_two)
631 {
632 FILE *glob_file;
633 char line[255];
634 char *p;
635
636 glob_file = fopen (file_name, "r");
637
638 if (glob_file == NULL)
639 return;
640
641 /* FIXME: Not UTF-8 safe. Doesn't work if lines are greater than 255 chars.
642 * Blah */
643 while (fgets (line, 255, glob_file) != NULL)
644 {
645 char *colon;
646 char *mimetype, *glob, *end;
647 int weight;
648 int case_sensitive;
649
650 if (line[0] == '#' || line[0] == 0)
651 continue;
652
653 end = line + strlen(line) - 1;
654 if (*end == '\n')
655 *end = 0;
656
657 p = line;
658 if (version_two)
659 {
660 colon = strchr (p, ':');
661 if (colon == NULL)
662 continue;
663 *colon = 0;
664 weight = atoi (p);
665 p = colon + 1;
666 }
667 else
668 weight = 50;
669
670 colon = strchr (p, ':');
671 if (colon == NULL)
672 continue;
673 *colon = 0;
674
675 mimetype = p;
676 p = colon + 1;
677 glob = p;
678 case_sensitive = FALSE;
679
680 colon = strchr (p, ':');
681 if (version_two && colon != NULL)
682 {
683 char *flag;
684
685 /* We got flags */
686 *colon = 0;
687 p = colon + 1;
688
689 /* Flags end at next colon */
690 colon = strchr (p, ':');
691 if (colon != NULL)
692 *colon = 0;
693
694 flag = strstr (p, "cs");
695 if (flag != NULL &&
696 /* Start or after comma */
697 (flag == p ||
698 flag[-1] == ',') &&
699 /* ends with comma or end of string */
700 (flag[2] == 0 ||
701 flag[2] == ','))
702 case_sensitive = TRUE;
703 }
704
705 _xdg_glob_hash_append_glob (glob_hash, glob, mimetype, weight, case_sensitive);
706 }
707
708 fclose (glob_file);
709 }