1 /* hash - hashing table processing.
2
3 Copyright (C) 1998-2004, 2006-2007, 2009-2023 Free Software Foundation, Inc.
4
5 Written by Jim Meyering, 1992.
6
7 This file is free software: you can redistribute it and/or modify
8 it under the terms of the GNU Lesser General Public License as
9 published by the Free Software Foundation; either version 2.1 of the
10 License, or (at your option) any later version.
11
12 This file is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with this program. If not, see <https://www.gnu.org/licenses/>. */
19
20 /* A generic hash table package. */
21
22 /* Define USE_OBSTACK to 1 if you want the allocator to use obstacks instead
23 of malloc. If you change USE_OBSTACK, you have to recompile! */
24
25 #include <config.h>
26
27 #include "hash.h"
28
29 #include "bitrotate.h"
30 #include "xalloc-oversized.h"
31
32 #include <errno.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36
37 #if USE_OBSTACK
38 # include "obstack.h"
39 # ifndef obstack_chunk_alloc
40 # define obstack_chunk_alloc malloc
41 # endif
42 # ifndef obstack_chunk_free
43 # define obstack_chunk_free free
44 # endif
45 #endif
46
47 struct hash_entry
48 {
49 void *data;
50 struct hash_entry *next;
51 };
52
53 struct hash_table
54 {
  /* The array of buckets starts at BUCKET and extends to BUCKET_LIMIT-1,
     for a total of N_BUCKETS possible buckets.  Among those, N_BUCKETS_USED
     buckets are not empty; the table holds N_ENTRIES active entries.  */
58 struct hash_entry *bucket;
59 struct hash_entry const *bucket_limit;
60 size_t n_buckets;
61 size_t n_buckets_used;
62 size_t n_entries;
63
64 /* Tuning arguments, kept in a physically separate structure. */
65 const Hash_tuning *tuning;
66
  /* Three functions are given to 'hash_initialize'; see the documentation
     block for that function.  In a word, HASHER randomizes a user entry
     into a number from 0 up to some maximum minus 1; COMPARATOR returns
     true if two user entries compare equally; and DATA_FREER is the cleanup
     function for a user entry.  */
72 Hash_hasher hasher;
73 Hash_comparator comparator;
74 Hash_data_freer data_freer;
75
76 /* A linked list of freed struct hash_entry structs. */
77 struct hash_entry *free_entry_list;
78
79 #if USE_OBSTACK
80 /* Whenever obstacks are used, it is possible to allocate all overflowed
81 entries into a single stack, so they all can be freed in a single
82 operation. It is not clear if the speedup is worth the trouble. */
83 struct obstack entry_stack;
84 #endif
85 };
86
/* A hash table contains many internal entries, each holding a pointer to
   some user-provided data (also called a user entry).  The word "entry" is
   used for both the internal entry and its associated user entry.  A user
   entry's contents may be hashed by a randomization function (the hashing
   function, or just "hasher" for short) into a number (or "slot") between 0
   and the current table size minus 1.  At each slot position in the hash
   table starts a linked chain of entries whose user data all hash to that
   slot.  A bucket is the collection of all entries hashing to the same slot.

   A good "hasher" function will distribute entries rather evenly across
   buckets.  In the ideal case, the length of each bucket is roughly the
   number of entries divided by the table size.  Finding the slot for an
   entry is usually done in constant time by the "hasher", and the later
   search for a precise entry is linear in the size of its bucket.
   Consequently, a larger hash table size (that is, a larger number of
   buckets) tends to yield shorter chains, *provided* the "hasher" function
   behaves properly.

   Long buckets slow down the lookup algorithm.  One might use big hash
   table sizes in the hope of reducing the average bucket length, but this
   can waste memory, as unused slots in the hash table still take space.
   The best bet is to make sure you are using a good "hasher" function
   (beware that those are not that easy to write! :-), and to use a table
   size larger than the actual number of entries.  */
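
/* As a short illustrative sketch (not a definitive usage contract), a
   client might create and use a table roughly as follows.  The helpers
   string_hasher and string_comparator are hypothetical names written for
   this example; a real caller includes "hash.h" and <string.h>:

     static size_t
     string_hasher (const void *entry, size_t n_buckets)
     {
       const char *s = entry;
       size_t value = 0;
       unsigned char ch;

       for (; (ch = *s); s++)
         value = (value * 31 + ch) % n_buckets;
       return value;
     }

     static bool
     string_comparator (const void *a, const void *b)
     {
       return strcmp (a, b) == 0;
     }

     Hash_table *ht = hash_initialize (31, NULL, string_hasher,
                                       string_comparator, NULL);
     if (ht)
       {
         if (hash_insert (ht, "blue"))
           {
             char const *match = hash_lookup (ht, "blue");
             ...
           }
         hash_free (ht);
       }
*/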
110
111 /* If an insertion makes the ratio of nonempty buckets to table size larger
112 than the growth threshold (a number between 0.0 and 1.0), then increase
113 the table size by multiplying by the growth factor (a number greater than
114 1.0). The growth threshold defaults to 0.8, and the growth factor
115 defaults to 1.414, meaning that the table will have doubled its size
116 every second time 80% of the buckets get used. */
117 #define DEFAULT_GROWTH_THRESHOLD 0.8f
118 #define DEFAULT_GROWTH_FACTOR 1.414f
119
120 /* If a deletion empties a bucket and causes the ratio of used buckets to
121 table size to become smaller than the shrink threshold (a number between
122 0.0 and 1.0), then shrink the table by multiplying by the shrink factor (a
123 number greater than the shrink threshold but smaller than 1.0). The shrink
124 threshold and factor default to 0.0 and 1.0, meaning that the table never
125 shrinks. */
126 #define DEFAULT_SHRINK_THRESHOLD 0.0f
127 #define DEFAULT_SHRINK_FACTOR 1.0f
128
129 /* Use this to initialize or reset a TUNING structure to
130 some sensible values. */
131 static const Hash_tuning default_tuning =
132 {
133 DEFAULT_SHRINK_THRESHOLD,
134 DEFAULT_SHRINK_FACTOR,
135 DEFAULT_GROWTH_THRESHOLD,
136 DEFAULT_GROWTH_FACTOR,
137 false
138 };
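
/* A minimal sketch of supplying custom tuning (the numbers below are
   arbitrary examples, not recommendations).  Start from the defaults with
   hash_reset_tuning, defined later in this file, then override selected
   fields before passing the structure to hash_initialize.  The structure
   must stay valid for the lifetime of the table, since only its address is
   stored; my_hasher and my_comparator are hypothetical:

     static Hash_tuning my_tuning;

     hash_reset_tuning (&my_tuning);
     my_tuning.growth_threshold = 0.7f;
     my_tuning.growth_factor = 2.0f;
     my_tuning.shrink_threshold = 0.2f;
     my_tuning.shrink_factor = 0.5f;

     Hash_table *ht = hash_initialize (101, &my_tuning,
                                       my_hasher, my_comparator, NULL);
*/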
139
140 /* Information and lookup. */
141
142 size_t
143 hash_get_n_buckets (const Hash_table *table)
144 {
145 return table->n_buckets;
146 }
147
148 size_t
149 hash_get_n_buckets_used (const Hash_table *table)
150 {
151 return table->n_buckets_used;
152 }
153
154 size_t
155 hash_get_n_entries (const Hash_table *table)
156 {
157 return table->n_entries;
158 }
159
160 size_t
161 hash_get_max_bucket_length (const Hash_table *table)
162 {
163 struct hash_entry const *bucket;
164 size_t max_bucket_length = 0;
165
166 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
167 {
168 if (bucket->data)
169 {
170 struct hash_entry const *cursor = bucket;
171 size_t bucket_length = 1;
172
173 while (cursor = cursor->next, cursor)
174 bucket_length++;
175
176 if (bucket_length > max_bucket_length)
177 max_bucket_length = bucket_length;
178 }
179 }
180
181 return max_bucket_length;
182 }
183
184 bool
185 hash_table_ok (const Hash_table *table)
186 {
187 struct hash_entry const *bucket;
188 size_t n_buckets_used = 0;
189 size_t n_entries = 0;
190
191 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
192 {
193 if (bucket->data)
194 {
195 struct hash_entry const *cursor = bucket;
196
197 /* Count bucket head. */
198 n_buckets_used++;
199 n_entries++;
200
201 /* Count bucket overflow. */
202 while (cursor = cursor->next, cursor)
203 n_entries++;
204 }
205 }
206
207 if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries)
208 return true;
209
210 return false;
211 }
212
213 void
214 hash_print_statistics (const Hash_table *table, FILE *stream)
215 {
216 size_t n_entries = hash_get_n_entries (table);
217 size_t n_buckets = hash_get_n_buckets (table);
218 size_t n_buckets_used = hash_get_n_buckets_used (table);
219 size_t max_bucket_length = hash_get_max_bucket_length (table);
220
221 fprintf (stream, "# entries: %lu\n", (unsigned long int) n_entries);
222 fprintf (stream, "# buckets: %lu\n", (unsigned long int) n_buckets);
223 fprintf (stream, "# buckets used: %lu (%.2f%%)\n",
224 (unsigned long int) n_buckets_used,
225 (100.0 * n_buckets_used) / n_buckets);
226 fprintf (stream, "max bucket length: %lu\n",
227 (unsigned long int) max_bucket_length);
228 }
229
230 /* Hash KEY and return a pointer to the selected bucket.
231 If TABLE->hasher misbehaves, abort. */
232 static struct hash_entry *
233 safe_hasher (const Hash_table *table, const void *key)
234 {
235 size_t n = table->hasher (key, table->n_buckets);
236 if (! (n < table->n_buckets))
237 abort ();
238 return table->bucket + n;
239 }
240
241 void *
242 hash_lookup (const Hash_table *table, const void *entry)
243 {
244 struct hash_entry const *bucket = safe_hasher (table, entry);
245 struct hash_entry const *cursor;
246
247 if (bucket->data == NULL)
248 return NULL;
249
250 for (cursor = bucket; cursor; cursor = cursor->next)
251 if (entry == cursor->data || table->comparator (entry, cursor->data))
252 return cursor->data;
253
254 return NULL;
255 }
256
257 /* Walking. */
258
259 void *
260 hash_get_first (const Hash_table *table)
261 {
262 struct hash_entry const *bucket;
263
264 if (table->n_entries == 0)
265 return NULL;
266
267 for (bucket = table->bucket; ; bucket++)
268 if (! (bucket < table->bucket_limit))
269 abort ();
270 else if (bucket->data)
271 return bucket->data;
272 }
273
274 void *
275 hash_get_next (const Hash_table *table, const void *entry)
276 {
277 struct hash_entry const *bucket = safe_hasher (table, entry);
278 struct hash_entry const *cursor;
279
280 /* Find next entry in the same bucket. */
281 cursor = bucket;
282 do
283 {
284 if (cursor->data == entry && cursor->next)
285 return cursor->next->data;
286 cursor = cursor->next;
287 }
288 while (cursor != NULL);
289
290 /* Find first entry in any subsequent bucket. */
291 while (++bucket < table->bucket_limit)
292 if (bucket->data)
293 return bucket->data;
294
295 /* None found. */
296 return NULL;
297 }
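
/* A minimal sketch of visiting every user entry with the two functions
   above, assuming the table is not modified during the walk; PROCESS is an
   illustrative placeholder:

     void *entry;

     for (entry = hash_get_first (table); entry != NULL;
          entry = hash_get_next (table, entry))
       process (entry);
*/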
298
299 size_t
300 hash_get_entries (const Hash_table *table, void **buffer,
301 size_t buffer_size)
302 {
303 size_t counter = 0;
304 struct hash_entry const *bucket;
305 struct hash_entry const *cursor;
306
307 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
308 {
309 if (bucket->data)
310 {
311 for (cursor = bucket; cursor; cursor = cursor->next)
312 {
313 if (counter >= buffer_size)
314 return counter;
315 buffer[counter++] = cursor->data;
316 }
317 }
318 }
319
320 return counter;
321 }
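
/* A minimal sketch of snapshotting all user entries into a caller-owned
   array; BUF and N are illustrative.  hash_get_entries stores at most
   BUFFER_SIZE pointers, so the array can be sized from hash_get_n_entries:

     size_t n = hash_get_n_entries (table);
     void **buf = malloc (n * sizeof *buf);

     if (buf != NULL)
       {
         size_t filled = hash_get_entries (table, buf, n);
         ...
         free (buf);
       }
*/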
322
323 size_t
324 hash_do_for_each (const Hash_table *table, Hash_processor processor,
325 void *processor_data)
326 {
327 size_t counter = 0;
328 struct hash_entry const *bucket;
329 struct hash_entry const *cursor;
330
331 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
332 {
333 if (bucket->data)
334 {
335 for (cursor = bucket; cursor; cursor = cursor->next)
336 {
337 if (! processor (cursor->data, processor_data))
338 return counter;
339 counter++;
340 }
341 }
342 }
343
344 return counter;
345 }
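
/* A minimal sketch of a processor callback for hash_do_for_each; the name
   print_entry and the assumption that user entries are strings are purely
   illustrative.  Returning true continues the walk, and returning false
   stops it, matching the test on PROCESSOR above:

     static bool
     print_entry (void *entry, void *processor_data)
     {
       FILE *out = processor_data;

       fprintf (out, "%s\n", (char *) entry);
       return true;
     }

     size_t n_processed = hash_do_for_each (table, print_entry, stdout);
*/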
346
347 /* Allocation and clean-up. */
348
349 #if 0
350
351 #if USE_DIFF_HASH
352
353 /* About hashings, Paul Eggert writes to me (FP), on 1994-01-01: "Please see
354 B. J. McKenzie, R. Harries & T. Bell, Selecting a hashing algorithm,
355 Software--practice & experience 20, 2 (Feb 1990), 209-224. Good hash
356 algorithms tend to be domain-specific, so what's good for [diffutils'] io.c
357 may not be good for your application." */
358
359 size_t
360 hash_string (const char *string, size_t n_buckets)
361 {
362 # define HASH_ONE_CHAR(Value, Byte) \
363 ((Byte) + rotl_sz (Value, 7))
364
365 size_t value = 0;
366 unsigned char ch;
367
368 for (; (ch = *string); string++)
369 value = HASH_ONE_CHAR (value, ch);
370 return value % n_buckets;
371
372 # undef HASH_ONE_CHAR
373 }
374
375 #else /* not USE_DIFF_HASH */
376
377 /* This one comes from 'recode', and performs a bit better than the above as
378 per a few experiments. It is inspired from a hashing routine found in the
379 very old Cyber 'snoop', itself written in typical Greg Mansfield style.
380 (By the way, what happened to this excellent man? Is he still alive?) */
381
382 size_t
383 hash_string (const char *string, size_t n_buckets)
384 {
385 size_t value = 0;
386 unsigned char ch;
387
388 for (; (ch = *string); string++)
389 value = (value * 31 + ch) % n_buckets;
390 return value;
391 }
392
393 #endif /* not USE_DIFF_HASH */
394
395 #endif
396
397 /* Return true if CANDIDATE is a prime number. CANDIDATE should be an odd
398 number at least equal to 11. */
399
400 static bool _GL_ATTRIBUTE_CONST
401 is_prime (size_t candidate)
402 {
403 size_t divisor = 3;
404 size_t square = divisor * divisor;
405
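  /* Loop invariant: SQUARE == DIVISOR * DIVISOR.  Stepping DIVISOR from D
     to D + 2 adds (D + 2) * (D + 2) - D * D == 4 * (D + 1) to SQUARE, which
     the two increments in the loop body maintain without a multiplication.  */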
406 while (square < candidate && (candidate % divisor))
407 {
408 divisor++;
409 square += 4 * divisor;
410 divisor++;
411 }
412
413 return (candidate % divisor ? true : false);
414 }
415
416 /* Round a given CANDIDATE number up to the nearest prime, and return that
417 prime. Primes lower than 10 are merely skipped. */
418
419 static size_t _GL_ATTRIBUTE_CONST
420 next_prime (size_t candidate)
421 {
422 /* Skip small primes. */
423 if (candidate < 10)
424 candidate = 10;
425
426 /* Make it definitely odd. */
427 candidate |= 1;
428
429 while (SIZE_MAX != candidate && !is_prime (candidate))
430 candidate += 2;
431
432 return candidate;
433 }
434
435 void
436 hash_reset_tuning (Hash_tuning *tuning)
437 {
438 *tuning = default_tuning;
439 }
440
441 /* If the user passes a NULL hasher, we hash the raw pointer. */
442 static size_t
443 raw_hasher (const void *data, size_t n)
444 {
445 /* When hashing unique pointers, it is often the case that they were
446 generated by malloc and thus have the property that the low-order
447 bits are 0. As this tends to give poorer performance with small
448 tables, we rotate the pointer value before performing division,
449 in an attempt to improve hash quality. */
450 size_t val = rotr_sz ((size_t) data, 3);
451 return val % n;
452 }
453
454 /* If the user passes a NULL comparator, we use pointer comparison. */
455 static bool
456 raw_comparator (const void *a, const void *b)
457 {
458 return a == b;
459 }
460
461
/* For the given hash TABLE, check the user-supplied tuning structure for
   reasonable values, and return true if there is no gross error with it.
   Otherwise, definitively reset the TUNING field to some acceptable default
   in the hash table (that is, the user loses the right to further modify
   tuning arguments), and return false.  */
467
468 static bool
469 check_tuning (Hash_table *table)
470 {
471 const Hash_tuning *tuning = table->tuning;
472 float epsilon;
473 if (tuning == &default_tuning)
474 return true;
475
476 /* Be a bit stricter than mathematics would require, so that
477 rounding errors in size calculations do not cause allocations to
478 fail to grow or shrink as they should. The smallest allocation
479 is 11 (due to next_prime's algorithm), so an epsilon of 0.1
480 should be good enough. */
481 epsilon = 0.1f;
482
483 if (epsilon < tuning->growth_threshold
484 && tuning->growth_threshold < 1 - epsilon
485 && 1 + epsilon < tuning->growth_factor
486 && 0 <= tuning->shrink_threshold
487 && tuning->shrink_threshold + epsilon < tuning->shrink_factor
488 && tuning->shrink_factor <= 1
489 && tuning->shrink_threshold + epsilon < tuning->growth_threshold)
490 return true;
491
492 table->tuning = &default_tuning;
493 return false;
494 }
495
496 /* Compute the size of the bucket array for the given CANDIDATE and
497 TUNING, or return 0 if there is no possible way to allocate that
498 many entries. */
499
500 static size_t _GL_ATTRIBUTE_PURE
501 compute_bucket_size (size_t candidate, const Hash_tuning *tuning)
502 {
503 if (!tuning->is_n_buckets)
504 {
505 float new_candidate = candidate / tuning->growth_threshold;
506 if ((float) SIZE_MAX <= new_candidate)
507 goto nomem;
508 candidate = new_candidate;
509 }
510 candidate = next_prime (candidate);
511 if (xalloc_oversized (candidate, sizeof (struct hash_entry *)))
512 goto nomem;
513 return candidate;
514
515 nomem:
516 errno = ENOMEM;
517 return 0;
518 }
519
520 Hash_table *
521 hash_initialize (size_t candidate, const Hash_tuning *tuning,
522 Hash_hasher hasher, Hash_comparator comparator,
523 Hash_data_freer data_freer)
524 {
525 Hash_table *table;
526
527 if (hasher == NULL)
528 hasher = raw_hasher;
529 if (comparator == NULL)
530 comparator = raw_comparator;
531
532 table = malloc (sizeof *table);
533 if (table == NULL)
534 return NULL;
535
536 if (!tuning)
537 tuning = &default_tuning;
538 table->tuning = tuning;
539 if (!check_tuning (table))
540 {
541 /* Fail if the tuning options are invalid. This is the only occasion
542 when the user gets some feedback about it. Once the table is created,
543 if the user provides invalid tuning options, we silently revert to
         using the defaults, and ignore further requests to change the tuning
545 options. */
546 errno = EINVAL;
547 goto fail;
548 }
549
550 table->n_buckets = compute_bucket_size (candidate, tuning);
551 if (!table->n_buckets)
552 goto fail;
553
554 table->bucket = calloc (table->n_buckets, sizeof *table->bucket);
555 if (table->bucket == NULL)
556 goto fail;
557 table->bucket_limit = table->bucket + table->n_buckets;
558 table->n_buckets_used = 0;
559 table->n_entries = 0;
560
561 table->hasher = hasher;
562 table->comparator = comparator;
563 table->data_freer = data_freer;
564
565 table->free_entry_list = NULL;
566 #if USE_OBSTACK
567 obstack_init (&table->entry_stack);
568 #endif
569 return table;
570
571 fail:
572 free (table);
573 return NULL;
574 }
575
576 void
577 hash_clear (Hash_table *table)
578 {
579 struct hash_entry *bucket;
580
581 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
582 {
583 if (bucket->data)
584 {
585 struct hash_entry *cursor;
586 struct hash_entry *next;
587
588 /* Free the bucket overflow. */
589 for (cursor = bucket->next; cursor; cursor = next)
590 {
591 if (table->data_freer)
592 table->data_freer (cursor->data);
593 cursor->data = NULL;
594
595 next = cursor->next;
596 /* Relinking is done one entry at a time, as it is to be expected
597 that overflows are either rare or short. */
598 cursor->next = table->free_entry_list;
599 table->free_entry_list = cursor;
600 }
601
602 /* Free the bucket head. */
603 if (table->data_freer)
604 table->data_freer (bucket->data);
605 bucket->data = NULL;
606 bucket->next = NULL;
607 }
608 }
609
610 table->n_buckets_used = 0;
611 table->n_entries = 0;
612 }
613
614 void
615 hash_free (Hash_table *table)
616 {
617 struct hash_entry *bucket;
618 struct hash_entry *cursor;
619 struct hash_entry *next;
620 int err = errno;
621
622 /* Call the user data_freer function. */
623 if (table->data_freer && table->n_entries)
624 {
625 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
626 {
627 if (bucket->data)
628 {
629 for (cursor = bucket; cursor; cursor = cursor->next)
630 table->data_freer (cursor->data);
631 }
632 }
633 }
634
635 #if USE_OBSTACK
636
637 obstack_free (&table->entry_stack, NULL);
638
639 #else
640
641 /* Free all bucket overflowed entries. */
642 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
643 {
644 for (cursor = bucket->next; cursor; cursor = next)
645 {
646 next = cursor->next;
647 free (cursor);
648 }
649 }
650
651 /* Also reclaim the internal list of previously freed entries. */
652 for (cursor = table->free_entry_list; cursor; cursor = next)
653 {
654 next = cursor->next;
655 free (cursor);
656 }
657
658 #endif
659
660 /* Free the remainder of the hash table structure. */
661 free (table->bucket);
662 free (table);
663
664 errno = err;
665 }
666
667 /* Insertion and deletion. */
668
669 /* Get a new hash entry for a bucket overflow, possibly by recycling a
670 previously freed one. If this is not possible, allocate a new one. */
671
672 static struct hash_entry *
673 allocate_entry (Hash_table *table)
674 {
675 struct hash_entry *new;
676
677 if (table->free_entry_list)
678 {
679 new = table->free_entry_list;
680 table->free_entry_list = new->next;
681 }
682 else
683 {
684 #if USE_OBSTACK
685 new = obstack_alloc (&table->entry_stack, sizeof *new);
686 #else
687 new = malloc (sizeof *new);
688 #endif
689 }
690
691 return new;
692 }
693
694 /* Free a hash entry which was part of some bucket overflow,
695 saving it for later recycling. */
696
697 static void
698 free_entry (Hash_table *table, struct hash_entry *entry)
699 {
700 entry->data = NULL;
701 entry->next = table->free_entry_list;
702 table->free_entry_list = entry;
703 }
704
705 /* This private function is used to help with insertion and deletion. When
706 ENTRY matches an entry in the table, return a pointer to the corresponding
707 user data and set *BUCKET_HEAD to the head of the selected bucket.
708 Otherwise, return NULL. When DELETE is true and ENTRY matches an entry in
709 the table, unlink the matching entry. */
710
711 static void *
712 hash_find_entry (Hash_table *table, const void *entry,
713 struct hash_entry **bucket_head, bool delete)
714 {
715 struct hash_entry *bucket = safe_hasher (table, entry);
716 struct hash_entry *cursor;
717
718 *bucket_head = bucket;
719
720 /* Test for empty bucket. */
721 if (bucket->data == NULL)
722 return NULL;
723
724 /* See if the entry is the first in the bucket. */
725 if (entry == bucket->data || table->comparator (entry, bucket->data))
726 {
727 void *data = bucket->data;
728
729 if (delete)
730 {
731 if (bucket->next)
732 {
733 struct hash_entry *next = bucket->next;
734
735 /* Bump the first overflow entry into the bucket head, then save
736 the previous first overflow entry for later recycling. */
737 *bucket = *next;
738 free_entry (table, next);
739 }
740 else
741 {
742 bucket->data = NULL;
743 }
744 }
745
746 return data;
747 }
748
749 /* Scan the bucket overflow. */
750 for (cursor = bucket; cursor->next; cursor = cursor->next)
751 {
752 if (entry == cursor->next->data
753 || table->comparator (entry, cursor->next->data))
754 {
755 void *data = cursor->next->data;
756
757 if (delete)
758 {
759 struct hash_entry *next = cursor->next;
760
761 /* Unlink the entry to delete, then save the freed entry for later
762 recycling. */
763 cursor->next = next->next;
764 free_entry (table, next);
765 }
766
767 return data;
768 }
769 }
770
771 /* No entry found. */
772 return NULL;
773 }
774
775 /* Internal helper, to move entries from SRC to DST. Both tables must
776 share the same free entry list. If SAFE, only move overflow
777 entries, saving bucket heads for later, so that no allocations will
778 occur. Return false (setting errno) if the free entry list is
779 exhausted and an allocation fails. */
780
781 static bool
782 transfer_entries (Hash_table *dst, Hash_table *src, bool safe)
783 {
784 struct hash_entry *bucket;
785 struct hash_entry *cursor;
786 struct hash_entry *next;
787 for (bucket = src->bucket; bucket < src->bucket_limit; bucket++)
788 if (bucket->data)
789 {
790 void *data;
791 struct hash_entry *new_bucket;
792
793 /* Within each bucket, transfer overflow entries first and
794 then the bucket head, to minimize memory pressure. After
795 all, the only time we might allocate is when moving the
796 bucket head, but moving overflow entries first may create
797 free entries that can be recycled by the time we finally
798 get to the bucket head. */
799 for (cursor = bucket->next; cursor; cursor = next)
800 {
801 data = cursor->data;
802 new_bucket = safe_hasher (dst, data);
803
804 next = cursor->next;
805
806 if (new_bucket->data)
807 {
808 /* Merely relink an existing entry, when moving from a
809 bucket overflow into a bucket overflow. */
810 cursor->next = new_bucket->next;
811 new_bucket->next = cursor;
812 }
813 else
814 {
815 /* Free an existing entry, when moving from a bucket
816 overflow into a bucket header. */
817 new_bucket->data = data;
818 dst->n_buckets_used++;
819 free_entry (dst, cursor);
820 }
821 }
        /* Now move the bucket head.  Make sure that if we fail due to an
           allocation failure, the src table is left in a consistent
           state.  */
825 data = bucket->data;
826 bucket->next = NULL;
827 if (safe)
828 continue;
829 new_bucket = safe_hasher (dst, data);
830
831 if (new_bucket->data)
832 {
833 /* Allocate or recycle an entry, when moving from a bucket
834 header into a bucket overflow. */
835 struct hash_entry *new_entry = allocate_entry (dst);
836
837 if (new_entry == NULL)
838 return false;
839
840 new_entry->data = data;
841 new_entry->next = new_bucket->next;
842 new_bucket->next = new_entry;
843 }
844 else
845 {
846 /* Move from one bucket header to another. */
847 new_bucket->data = data;
848 dst->n_buckets_used++;
849 }
850 bucket->data = NULL;
851 src->n_buckets_used--;
852 }
853 return true;
854 }
855
856 bool
857 hash_rehash (Hash_table *table, size_t candidate)
858 {
859 Hash_table storage;
860 Hash_table *new_table;
861 size_t new_size = compute_bucket_size (candidate, table->tuning);
862
863 if (!new_size)
864 return false;
865 if (new_size == table->n_buckets)
866 return true;
867 new_table = &storage;
868 new_table->bucket = calloc (new_size, sizeof *new_table->bucket);
869 if (new_table->bucket == NULL)
870 return false;
871 new_table->n_buckets = new_size;
872 new_table->bucket_limit = new_table->bucket + new_size;
873 new_table->n_buckets_used = 0;
874 new_table->n_entries = 0;
875 new_table->tuning = table->tuning;
876 new_table->hasher = table->hasher;
877 new_table->comparator = table->comparator;
878 new_table->data_freer = table->data_freer;
879
880 /* In order for the transfer to successfully complete, we need
881 additional overflow entries when distinct buckets in the old
882 table collide into a common bucket in the new table. The worst
883 case possible is a hasher that gives a good spread with the old
884 size, but returns a constant with the new size; if we were to
885 guarantee table->n_buckets_used-1 free entries in advance, then
886 the transfer would be guaranteed to not allocate memory.
887 However, for large tables, a guarantee of no further allocation
888 introduces a lot of extra memory pressure, all for an unlikely
889 corner case (most rehashes reduce, rather than increase, the
890 number of overflow entries needed). So, we instead ensure that
891 the transfer process can be reversed if we hit a memory
892 allocation failure mid-transfer. */
893
894 /* Merely reuse the extra old space into the new table. */
895 #if USE_OBSTACK
896 new_table->entry_stack = table->entry_stack;
897 #endif
898 new_table->free_entry_list = table->free_entry_list;
899
900 if (transfer_entries (new_table, table, false))
901 {
902 /* Entries transferred successfully; tie up the loose ends. */
903 free (table->bucket);
904 table->bucket = new_table->bucket;
905 table->bucket_limit = new_table->bucket_limit;
906 table->n_buckets = new_table->n_buckets;
907 table->n_buckets_used = new_table->n_buckets_used;
908 table->free_entry_list = new_table->free_entry_list;
909 /* table->n_entries and table->entry_stack already hold their value. */
910 return true;
911 }
912
913 /* We've allocated new_table->bucket (and possibly some entries),
914 exhausted the free list, and moved some but not all entries into
915 new_table. We must undo the partial move before returning
916 failure. The only way to get into this situation is if new_table
917 uses fewer buckets than the old table, so we will reclaim some
918 free entries as overflows in the new table are put back into
919 distinct buckets in the old table.
920
921 There are some pathological cases where a single pass through the
922 table requires more intermediate overflow entries than using two
     passes.  Two passes give worse cache performance and take
924 longer, but at this point, we're already out of memory, so slow
925 and safe is better than failure. */
926 int err = errno;
927 table->free_entry_list = new_table->free_entry_list;
928 if (! (transfer_entries (table, new_table, true)
929 && transfer_entries (table, new_table, false)))
930 abort ();
931 /* table->n_entries already holds its value. */
932 free (new_table->bucket);
933 errno = err;
934 return false;
935 }
936
937 int
938 hash_insert_if_absent (Hash_table *table, void const *entry,
939 void const **matched_ent)
940 {
941 void *data;
942 struct hash_entry *bucket;
943
944 /* The caller cannot insert a NULL entry, since hash_lookup returns NULL
945 to indicate "not found", and hash_find_entry uses "bucket->data == NULL"
946 to indicate an empty bucket. */
947 if (! entry)
948 abort ();
949
950 /* If there's a matching entry already in the table, return that. */
951 if ((data = hash_find_entry (table, entry, &bucket, false)) != NULL)
952 {
953 if (matched_ent)
954 *matched_ent = data;
955 return 0;
956 }
957
958 /* If the growth threshold of the buckets in use has been reached, increase
959 the table size and rehash. There's no point in checking the number of
960 entries: if the hashing function is ill-conditioned, rehashing is not
961 likely to improve it. */
962
963 if (table->n_buckets_used
964 > table->tuning->growth_threshold * table->n_buckets)
965 {
966 /* Check more fully, before starting real work. If tuning arguments
967 became invalid, the second check will rely on proper defaults. */
968 check_tuning (table);
969 if (table->n_buckets_used
970 > table->tuning->growth_threshold * table->n_buckets)
971 {
972 const Hash_tuning *tuning = table->tuning;
973 float candidate =
974 (tuning->is_n_buckets
975 ? (table->n_buckets * tuning->growth_factor)
976 : (table->n_buckets * tuning->growth_factor
977 * tuning->growth_threshold));
978
979 if ((float) SIZE_MAX <= candidate)
980 {
981 errno = ENOMEM;
982 return -1;
983 }
984
          /* If the rehash fails, arrange to return -1.  */
986 if (!hash_rehash (table, candidate))
987 return -1;
988
989 /* Update the bucket we are interested in. */
990 if (hash_find_entry (table, entry, &bucket, false) != NULL)
991 abort ();
992 }
993 }
994
995 /* ENTRY is not matched, it should be inserted. */
996
997 if (bucket->data)
998 {
999 struct hash_entry *new_entry = allocate_entry (table);
1000
1001 if (new_entry == NULL)
1002 return -1;
1003
1004 /* Add ENTRY in the overflow of the bucket. */
1005
1006 new_entry->data = (void *) entry;
1007 new_entry->next = bucket->next;
1008 bucket->next = new_entry;
1009 table->n_entries++;
1010 return 1;
1011 }
1012
1013 /* Add ENTRY right in the bucket head. */
1014
1015 bucket->data = (void *) entry;
1016 table->n_entries++;
1017 table->n_buckets_used++;
1018
1019 return 1;
1020 }
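
/* A minimal sketch of acting on the three possible results above; ITEM and
   the three handlers are illustrative placeholders, and ITEM must not be
   NULL:

     void const *matched;

     switch (hash_insert_if_absent (table, item, &matched))
       {
       case -1: handle_out_of_memory ();  break;
       case 0:  use_existing_entry (matched);  break;
       case 1:  use_inserted_entry (item);  break;
       }
*/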
1021
1022 void *
1023 hash_insert (Hash_table *table, void const *entry)
1024 {
1025 void const *matched_ent;
1026 int err = hash_insert_if_absent (table, entry, &matched_ent);
1027 return (err == -1
1028 ? NULL
1029 : (void *) (err == 0 ? matched_ent : entry));
1030 }
1031
1032 void *
1033 hash_remove (Hash_table *table, const void *entry)
1034 {
1035 void *data;
1036 struct hash_entry *bucket;
1037
1038 data = hash_find_entry (table, entry, &bucket, true);
1039 if (!data)
1040 return NULL;
1041
1042 table->n_entries--;
1043 if (!bucket->data)
1044 {
1045 table->n_buckets_used--;
1046
1047 /* If the shrink threshold of the buckets in use has been reached,
1048 rehash into a smaller table. */
1049
1050 if (table->n_buckets_used
1051 < table->tuning->shrink_threshold * table->n_buckets)
1052 {
1053 /* Check more fully, before starting real work. If tuning arguments
1054 became invalid, the second check will rely on proper defaults. */
1055 check_tuning (table);
1056 if (table->n_buckets_used
1057 < table->tuning->shrink_threshold * table->n_buckets)
1058 {
1059 const Hash_tuning *tuning = table->tuning;
1060 size_t candidate =
1061 (tuning->is_n_buckets
1062 ? table->n_buckets * tuning->shrink_factor
1063 : (table->n_buckets * tuning->shrink_factor
1064 * tuning->growth_threshold));
1065
1066 if (!hash_rehash (table, candidate))
1067 {
1068 /* Failure to allocate memory in an attempt to
1069 shrink the table is not fatal. But since memory
1070 is low, we can at least be kind and free any
1071 spare entries, rather than keeping them tied up
1072 in the free entry list. */
1073 #if ! USE_OBSTACK
1074 struct hash_entry *cursor = table->free_entry_list;
1075 struct hash_entry *next;
1076 while (cursor)
1077 {
1078 next = cursor->next;
1079 free (cursor);
1080 cursor = next;
1081 }
1082 table->free_entry_list = NULL;
1083 #endif
1084 }
1085 }
1086 }
1087 }
1088
1089 return data;
1090 }
1091
1092 void *
1093 hash_delete (Hash_table *table, const void *entry)
1094 {
1095 return hash_remove (table, entry);
1096 }
1097
1098 /* Testing. */
1099
1100 #if TESTING
1101
1102 void
1103 hash_print (const Hash_table *table)
1104 {
1105 struct hash_entry *bucket = (struct hash_entry *) table->bucket;
1106
1107 for ( ; bucket < table->bucket_limit; bucket++)
1108 {
1109 struct hash_entry *cursor;
1110
      if (bucket->data)
1112 printf ("%lu:\n", (unsigned long int) (bucket - table->bucket));
1113
1114 for (cursor = bucket; cursor; cursor = cursor->next)
1115 {
1116 char const *s = cursor->data;
1117 /* FIXME */
1118 if (s)
1119 printf (" %s\n", s);
1120 }
1121 }
1122 }
1123
1124 #endif /* TESTING */