1 /* hash.c -- hash table maintenance
2 Copyright (C) 1995, 1999, 2002, 2010 Free Software Foundation, Inc.
3 Written by Greg McGary <gkm@gnu.org> <greg@mcgary.org>
4
5 GNU Make is free software; you can redistribute it and/or modify it under the
6 terms of the GNU General Public License as published by the Free Software
7 Foundation; either version 3 of the License, or (at your option) any later
8 version.
9
10 GNU Make is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12 A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License along with
15 this program. If not, see <https://www.gnu.org/licenses/>. */
16
17 #include "makeint.h"
18 #include "hash.h"
19 #include <assert.h>
20
21 #define CALLOC(t, n) ((t *) xcalloc (sizeof (t) * (n)))
22 #define MALLOC(t, n) ((t *) xmalloc (sizeof (t) * (n)))
23 #define REALLOC(o, t, n) ((t *) xrealloc ((o), sizeof (t) * (n)))
24 #define CLONE(o, t, n) ((t *) memcpy (MALLOC (t, (n)), (o), sizeof (t) * (n)))
25
26 static void hash_rehash __P((struct hash_table* ht));
27 static unsigned long round_up_2 __P((unsigned long rough));
28
29 /* Implement double hashing with open addressing. The table size is
30 always a power of two. The secondary ('increment') hash function
31 is forced to return an odd-value, in order to be relatively prime
32 to the table size. This guarantees that the increment can
33 potentially hit every slot in the table during collision
34 resolution. */
35
36 void *hash_deleted_item = &hash_deleted_item;
37
38 /* Force the table size to be a power of two, possibly rounding up the
39 given size. */
40
41 void
42 hash_init (struct hash_table *ht, unsigned long size,
43 hash_func_t hash_1, hash_func_t hash_2, hash_cmp_func_t hash_cmp)
44 {
45 ht->ht_size = round_up_2 (size);
46 ht->ht_empty_slots = ht->ht_size;
47 ht->ht_vec = CALLOC (void *, ht->ht_size);
48 if (ht->ht_vec == 0)
49 {
50 fprintf (stderr, _("can't allocate %lu bytes for hash table: memory exhausted"),
51 ht->ht_size * (unsigned long) sizeof (void *));
52 exit (MAKE_TROUBLE);
53 }
54
55 ht->ht_capacity = ht->ht_size - (ht->ht_size / 16); /* 93.75% loading factor */
56 ht->ht_fill = 0;
57 ht->ht_collisions = 0;
58 ht->ht_lookups = 0;
59 ht->ht_rehashes = 0;
60 ht->ht_hash_1 = hash_1;
61 ht->ht_hash_2 = hash_2;
62 ht->ht_compare = hash_cmp;
63 }
64
65 /* Load an array of items into 'ht'. */
66
/* Bulk-insert CARDINALITY items, each SIZE bytes apart, starting at
   ITEM_TABLE, into HT.  */
void
hash_load (struct hash_table *ht, void *item_table,
           unsigned long cardinality, unsigned long size)
{
  char *base = (char *) item_table;
  unsigned long i;

  for (i = 0; i < cardinality; i++)
    hash_insert (ht, base + i * size);
}
78
79 /* Returns the address of the table slot matching 'key'. If 'key' is
80 not found, return the address of an empty slot suitable for
81 inserting 'key'. The caller is responsible for incrementing
82 ht_fill on insertion. */
83
void **
hash_find_slot (struct hash_table *ht, const void *key)
{
  void **slot;
  void **deleted_slot = 0;
  /* Secondary hash: computed lazily, only on the first collision.  */
  unsigned int hash_2 = 0;
  unsigned int hash_1 = (*ht->ht_hash_1) (key);

  ht->ht_lookups++;
  for (;;)
    {
      /* ht_size is a power of two, so masking is "mod table size".  */
      hash_1 &= (ht->ht_size - 1);
      slot = &ht->ht_vec[hash_1];

      /* A truly empty slot terminates the probe: KEY is absent.
         Prefer reusing the first deleted slot seen, if any, so
         insertions recycle tombstones.  */
      if (*slot == 0)
        return (deleted_slot ? deleted_slot : slot);
      if (*slot == hash_deleted_item)
        {
          /* Remember only the first tombstone along the probe path.  */
          if (deleted_slot == 0)
            deleted_slot = slot;
        }
      else
        {
          /* Cheap identity check first, full comparison second.  */
          if (key == *slot)
            return slot;
          if ((*ht->ht_compare) (key, *slot) == 0)
            return slot;
          ht->ht_collisions++;
        }
      /* Forcing the increment odd makes it relatively prime to the
         power-of-two table size, so the probe can reach every slot.  */
      if (!hash_2)
        hash_2 = (*ht->ht_hash_2) (key) | 1;
      hash_1 += hash_2;
    }
}
118
/* Return the item matching KEY in HT, or null when no match exists.  */
void *
hash_find_item (struct hash_table *ht, const void *key)
{
  void **slot = hash_find_slot (ht, key);

  if (HASH_VACANT (*slot))
    return 0;
  return *slot;
}
125
/* Insert ITEM into HT.  Returns the previous item with an equal key,
   or null if the key was not present before.  */
void *
hash_insert (struct hash_table *ht, const void *item)
{
  void **slot = hash_find_slot (ht, item);
  const void *prev = *slot;

  hash_insert_at (ht, item, slot);
  if (HASH_VACANT (prev))
    return 0;
  return (void *) prev;
}
134
135 void *
136 hash_insert_at (struct hash_table *ht, const void *item, const void *slot)
137 {
138 const void *old_item = *(void **) slot;
139 if (HASH_VACANT (old_item))
140 {
141 ht->ht_fill++;
142 if (old_item == 0)
143 ht->ht_empty_slots--;
144 old_item = item;
145 }
146 *(void const **) slot = item;
147 if (ht->ht_empty_slots < ht->ht_size - ht->ht_capacity)
148 {
149 hash_rehash (ht);
150 return (void *) hash_find_slot (ht, item);
151 }
152 else
153 return (void *) slot;
154 }
155
/* Remove the item matching ITEM's key from HT.  Returns the removed
   item, or null if no match was found.  */
void *
hash_delete (struct hash_table *ht, const void *item)
{
  return hash_delete_at (ht, hash_find_slot (ht, item));
}
162
163 void *
164 hash_delete_at (struct hash_table *ht, const void *slot)
165 {
166 void *item = *(void **) slot;
167 if (!HASH_VACANT (item))
168 {
169 *(void const **) slot = hash_deleted_item;
170 ht->ht_fill--;
171 return item;
172 }
173 else
174 return 0;
175 }
176
177 void
178 hash_free_items (struct hash_table *ht)
179 {
180 void **vec = ht->ht_vec;
181 void **end = &vec[ht->ht_size];
182 for (; vec < end; vec++)
183 {
184 void *item = *vec;
185 if (!HASH_VACANT (item))
186 free (item);
187 *vec = 0;
188 }
189 ht->ht_fill = 0;
190 ht->ht_empty_slots = ht->ht_size;
191 }
192
193 void
194 hash_delete_items (struct hash_table *ht)
195 {
196 void **vec = ht->ht_vec;
197 void **end = &vec[ht->ht_size];
198 for (; vec < end; vec++)
199 *vec = 0;
200 ht->ht_fill = 0;
201 ht->ht_collisions = 0;
202 ht->ht_lookups = 0;
203 ht->ht_rehashes = 0;
204 ht->ht_empty_slots = ht->ht_size;
205 }
206
207 void
208 hash_free (struct hash_table *ht, int free_items)
209 {
210 if (free_items)
211 hash_free_items (ht);
212 else
213 {
214 ht->ht_fill = 0;
215 ht->ht_empty_slots = ht->ht_size;
216 }
217 free (ht->ht_vec);
218 ht->ht_vec = 0;
219 ht->ht_capacity = 0;
220 }
221
222 void
223 hash_map (struct hash_table *ht, hash_map_func_t map)
224 {
225 void **slot;
226 void **end = &ht->ht_vec[ht->ht_size];
227
228 for (slot = ht->ht_vec; slot < end; slot++)
229 {
230 if (!HASH_VACANT (*slot))
231 (*map) (*slot);
232 }
233 }
234
235 void
236 hash_map_arg (struct hash_table *ht, hash_map_arg_func_t map, void *arg)
237 {
238 void **slot;
239 void **end = &ht->ht_vec[ht->ht_size];
240
241 for (slot = ht->ht_vec; slot < end; slot++)
242 {
243 if (!HASH_VACANT (*slot))
244 (*map) (*slot, arg);
245 }
246 }
247
248 /* Double the size of the hash table in the event of overflow... */
249
/* Re-insert every live entry into a fresh slot vector, doubling the
   table only when it is genuinely full.  Deleted-item markers are
   dropped in the process, which is how tombstones get reclaimed even
   when the table is not grown.  */
static void
hash_rehash (struct hash_table *ht)
{
  unsigned long old_ht_size = ht->ht_size;
  void **old_vec = ht->ht_vec;
  void **ovp;

  if (ht->ht_fill >= ht->ht_capacity)
    {
      /* Really full (not just clogged with tombstones): double the
         size and recompute the 15/16 load limit.  */
      ht->ht_size *= 2;
      ht->ht_capacity = ht->ht_size - (ht->ht_size >> 4);
    }
  ht->ht_rehashes++;
  ht->ht_vec = CALLOC (void *, ht->ht_size);

  for (ovp = old_vec; ovp < &old_vec[old_ht_size]; ovp++)
    {
      if (! HASH_VACANT (*ovp))
        {
          /* NOTE: hash_find_slot bumps ht_lookups/ht_collisions, so
             the statistics include this rehash traffic.  The new
             vector has no tombstones, so the slot found is empty.  */
          void **slot = hash_find_slot (ht, *ovp);
          *slot = *ovp;
        }
    }
  ht->ht_empty_slots = ht->ht_size - ht->ht_fill;
  free (old_vec);
}
276
277 void
278 hash_print_stats (struct hash_table *ht, FILE *out_FILE)
279 {
280 fprintf (out_FILE, _("Load=%lu/%lu=%.0f%%, "), ht->ht_fill, ht->ht_size,
281 100.0 * (double) ht->ht_fill / (double) ht->ht_size);
282 fprintf (out_FILE, _("Rehash=%u, "), ht->ht_rehashes);
283 fprintf (out_FILE, _("Collisions=%lu/%lu=%.0f%%"), ht->ht_collisions, ht->ht_lookups,
284 (ht->ht_lookups
285 ? (100.0 * (double) ht->ht_collisions / (double) ht->ht_lookups)
286 : 0));
287 }
288
289 /* Dump all items into a NULL-terminated vector. Use the
290 user-supplied vector, or malloc one. */
291
292 void **
293 hash_dump (struct hash_table *ht, void **vector_0, qsort_cmp_t compare)
294 {
295 void **vector;
296 void **slot;
297 void **end = &ht->ht_vec[ht->ht_size];
298
299 if (vector_0 == 0)
300 vector_0 = MALLOC (void *, ht->ht_fill + 1);
301 vector = vector_0;
302
303 for (slot = ht->ht_vec; slot < end; slot++)
304 if (!HASH_VACANT (*slot))
305 *vector++ = *slot;
306 *vector = 0;
307
308 if (compare)
309 qsort (vector_0, ht->ht_fill, sizeof (void *), compare);
310 return vector_0;
311 }
312
313 /* Round a given number up to the nearest power of 2. */
314
/* Return the smallest power of 2 strictly greater than all of N's set
   bits: 0 -> 1, 7 -> 8, 8 -> 16, 1000 -> 1024.  Works by smearing the
   top set bit into every lower position, then adding one.  */
static unsigned long
round_up_2 (unsigned long n)
{
  n |= (n >> 1);
  n |= (n >> 2);
  n |= (n >> 4);
  n |= (n >> 8);
  n |= (n >> 16);

  /* Propagate across the upper half on systems where unsigned long is
     wider than 32 bits.  Written as two shifts because a single
     "n >> 32" is undefined behavior when unsigned long is exactly 32
     bits wide (shift count >= type width); this form is defined and a
     no-op there, so no preprocessor guard is needed.  */
  n |= (n >> 31) >> 1;

  return n + 1;
}
331
/* Rotate the 32-bit value V left by N bits (N must be 1..31).  */
#define rol32(v, n) \
  ((v) << (n) | ((v) >> (32 - (n))))

/* jhash_mix -- mix 3 32-bit values reversibly. */
#define jhash_mix(a, b, c) \
{ \
  a -= c;  a ^= rol32(c, 4);  c += b; \
  b -= a;  b ^= rol32(a, 6);  a += c; \
  c -= b;  c ^= rol32(b, 8);  b += a; \
  a -= c;  a ^= rol32(c, 16); c += b; \
  b -= a;  b ^= rol32(a, 19); a += c; \
  c -= b;  c ^= rol32(b, 4);  b += a; \
}

/* jhash_final - final mixing of 3 32-bit values (a,b,c) into c */
#define jhash_final(a, b, c) \
{ \
  c ^= b; c -= rol32(b, 14); \
  a ^= c; a -= rol32(c, 11); \
  b ^= a; b -= rol32(a, 25); \
  c ^= b; c -= rol32(b, 16); \
  a ^= c; a -= rol32(c, 4);  \
  b ^= a; b -= rol32(a, 14); \
  c ^= b; c -= rol32(b, 24); \
}

/* An arbitrary initial parameter */
#define JHASH_INITVAL 0xdeadbeef

/* Add the 32-bit word at P to R, read via memcpy so unaligned and
   strict-aliasing-hostile pointers are safe.  The do/while(0) body
   deliberately has no trailing semicolon: call sites supply it, so
   the macro behaves as a single statement even in an unbraced
   if/else.  (The previous definition ended in "while(0);", which
   would break such uses.)  */
#define sum_get_unaligned_32(r, p) \
  do { \
    unsigned int val; \
    memcpy(&val, (p), 4); \
    r += val; \
  } while (0)

/* Jenkins-style hash of the LENGTH bytes at K.  LENGTH must be
   non-negative.  Whole 12-byte blocks are mixed in 32-bit words read
   in native byte order, while the 1..4-byte tail is composed
   least-significant-byte first, so the value is stable within a run
   but not necessarily across platforms -- do not persist it.  */
unsigned int
jhash(unsigned const char *k, int length)
{
  unsigned int a, b, c;

  /* Set up the internal state */
  a = b = c = JHASH_INITVAL + length;

  /* All but the last block: affect some 32 bits of (a,b,c) */
  while (length > 12) {
    sum_get_unaligned_32(a, k);
    sum_get_unaligned_32(b, k + 4);
    sum_get_unaligned_32(c, k + 8);
    jhash_mix(a, b, c);
    length -= 12;
    k += 12;
  }

  if (!length)
    return c;

  /* Fold the remaining 1..12 bytes: any leading whole words go into
     a and b ...  */
  if (length > 8)
    {
      sum_get_unaligned_32(a, k);
      length -= 4;
      k += 4;
    }
  if (length > 4)
    {
      sum_get_unaligned_32(b, k);
      length -= 4;
      k += 4;
    }

  /* ... and the final 1..4 bytes go into c, lowest byte first.  */
  if (length == 4)
    c += (unsigned)k[3]<<24;
  if (length >= 3)
    c += (unsigned)k[2]<<16;
  if (length >= 2)
    c += (unsigned)k[1]<<8;
  c += k[0];
  jhash_final(a, b, c);
  return c;
}
412
#define UINTSZ sizeof (unsigned int)

/* sum_up_to_nul(r, p, plen, flag): read up to UINTSZ bytes at P
   (fewer when PLEN is smaller; the word is zero-padded), add to R the
   bytes that precede the first NUL, and set FLAG nonzero when a NUL
   was seen in the word.  Assumes UINTSZ is 4 -- TODO confirm on any
   platform where unsigned int is wider.  */
#ifdef WORDS_BIGENDIAN
/* The ifs are ordered from the first byte in memory to the last. */
#define sum_up_to_nul(r, p, plen, flag) \
do { \
unsigned int val = 0; \
size_t pn = (plen); \
size_t n = pn < UINTSZ ? pn : UINTSZ; \
memcpy (&val, (p), n); \
if ((val & 0xFF000000) == 0) \
flag = 1; \
else if ((val & 0xFF0000) == 0) \
r += val & ~0xFFFF, flag = 1; \
else if ((val & 0xFF00) == 0) \
r += val & ~0xFF, flag = 1; \
else \
r += val, flag = (val & 0xFF) == 0; \
} while (0)
#else
/* First detect the presence of zeroes. If there is none, we can
   sum the 4 bytes directly. Otherwise, the ifs are ordered as in the
   big endian case, from the first byte in memory to the last.
   The zero-byte detector is the classic "haszero" bit trick:
   (v - 0x01010101) & ~v & 0x80808080 is nonzero iff some byte of v
   is zero.  */
#define sum_up_to_nul(r, p, plen, flag) \
do { \
unsigned int val = 0; \
size_t pn = (plen); \
size_t n = pn < UINTSZ ? pn : UINTSZ; \
memcpy (&val, (p), n); \
flag = ((val - 0x01010101) & ~val) & 0x80808080; \
if (!flag) \
r += val; \
else if (val & 0xFF) \
{ \
if ((val & 0xFF00) == 0) \
r += val & 0xFF; \
else if ((val & 0xFF0000) == 0) \
r += val & 0xFFFF; \
else \
r += val; \
} \
} while (0)
#endif
456
457 unsigned int
458 jhash_string(unsigned const char *k)
459 {
460 unsigned int a, b, c;
461 unsigned int have_nul = 0;
462 unsigned const char *start = k;
463 size_t klen = strlen ((const char*)k);
464
465 /* Set up the internal state */
466 a = b = c = JHASH_INITVAL;
467
468 /* All but the last block: affect some 32 bits of (a,b,c) */
469 for (;;) {
470 sum_up_to_nul(a, k, klen, have_nul);
471 if (have_nul)
472 break;
473 k += UINTSZ;
474 assert (klen >= UINTSZ);
475 klen -= UINTSZ;
476
477 sum_up_to_nul(b, k, klen, have_nul);
478 if (have_nul)
479 break;
480 k += UINTSZ;
481 assert (klen >= UINTSZ);
482 klen -= UINTSZ;
483
484 sum_up_to_nul(c, k, klen, have_nul);
485 if (have_nul)
486 break;
487 k += UINTSZ;
488 assert (klen >= UINTSZ);
489 klen -= UINTSZ;
490 jhash_mix(a, b, c);
491 }
492
493 jhash_final(a, b, c);
494 return c + (unsigned) (k - start);
495 }