(root)/
make-4.4/
src/
hash.c
       1  /* hash.c -- hash table maintenance
       2  Copyright (C) 1995, 1999, 2002, 2010 Free Software Foundation, Inc.
       3  Written by Greg McGary <gkm@gnu.org> <greg@mcgary.org>
       4  
       5  GNU Make is free software; you can redistribute it and/or modify it under the
       6  terms of the GNU General Public License as published by the Free Software
       7  Foundation; either version 3 of the License, or (at your option) any later
       8  version.
       9  
      10  GNU Make is distributed in the hope that it will be useful, but WITHOUT ANY
      11  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
      12  A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
      13  
      14  You should have received a copy of the GNU General Public License along with
      15  this program.  If not, see <https://www.gnu.org/licenses/>.  */
      16  
      17  #include "makeint.h"
      18  #include "hash.h"
      19  #include <assert.h>
      20  
      21  #define CALLOC(t, n) ((t *) xcalloc (sizeof (t) * (n)))
      22  #define MALLOC(t, n) ((t *) xmalloc (sizeof (t) * (n)))
      23  #define REALLOC(o, t, n) ((t *) xrealloc ((o), sizeof (t) * (n)))
      24  #define CLONE(o, t, n) ((t *) memcpy (MALLOC (t, (n)), (o), sizeof (t) * (n)))
      25  
      26  static void hash_rehash __P((struct hash_table* ht));
      27  static unsigned long round_up_2 __P((unsigned long rough));
      28  
      29  /* Implement double hashing with open addressing.  The table size is
      30     always a power of two.  The secondary ('increment') hash function
      31     is forced to return an odd-value, in order to be relatively prime
      32     to the table size.  This guarantees that the increment can
      33     potentially hit every slot in the table during collision
      34     resolution.  */
      35  
      36  void *hash_deleted_item = &hash_deleted_item;
      37  
      38  /* Force the table size to be a power of two, possibly rounding up the
      39     given size.  */
      40  
      41  void
      42  hash_init (struct hash_table *ht, unsigned long size,
      43             hash_func_t hash_1, hash_func_t hash_2, hash_cmp_func_t hash_cmp)
      44  {
      45    ht->ht_size = round_up_2 (size);
      46    ht->ht_empty_slots = ht->ht_size;
      47    ht->ht_vec = CALLOC (void *, ht->ht_size);
      48    if (ht->ht_vec == 0)
      49      {
      50        fprintf (stderr, _("can't allocate %lu bytes for hash table: memory exhausted"),
      51                 ht->ht_size * (unsigned long) sizeof (void *));
      52        exit (MAKE_TROUBLE);
      53      }
      54  
      55    ht->ht_capacity = ht->ht_size - (ht->ht_size / 16); /* 93.75% loading factor */
      56    ht->ht_fill = 0;
      57    ht->ht_collisions = 0;
      58    ht->ht_lookups = 0;
      59    ht->ht_rehashes = 0;
      60    ht->ht_hash_1 = hash_1;
      61    ht->ht_hash_2 = hash_2;
      62    ht->ht_compare = hash_cmp;
      63  }
      64  
      65  /* Load an array of items into 'ht'.  */
      66  
      67  void
      68  hash_load (struct hash_table *ht, void *item_table,
      69             unsigned long cardinality, unsigned long size)
      70  {
      71    char *items = (char *) item_table;
      72    while (cardinality--)
      73      {
      74        hash_insert (ht, items);
      75        items += size;
      76      }
      77  }
      78  
      79  /* Returns the address of the table slot matching 'key'.  If 'key' is
      80     not found, return the address of an empty slot suitable for
      81     inserting 'key'.  The caller is responsible for incrementing
      82     ht_fill on insertion.  */
      83  
      84  void **
      85  hash_find_slot (struct hash_table *ht, const void *key)
      86  {
      87    void **slot;
      88    void **deleted_slot = 0;
      89    unsigned int hash_2 = 0;
      90    unsigned int hash_1 = (*ht->ht_hash_1) (key);
      91  
      92    ht->ht_lookups++;
      93    for (;;)
      94      {
      95        hash_1 &= (ht->ht_size - 1);
      96        slot = &ht->ht_vec[hash_1];
      97  
      98        if (*slot == 0)
      99          return (deleted_slot ? deleted_slot : slot);
     100        if (*slot == hash_deleted_item)
     101          {
     102            if (deleted_slot == 0)
     103              deleted_slot = slot;
     104          }
     105        else
     106          {
     107            if (key == *slot)
     108              return slot;
     109            if ((*ht->ht_compare) (key, *slot) == 0)
     110              return slot;
     111            ht->ht_collisions++;
     112          }
     113        if (!hash_2)
     114            hash_2 = (*ht->ht_hash_2) (key) | 1;
     115        hash_1 += hash_2;
     116      }
     117  }
     118  
     119  void *
     120  hash_find_item (struct hash_table *ht, const void *key)
     121  {
     122    void **slot = hash_find_slot (ht, key);
     123    return ((HASH_VACANT (*slot)) ? 0 : *slot);
     124  }
     125  
     126  void *
     127  hash_insert (struct hash_table *ht, const void *item)
     128  {
     129    void **slot = hash_find_slot (ht, item);
     130    const void *old_item = *slot;
     131    hash_insert_at (ht, item, slot);
     132    return (void *)((HASH_VACANT (old_item)) ? 0 : old_item);
     133  }
     134  
     135  void *
     136  hash_insert_at (struct hash_table *ht, const void *item, const void *slot)
     137  {
     138    const void *old_item = *(void **) slot;
     139    if (HASH_VACANT (old_item))
     140      {
     141        ht->ht_fill++;
     142        if (old_item == 0)
     143          ht->ht_empty_slots--;
     144        old_item = item;
     145      }
     146    *(void const **) slot = item;
     147    if (ht->ht_empty_slots < ht->ht_size - ht->ht_capacity)
     148      {
     149        hash_rehash (ht);
     150        return (void *) hash_find_slot (ht, item);
     151      }
     152    else
     153      return (void *) slot;
     154  }
     155  
     156  void *
     157  hash_delete (struct hash_table *ht, const void *item)
     158  {
     159    void **slot = hash_find_slot (ht, item);
     160    return hash_delete_at (ht, slot);
     161  }
     162  
     163  void *
     164  hash_delete_at (struct hash_table *ht, const void *slot)
     165  {
     166    void *item = *(void **) slot;
     167    if (!HASH_VACANT (item))
     168      {
     169        *(void const **) slot = hash_deleted_item;
     170        ht->ht_fill--;
     171        return item;
     172      }
     173    else
     174      return 0;
     175  }
     176  
     177  void
     178  hash_free_items (struct hash_table *ht)
     179  {
     180    void **vec = ht->ht_vec;
     181    void **end = &vec[ht->ht_size];
     182    for (; vec < end; vec++)
     183      {
     184        void *item = *vec;
     185        if (!HASH_VACANT (item))
     186          free (item);
     187        *vec = 0;
     188      }
     189    ht->ht_fill = 0;
     190    ht->ht_empty_slots = ht->ht_size;
     191  }
     192  
     193  void
     194  hash_delete_items (struct hash_table *ht)
     195  {
     196    void **vec = ht->ht_vec;
     197    void **end = &vec[ht->ht_size];
     198    for (; vec < end; vec++)
     199      *vec = 0;
     200    ht->ht_fill = 0;
     201    ht->ht_collisions = 0;
     202    ht->ht_lookups = 0;
     203    ht->ht_rehashes = 0;
     204    ht->ht_empty_slots = ht->ht_size;
     205  }
     206  
     207  void
     208  hash_free (struct hash_table *ht, int free_items)
     209  {
     210    if (free_items)
     211      hash_free_items (ht);
     212    else
     213      {
     214        ht->ht_fill = 0;
     215        ht->ht_empty_slots = ht->ht_size;
     216      }
     217    free (ht->ht_vec);
     218    ht->ht_vec = 0;
     219    ht->ht_capacity = 0;
     220  }
     221  
     222  void
     223  hash_map (struct hash_table *ht, hash_map_func_t map)
     224  {
     225    void **slot;
     226    void **end = &ht->ht_vec[ht->ht_size];
     227  
     228    for (slot = ht->ht_vec; slot < end; slot++)
     229      {
     230        if (!HASH_VACANT (*slot))
     231          (*map) (*slot);
     232      }
     233  }
     234  
     235  void
     236  hash_map_arg (struct hash_table *ht, hash_map_arg_func_t map, void *arg)
     237  {
     238    void **slot;
     239    void **end = &ht->ht_vec[ht->ht_size];
     240  
     241    for (slot = ht->ht_vec; slot < end; slot++)
     242      {
     243        if (!HASH_VACANT (*slot))
     244          (*map) (*slot, arg);
     245      }
     246  }
     247  
     248  /* Double the size of the hash table in the event of overflow... */
     249  
     250  static void
     251  hash_rehash (struct hash_table *ht)
     252  {
     253    unsigned long old_ht_size = ht->ht_size;
     254    void **old_vec = ht->ht_vec;
     255    void **ovp;
     256  
     257    if (ht->ht_fill >= ht->ht_capacity)
     258      {
     259        ht->ht_size *= 2;
     260        ht->ht_capacity = ht->ht_size - (ht->ht_size >> 4);
     261      }
     262    ht->ht_rehashes++;
     263    ht->ht_vec = CALLOC (void *, ht->ht_size);
     264  
     265    for (ovp = old_vec; ovp < &old_vec[old_ht_size]; ovp++)
     266      {
     267        if (! HASH_VACANT (*ovp))
     268          {
     269            void **slot = hash_find_slot (ht, *ovp);
     270            *slot = *ovp;
     271          }
     272      }
     273    ht->ht_empty_slots = ht->ht_size - ht->ht_fill;
     274    free (old_vec);
     275  }
     276  
     277  void
     278  hash_print_stats (struct hash_table *ht, FILE *out_FILE)
     279  {
     280    fprintf (out_FILE, _("Load=%lu/%lu=%.0f%%, "), ht->ht_fill, ht->ht_size,
     281             100.0 * (double) ht->ht_fill / (double) ht->ht_size);
     282    fprintf (out_FILE, _("Rehash=%u, "), ht->ht_rehashes);
     283    fprintf (out_FILE, _("Collisions=%lu/%lu=%.0f%%"), ht->ht_collisions, ht->ht_lookups,
     284             (ht->ht_lookups
     285              ? (100.0 * (double) ht->ht_collisions / (double) ht->ht_lookups)
     286              : 0));
     287  }
     288  
     289  /* Dump all items into a NULL-terminated vector.  Use the
     290     user-supplied vector, or malloc one.  */
     291  
     292  void **
     293  hash_dump (struct hash_table *ht, void **vector_0, qsort_cmp_t compare)
     294  {
     295    void **vector;
     296    void **slot;
     297    void **end = &ht->ht_vec[ht->ht_size];
     298  
     299    if (vector_0 == 0)
     300      vector_0 = MALLOC (void *, ht->ht_fill + 1);
     301    vector = vector_0;
     302  
     303    for (slot = ht->ht_vec; slot < end; slot++)
     304      if (!HASH_VACANT (*slot))
     305        *vector++ = *slot;
     306    *vector = 0;
     307  
     308    if (compare)
     309      qsort (vector_0, ht->ht_fill, sizeof (void *), compare);
     310    return vector_0;
     311  }
     312  
     313  /* Round a given number up to the nearest power of 2. */
     314  
     315  static unsigned long
     316  round_up_2 (unsigned long n)
     317  {
     318    n |= (n >> 1);
     319    n |= (n >> 2);
     320    n |= (n >> 4);
     321    n |= (n >> 8);
     322    n |= (n >> 16);
     323  
     324  #if !defined(HAVE_LIMITS_H) || ULONG_MAX > 4294967295
     325    /* We only need this on systems where unsigned long is >32 bits.  */
     326    n |= (n >> 32);
     327  #endif
     328  
     329    return n + 1;
     330  }
     331  
     332  #define rol32(v, n) \
     333          ((v) << (n) | ((v) >> (32 - (n))))
     334  
     335  /* jhash_mix -- mix 3 32-bit values reversibly. */
     336  #define jhash_mix(a, b, c)                      \
     337  {                                               \
     338          a -= c;  a ^= rol32(c, 4);  c += b;     \
     339          b -= a;  b ^= rol32(a, 6);  a += c;     \
     340          c -= b;  c ^= rol32(b, 8);  b += a;     \
     341          a -= c;  a ^= rol32(c, 16); c += b;     \
     342          b -= a;  b ^= rol32(a, 19); a += c;     \
     343          c -= b;  c ^= rol32(b, 4);  b += a;     \
     344  }
     345  
     346  /* jhash_final - final mixing of 3 32-bit values (a,b,c) into c */
     347  #define jhash_final(a, b, c)                    \
     348  {                                               \
     349          c ^= b; c -= rol32(b, 14);              \
     350          a ^= c; a -= rol32(c, 11);              \
     351          b ^= a; b -= rol32(a, 25);              \
     352          c ^= b; c -= rol32(b, 16);              \
     353          a ^= c; a -= rol32(c, 4);               \
     354          b ^= a; b -= rol32(a, 14);              \
     355          c ^= b; c -= rol32(b, 24);              \
     356  }
     357  
     358  /* An arbitrary initial parameter */
     359  #define JHASH_INITVAL           0xdeadbeef
     360  
     361  #define sum_get_unaligned_32(r, p)              \
     362    do {                                          \
     363      unsigned int val;                           \
     364      memcpy(&val, (p), 4);                       \
     365      r += val;                                   \
     366    } while(0);
     367  
     368  unsigned int
     369  jhash(unsigned const char *k, int length)
     370  {
     371    unsigned int a, b, c;
     372  
     373    /* Set up the internal state */
     374    a = b = c = JHASH_INITVAL + length;
     375  
     376    /* All but the last block: affect some 32 bits of (a,b,c) */
     377    while (length > 12) {
     378      sum_get_unaligned_32(a, k);
     379      sum_get_unaligned_32(b, k + 4);
     380      sum_get_unaligned_32(c, k + 8);
     381      jhash_mix(a, b, c);
     382      length -= 12;
     383      k += 12;
     384    }
     385  
     386    if (!length)
     387      return c;
     388  
     389    if (length > 8)
     390      {
     391        sum_get_unaligned_32(a, k);
     392        length -= 4;
     393        k += 4;
     394      }
     395    if (length > 4)
     396      {
     397        sum_get_unaligned_32(b, k);
     398        length -= 4;
     399        k += 4;
     400      }
     401  
     402    if (length == 4)
     403      c += (unsigned)k[3]<<24;
     404    if (length >= 3)
     405      c += (unsigned)k[2]<<16;
     406    if (length >= 2)
     407      c += (unsigned)k[1]<<8;
     408    c += k[0];
     409    jhash_final(a, b, c);
     410    return c;
     411  }
     412  
     413  #define UINTSZ sizeof (unsigned int)
     414  
     415  #ifdef WORDS_BIGENDIAN
     416  /* The ifs are ordered from the first byte in memory to the last.  */
     417  #define sum_up_to_nul(r, p, plen, flag)   \
     418    do {                                    \
     419      unsigned int val = 0;                 \
     420      size_t pn = (plen);                   \
     421      size_t n = pn < UINTSZ ? pn : UINTSZ; \
     422      memcpy (&val, (p), n);                \
     423      if ((val & 0xFF000000) == 0)          \
     424        flag = 1;                           \
     425      else if ((val & 0xFF0000) == 0)       \
     426        r += val & ~0xFFFF, flag = 1;       \
     427      else if ((val & 0xFF00) == 0)         \
     428        r += val & ~0xFF, flag = 1;         \
     429      else                                  \
     430        r += val, flag = (val & 0xFF) == 0; \
     431    } while (0)
     432  #else
     433  /* First detect the presence of zeroes.  If there is none, we can
     434     sum the 4 bytes directly.  Otherwise, the ifs are ordered as in the
     435     big endian case, from the first byte in memory to the last.  */
     436  #define sum_up_to_nul(r, p, plen, flag)              \
     437    do {                                               \
     438      unsigned int val = 0;                            \
     439      size_t pn = (plen);                              \
     440      size_t n = pn < UINTSZ ? pn : UINTSZ;            \
     441      memcpy (&val, (p), n);                           \
     442      flag = ((val - 0x01010101) & ~val) & 0x80808080; \
     443      if (!flag)                                       \
     444        r += val;                                      \
     445      else if (val & 0xFF)                             \
     446        {                                              \
     447          if ((val & 0xFF00) == 0)                     \
     448            r += val & 0xFF;                           \
     449          else if ((val & 0xFF0000) == 0)              \
     450            r += val & 0xFFFF;                         \
     451          else                                         \
     452            r += val;                                  \
     453        }                                              \
     454    } while (0)
     455  #endif
     456  
     457  unsigned int
     458  jhash_string(unsigned const char *k)
     459  {
     460    unsigned int a, b, c;
     461    unsigned int have_nul = 0;
     462    unsigned const char *start = k;
     463    size_t klen = strlen ((const char*)k);
     464  
     465    /* Set up the internal state */
     466    a = b = c = JHASH_INITVAL;
     467  
     468    /* All but the last block: affect some 32 bits of (a,b,c) */
     469    for (;;) {
     470      sum_up_to_nul(a, k, klen, have_nul);
     471      if (have_nul)
     472        break;
     473      k += UINTSZ;
     474      assert (klen >= UINTSZ);
     475      klen -= UINTSZ;
     476  
     477      sum_up_to_nul(b, k, klen, have_nul);
     478      if (have_nul)
     479        break;
     480      k += UINTSZ;
     481      assert (klen >= UINTSZ);
     482      klen -= UINTSZ;
     483  
     484      sum_up_to_nul(c, k, klen, have_nul);
     485      if (have_nul)
     486        break;
     487      k += UINTSZ;
     488      assert (klen >= UINTSZ);
     489      klen -= UINTSZ;
     490      jhash_mix(a, b, c);
     491    }
     492  
     493    jhash_final(a, b, c);
     494    return c + (unsigned) (k - start);
     495  }