1 /* hash - hashing table processing.
2
3 Copyright (C) 1998-2004, 2006-2007, 2009-2023 Free Software Foundation, Inc.
4
5 Written by Jim Meyering, 1992.
6
7 This file is free software: you can redistribute it and/or modify
8 it under the terms of the GNU Lesser General Public License as
9 published by the Free Software Foundation; either version 2.1 of the
10 License, or (at your option) any later version.
11
12 This file is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with this program. If not, see <https://www.gnu.org/licenses/>. */
19
20 /* A generic hash table package. */
21
22 /* Define USE_OBSTACK to 1 if you want the allocator to use obstacks instead
23 of malloc. If you change USE_OBSTACK, you have to recompile! */
24
25 #include <config.h>
26
27 #include "hash.h"
28
29 #include "bitrotate.h"
30 #include "xalloc-oversized.h"
31
32 #include <errno.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36
37 #if USE_OBSTACK
38 # include "obstack.h"
39 # ifndef obstack_chunk_alloc
40 # define obstack_chunk_alloc malloc
41 # endif
42 # ifndef obstack_chunk_free
43 # define obstack_chunk_free free
44 # endif
45 #endif
46
47 struct hash_entry
48 {
49 void *data;
50 struct hash_entry *next;
51 };
52
53 struct hash_table
54 {
  /* The array of buckets starts at BUCKET and extends to BUCKET_LIMIT-1,
     for a total of N_BUCKETS possible buckets.  Among those, N_BUCKETS_USED
     buckets are not empty; the table holds N_ENTRIES active entries.  */
58 struct hash_entry *bucket;
59 struct hash_entry const *bucket_limit;
60 size_t n_buckets;
61 size_t n_buckets_used;
62 size_t n_entries;
63
64 /* Tuning arguments, kept in a physically separate structure. */
65 const Hash_tuning *tuning;
66
  /* Three functions are given to 'hash_initialize'; see the documentation
     block for that function.  In a word, HASHER randomizes a user entry
     into a number from 0 up to some maximum minus 1; COMPARATOR returns
     true if two user entries compare equally; and DATA_FREER is the cleanup
     function for a user entry.  */
72 Hash_hasher hasher;
73 Hash_comparator comparator;
74 Hash_data_freer data_freer;
75
76 /* A linked list of freed struct hash_entry structs. */
77 struct hash_entry *free_entry_list;
78
79 #if USE_OBSTACK
80 /* Whenever obstacks are used, it is possible to allocate all overflowed
81 entries into a single stack, so they all can be freed in a single
82 operation. It is not clear if the speedup is worth the trouble. */
83 struct obstack entry_stack;
84 #endif
85 };
86
/* A hash table contains many internal entries, each holding a pointer to
   some user-provided data (also called a user entry).  The word "entry" is
   used for both the internal entry and its associated user entry.  A user
   entry's contents may be hashed by a randomization function (the hashing
   function, or just "hasher" for short) into a number (or "slot") between 0
   and the current table size minus 1.  At each slot position in the hash
   table starts a linked chain of entries whose user data all hash to that
   slot.  A bucket is the collection of all entries hashing to the same slot.

   A good "hasher" function will distribute entries rather evenly across
   buckets.  In the ideal case, the length of each bucket is roughly the
   number of entries divided by the table size.  Finding the slot for an
   entry is usually done in constant time by the "hasher", and the later
   search for a precise entry is linear in the size of its bucket.
   Consequently, a larger hash table size (that is, a larger number of
   buckets) tends to yield shorter chains, *provided* the "hasher" function
   behaves properly.

   Long buckets slow down the lookup algorithm.  One might use big hash
   table sizes in the hope of reducing the average bucket length, but this
   can waste memory, as unused slots in the hash table still take space.
   The best bet is to make sure you are using a good "hasher" function
   (beware that those are not that easy to write! :-), and to use a table
   size larger than the actual number of entries.  */
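
/* As a short illustrative sketch (not a definitive usage contract), a
   client might create and use a table roughly as follows.  The helpers
   string_hasher and string_comparator are hypothetical names written for
   this example; a real caller includes "hash.h" and <string.h>:

     static size_t
     string_hasher (const void *entry, size_t n_buckets)
     {
       const char *s = entry;
       size_t value = 0;
       unsigned char ch;

       for (; (ch = *s); s++)
         value = (value * 31 + ch) % n_buckets;
       return value;
     }

     static bool
     string_comparator (const void *a, const void *b)
     {
       return strcmp (a, b) == 0;
     }

     Hash_table *ht = hash_initialize (31, NULL, string_hasher,
                                       string_comparator, NULL);
     if (ht)
       {
         if (hash_insert (ht, "blue"))
           {
             char const *match = hash_lookup (ht, "blue");
             ...
           }
         hash_free (ht);
       }
*/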
110
111 /* If an insertion makes the ratio of nonempty buckets to table size larger
112 than the growth threshold (a number between 0.0 and 1.0), then increase
113 the table size by multiplying by the growth factor (a number greater than
114 1.0). The growth threshold defaults to 0.8, and the growth factor
115 defaults to 1.414, meaning that the table will have doubled its size
116 every second time 80% of the buckets get used. */
117 #define DEFAULT_GROWTH_THRESHOLD 0.8f
118 #define DEFAULT_GROWTH_FACTOR 1.414f
119
120 /* If a deletion empties a bucket and causes the ratio of used buckets to
121 table size to become smaller than the shrink threshold (a number between
122 0.0 and 1.0), then shrink the table by multiplying by the shrink factor (a
123 number greater than the shrink threshold but smaller than 1.0). The shrink
124 threshold and factor default to 0.0 and 1.0, meaning that the table never
125 shrinks. */
126 #define DEFAULT_SHRINK_THRESHOLD 0.0f
127 #define DEFAULT_SHRINK_FACTOR 1.0f
128
129 /* Use this to initialize or reset a TUNING structure to
130 some sensible values. */
131 static const Hash_tuning default_tuning =
132 {
133 DEFAULT_SHRINK_THRESHOLD,
134 DEFAULT_SHRINK_FACTOR,
135 DEFAULT_GROWTH_THRESHOLD,
136 DEFAULT_GROWTH_FACTOR,
137 false
138 };
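
/* A minimal sketch of supplying custom tuning (the numbers below are
   arbitrary examples, not recommendations).  Start from the defaults with
   hash_reset_tuning, defined later in this file, then override selected
   fields before passing the structure to hash_initialize.  The structure
   must stay valid for the lifetime of the table, since only its address is
   stored; my_hasher and my_comparator are hypothetical:

     static Hash_tuning my_tuning;

     hash_reset_tuning (&my_tuning);
     my_tuning.growth_threshold = 0.7f;
     my_tuning.growth_factor = 2.0f;
     my_tuning.shrink_threshold = 0.2f;
     my_tuning.shrink_factor = 0.5f;

     Hash_table *ht = hash_initialize (101, &my_tuning,
                                       my_hasher, my_comparator, NULL);
*/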
139
140 /* Information and lookup. */
141
142 size_t
143 hash_get_n_buckets (const Hash_table *table)
144 {
145 return table->n_buckets;
146 }
147
148 size_t
149 hash_get_n_buckets_used (const Hash_table *table)
150 {
151 return table->n_buckets_used;
152 }
153
154 size_t
155 hash_get_n_entries (const Hash_table *table)
156 {
157 return table->n_entries;
158 }
159
160 size_t
161 hash_get_max_bucket_length (const Hash_table *table)
162 {
163 struct hash_entry const *bucket;
164 size_t max_bucket_length = 0;
165
166 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
167 {
168 if (bucket->data)
169 {
170 struct hash_entry const *cursor = bucket;
171 size_t bucket_length = 1;
172
173 while (cursor = cursor->next, cursor)
174 bucket_length++;
175
176 if (bucket_length > max_bucket_length)
177 max_bucket_length = bucket_length;
178 }
179 }
180
181 return max_bucket_length;
182 }
183
184 bool
185 hash_table_ok (const Hash_table *table)
186 {
187 struct hash_entry const *bucket;
188 size_t n_buckets_used = 0;
189 size_t n_entries = 0;
190
191 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
192 {
193 if (bucket->data)
194 {
195 struct hash_entry const *cursor = bucket;
196
197 /* Count bucket head. */
198 n_buckets_used++;
199 n_entries++;
200
201 /* Count bucket overflow. */
202 while (cursor = cursor->next, cursor)
203 n_entries++;
204 }
205 }
206
207 if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries)
208 return true;
209
210 return false;
211 }
212
213 void
214 hash_print_statistics (const Hash_table *table, FILE *stream)
215 {
216 size_t n_entries = hash_get_n_entries (table);
217 size_t n_buckets = hash_get_n_buckets (table);
218 size_t n_buckets_used = hash_get_n_buckets_used (table);
219 size_t max_bucket_length = hash_get_max_bucket_length (table);
220
221 fprintf (stream, "# entries: %lu\n", (unsigned long int) n_entries);
222 fprintf (stream, "# buckets: %lu\n", (unsigned long int) n_buckets);
223 fprintf (stream, "# buckets used: %lu (%.2f%%)\n",
224 (unsigned long int) n_buckets_used,
225 (100.0 * n_buckets_used) / n_buckets);
226 fprintf (stream, "max bucket length: %lu\n",
227 (unsigned long int) max_bucket_length);
228 }
229
230 /* Hash KEY and return a pointer to the selected bucket.
231 If TABLE->hasher misbehaves, abort. */
232 static struct hash_entry *
233 safe_hasher (const Hash_table *table, const void *key)
234 {
235 size_t n = table->hasher (key, table->n_buckets);
236 if (! (n < table->n_buckets))
237 abort ();
238 return table->bucket + n;
239 }
240
241 void *
242 hash_lookup (const Hash_table *table, const void *entry)
243 {
244 struct hash_entry const *bucket = safe_hasher (table, entry);
245 struct hash_entry const *cursor;
246
247 if (bucket->data == NULL)
248 return NULL;
249
250 for (cursor = bucket; cursor; cursor = cursor->next)
251 if (entry == cursor->data || table->comparator (entry, cursor->data))
252 return cursor->data;
253
254 return NULL;
255 }
256
257 /* Walking. */
258
259 void *
260 hash_get_first (const Hash_table *table)
261 {
262 struct hash_entry const *bucket;
263
264 if (table->n_entries == 0)
265 return NULL;
266
267 for (bucket = table->bucket; ; bucket++)
268 if (! (bucket < table->bucket_limit))
269 abort ();
270 else if (bucket->data)
271 return bucket->data;
272 }
273
274 void *
275 hash_get_next (const Hash_table *table, const void *entry)
276 {
277 struct hash_entry const *bucket = safe_hasher (table, entry);
278 struct hash_entry const *cursor;
279
280 /* Find next entry in the same bucket. */
281 cursor = bucket;
282 do
283 {
284 if (cursor->data == entry && cursor->next)
285 return cursor->next->data;
286 cursor = cursor->next;
287 }
288 while (cursor != NULL);
289
290 /* Find first entry in any subsequent bucket. */
291 while (++bucket < table->bucket_limit)
292 if (bucket->data)
293 return bucket->data;
294
295 /* None found. */
296 return NULL;
297 }
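
/* A minimal sketch of visiting every user entry with the two functions
   above, assuming the table is not modified during the walk; PROCESS is an
   illustrative placeholder:

     void *entry;

     for (entry = hash_get_first (table); entry != NULL;
          entry = hash_get_next (table, entry))
       process (entry);
*/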
298
299 size_t
300 hash_get_entries (const Hash_table *table, void **buffer,
301 size_t buffer_size)
302 {
303 size_t counter = 0;
304 struct hash_entry const *bucket;
305 struct hash_entry const *cursor;
306
307 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
308 {
309 if (bucket->data)
310 {
311 for (cursor = bucket; cursor; cursor = cursor->next)
312 {
313 if (counter >= buffer_size)
314 return counter;
315 buffer[counter++] = cursor->data;
316 }
317 }
318 }
319
320 return counter;
321 }
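
/* A minimal sketch of snapshotting all user entries into a caller-owned
   array; BUF and N are illustrative.  hash_get_entries stores at most
   BUFFER_SIZE pointers, so the array can be sized from hash_get_n_entries:

     size_t n = hash_get_n_entries (table);
     void **buf = malloc (n * sizeof *buf);

     if (buf != NULL)
       {
         size_t filled = hash_get_entries (table, buf, n);
         ...
         free (buf);
       }
*/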
322
323 size_t
324 hash_do_for_each (const Hash_table *table, Hash_processor processor,
325 void *processor_data)
326 {
327 size_t counter = 0;
328 struct hash_entry const *bucket;
329 struct hash_entry const *cursor;
330
331 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
332 {
333 if (bucket->data)
334 {
335 for (cursor = bucket; cursor; cursor = cursor->next)
336 {
337 if (! processor (cursor->data, processor_data))
338 return counter;
339 counter++;
340 }
341 }
342 }
343
344 return counter;
345 }
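
/* A minimal sketch of a processor callback for hash_do_for_each; the name
   print_entry and the assumption that user entries are strings are purely
   illustrative.  Returning true continues the walk, and returning false
   stops it, matching the test on PROCESSOR above:

     static bool
     print_entry (void *entry, void *processor_data)
     {
       FILE *out = processor_data;

       fprintf (out, "%s\n", (char *) entry);
       return true;
     }

     size_t n_processed = hash_do_for_each (table, print_entry, stdout);
*/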
346
347 /* Allocation and clean-up. */
348
349 #if 0
350
351 #if USE_DIFF_HASH
352
353 /* About hashings, Paul Eggert writes to me (FP), on 1994-01-01: "Please see
354 B. J. McKenzie, R. Harries & T. Bell, Selecting a hashing algorithm,
355 Software--practice & experience 20, 2 (Feb 1990), 209-224. Good hash
356 algorithms tend to be domain-specific, so what's good for [diffutils'] io.c
357 may not be good for your application." */
358
359 size_t
360 hash_string (const char *string, size_t n_buckets)
361 {
362 # define HASH_ONE_CHAR(Value, Byte) \
363 ((Byte) + rotl_sz (Value, 7))
364
365 size_t value = 0;
366 unsigned char ch;
367
368 for (; (ch = *string); string++)
369 value = HASH_ONE_CHAR (value, ch);
370 return value % n_buckets;
371
372 # undef HASH_ONE_CHAR
373 }
374
375 #else /* not USE_DIFF_HASH */
376
377 /* This one comes from 'recode', and performs a bit better than the above as
378 per a few experiments. It is inspired from a hashing routine found in the
379 very old Cyber 'snoop', itself written in typical Greg Mansfield style.
380 (By the way, what happened to this excellent man? Is he still alive?) */
381
382 size_t
383 hash_string (const char *string, size_t n_buckets)
384 {
385 size_t value = 0;
386 unsigned char ch;
387
388 for (; (ch = *string); string++)
389 value = (value * 31 + ch) % n_buckets;
390 return value;
391 }
392
393 #endif /* not USE_DIFF_HASH */
394
395 #endif
396
397 /* Return true if CANDIDATE is a prime number. CANDIDATE should be an odd
398 number at least equal to 11. */
399
400 static bool _GL_ATTRIBUTE_CONST
401 is_prime (size_t candidate)
402 {
403 size_t divisor = 3;
404 size_t square = divisor * divisor;
405
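  /* Loop invariant: SQUARE == DIVISOR * DIVISOR.  Stepping DIVISOR from D
     to D + 2 adds (D + 2) * (D + 2) - D * D == 4 * (D + 1) to SQUARE, which
     the two increments in the loop body maintain without a multiplication.  */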
406 while (square < candidate && (candidate % divisor))
407 {
408 divisor++;
409 square += 4 * divisor;
410 divisor++;
411 }
412
413 return (candidate % divisor ? true : false);
414 }
415
416 /* Round a given CANDIDATE number up to the nearest prime, and return that
417 prime. Primes lower than 10 are merely skipped. */
418
419 static size_t _GL_ATTRIBUTE_CONST
420 next_prime (size_t candidate)
421 {
422 /* Skip small primes. */
423 if (candidate < 10)
424 candidate = 10;
425
426 /* Make it definitely odd. */
427 candidate |= 1;
428
429 while (SIZE_MAX != candidate && !is_prime (candidate))
430 candidate += 2;
431
432 return candidate;
433 }
434
435 void
436 hash_reset_tuning (Hash_tuning *tuning)
437 {
438 *tuning = default_tuning;
439 }
440
441 /* If the user passes a NULL hasher, we hash the raw pointer. */
442 static size_t
443 raw_hasher (const void *data, size_t n)
444 {
445 /* When hashing unique pointers, it is often the case that they were
446 generated by malloc and thus have the property that the low-order
447 bits are 0. As this tends to give poorer performance with small
448 tables, we rotate the pointer value before performing division,
449 in an attempt to improve hash quality. */
450 size_t val = rotr_sz ((size_t) data, 3);
451 return val % n;
452 }
453
454 /* If the user passes a NULL comparator, we use pointer comparison. */
455 static bool
456 raw_comparator (const void *a, const void *b)
457 {
458 return a == b;
459 }
460
461
/* For the given hash TABLE, check the user-supplied tuning structure for
   reasonable values, and return true if there is no gross error with it.
   Otherwise, definitively reset the TUNING field to some acceptable default
   in the hash table (that is, the user loses the right to further modify
   tuning arguments), and return false.  */
467
468 static bool
469 check_tuning (Hash_table *table)
470 {
471 const Hash_tuning *tuning = table->tuning;
472 float epsilon;
473 if (tuning == &default_tuning)
474 return true;
475
476 /* Be a bit stricter than mathematics would require, so that
477 rounding errors in size calculations do not cause allocations to
478 fail to grow or shrink as they should. The smallest allocation
479 is 11 (due to next_prime's algorithm), so an epsilon of 0.1
480 should be good enough. */
481 epsilon = 0.1f;
482
483 if (epsilon < tuning->growth_threshold
484 && tuning->growth_threshold < 1 - epsilon
485 && 1 + epsilon < tuning->growth_factor
486 && 0 <= tuning->shrink_threshold
487 && tuning->shrink_threshold + epsilon < tuning->shrink_factor
488 && tuning->shrink_factor <= 1
489 && tuning->shrink_threshold + epsilon < tuning->growth_threshold)
490 return true;
491
492 table->tuning = &default_tuning;
493 return false;
494 }
495
496 /* Compute the size of the bucket array for the given CANDIDATE and
497 TUNING, or return 0 if there is no possible way to allocate that
498 many entries. */
499
500 static size_t _GL_ATTRIBUTE_PURE
501 compute_bucket_size (size_t candidate, const Hash_tuning *tuning)
502 {
503 if (!tuning->is_n_buckets)
504 {
505 float new_candidate = candidate / tuning->growth_threshold;
506 if ((float) SIZE_MAX <= new_candidate)
507 goto nomem;
508 candidate = new_candidate;
509 }
510 candidate = next_prime (candidate);
511 if (xalloc_oversized (candidate, sizeof (struct hash_entry *)))
512 goto nomem;
513 return candidate;
514
515 nomem:
516 errno = ENOMEM;
517 return 0;
518 }
519
520 Hash_table *
521 hash_initialize (size_t candidate, const Hash_tuning *tuning,
522 Hash_hasher hasher, Hash_comparator comparator,
523 Hash_data_freer data_freer)
524 {
525 Hash_table *table;
526
527 if (hasher == NULL)
528 hasher = raw_hasher;
529 if (comparator == NULL)
530 comparator = raw_comparator;
531
532 table = malloc (sizeof *table);
533 if (table == NULL)
534 return NULL;
535
536 if (!tuning)
537 tuning = &default_tuning;
538 table->tuning = tuning;
539 if (!check_tuning (table))
540 {
541 /* Fail if the tuning options are invalid. This is the only occasion
542 when the user gets some feedback about it. Once the table is created,
543 if the user provides invalid tuning options, we silently revert to
         using the defaults, and ignore further requests to change the tuning
545 options. */
546 errno = EINVAL;
547 goto fail;
548 }
549
550 table->n_buckets = compute_bucket_size (candidate, tuning);
551 if (!table->n_buckets)
552 goto fail;
553
554 table->bucket = calloc (table->n_buckets, sizeof *table->bucket);
555 if (table->bucket == NULL)
556 goto fail;
557 table->bucket_limit = table->bucket + table->n_buckets;
558 table->n_buckets_used = 0;
559 table->n_entries = 0;
560
561 table->hasher = hasher;
562 table->comparator = comparator;
563 table->data_freer = data_freer;
564
565 table->free_entry_list = NULL;
566 #if USE_OBSTACK
567 obstack_init (&table->entry_stack);
568 #endif
569 return table;
570
571 fail:
572 free (table);
573 return NULL;
574 }
575
576 void
577 hash_clear (Hash_table *table)
578 {
579 struct hash_entry *bucket;
580
581 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
582 {
583 if (bucket->data)
584 {
585 struct hash_entry *cursor;
586 struct hash_entry *next;
587
588 /* Free the bucket overflow. */
589 for (cursor = bucket->next; cursor; cursor = next)
590 {
591 if (table->data_freer)
592 table->data_freer (cursor->data);
593 cursor->data = NULL;
594
595 next = cursor->next;
596 /* Relinking is done one entry at a time, as it is to be expected
597 that overflows are either rare or short. */
598 cursor->next = table->free_entry_list;
599 table->free_entry_list = cursor;
600 }
601
602 /* Free the bucket head. */
603 if (table->data_freer)
604 table->data_freer (bucket->data);
605 bucket->data = NULL;
606 bucket->next = NULL;
607 }
608 }
609
610 table->n_buckets_used = 0;
611 table->n_entries = 0;
612 }
613
614 void
615 hash_free (Hash_table *table)
616 {
617 struct hash_entry *bucket;
618 struct hash_entry *cursor;
619 struct hash_entry *next;
620 int err = errno;
621
622 /* Call the user data_freer function. */
623 if (table->data_freer && table->n_entries)
624 {
625 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
626 {
627 if (bucket->data)
628 {
629 for (cursor = bucket; cursor; cursor = cursor->next)
630 table->data_freer (cursor->data);
631 }
632 }
633 }
634
635 #if USE_OBSTACK
636
637 obstack_free (&table->entry_stack, NULL);
638
639 #else
640
641 /* Free all bucket overflowed entries. */
642 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
643 {
644 for (cursor = bucket->next; cursor; cursor = next)
645 {
646 next = cursor->next;
647 free (cursor);
648 }
649 }
650
651 /* Also reclaim the internal list of previously freed entries. */
652 for (cursor = table->free_entry_list; cursor; cursor = next)
653 {
654 next = cursor->next;
655 free (cursor);
656 }
657
658 #endif
659
660 /* Free the remainder of the hash table structure. */
661 free (table->bucket);
662 free (table);
663
664 errno = err;
665 }
666
667 /* Insertion and deletion. */
668
669 /* Get a new hash entry for a bucket overflow, possibly by recycling a
670 previously freed one. If this is not possible, allocate a new one. */
671
672 static struct hash_entry *
673 allocate_entry (Hash_table *table)
674 {
675 struct hash_entry *new;
676
677 if (table->free_entry_list)
678 {
679 new = table->free_entry_list;
680 table->free_entry_list = new->next;
681 }
682 else
683 {
684 #if USE_OBSTACK
685 new = obstack_alloc (&table->entry_stack, sizeof *new);
686 #else
687 new = malloc (sizeof *new);
688 #endif
689 }
690
691 return new;
692 }
693
694 /* Free a hash entry which was part of some bucket overflow,
695 saving it for later recycling. */
696
697 static void
698 free_entry (Hash_table *table, struct hash_entry *entry)
699 {
700 entry->data = NULL;
701 entry->next = table->free_entry_list;
702 table->free_entry_list = entry;
703 }
704
705 /* This private function is used to help with insertion and deletion. When
706 ENTRY matches an entry in the table, return a pointer to the corresponding
707 user data and set *BUCKET_HEAD to the head of the selected bucket.
708 Otherwise, return NULL. When DELETE is true and ENTRY matches an entry in
709 the table, unlink the matching entry. */
710
711 static void *
712 hash_find_entry (Hash_table *table, const void *entry,
713 struct hash_entry **bucket_head, bool delete)
714 {
715 struct hash_entry *bucket = safe_hasher (table, entry);
716 struct hash_entry *cursor;
717
718 *bucket_head = bucket;
719
720 /* Test for empty bucket. */
721 if (bucket->data == NULL)
722 return NULL;
723
724 /* See if the entry is the first in the bucket. */
725 if (entry == bucket->data || table->comparator (entry, bucket->data))
726 {
727 void *data = bucket->data;
728
729 if (delete)
730 {
731 if (bucket->next)
732 {
733 struct hash_entry *next = bucket->next;
734
735 /* Bump the first overflow entry into the bucket head, then save
736 the previous first overflow entry for later recycling. */
737 *bucket = *next;
738 free_entry (table, next);
739 }
740 else
741 {
742 bucket->data = NULL;
743 }
744 }
745
746 return data;
747 }
748
749 /* Scan the bucket overflow. */
750 for (cursor = bucket; cursor->next; cursor = cursor->next)
751 {
752 if (entry == cursor->next->data
753 || table->comparator (entry, cursor->next->data))
754 {
755 void *data = cursor->next->data;
756
757 if (delete)
758 {
759 struct hash_entry *next = cursor->next;
760
761 /* Unlink the entry to delete, then save the freed entry for later
762 recycling. */
763 cursor->next = next->next;
764 free_entry (table, next);
765 }
766
767 return data;
768 }
769 }
770
771 /* No entry found. */
772 return NULL;
773 }
774
775 /* Internal helper, to move entries from SRC to DST. Both tables must
776 share the same free entry list. If SAFE, only move overflow
777 entries, saving bucket heads for later, so that no allocations will
778 occur. Return false (setting errno) if the free entry list is
779 exhausted and an allocation fails. */
780
781 static bool
782 transfer_entries (Hash_table *dst, Hash_table *src, bool safe)
783 {
784 struct hash_entry *bucket;
785 struct hash_entry *cursor;
786 struct hash_entry *next;
787 for (bucket = src->bucket; bucket < src->bucket_limit; bucket++)
788 if (bucket->data)
789 {
790 void *data;
791 struct hash_entry *new_bucket;
792
793 /* Within each bucket, transfer overflow entries first and
794 then the bucket head, to minimize memory pressure. After
795 all, the only time we might allocate is when moving the
796 bucket head, but moving overflow entries first may create
797 free entries that can be recycled by the time we finally
798 get to the bucket head. */
799 for (cursor = bucket->next; cursor; cursor = next)
800 {
801 data = cursor->data;
802 new_bucket = safe_hasher (dst, data);
803
804 next = cursor->next;
805
806 if (new_bucket->data)
807 {
808 /* Merely relink an existing entry, when moving from a
809 bucket overflow into a bucket overflow. */
810 cursor->next = new_bucket->next;
811 new_bucket->next = cursor;
812 }
813 else
814 {
815 /* Free an existing entry, when moving from a bucket
816 overflow into a bucket header. */
817 new_bucket->data = data;
818 dst->n_buckets_used++;
819 free_entry (dst, cursor);
820 }
821 }
        /* Now move the bucket head.  Make sure that if we fail due to an
           allocation failure, the src table is left in a consistent
           state.  */
825 data = bucket->data;
826 bucket->next = NULL;
827 if (safe)
828 continue;
829 new_bucket = safe_hasher (dst, data);
830
831 if (new_bucket->data)
832 {
833 /* Allocate or recycle an entry, when moving from a bucket
834 header into a bucket overflow. */
835 struct hash_entry *new_entry = allocate_entry (dst);
836
837 if (new_entry == NULL)
838 return false;
839
840 new_entry->data = data;
841 new_entry->next = new_bucket->next;
842 new_bucket->next = new_entry;
843 }
844 else
845 {
846 /* Move from one bucket header to another. */
847 new_bucket->data = data;
848 dst->n_buckets_used++;
849 }
850 bucket->data = NULL;
851 src->n_buckets_used--;
852 }
853 return true;
854 }
855
856 bool
857 hash_rehash (Hash_table *table, size_t candidate)
858 {
859 Hash_table storage;
860 Hash_table *new_table;
861 size_t new_size = compute_bucket_size (candidate, table->tuning);
862
863 if (!new_size)
864 return false;
865 if (new_size == table->n_buckets)
866 return true;
867 new_table = &storage;
868 new_table->bucket = calloc (new_size, sizeof *new_table->bucket);
869 if (new_table->bucket == NULL)
870 return false;
871 new_table->n_buckets = new_size;
872 new_table->bucket_limit = new_table->bucket + new_size;
873 new_table->n_buckets_used = 0;
874 new_table->n_entries = 0;
875 new_table->tuning = table->tuning;
876 new_table->hasher = table->hasher;
877 new_table->comparator = table->comparator;
878 new_table->data_freer = table->data_freer;
879
880 /* In order for the transfer to successfully complete, we need
881 additional overflow entries when distinct buckets in the old
882 table collide into a common bucket in the new table. The worst
883 case possible is a hasher that gives a good spread with the old
884 size, but returns a constant with the new size; if we were to
885 guarantee table->n_buckets_used-1 free entries in advance, then
886 the transfer would be guaranteed to not allocate memory.
887 However, for large tables, a guarantee of no further allocation
888 introduces a lot of extra memory pressure, all for an unlikely
889 corner case (most rehashes reduce, rather than increase, the
890 number of overflow entries needed). So, we instead ensure that
891 the transfer process can be reversed if we hit a memory
892 allocation failure mid-transfer. */
893
894 /* Merely reuse the extra old space into the new table. */
895 #if USE_OBSTACK
896 new_table->entry_stack = table->entry_stack;
897 #endif
898 new_table->free_entry_list = table->free_entry_list;
899
900 if (transfer_entries (new_table, table, false))
901 {
902 /* Entries transferred successfully; tie up the loose ends. */
903 free (table->bucket);
904 table->bucket = new_table->bucket;
905 table->bucket_limit = new_table->bucket_limit;
906 table->n_buckets = new_table->n_buckets;
907 table->n_buckets_used = new_table->n_buckets_used;
908 table->free_entry_list = new_table->free_entry_list;
909 /* table->n_entries and table->entry_stack already hold their value. */
910 return true;
911 }
912
913 /* We've allocated new_table->bucket (and possibly some entries),
914 exhausted the free list, and moved some but not all entries into
915 new_table. We must undo the partial move before returning
916 failure. The only way to get into this situation is if new_table
917 uses fewer buckets than the old table, so we will reclaim some
918 free entries as overflows in the new table are put back into
919 distinct buckets in the old table.
920
921 There are some pathological cases where a single pass through the
922 table requires more intermediate overflow entries than using two
     passes.  Two passes give worse cache performance and take
924 longer, but at this point, we're already out of memory, so slow
925 and safe is better than failure. */
926 int err = errno;
927 table->free_entry_list = new_table->free_entry_list;
928 if (! (transfer_entries (table, new_table, true)
929 && transfer_entries (table, new_table, false)))
930 abort ();
931 /* table->n_entries already holds its value. */
932 free (new_table->bucket);
933 errno = err;
934 return false;
935 }
936
937 int
938 hash_insert_if_absent (Hash_table *table, void const *entry,
939 void const **matched_ent)
940 {
941 void *data;
942 struct hash_entry *bucket;
943
944 /* The caller cannot insert a NULL entry, since hash_lookup returns NULL
945 to indicate "not found", and hash_find_entry uses "bucket->data == NULL"
946 to indicate an empty bucket. */
947 if (! entry)
948 abort ();
949
950 /* If there's a matching entry already in the table, return that. */
951 if ((data = hash_find_entry (table, entry, &bucket, false)) != NULL)
952 {
953 if (matched_ent)
954 *matched_ent = data;
955 return 0;
956 }
957
958 /* If the growth threshold of the buckets in use has been reached, increase
959 the table size and rehash. There's no point in checking the number of
960 entries: if the hashing function is ill-conditioned, rehashing is not
961 likely to improve it. */
962
963 if (table->n_buckets_used
964 > table->tuning->growth_threshold * table->n_buckets)
965 {
966 /* Check more fully, before starting real work. If tuning arguments
967 became invalid, the second check will rely on proper defaults. */
968 check_tuning (table);
969 if (table->n_buckets_used
970 > table->tuning->growth_threshold * table->n_buckets)
971 {
972 const Hash_tuning *tuning = table->tuning;
973 float candidate =
974 (tuning->is_n_buckets
975 ? (table->n_buckets * tuning->growth_factor)
976 : (table->n_buckets * tuning->growth_factor
977 * tuning->growth_threshold));
978
979 if ((float) SIZE_MAX <= candidate)
980 {
981 errno = ENOMEM;
982 return -1;
983 }
984
          /* If the rehash fails, arrange to return -1.  */
986 if (!hash_rehash (table, candidate))
987 return -1;
988
989 /* Update the bucket we are interested in. */
990 if (hash_find_entry (table, entry, &bucket, false) != NULL)
991 abort ();
992 }
993 }
994
995 /* ENTRY is not matched, it should be inserted. */
996
997 if (bucket->data)
998 {
999 struct hash_entry *new_entry = allocate_entry (table);
1000
1001 if (new_entry == NULL)
1002 return -1;
1003
1004 /* Add ENTRY in the overflow of the bucket. */
1005
1006 new_entry->data = (void *) entry;
1007 new_entry->next = bucket->next;
1008 bucket->next = new_entry;
1009 table->n_entries++;
1010 return 1;
1011 }
1012
1013 /* Add ENTRY right in the bucket head. */
1014
1015 bucket->data = (void *) entry;
1016 table->n_entries++;
1017 table->n_buckets_used++;
1018
1019 return 1;
1020 }
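
/* A minimal sketch of acting on the three possible results above; ITEM and
   the three handlers are illustrative placeholders, and ITEM must not be
   NULL:

     void const *matched;

     switch (hash_insert_if_absent (table, item, &matched))
       {
       case -1: handle_out_of_memory ();  break;
       case 0:  use_existing_entry (matched);  break;
       case 1:  use_inserted_entry (item);  break;
       }
*/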
1021
1022 void *
1023 hash_insert (Hash_table *table, void const *entry)
1024 {
1025 void const *matched_ent;
1026 int err = hash_insert_if_absent (table, entry, &matched_ent);
1027 return (err == -1
1028 ? NULL
1029 : (void *) (err == 0 ? matched_ent : entry));
1030 }
1031
1032 void *
1033 hash_remove (Hash_table *table, const void *entry)
1034 {
1035 void *data;
1036 struct hash_entry *bucket;
1037
1038 data = hash_find_entry (table, entry, &bucket, true);
1039 if (!data)
1040 return NULL;
1041
1042 table->n_entries--;
1043 if (!bucket->data)
1044 {
1045 table->n_buckets_used--;
1046
1047 /* If the shrink threshold of the buckets in use has been reached,
1048 rehash into a smaller table. */
1049
1050 if (table->n_buckets_used
1051 < table->tuning->shrink_threshold * table->n_buckets)
1052 {
1053 /* Check more fully, before starting real work. If tuning arguments
1054 became invalid, the second check will rely on proper defaults. */
1055 check_tuning (table);
1056 if (table->n_buckets_used
1057 < table->tuning->shrink_threshold * table->n_buckets)
1058 {
1059 const Hash_tuning *tuning = table->tuning;
1060 size_t candidate =
1061 (tuning->is_n_buckets
1062 ? table->n_buckets * tuning->shrink_factor
1063 : (table->n_buckets * tuning->shrink_factor
1064 * tuning->growth_threshold));
1065
1066 if (!hash_rehash (table, candidate))
1067 {
1068 /* Failure to allocate memory in an attempt to
1069 shrink the table is not fatal. But since memory
1070 is low, we can at least be kind and free any
1071 spare entries, rather than keeping them tied up
1072 in the free entry list. */
1073 #if ! USE_OBSTACK
1074 struct hash_entry *cursor = table->free_entry_list;
1075 struct hash_entry *next;
1076 while (cursor)
1077 {
1078 next = cursor->next;
1079 free (cursor);
1080 cursor = next;
1081 }
1082 table->free_entry_list = NULL;
1083 #endif
1084 }
1085 }
1086 }
1087 }
1088
1089 return data;
1090 }
1091
1092 void *
1093 hash_delete (Hash_table *table, const void *entry)
1094 {
1095 return hash_remove (table, entry);
1096 }
1097
1098 /* Testing. */
1099
1100 #if TESTING
1101
1102 void
1103 hash_print (const Hash_table *table)
1104 {
1105 struct hash_entry *bucket = (struct hash_entry *) table->bucket;
1106
1107 for ( ; bucket < table->bucket_limit; bucket++)
1108 {
1109 struct hash_entry *cursor;
1110
      if (bucket->data)
1112 printf ("%lu:\n", (unsigned long int) (bucket - table->bucket));
1113
1114 for (cursor = bucket; cursor; cursor = cursor->next)
1115 {
1116 char const *s = cursor->data;
1117 /* FIXME */
1118 if (s)
1119 printf (" %s\n", s);
1120 }
1121 }
1122 }
1123
1124 #endif /* TESTING */