(root)/
Python-3.12.0/
Python/
hashtable.c
       1  /* The implementation of the hash table (_Py_hashtable_t) is based on the
       2     cfuhash project:
       3     http://sourceforge.net/projects/libcfu/
       4  
       5     Copyright of cfuhash:
       6     ----------------------------------
       7     Creation date: 2005-06-24 21:22:40
       8     Authors: Don
       9     Change log:
      10  
      11     Copyright (c) 2005 Don Owens
      12     All rights reserved.
      13  
      14     This code is released under the BSD license:
      15  
      16     Redistribution and use in source and binary forms, with or without
      17     modification, are permitted provided that the following conditions
      18     are met:
      19  
      20       * Redistributions of source code must retain the above copyright
      21         notice, this list of conditions and the following disclaimer.
      22  
      23       * Redistributions in binary form must reproduce the above
      24         copyright notice, this list of conditions and the following
      25         disclaimer in the documentation and/or other materials provided
      26         with the distribution.
      27  
      28       * Neither the name of the author nor the names of its
      29         contributors may be used to endorse or promote products derived
      30         from this software without specific prior written permission.
      31  
      32     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
      33     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
      34     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
      35     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
      36     COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
      37     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
      38     (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
      39     SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
      40     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
      41     STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
      42     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
      43     OF THE POSSIBILITY OF SUCH DAMAGE.
      44     ----------------------------------
      45  */
      46  
      47  #include "Python.h"
      48  #include "pycore_hashtable.h"
      49  
      50  #define HASHTABLE_MIN_SIZE 16
      51  #define HASHTABLE_HIGH 0.50
      52  #define HASHTABLE_LOW 0.10
      53  #define HASHTABLE_REHASH_FACTOR 2.0 / (HASHTABLE_LOW + HASHTABLE_HIGH)
      54  
      55  #define BUCKETS_HEAD(SLIST) \
      56          ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(SLIST)))
      57  #define TABLE_HEAD(HT, BUCKET) \
      58          ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(HT)->buckets[BUCKET]))
      59  #define ENTRY_NEXT(ENTRY) \
      60          ((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY))
      61  
      62  /* Forward declaration */
      63  static int hashtable_rehash(_Py_hashtable_t *ht);
      64  
      65  static void
      66  _Py_slist_init(_Py_slist_t *list)
      67  {
      68      list->head = NULL;
      69  }
      70  
      71  
      72  static void
      73  _Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item)
      74  {
      75      item->next = list->head;
      76      list->head = item;
      77  }
      78  
      79  
      80  static void
      81  _Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous,
      82                   _Py_slist_item_t *item)
      83  {
      84      if (previous != NULL)
      85          previous->next = item->next;
      86      else
      87          list->head = item->next;
      88  }
      89  
      90  
      91  Py_uhash_t
      92  _Py_hashtable_hash_ptr(const void *key)
      93  {
      94      return (Py_uhash_t)_Py_HashPointerRaw(key);
      95  }
      96  
      97  
      98  int
      99  _Py_hashtable_compare_direct(const void *key1, const void *key2)
     100  {
     101      return (key1 == key2);
     102  }
     103  
     104  
     105  /* makes sure the real size of the buckets array is a power of 2 */
     106  static size_t
     107  round_size(size_t s)
     108  {
     109      size_t i;
     110      if (s < HASHTABLE_MIN_SIZE)
     111          return HASHTABLE_MIN_SIZE;
     112      i = 1;
     113      while (i < s)
     114          i <<= 1;
     115      return i;
     116  }
     117  
     118  
     119  size_t
     120  _Py_hashtable_size(const _Py_hashtable_t *ht)
     121  {
     122      size_t size = sizeof(_Py_hashtable_t);
     123      /* buckets */
     124      size += ht->nbuckets * sizeof(_Py_hashtable_entry_t *);
     125      /* entries */
     126      size += ht->nentries * sizeof(_Py_hashtable_entry_t);
     127      return size;
     128  }
     129  
     130  
     131  _Py_hashtable_entry_t *
     132  _Py_hashtable_get_entry_generic(_Py_hashtable_t *ht, const void *key)
     133  {
     134      Py_uhash_t key_hash = ht->hash_func(key);
     135      size_t index = key_hash & (ht->nbuckets - 1);
     136      _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index);
     137      while (1) {
     138          if (entry == NULL) {
     139              return NULL;
     140          }
     141          if (entry->key_hash == key_hash && ht->compare_func(key, entry->key)) {
     142              break;
     143          }
     144          entry = ENTRY_NEXT(entry);
     145      }
     146      return entry;
     147  }
     148  
     149  
     150  // Specialized for:
     151  // hash_func == _Py_hashtable_hash_ptr
     152  // compare_func == _Py_hashtable_compare_direct
     153  static _Py_hashtable_entry_t *
     154  _Py_hashtable_get_entry_ptr(_Py_hashtable_t *ht, const void *key)
     155  {
     156      Py_uhash_t key_hash = _Py_hashtable_hash_ptr(key);
     157      size_t index = key_hash & (ht->nbuckets - 1);
     158      _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index);
     159      while (1) {
     160          if (entry == NULL) {
     161              return NULL;
     162          }
     163          // Compare directly keys (ignore entry->key_hash)
     164          if (entry->key == key) {
     165              break;
     166          }
     167          entry = ENTRY_NEXT(entry);
     168      }
     169      return entry;
     170  }
     171  
     172  
     173  void*
     174  _Py_hashtable_steal(_Py_hashtable_t *ht, const void *key)
     175  {
     176      Py_uhash_t key_hash = ht->hash_func(key);
     177      size_t index = key_hash & (ht->nbuckets - 1);
     178  
     179      _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index);
     180      _Py_hashtable_entry_t *previous = NULL;
     181      while (1) {
     182          if (entry == NULL) {
     183              // not found
     184              return NULL;
     185          }
     186          if (entry->key_hash == key_hash && ht->compare_func(key, entry->key)) {
     187              break;
     188          }
     189          previous = entry;
     190          entry = ENTRY_NEXT(entry);
     191      }
     192  
     193      _Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous,
     194                       (_Py_slist_item_t *)entry);
     195      ht->nentries--;
     196  
     197      void *value = entry->value;
     198      ht->alloc.free(entry);
     199  
     200      if ((float)ht->nentries / (float)ht->nbuckets < HASHTABLE_LOW) {
     201          // Ignore failure: error cannot be reported to the caller
     202          hashtable_rehash(ht);
     203      }
     204      return value;
     205  }
     206  
     207  
     208  int
     209  _Py_hashtable_set(_Py_hashtable_t *ht, const void *key, void *value)
     210  {
     211      _Py_hashtable_entry_t *entry;
     212  
     213  #ifndef NDEBUG
     214      /* Don't write the assertion on a single line because it is interesting
     215         to know the duplicated entry if the assertion failed. The entry can
     216         be read using a debugger. */
     217      entry = ht->get_entry_func(ht, key);
     218      assert(entry == NULL);
     219  #endif
     220  
     221  
     222      entry = ht->alloc.malloc(sizeof(_Py_hashtable_entry_t));
     223      if (entry == NULL) {
     224          /* memory allocation failed */
     225          return -1;
     226      }
     227  
     228      entry->key_hash = ht->hash_func(key);
     229      entry->key = (void *)key;
     230      entry->value = value;
     231  
     232      ht->nentries++;
     233      if ((float)ht->nentries / (float)ht->nbuckets > HASHTABLE_HIGH) {
     234          if (hashtable_rehash(ht) < 0) {
     235              ht->nentries--;
     236              ht->alloc.free(entry);
     237              return -1;
     238          }
     239      }
     240  
     241      size_t index = entry->key_hash & (ht->nbuckets - 1);
     242      _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry);
     243      return 0;
     244  }
     245  
     246  
     247  void*
     248  _Py_hashtable_get(_Py_hashtable_t *ht, const void *key)
     249  {
     250      _Py_hashtable_entry_t *entry = ht->get_entry_func(ht, key);
     251      if (entry != NULL) {
     252          return entry->value;
     253      }
     254      else {
     255          return NULL;
     256      }
     257  }
     258  
     259  
     260  int
     261  _Py_hashtable_foreach(_Py_hashtable_t *ht,
     262                        _Py_hashtable_foreach_func func,
     263                        void *user_data)
     264  {
     265      for (size_t hv = 0; hv < ht->nbuckets; hv++) {
     266          _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, hv);
     267          while (entry != NULL) {
     268              int res = func(ht, entry->key, entry->value, user_data);
     269              if (res) {
     270                  return res;
     271              }
     272              entry = ENTRY_NEXT(entry);
     273          }
     274      }
     275      return 0;
     276  }
     277  
     278  
     279  static int
     280  hashtable_rehash(_Py_hashtable_t *ht)
     281  {
     282      size_t new_size = round_size((size_t)(ht->nentries * HASHTABLE_REHASH_FACTOR));
     283      if (new_size == ht->nbuckets) {
     284          return 0;
     285      }
     286  
     287      size_t buckets_size = new_size * sizeof(ht->buckets[0]);
     288      _Py_slist_t *new_buckets = ht->alloc.malloc(buckets_size);
     289      if (new_buckets == NULL) {
     290          /* memory allocation failed */
     291          return -1;
     292      }
     293      memset(new_buckets, 0, buckets_size);
     294  
     295      for (size_t bucket = 0; bucket < ht->nbuckets; bucket++) {
     296          _Py_hashtable_entry_t *entry = BUCKETS_HEAD(ht->buckets[bucket]);
     297          while (entry != NULL) {
     298              assert(ht->hash_func(entry->key) == entry->key_hash);
     299              _Py_hashtable_entry_t *next = ENTRY_NEXT(entry);
     300              size_t entry_index = entry->key_hash & (new_size - 1);
     301  
     302              _Py_slist_prepend(&new_buckets[entry_index], (_Py_slist_item_t*)entry);
     303  
     304              entry = next;
     305          }
     306      }
     307  
     308      ht->alloc.free(ht->buckets);
     309      ht->nbuckets = new_size;
     310      ht->buckets = new_buckets;
     311      return 0;
     312  }
     313  
     314  
     315  _Py_hashtable_t *
     316  _Py_hashtable_new_full(_Py_hashtable_hash_func hash_func,
     317                         _Py_hashtable_compare_func compare_func,
     318                         _Py_hashtable_destroy_func key_destroy_func,
     319                         _Py_hashtable_destroy_func value_destroy_func,
     320                         _Py_hashtable_allocator_t *allocator)
     321  {
     322      _Py_hashtable_allocator_t alloc;
     323      if (allocator == NULL) {
     324          alloc.malloc = PyMem_Malloc;
     325          alloc.free = PyMem_Free;
     326      }
     327      else {
     328          alloc = *allocator;
     329      }
     330  
     331      _Py_hashtable_t *ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
     332      if (ht == NULL) {
     333          return ht;
     334      }
     335  
     336      ht->nbuckets = HASHTABLE_MIN_SIZE;
     337      ht->nentries = 0;
     338  
     339      size_t buckets_size = ht->nbuckets * sizeof(ht->buckets[0]);
     340      ht->buckets = alloc.malloc(buckets_size);
     341      if (ht->buckets == NULL) {
     342          alloc.free(ht);
     343          return NULL;
     344      }
     345      memset(ht->buckets, 0, buckets_size);
     346  
     347      ht->get_entry_func = _Py_hashtable_get_entry_generic;
     348      ht->hash_func = hash_func;
     349      ht->compare_func = compare_func;
     350      ht->key_destroy_func = key_destroy_func;
     351      ht->value_destroy_func = value_destroy_func;
     352      ht->alloc = alloc;
     353      if (ht->hash_func == _Py_hashtable_hash_ptr
     354          && ht->compare_func == _Py_hashtable_compare_direct)
     355      {
     356          ht->get_entry_func = _Py_hashtable_get_entry_ptr;
     357      }
     358      return ht;
     359  }
     360  
     361  
     362  _Py_hashtable_t *
     363  _Py_hashtable_new(_Py_hashtable_hash_func hash_func,
     364                    _Py_hashtable_compare_func compare_func)
     365  {
     366      return _Py_hashtable_new_full(hash_func, compare_func,
     367                                    NULL, NULL, NULL);
     368  }
     369  
     370  
     371  static void
     372  _Py_hashtable_destroy_entry(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry)
     373  {
     374      if (ht->key_destroy_func) {
     375          ht->key_destroy_func(entry->key);
     376      }
     377      if (ht->value_destroy_func) {
     378          ht->value_destroy_func(entry->value);
     379      }
     380      ht->alloc.free(entry);
     381  }
     382  
     383  
     384  void
     385  _Py_hashtable_clear(_Py_hashtable_t *ht)
     386  {
     387      for (size_t i=0; i < ht->nbuckets; i++) {
     388          _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, i);
     389          while (entry != NULL) {
     390              _Py_hashtable_entry_t *next = ENTRY_NEXT(entry);
     391              _Py_hashtable_destroy_entry(ht, entry);
     392              entry = next;
     393          }
     394          _Py_slist_init(&ht->buckets[i]);
     395      }
     396      ht->nentries = 0;
     397      // Ignore failure: clear function is not expected to fail
     398      // because of a memory allocation failure.
     399      (void)hashtable_rehash(ht);
     400  }
     401  
     402  
     403  void
     404  _Py_hashtable_destroy(_Py_hashtable_t *ht)
     405  {
     406      for (size_t i = 0; i < ht->nbuckets; i++) {
     407          _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, i);
     408          while (entry) {
     409              _Py_hashtable_entry_t *entry_next = ENTRY_NEXT(entry);
     410              _Py_hashtable_destroy_entry(ht, entry);
     411              entry = entry_next;
     412          }
     413      }
     414  
     415      ht->alloc.free(ht->buckets);
     416      ht->alloc.free(ht);
     417  }