(root)/
libredwg-0.13/
src/
hash.c
       1  /*****************************************************************************/
       2  /*  LibreDWG - free implementation of the DWG file format                    */
       3  /*                                                                           */
       4  /*  Copyright (C) 2018-2019,2023 Free Software Foundation, Inc.              */
       5  /*                                                                           */
       6  /*  This library is free software, licensed under the terms of the GNU       */
       7  /*  General Public License as published by the Free Software Foundation,     */
       8  /*  either version 3 of the License, or (at your option) any later version.  */
       9  /*  You should have received a copy of the GNU General Public License        */
      10  /*  along with this program.  If not, see <http://www.gnu.org/licenses/>.    */
      11  /*****************************************************************************/
      12  
      13  /*
      14   * hash.c: int hashmap for the object_ref map.
      15   *         uses linear probing for best cache usage.
      16   *         values are inlined into the array. The 0 key is disallowed.
      17   * written by Reini Urban
      18   */
      19  
      20  #include "hash.h"
      21  #include <stdlib.h>
      22  #include <stdio.h>
      23  #include <string.h>
      24  #include "logging.h"
      25  
      26  dwg_inthash *
      27  hash_new (uint64_t size)
      28  {
      29    dwg_inthash *hash = (dwg_inthash *)malloc (sizeof (dwg_inthash));
      30    uint64_t cap;
      31    if (!hash)
      32      return NULL;
      33    // multiply with load factor,
      34    // and round size to next power of 2 (fast) or prime (secure),
      35    if (size < 15)
      36      size = 15;
      37    cap = (uint64_t)(size * 100.0 / HASH_LOAD);
      38    // this is slow, but only done once. clz would be much faster
      39    while (size <= cap)
      40      size <<= 1U;
      41    hash->array = (struct _hashbucket *)calloc (
      42        size, sizeof (struct _hashbucket)); // key+value pairs
      43    hash->elems = 0;
      44    hash->size = size;
      45    return hash;
      46  }
      47  
      48  // if exceeds load factor
      49  static inline int
      50  hash_need_resize (dwg_inthash *hash)
      51  {
      52    return (uint64_t)(hash->elems * 100.0 / HASH_LOAD) > hash->size;
      53  }
      54  
      55  static void
      56  hash_resize (dwg_inthash *hash)
      57  {
      58    dwg_inthash oldhash = *hash;
      59    uint64_t size = hash->size * 2;
      60    uint64_t i;
      61  
      62    // allocate key+value pairs afresh
      63    hash->array
      64        = (struct _hashbucket *)calloc (size, sizeof (struct _hashbucket));
      65    if (!hash->array)
      66      {
      67        *hash = oldhash;
      68        return;
      69      }
      70    hash->elems = 0;
      71    hash->size = size;
      72    memset (hash->array, 0, size * sizeof (struct _hashbucket));
      73    // spread out the old elements in double space, less collisions
      74    for (i = 0; i < oldhash.size; i++)
      75      {
      76        if (oldhash.array[i].key)
      77          hash_set (hash, oldhash.array[i].key, oldhash.array[i].value);
      78      }
      79    free (oldhash.array);
      80    return;
      81  }
      82  
      83  // found this gem by Thomas Mueller at stackoverflow. triviality threshold.
      84  // it's like a normal murmur or jenkins finalizer,
      85  // just statistically tested to be optimal.
      86  // 2023: changed to 64bit, checked at https://nullprogram.com/blog/2018/07/31/
      87  // Note that this is entirely "insecure", the inverse func is trivial.
      88  // We don't care as we deal with DWG and had linear search before.
      89  static inline uint64_t
      90  hash_func (uint64_t key)
      91  {
      92    key = ((key >> 32) ^ key) * UINT64_C (0xd6e8feb86659fd93);
      93    key = ((key >> 32) ^ key) * UINT64_C (0xd6e8feb86659fd93);
      94    key = (key >> 32) ^ key;
      95    return key;
      96  }
      97  
      98  // 0 is disallowed as key, even if there's no deletion.
      99  uint64_t
     100  hash_get (dwg_inthash *hash, uint64_t key)
     101  {
     102    uint64_t i = hash_func (key) % hash->size;
     103    uint64_t j = i;
     104    while (hash->array[i].key && hash->array[i].key != key)
     105      {
     106        // HANDLER (OUTPUT, "get collision at %d\n", i);
     107        i++; // linear probing with wrap around
     108        if (i == hash->size)
     109          i = 0;
     110        if (i == j) // not found
     111          return HASH_NOT_FOUND;
     112      }
     113    if (hash->array[i].key)
     114      return hash->array[i].value;
     115    else
     116      return HASH_NOT_FOUND;
     117  }
     118  
     119  // search or insert. key 0 is forbidden.
     120  void
     121  hash_set (dwg_inthash *hash, uint64_t key, uint64_t value)
     122  {
     123    uint64_t i = hash_func (key) % hash->size;
     124    uint64_t j = i;
     125    if (key == 0)
     126      {
     127        HANDLER (OUTPUT, "forbidden 0 key\n");
     128        return;
     129      }
     130    // empty slot
     131    if (!hash->array[i].key)
     132      {
     133        hash->array[i].key = key;
     134        hash->array[i].value = value;
     135        hash->elems++;
     136        return;
     137      }
     138    while (hash->array[i].key)
     139      {
     140        if (hash->array[i].key == key)
     141          { // found
     142            hash->array[i].value = value;
     143            return;
     144          }
     145        // HANDLER (OUTPUT, "set collision at %d\n", i);
     146        i++; // linear probing with wrap around
     147        if (i == hash->size)
     148          i = 0;
     149        if (i == j) // not found
     150          {
     151            // HANDLER (OUTPUT, "set not found at %d\n", i);
     152            // if does not exist, add at i+1
     153            if (hash_need_resize (hash))
     154              {
     155                // HANDLER (OUTPUT, "resize at %d\n", hash->size);
     156                hash_resize (hash);
     157                hash_set (hash, key, value);
     158                return;
     159              }
     160            while (hash->array[i].key) // find next empty slot
     161              {
     162                // up to here we have no coverage!
     163                // HANDLER (OUTPUT, "set 2nd collision at %d\n", i);
     164                i++; // again linear probing with wrap around
     165                if (i == hash->size)
     166                  i = 0;
     167                if (i == j) // not found
     168                  {
     169                    // HANDLER (OUTPUT, "not found resize at %d\n", hash->size);
     170                    hash_resize (hash); // guarantees new empty slots
     171                    hash_set (hash, key, value);
     172                    return;
     173                  }
     174                else
     175                  { // insert at empty slot
     176                    hash->array[i].key = key;
     177                    hash->array[i].value = value;
     178                    hash->elems++;
     179                    return;
     180                  }
     181              }
     182          }
     183      }
     184    // empty slot
     185    hash->array[i].key = key;
     186    hash->array[i].value = value;
     187    hash->elems++;
     188    return;
     189  }
     190  
     191  void
     192  hash_free (dwg_inthash *hash)
     193  {
     194    free (hash->array);
     195    hash->array = NULL;
     196    hash->size = 0;
     197    hash->elems = 0;
     198    free (hash);
     199  }