1 /*****************************************************************************/
2 /* LibreDWG - free implementation of the DWG file format */
3 /* */
4 /* Copyright (C) 2018-2019,2023 Free Software Foundation, Inc. */
5 /* */
6 /* This library is free software, licensed under the terms of the GNU */
7 /* General Public License as published by the Free Software Foundation, */
8 /* either version 3 of the License, or (at your option) any later version. */
9 /* You should have received a copy of the GNU General Public License */
10 /* along with this program. If not, see <http://www.gnu.org/licenses/>. */
11 /*****************************************************************************/
12
13 /*
14 * hash.c: int hashmap for the object_ref map.
15 * uses linear probing for best cache usage.
16 * values are inlined into the array. The 0 key is disallowed.
17 * written by Reini Urban
18 */
19
20 #include "hash.h"
21 #include <stdlib.h>
22 #include <stdio.h>
23 #include <string.h>
24 #include "logging.h"
25
26 dwg_inthash *
27 hash_new (uint64_t size)
28 {
29 dwg_inthash *hash = (dwg_inthash *)malloc (sizeof (dwg_inthash));
30 uint64_t cap;
31 if (!hash)
32 return NULL;
33 // multiply with load factor,
34 // and round size to next power of 2 (fast) or prime (secure),
35 if (size < 15)
36 size = 15;
37 cap = (uint64_t)(size * 100.0 / HASH_LOAD);
38 // this is slow, but only done once. clz would be much faster
39 while (size <= cap)
40 size <<= 1U;
41 hash->array = (struct _hashbucket *)calloc (
42 size, sizeof (struct _hashbucket)); // key+value pairs
43 hash->elems = 0;
44 hash->size = size;
45 return hash;
46 }
47
48 // if exceeds load factor
49 static inline int
50 hash_need_resize (dwg_inthash *hash)
51 {
52 return (uint64_t)(hash->elems * 100.0 / HASH_LOAD) > hash->size;
53 }
54
55 static void
56 hash_resize (dwg_inthash *hash)
57 {
58 dwg_inthash oldhash = *hash;
59 uint64_t size = hash->size * 2;
60 uint64_t i;
61
62 // allocate key+value pairs afresh
63 hash->array
64 = (struct _hashbucket *)calloc (size, sizeof (struct _hashbucket));
65 if (!hash->array)
66 {
67 *hash = oldhash;
68 return;
69 }
70 hash->elems = 0;
71 hash->size = size;
72 memset (hash->array, 0, size * sizeof (struct _hashbucket));
73 // spread out the old elements in double space, less collisions
74 for (i = 0; i < oldhash.size; i++)
75 {
76 if (oldhash.array[i].key)
77 hash_set (hash, oldhash.array[i].key, oldhash.array[i].value);
78 }
79 free (oldhash.array);
80 return;
81 }
82
83 // found this gem by Thomas Mueller at stackoverflow. triviality threshold.
84 // it's like a normal murmur or jenkins finalizer,
85 // just statistically tested to be optimal.
86 // 2023: changed to 64bit, checked at https://nullprogram.com/blog/2018/07/31/
87 // Note that this is entirely "insecure", the inverse func is trivial.
88 // We don't care as we deal with DWG and had linear search before.
89 static inline uint64_t
90 hash_func (uint64_t key)
91 {
92 key = ((key >> 32) ^ key) * UINT64_C (0xd6e8feb86659fd93);
93 key = ((key >> 32) ^ key) * UINT64_C (0xd6e8feb86659fd93);
94 key = (key >> 32) ^ key;
95 return key;
96 }
97
98 // 0 is disallowed as key, even if there's no deletion.
99 uint64_t
100 hash_get (dwg_inthash *hash, uint64_t key)
101 {
102 uint64_t i = hash_func (key) % hash->size;
103 uint64_t j = i;
104 while (hash->array[i].key && hash->array[i].key != key)
105 {
106 // HANDLER (OUTPUT, "get collision at %d\n", i);
107 i++; // linear probing with wrap around
108 if (i == hash->size)
109 i = 0;
110 if (i == j) // not found
111 return HASH_NOT_FOUND;
112 }
113 if (hash->array[i].key)
114 return hash->array[i].value;
115 else
116 return HASH_NOT_FOUND;
117 }
118
119 // search or insert. key 0 is forbidden.
120 void
121 hash_set (dwg_inthash *hash, uint64_t key, uint64_t value)
122 {
123 uint64_t i = hash_func (key) % hash->size;
124 uint64_t j = i;
125 if (key == 0)
126 {
127 HANDLER (OUTPUT, "forbidden 0 key\n");
128 return;
129 }
130 // empty slot
131 if (!hash->array[i].key)
132 {
133 hash->array[i].key = key;
134 hash->array[i].value = value;
135 hash->elems++;
136 return;
137 }
138 while (hash->array[i].key)
139 {
140 if (hash->array[i].key == key)
141 { // found
142 hash->array[i].value = value;
143 return;
144 }
145 // HANDLER (OUTPUT, "set collision at %d\n", i);
146 i++; // linear probing with wrap around
147 if (i == hash->size)
148 i = 0;
149 if (i == j) // not found
150 {
151 // HANDLER (OUTPUT, "set not found at %d\n", i);
152 // if does not exist, add at i+1
153 if (hash_need_resize (hash))
154 {
155 // HANDLER (OUTPUT, "resize at %d\n", hash->size);
156 hash_resize (hash);
157 hash_set (hash, key, value);
158 return;
159 }
160 while (hash->array[i].key) // find next empty slot
161 {
162 // up to here we have no coverage!
163 // HANDLER (OUTPUT, "set 2nd collision at %d\n", i);
164 i++; // again linear probing with wrap around
165 if (i == hash->size)
166 i = 0;
167 if (i == j) // not found
168 {
169 // HANDLER (OUTPUT, "not found resize at %d\n", hash->size);
170 hash_resize (hash); // guarantees new empty slots
171 hash_set (hash, key, value);
172 return;
173 }
174 else
175 { // insert at empty slot
176 hash->array[i].key = key;
177 hash->array[i].value = value;
178 hash->elems++;
179 return;
180 }
181 }
182 }
183 }
184 // empty slot
185 hash->array[i].key = key;
186 hash->array[i].value = value;
187 hash->elems++;
188 return;
189 }
190
191 void
192 hash_free (dwg_inthash *hash)
193 {
194 free (hash->array);
195 hash->array = NULL;
196 hash->size = 0;
197 hash->elems = 0;
198 free (hash);
199 }