1 /* -*- mode: C; c-file-style: "gnu"; indent-tabs-mode: nil; -*-
2 * GObject introspection: Typelib hashing
3 *
4 * Copyright (C) 2010 Red Hat, Inc.
5 *
6 * SPDX-License-Identifier: LGPL-2.1-or-later
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the
20 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 * Boston, MA 02111-1307, USA.
22 */
23
24 #include <glib.h>
25 #include <glib-object.h>
26 #include <string.h>
27
28 #include "cmph/cmph.h"
29 #include "gitypelib-internal.h"
30
31 #define ALIGN_VALUE(this, boundary) \
32 (( ((unsigned long)(this)) + (((unsigned long)(boundary)) -1)) & (~(((unsigned long)(boundary))-1)))
33
34 /*
35 * String hashing in the typelib. We have a set of static (fixed) strings,
36 * and given one, we need to find its index number. This problem is perfect
37 * hashing: http://en.wikipedia.org/wiki/Perfect_hashing
38 *
39 * I chose CMPH (http://cmph.sourceforge.net/) as it seemed high
40 * quality, well documented, and easy to embed.
41 *
42 * CMPH provides a number of algorithms; I chose BDZ, because while CHD
43 * appears to be the "best", the simplicitly of BDZ appealed, and really,
44 * we're only talking about thousands of strings here, not millions, so
45 * a few microseconds is no big deal.
46 *
47 * In memory, the format is:
48 * INT32 mph_size
49 * MPH (mph_size bytes)
50 * (padding for alignment to uint32 if necessary)
51 * INDEX (array of guint16)
52 *
53 * Because BDZ is not order preserving, we need a lookaside table which
54 * maps the hash value into the directory index.
55 */
56
57 struct _GITypelibHashBuilder {
58 gboolean prepared;
59 gboolean buildable;
60 cmph_t *c;
61 GHashTable *strings;
62 guint32 dirmap_offset;
63 guint32 packed_size;
64 };
65
66 GITypelibHashBuilder *
67 gi_typelib_hash_builder_new (void)
68 {
69 GITypelibHashBuilder *builder = g_slice_new0 (GITypelibHashBuilder);
70 builder->c = NULL;
71 builder->strings = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL);
72 return builder;
73 }
74
75 void
76 gi_typelib_hash_builder_add_string (GITypelibHashBuilder *builder,
77 const char *str,
78 guint16 value)
79 {
80 g_return_if_fail (builder->c == NULL);
81 g_hash_table_insert (builder->strings, g_strdup (str), GUINT_TO_POINTER ((guint) value));
82 }
83
84 gboolean
85 gi_typelib_hash_builder_prepare (GITypelibHashBuilder *builder)
86 {
87 char **strs;
88 GHashTableIter hashiter;
89 gpointer key, value;
90 cmph_io_adapter_t *io;
91 cmph_config_t *config;
92 guint32 num_elts;
93 guint32 offset;
94 guint i;
95
96 if (builder->prepared)
97 return builder->buildable;
98 g_assert (builder->c == NULL);
99
100 num_elts = g_hash_table_size (builder->strings);
101 g_assert (num_elts <= 65536);
102
103 strs = (char**) g_new (char *, num_elts + 1);
104
105 i = 0;
106 g_hash_table_iter_init (&hashiter, builder->strings);
107 while (g_hash_table_iter_next (&hashiter, &key, &value))
108 {
109 const char *str = key;
110
111 strs[i++] = g_strdup (str);
112 }
113 strs[i++] = NULL;
114
115 io = cmph_io_vector_adapter (strs, num_elts);
116 config = cmph_config_new (io);
117 cmph_config_set_algo (config, CMPH_BDZ);
118
119 builder->c = cmph_new (config);
120 builder->prepared = TRUE;
121 if (!builder->c)
122 {
123 builder->buildable = FALSE;
124 goto out;
125 }
126 builder->buildable = TRUE;
127 g_assert (cmph_size (builder->c) == num_elts);
128
129 /* Pack a size counter at front */
130 offset = sizeof(guint32) + cmph_packed_size (builder->c);
131 builder->dirmap_offset = ALIGN_VALUE (offset, 4);
132 builder->packed_size = builder->dirmap_offset + (num_elts * sizeof(guint16));
133 out:
134 cmph_config_destroy (config);
135 cmph_io_vector_adapter_destroy (io);
136 return builder->buildable;
137 }
138
139 guint32
140 gi_typelib_hash_builder_get_buffer_size (GITypelibHashBuilder *builder)
141 {
142 g_return_val_if_fail (builder != NULL, 0);
143 g_return_val_if_fail (builder->prepared, 0);
144 g_return_val_if_fail (builder->buildable, 0 );
145
146 return builder->packed_size;
147 }
148
149 void
150 gi_typelib_hash_builder_pack (GITypelibHashBuilder *builder, guint8* mem, guint32 len)
151 {
152 guint16 *table;
153 GHashTableIter hashiter;
154 gpointer key, value;
155 #ifndef G_DISABLE_ASSERT
156 guint32 num_elts;
157 #endif
158 guint8 *packed_mem;
159
160 g_return_if_fail (builder != NULL);
161 g_return_if_fail (builder->prepared);
162 g_return_if_fail (builder->buildable);
163
164 g_assert (len >= builder->packed_size);
165 g_assert ((((size_t)mem) & 0x3) == 0);
166
167 memset (mem, 0, len);
168
169 *((guint32*) mem) = builder->dirmap_offset;
170 packed_mem = (guint8*)(mem + sizeof(guint32));
171 cmph_pack (builder->c, packed_mem);
172
173 table = (guint16*) (mem + builder->dirmap_offset);
174
175 #ifndef G_DISABLE_ASSERT
176 num_elts = g_hash_table_size (builder->strings);
177 #endif
178 g_hash_table_iter_init (&hashiter, builder->strings);
179 while (g_hash_table_iter_next (&hashiter, &key, &value))
180 {
181 const char *str = key;
182 guint16 strval = (guint16)GPOINTER_TO_UINT(value);
183 guint32 hashv;
184
185 hashv = cmph_search_packed (packed_mem, str, strlen (str));
186 g_assert (hashv < num_elts);
187 table[hashv] = strval;
188 }
189 }
190
191 void
192 gi_typelib_hash_builder_destroy (GITypelibHashBuilder *builder)
193 {
194 if (builder->c)
195 {
196 cmph_destroy (builder->c);
197 builder->c = NULL;
198 }
199 g_hash_table_destroy (builder->strings);
200 g_slice_free (GITypelibHashBuilder, builder);
201 }
202
203 guint16
204 gi_typelib_hash_search (guint8* memory, const char *str, guint n_entries)
205 {
206 guint32 *mph;
207 guint16 *table;
208 guint32 dirmap_offset;
209 guint32 offset;
210
211 g_assert ((((size_t)memory) & 0x3) == 0);
212 mph = ((guint32*)memory)+1;
213
214 offset = cmph_search_packed (mph, str, strlen (str));
215
216 /* Make sure that offset always lies in the entries array. cmph
217 cometimes generates offset larger than number of entries (for
218 'str' argument which is not in the hashed list). In this case,
219 fake the correct result and depend on caller's final check that
220 the entry is really the one that the caller wanted. */
221 if (offset >= n_entries)
222 offset = 0;
223
224 dirmap_offset = *((guint32*)memory);
225 table = (guint16*) (memory + dirmap_offset);
226
227 return table[offset];
228 }
229