1 #include<stdio.h>
2 #include<stdlib.h>
3 #include<string.h>
4 #include<math.h>
5 #include<time.h>
6 #include<assert.h>
7 #include<limits.h>
8 #include<errno.h>
9
10 #include "cmph_structs.h"
11 #include "chd_structs.h"
12 #include "chd.h"
13 #include "bitbool.h"
14 //#define DEBUG
15 #include "debug.h"
16
17 chd_config_data_t *chd_config_new(cmph_config_t *mph)
18 {
19 cmph_io_adapter_t *key_source = mph->key_source;
20 chd_config_data_t *chd;
21 chd = (chd_config_data_t *)malloc(sizeof(chd_config_data_t));
22 assert(chd);
23 memset(chd, 0, sizeof(chd_config_data_t));
24
25 chd->chd_ph = cmph_config_new(key_source);
26 cmph_config_set_algo(chd->chd_ph, CMPH_CHD_PH);
27
28 return chd;
29 }
30
31 void chd_config_destroy(cmph_config_t *mph)
32 {
33 chd_config_data_t *data = (chd_config_data_t *) mph->data;
34 DEBUGP("Destroying algorithm dependent data\n");
35 if(data->chd_ph)
36 {
37 cmph_config_destroy(data->chd_ph);
38 data->chd_ph = NULL;
39 }
40 free(data);
41 }
42
43
44 void chd_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
45 {
46 chd_config_data_t *data = (chd_config_data_t *) mph->data;
47 cmph_config_set_hashfuncs(data->chd_ph, hashfuncs);
48 }
49
50
51 void chd_config_set_b(cmph_config_t *mph, cmph_uint32 keys_per_bucket)
52 {
53 chd_config_data_t *data = (chd_config_data_t *) mph->data;
54 cmph_config_set_b(data->chd_ph, keys_per_bucket);
55 }
56
57
58 void chd_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin)
59 {
60 chd_config_data_t *data = (chd_config_data_t *) mph->data;
61 cmph_config_set_keys_per_bin(data->chd_ph, keys_per_bin);
62 }
63
64
65 cmph_t *chd_new(cmph_config_t *mph, double c)
66 {
67 cmph_t *mphf = NULL;
68 chd_data_t *chdf = NULL;
69 chd_config_data_t *chd = (chd_config_data_t *)mph->data;
70 chd_ph_config_data_t * chd_ph = (chd_ph_config_data_t *)chd->chd_ph->data;
71 compressed_rank_t cr;
72
73 register cmph_t * chd_phf = NULL;
74 register cmph_uint32 packed_chd_phf_size = 0;
75 cmph_uint8 * packed_chd_phf = NULL;
76
77 register cmph_uint32 packed_cr_size = 0;
78 cmph_uint8 * packed_cr = NULL;
79
80 register cmph_uint32 i, idx, nkeys, nvals, nbins;
81 cmph_uint32 * vals_table = NULL;
82 register cmph_uint32 * occup_table = NULL;
83 #ifdef CMPH_TIMING
84 double construction_time_begin = 0.0;
85 double construction_time = 0.0;
86 ELAPSED_TIME_IN_SECONDS(&construction_time_begin);
87 #endif
88
89 cmph_config_set_verbosity(chd->chd_ph, mph->verbosity);
90 cmph_config_set_graphsize(chd->chd_ph, c);
91
92 if (mph->verbosity)
93 {
94 fprintf(stderr, "Generating a CHD_PH perfect hash function with a load factor equal to %.3f\n", c);
95 }
96
97 chd_phf = cmph_new(chd->chd_ph);
98
99 if(chd_phf == NULL)
100 {
101 return NULL;
102 }
103
104 packed_chd_phf_size = cmph_packed_size(chd_phf);
105 DEBUGP("packed_chd_phf_size = %u\n", packed_chd_phf_size);
106
107 /* Make sure that we have enough space to pack the mphf. */
108 packed_chd_phf = calloc((size_t)packed_chd_phf_size,(size_t)1);
109
110 /* Pack the mphf. */
111 cmph_pack(chd_phf, packed_chd_phf);
112
113 cmph_destroy(chd_phf);
114
115
116 if (mph->verbosity)
117 {
118 fprintf(stderr, "Compressing the range of the resulting CHD_PH perfect hash function\n");
119 }
120
121 compressed_rank_init(&cr);
122 nbins = chd_ph->n;
123 nkeys = chd_ph->m;
124 nvals = nbins - nkeys;
125
126 vals_table = (cmph_uint32 *)calloc(nvals, sizeof(cmph_uint32));
127 occup_table = (cmph_uint32 *)chd_ph->occup_table;
128
129 for(i = 0, idx = 0; i < nbins; i++)
130 {
131 if(!GETBIT32(occup_table, i))
132 {
133 vals_table[idx++] = i;
134 }
135 }
136
137 compressed_rank_generate(&cr, vals_table, nvals);
138 free(vals_table);
139
140 packed_cr_size = compressed_rank_packed_size(&cr);
141 packed_cr = (cmph_uint8 *) calloc(packed_cr_size, sizeof(cmph_uint8));
142 compressed_rank_pack(&cr, packed_cr);
143 compressed_rank_destroy(&cr);
144
145 mphf = (cmph_t *)malloc(sizeof(cmph_t));
146 mphf->algo = mph->algo;
147 chdf = (chd_data_t *)malloc(sizeof(chd_data_t));
148
149 chdf->packed_cr = packed_cr;
150 packed_cr = NULL; //transfer memory ownership
151
152 chdf->packed_chd_phf = packed_chd_phf;
153 packed_chd_phf = NULL; //transfer memory ownership
154
155 chdf->packed_chd_phf_size = packed_chd_phf_size;
156 chdf->packed_cr_size = packed_cr_size;
157
158 mphf->data = chdf;
159 mphf->size = nkeys;
160
161 DEBUGP("Successfully generated minimal perfect hash\n");
162 if (mph->verbosity)
163 {
164 fprintf(stderr, "Successfully generated minimal perfect hash function\n");
165 }
166 #ifdef CMPH_TIMING
167 ELAPSED_TIME_IN_SECONDS(&construction_time);
168 register cmph_uint32 space_usage = chd_packed_size(mphf)*8;
169 construction_time = construction_time - construction_time_begin;
170 fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\n", nkeys, c, chd_ph->keys_per_bucket, construction_time, space_usage/(double)nkeys);
171 #endif
172
173 return mphf;
174 }
175
176 void chd_load(FILE *fd, cmph_t *mphf)
177 {
178 register size_t nbytes;
179 chd_data_t *chd = (chd_data_t *)malloc(sizeof(chd_data_t));
180
181 DEBUGP("Loading chd mphf\n");
182 mphf->data = chd;
183
184 nbytes = fread(&chd->packed_chd_phf_size, sizeof(cmph_uint32), (size_t)1, fd);
185 DEBUGP("Loading CHD_PH perfect hash function with %u bytes to disk\n", chd->packed_chd_phf_size);
186 chd->packed_chd_phf = (cmph_uint8 *) calloc((size_t)chd->packed_chd_phf_size,(size_t)1);
187 nbytes = fread(chd->packed_chd_phf, chd->packed_chd_phf_size, (size_t)1, fd);
188
189 nbytes = fread(&chd->packed_cr_size, sizeof(cmph_uint32), (size_t)1, fd);
190 DEBUGP("Loading Compressed rank structure, which has %u bytes\n", chd->packed_cr_size);
191 chd->packed_cr = (cmph_uint8 *) calloc((size_t)chd->packed_cr_size, (size_t)1);
192 nbytes = fread(chd->packed_cr, chd->packed_cr_size, (size_t)1, fd);
193 if (nbytes == 0 && ferror(fd)) {
194 fprintf(stderr, "ERROR: %s\n", strerror(errno));
195 }
196
197 }
198
199 int chd_dump(cmph_t *mphf, FILE *fd)
200 {
201 register size_t nbytes;
202 chd_data_t *data = (chd_data_t *)mphf->data;
203
204 __cmph_dump(mphf, fd);
205 // Dumping CHD_PH perfect hash function
206
207 DEBUGP("Dumping CHD_PH perfect hash function with %u bytes to disk\n", data->packed_chd_phf_size);
208 nbytes = fwrite(&data->packed_chd_phf_size, sizeof(cmph_uint32), (size_t)1, fd);
209 nbytes = fwrite(data->packed_chd_phf, data->packed_chd_phf_size, (size_t)1, fd);
210
211 DEBUGP("Dumping compressed rank structure with %u bytes to disk\n", data->packed_cr_size);
212 nbytes = fwrite(&data->packed_cr_size, sizeof(cmph_uint32), (size_t)1, fd);
213 nbytes = fwrite(data->packed_cr, data->packed_cr_size, (size_t)1, fd);
214 if (nbytes == 0 && ferror(fd)) {
215 fprintf(stderr, "ERROR: %s\n", strerror(errno));
216 return 0;
217 }
218
219 return 1;
220 }
221
222 void chd_destroy(cmph_t *mphf)
223 {
224 chd_data_t *data = (chd_data_t *)mphf->data;
225 free(data->packed_chd_phf);
226 free(data->packed_cr);
227 free(data);
228 free(mphf);
229 }
230
231 static inline cmph_uint32 _chd_search(void * packed_chd_phf, void * packed_cr, const char *key, cmph_uint32 keylen)
232 {
233 register cmph_uint32 bin_idx = cmph_search_packed(packed_chd_phf, key, keylen);
234 register cmph_uint32 rank = compressed_rank_query_packed(packed_cr, bin_idx);
235 return bin_idx - rank;
236 }
237
238 cmph_uint32 chd_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
239 {
240 register chd_data_t * chd = mphf->data;
241 return _chd_search(chd->packed_chd_phf, chd->packed_cr, key, keylen);
242 }
243
244 void chd_pack(cmph_t *mphf, void *packed_mphf)
245 {
246 chd_data_t *data = (chd_data_t *)mphf->data;
247 cmph_uint32 * ptr = packed_mphf;
248 cmph_uint8 * ptr8;
249
250 // packing packed_cr_size and packed_cr
251 *ptr = data->packed_cr_size;
252 ptr8 = (cmph_uint8 *) (ptr + 1);
253
254 memcpy(ptr8, data->packed_cr, data->packed_cr_size);
255 ptr8 += data->packed_cr_size;
256
257 ptr = (cmph_uint32 *) ptr8;
258 *ptr = data->packed_chd_phf_size;
259
260 ptr8 = (cmph_uint8 *) (ptr + 1);
261 memcpy(ptr8, data->packed_chd_phf, data->packed_chd_phf_size);
262 }
263
264 cmph_uint32 chd_packed_size(cmph_t *mphf)
265 {
266 register chd_data_t *data = (chd_data_t *)mphf->data;
267 return (cmph_uint32)(sizeof(CMPH_ALGO) + 2*sizeof(cmph_uint32) + data->packed_cr_size + data->packed_chd_phf_size);
268
269 }
270
271 cmph_uint32 chd_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
272 {
273
274 register cmph_uint32 * ptr = packed_mphf;
275 register cmph_uint32 packed_cr_size = *ptr++;
276 register cmph_uint8 * packed_chd_phf = ((cmph_uint8 *) ptr) + packed_cr_size + sizeof(cmph_uint32);
277 return _chd_search(packed_chd_phf, ptr, key, keylen);
278 }
279
280