1 #include "compressed_seq.h"
2 #include <stdlib.h>
3 #include <stdio.h>
4 #include <assert.h>
5 #include <limits.h>
6 #include <string.h>
7
8 #include "bitbool.h"
9
10 // #define DEBUG
11 #include "debug.h"
12
13 static inline cmph_uint32 compressed_seq_i_log2(cmph_uint32 x)
14 {
15 register cmph_uint32 res = 0;
16
17 while(x > 1)
18 {
19 x >>= 1;
20 res++;
21 }
22 return res;
23 };
24
25 void compressed_seq_init(compressed_seq_t * cs)
26 {
27 select_init(&cs->sel);
28 cs->n = 0;
29 cs->rem_r = 0;
30 cs->length_rems = 0;
31 cs->total_length = 0;
32 cs->store_table = 0;
33 }
34
35 void compressed_seq_destroy(compressed_seq_t * cs)
36 {
37 free(cs->store_table);
38 cs->store_table = 0;
39 free(cs->length_rems);
40 cs->length_rems = 0;
41 select_destroy(&cs->sel);
42 };
43
44
45 void compressed_seq_generate(compressed_seq_t * cs, cmph_uint32 * vals_table, cmph_uint32 n)
46 {
47 register cmph_uint32 i;
48 // lengths: represents lengths of encoded values
49 register cmph_uint32 * lengths = (cmph_uint32 *)calloc(n, sizeof(cmph_uint32));
50 register cmph_uint32 rems_mask;
51 register cmph_uint32 stored_value;
52
53 cs->n = n;
54 cs->total_length = 0;
55
56 for(i = 0; i < cs->n; i++)
57 {
58 if(vals_table[i] == 0)
59 {
60 lengths[i] = 0;
61 }
62 else
63 {
64 lengths[i] = compressed_seq_i_log2(vals_table[i] + 1);
65 cs->total_length += lengths[i];
66 };
67 };
68
69 if(cs->store_table)
70 {
71 free(cs->store_table);
72 }
73 cs->store_table = (cmph_uint32 *) calloc(((cs->total_length + 31) >> 5), sizeof(cmph_uint32));
74 cs->total_length = 0;
75
76 for(i = 0; i < cs->n; i++)
77 {
78 if(vals_table[i] == 0)
79 continue;
80 stored_value = vals_table[i] - ((1U << lengths[i]) - 1U);
81 set_bits_at_pos(cs->store_table, cs->total_length, stored_value, lengths[i]);
82 cs->total_length += lengths[i];
83 };
84
85 cs->rem_r = compressed_seq_i_log2(cs->total_length/cs->n);
86
87 if(cs->rem_r == 0)
88 {
89 cs->rem_r = 1;
90 }
91
92 if(cs->length_rems)
93 {
94 free(cs->length_rems);
95 }
96
97 cs->length_rems = (cmph_uint32 *) calloc(BITS_TABLE_SIZE(cs->n, cs->rem_r), sizeof(cmph_uint32));
98
99 rems_mask = (1U << cs->rem_r) - 1U;
100 cs->total_length = 0;
101
102 for(i = 0; i < cs->n; i++)
103 {
104 cs->total_length += lengths[i];
105 set_bits_value(cs->length_rems, i, cs->total_length & rems_mask, cs->rem_r, rems_mask);
106 lengths[i] = cs->total_length >> cs->rem_r;
107 };
108
109 select_init(&cs->sel);
110
111 // FABIANO: before it was (cs->total_length >> cs->rem_r) + 1. But I wiped out the + 1 because
112 // I changed the select structure to work up to m, instead of up to m - 1.
113 select_generate(&cs->sel, lengths, cs->n, (cs->total_length >> cs->rem_r));
114
115 free(lengths);
116 };
117
118 cmph_uint32 compressed_seq_get_space_usage(compressed_seq_t * cs)
119 {
120 register cmph_uint32 space_usage = select_get_space_usage(&cs->sel);
121 space_usage += ((cs->total_length + 31) >> 5) * (cmph_uint32)sizeof(cmph_uint32) * 8;
122 space_usage += BITS_TABLE_SIZE(cs->n, cs->rem_r) * (cmph_uint32)sizeof(cmph_uint32) * 8;
123 return 4 * (cmph_uint32)sizeof(cmph_uint32) * 8 + space_usage;
124 }
125
126 cmph_uint32 compressed_seq_query(compressed_seq_t * cs, cmph_uint32 idx)
127 {
128 register cmph_uint32 enc_idx, enc_length;
129 register cmph_uint32 rems_mask;
130 register cmph_uint32 stored_value;
131 register cmph_uint32 sel_res;
132
133 assert(idx < cs->n); // FABIANO ADDED
134
135 rems_mask = (1U << cs->rem_r) - 1U;
136
137 if(idx == 0)
138 {
139 enc_idx = 0;
140 sel_res = select_query(&cs->sel, idx);
141 }
142 else
143 {
144 sel_res = select_query(&cs->sel, idx - 1);
145
146 enc_idx = (sel_res - (idx - 1)) << cs->rem_r;
147 enc_idx += get_bits_value(cs->length_rems, idx-1, cs->rem_r, rems_mask);
148
149 sel_res = select_next_query(&cs->sel, sel_res);
150 };
151
152 enc_length = (sel_res - idx) << cs->rem_r;
153 enc_length += get_bits_value(cs->length_rems, idx, cs->rem_r, rems_mask);
154 enc_length -= enc_idx;
155 if(enc_length == 0)
156 return 0;
157
158 stored_value = get_bits_at_pos(cs->store_table, enc_idx, enc_length);
159 return stored_value + ((1U << enc_length) - 1U);
160 };
161
162 void compressed_seq_dump(compressed_seq_t * cs, char ** buf, cmph_uint32 * buflen)
163 {
164 register cmph_uint32 sel_size = select_packed_size(&(cs->sel));
165 register cmph_uint32 length_rems_size = BITS_TABLE_SIZE(cs->n, cs->rem_r) * 4;
166 register cmph_uint32 store_table_size = ((cs->total_length + 31) >> 5) * 4;
167 register cmph_uint32 pos = 0;
168 char * buf_sel = 0;
169 cmph_uint32 buflen_sel = 0;
170 #ifdef DEBUG
171 cmph_uint32 i;
172 #endif
173
174 *buflen = 4*(cmph_uint32)sizeof(cmph_uint32) + sel_size + length_rems_size + store_table_size;
175
176 DEBUGP("sel_size = %u\n", sel_size);
177 DEBUGP("length_rems_size = %u\n", length_rems_size);
178 DEBUGP("store_table_size = %u\n", store_table_size);
179 *buf = (char *)calloc(*buflen, sizeof(char));
180
181 if (!*buf)
182 {
183 *buflen = UINT_MAX;
184 return;
185 }
186
187 // dumping n, rem_r and total_length
188 memcpy(*buf, &(cs->n), sizeof(cmph_uint32));
189 pos += (cmph_uint32)sizeof(cmph_uint32);
190 DEBUGP("n = %u\n", cs->n);
191
192 memcpy(*buf + pos, &(cs->rem_r), sizeof(cmph_uint32));
193 pos += (cmph_uint32)sizeof(cmph_uint32);
194 DEBUGP("rem_r = %u\n", cs->rem_r);
195
196 memcpy(*buf + pos, &(cs->total_length), sizeof(cmph_uint32));
197 pos += (cmph_uint32)sizeof(cmph_uint32);
198 DEBUGP("total_length = %u\n", cs->total_length);
199
200
201 // dumping sel
202 select_dump(&cs->sel, &buf_sel, &buflen_sel);
203 memcpy(*buf + pos, &buflen_sel, sizeof(cmph_uint32));
204 pos += (cmph_uint32)sizeof(cmph_uint32);
205 DEBUGP("buflen_sel = %u\n", buflen_sel);
206
207 memcpy(*buf + pos, buf_sel, buflen_sel);
208 #ifdef DEBUG
209 i = 0;
210 for(i = 0; i < buflen_sel; i++)
211 {
212 DEBUGP("pos = %u -- buf_sel[%u] = %u\n", pos, i, *(*buf + pos + i));
213 }
214 #endif
215 pos += buflen_sel;
216
217 free(buf_sel);
218
219 // dumping length_rems
220 memcpy(*buf + pos, cs->length_rems, length_rems_size);
221 #ifdef DEBUG
222 for(i = 0; i < length_rems_size; i++)
223 {
224 DEBUGP("pos = %u -- length_rems_size = %u -- length_rems[%u] = %u\n", pos, length_rems_size, i, *(*buf + pos + i));
225 }
226 #endif
227 pos += length_rems_size;
228
229 // dumping store_table
230 memcpy(*buf + pos, cs->store_table, store_table_size);
231
232 #ifdef DEBUG
233 for(i = 0; i < store_table_size; i++)
234 {
235 DEBUGP("pos = %u -- store_table_size = %u -- store_table[%u] = %u\n", pos, store_table_size, i, *(*buf + pos + i));
236 }
237 #endif
238 DEBUGP("Dumped compressed sequence structure with size %u bytes\n", *buflen);
239 }
240
241 void compressed_seq_load(compressed_seq_t * cs, const char * buf, cmph_uint32 buflen)
242 {
243 register cmph_uint32 pos = 0;
244 cmph_uint32 buflen_sel = 0;
245 register cmph_uint32 length_rems_size = 0;
246 register cmph_uint32 store_table_size = 0;
247 #ifdef DEBUG
248 cmph_uint32 i;
249 #endif
250
251 // loading n, rem_r and total_length
252 memcpy(&(cs->n), buf, sizeof(cmph_uint32));
253 pos += (cmph_uint32)sizeof(cmph_uint32);
254 DEBUGP("n = %u\n", cs->n);
255
256 memcpy(&(cs->rem_r), buf + pos, sizeof(cmph_uint32));
257 pos += (cmph_uint32)sizeof(cmph_uint32);
258 DEBUGP("rem_r = %u\n", cs->rem_r);
259
260 memcpy(&(cs->total_length), buf + pos, sizeof(cmph_uint32));
261 pos += (cmph_uint32)sizeof(cmph_uint32);
262 DEBUGP("total_length = %u\n", cs->total_length);
263
264 // loading sel
265 memcpy(&buflen_sel, buf + pos, sizeof(cmph_uint32));
266 pos += (cmph_uint32)sizeof(cmph_uint32);
267 DEBUGP("buflen_sel = %u\n", buflen_sel);
268
269 select_load(&cs->sel, buf + pos, buflen_sel);
270 #ifdef DEBUG
271 i = 0;
272 for(i = 0; i < buflen_sel; i++)
273 {
274 DEBUGP("pos = %u -- buf_sel[%u] = %u\n", pos, i, *(buf + pos + i));
275 }
276 #endif
277 pos += buflen_sel;
278
279 // loading length_rems
280 if(cs->length_rems)
281 {
282 free(cs->length_rems);
283 }
284 length_rems_size = BITS_TABLE_SIZE(cs->n, cs->rem_r);
285 cs->length_rems = (cmph_uint32 *) calloc(length_rems_size, sizeof(cmph_uint32));
286 length_rems_size *= 4;
287 memcpy(cs->length_rems, buf + pos, length_rems_size);
288
289 #ifdef DEBUG
290 for(i = 0; i < length_rems_size; i++)
291 {
292 DEBUGP("pos = %u -- length_rems_size = %u -- length_rems[%u] = %u\n", pos, length_rems_size, i, *(buf + pos + i));
293 }
294 #endif
295 pos += length_rems_size;
296
297 // loading store_table
298 store_table_size = ((cs->total_length + 31) >> 5);
299 if(cs->store_table)
300 {
301 free(cs->store_table);
302 }
303 cs->store_table = (cmph_uint32 *) calloc(store_table_size, sizeof(cmph_uint32));
304 store_table_size *= 4;
305 memcpy(cs->store_table, buf + pos, store_table_size);
306
307 #ifdef DEBUG
308 for(i = 0; i < store_table_size; i++)
309 {
310 DEBUGP("pos = %u -- store_table_size = %u -- store_table[%u] = %u\n", pos, store_table_size, i, *(buf + pos + i));
311 }
312 #endif
313
314 DEBUGP("Loaded compressed sequence structure with size %u bytes\n", buflen);
315 }
316
317 void compressed_seq_pack(compressed_seq_t *cs, void *cs_packed)
318 {
319 if (cs && cs_packed)
320 {
321 char *buf = NULL;
322 cmph_uint32 buflen = 0;
323 compressed_seq_dump(cs, &buf, &buflen);
324 memcpy(cs_packed, buf, buflen);
325 free(buf);
326 }
327
328 }
329
330 cmph_uint32 compressed_seq_packed_size(compressed_seq_t *cs)
331 {
332 register cmph_uint32 sel_size = select_packed_size(&cs->sel);
333 register cmph_uint32 store_table_size = ((cs->total_length + 31) >> 5) * (cmph_uint32)sizeof(cmph_uint32);
334 register cmph_uint32 length_rems_size = BITS_TABLE_SIZE(cs->n, cs->rem_r) * (cmph_uint32)sizeof(cmph_uint32);
335 return 4 * (cmph_uint32)sizeof(cmph_uint32) + sel_size + store_table_size + length_rems_size;
336 }
337
338
339 cmph_uint32 compressed_seq_query_packed(void * cs_packed, cmph_uint32 idx)
340 {
341 // unpacking cs_packed
342 register cmph_uint32 *ptr = (cmph_uint32 *)cs_packed;
343 register cmph_uint32 n = *ptr++;
344 register cmph_uint32 rem_r = *ptr++;
345 register cmph_uint32 buflen_sel, length_rems_size, enc_idx, enc_length;
346 // compressed sequence query computation
347 register cmph_uint32 rems_mask, stored_value, sel_res;
348 register cmph_uint32 *sel_packed, *length_rems, *store_table;
349
350 ptr++; // skipping total_length
351 // register cmph_uint32 total_length = *ptr++;
352 buflen_sel = *ptr++;
353 sel_packed = ptr;
354 length_rems = (ptr += (buflen_sel >> 2));
355 length_rems_size = BITS_TABLE_SIZE(n, rem_r);
356 store_table = (ptr += length_rems_size);
357
358
359 rems_mask = (1U << rem_r) - 1U;
360
361 if(idx == 0)
362 {
363 enc_idx = 0;
364 sel_res = select_query_packed(sel_packed, idx);
365 }
366 else
367 {
368 sel_res = select_query_packed(sel_packed, idx - 1);
369
370 enc_idx = (sel_res - (idx - 1)) << rem_r;
371 enc_idx += get_bits_value(length_rems, idx-1, rem_r, rems_mask);
372
373 sel_res = select_next_query_packed(sel_packed, sel_res);
374 };
375
376 enc_length = (sel_res - idx) << rem_r;
377 enc_length += get_bits_value(length_rems, idx, rem_r, rems_mask);
378 enc_length -= enc_idx;
379 if(enc_length == 0)
380 return 0;
381
382 stored_value = get_bits_at_pos(store_table, enc_idx, enc_length);
383 return stored_value + ((1U << enc_length) - 1U);
384 }