(root)/
glib-2.79.0/
girepository/
cmph/
compressed_seq.c
       1  #include "compressed_seq.h"
       2  #include <stdlib.h>
       3  #include <stdio.h>
       4  #include <assert.h>
       5  #include <limits.h>
       6  #include <string.h>
       7  
       8  #include "bitbool.h"
       9  
      10  // #define DEBUG
      11  #include "debug.h"
      12  
      13  static inline cmph_uint32 compressed_seq_i_log2(cmph_uint32 x)
      14  {
      15  	register cmph_uint32 res = 0;
      16  	
      17  	while(x > 1)
      18  	{
      19  		x >>= 1;
      20  		res++;
      21  	}
      22  	return res;
      23  };
      24  
      25  void compressed_seq_init(compressed_seq_t * cs)
      26  {
      27  	select_init(&cs->sel);
      28  	cs->n = 0;
      29  	cs->rem_r = 0;
      30  	cs->length_rems = 0;
      31  	cs->total_length = 0;
      32  	cs->store_table = 0;
      33  }
      34  
      35  void compressed_seq_destroy(compressed_seq_t * cs)
      36  {
      37  	free(cs->store_table);
      38  	cs->store_table = 0;
      39  	free(cs->length_rems);
      40  	cs->length_rems = 0;
      41  	select_destroy(&cs->sel);
      42  };
      43  
      44  
      45  void compressed_seq_generate(compressed_seq_t * cs, cmph_uint32 * vals_table, cmph_uint32 n)
      46  {
      47  	register cmph_uint32 i;
      48  	// lengths: represents lengths of encoded values	
      49  	register cmph_uint32 * lengths = (cmph_uint32 *)calloc(n, sizeof(cmph_uint32));
      50  	register cmph_uint32 rems_mask;
      51  	register cmph_uint32 stored_value;
      52  	
      53  	cs->n = n;
      54  	cs->total_length = 0;
      55  	
      56  	for(i = 0; i < cs->n; i++)
      57  	{
      58  		if(vals_table[i] == 0)
      59  		{
      60  			lengths[i] = 0;
      61  		}
      62  		else
      63  		{
      64  			lengths[i] = compressed_seq_i_log2(vals_table[i] + 1);
      65  			cs->total_length += lengths[i];
      66  		};
      67  	};
      68  	
      69  	if(cs->store_table)
      70  	{
      71  		free(cs->store_table);
      72  	}
      73  	cs->store_table = (cmph_uint32 *) calloc(((cs->total_length + 31) >> 5), sizeof(cmph_uint32));
      74  	cs->total_length = 0;
      75  	
      76  	for(i = 0; i < cs->n; i++)
      77  	{
      78  		if(vals_table[i] == 0)
      79  			continue;
      80  		stored_value = vals_table[i] - ((1U << lengths[i]) - 1U);
      81  		set_bits_at_pos(cs->store_table, cs->total_length, stored_value, lengths[i]);
      82  		cs->total_length += lengths[i];
      83  	};
      84  	
      85  	cs->rem_r = compressed_seq_i_log2(cs->total_length/cs->n);
      86  	
      87  	if(cs->rem_r == 0)
      88  	{
      89  		cs->rem_r = 1;
      90  	}
      91  	
      92  	if(cs->length_rems)
      93  	{
      94  		free(cs->length_rems);
      95  	}
      96  	
      97  	cs->length_rems = (cmph_uint32 *) calloc(BITS_TABLE_SIZE(cs->n, cs->rem_r), sizeof(cmph_uint32));
      98  	
      99  	rems_mask = (1U << cs->rem_r) - 1U;
     100  	cs->total_length = 0;
     101  	
     102  	for(i = 0; i < cs->n; i++)
     103  	{
     104  		cs->total_length += lengths[i];
     105  		set_bits_value(cs->length_rems, i, cs->total_length & rems_mask, cs->rem_r, rems_mask);
     106  		lengths[i] = cs->total_length >> cs->rem_r;
     107  	};
     108  	
     109  	select_init(&cs->sel);
     110  	 
     111  	// FABIANO: before it was (cs->total_length >> cs->rem_r) + 1. But I wiped out the + 1 because
     112  	// I changed the select structure to work up to m, instead of up to m - 1.
     113  	select_generate(&cs->sel, lengths, cs->n, (cs->total_length >> cs->rem_r));
     114  	
     115  	free(lengths);
     116  };
     117  
     118  cmph_uint32 compressed_seq_get_space_usage(compressed_seq_t * cs)
     119  {
     120  	register cmph_uint32 space_usage = select_get_space_usage(&cs->sel);
     121  	space_usage += ((cs->total_length + 31) >> 5) * (cmph_uint32)sizeof(cmph_uint32) * 8;
     122  	space_usage += BITS_TABLE_SIZE(cs->n, cs->rem_r) * (cmph_uint32)sizeof(cmph_uint32) * 8;
     123  	return  4 * (cmph_uint32)sizeof(cmph_uint32) * 8 + space_usage;
     124  }
     125  
     126  cmph_uint32 compressed_seq_query(compressed_seq_t * cs, cmph_uint32 idx)
     127  {
     128  	register cmph_uint32 enc_idx, enc_length;
     129  	register cmph_uint32 rems_mask;
     130  	register cmph_uint32 stored_value;
     131  	register cmph_uint32 sel_res;
     132  
     133  	assert(idx < cs->n); // FABIANO ADDED
     134  
     135  	rems_mask = (1U << cs->rem_r) - 1U;
     136  	
     137  	if(idx == 0)
     138  	{
     139  		enc_idx = 0;
     140  		sel_res = select_query(&cs->sel, idx);
     141  	}
     142  	else
     143  	{
     144  		sel_res = select_query(&cs->sel, idx - 1);
     145  		
     146  		enc_idx = (sel_res - (idx - 1)) << cs->rem_r;
     147  		enc_idx += get_bits_value(cs->length_rems, idx-1, cs->rem_r, rems_mask);
     148  		
     149  		sel_res = select_next_query(&cs->sel, sel_res);
     150  	};
     151  
     152  	enc_length = (sel_res - idx) << cs->rem_r;
     153  	enc_length += get_bits_value(cs->length_rems, idx, cs->rem_r, rems_mask);
     154  	enc_length -= enc_idx;
     155  	if(enc_length == 0)
     156  		return 0;
     157  		
     158  	stored_value = get_bits_at_pos(cs->store_table, enc_idx, enc_length);
     159  	return stored_value + ((1U << enc_length) - 1U);
     160  };
     161  
     162  void compressed_seq_dump(compressed_seq_t * cs, char ** buf, cmph_uint32 * buflen)
     163  {
     164  	register cmph_uint32 sel_size = select_packed_size(&(cs->sel));
     165  	register cmph_uint32 length_rems_size = BITS_TABLE_SIZE(cs->n, cs->rem_r) * 4;
     166  	register cmph_uint32 store_table_size = ((cs->total_length + 31) >> 5) * 4;
     167  	register cmph_uint32 pos = 0;
     168  	char * buf_sel = 0;
     169  	cmph_uint32 buflen_sel = 0;
     170  #ifdef DEBUG
     171  	cmph_uint32 i;
     172  #endif
     173  	
     174  	*buflen = 4*(cmph_uint32)sizeof(cmph_uint32) + sel_size +  length_rems_size + store_table_size;
     175  	
     176  	DEBUGP("sel_size = %u\n", sel_size);
     177  	DEBUGP("length_rems_size = %u\n", length_rems_size);
     178  	DEBUGP("store_table_size = %u\n", store_table_size);
     179  	*buf = (char *)calloc(*buflen, sizeof(char));
     180  	
     181  	if (!*buf) 
     182  	{
     183  		*buflen = UINT_MAX;
     184  		return;
     185  	}
     186  	
     187  	// dumping n, rem_r and total_length
     188  	memcpy(*buf, &(cs->n), sizeof(cmph_uint32));
     189  	pos += (cmph_uint32)sizeof(cmph_uint32);
     190  	DEBUGP("n = %u\n", cs->n);
     191  	
     192  	memcpy(*buf + pos, &(cs->rem_r), sizeof(cmph_uint32));
     193  	pos += (cmph_uint32)sizeof(cmph_uint32);
     194  	DEBUGP("rem_r = %u\n", cs->rem_r);
     195  
     196  	memcpy(*buf + pos, &(cs->total_length), sizeof(cmph_uint32));
     197  	pos += (cmph_uint32)sizeof(cmph_uint32);
     198  	DEBUGP("total_length = %u\n", cs->total_length);
     199  
     200  	
     201  	// dumping sel
     202  	select_dump(&cs->sel, &buf_sel, &buflen_sel);
     203  	memcpy(*buf + pos, &buflen_sel, sizeof(cmph_uint32));
     204  	pos += (cmph_uint32)sizeof(cmph_uint32);
     205  	DEBUGP("buflen_sel = %u\n", buflen_sel);
     206  
     207  	memcpy(*buf + pos, buf_sel, buflen_sel);
     208  	#ifdef DEBUG
     209  	i = 0;
     210  	for(i = 0; i < buflen_sel; i++)
     211  	{
     212  	    DEBUGP("pos = %u  -- buf_sel[%u] = %u\n", pos, i, *(*buf + pos + i));
     213  	}
     214  	#endif
     215  	pos += buflen_sel;
     216  	
     217  	free(buf_sel);
     218  	
     219  	// dumping length_rems
     220  	memcpy(*buf + pos, cs->length_rems, length_rems_size);
     221  	#ifdef DEBUG
     222  	for(i = 0; i < length_rems_size; i++)
     223  	{
     224  	    DEBUGP("pos = %u -- length_rems_size = %u  -- length_rems[%u] = %u\n", pos, length_rems_size, i, *(*buf + pos + i));
     225  	}
     226  	#endif
     227  	pos += length_rems_size;
     228  
     229  	// dumping store_table
     230  	memcpy(*buf + pos, cs->store_table, store_table_size);
     231  
     232  	#ifdef DEBUG
     233  	for(i = 0; i < store_table_size; i++)
     234  	{
     235  	    DEBUGP("pos = %u -- store_table_size = %u  -- store_table[%u] = %u\n", pos, store_table_size, i, *(*buf + pos + i));
     236  	}
     237  	#endif
     238  	DEBUGP("Dumped compressed sequence structure with size %u bytes\n", *buflen);
     239  }
     240  
     241  void compressed_seq_load(compressed_seq_t * cs, const char * buf, cmph_uint32 buflen)
     242  {
     243  	register cmph_uint32 pos = 0;
     244  	cmph_uint32 buflen_sel = 0;
     245  	register cmph_uint32 length_rems_size = 0;
     246  	register cmph_uint32 store_table_size = 0;
     247  #ifdef DEBUG
     248  	cmph_uint32 i;
     249  #endif
     250  	
     251  	// loading n, rem_r and total_length
     252  	memcpy(&(cs->n), buf, sizeof(cmph_uint32));
     253  	pos += (cmph_uint32)sizeof(cmph_uint32);
     254  	DEBUGP("n = %u\n", cs->n);
     255  
     256  	memcpy(&(cs->rem_r), buf + pos, sizeof(cmph_uint32));
     257  	pos += (cmph_uint32)sizeof(cmph_uint32);
     258  	DEBUGP("rem_r = %u\n", cs->rem_r);
     259  
     260  	memcpy(&(cs->total_length), buf + pos, sizeof(cmph_uint32));
     261  	pos += (cmph_uint32)sizeof(cmph_uint32);
     262  	DEBUGP("total_length = %u\n", cs->total_length);
     263  	
     264  	// loading sel
     265  	memcpy(&buflen_sel, buf + pos, sizeof(cmph_uint32));
     266  	pos += (cmph_uint32)sizeof(cmph_uint32);
     267  	DEBUGP("buflen_sel = %u\n", buflen_sel);
     268  
     269  	select_load(&cs->sel, buf + pos, buflen_sel);
     270  	#ifdef DEBUG
     271  	i = 0;
     272  	for(i = 0; i < buflen_sel; i++)
     273  	{
     274  	    DEBUGP("pos = %u  -- buf_sel[%u] = %u\n", pos, i, *(buf + pos + i));
     275  	}
     276  	#endif
     277  	pos += buflen_sel;
     278  	
     279  	// loading length_rems
     280  	if(cs->length_rems)
     281  	{
     282  		free(cs->length_rems);
     283  	}
     284  	length_rems_size = BITS_TABLE_SIZE(cs->n, cs->rem_r);
     285  	cs->length_rems = (cmph_uint32 *) calloc(length_rems_size, sizeof(cmph_uint32));
     286  	length_rems_size *= 4;
     287  	memcpy(cs->length_rems, buf + pos, length_rems_size);
     288  	
     289  	#ifdef DEBUG
     290  	for(i = 0; i < length_rems_size; i++)
     291  	{
     292  	    DEBUGP("pos = %u -- length_rems_size = %u  -- length_rems[%u] = %u\n", pos, length_rems_size, i, *(buf + pos + i));
     293  	}
     294  	#endif
     295  	pos += length_rems_size;
     296  
     297  	// loading store_table
     298  	store_table_size = ((cs->total_length + 31) >> 5);
     299  	if(cs->store_table)
     300  	{
     301  		free(cs->store_table);
     302  	}
     303  	cs->store_table = (cmph_uint32 *) calloc(store_table_size, sizeof(cmph_uint32));
     304          store_table_size *= 4;
     305  	memcpy(cs->store_table, buf + pos, store_table_size);
     306  	
     307  	#ifdef DEBUG
     308  	for(i = 0; i < store_table_size; i++)
     309  	{
     310  	    DEBUGP("pos = %u -- store_table_size = %u  -- store_table[%u] = %u\n", pos, store_table_size, i, *(buf + pos + i));
     311  	}
     312  	#endif
     313  
     314  	DEBUGP("Loaded compressed sequence structure with size %u bytes\n", buflen);
     315  }
     316  
     317  void compressed_seq_pack(compressed_seq_t *cs, void *cs_packed)
     318  {
     319  	if (cs && cs_packed)
     320  	{
     321  		char *buf = NULL;
     322  		cmph_uint32 buflen = 0;
     323  		compressed_seq_dump(cs, &buf, &buflen);
     324  		memcpy(cs_packed, buf, buflen);
     325  		free(buf);
     326  	}
     327  
     328  }
     329  
     330  cmph_uint32 compressed_seq_packed_size(compressed_seq_t *cs)
     331  {
     332  	register cmph_uint32 sel_size = select_packed_size(&cs->sel);
     333  	register cmph_uint32 store_table_size = ((cs->total_length + 31) >> 5) * (cmph_uint32)sizeof(cmph_uint32);
     334  	register cmph_uint32 length_rems_size = BITS_TABLE_SIZE(cs->n, cs->rem_r) * (cmph_uint32)sizeof(cmph_uint32);
     335  	return 4 * (cmph_uint32)sizeof(cmph_uint32) + sel_size + store_table_size + length_rems_size;
     336  }
     337  
     338  
     339  cmph_uint32 compressed_seq_query_packed(void * cs_packed, cmph_uint32 idx)
     340  {
     341  	// unpacking cs_packed
     342  	register cmph_uint32 *ptr = (cmph_uint32 *)cs_packed;
     343  	register cmph_uint32 n = *ptr++;
     344  	register cmph_uint32 rem_r = *ptr++;
     345  	register cmph_uint32 buflen_sel, length_rems_size, enc_idx, enc_length;
     346  	// compressed sequence query computation
     347  	register cmph_uint32 rems_mask, stored_value, sel_res;
     348  	register cmph_uint32 *sel_packed, *length_rems, *store_table;
     349  
     350  	ptr++; // skipping total_length 
     351  // 	register cmph_uint32 total_length = *ptr++;
     352  	buflen_sel = *ptr++;
     353  	sel_packed = ptr;
     354  	length_rems = (ptr += (buflen_sel >> 2));
     355  	length_rems_size = BITS_TABLE_SIZE(n, rem_r);
     356  	store_table = (ptr += length_rems_size);
     357  
     358  
     359  	rems_mask = (1U << rem_r) - 1U;
     360  	
     361  	if(idx == 0)
     362  	{
     363  		enc_idx = 0;
     364  		sel_res = select_query_packed(sel_packed, idx);
     365  	}
     366  	else
     367  	{
     368  		sel_res = select_query_packed(sel_packed, idx - 1);
     369  		
     370  		enc_idx = (sel_res - (idx - 1)) << rem_r;
     371  		enc_idx += get_bits_value(length_rems, idx-1, rem_r, rems_mask);
     372  		
     373  		sel_res = select_next_query_packed(sel_packed, sel_res);
     374  	};
     375  
     376  	enc_length = (sel_res - idx) << rem_r;
     377  	enc_length += get_bits_value(length_rems, idx, rem_r, rems_mask);
     378  	enc_length -= enc_idx;
     379  	if(enc_length == 0)
     380  		return 0;
     381  		
     382  	stored_value = get_bits_at_pos(store_table, enc_idx, enc_length);
     383  	return stored_value + ((1U << enc_length) - 1U);
     384  }