(root)/
glib-2.79.0/
girepository/
cmph/
compressed_rank.c
       1  #include<stdlib.h>
       2  #include<stdio.h>
       3  #include<limits.h>
       4  #include<string.h>
       5  #include"compressed_rank.h"
       6  #include"bitbool.h"
       7  // #define DEBUG
       8  #include"debug.h"
       9  static inline cmph_uint32 compressed_rank_i_log2(cmph_uint32 x)
      10  {
      11  	register cmph_uint32 res = 0;
      12  	
      13  	while(x > 1)
      14  	{
      15  		x >>= 1;
      16  		res++;
      17  	}
      18  	return res;
      19  };
      20  
      21  void compressed_rank_init(compressed_rank_t * cr)
      22  {
      23  	cr->max_val = 0;
      24  	cr->n = 0;
      25  	cr->rem_r = 0;
      26  	select_init(&cr->sel);
      27  	cr->vals_rems = 0;
      28  }
      29  
      30  void compressed_rank_destroy(compressed_rank_t * cr)
      31  {
      32  	free(cr->vals_rems);
      33  	cr->vals_rems = 0;
      34  	select_destroy(&cr->sel);
      35  }
      36  
      37  void compressed_rank_generate(compressed_rank_t * cr, cmph_uint32 * vals_table, cmph_uint32 n)
      38  {
      39  	register cmph_uint32 i,j;
      40  	register cmph_uint32 rems_mask;
      41  	register cmph_uint32 * select_vec = 0;
      42  	cr->n = n;
      43  	cr->max_val = vals_table[cr->n - 1];
      44  	cr->rem_r = compressed_rank_i_log2(cr->max_val/cr->n);
      45  	if(cr->rem_r == 0)
      46  	{
      47  		cr->rem_r = 1;
      48  	}
      49  	select_vec = (cmph_uint32 *) calloc(cr->max_val >> cr->rem_r, sizeof(cmph_uint32));
      50  	cr->vals_rems = (cmph_uint32 *) calloc(BITS_TABLE_SIZE(cr->n, cr->rem_r), sizeof(cmph_uint32));
      51  	rems_mask = (1U << cr->rem_r) - 1U;
      52  	
      53  	for(i = 0; i < cr->n; i++)
      54  	{
      55  		set_bits_value(cr->vals_rems, i, vals_table[i] & rems_mask, cr->rem_r, rems_mask);
      56  	}
      57  
      58  	for(i = 1, j = 0; i <= cr->max_val >> cr->rem_r; i++)
      59  	{
      60  		while(i > (vals_table[j] >> cr->rem_r))
      61  		{
      62  			j++;
      63  		}
      64  		select_vec[i - 1] = j;
      65  	};
      66  
      67  
      68  	// FABIANO: before it was (cr->total_length >> cr->rem_r) + 1. But I wiped out the + 1 because
      69  	// I changed the select structure to work up to m, instead of up to m - 1.
      70  	select_generate(&cr->sel, select_vec, cr->max_val >> cr->rem_r, cr->n);
      71  
      72  	free(select_vec);
      73  }
      74  
      75  cmph_uint32 compressed_rank_query(compressed_rank_t * cr, cmph_uint32 idx)
      76  {
      77  	register cmph_uint32 rems_mask;
      78  	register cmph_uint32 val_quot, val_rem;
      79  	register cmph_uint32 sel_res, rank;
      80  	
      81  	if(idx > cr->max_val)
      82  	{
      83  		return cr->n;
      84  	}
      85  	
      86  	val_quot = idx >> cr->rem_r;
      87  	rems_mask = (1U << cr->rem_r) - 1U;
      88  	val_rem = idx & rems_mask;
      89  	if(val_quot == 0)
      90  	{
      91  		rank = sel_res = 0;
      92  	}
      93  	else
      94  	{
      95  		sel_res = select_query(&cr->sel, val_quot - 1) + 1;
      96  		rank = sel_res - val_quot;
      97  	}
      98  	
      99  	do
     100  	{
     101  		if(GETBIT32(cr->sel.bits_vec, sel_res))
     102  		{
     103  			break;
     104  		}
     105  		if(get_bits_value(cr->vals_rems, rank, cr->rem_r, rems_mask) >= val_rem)
     106  		{
     107  			break;
     108  		}
     109  		sel_res++;
     110  		rank++;
     111  	} while(1);	
     112  	
     113  	return rank;
     114  }
     115  
     116  cmph_uint32 compressed_rank_get_space_usage(compressed_rank_t * cr)
     117  {
     118  	register cmph_uint32 space_usage = select_get_space_usage(&cr->sel);
     119  	space_usage += BITS_TABLE_SIZE(cr->n, cr->rem_r)*(cmph_uint32)sizeof(cmph_uint32)*8;
     120  	space_usage += 3*(cmph_uint32)sizeof(cmph_uint32)*8;
     121  	return space_usage;
     122  }
     123  
     124  void compressed_rank_dump(compressed_rank_t * cr, char **buf, cmph_uint32 *buflen)
     125  {
     126  	register cmph_uint32 sel_size = select_packed_size(&(cr->sel));
     127  	register cmph_uint32 vals_rems_size = BITS_TABLE_SIZE(cr->n, cr->rem_r) * (cmph_uint32)sizeof(cmph_uint32);
     128  	register cmph_uint32 pos = 0;
     129  	char * buf_sel = 0;
     130  	cmph_uint32 buflen_sel = 0;
     131  #ifdef DEBUG
     132  	cmph_uint32 i;
     133  #endif
     134  	
     135  	*buflen = 4*(cmph_uint32)sizeof(cmph_uint32) + sel_size +  vals_rems_size;
     136  	
     137  	DEBUGP("sel_size = %u\n", sel_size);
     138  	DEBUGP("vals_rems_size = %u\n", vals_rems_size);
     139  	
     140  	*buf = (char *)calloc(*buflen, sizeof(char));
     141  	
     142  	if (!*buf) 
     143  	{
     144  		*buflen = UINT_MAX;
     145  		return;
     146  	}
     147  	
     148  	// dumping max_val, n and rem_r
     149  	memcpy(*buf, &(cr->max_val), sizeof(cmph_uint32));
     150  	pos += (cmph_uint32)sizeof(cmph_uint32);
     151  	DEBUGP("max_val = %u\n", cr->max_val);
     152  
     153  	memcpy(*buf + pos, &(cr->n), sizeof(cmph_uint32));
     154  	pos += (cmph_uint32)sizeof(cmph_uint32);
     155  	DEBUGP("n = %u\n", cr->n);
     156  	
     157  	memcpy(*buf + pos, &(cr->rem_r), sizeof(cmph_uint32));
     158  	pos += (cmph_uint32)sizeof(cmph_uint32);
     159  	DEBUGP("rem_r = %u\n", cr->rem_r);
     160  
     161  	// dumping sel
     162  	select_dump(&cr->sel, &buf_sel, &buflen_sel);
     163  	memcpy(*buf + pos, &buflen_sel, sizeof(cmph_uint32));
     164  	pos += (cmph_uint32)sizeof(cmph_uint32);
     165  	DEBUGP("buflen_sel = %u\n", buflen_sel);
     166  
     167  	memcpy(*buf + pos, buf_sel, buflen_sel);
     168  	
     169  	#ifdef DEBUG	
     170  	i = 0;
     171  	for(i = 0; i < buflen_sel; i++)
     172  	{
     173  	    DEBUGP("pos = %u  -- buf_sel[%u] = %u\n", pos, i, *(*buf + pos + i));
     174  	}
     175  	#endif
     176  	pos += buflen_sel;
     177  	
     178  	free(buf_sel);
     179  	
     180  	// dumping vals_rems
     181  	memcpy(*buf + pos, cr->vals_rems, vals_rems_size);
     182  	#ifdef DEBUG	
     183  	for(i = 0; i < vals_rems_size; i++)
     184  	{
     185  	    DEBUGP("pos = %u -- vals_rems_size = %u  -- vals_rems[%u] = %u\n", pos, vals_rems_size, i, *(*buf + pos + i));
     186  	}
     187  	#endif
     188  	pos += vals_rems_size;
     189  
     190  	DEBUGP("Dumped compressed rank structure with size %u bytes\n", *buflen);
     191  }
     192  
     193  void compressed_rank_load(compressed_rank_t * cr, const char *buf, cmph_uint32 buflen)
     194  {
     195  	register cmph_uint32 pos = 0;
     196  	cmph_uint32 buflen_sel = 0;
     197  	register cmph_uint32 vals_rems_size = 0;
     198  #ifdef DEBUG
     199  	cmph_uint32 i;
     200  #endif
     201  	
     202  	// loading max_val, n, and rem_r
     203  	memcpy(&(cr->max_val), buf, sizeof(cmph_uint32));
     204  	pos += (cmph_uint32)sizeof(cmph_uint32);
     205  	DEBUGP("max_val = %u\n", cr->max_val);
     206  
     207  	memcpy(&(cr->n), buf + pos, sizeof(cmph_uint32));
     208  	pos += (cmph_uint32)sizeof(cmph_uint32);
     209  	DEBUGP("n = %u\n", cr->n);
     210  
     211  	memcpy(&(cr->rem_r), buf + pos, sizeof(cmph_uint32));
     212  	pos += (cmph_uint32)sizeof(cmph_uint32);
     213  	DEBUGP("rem_r = %u\n", cr->rem_r);
     214  	
     215  	// loading sel
     216  	memcpy(&buflen_sel, buf + pos, sizeof(cmph_uint32));
     217  	pos += (cmph_uint32)sizeof(cmph_uint32);
     218  	DEBUGP("buflen_sel = %u\n", buflen_sel);
     219  
     220  	select_load(&cr->sel, buf + pos, buflen_sel);
     221  	#ifdef DEBUG	
     222  	i = 0;
     223  	for(i = 0; i < buflen_sel; i++)
     224  	{
     225  	    DEBUGP("pos = %u  -- buf_sel[%u] = %u\n", pos, i, *(buf + pos + i));
     226  	}
     227  	#endif
     228  	pos += buflen_sel;
     229  	
     230  	// loading vals_rems
     231  	if(cr->vals_rems)
     232  	{
     233  		free(cr->vals_rems);
     234  	}
     235  	vals_rems_size = BITS_TABLE_SIZE(cr->n, cr->rem_r);
     236  	cr->vals_rems = (cmph_uint32 *) calloc(vals_rems_size, sizeof(cmph_uint32));
     237  	vals_rems_size *= 4;
     238  	memcpy(cr->vals_rems, buf + pos, vals_rems_size);
     239  	
     240  	#ifdef DEBUG	
     241  	for(i = 0; i < vals_rems_size; i++)
     242  	{
     243  	    DEBUGP("pos = %u -- vals_rems_size = %u  -- vals_rems[%u] = %u\n", pos, vals_rems_size, i, *(buf + pos + i));
     244  	}
     245  	#endif
     246  	pos += vals_rems_size;
     247  	
     248  	DEBUGP("Loaded compressed rank structure with size %u bytes\n", buflen);
     249  }
     250  
     251  
     252  
     253  void compressed_rank_pack(compressed_rank_t *cr, void *cr_packed)
     254  {
     255  	if (cr && cr_packed)
     256  	{
     257  		char *buf = NULL;
     258  		cmph_uint32 buflen = 0;
     259  		compressed_rank_dump(cr, &buf, &buflen);
     260  		memcpy(cr_packed, buf, buflen);
     261  		free(buf);
     262  	}
     263  }
     264  
     265  cmph_uint32 compressed_rank_packed_size(compressed_rank_t *cr)
     266  {
     267  	register cmph_uint32 sel_size = select_packed_size(&cr->sel);
     268  	register cmph_uint32 vals_rems_size = BITS_TABLE_SIZE(cr->n, cr->rem_r) * (cmph_uint32)sizeof(cmph_uint32);	
     269  	return 4 * (cmph_uint32)sizeof(cmph_uint32)  + sel_size +  vals_rems_size;
     270  }
     271  
     272  cmph_uint32 compressed_rank_query_packed(void * cr_packed, cmph_uint32 idx)
     273  {
     274  	// unpacking cr_packed
     275  	register cmph_uint32 *ptr = (cmph_uint32 *)cr_packed;
     276  	register cmph_uint32 max_val = *ptr++;
     277  	register cmph_uint32 n = *ptr++;
     278  	register cmph_uint32 rem_r = *ptr++;
     279  	register cmph_uint32 buflen_sel = *ptr++;
     280  	register cmph_uint32 * sel_packed = ptr;
     281  	
     282  	register cmph_uint32 * bits_vec = sel_packed + 2; // skipping n and m
     283  
     284  	register cmph_uint32 * vals_rems = (ptr += (buflen_sel >> 2)); 
     285  
     286  	// compressed sequence query computation
     287  	register cmph_uint32 rems_mask;
     288  	register cmph_uint32 val_quot, val_rem;
     289  	register cmph_uint32 sel_res, rank;
     290  	
     291  	if(idx > max_val)
     292  	{
     293  		return n;
     294  	}
     295  	
     296  	val_quot = idx >> rem_r; 	
     297  	rems_mask = (1U << rem_r) - 1U; 
     298  	val_rem = idx & rems_mask; 
     299  	if(val_quot == 0)
     300  	{
     301  		rank = sel_res = 0;
     302  	}
     303  	else
     304  	{
     305  		sel_res = select_query_packed(sel_packed, val_quot - 1) + 1;
     306  		rank = sel_res - val_quot;
     307  	}
     308  	
     309  	do
     310  	{
     311  		if(GETBIT32(bits_vec, sel_res))
     312  		{
     313  			break;
     314  		}
     315  		if(get_bits_value(vals_rems, rank, rem_r, rems_mask) >= val_rem)
     316  		{
     317  			break;
     318  		}
     319  		sel_res++;
     320  		rank++;
     321  	} while(1);	
     322  	
     323  	return rank;
     324  }
     325  
     326  
     327