(root)/
glib-2.79.0/
girepository/
cmph/
chd.c
       1  #include<stdio.h>
       2  #include<stdlib.h>
       3  #include<string.h>
       4  #include<math.h>
       5  #include<time.h>
       6  #include<assert.h>
       7  #include<limits.h>
       8  #include<errno.h>
       9  
      10  #include "cmph_structs.h"
      11  #include "chd_structs.h"
      12  #include "chd.h"
      13  #include "bitbool.h"
      14  //#define DEBUG
      15  #include "debug.h"
      16  
      17  chd_config_data_t *chd_config_new(cmph_config_t *mph)
      18  {
      19  	cmph_io_adapter_t *key_source = mph->key_source;
      20  	chd_config_data_t *chd;
      21  	chd = (chd_config_data_t *)malloc(sizeof(chd_config_data_t));
      22  	assert(chd);
      23  	memset(chd, 0, sizeof(chd_config_data_t));
      24  
      25  	chd->chd_ph = cmph_config_new(key_source);
      26  	cmph_config_set_algo(chd->chd_ph, CMPH_CHD_PH);
      27  
      28  	return chd;
      29  }
      30  
      31  void chd_config_destroy(cmph_config_t *mph)
      32  {
      33  	chd_config_data_t *data = (chd_config_data_t *) mph->data;
      34  	DEBUGP("Destroying algorithm dependent data\n");
      35  	if(data->chd_ph)
      36  	{
      37  		cmph_config_destroy(data->chd_ph);
      38  		data->chd_ph = NULL;
      39  	}
      40  	free(data);
      41  }
      42  
      43  
      44  void chd_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
      45  {
      46  	chd_config_data_t *data = (chd_config_data_t *) mph->data;
      47  	cmph_config_set_hashfuncs(data->chd_ph, hashfuncs);
      48  }
      49  
      50  
      51  void chd_config_set_b(cmph_config_t *mph, cmph_uint32 keys_per_bucket)
      52  {
      53  	chd_config_data_t *data = (chd_config_data_t *) mph->data;
      54  	cmph_config_set_b(data->chd_ph, keys_per_bucket);
      55  }
      56  
      57  
      58  void chd_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin)
      59  {
      60  	chd_config_data_t *data = (chd_config_data_t *) mph->data;
      61  	cmph_config_set_keys_per_bin(data->chd_ph, keys_per_bin);
      62  }
      63  
      64  
      65  cmph_t *chd_new(cmph_config_t *mph, double c)
      66  {
      67  	cmph_t *mphf = NULL;
      68  	chd_data_t *chdf = NULL;
      69  	chd_config_data_t *chd = (chd_config_data_t *)mph->data;
      70  	chd_ph_config_data_t * chd_ph = (chd_ph_config_data_t *)chd->chd_ph->data;
      71  	compressed_rank_t cr;
      72  	
      73  	register cmph_t * chd_phf = NULL;
      74  	register cmph_uint32 packed_chd_phf_size = 0; 
      75  	cmph_uint8 * packed_chd_phf = NULL;
      76  	
      77  	register cmph_uint32 packed_cr_size = 0; 
      78  	cmph_uint8 * packed_cr = NULL;
      79  
      80  	register cmph_uint32 i, idx, nkeys, nvals, nbins;
      81  	cmph_uint32 * vals_table = NULL;
      82  	register cmph_uint32 * occup_table = NULL;
      83  	#ifdef CMPH_TIMING
      84  	double construction_time_begin = 0.0;
      85  	double construction_time = 0.0;
      86  	ELAPSED_TIME_IN_SECONDS(&construction_time_begin);
      87  	#endif
      88  
      89  	cmph_config_set_verbosity(chd->chd_ph, mph->verbosity);	
      90  	cmph_config_set_graphsize(chd->chd_ph, c);
      91  	
      92  	if (mph->verbosity)
      93  	{
      94  		fprintf(stderr, "Generating a CHD_PH perfect hash function with a load factor equal to %.3f\n", c);
      95  	}
      96  	
      97  	chd_phf = cmph_new(chd->chd_ph);
      98  	
      99  	if(chd_phf == NULL) 
     100  	{
     101  		return NULL;
     102  	}
     103  	
     104  	packed_chd_phf_size = cmph_packed_size(chd_phf); 
     105  	DEBUGP("packed_chd_phf_size = %u\n", packed_chd_phf_size);
     106  	
     107  	/* Make sure that we have enough space to pack the mphf. */
     108  	packed_chd_phf = calloc((size_t)packed_chd_phf_size,(size_t)1);
     109  
     110  	/* Pack the mphf. */
     111  	cmph_pack(chd_phf, packed_chd_phf);
     112  
     113  	cmph_destroy(chd_phf);
     114  	
     115  	
     116  	if (mph->verbosity)
     117  	{
     118  		fprintf(stderr, "Compressing the range of the resulting CHD_PH perfect hash function\n");
     119  	}
     120  
     121  	compressed_rank_init(&cr);
     122  	nbins = chd_ph->n;
     123  	nkeys = chd_ph->m;
     124  	nvals =  nbins - nkeys; 
     125  	
     126  	vals_table = (cmph_uint32 *)calloc(nvals, sizeof(cmph_uint32));
     127  	occup_table = (cmph_uint32 *)chd_ph->occup_table;
     128  	
     129  	for(i = 0, idx = 0; i < nbins; i++)
     130  	{
     131  		if(!GETBIT32(occup_table, i))
     132  		{
     133  			vals_table[idx++] = i;
     134  		}
     135  	}
     136  	
     137  	compressed_rank_generate(&cr, vals_table, nvals);
     138  	free(vals_table);
     139  	
     140  	packed_cr_size = compressed_rank_packed_size(&cr);
     141  	packed_cr = (cmph_uint8 *) calloc(packed_cr_size, sizeof(cmph_uint8));
     142  	compressed_rank_pack(&cr, packed_cr);
     143  	compressed_rank_destroy(&cr);
     144  
     145  	mphf = (cmph_t *)malloc(sizeof(cmph_t));
     146  	mphf->algo = mph->algo;
     147  	chdf = (chd_data_t *)malloc(sizeof(chd_data_t));
     148  	
     149  	chdf->packed_cr = packed_cr;
     150  	packed_cr = NULL; //transfer memory ownership
     151  
     152  	chdf->packed_chd_phf = packed_chd_phf;
     153  	packed_chd_phf = NULL; //transfer memory ownership
     154  	
     155  	chdf->packed_chd_phf_size = packed_chd_phf_size;
     156  	chdf->packed_cr_size = packed_cr_size;
     157  	
     158  	mphf->data = chdf;
     159  	mphf->size = nkeys;
     160  
     161  	DEBUGP("Successfully generated minimal perfect hash\n");
     162  	if (mph->verbosity)
     163  	{
     164  		fprintf(stderr, "Successfully generated minimal perfect hash function\n");
     165  	}
     166  	#ifdef CMPH_TIMING	
     167  	ELAPSED_TIME_IN_SECONDS(&construction_time);
     168  	register cmph_uint32 space_usage =  chd_packed_size(mphf)*8;
     169  	construction_time = construction_time - construction_time_begin;
     170  	fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\n", nkeys, c, chd_ph->keys_per_bucket, construction_time, space_usage/(double)nkeys);
     171  	#endif	
     172  
     173  	return mphf;
     174  }
     175  
     176  void chd_load(FILE *fd, cmph_t *mphf)
     177  {
     178  	register size_t nbytes;
     179  	chd_data_t *chd = (chd_data_t *)malloc(sizeof(chd_data_t));
     180  
     181  	DEBUGP("Loading chd mphf\n");
     182  	mphf->data = chd;
     183  
     184  	nbytes = fread(&chd->packed_chd_phf_size, sizeof(cmph_uint32), (size_t)1, fd);
     185  	DEBUGP("Loading CHD_PH perfect hash function with %u bytes to disk\n", chd->packed_chd_phf_size);
     186  	chd->packed_chd_phf = (cmph_uint8 *) calloc((size_t)chd->packed_chd_phf_size,(size_t)1);
     187  	nbytes = fread(chd->packed_chd_phf, chd->packed_chd_phf_size, (size_t)1, fd);
     188  
     189  	nbytes = fread(&chd->packed_cr_size, sizeof(cmph_uint32), (size_t)1, fd);
     190  	DEBUGP("Loading Compressed rank structure, which has %u bytes\n", chd->packed_cr_size);
     191  	chd->packed_cr = (cmph_uint8 *) calloc((size_t)chd->packed_cr_size, (size_t)1);
     192  	nbytes = fread(chd->packed_cr, chd->packed_cr_size, (size_t)1, fd);
     193  	if (nbytes == 0 && ferror(fd)) {
     194            fprintf(stderr, "ERROR: %s\n", strerror(errno));
     195          }
     196  
     197  }
     198  
     199  int chd_dump(cmph_t *mphf, FILE *fd)
     200  {
     201  	register size_t nbytes;
     202  	chd_data_t *data = (chd_data_t *)mphf->data;
     203  	
     204  	__cmph_dump(mphf, fd);
     205  	// Dumping CHD_PH perfect hash function
     206  
     207  	DEBUGP("Dumping CHD_PH perfect hash function with %u bytes to disk\n", data->packed_chd_phf_size);
     208  	nbytes = fwrite(&data->packed_chd_phf_size, sizeof(cmph_uint32), (size_t)1, fd);
     209  	nbytes = fwrite(data->packed_chd_phf, data->packed_chd_phf_size, (size_t)1, fd);
     210  
     211  	DEBUGP("Dumping compressed rank structure with %u bytes to disk\n", data->packed_cr_size);
     212  	nbytes = fwrite(&data->packed_cr_size, sizeof(cmph_uint32), (size_t)1, fd);
     213  	nbytes = fwrite(data->packed_cr, data->packed_cr_size, (size_t)1, fd);
     214  	if (nbytes == 0 && ferror(fd)) {
     215            fprintf(stderr, "ERROR: %s\n", strerror(errno));
     216            return 0;
     217          }
     218  
     219  	return 1;
     220  }
     221  
     222  void chd_destroy(cmph_t *mphf)
     223  {
     224  	chd_data_t *data = (chd_data_t *)mphf->data;
     225  	free(data->packed_chd_phf);
     226  	free(data->packed_cr);
     227  	free(data);
     228  	free(mphf);
     229  }
     230  
     231  static inline cmph_uint32 _chd_search(void * packed_chd_phf, void * packed_cr, const char *key, cmph_uint32 keylen)
     232  {
     233  	register cmph_uint32 bin_idx = cmph_search_packed(packed_chd_phf, key, keylen);
     234  	register cmph_uint32 rank = compressed_rank_query_packed(packed_cr, bin_idx);
     235  	return bin_idx - rank;
     236  }
     237  
     238  cmph_uint32 chd_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
     239  {
     240  	register chd_data_t * chd = mphf->data;
     241  	return _chd_search(chd->packed_chd_phf, chd->packed_cr, key, keylen);
     242  }
     243  
     244  void chd_pack(cmph_t *mphf, void *packed_mphf)
     245  {
     246  	chd_data_t *data = (chd_data_t *)mphf->data;
     247  	cmph_uint32 * ptr = packed_mphf;
     248  	cmph_uint8 * ptr8;
     249  
     250  	// packing packed_cr_size and packed_cr
     251  	*ptr = data->packed_cr_size;
     252  	ptr8 =  (cmph_uint8 *) (ptr + 1);
     253  	
     254  	memcpy(ptr8, data->packed_cr, data->packed_cr_size);
     255  	ptr8 += data->packed_cr_size;
     256  	
     257  	ptr = (cmph_uint32 *) ptr8;
     258  	*ptr = data->packed_chd_phf_size;
     259  
     260  	ptr8 =  (cmph_uint8 *) (ptr + 1);
     261  	memcpy(ptr8, data->packed_chd_phf, data->packed_chd_phf_size);
     262  }
     263  
     264  cmph_uint32 chd_packed_size(cmph_t *mphf)
     265  {
     266  	register chd_data_t *data = (chd_data_t *)mphf->data;
     267  	return (cmph_uint32)(sizeof(CMPH_ALGO) + 2*sizeof(cmph_uint32) + data->packed_cr_size + data->packed_chd_phf_size);
     268  
     269  }
     270  
     271  cmph_uint32 chd_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
     272  {
     273  
     274  	register cmph_uint32 * ptr = packed_mphf;
     275  	register cmph_uint32 packed_cr_size = *ptr++;
     276  	register cmph_uint8 * packed_chd_phf = ((cmph_uint8 *) ptr) + packed_cr_size + sizeof(cmph_uint32);
     277  	return _chd_search(packed_chd_phf, ptr, key, keylen);
     278  }
     279  
     280