(root)/
glib-2.79.0/
girepository/
cmph/
bdz.c
       1  #include "bdz.h"
       2  #include "cmph_structs.h"
       3  #include "bdz_structs.h"
       4  #include "hash.h"
       5  #include "bitbool.h"
       6  
       7  #include <math.h>
       8  #include <stdlib.h>
       9  #include <stdio.h>
      10  #include <assert.h>
      11  #include <string.h>
      12  #include <errno.h>
      13  //#define DEBUG
      14  #include "debug.h"
      15  #define UNASSIGNED 3U
      16  #define NULL_EDGE 0xffffffff
      17  
      18  //cmph_uint32 ngrafos = 0;
      19  //cmph_uint32 ngrafos_aciclicos = 0;
      20  // table used for looking up the number of assigned vertices  a 8-bit integer
      21  const cmph_uint8 bdz_lookup_table[] =
      22  {
      23  4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
      24  4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
      25  4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
      26  3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
      27  4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
      28  4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
      29  4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
      30  3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
      31  4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
      32  4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
      33  4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
      34  3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
      35  3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
      36  3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
      37  3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
      38  2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 1, 1, 0
      39  };	
      40  
      41  typedef struct 
      42  {
      43  	cmph_uint32 vertices[3];
      44  	cmph_uint32 next_edges[3];
      45  }bdz_edge_t;
      46  
      47  typedef cmph_uint32 * bdz_queue_t;
      48  
      49  static void bdz_alloc_queue(bdz_queue_t * queuep, cmph_uint32 nedges)
      50  {
      51  	(*queuep)=malloc(nedges*sizeof(cmph_uint32));
      52  };
      53  static void bdz_free_queue(bdz_queue_t * queue)
      54  {
      55  	free(*queue);
      56  };
      57  
      58  typedef struct 
      59  {
      60  	cmph_uint32 nedges;
      61  	bdz_edge_t * edges;
      62  	cmph_uint32 * first_edge;
      63  	cmph_uint8 * vert_degree;	
      64  }bdz_graph3_t;
      65  
      66  
      67  static void bdz_alloc_graph3(bdz_graph3_t * graph3, cmph_uint32 nedges, cmph_uint32 nvertices)
      68  {
      69  	graph3->edges=malloc(nedges*sizeof(bdz_edge_t));
      70  	graph3->first_edge=malloc(nvertices*sizeof(cmph_uint32));
      71  	graph3->vert_degree=malloc((size_t)nvertices);	
      72  };
      73  static void bdz_init_graph3(bdz_graph3_t * graph3, cmph_uint32 nedges, cmph_uint32 nvertices)
      74  {
      75  	memset(graph3->first_edge,0xff,nvertices*sizeof(cmph_uint32));
      76  	memset(graph3->vert_degree,0,(size_t)nvertices);
      77  	graph3->nedges=0;
      78  };
      79  static void bdz_free_graph3(bdz_graph3_t *graph3)
      80  {
      81  	free(graph3->edges);
      82  	free(graph3->first_edge);
      83  	free(graph3->vert_degree);
      84  };
      85  
      86  static void bdz_partial_free_graph3(bdz_graph3_t *graph3)
      87  {
      88  	free(graph3->first_edge);
      89  	free(graph3->vert_degree);
      90  	graph3->first_edge = NULL;
      91  	graph3->vert_degree = NULL;
      92  };
      93  
      94  static void bdz_add_edge(bdz_graph3_t * graph3, cmph_uint32 v0, cmph_uint32 v1, cmph_uint32 v2)
      95  {
      96  	graph3->edges[graph3->nedges].vertices[0]=v0;
      97  	graph3->edges[graph3->nedges].vertices[1]=v1;
      98  	graph3->edges[graph3->nedges].vertices[2]=v2;
      99  	graph3->edges[graph3->nedges].next_edges[0]=graph3->first_edge[v0];
     100  	graph3->edges[graph3->nedges].next_edges[1]=graph3->first_edge[v1];
     101  	graph3->edges[graph3->nedges].next_edges[2]=graph3->first_edge[v2];
     102  	graph3->first_edge[v0]=graph3->first_edge[v1]=graph3->first_edge[v2]=graph3->nedges;
     103  	graph3->vert_degree[v0]++;
     104  	graph3->vert_degree[v1]++;
     105  	graph3->vert_degree[v2]++;
     106  	graph3->nedges++;
     107  };
     108  
     109  static void bdz_dump_graph(bdz_graph3_t* graph3, cmph_uint32 nedges, cmph_uint32 nvertices)
     110  {
     111  	cmph_uint32 i;
     112  	for(i=0;i<nedges;i++){
     113  		printf("\nedge %d %d %d %d ",i,graph3->edges[i].vertices[0],
     114  			graph3->edges[i].vertices[1],graph3->edges[i].vertices[2]);
     115  		printf(" nexts %d %d %d",graph3->edges[i].next_edges[0],
     116  				graph3->edges[i].next_edges[1],graph3->edges[i].next_edges[2]);
     117  	};
     118  	
     119  	for(i=0;i<nvertices;i++){
     120  		printf("\nfirst for vertice %d %d ",i,graph3->first_edge[i]);
     121  	
     122  	};
     123  };
     124  
     125  static void bdz_remove_edge(bdz_graph3_t * graph3, cmph_uint32 curr_edge)
     126  {
     127  	cmph_uint32 i,j=0,vert,edge1,edge2;
     128  	for(i=0;i<3;i++){
     129  		vert=graph3->edges[curr_edge].vertices[i];
     130  		edge1=graph3->first_edge[vert];
     131  		edge2=NULL_EDGE;
     132  		while(edge1!=curr_edge&&edge1!=NULL_EDGE){
     133  			edge2=edge1;
     134  			if(graph3->edges[edge1].vertices[0]==vert){
     135  				j=0;
     136  			} else if(graph3->edges[edge1].vertices[1]==vert){
     137  				j=1;
     138  			} else 
     139  				j=2;
     140  			edge1=graph3->edges[edge1].next_edges[j];
     141  		};
     142  		if(edge1==NULL_EDGE){
     143  			printf("\nerror remove edge %d dump graph",curr_edge);
     144  			bdz_dump_graph(graph3,graph3->nedges,graph3->nedges+graph3->nedges/4);
     145  			exit(-1);
     146  		};
     147  		
     148  		if(edge2!=NULL_EDGE){
     149  			graph3->edges[edge2].next_edges[j] = 
     150  				graph3->edges[edge1].next_edges[i];
     151  		} else 
     152  			graph3->first_edge[vert]=
     153  				graph3->edges[edge1].next_edges[i];
     154  		graph3->vert_degree[vert]--;
     155  	};
     156  	
     157  };
     158  
     159  static int bdz_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_queue_t queue, bdz_graph3_t* graph3)
     160  {
     161  	cmph_uint32 i,v0,v1,v2;
     162  	cmph_uint32 queue_head=0,queue_tail=0;
     163  	cmph_uint32 curr_edge;
     164  	cmph_uint32 tmp_edge;
     165  	cmph_uint8 * marked_edge =malloc((size_t)(nedges >> 3) + 1);
     166  	memset(marked_edge, 0, (size_t)(nedges >> 3) + 1);
     167  
     168  	for(i=0;i<nedges;i++){
     169  		v0=graph3->edges[i].vertices[0];
     170  		v1=graph3->edges[i].vertices[1];
     171  		v2=graph3->edges[i].vertices[2];
     172  		if(graph3->vert_degree[v0]==1 || 
     173  				graph3->vert_degree[v1]==1 ||
     174  				graph3->vert_degree[v2]==1){
     175  			if(!GETBIT(marked_edge,i)) {
     176  				queue[queue_head++]=i;
     177  				SETBIT(marked_edge,i);
     178  			}
     179  		};
     180  	};
     181  	while(queue_tail!=queue_head){
     182  		curr_edge=queue[queue_tail++];
     183  		bdz_remove_edge(graph3,curr_edge);
     184  		v0=graph3->edges[curr_edge].vertices[0];
     185  		v1=graph3->edges[curr_edge].vertices[1];
     186  		v2=graph3->edges[curr_edge].vertices[2];
     187  		if(graph3->vert_degree[v0]==1 ) {
     188  			tmp_edge=graph3->first_edge[v0];
     189  			if(!GETBIT(marked_edge,tmp_edge)) {
     190  				queue[queue_head++]=tmp_edge;
     191  				SETBIT(marked_edge,tmp_edge);
     192  			};
     193  			
     194  		};
     195  		if(graph3->vert_degree[v1]==1) {
     196  			tmp_edge=graph3->first_edge[v1];
     197  			if(!GETBIT(marked_edge,tmp_edge)){
     198  				queue[queue_head++]=tmp_edge;
     199  				SETBIT(marked_edge,tmp_edge);
     200  			};
     201  			
     202  		};
     203  		if(graph3->vert_degree[v2]==1){
     204  			tmp_edge=graph3->first_edge[v2];
     205  			if(!GETBIT(marked_edge,tmp_edge)){
     206  				queue[queue_head++]=tmp_edge;
     207  				SETBIT(marked_edge,tmp_edge);
     208  			};
     209  		};
     210  	};
     211  	free(marked_edge);
     212  	return (int)(queue_head-nedges);/* returns 0 if successful otherwies return negative number*/
     213  };
     214  
     215  static int bdz_mapping(cmph_config_t *mph, bdz_graph3_t* graph3, bdz_queue_t queue);
     216  static void assigning(bdz_config_data_t *bdz, bdz_graph3_t* graph3, bdz_queue_t queue);
     217  static void ranking(bdz_config_data_t *bdz);
     218  static cmph_uint32 rank(cmph_uint32 b, cmph_uint32 * ranktable, cmph_uint8 * g, cmph_uint32 vertex);
     219  
     220  bdz_config_data_t *bdz_config_new(void)
     221  {
     222  	bdz_config_data_t *bdz;
     223  	bdz = (bdz_config_data_t *)malloc(sizeof(bdz_config_data_t));
     224  	assert(bdz);
     225  	memset(bdz, 0, sizeof(bdz_config_data_t));
     226  	bdz->hashfunc = CMPH_HASH_JENKINS;
     227  	bdz->g = NULL;
     228  	bdz->hl = NULL;
     229  	bdz->k = 0; //kth index in ranktable, $k = log_2(n=3r)/\varepsilon$
     230  	bdz->b = 7; // number of bits of k
     231  	bdz->ranktablesize = 0; //number of entries in ranktable, $n/k +1$
     232  	bdz->ranktable = NULL; // rank table
     233  	return bdz;
     234  }
     235  
     236  void bdz_config_destroy(cmph_config_t *mph)
     237  {
     238  	bdz_config_data_t *data = (bdz_config_data_t *)mph->data;
     239  	DEBUGP("Destroying algorithm dependent data\n");
     240  	free(data);
     241  }
     242  
     243  void bdz_config_set_b(cmph_config_t *mph, cmph_uint32 b)
     244  {
     245  	bdz_config_data_t *bdz = (bdz_config_data_t *)mph->data;
     246  	if (b <= 2 || b > 10) b = 7; // validating restrictions over parameter b.
     247  	bdz->b = (cmph_uint8)b;
     248  	DEBUGP("b: %u\n", b);
     249  
     250  }
     251  
     252  void bdz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
     253  {
     254  	bdz_config_data_t *bdz = (bdz_config_data_t *)mph->data;
     255  	CMPH_HASH *hashptr = hashfuncs;
     256  	cmph_uint32 i = 0;
     257  	while(*hashptr != CMPH_HASH_COUNT)
     258  	{
     259  		if (i >= 1) break; //bdz only uses one linear hash function
     260  		bdz->hashfunc = *hashptr;	
     261  		++i, ++hashptr;
     262  	}
     263  }
     264  
     265  cmph_t *bdz_new(cmph_config_t *mph, double c)
     266  {
     267  	cmph_t *mphf = NULL;
     268  	bdz_data_t *bdzf = NULL;
     269  	cmph_uint32 iterations;
     270  	bdz_queue_t edges;
     271  	bdz_graph3_t graph3;
     272  	bdz_config_data_t *bdz = (bdz_config_data_t *)mph->data;
     273  	#ifdef CMPH_TIMING
     274  	double construction_time_begin = 0.0;
     275  	double construction_time = 0.0;
     276  	ELAPSED_TIME_IN_SECONDS(&construction_time_begin);
     277  	#endif
     278  
     279  
     280  	if (c == 0) c = 1.23; // validating restrictions over parameter c.
     281  	DEBUGP("c: %f\n", c);
     282  	bdz->m = mph->key_source->nkeys;	
     283  	bdz->r = (cmph_uint32)ceil((c * mph->key_source->nkeys)/3);
     284  	if ((bdz->r % 2) == 0) bdz->r+=1;
     285  	bdz->n = 3*bdz->r;
     286  
     287  	bdz->k = (1U << bdz->b);
     288  	DEBUGP("b: %u -- k: %u\n", bdz->b, bdz->k);
     289  	
     290  	bdz->ranktablesize = (cmph_uint32)ceil(bdz->n/(double)bdz->k);
     291  	DEBUGP("ranktablesize: %u\n", bdz->ranktablesize);
     292  
     293  	
     294  	bdz_alloc_graph3(&graph3, bdz->m, bdz->n);
     295  	bdz_alloc_queue(&edges,bdz->m);
     296  	DEBUGP("Created hypergraph\n");
     297  	
     298  	DEBUGP("m (edges): %u n (vertices): %u  r: %u c: %f \n", bdz->m, bdz->n, bdz->r, c);
     299  
     300  	// Mapping step
     301  	iterations = 1000;
     302  	if (mph->verbosity)
     303  	{
     304  		fprintf(stderr, "Entering mapping step for mph creation of %u keys with graph sized %u\n", bdz->m, bdz->n);
     305  	}
     306  	while(1)
     307  	{
     308  		int ok;
     309  		DEBUGP("linear hash function \n");
     310  		bdz->hl = hash_state_new(bdz->hashfunc, 15);
     311  
     312  		ok = bdz_mapping(mph, &graph3, edges);
     313                  //ok = 0;
     314  		if (!ok)
     315  		{
     316  			--iterations;
     317  			hash_state_destroy(bdz->hl);
     318  			bdz->hl = NULL;
     319  			DEBUGP("%u iterations remaining\n", iterations);
     320  			if (mph->verbosity)
     321  			{
     322  				fprintf(stderr, "acyclic graph creation failure - %u iterations remaining\n", iterations);
     323  			}
     324  			if (iterations == 0) break;
     325  		} 
     326  		else break;
     327  	}
     328  	
     329  	if (iterations == 0)
     330  	{
     331  		bdz_free_queue(&edges);
     332  		bdz_free_graph3(&graph3);
     333  		return NULL;
     334  	}
     335  	bdz_partial_free_graph3(&graph3);
     336  	// Assigning step
     337  	if (mph->verbosity)
     338  	{
     339  		fprintf(stderr, "Entering assigning step for mph creation of %u keys with graph sized %u\n", bdz->m, bdz->n);
     340  	}
     341  	assigning(bdz, &graph3, edges);
     342  
     343  	bdz_free_queue(&edges);
     344  	bdz_free_graph3(&graph3);
     345  	if (mph->verbosity)
     346  	{
     347  		fprintf(stderr, "Entering ranking step for mph creation of %u keys with graph sized %u\n", bdz->m, bdz->n);
     348  	}
     349  	ranking(bdz);
     350  	#ifdef CMPH_TIMING	
     351  	ELAPSED_TIME_IN_SECONDS(&construction_time);
     352  	#endif
     353  	mphf = (cmph_t *)malloc(sizeof(cmph_t));
     354  	mphf->algo = mph->algo;
     355  	bdzf = (bdz_data_t *)malloc(sizeof(bdz_data_t));
     356  	bdzf->g = bdz->g;
     357  	bdz->g = NULL; //transfer memory ownership
     358  	bdzf->hl = bdz->hl;
     359  	bdz->hl = NULL; //transfer memory ownership
     360  	bdzf->ranktable = bdz->ranktable;
     361  	bdz->ranktable = NULL; //transfer memory ownership
     362  	bdzf->ranktablesize = bdz->ranktablesize;
     363  	bdzf->k = bdz->k;
     364  	bdzf->b = bdz->b;
     365  	bdzf->n = bdz->n;
     366  	bdzf->m = bdz->m;
     367  	bdzf->r = bdz->r;
     368  	mphf->data = bdzf;
     369  	mphf->size = bdz->m;
     370  
     371  	DEBUGP("Successfully generated minimal perfect hash\n");
     372  	if (mph->verbosity)
     373  	{
     374  		fprintf(stderr, "Successfully generated minimal perfect hash function\n");
     375  	}
     376  
     377  
     378  	#ifdef CMPH_TIMING	
     379  	register cmph_uint32 space_usage = bdz_packed_size(mphf)*8;
     380  	register cmph_uint32 keys_per_bucket = 1;
     381  	construction_time = construction_time - construction_time_begin;
     382  	fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\n", bdz->m, bdz->m/(double)bdz->n, keys_per_bucket, construction_time, space_usage/(double)bdz->m);
     383  	#endif	
     384  
     385  	return mphf;
     386  }
     387  
     388  		
     389  static int bdz_mapping(cmph_config_t *mph, bdz_graph3_t* graph3, bdz_queue_t queue)
     390  {
     391  	cmph_uint32 e;
     392  	int cycles = 0;
     393  	cmph_uint32 hl[3];
     394  	bdz_config_data_t *bdz = (bdz_config_data_t *)mph->data;
     395  	bdz_init_graph3(graph3, bdz->m, bdz->n);
     396  	mph->key_source->rewind(mph->key_source->data);
     397  	for (e = 0; e < mph->key_source->nkeys; ++e)
     398  	{
     399  		cmph_uint32 h0, h1, h2;
     400  		cmph_uint32 keylen;
     401  		char *key = NULL;
     402  		mph->key_source->read(mph->key_source->data, &key, &keylen);		
     403  		hash_vector(bdz->hl, key, keylen,hl);
     404  		h0 = hl[0] % bdz->r;
     405  		h1 = hl[1] % bdz->r + bdz->r;
     406  		h2 = hl[2] % bdz->r + (bdz->r << 1);
     407  		mph->key_source->dispose(mph->key_source->data, key, keylen);
     408  		bdz_add_edge(graph3,h0,h1,h2);
     409  	}
     410  	cycles = bdz_generate_queue(bdz->m, bdz->n, queue, graph3);	
     411  	return (cycles == 0);
     412  }
     413  
     414  static void assigning(bdz_config_data_t *bdz, bdz_graph3_t* graph3, bdz_queue_t queue)
     415  {
     416  	cmph_uint32 i;
     417  	cmph_uint32 nedges=graph3->nedges;
     418  	cmph_uint32 curr_edge;
     419  	cmph_uint32 v0,v1,v2;
     420  	cmph_uint8 * marked_vertices =malloc((size_t)(bdz->n >> 3) + 1);
     421          cmph_uint32 sizeg = (cmph_uint32)ceil(bdz->n/4.0);
     422  	bdz->g = (cmph_uint8 *)calloc((size_t)(sizeg), sizeof(cmph_uint8));	
     423  	memset(marked_vertices, 0, (size_t)(bdz->n >> 3) + 1);
     424  	memset(bdz->g, 0xff, (size_t)(sizeg));
     425  
     426  	for(i=nedges-1;i+1>0;i--){
     427  		curr_edge=queue[i];
     428  		v0=graph3->edges[curr_edge].vertices[0];
     429  		v1=graph3->edges[curr_edge].vertices[1];
     430  		v2=graph3->edges[curr_edge].vertices[2];
     431  		DEBUGP("B:%u %u %u -- %u %u %u\n", v0, v1, v2, GETVALUE(bdz->g, v0), GETVALUE(bdz->g, v1), GETVALUE(bdz->g, v2));
     432  		if(!GETBIT(marked_vertices, v0)){
     433  			if(!GETBIT(marked_vertices,v1))
     434  			{
     435  				SETVALUE1(bdz->g, v1, UNASSIGNED); 
     436  				SETBIT(marked_vertices, v1);
     437  			}
     438  			if(!GETBIT(marked_vertices,v2))
     439  			{
     440  				SETVALUE1(bdz->g, v2, UNASSIGNED);		
     441  				SETBIT(marked_vertices, v2);
     442  			}
     443  			SETVALUE1(bdz->g, v0, (6-(GETVALUE(bdz->g, v1) + GETVALUE(bdz->g,v2)))%3);
     444  			SETBIT(marked_vertices, v0);
     445  		} else if(!GETBIT(marked_vertices, v1)) {
     446  			if(!GETBIT(marked_vertices, v2))
     447  			{
     448  				SETVALUE1(bdz->g, v2, UNASSIGNED);
     449  				SETBIT(marked_vertices, v2);
     450  			}
     451  			SETVALUE1(bdz->g, v1, (7-(GETVALUE(bdz->g, v0)+GETVALUE(bdz->g, v2)))%3);
     452  			SETBIT(marked_vertices, v1);
     453  		}else {
     454  			SETVALUE1(bdz->g, v2, (8-(GETVALUE(bdz->g,v0)+GETVALUE(bdz->g, v1)))%3);
     455  			SETBIT(marked_vertices, v2);
     456  		}		
     457  		DEBUGP("A:%u %u %u -- %u %u %u\n", v0, v1, v2, GETVALUE(bdz->g, v0), GETVALUE(bdz->g, v1), GETVALUE(bdz->g, v2));
     458  	};
     459  	free(marked_vertices);
     460  }
     461  
     462  
     463  static void ranking(bdz_config_data_t *bdz)
     464  {
     465  	cmph_uint32 i, j, offset = 0U, count = 0U, size = (bdz->k >> 2U), nbytes_total = (cmph_uint32)ceil(bdz->n/4.0), nbytes;
     466  	bdz->ranktable = (cmph_uint32 *)calloc((size_t)bdz->ranktablesize, sizeof(cmph_uint32));
     467  	// ranktable computation
     468  	bdz->ranktable[0] = 0;	
     469  	i = 1;
     470  	while(1)
     471  	{
     472  		if(i == bdz->ranktablesize) break;
     473  		nbytes = size < nbytes_total? size : nbytes_total;
     474  		for(j = 0; j < nbytes; j++)
     475  		{
     476  			count += bdz_lookup_table[*(bdz->g + offset + j)];
     477  		}
     478  		bdz->ranktable[i] = count;
     479  		offset += nbytes;
     480  		nbytes_total -= size;
     481  		i++;
     482  	}
     483  }
     484  
     485  
     486  int bdz_dump(cmph_t *mphf, FILE *fd)
     487  {
     488  	char *buf = NULL;
     489  	cmph_uint32 buflen;
     490  	register size_t nbytes;
     491  	bdz_data_t *data = (bdz_data_t *)mphf->data;
     492  	cmph_uint32 sizeg;
     493  #ifdef DEBUG
     494  	cmph_uint32 i;
     495  #endif
     496  	__cmph_dump(mphf, fd);
     497  
     498  	hash_state_dump(data->hl, &buf, &buflen);
     499  	DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
     500  	nbytes = fwrite(&buflen, sizeof(cmph_uint32), (size_t)1, fd);
     501  	nbytes = fwrite(buf, (size_t)buflen, (size_t)1, fd);
     502  	free(buf);
     503  
     504  	nbytes = fwrite(&(data->n), sizeof(cmph_uint32), (size_t)1, fd);
     505  	nbytes = fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd);
     506  	nbytes = fwrite(&(data->r), sizeof(cmph_uint32), (size_t)1, fd);
     507  	
     508  	sizeg = (cmph_uint32)ceil(data->n/4.0);
     509  	nbytes = fwrite(data->g, sizeof(cmph_uint8)*sizeg, (size_t)1, fd);
     510  
     511  	nbytes = fwrite(&(data->k), sizeof(cmph_uint32), (size_t)1, fd);
     512  	nbytes = fwrite(&(data->b), sizeof(cmph_uint8), (size_t)1, fd);
     513  	nbytes = fwrite(&(data->ranktablesize), sizeof(cmph_uint32), (size_t)1, fd);
     514  
     515  	nbytes = fwrite(data->ranktable, sizeof(cmph_uint32)*(data->ranktablesize), (size_t)1, fd);
     516  	if (nbytes == 0 && ferror(fd)) {
     517            fprintf(stderr, "ERROR: %s\n", strerror(errno));
     518            return 0;
     519          }
     520  	#ifdef DEBUG
     521  	fprintf(stderr, "G: ");
     522  	for (i = 0; i < data->n; ++i) fprintf(stderr, "%u ", GETVALUE(data->g, i));
     523  	fprintf(stderr, "\n");
     524  	#endif
     525  	return 1;
     526  }
     527  
     528  void bdz_load(FILE *f, cmph_t *mphf)
     529  {
     530  	char *buf = NULL;
     531  	cmph_uint32 buflen, sizeg;
     532  	register size_t nbytes;
     533  	bdz_data_t *bdz = (bdz_data_t *)malloc(sizeof(bdz_data_t));
     534  #ifdef DEBUG
     535  	cmph_uint32  i = 0;
     536  #endif
     537  
     538  	DEBUGP("Loading bdz mphf\n");
     539  	mphf->data = bdz;
     540  
     541  	nbytes = fread(&buflen, sizeof(cmph_uint32), (size_t)1, f);
     542  	DEBUGP("Hash state has %u bytes\n", buflen);
     543  	buf = (char *)malloc((size_t)buflen);
     544  	nbytes = fread(buf, (size_t)buflen, (size_t)1, f);
     545  	bdz->hl = hash_state_load(buf, buflen);
     546  	free(buf);
     547  	
     548  
     549  	DEBUGP("Reading m and n\n");
     550  	nbytes = fread(&(bdz->n), sizeof(cmph_uint32), (size_t)1, f);	
     551  	nbytes = fread(&(bdz->m), sizeof(cmph_uint32), (size_t)1, f);	
     552  	nbytes = fread(&(bdz->r), sizeof(cmph_uint32), (size_t)1, f);	
     553  	sizeg = (cmph_uint32)ceil(bdz->n/4.0);
     554  	bdz->g = (cmph_uint8 *)calloc((size_t)(sizeg), sizeof(cmph_uint8));
     555  	nbytes = fread(bdz->g, sizeg*sizeof(cmph_uint8), (size_t)1, f);
     556  
     557  	nbytes = fread(&(bdz->k), sizeof(cmph_uint32), (size_t)1, f);
     558  	nbytes = fread(&(bdz->b), sizeof(cmph_uint8), (size_t)1, f);
     559  	nbytes = fread(&(bdz->ranktablesize), sizeof(cmph_uint32), (size_t)1, f);
     560  
     561  	bdz->ranktable = (cmph_uint32 *)calloc((size_t)bdz->ranktablesize, sizeof(cmph_uint32));
     562  	nbytes = fread(bdz->ranktable, sizeof(cmph_uint32)*(bdz->ranktablesize), (size_t)1, f);
     563  	if (nbytes == 0 && ferror(f)) {
     564            fprintf(stderr, "ERROR: %s\n", strerror(errno));
     565            return;
     566          }
     567  
     568  	#ifdef DEBUG
     569  	i = 0;
     570  	fprintf(stderr, "G: ");
     571  	for (i = 0; i < bdz->n; ++i) fprintf(stderr, "%u ", GETVALUE(bdz->g,i));
     572  	fprintf(stderr, "\n");
     573  	#endif
     574  	return;
     575  }
     576  		
     577  
     578  /*
     579  static cmph_uint32 bdz_search_ph(cmph_t *mphf, const char *key, cmph_uint32 keylen)
     580  {
     581  	bdz_data_t *bdz = mphf->data;
     582  	cmph_uint32 hl[3];
     583  	hash_vector(bdz->hl, key, keylen, hl);
     584  	cmph_uint32 vertex;
     585  	hl[0] = hl[0] % bdz->r;
     586  	hl[1] = hl[1] % bdz->r + bdz->r;
     587  	hl[2] = hl[2] % bdz->r + (bdz->r << 1);
     588  	vertex = hl[(GETVALUE(bdz->g, hl[0]) + GETVALUE(bdz->g, hl[1]) + GETVALUE(bdz->g, hl[2])) % 3];
     589  	return vertex;
     590  }
     591  */
     592  
     593  static inline cmph_uint32 rank(cmph_uint32 b, cmph_uint32 * ranktable, cmph_uint8 * g, cmph_uint32 vertex)
     594  {
     595  	register cmph_uint32 index = vertex >> b;
     596  	register cmph_uint32 base_rank = ranktable[index];
     597  	register cmph_uint32 beg_idx_v = index << b;
     598  	register cmph_uint32 beg_idx_b = beg_idx_v >> 2;
     599  	register cmph_uint32 end_idx_b = vertex >> 2;
     600  	while(beg_idx_b < end_idx_b)
     601  	{
     602  		base_rank += bdz_lookup_table[*(g + beg_idx_b++)];
     603  		
     604  	}
     605  	beg_idx_v = beg_idx_b << 2;
     606  	while(beg_idx_v < vertex) 
     607  	{
     608  		if(GETVALUE(g, beg_idx_v) != UNASSIGNED) base_rank++;
     609  		beg_idx_v++;
     610  	}
     611  	
     612  	return base_rank;
     613  }
     614  
     615  cmph_uint32 bdz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
     616  {
     617  	register cmph_uint32 vertex;
     618  	register bdz_data_t *bdz = mphf->data;
     619  	cmph_uint32 hl[3];
     620  	hash_vector(bdz->hl, key, keylen, hl);
     621  	hl[0] = hl[0] % bdz->r;
     622  	hl[1] = hl[1] % bdz->r + bdz->r;
     623  	hl[2] = hl[2] % bdz->r + (bdz->r << 1);
     624  	vertex = hl[(GETVALUE(bdz->g, hl[0]) + GETVALUE(bdz->g, hl[1]) + GETVALUE(bdz->g, hl[2])) % 3];
     625  	return rank(bdz->b, bdz->ranktable, bdz->g, vertex);
     626  }
     627  
     628  
     629  void bdz_destroy(cmph_t *mphf)
     630  {
     631  	bdz_data_t *data = (bdz_data_t *)mphf->data;
     632  	free(data->g);	
     633  	hash_state_destroy(data->hl);
     634  	free(data->ranktable);
     635  	free(data);
     636  	free(mphf);
     637  }
     638  
     639  /** \fn void bdz_pack(cmph_t *mphf, void *packed_mphf);
     640   *  \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
     641   *  \param mphf pointer to the resulting mphf
     642   *  \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size() 
     643   */
     644  void bdz_pack(cmph_t *mphf, void *packed_mphf)
     645  {
     646  	bdz_data_t *data = (bdz_data_t *)mphf->data;
     647  	cmph_uint8 * ptr = packed_mphf;
     648  	cmph_uint32 sizeg;
     649  
     650  	// packing hl type
     651  	CMPH_HASH hl_type = hash_get_type(data->hl);
     652  	*((cmph_uint32 *) ptr) = hl_type;
     653  	ptr += sizeof(cmph_uint32);
     654  
     655  	// packing hl
     656  	hash_state_pack(data->hl, ptr);
     657  	ptr += hash_state_packed_size(hl_type);
     658  
     659  	// packing r
     660  	*((cmph_uint32 *) ptr) = data->r;
     661  	ptr += sizeof(data->r);
     662  
     663  	// packing ranktablesize
     664  	*((cmph_uint32 *) ptr) = data->ranktablesize;
     665  	ptr += sizeof(data->ranktablesize);
     666  
     667  	// packing ranktable
     668  	memcpy(ptr, data->ranktable, sizeof(cmph_uint32)*(data->ranktablesize));
     669  	ptr += sizeof(cmph_uint32)*(data->ranktablesize);
     670  
     671  	// packing b
     672  	*ptr++ = data->b;
     673  
     674  	// packing g
     675  	sizeg = (cmph_uint32)ceil(data->n/4.0);
     676  	memcpy(ptr, data->g,  sizeof(cmph_uint8)*sizeg);
     677  }
     678  
     679  /** \fn cmph_uint32 bdz_packed_size(cmph_t *mphf);
     680   *  \brief Return the amount of space needed to pack mphf.
     681   *  \param mphf pointer to a mphf
     682   *  \return the size of the packed function or zero for failures
     683   */ 
     684  cmph_uint32 bdz_packed_size(cmph_t *mphf)
     685  {
     686  	bdz_data_t *data = (bdz_data_t *)mphf->data;
     687  
     688  	CMPH_HASH hl_type = hash_get_type(data->hl); 
     689  
     690  	return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(hl_type) + 3*sizeof(cmph_uint32) + sizeof(cmph_uint32)*(data->ranktablesize) + sizeof(cmph_uint8) + sizeof(cmph_uint8)* (cmph_uint32)(ceil(data->n/4.0)));
     691  }
     692  
     693  /** cmph_uint32 bdz_search(void *packed_mphf, const char *key, cmph_uint32 keylen);
     694   *  \brief Use the packed mphf to do a search. 
     695   *  \param  packed_mphf pointer to the packed mphf
     696   *  \param key key to be hashed
     697   *  \param keylen key legth in bytes
     698   *  \return The mphf value
     699   */
     700  cmph_uint32 bdz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
     701  {
     702  	
     703  	register cmph_uint32 vertex;
     704  	register CMPH_HASH hl_type  = *(cmph_uint32 *)packed_mphf;
     705  	register cmph_uint8 *hl_ptr = (cmph_uint8 *)(packed_mphf) + 4;
     706  
     707  	register cmph_uint32 *ranktable = (cmph_uint32*)(hl_ptr + hash_state_packed_size(hl_type));
     708  	
     709  	register cmph_uint32 r = *ranktable++;
     710  	register cmph_uint32 ranktablesize = *ranktable++;
     711  	register cmph_uint8 * g = (cmph_uint8 *)(ranktable + ranktablesize);
     712  	register cmph_uint8 b = *g++;
     713  
     714  	cmph_uint32 hl[3];
     715  	hash_vector_packed(hl_ptr, hl_type, key, keylen, hl);
     716  	hl[0] = hl[0] % r;
     717  	hl[1] = hl[1] % r + r;
     718  	hl[2] = hl[2] % r + (r << 1);
     719  	vertex = hl[(GETVALUE(g, hl[0]) + GETVALUE(g, hl[1]) + GETVALUE(g, hl[2])) % 3];
     720  	return rank(b, ranktable, g, vertex);
     721  }