(root)/
glib-2.79.0/
girepository/
cmph/
jenkins_hash.c
       1  #include "jenkins_hash.h"
       2  #include <stdlib.h>
       3  #ifdef WIN32
       4  #define _USE_MATH_DEFINES //For M_LOG2E
       5  #endif
       6  #include <math.h>
       7  #include <limits.h>
       8  #include <string.h>
       9  
      10  //#define DEBUG
      11  #include "debug.h"
      12  
      13  #define hashsize(n) ((cmph_uint32)1<<(n))
      14  #define hashmask(n) (hashsize(n)-1)
      15  
      16  
      17  
      18  //#define NM2 /* Define this if you do not want power of 2 table sizes*/
      19  
      20  
      21  /*
      22     --------------------------------------------------------------------
      23     mix -- mix 3 32-bit values reversibly.
      24     For every delta with one or two bits set, and the deltas of all three
      25     high bits or all three low bits, whether the original value of a,b,c
      26     is almost all zero or is uniformly distributed,
      27   * If mix() is run forward or backward, at least 32 bits in a,b,c
      28   have at least 1/4 probability of changing.
      29   * If mix() is run forward, every bit of c will change between 1/3 and
      30   2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.)
      31   mix() was built out of 36 single-cycle latency instructions in a 
      32   structure that could supported 2x parallelism, like so:
      33   a -= b; 
      34   a -= c; x = (c>>13);
      35   b -= c; a ^= x;
      36   b -= a; x = (a<<8);
      37   c -= a; b ^= x;
      38   c -= b; x = (b>>13);
      39   ...
      40   Unfortunately, superscalar Pentiums and Sparcs can't take advantage 
      41   of that parallelism.  They've also turned some of those single-cycle
      42   latency instructions into multi-cycle latency instructions.  Still,
      43   this is the fastest good hash I could find.  There were about 2^^68
      44   to choose from.  I only looked at a billion or so.
      45   --------------------------------------------------------------------
      46   */
      47  #define mix(a,b,c) \
      48  { \
      49  	a -= b; a -= c; a ^= (c>>13); \
      50  	b -= c; b -= a; b ^= (a<<8); \
      51  	c -= a; c -= b; c ^= (b>>13); \
      52  	a -= b; a -= c; a ^= (c>>12);  \
      53  	b -= c; b -= a; b ^= (a<<16); \
      54  	c -= a; c -= b; c ^= (b>>5); \
      55  	a -= b; a -= c; a ^= (c>>3);  \
      56  	b -= c; b -= a; b ^= (a<<10); \
      57  	c -= a; c -= b; c ^= (b>>15); \
      58  }
      59  
      60  /*
      61     --------------------------------------------------------------------
      62     hash() -- hash a variable-length key into a 32-bit value
      63  k       : the key (the unaligned variable-length array of bytes)
      64  len     : the length of the key, counting by bytes
      65  initval : can be any 4-byte value
      66  Returns a 32-bit value.  Every bit of the key affects every bit of
      67  the return value.  Every 1-bit and 2-bit delta achieves avalanche.
      68  About 6*len+35 instructions.
      69  
      70  The best hash table sizes are powers of 2.  There is no need to do
      71  mod a prime (mod is sooo slow!).  If you need less than 32 bits,
      72  use a bitmask.  For example, if you need only 10 bits, do
      73  h = (h & hashmask(10));
      74  In which case, the hash table should have hashsize(10) elements.
      75  
      76  If you are hashing n strings (cmph_uint8 **)k, do it like this:
      77  for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
      78  
      79  By Bob Jenkins, 1996.  bob_jenkins@burtleburtle.net.  You may use this
      80  code any way you wish, private, educational, or commercial.  It's free.
      81  
      82  See http://burtleburtle.net/bob/hash/evahash.html
      83  Use for hash table lookup, or anything where one collision in 2^^32 is
      84  acceptable.  Do NOT use for cryptographic purposes.
      85  --------------------------------------------------------------------
      86   */
      87  jenkins_state_t *jenkins_state_new(cmph_uint32 size) //size of hash table
      88  {
      89  	jenkins_state_t *state = (jenkins_state_t *)malloc(sizeof(jenkins_state_t));
      90  	DEBUGP("Initializing jenkins hash\n");
      91  	state->seed = ((cmph_uint32)rand() % size);
      92  	return state;
      93  }
      94  void jenkins_state_destroy(jenkins_state_t *state)
      95  {
      96  	free(state);
      97  }
      98  
      99  
     100  static inline void __jenkins_hash_vector(cmph_uint32 seed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
     101  {
     102  	register cmph_uint32 len, length;
     103  
     104  	/* Set up the internal state */
     105  	length = keylen;
     106  	len = length;
     107  	hashes[0] = hashes[1] = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
     108  	hashes[2] = seed;   /* the previous hash value - seed in our case */
     109  
     110  	/*---------------------------------------- handle most of the key */
     111  	while (len >= 12)
     112  	{
     113  		hashes[0] += ((cmph_uint32)k[0] +((cmph_uint32)k[1]<<8) +((cmph_uint32)k[2]<<16) +((cmph_uint32)k[3]<<24));
     114  		hashes[1] += ((cmph_uint32)k[4] +((cmph_uint32)k[5]<<8) +((cmph_uint32)k[6]<<16) +((cmph_uint32)k[7]<<24));
     115  		hashes[2] += ((cmph_uint32)k[8] +((cmph_uint32)k[9]<<8) +((cmph_uint32)k[10]<<16)+((cmph_uint32)k[11]<<24));
     116  		mix(hashes[0],hashes[1],hashes[2]);
     117  		k += 12; len -= 12;
     118  	}
     119  
     120  	/*------------------------------------- handle the last 11 bytes */
     121  	hashes[2]  += length;
     122  	switch(len)              /* all the case statements fall through */
     123  	{
     124  		case 11: 
     125  			hashes[2] +=((cmph_uint32)k[10]<<24);
     126  		case 10: 
     127  			hashes[2] +=((cmph_uint32)k[9]<<16);
     128  		case 9 : 
     129  			hashes[2] +=((cmph_uint32)k[8]<<8);
     130  			/* the first byte of hashes[2] is reserved for the length */
     131  		case 8 : 
     132  			hashes[1] +=((cmph_uint32)k[7]<<24);
     133  		case 7 : 
     134  			hashes[1] +=((cmph_uint32)k[6]<<16);
     135  		case 6 : 
     136  			hashes[1] +=((cmph_uint32)k[5]<<8);
     137  		case 5 :
     138  			hashes[1] +=(cmph_uint8) k[4];
     139  		case 4 : 
     140  			hashes[0] +=((cmph_uint32)k[3]<<24);
     141  		case 3 : 
     142  			hashes[0] +=((cmph_uint32)k[2]<<16);
     143  		case 2 : 
     144  			hashes[0] +=((cmph_uint32)k[1]<<8);
     145  		case 1 : 
     146  			hashes[0] +=(cmph_uint8)k[0];
     147  			/* case 0: nothing left to add */
     148  	}
     149  
     150  	mix(hashes[0],hashes[1],hashes[2]);
     151  }
     152  
     153  cmph_uint32 jenkins_hash(jenkins_state_t *state, const char *k, cmph_uint32 keylen)
     154  {
     155  	cmph_uint32 hashes[3];
     156  	__jenkins_hash_vector(state->seed, k, keylen, hashes);
     157  	return hashes[2];
     158  /*	cmph_uint32 a, b, c;
     159  	cmph_uint32 len, length;
     160  
     161  	// Set up the internal state 
     162  	length = keylen;
     163  	len = length;
     164  	a = b = 0x9e3779b9;  // the golden ratio; an arbitrary value 
     165  	c = state->seed;   // the previous hash value - seed in our case 
     166  
     167  	// handle most of the key 
     168  	while (len >= 12)
     169  	{
     170  		a += (k[0] +((cmph_uint32)k[1]<<8) +((cmph_uint32)k[2]<<16) +((cmph_uint32)k[3]<<24));
     171  		b += (k[4] +((cmph_uint32)k[5]<<8) +((cmph_uint32)k[6]<<16) +((cmph_uint32)k[7]<<24));
     172  		c += (k[8] +((cmph_uint32)k[9]<<8) +((cmph_uint32)k[10]<<16)+((cmph_uint32)k[11]<<24));
     173  		mix(a,b,c);
     174  		k += 12; len -= 12;
     175  	}
     176  
     177  	// handle the last 11 bytes
     178  	c  += length;
     179  	switch(len)              /// all the case statements fall through 
     180  	{
     181  		case 11: 
     182  			c +=((cmph_uint32)k[10]<<24);
     183  		case 10: 
     184  			c +=((cmph_uint32)k[9]<<16);
     185  		case 9 : 
     186  			c +=((cmph_uint32)k[8]<<8);
     187  			// the first byte of c is reserved for the length 
     188  		case 8 : 
     189  			b +=((cmph_uint32)k[7]<<24);
     190  		case 7 : 
     191  			b +=((cmph_uint32)k[6]<<16);
     192  		case 6 : 
     193  			b +=((cmph_uint32)k[5]<<8);
     194  		case 5 : 
     195  			b +=k[4];
     196  		case 4 : 
     197  			a +=((cmph_uint32)k[3]<<24);
     198  		case 3 : 
     199  			a +=((cmph_uint32)k[2]<<16);
     200  		case 2 : 
     201  			a +=((cmph_uint32)k[1]<<8);
     202  		case 1 : 
     203  			a +=k[0];
     204  		// case 0: nothing left to add 
     205  	}
     206  
     207  	mix(a,b,c);
     208  
     209  	/// report the result 
     210  
     211  	return c;
     212  	*/
     213  }
     214  
     215  void jenkins_hash_vector_(jenkins_state_t *state, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
     216  {
     217  	__jenkins_hash_vector(state->seed, k, keylen, hashes);
     218  }
     219  
     220  void jenkins_state_dump(jenkins_state_t *state, char **buf, cmph_uint32 *buflen)
     221  {
     222  	*buflen = sizeof(cmph_uint32);
     223  	*buf = (char *)malloc(sizeof(cmph_uint32));
     224  	if (!*buf) 
     225  	{
     226  		*buflen = UINT_MAX;
     227  		return;
     228  	}
     229  	memcpy(*buf, &(state->seed), sizeof(cmph_uint32));
     230  	DEBUGP("Dumped jenkins state with seed %u\n", state->seed);
     231  	return;
     232  }
     233  
     234  jenkins_state_t *jenkins_state_copy(jenkins_state_t *src_state)
     235  {
     236  	jenkins_state_t *dest_state = (jenkins_state_t *)malloc(sizeof(jenkins_state_t));
     237  	dest_state->hashfunc = src_state->hashfunc;
     238  	dest_state->seed = src_state->seed;
     239  	return dest_state;
     240  }
     241  
     242  jenkins_state_t *jenkins_state_load(const char *buf, cmph_uint32 buflen)
     243  {
     244  	jenkins_state_t *state = (jenkins_state_t *)malloc(sizeof(jenkins_state_t));
     245  	state->seed = *(cmph_uint32 *)buf;
     246  	state->hashfunc = CMPH_HASH_JENKINS;
     247  	DEBUGP("Loaded jenkins state with seed %u\n", state->seed);
     248  	return state;
     249  }
     250  
     251  
     252  /** \fn void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed);
     253   *  \brief Support the ability to pack a jenkins function into a preallocated contiguous memory space pointed by jenkins_packed.
     254   *  \param state points to the jenkins function
     255   *  \param jenkins_packed pointer to the contiguous memory area used to store the jenkins function. The size of jenkins_packed must be at least jenkins_state_packed_size() 
     256   */
     257  void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed)
     258  {
     259  	if (state && jenkins_packed)
     260  	{
     261  		memcpy(jenkins_packed, &(state->seed), sizeof(cmph_uint32));
     262  	}
     263  }
     264  
     265  /** \fn cmph_uint32 jenkins_state_packed_size(jenkins_state_t *state);
     266   *  \brief Return the amount of space needed to pack a jenkins function.
     267   *  \return the size of the packed function or zero for failures
     268   */ 
     269  cmph_uint32 jenkins_state_packed_size(void)
     270  {
     271  	return sizeof(cmph_uint32);
     272  }
     273  
     274  
     275  /** \fn cmph_uint32 jenkins_hash_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen);
     276   *  \param jenkins_packed is a pointer to a contiguous memory area
     277   *  \param key is a pointer to a key
     278   *  \param keylen is the key length
     279   *  \return an integer that represents a hash value of 32 bits.
     280   */
     281  cmph_uint32 jenkins_hash_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen)
     282  {
     283  	cmph_uint32 hashes[3];
     284  	__jenkins_hash_vector(*((cmph_uint32 *)jenkins_packed), k, keylen, hashes);
     285  	return hashes[2];
     286  }
     287  
     288  /** \fn jenkins_hash_vector_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
     289   *  \param jenkins_packed is a pointer to a contiguous memory area
     290   *  \param key is a pointer to a key
     291   *  \param keylen is the key length
     292   *  \param hashes is a pointer to a memory large enough to fit three 32-bit integers.
     293   */
     294  void jenkins_hash_vector_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes)
     295  {
     296  	__jenkins_hash_vector(*((cmph_uint32 *)jenkins_packed), k, keylen, hashes);
     297  }