(root)/
glib-2.79.0/
girepository/
gthash.c
       1  /* -*- mode: C; c-file-style: "gnu"; indent-tabs-mode: nil; -*-
       2   * GObject introspection: Typelib hashing
       3   *
       4   * Copyright (C) 2010 Red Hat, Inc.
       5   *
       6   * SPDX-License-Identifier: LGPL-2.1-or-later
       7   *
       8   * This library is free software; you can redistribute it and/or
       9   * modify it under the terms of the GNU Lesser General Public
      10   * License as published by the Free Software Foundation; either
      11   * version 2 of the License, or (at your option) any later version.
      12   *
      13   * This library is distributed in the hope that it will be useful,
      14   * but WITHOUT ANY WARRANTY; without even the implied warranty of
      15   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      16   * Lesser General Public License for more details.
      17   *
      18   * You should have received a copy of the GNU Lesser General Public
      19   * License along with this library; if not, write to the
      20   * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
      21   * Boston, MA 02111-1307, USA.
      22   */
      23  
      24  #include <glib.h>
      25  #include <glib-object.h>
      26  #include <string.h>
      27  
      28  #include "cmph/cmph.h"
      29  #include "gitypelib-internal.h"
      30  
      31  #define ALIGN_VALUE(this, boundary) \
      32    (( ((unsigned long)(this)) + (((unsigned long)(boundary)) -1)) & (~(((unsigned long)(boundary))-1)))
      33  
      34  /*
      35   * String hashing in the typelib.  We have a set of static (fixed) strings,
      36   * and given one, we need to find its index number.  This problem is perfect
      37   * hashing: http://en.wikipedia.org/wiki/Perfect_hashing
      38   *
      39   * I chose CMPH (http://cmph.sourceforge.net/) as it seemed high
      40   * quality, well documented, and easy to embed.
      41   *
      42   * CMPH provides a number of algorithms; I chose BDZ, because while CHD
      43   * appears to be the "best", the simplicitly of BDZ appealed, and really,
      44   * we're only talking about thousands of strings here, not millions, so
      45   * a few microseconds is no big deal.
      46   *
      47   * In memory, the format is:
      48   * INT32 mph_size
      49   * MPH (mph_size bytes)
      50   * (padding for alignment to uint32 if necessary)
      51   * INDEX (array of guint16)
      52   *
      53   * Because BDZ is not order preserving, we need a lookaside table which
      54   * maps the hash value into the directory index.
      55   */
      56  
      57  struct _GITypelibHashBuilder {
      58    gboolean prepared;
      59    gboolean buildable;
      60    cmph_t *c;
      61    GHashTable *strings;
      62    guint32 dirmap_offset;
      63    guint32 packed_size;
      64  };
      65  
      66  GITypelibHashBuilder *
      67  gi_typelib_hash_builder_new (void)
      68  {
      69    GITypelibHashBuilder *builder = g_slice_new0 (GITypelibHashBuilder);
      70    builder->c = NULL;
      71    builder->strings = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL);
      72    return builder;
      73  }
      74  
      75  void
      76  gi_typelib_hash_builder_add_string (GITypelibHashBuilder *builder,
      77                                      const char           *str,
      78                                      guint16               value)
      79  {
      80    g_return_if_fail (builder->c == NULL);
      81    g_hash_table_insert (builder->strings, g_strdup (str), GUINT_TO_POINTER ((guint) value));
      82  }
      83  
      84  gboolean
      85  gi_typelib_hash_builder_prepare (GITypelibHashBuilder *builder)
      86  {
      87    char **strs;
      88    GHashTableIter hashiter;
      89    gpointer key, value;
      90    cmph_io_adapter_t *io;
      91    cmph_config_t *config;
      92    guint32 num_elts;
      93    guint32 offset;
      94    guint i;
      95  
      96    if (builder->prepared)
      97      return builder->buildable;
      98    g_assert (builder->c == NULL);
      99  
     100    num_elts = g_hash_table_size (builder->strings);
     101    g_assert (num_elts <= 65536);
     102  
     103    strs = (char**) g_new (char *, num_elts + 1);
     104  
     105    i = 0;
     106    g_hash_table_iter_init (&hashiter, builder->strings);
     107    while (g_hash_table_iter_next (&hashiter, &key, &value))
     108      {
     109        const char *str = key;
     110  
     111        strs[i++] = g_strdup (str);
     112      }
     113    strs[i++] = NULL;
     114  
     115    io = cmph_io_vector_adapter (strs, num_elts);
     116    config = cmph_config_new (io);
     117    cmph_config_set_algo (config, CMPH_BDZ);
     118  
     119    builder->c = cmph_new (config);
     120    builder->prepared = TRUE;
     121    if (!builder->c)
     122      {
     123        builder->buildable = FALSE;
     124        goto out;
     125      }
     126    builder->buildable = TRUE;
     127    g_assert (cmph_size (builder->c) == num_elts);
     128  
     129    /* Pack a size counter at front */
     130    offset = sizeof(guint32) + cmph_packed_size (builder->c);
     131    builder->dirmap_offset = ALIGN_VALUE (offset, 4);
     132    builder->packed_size = builder->dirmap_offset + (num_elts * sizeof(guint16));
     133   out:
     134    cmph_config_destroy (config);
     135    cmph_io_vector_adapter_destroy (io);
     136    return builder->buildable;
     137  }
     138  
     139  guint32
     140  gi_typelib_hash_builder_get_buffer_size (GITypelibHashBuilder *builder)
     141  {
     142    g_return_val_if_fail (builder != NULL, 0);
     143    g_return_val_if_fail (builder->prepared, 0);
     144    g_return_val_if_fail (builder->buildable, 0 );
     145  
     146    return builder->packed_size;
     147  }
     148  
     149  void
     150  gi_typelib_hash_builder_pack (GITypelibHashBuilder *builder, guint8* mem, guint32 len)
     151  {
     152    guint16 *table;
     153    GHashTableIter hashiter;
     154    gpointer key, value;
     155  #ifndef G_DISABLE_ASSERT
     156    guint32 num_elts;
     157  #endif
     158    guint8 *packed_mem;
     159  
     160    g_return_if_fail (builder != NULL);
     161    g_return_if_fail (builder->prepared);
     162    g_return_if_fail (builder->buildable);
     163  
     164    g_assert (len >= builder->packed_size);
     165    g_assert ((((size_t)mem) & 0x3) == 0);
     166  
     167    memset (mem, 0, len);
     168  
     169    *((guint32*) mem) = builder->dirmap_offset;
     170    packed_mem = (guint8*)(mem + sizeof(guint32));
     171    cmph_pack (builder->c, packed_mem);
     172  
     173    table = (guint16*) (mem + builder->dirmap_offset);
     174  
     175  #ifndef G_DISABLE_ASSERT
     176    num_elts = g_hash_table_size (builder->strings);
     177  #endif
     178    g_hash_table_iter_init (&hashiter, builder->strings);
     179    while (g_hash_table_iter_next (&hashiter, &key, &value))
     180      {
     181        const char *str = key;
     182        guint16 strval = (guint16)GPOINTER_TO_UINT(value);
     183        guint32 hashv;
     184  
     185        hashv = cmph_search_packed (packed_mem, str, strlen (str));
     186        g_assert (hashv < num_elts);
     187        table[hashv] = strval;
     188      }
     189  }
     190  
     191  void
     192  gi_typelib_hash_builder_destroy (GITypelibHashBuilder *builder)
     193  {
     194    if (builder->c)
     195      {
     196        cmph_destroy (builder->c);
     197        builder->c = NULL;
     198      }
     199    g_hash_table_destroy (builder->strings);
     200    g_slice_free (GITypelibHashBuilder, builder);
     201  }
     202  
     203  guint16
     204  gi_typelib_hash_search (guint8* memory, const char *str, guint n_entries)
     205  {
     206    guint32 *mph;
     207    guint16 *table;
     208    guint32 dirmap_offset;
     209    guint32 offset;
     210  
     211    g_assert ((((size_t)memory) & 0x3) == 0);
     212    mph = ((guint32*)memory)+1;
     213  
     214    offset = cmph_search_packed (mph, str, strlen (str));
     215  
     216    /* Make sure that offset always lies in the entries array.  cmph
     217       cometimes generates offset larger than number of entries (for
     218       'str' argument which is not in the hashed list). In this case,
     219       fake the correct result and depend on caller's final check that
     220       the entry is really the one that the caller wanted. */
     221    if (offset >= n_entries)
     222      offset = 0;
     223  
     224    dirmap_offset = *((guint32*)memory);
     225    table = (guint16*) (memory + dirmap_offset);
     226  
     227    return table[offset];
     228  }
     229