(root)/
harfbuzz-8.3.0/
test/
api/
test-buffer.c
       1  /*
       2   * Copyright © 2011  Google, Inc.
       3   *
       4   *  This is part of HarfBuzz, a text shaping library.
       5   *
       6   * Permission is hereby granted, without written agreement and without
       7   * license or royalty fees, to use, copy, modify, and distribute this
       8   * software and its documentation for any purpose, provided that the
       9   * above copyright notice and the following two paragraphs appear in
      10   * all copies of this software.
      11   *
      12   * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
      13   * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
      14   * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
      15   * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
      16   * DAMAGE.
      17   *
      18   * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
      19   * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
      20   * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
      21   * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
      22   * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
      23   *
      24   * Google Author(s): Behdad Esfahbod
      25   */
      26  
      27  #include "hb-test.h"
      28  
      29  /* Unit tests for hb-buffer.h */
      30  
      31  
      32  static const char utf8[10] = "ab\360\240\200\200defg";
      33  static const uint16_t utf16[8] = {'a', 'b', 0xD840, 0xDC00, 'd', 'e', 'f', 'g'};
      34  static const uint32_t utf32[7] = {'a', 'b', 0x20000, 'd', 'e', 'f', 'g'};
      35  
      36  
      37  typedef enum {
      38    BUFFER_EMPTY,
      39    BUFFER_ONE_BY_ONE,
      40    BUFFER_UTF32,
      41    BUFFER_UTF16,
      42    BUFFER_UTF8,
      43    BUFFER_NUM_TYPES,
      44  } buffer_type_t;
      45  
      46  static const char *buffer_names[] = {
      47    "empty",
      48    "one-by-one",
      49    "utf32",
      50    "utf16",
      51    "utf8"
      52  };
      53  
      54  typedef struct
      55  {
      56    hb_buffer_t *buffer;
      57  } fixture_t;
      58  
      59  static void
      60  fixture_init (fixture_t *fixture, gconstpointer user_data)
      61  {
      62    hb_buffer_t *b;
      63    unsigned int i;
      64  
      65    b = fixture->buffer = hb_buffer_create ();
      66  
      67    switch (GPOINTER_TO_INT (user_data))
      68    {
      69      case BUFFER_EMPTY:
      70        break;
      71  
      72      case BUFFER_ONE_BY_ONE:
      73        for (i = 1; i < G_N_ELEMENTS (utf32) - 1; i++)
      74        hb_buffer_add (b, utf32[i], i);
      75        break;
      76  
      77      case BUFFER_UTF32:
      78        hb_buffer_add_utf32 (b, utf32, G_N_ELEMENTS (utf32), 1, G_N_ELEMENTS (utf32) - 2);
      79        break;
      80  
      81      case BUFFER_UTF16:
      82        hb_buffer_add_utf16 (b, utf16, G_N_ELEMENTS (utf16), 1, G_N_ELEMENTS (utf16) - 2);
      83        break;
      84  
      85      case BUFFER_UTF8:
      86        hb_buffer_add_utf8  (b, utf8,  G_N_ELEMENTS (utf8),  1, G_N_ELEMENTS (utf8)  - 2);
      87        break;
      88  
      89      default:
      90        g_assert_not_reached ();
      91    }
      92  }
      93  
      94  static void
      95  fixture_finish (fixture_t *fixture, gconstpointer user_data HB_UNUSED)
      96  {
      97    hb_buffer_destroy (fixture->buffer);
      98  }
      99  
     100  
     101  static void
     102  test_buffer_properties (fixture_t *fixture, gconstpointer user_data HB_UNUSED)
     103  {
     104    hb_buffer_t *b = fixture->buffer;
     105    hb_unicode_funcs_t *ufuncs;
     106  
     107    /* test default properties */
     108  
     109    g_assert (hb_buffer_get_unicode_funcs (b) == hb_unicode_funcs_get_default ());
     110    g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_INVALID);
     111    g_assert (hb_buffer_get_script (b) == HB_SCRIPT_INVALID);
     112    g_assert (hb_buffer_get_language (b) == NULL);
     113  
     114  
     115    /* test property changes are retained */
     116    ufuncs = hb_unicode_funcs_create (NULL);
     117    hb_buffer_set_unicode_funcs (b, ufuncs);
     118    hb_unicode_funcs_destroy (ufuncs);
     119    g_assert (hb_buffer_get_unicode_funcs (b) == ufuncs);
     120  
     121    hb_buffer_set_direction (b, HB_DIRECTION_RTL);
     122    g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_RTL);
     123  
     124    hb_buffer_set_script (b, HB_SCRIPT_ARABIC);
     125    g_assert (hb_buffer_get_script (b) == HB_SCRIPT_ARABIC);
     126  
     127    hb_buffer_set_language (b, hb_language_from_string ("fa", -1));
     128    g_assert (hb_buffer_get_language (b) == hb_language_from_string ("Fa", -1));
     129  
     130    hb_buffer_set_flags (b, HB_BUFFER_FLAG_BOT);
     131    g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAG_BOT);
     132  
     133    hb_buffer_set_replacement_codepoint (b, (unsigned int) -1);
     134    g_assert (hb_buffer_get_replacement_codepoint (b) == (unsigned int) -1);
     135  
     136  
     137    /* test clear_contents clears all these properties: */
     138  
     139    hb_buffer_clear_contents (b);
     140  
     141    g_assert (hb_buffer_get_unicode_funcs (b) == ufuncs);
     142    g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_INVALID);
     143    g_assert (hb_buffer_get_script (b) == HB_SCRIPT_INVALID);
     144    g_assert (hb_buffer_get_language (b) == NULL);
     145  
     146    /* but not these: */
     147  
     148    g_assert (hb_buffer_get_flags (b) != HB_BUFFER_FLAGS_DEFAULT);
     149    g_assert (hb_buffer_get_replacement_codepoint (b) != HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT);
     150  
     151  
     152    /* test reset clears all properties */
     153  
     154    hb_buffer_set_direction (b, HB_DIRECTION_RTL);
     155    g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_RTL);
     156  
     157    hb_buffer_set_script (b, HB_SCRIPT_ARABIC);
     158    g_assert (hb_buffer_get_script (b) == HB_SCRIPT_ARABIC);
     159  
     160    hb_buffer_set_language (b, hb_language_from_string ("fa", -1));
     161    g_assert (hb_buffer_get_language (b) == hb_language_from_string ("Fa", -1));
     162  
     163    hb_buffer_set_flags (b, HB_BUFFER_FLAG_BOT);
     164    g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAG_BOT);
     165  
     166    hb_buffer_set_replacement_codepoint (b, (unsigned int) -1);
     167    g_assert (hb_buffer_get_replacement_codepoint (b) == (unsigned int) -1);
     168  
     169    hb_buffer_reset (b);
     170  
     171    g_assert (hb_buffer_get_unicode_funcs (b) == hb_unicode_funcs_get_default ());
     172    g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_INVALID);
     173    g_assert (hb_buffer_get_script (b) == HB_SCRIPT_INVALID);
     174    g_assert (hb_buffer_get_language (b) == NULL);
     175    g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAGS_DEFAULT);
     176    g_assert (hb_buffer_get_replacement_codepoint (b) == HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT);
     177  }
     178  
     179  static void
     180  test_buffer_contents (fixture_t *fixture, gconstpointer user_data)
     181  {
     182    hb_buffer_t *b = fixture->buffer;
     183    unsigned int i, len, len2;
     184    buffer_type_t buffer_type = GPOINTER_TO_INT (user_data);
     185    hb_glyph_info_t *glyphs;
     186  
     187    if (buffer_type == BUFFER_EMPTY) {
     188      g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
     189      return;
     190    }
     191  
     192    len = hb_buffer_get_length (b);
     193    hb_buffer_get_glyph_infos (b, NULL); /* test NULL */
     194    glyphs = hb_buffer_get_glyph_infos (b, &len2);
     195    g_assert_cmpint (len, ==, len2);
     196    g_assert_cmpint (len, ==, 5);
     197  
     198    for (i = 0; i < len; i++) {
     199      g_assert_cmphex (glyphs[i].mask,      ==, 0);
     200      g_assert_cmphex (glyphs[i].var1.u32,  ==, 0);
     201      g_assert_cmphex (glyphs[i].var2.u32,  ==, 0);
     202    }
     203  
     204    for (i = 0; i < len; i++) {
     205      unsigned int cluster;
     206      cluster = 1+i;
     207      if (i >= 2) {
     208        if (buffer_type == BUFFER_UTF16)
     209  	cluster++;
     210        else if (buffer_type == BUFFER_UTF8)
     211  	cluster += 3;
     212      }
     213      g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
     214      g_assert_cmphex (glyphs[i].cluster,   ==, cluster);
     215    }
     216  
     217    /* reverse, test, and reverse back */
     218  
     219    hb_buffer_reverse (b);
     220    for (i = 0; i < len; i++)
     221      g_assert_cmphex (glyphs[i].codepoint, ==, utf32[len-i]);
     222  
     223    hb_buffer_reverse (b);
     224    for (i = 0; i < len; i++)
     225      g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
     226  
     227    /* reverse_clusters works same as reverse for now since each codepoint is
     228     * in its own cluster */
     229  
     230    hb_buffer_reverse_clusters (b);
     231    for (i = 0; i < len; i++)
     232      g_assert_cmphex (glyphs[i].codepoint, ==, utf32[len-i]);
     233  
     234    hb_buffer_reverse_clusters (b);
     235    for (i = 0; i < len; i++)
     236      g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
     237  
     238    /* now form a cluster and test again */
     239    glyphs[2].cluster = glyphs[1].cluster;
     240  
     241    /* reverse, test, and reverse back */
     242  
     243    hb_buffer_reverse (b);
     244    for (i = 0; i < len; i++)
     245      g_assert_cmphex (glyphs[i].codepoint, ==, utf32[len-i]);
     246  
     247    hb_buffer_reverse (b);
     248    for (i = 0; i < len; i++)
     249      g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
     250  
     251    /* reverse_clusters twice still should return the original string,
     252     * but when applied once, the 1-2 cluster should be retained. */
     253  
     254    hb_buffer_reverse_clusters (b);
     255    for (i = 0; i < len; i++) {
     256      unsigned int j = len-1-i;
     257      if (j == 1)
     258        j = 2;
     259      else if (j == 2)
     260        j = 1;
     261      g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+j]);
     262    }
     263  
     264    hb_buffer_reverse_clusters (b);
     265    for (i = 0; i < len; i++)
     266      g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
     267  
     268  
     269    /* test setting length */
     270  
     271    /* enlarge */
     272    g_assert (hb_buffer_set_length (b, 10));
     273    glyphs = hb_buffer_get_glyph_infos (b, NULL);
     274    g_assert_cmpint (hb_buffer_get_length (b), ==, 10);
     275    for (i = 0; i < 5; i++)
     276      g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
     277    for (i = 5; i < 10; i++)
     278      g_assert_cmphex (glyphs[i].codepoint, ==, 0);
     279    /* shrink */
     280    g_assert (hb_buffer_set_length (b, 3));
     281    glyphs = hb_buffer_get_glyph_infos (b, NULL);
     282    g_assert_cmpint (hb_buffer_get_length (b), ==, 3);
     283    for (i = 0; i < 3; i++)
     284      g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
     285  
     286  
     287    g_assert (hb_buffer_allocation_successful (b));
     288  
     289  
     290    /* test reset clears content */
     291  
     292    hb_buffer_reset (b);
     293    g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
     294  }
     295  
     296  static void
     297  test_buffer_positions (fixture_t *fixture, gconstpointer user_data HB_UNUSED)
     298  {
     299    hb_buffer_t *b = fixture->buffer;
     300    unsigned int i, len, len2;
     301    hb_glyph_position_t *positions;
     302  
     303    /* Without shaping, positions should all be zero */
     304    len = hb_buffer_get_length (b);
     305    hb_buffer_get_glyph_positions (b, NULL); /* test NULL */
     306    positions = hb_buffer_get_glyph_positions (b, &len2);
     307    g_assert_cmpint (len, ==, len2);
     308    for (i = 0; i < len; i++) {
     309      g_assert_cmpint (0, ==, positions[i].x_advance);
     310      g_assert_cmpint (0, ==, positions[i].y_advance);
     311      g_assert_cmpint (0, ==, positions[i].x_offset);
     312      g_assert_cmpint (0, ==, positions[i].y_offset);
     313      g_assert_cmpint (0, ==, positions[i].var.i32);
     314    }
     315  
     316    /* test reset clears content */
     317    hb_buffer_reset (b);
     318    g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
     319  }
     320  
     321  static void
     322  test_buffer_allocation (fixture_t *fixture, gconstpointer user_data HB_UNUSED)
     323  {
     324    hb_buffer_t *b = fixture->buffer;
     325  
     326    g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
     327  
     328    g_assert (hb_buffer_pre_allocate (b, 100));
     329    g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
     330    g_assert (hb_buffer_allocation_successful (b));
     331  
     332    /* lets try a huge allocation, make sure it fails */
     333    g_assert (!hb_buffer_pre_allocate (b, (unsigned int) -1));
     334    g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
     335    g_assert (!hb_buffer_allocation_successful (b));
     336  
     337    /* small one again */
     338    g_assert (hb_buffer_pre_allocate (b, 50));
     339    g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
     340    g_assert (!hb_buffer_allocation_successful (b));
     341  
     342    hb_buffer_reset (b);
     343    g_assert (hb_buffer_allocation_successful (b));
     344  
     345    /* all allocation and size  */
     346    g_assert (!hb_buffer_pre_allocate (b, ((unsigned int) -1) / 20 + 1));
     347    g_assert (!hb_buffer_allocation_successful (b));
     348  
     349    hb_buffer_reset (b);
     350    g_assert (hb_buffer_allocation_successful (b));
     351  
     352    /* technically, this one can actually pass on 64bit machines, but
     353     * I'm doubtful that any malloc allows 4GB allocations at a time.
     354     * But let's only enable it on a 32-bit machine. */
     355    if (sizeof (long) == 4) {
     356      g_assert (!hb_buffer_pre_allocate (b, ((unsigned int) -1) / 20 - 1));
     357      g_assert (!hb_buffer_allocation_successful (b));
     358    }
     359  
     360    hb_buffer_reset (b);
     361    g_assert (hb_buffer_allocation_successful (b));
     362  }
     363  
     364  
     365  typedef struct {
     366    const char utf8[8];
     367    const uint32_t codepoints[8];
     368  } utf8_conversion_test_t;
     369  
     370  /* note: we skip the first and last byte when adding to buffer */
     371  static const utf8_conversion_test_t utf8_conversion_tests[] = {
     372    {"a\303\207", {(hb_codepoint_t) -1}},
     373    {"a\303\207b", {0xC7}},
     374    {"ab\303cd", {'b', (hb_codepoint_t) -1, 'c'}},
     375    {"ab\303\302\301cd", {'b', (hb_codepoint_t) -1, (hb_codepoint_t) -1, (hb_codepoint_t) -1, 'c'}}
     376  };
     377  
     378  static void
     379  test_buffer_utf8_conversion (void)
     380  {
     381    hb_buffer_t *b;
     382    unsigned int chars, i, j, len;
     383  
     384    b = hb_buffer_create ();
     385    hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -1);
     386  
     387    for (i = 0; i < G_N_ELEMENTS (utf8_conversion_tests); i++)
     388    {
     389      unsigned int bytes;
     390      hb_glyph_info_t *glyphs;
     391      const utf8_conversion_test_t *test = &utf8_conversion_tests[i];
     392      char *escaped;
     393  
     394      escaped = g_strescape (test->utf8, NULL);
     395      g_test_message ("UTF-8 test #%d: %s", i, escaped);
     396      g_free (escaped);
     397  
     398      bytes = strlen (test->utf8);
     399      for (chars = 0; test->codepoints[chars]; chars++)
     400        ;
     401  
     402      hb_buffer_clear_contents (b);
     403      hb_buffer_add_utf8 (b, test->utf8, bytes,  1, bytes - 2);
     404  
     405      glyphs = hb_buffer_get_glyph_infos (b, &len);
     406      g_assert_cmpint (len, ==, chars);
     407      for (j = 0; j < chars; j++)
     408        g_assert_cmphex (glyphs[j].codepoint, ==, test->codepoints[j]);
     409    }
     410  
     411    hb_buffer_destroy (b);
     412  }
     413  
     414  
     415  
     416  /* Following test table is adapted from glib/glib/tests/utf8-validate.c
     417   * with relicensing permission from Matthias Clasen. */
     418  
/* One UTF-8 validity case, as consumed by test_buffer_utf8_validity(). */
typedef struct {
  /* NUL-terminated input text; its length is taken with strlen() */
  const char *utf8;
  /* number of bytes to feed to the buffer, or -1 to feed the whole text */
  int max_len;
  /* for invalid cases: expected cluster of the first replacement glyph;
   * not checked for valid cases */
  unsigned int offset;
  /* TRUE if the fed bytes must not produce any replacement glyph */
  gboolean valid;
} utf8_validity_test_t;
     425  
/* Table of UTF-8 validity cases.  Fields per entry, in order: input text,
 * max_len (bytes to feed, -1 for the whole text), offset, valid.
 * test_buffer_utf8_validity() compares the offset only for entries whose
 * valid field is FALSE. */
static const utf8_validity_test_t utf8_validity_tests[] = {
  /* some tests to check max_len handling */
  /* length 1: one-byte (ASCII) sequences */
  { "abcde", -1, 5, TRUE },
  { "abcde", 3, 3, TRUE },
  { "abcde", 5, 5, TRUE },
  /* length 2: two-byte sequences; an odd max_len cuts the last one in half */
  { "\xc2\xa9\xc2\xa9\xc2\xa9", -1, 6, TRUE },
  { "\xc2\xa9\xc2\xa9\xc2\xa9",  1, 0, FALSE },
  { "\xc2\xa9\xc2\xa9\xc2\xa9",  2, 2, TRUE },
  { "\xc2\xa9\xc2\xa9\xc2\xa9",  3, 2, FALSE },
  { "\xc2\xa9\xc2\xa9\xc2\xa9",  4, 4, TRUE },
  { "\xc2\xa9\xc2\xa9\xc2\xa9",  5, 4, FALSE },
  { "\xc2\xa9\xc2\xa9\xc2\xa9",  6, 6, TRUE },
  /* length 3: three-byte sequences; only multiples of 3 end on a boundary */
  { "\xe2\x89\xa0\xe2\x89\xa0", -1, 6, TRUE },
  { "\xe2\x89\xa0\xe2\x89\xa0",  1, 0, FALSE },
  { "\xe2\x89\xa0\xe2\x89\xa0",  2, 0, FALSE },
  { "\xe2\x89\xa0\xe2\x89\xa0",  3, 3, TRUE },
  { "\xe2\x89\xa0\xe2\x89\xa0",  4, 3, FALSE },
  { "\xe2\x89\xa0\xe2\x89\xa0",  5, 3, FALSE },
  { "\xe2\x89\xa0\xe2\x89\xa0",  6, 6, TRUE },

  /* examples from https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt */
  /* greek 'kosme' */
  { "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5", -1, 11, TRUE },
  /* first sequence of each length */
  /* note: strlen() of the next entry is 0, so no bytes are fed for it */
  { "\x00", -1, 0, TRUE },
  { "\xc2\x80", -1, 2, TRUE },
  { "\xe0\xa0\x80", -1, 3, TRUE },
  { "\xf0\x90\x80\x80", -1, 4, TRUE },
  { "\xf8\x88\x80\x80\x80", -1, 0, FALSE },
  { "\xfc\x84\x80\x80\x80\x80", -1, 0, FALSE },
  /* last sequence of each length */
  { "\x7f", -1, 1, TRUE },
  { "\xdf\xbf", -1, 2, TRUE },
  { "\xef\xbf\xbf", -1, 0, TRUE },
  { "\xf4\x8f\xbf\xbf", -1, 0, TRUE },
  { "\xf4\x90\xbf\xbf", -1, 0, FALSE },
  { "\xf7\xbf\xbf\xbf", -1, 0, FALSE },
  { "\xfb\xbf\xbf\xbf\xbf", -1, 0, FALSE },
  { "\xfd\xbf\xbf\xbf\xbf\xbf", -1, 0, FALSE },
  /* other boundary conditions */
  { "\xed\x9f\xbf", -1, 3, TRUE },
  { "\xed\xa0\x80", -1, 0, FALSE },
  { "\xed\xbf\xbf", -1, 0, FALSE },
  { "\xee\x80\x80", -1, 3, TRUE },
  { "\xef\xbf\xbd", -1, 3, TRUE },
  { "\xf4\x8f\xbf\xbf", -1, 0, TRUE },
  /* malformed sequences */
  /* continuation bytes */
  { "\x80", -1, 0, FALSE },
  { "\xbf", -1, 0, FALSE },
  { "\x80\xbf", -1, 0, FALSE },
  { "\x80\xbf\x80", -1, 0, FALSE },
  { "\x80\xbf\x80\xbf", -1, 0, FALSE },
  { "\x80\xbf\x80\xbf\x80", -1, 0, FALSE },
  { "\x80\xbf\x80\xbf\x80\xbf", -1, 0, FALSE },
  { "\x80\xbf\x80\xbf\x80\xbf\x80", -1, 0, FALSE },

  /* every possible continuation byte (0x80 .. 0xbf) on its own */
  { "\x80", -1, 0, FALSE },
  { "\x81", -1, 0, FALSE },
  { "\x82", -1, 0, FALSE },
  { "\x83", -1, 0, FALSE },
  { "\x84", -1, 0, FALSE },
  { "\x85", -1, 0, FALSE },
  { "\x86", -1, 0, FALSE },
  { "\x87", -1, 0, FALSE },
  { "\x88", -1, 0, FALSE },
  { "\x89", -1, 0, FALSE },
  { "\x8a", -1, 0, FALSE },
  { "\x8b", -1, 0, FALSE },
  { "\x8c", -1, 0, FALSE },
  { "\x8d", -1, 0, FALSE },
  { "\x8e", -1, 0, FALSE },
  { "\x8f", -1, 0, FALSE },
  { "\x90", -1, 0, FALSE },
  { "\x91", -1, 0, FALSE },
  { "\x92", -1, 0, FALSE },
  { "\x93", -1, 0, FALSE },
  { "\x94", -1, 0, FALSE },
  { "\x95", -1, 0, FALSE },
  { "\x96", -1, 0, FALSE },
  { "\x97", -1, 0, FALSE },
  { "\x98", -1, 0, FALSE },
  { "\x99", -1, 0, FALSE },
  { "\x9a", -1, 0, FALSE },
  { "\x9b", -1, 0, FALSE },
  { "\x9c", -1, 0, FALSE },
  { "\x9d", -1, 0, FALSE },
  { "\x9e", -1, 0, FALSE },
  { "\x9f", -1, 0, FALSE },
  { "\xa0", -1, 0, FALSE },
  { "\xa1", -1, 0, FALSE },
  { "\xa2", -1, 0, FALSE },
  { "\xa3", -1, 0, FALSE },
  { "\xa4", -1, 0, FALSE },
  { "\xa5", -1, 0, FALSE },
  { "\xa6", -1, 0, FALSE },
  { "\xa7", -1, 0, FALSE },
  { "\xa8", -1, 0, FALSE },
  { "\xa9", -1, 0, FALSE },
  { "\xaa", -1, 0, FALSE },
  { "\xab", -1, 0, FALSE },
  { "\xac", -1, 0, FALSE },
  { "\xad", -1, 0, FALSE },
  { "\xae", -1, 0, FALSE },
  { "\xaf", -1, 0, FALSE },
  { "\xb0", -1, 0, FALSE },
  { "\xb1", -1, 0, FALSE },
  { "\xb2", -1, 0, FALSE },
  { "\xb3", -1, 0, FALSE },
  { "\xb4", -1, 0, FALSE },
  { "\xb5", -1, 0, FALSE },
  { "\xb6", -1, 0, FALSE },
  { "\xb7", -1, 0, FALSE },
  { "\xb8", -1, 0, FALSE },
  { "\xb9", -1, 0, FALSE },
  { "\xba", -1, 0, FALSE },
  { "\xbb", -1, 0, FALSE },
  { "\xbc", -1, 0, FALSE },
  { "\xbd", -1, 0, FALSE },
  { "\xbe", -1, 0, FALSE },
  { "\xbf", -1, 0, FALSE },
  /* lone start characters (0xc0 .. 0xfd), each followed by a space */
  { "\xc0\x20", -1, 0, FALSE },
  { "\xc1\x20", -1, 0, FALSE },
  { "\xc2\x20", -1, 0, FALSE },
  { "\xc3\x20", -1, 0, FALSE },
  { "\xc4\x20", -1, 0, FALSE },
  { "\xc5\x20", -1, 0, FALSE },
  { "\xc6\x20", -1, 0, FALSE },
  { "\xc7\x20", -1, 0, FALSE },
  { "\xc8\x20", -1, 0, FALSE },
  { "\xc9\x20", -1, 0, FALSE },
  { "\xca\x20", -1, 0, FALSE },
  { "\xcb\x20", -1, 0, FALSE },
  { "\xcc\x20", -1, 0, FALSE },
  { "\xcd\x20", -1, 0, FALSE },
  { "\xce\x20", -1, 0, FALSE },
  { "\xcf\x20", -1, 0, FALSE },
  { "\xd0\x20", -1, 0, FALSE },
  { "\xd1\x20", -1, 0, FALSE },
  { "\xd2\x20", -1, 0, FALSE },
  { "\xd3\x20", -1, 0, FALSE },
  { "\xd4\x20", -1, 0, FALSE },
  { "\xd5\x20", -1, 0, FALSE },
  { "\xd6\x20", -1, 0, FALSE },
  { "\xd7\x20", -1, 0, FALSE },
  { "\xd8\x20", -1, 0, FALSE },
  { "\xd9\x20", -1, 0, FALSE },
  { "\xda\x20", -1, 0, FALSE },
  { "\xdb\x20", -1, 0, FALSE },
  { "\xdc\x20", -1, 0, FALSE },
  { "\xdd\x20", -1, 0, FALSE },
  { "\xde\x20", -1, 0, FALSE },
  { "\xdf\x20", -1, 0, FALSE },
  { "\xe0\x20", -1, 0, FALSE },
  { "\xe1\x20", -1, 0, FALSE },
  { "\xe2\x20", -1, 0, FALSE },
  { "\xe3\x20", -1, 0, FALSE },
  { "\xe4\x20", -1, 0, FALSE },
  { "\xe5\x20", -1, 0, FALSE },
  { "\xe6\x20", -1, 0, FALSE },
  { "\xe7\x20", -1, 0, FALSE },
  { "\xe8\x20", -1, 0, FALSE },
  { "\xe9\x20", -1, 0, FALSE },
  { "\xea\x20", -1, 0, FALSE },
  { "\xeb\x20", -1, 0, FALSE },
  { "\xec\x20", -1, 0, FALSE },
  { "\xed\x20", -1, 0, FALSE },
  { "\xee\x20", -1, 0, FALSE },
  { "\xef\x20", -1, 0, FALSE },
  { "\xf0\x20", -1, 0, FALSE },
  { "\xf1\x20", -1, 0, FALSE },
  { "\xf2\x20", -1, 0, FALSE },
  { "\xf3\x20", -1, 0, FALSE },
  { "\xf4\x20", -1, 0, FALSE },
  { "\xf5\x20", -1, 0, FALSE },
  { "\xf6\x20", -1, 0, FALSE },
  { "\xf7\x20", -1, 0, FALSE },
  { "\xf8\x20", -1, 0, FALSE },
  { "\xf9\x20", -1, 0, FALSE },
  { "\xfa\x20", -1, 0, FALSE },
  { "\xfb\x20", -1, 0, FALSE },
  { "\xfc\x20", -1, 0, FALSE },
  { "\xfd\x20", -1, 0, FALSE },
  /* missing continuation bytes (after a leading space, hence offset 1) */
  { "\x20\xc0", -1, 1, FALSE },
  { "\x20\xe0\x80", -1, 1, FALSE },
  { "\x20\xf0\x80\x80", -1, 1, FALSE },
  { "\x20\xf8\x80\x80\x80", -1, 1, FALSE },
  { "\x20\xfc\x80\x80\x80\x80", -1, 1, FALSE },
  { "\x20\xdf", -1, 1, FALSE },
  { "\x20\xef\xbf", -1, 1, FALSE },
  { "\x20\xf7\xbf\xbf", -1, 1, FALSE },
  { "\x20\xfb\xbf\xbf\xbf", -1, 1, FALSE },
  { "\x20\xfd\xbf\xbf\xbf\xbf", -1, 1, FALSE },
  /* impossible bytes */
  { "\x20\xfe\x20", -1, 1, FALSE },
  { "\x20\xff\x20", -1, 1, FALSE },
  /* overlong sequences */
  { "\x20\xc0\xaf\x20", -1, 1, FALSE },
  { "\x20\xe0\x80\xaf\x20", -1, 1, FALSE },
  { "\x20\xf0\x80\x80\xaf\x20", -1, 1, FALSE },
  { "\x20\xf8\x80\x80\x80\xaf\x20", -1, 1, FALSE },
  { "\x20\xfc\x80\x80\x80\x80\xaf\x20", -1, 1, FALSE },
  { "\x20\xc1\xbf\x20", -1, 1, FALSE },
  { "\x20\xe0\x9f\xbf\x20", -1, 1, FALSE },
  { "\x20\xf0\x8f\xbf\xbf\x20", -1, 1, FALSE },
  { "\x20\xf8\x87\xbf\xbf\xbf\x20", -1, 1, FALSE },
  { "\x20\xfc\x83\xbf\xbf\xbf\xbf\x20", -1, 1, FALSE },
  { "\x20\xc0\x80\x20", -1, 1, FALSE },
  { "\x20\xe0\x80\x80\x20", -1, 1, FALSE },
  { "\x20\xf0\x80\x80\x80\x20", -1, 1, FALSE },
  { "\x20\xf8\x80\x80\x80\x80\x20", -1, 1, FALSE },
  { "\x20\xfc\x80\x80\x80\x80\x80\x20", -1, 1, FALSE },
  /* illegal code positions */
  { "\x20\xed\xa0\x80\x20", -1, 1, FALSE },
  { "\x20\xed\xad\xbf\x20", -1, 1, FALSE },
  { "\x20\xed\xae\x80\x20", -1, 1, FALSE },
  { "\x20\xed\xaf\xbf\x20", -1, 1, FALSE },
  { "\x20\xed\xb0\x80\x20", -1, 1, FALSE },
  { "\x20\xed\xbe\x80\x20", -1, 1, FALSE },
  { "\x20\xed\xbf\xbf\x20", -1, 1, FALSE },
  { "\x20\xed\xa0\x80\xed\xb0\x80\x20", -1, 1, FALSE },
  { "\x20\xed\xa0\x80\xed\xbf\xbf\x20", -1, 1, FALSE },
  { "\x20\xed\xad\xbf\xed\xb0\x80\x20", -1, 1, FALSE },
  { "\x20\xed\xad\xbf\xed\xbf\xbf\x20", -1, 1, FALSE },
  { "\x20\xed\xae\x80\xed\xb0\x80\x20", -1, 1, FALSE },
  { "\x20\xed\xae\x80\xed\xbf\xbf\x20", -1, 1, FALSE },
  { "\x20\xed\xaf\xbf\xed\xb0\x80\x20", -1, 1, FALSE },
  { "\x20\xed\xaf\xbf\xed\xbf\xbf\x20", -1, 1, FALSE },
#if 0 /* We don't consider U+FFFE / U+FFFF and similar invalid. */
  { "\x20\xef\xbf\xbe\x20", -1, 1, FALSE },
  { "\x20\xef\xbf\xbf\x20", -1, 1, FALSE },
#endif
  /* the empty string feeds nothing and is valid */
  { "", -1, 0, TRUE }
};
     666  
     667  static void
     668  test_buffer_utf8_validity (void)
     669  {
     670    hb_buffer_t *b;
     671    unsigned int i;
     672  
     673    b = hb_buffer_create ();
     674    hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -1);
     675  
     676    for (i = 0; i < G_N_ELEMENTS (utf8_validity_tests); i++)
     677    {
     678      const utf8_validity_test_t *test = &utf8_validity_tests[i];
     679      unsigned int text_bytes, segment_bytes, j, len;
     680      hb_glyph_info_t *glyphs;
     681      char *escaped;
     682  
     683      escaped = g_strescape (test->utf8, NULL);
     684      g_test_message ("UTF-8 test #%d: %s", i, escaped);
     685      g_free (escaped);
     686  
     687      text_bytes = strlen (test->utf8);
     688      if (test->max_len == -1)
     689        segment_bytes = text_bytes;
     690      else
     691        segment_bytes = test->max_len;
     692  
     693      hb_buffer_clear_contents (b);
     694      hb_buffer_add_utf8 (b, test->utf8, text_bytes,  0, segment_bytes);
     695  
     696      glyphs = hb_buffer_get_glyph_infos (b, &len);
     697      for (j = 0; j < len; j++)
     698        if (glyphs[j].codepoint == (hb_codepoint_t) -1)
     699  	break;
     700  
     701      g_assert (test->valid ? j == len : j < len);
     702      if (!test->valid)
     703        g_assert (glyphs[j].cluster == test->offset);
     704    }
     705  
     706    hb_buffer_destroy (b);
     707  }
     708  
     709  
     710  typedef struct {
     711    const uint16_t utf16[8];
     712    const uint32_t codepoints[8];
     713  } utf16_conversion_test_t;
     714  
     715  /* note: we skip the first and last item from utf16 when adding to buffer */
     716  static const utf16_conversion_test_t utf16_conversion_tests[] = {
     717    {{0x41, 0x004D, 0x0430, 0x4E8C, 0xD800, 0xDF02, 0x61} , {0x004D, 0x0430, 0x4E8C, 0x10302}},
     718    {{0x41, 0xD800, 0xDF02, 0x61}, {0x10302}},
     719    {{0x41, 0xD800, 0xDF02}, {(hb_codepoint_t) -1}},
     720    {{0x41, 0x61, 0xD800, 0xDF02}, {0x61, (hb_codepoint_t) -1}},
     721    {{0x41, 0xD800, 0x61, 0xDF02}, {(hb_codepoint_t) -1, 0x61}},
     722    {{0x41, 0xDF00, 0x61}, {(hb_codepoint_t) -1}},
     723    {{0x41, 0x61}, {0}}
     724  };
     725  
     726  static void
     727  test_buffer_utf16_conversion (void)
     728  {
     729    hb_buffer_t *b;
     730    unsigned int i;
     731  
     732    b = hb_buffer_create ();
     733    hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -1);
     734  
     735    for (i = 0; i < G_N_ELEMENTS (utf16_conversion_tests); i++)
     736    {
     737      const utf16_conversion_test_t *test = &utf16_conversion_tests[i];
     738      unsigned int u_len, chars, j, len;
     739      hb_glyph_info_t *glyphs;
     740  
     741      g_test_message ("UTF-16 test #%d", i);
     742  
     743      for (u_len = 0; test->utf16[u_len]; u_len++)
     744        ;
     745      for (chars = 0; test->codepoints[chars]; chars++)
     746        ;
     747  
     748      hb_buffer_clear_contents (b);
     749      hb_buffer_add_utf16 (b, test->utf16, u_len,  1, u_len - 2);
     750  
     751      glyphs = hb_buffer_get_glyph_infos (b, &len);
     752      g_assert_cmpint (len, ==, chars);
     753      for (j = 0; j < chars; j++)
     754        g_assert_cmphex (glyphs[j].codepoint, ==, test->codepoints[j]);
     755    }
     756  
     757    hb_buffer_destroy (b);
     758  }
     759  
     760  
     761  typedef struct {
     762    const uint32_t utf32[8];
     763    const uint32_t codepoints[8];
     764  } utf32_conversion_test_t;
     765  
/* Note: the first and last items of utf32 are skipped when adding to the
 * buffer, so they never appear in the expected code points.
 * (hb_codepoint_t) -3 is the replacement code point that the test installs
 * on the buffer before running these rows. */
static const utf32_conversion_test_t utf32_conversion_tests[] = {
  {{0x41, 0x004D, 0x0430, 0x4E8C, 0xD800, 0xDF02, 0x61} , {0x004D, 0x0430, 0x4E8C, (hb_codepoint_t) -3, (hb_codepoint_t) -3}},
  {{0x41, 0x004D, 0x0430, 0x4E8C, 0x10302, 0x61} , {0x004D, 0x0430, 0x4E8C, 0x10302}},
  {{0x41, 0xD800, 0xDF02, 0x61}, {(hb_codepoint_t) -3, (hb_codepoint_t) -3}},
  {{0x41, 0xD800, 0xDF02}, {(hb_codepoint_t) -3}},
  {{0x41, 0x61, 0xD800, 0xDF02}, {0x61, (hb_codepoint_t) -3}},
  {{0x41, 0xD800, 0x61, 0xDF02}, {(hb_codepoint_t) -3, 0x61}},
  {{0x41, 0xDF00, 0x61}, {(hb_codepoint_t) -3}},
  /* 0x10FFFF is expected to pass through; 0x110000 is expected to be replaced. */
  {{0x41, 0x10FFFF, 0x61}, {0x10FFFF}},
  {{0x41, 0x110000, 0x61}, {(hb_codepoint_t) -3}},
  /* Nothing between the first and last item: the buffer must stay empty. */
  {{0x41, 0x61}, {0}}
};
     779  
     780  static void
     781  test_buffer_utf32_conversion (void)
     782  {
     783    hb_buffer_t *b;
     784    unsigned int i;
     785  
     786    b = hb_buffer_create ();
     787    hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -3);
     788  
     789    for (i = 0; i < G_N_ELEMENTS (utf32_conversion_tests); i++)
     790    {
     791      const utf32_conversion_test_t *test = &utf32_conversion_tests[i];
     792      unsigned int u_len, chars, j, len;
     793      hb_glyph_info_t *glyphs;
     794  
     795      g_test_message ("UTF-32 test #%d", i);
     796  
     797      for (u_len = 0; test->utf32[u_len]; u_len++)
     798        ;
     799      for (chars = 0; test->codepoints[chars]; chars++)
     800        ;
     801  
     802      hb_buffer_clear_contents (b);
     803      hb_buffer_add_utf32 (b, test->utf32, u_len,  1, u_len - 2);
     804  
     805      glyphs = hb_buffer_get_glyph_infos (b, &len);
     806      g_assert_cmpint (len, ==, chars);
     807      for (j = 0; j < chars; j++)
     808        g_assert_cmphex (glyphs[j].codepoint, ==, test->codepoints[j]);
     809    }
     810  
     811    hb_buffer_destroy (b);
     812  }
     813  
     814  
/* Asserts that buffer B reports no content: a length of zero, and NULL from
 * both the glyph-info and the glyph-position accessors. */
static void
test_empty (hb_buffer_t *b)
{
  g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
  g_assert (!hb_buffer_get_glyph_infos (b, NULL));
  g_assert (!hb_buffer_get_glyph_positions (b, NULL));
}
     822  
/* Exercises the buffer returned by hb_buffer_get_empty(): whatever is done
 * to it, it is expected to stay empty and to keep reporting that allocation
 * was not successful. */
static void
test_buffer_empty (void)
{
  hb_buffer_t *b = hb_buffer_get_empty ();

  /* The getter must return a non-NULL pointer, and the same one every time. */
  g_assert (hb_buffer_get_empty ());
  g_assert (hb_buffer_get_empty () == b);

  g_assert (!hb_buffer_allocation_successful (b));

  test_empty (b);

  /* Adding text must leave the buffer empty. */
  hb_buffer_add_utf32 (b, utf32, G_N_ELEMENTS (utf32), 1, G_N_ELEMENTS (utf32) - 2);

  test_empty (b);

  /* Reversing has nothing to check directly; it just must not break the
   * emptiness verified below. */
  hb_buffer_reverse (b);
  hb_buffer_reverse_clusters (b);

  /* Growing to a non-zero length is expected to fail ... */
  g_assert (!hb_buffer_set_length (b, 10));

  test_empty (b);

  /* ... whereas setting the length to zero is expected to succeed. */
  g_assert (hb_buffer_set_length (b, 0));

  test_empty (b);

  g_assert (!hb_buffer_allocation_successful (b));

  /* A reset must not change any of the above. */
  hb_buffer_reset (b);

  test_empty (b);

  g_assert (!hb_buffer_allocation_successful (b));
}
     858  
/* One serialize/deserialize test case. */
typedef struct {
  /* Serialized text to deserialize; for successful cases it is also the
   * string the re-serialized buffer must equal. */
  const char *contents;
  /* Format used both for deserializing and for serializing back. */
  hb_buffer_serialize_format_t format;
  /* Buffer length expected after deserializing; only checked when success
   * is non-zero. */
  unsigned int num_items;
  /* Non-zero when the case should be round-tripped; zero marks an expected
   * parse failure, for which no round trip is attempted. */
  hb_bool_t success;
} serialization_test_t;
     865  
/* Cases for test_buffer_serialize_deserialize(): two round-trippable inputs
 * (one per format) followed by two inputs marked as parse failures. */
static const serialization_test_t serialization_tests[] = {
  { "<U+0640=0|U+0635=1>", HB_BUFFER_SERIALIZE_FORMAT_TEXT, 2, 1 },
  { "[{\"u\":1600,\"cl\":0},{\"u\":1589,\"cl\":1}]", HB_BUFFER_SERIALIZE_FORMAT_JSON, 2, 1 },

  /* Mixed glyphs/Unicodes -> parse fail */
  { "[{\"u\":1600,\"cl\":0},{\"g\":1589,\"cl\":1}]", HB_BUFFER_SERIALIZE_FORMAT_JSON, 0, 0 },
  { "<U+0640=0|uni0635=1>", HB_BUFFER_SERIALIZE_FORMAT_TEXT, 0, 0 },
};
     874  
     875  static void
     876  test_buffer_serialize_deserialize (void)
     877  {
     878    hb_buffer_t *b;
     879    unsigned int i;
     880  
     881    for (i = 0; i < G_N_ELEMENTS (serialization_tests); i++)
     882    {
     883      unsigned int consumed;
     884      char round_trip[1024];
     885  
     886      b = hb_buffer_create ();
     887      hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -1);
     888  
     889      const serialization_test_t *test = &serialization_tests[i];
     890      g_test_message ("serialize test #%d", i);
     891  
     892      (void) hb_buffer_deserialize_unicode (b, test->contents, -1, NULL, test->format);
     893  
     894      // Expected parse failure, got one, don't round-trip
     895      if (test->success != 0)
     896      {
     897        unsigned int num_glyphs = hb_buffer_get_length (b);
     898        g_assert_cmpint (num_glyphs, ==, test->num_items);
     899  
     900        hb_buffer_serialize_unicode (b, 0, num_glyphs, round_trip,
     901  				   sizeof(round_trip), &consumed, test->format,
     902  				   HB_BUFFER_SERIALIZE_FLAG_DEFAULT);
     903        g_assert_cmpstr (round_trip, ==, test->contents);
     904      }
     905  
     906      hb_buffer_destroy (b);
     907  
     908    }
     909  
     910    char test[1024];
     911    unsigned int consumed;
     912    hb_buffer_t *indeterminate = hb_buffer_get_empty ();
     913    hb_buffer_serialize (indeterminate, 0, (unsigned) -1,
     914  		       test, sizeof(test), &consumed, NULL,
     915  		       HB_BUFFER_SERIALIZE_FORMAT_JSON,
     916  		       HB_BUFFER_SERIALIZE_FLAG_DEFAULT);
     917    g_assert_cmpstr ( test, ==, "[]");
     918  
     919    hb_buffer_serialize (indeterminate, 0, (unsigned) - 1,
     920  		       test, sizeof(test), &consumed, NULL,
     921  		       HB_BUFFER_SERIALIZE_FORMAT_TEXT,
     922  		       HB_BUFFER_SERIALIZE_FLAG_DEFAULT);
     923    g_assert_cmpstr ( test, ==, "!!");
     924  
     925  }
     926  
     927  int
     928  main (int argc, char **argv)
     929  {
     930    unsigned int i;
     931  
     932    hb_test_init (&argc, &argv);
     933  
     934    for (i = 0; i < BUFFER_NUM_TYPES; i++)
     935    {
     936      const void *buffer_type = GINT_TO_POINTER (i);
     937      const char *buffer_name = buffer_names[i];
     938  
     939      hb_test_add_fixture_flavor (fixture, buffer_type, buffer_name, test_buffer_properties);
     940      hb_test_add_fixture_flavor (fixture, buffer_type, buffer_name, test_buffer_contents);
     941      hb_test_add_fixture_flavor (fixture, buffer_type, buffer_name, test_buffer_positions);
     942    }
     943  
     944    hb_test_add_fixture (fixture, GINT_TO_POINTER (BUFFER_EMPTY), test_buffer_allocation);
     945  
     946    hb_test_add (test_buffer_utf8_conversion);
     947    hb_test_add (test_buffer_utf8_validity);
     948    hb_test_add (test_buffer_utf16_conversion);
     949    hb_test_add (test_buffer_utf32_conversion);
     950    hb_test_add (test_buffer_empty);
     951    hb_test_add (test_buffer_serialize_deserialize);
     952  
     953    return hb_test_run();
     954  }