(root)/
glib-2.79.0/
glib/
tests/
convert.c
       1  /* GLIB - Library of useful routines for C programming
       2   * Copyright (C) 1995-1997  Peter Mattis, Spencer Kimball and Josh MacDonald
       3   *
       4   * SPDX-License-Identifier: LGPL-2.1-or-later
       5   *
       6   * This library is free software; you can redistribute it and/or
       7   * modify it under the terms of the GNU Lesser General Public
       8   * License as published by the Free Software Foundation; either
       9   * version 2.1 of the License, or (at your option) any later version.
      10   *
      11   * This library is distributed in the hope that it will be useful,
      12   * but WITHOUT ANY WARRANTY; without even the implied warranty of
      13   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      14   * Lesser General Public License for more details.
      15   *
      16   * You should have received a copy of the GNU Lesser General Public
      17   * License along with this library; if not, see <http://www.gnu.org/licenses/>.
      18   */
      19  
      20  /*
      21   * Modified by the GLib Team and others 1997-2000.  See the AUTHORS
      22   * file for a list of people on the GLib Team.  See the ChangeLog
      23   * files for a list of changes.  These files are distributed with
      24   * GLib at ftp://ftp.gtk.org/pub/gtk/. 
      25   */
      26  
      27  #undef G_DISABLE_ASSERT
      28  #undef G_LOG_DOMAIN
      29  
      30  #include <locale.h>
      31  #include <string.h>
      32  
      33  #include <glib.h>
      34  
      35  /* Bug 311337 */
      36  static void
      37  test_iconv_state (void)
      38  {
      39    const gchar *in = "\xf4\xe5\xf8\xe5\xed";
      40    const gchar *expected = "\xd7\xa4\xd7\x95\xd7\xa8\xd7\x95\xd7\x9d";
      41    gchar *out;
      42    gsize bytes_read = 0;
      43    gsize bytes_written = 0;
      44    GError *error = NULL;
      45  
      46    out = g_convert (in, -1, "UTF-8", "CP1255", 
      47  		   &bytes_read, &bytes_written, &error);
      48  
      49    if (error && error->code == G_CONVERT_ERROR_NO_CONVERSION)
      50      return; /* silently skip if CP1255 is not supported, see bug 467707 */ 
      51  
      52    g_assert_no_error (error);
      53    g_assert_cmpint (bytes_read, ==, 5);
      54    g_assert_cmpint (bytes_written, ==, 10);
      55    g_assert_cmpstr (out, ==, expected);
      56    g_free (out);
      57  }
      58  
      59  /* Some tests involving "vulgar fraction one half" (U+00BD). This is
      60   * represented in UTF-8 as \xC2\xBD, in ISO-8859-1 as \xBD, and is not
      61   * represented in ISO-8859-15. */
      62  static void 
      63  test_one_half (void)
      64  {
      65    const gchar *in_utf8 = "\xc2\xbd";
      66    gchar *out;
      67    gsize bytes_read = 0;
      68    gsize bytes_written = 0;
      69    GError *error = NULL;  
      70  
      71    out = g_convert (in_utf8, -1,
      72  		   "ISO-8859-1", "UTF-8",
      73  		   &bytes_read, &bytes_written,
      74  		   &error);
      75  
      76    g_assert_no_error (error);
      77    g_assert_cmpint (bytes_read, ==, 2);
      78    g_assert_cmpint (bytes_written, ==, 1);
      79    g_assert_cmpstr (out, ==, "\xbd");
      80    g_free (out);
      81  
      82    out = g_convert (in_utf8, -1,
      83  		   "ISO-8859-15", "UTF-8",
      84  		   &bytes_read, &bytes_written,
      85  		   &error);
      86  
      87    g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
      88    g_assert_cmpint (bytes_read, ==, 0);
      89    g_assert_cmpint (bytes_written, ==, 0);
      90    g_assert_cmpstr (out, ==, NULL);
      91    g_clear_error (&error);
      92    g_free (out);
      93  
      94    out = g_convert_with_fallback (in_utf8, -1,
      95  				 "ISO8859-15", "UTF-8",
      96  				 "a",
      97  				 &bytes_read, &bytes_written,
      98  				 &error);
      99  
     100    g_assert_no_error (error);
     101    g_assert_cmpint (bytes_read, ==, 2);
     102    g_assert_cmpint (bytes_written, ==, 1);
     103    g_assert_cmpstr (out, ==, "a");
     104    g_free (out);
     105  }
     106  
     107  static void
     108  test_byte_order (void)
     109  {
     110    gchar in_be[4] = { 0xfe, 0xff, 0x03, 0x93}; /* capital gamma */
     111    gchar in_le[4] = { 0xff, 0xfe, 0x93, 0x03};
     112    const gchar *expected = "\xce\x93";
     113    gchar *out;
     114    gsize bytes_read = 0;
     115    gsize bytes_written = 0;
     116    GError *error = NULL;  
     117  
     118    out = g_convert (in_be, sizeof (in_be), 
     119  		   "UTF-8", "UTF-16",
     120  		   &bytes_read, &bytes_written,
     121  		   &error);
     122  
     123    g_assert_no_error (error);
     124    g_assert_cmpint (bytes_read, ==, 4);
     125    g_assert_cmpint (bytes_written, ==, 2);
     126    g_assert_cmpstr (out, ==, expected);
     127    g_free (out);
     128  
     129    out = g_convert (in_le, sizeof (in_le), 
     130  		   "UTF-8", "UTF-16",
     131  		   &bytes_read, &bytes_written,
     132  		   &error);
     133  
     134    g_assert_no_error (error);
     135    g_assert_cmpint (bytes_read, ==, 4);
     136    g_assert_cmpint (bytes_written, ==, 2);
     137    g_assert_cmpstr (out, ==, expected);
     138    g_free (out);
     139  }
     140  
     141  static void
     142  check_utf8_to_ucs4 (const char     *utf8,
     143  		    gsize           utf8_len,
     144  		    const gunichar *ucs4,
     145  		    glong           ucs4_len,
     146  		    glong           error_pos)
     147  {
     148    gunichar *result, *result2, *result3;
     149    glong items_read, items_read2;
     150    glong items_written, items_written2;
     151    GError *error, *error2, *error3;
     152    gint i;
     153  
     154    if (!error_pos)
     155      {
     156        /* check the fast conversion */
     157        result = g_utf8_to_ucs4_fast (utf8, utf8_len, &items_written);
     158  
     159        g_assert_cmpint (items_written, ==, ucs4_len);
     160        g_assert (result);
     161        for (i = 0; i <= items_written; i++)
     162  	g_assert (result[i] == ucs4[i]);      
     163  
     164        g_free (result);
     165      }
     166  
     167    error = NULL;
     168    result = g_utf8_to_ucs4 (utf8, utf8_len, &items_read, &items_written, &error);
     169    
     170    if (utf8_len == strlen (utf8))
     171      {
     172        /* check that len == -1 yields identical results */
     173        error2 = NULL;
     174        result2 = g_utf8_to_ucs4 (utf8, -1, &items_read2, &items_written2, &error2);
     175        g_assert (error || items_read2 == items_read);
     176        g_assert (error || items_written2 == items_written);
     177        g_assert_cmpint (!!result, ==, !!result2);
     178        g_assert_cmpint (!!error, ==, !!error2);
     179        if (result)
     180  	for (i = 0; i <= items_written; i++)
     181  	  g_assert (result[i] == result2[i]);
     182  
     183        g_free (result2);
     184        if (error2)
     185  	g_error_free (error2);
     186      }
     187  
     188    error3 = NULL;
     189    result3 = g_utf8_to_ucs4 (utf8, utf8_len, NULL, NULL, &error3);
     190        
     191    if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
     192      {
     193        g_assert_no_error (error);
     194        g_assert_cmpint (items_read, ==, error_pos);
     195        g_assert_cmpint (items_written, ==, ucs4_len);
     196        g_assert (result);
     197        for (i = 0; i <= items_written; i++)
     198  	g_assert (result[i] == ucs4[i]);
     199        g_error_free (error3);
     200      }
     201    else if (error_pos)
     202      {
     203        g_assert (error != NULL);
     204        g_assert (result == NULL);
     205        g_assert_cmpint (items_read, ==, error_pos);
     206        g_error_free (error);
     207  
     208        g_assert (error3 != NULL);
     209        g_assert (result3 == NULL);
     210        g_error_free (error3);
     211      }
     212    else
     213      {
     214        g_assert_no_error (error);
     215        g_assert_cmpint (items_read, ==, utf8_len);
     216        g_assert_cmpint (items_written, ==, ucs4_len);
     217        g_assert (result);
     218        for (i = 0; i <= items_written; i++)
     219  	g_assert (result[i] == ucs4[i]);
     220  
     221        g_assert_no_error (error3);
     222        g_assert (result3);
     223        for (i = 0; i <= ucs4_len; i++)
     224  	g_assert (result3[i] == ucs4[i]);
     225      }
     226  
     227    g_free (result);
     228    g_free (result3);
     229  }
     230  
     231  static void
     232  check_ucs4_to_utf8 (const gunichar *ucs4,
     233  		    glong           ucs4_len,
     234  		    const char     *utf8,
     235  		    glong           utf8_len,
     236  		    glong           error_pos)
     237  {
     238    gchar *result, *result2, *result3;
     239    glong items_read, items_read2;
     240    glong items_written, items_written2;
     241    GError *error, *error2, *error3;
     242  
     243    error = NULL;
     244    result = g_ucs4_to_utf8 (ucs4, ucs4_len, &items_read, &items_written, &error);
     245  
     246    if (ucs4[ucs4_len] == 0)
     247      {
     248        /* check that len == -1 yields identical results */
     249        error2 = NULL;
     250        result2 = g_ucs4_to_utf8 (ucs4, -1, &items_read2, &items_written2, &error2);
     251        
     252        g_assert (error || items_read2 == items_read);
     253        g_assert (error || items_written2 == items_written);
     254        g_assert_cmpint (!!result, ==, !!result2);
     255        g_assert_cmpint (!!error, ==, !!error2);
     256        if (result)
     257  	g_assert_cmpstr (result, ==, result2);
     258  
     259        g_free (result2);
     260        if (error2)
     261  	g_error_free (error2);
     262      }
     263  
     264    error3 = NULL;
     265    result3 = g_ucs4_to_utf8 (ucs4, ucs4_len, NULL, NULL, &error3);
     266        
     267    if (error_pos)
     268      {
     269        g_assert (error != NULL);
     270        g_assert (result == NULL);
     271        g_assert_cmpint (items_read, ==, error_pos);
     272        g_error_free (error);
     273  
     274        g_assert (error3 != NULL);
     275        g_assert (result3 == NULL);
     276        g_error_free (error3);
     277      }
     278    else
     279      {
     280        g_assert_no_error (error);
     281        g_assert_cmpint (items_read, ==, ucs4_len);
     282        g_assert_cmpint (items_written, ==, utf8_len);
     283        g_assert (result);
     284        g_assert_cmpstr (result, ==, utf8);
     285  
     286        g_assert_no_error (error3);
     287        g_assert (result3);
     288        g_assert_cmpstr (result3, ==, utf8);
     289      }
     290  
     291    g_free (result);
     292    g_free (result3);
     293  }
     294  
     295  static void
     296  check_utf8_to_utf16 (const char      *utf8,
     297  		     gsize            utf8_len,
     298  		     const gunichar2 *utf16,
     299  		     glong            utf16_len,
     300  		     glong            error_pos)
     301  {
     302    gunichar2 *result, *result2, *result3;
     303    glong items_read, items_read2;
     304    glong items_written, items_written2;
     305    GError *error, *error2, *error3;
     306    gint i;
     307  
     308    error = NULL;
     309    result = g_utf8_to_utf16 (utf8, utf8_len, &items_read, &items_written, &error);
     310  
     311    if (utf8_len == strlen (utf8))
     312      {
     313        /* check that len == -1 yields identical results */
     314        error2 = NULL;
     315        result2 = g_utf8_to_utf16 (utf8, -1, &items_read2, &items_written2, &error2);
     316        g_assert (error || items_read2 == items_read);
     317        g_assert (error || items_written2 == items_written);
     318        g_assert_cmpint (!!result, ==, !!result2);
     319        g_assert_cmpint (!!error, ==, !!error2);
     320        if (result)
     321  	for (i = 0; i <= items_written; i++)
     322  	  g_assert (result[i] == result2[i]);
     323        
     324        g_free (result2);
     325        if (error2)
     326  	g_error_free (error2);
     327      }
     328  
     329    error3 = NULL;
     330    result3 = g_utf8_to_utf16 (utf8, utf8_len, NULL, NULL, &error3);
     331        
     332    if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
     333      {
     334        g_assert_no_error (error);
     335        g_assert_cmpint (items_read, ==, error_pos);
     336        g_assert_cmpint (items_written, ==, utf16_len);
     337        g_assert (result);
     338        for (i = 0; i <= items_written; i++)
     339  	g_assert (result[i] == utf16[i]);
     340        g_error_free (error3);
     341      }
     342    else if (error_pos)
     343      {
     344        g_assert (error != NULL);
     345        g_assert (result == NULL);
     346        g_assert_cmpint (items_read, ==, error_pos);
     347        g_error_free (error);
     348  
     349        g_assert (error3 != NULL);
     350        g_assert (result3 == NULL);
     351        g_error_free (error3);
     352      }
     353    else
     354      {
     355        g_assert_no_error (error);
     356        g_assert_cmpint (items_read, ==, utf8_len);
     357        g_assert_cmpint (items_written, ==, utf16_len);
     358        g_assert (result);
     359        for (i = 0; i <= items_written; i++)
     360  	g_assert (result[i] == utf16[i]);
     361  
     362        g_assert_no_error (error3);
     363        g_assert (result3);
     364        for (i = 0; i <= utf16_len; i++)
     365  	g_assert (result3[i] == utf16[i]);
     366      }
     367  
     368    g_free (result);
     369    g_free (result3);
     370  }
     371  
     372  static void
     373  check_utf16_to_utf8 (const gunichar2 *utf16,
     374  		     glong            utf16_len,
     375  		     const char      *utf8,
     376  		     glong            utf8_len,
     377  		     glong            error_pos)
     378  {
     379    gchar *result, *result2, *result3;
     380    glong items_read, items_read2;
     381    glong items_written, items_written2;
     382    GError *error, *error2, *error3;
     383  
     384    error = NULL;
     385    result = g_utf16_to_utf8 (utf16, utf16_len, &items_read, &items_written, &error);
     386    if (utf16[utf16_len] == 0)
     387      {
     388        /* check that len == -1 yields identical results */
     389        error2 = NULL;
     390        result2 = g_utf16_to_utf8 (utf16, -1, &items_read2, &items_written2, &error2);
     391        
     392        g_assert (error || items_read2 == items_read);
     393        g_assert (error || items_written2 == items_written);
     394        g_assert_cmpint (!!result, ==, !!result2);
     395        g_assert_cmpint (!!error, ==, !!error2);
     396        if (result)
     397  	g_assert_cmpstr (result, ==, result2);
     398  
     399        g_free (result2);
     400        if (error2)
     401  	g_error_free (error2);
     402      }
     403  
     404    error3 = NULL;
     405    result3 = g_utf16_to_utf8 (utf16, utf16_len, NULL, NULL, &error3);
     406    
     407    if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
     408      {
     409        g_assert_no_error (error);
     410        g_assert_cmpint (items_read, ==, error_pos);
     411        g_assert_cmpint (items_read + 1, ==, utf16_len);
     412        g_assert_cmpint (items_written, ==, utf8_len);
     413        g_assert (result);
     414        g_assert_cmpstr (result, ==, utf8);
     415        g_error_free (error3);
     416      }
     417    else if (error_pos)
     418      {
     419        g_assert (error != NULL);
     420        g_assert (result == NULL);
     421        g_assert_cmpint (items_read, ==, error_pos);
     422        g_error_free (error);
     423  
     424        g_assert (error3 != NULL);
     425        g_assert (result3 == NULL);
     426        g_error_free (error3);
     427      }
     428    else
     429      {
     430        g_assert_no_error (error);
     431        g_assert_cmpint (items_read, ==, utf16_len);
     432        g_assert_cmpint (items_written, ==, utf8_len);
     433        g_assert (result);
     434        g_assert_cmpstr (result, ==, utf8);
     435  
     436        g_assert_no_error (error3);
     437        g_assert (result3);
     438        g_assert_cmpstr (result3, ==, utf8);
     439      }
     440  
     441    g_free (result);
     442    g_free (result3);
     443  }
     444  
     445  static void
     446  check_ucs4_to_utf16 (const gunichar  *ucs4,
     447  		     glong            ucs4_len,
     448  		     const gunichar2 *utf16,
     449  		     glong            utf16_len,
     450  		     glong            error_pos)
     451  {
     452    gunichar2 *result, *result2, *result3;
     453    glong items_read, items_read2;
     454    glong items_written, items_written2;
     455    GError *error, *error2, *error3;
     456    gint i;
     457  
     458    error = NULL;
     459    result = g_ucs4_to_utf16 (ucs4, ucs4_len, &items_read, &items_written, &error);
     460  
     461    if (ucs4[ucs4_len] == 0)
     462      {
     463        /* check that len == -1 yields identical results */
     464        error2 = NULL;
     465        result2 = g_ucs4_to_utf16 (ucs4, -1, &items_read2, &items_written2, &error2);
     466        
     467        g_assert (error || items_read2 == items_read);
     468        g_assert (error || items_written2 == items_written);
     469        g_assert_cmpint (!!result, ==, !!result2);
     470        g_assert_cmpint (!!error, ==, !!error2);
     471        if (result)
     472        for (i = 0; i <= utf16_len; i++)
     473  	g_assert (result[i] == result2[i]);
     474  
     475        g_free (result2);
     476        if (error2)
     477  	g_error_free (error2);
     478      }
     479  
     480    error3 = NULL;
     481    result3 = g_ucs4_to_utf16 (ucs4, -1, NULL, NULL, &error3);
     482        
     483    if (error_pos)
     484      {
     485        g_assert (error != NULL);
     486        g_assert (result == NULL);
     487        g_assert_cmpint (items_read, ==, error_pos);
     488        g_error_free (error);
     489  
     490        g_assert (error3 != NULL);
     491        g_assert (result3 == NULL);
     492        g_error_free (error3);
     493      }
     494    else
     495      {
     496        g_assert_no_error (error);
     497        g_assert_cmpint (items_read, ==, ucs4_len);
     498        g_assert_cmpint (items_written, ==, utf16_len);
     499        g_assert (result);
     500        for (i = 0; i <= utf16_len; i++)
     501  	g_assert (result[i] == utf16[i]);
     502  
     503        g_assert_no_error (error3);
     504        g_assert (result3);
     505        for (i = 0; i <= utf16_len; i++)
     506  	g_assert (result3[i] == utf16[i]);
     507      }
     508  
     509    g_free (result);
     510    g_free (result3);
     511  }
     512  
     513  static void
     514  check_utf16_to_ucs4 (const gunichar2 *utf16,
     515  		     glong            utf16_len,
     516  		     const gunichar  *ucs4,
     517  		     glong            ucs4_len,
     518  		     glong            error_pos)
     519  {
     520    gunichar *result, *result2, *result3;
     521    glong items_read, items_read2;
     522    glong items_written, items_written2;
     523    GError *error, *error2, *error3;
     524    gint i;
     525  
     526    error = NULL;
     527    result = g_utf16_to_ucs4 (utf16, utf16_len, &items_read, &items_written, &error);
     528    if (utf16[utf16_len] == 0)
     529      {
     530        /* check that len == -1 yields identical results */
     531        error2 = NULL;
     532        result2 = g_utf16_to_ucs4 (utf16, -1, &items_read2, &items_written2, &error2);
     533        g_assert (error || items_read2 == items_read);
     534        g_assert (error || items_written2 == items_written);
     535        g_assert_cmpint (!!result, ==, !!result2);
     536        g_assert_cmpint (!!error, ==, !!error2);
     537        if (result)
     538  	for (i = 0; i <= items_written; i++)
     539  	  g_assert (result[i] == result2[i]);
     540  
     541        g_free (result2);
     542        if (error2)
     543  	g_error_free (error2);
     544      }
     545  
     546    error3 = NULL;
     547    result3 = g_utf16_to_ucs4 (utf16, utf16_len, NULL, NULL, &error3);
     548        
     549    if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
     550      {
     551        g_assert_no_error (error);
     552        g_assert_cmpint (items_read, ==, error_pos);
     553        g_assert_cmpint (items_read + 1, ==, utf16_len);
     554        g_assert_cmpint (items_written, ==, ucs4_len);
     555        g_assert (result);
     556        for (i = 0; i <= items_written; i++)
     557  	g_assert (result[i] == ucs4[i]);
     558        g_error_free (error3);
     559      }
     560    else if (error_pos)
     561      {
     562        g_assert (error != NULL);
     563        g_assert (result == NULL);
     564        g_assert_cmpint (items_read, ==, error_pos);
     565        g_error_free (error);
     566  
     567        g_assert (error3 != NULL);
     568        g_assert (result3 == NULL);
     569        g_error_free (error3);
     570      }
     571    else
     572      {
     573        g_assert_no_error (error);
     574        g_assert_cmpint (items_read, ==, utf16_len);
     575        g_assert_cmpint (items_written, ==, ucs4_len);
     576        g_assert (result);
     577        for (i = 0; i <= ucs4_len; i++)
     578  	g_assert (result[i] == ucs4[i]);
     579  
     580        g_assert_no_error (error3);
     581        g_assert (result3);
     582        for (i = 0; i <= ucs4_len; i++)
     583  	g_assert (result3[i] == ucs4[i]);
     584      }
     585  
     586    g_free (result);
     587    g_free (result3);
     588  }
     589  
     590  static void
     591  test_unicode_conversions (void)
     592  {
     593    const char *utf8;
     594    gunichar ucs4[100];
     595    gunichar2 utf16[100];
     596  
     597    utf8 = "abc";
     598    ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x63; ucs4[3] = 0;
     599    utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0x63; utf16[3] = 0;
     600  
     601    check_utf8_to_ucs4 (utf8, 3, ucs4, 3, 0);
     602    check_ucs4_to_utf8 (ucs4, 3, utf8, 3, 0);
     603    check_utf8_to_utf16 (utf8, 3, utf16, 3, 0);
     604    check_utf16_to_utf8 (utf16, 3, utf8, 3, 0);
     605    check_ucs4_to_utf16 (ucs4, 3, utf16, 3, 0);
     606    check_utf16_to_ucs4 (utf16, 3, ucs4, 3, 0);
     607  
     608    utf8 = "\316\261\316\262\316\263";
     609    ucs4[0] = 0x03b1; ucs4[1] = 0x03b2; ucs4[2] = 0x03b3; ucs4[3] = 0;
     610    utf16[0] = 0x03b1; utf16[1] = 0x03b2; utf16[2] = 0x03b3; utf16[3] = 0;
     611  
     612    check_utf8_to_ucs4 (utf8, 6, ucs4, 3, 0);
     613    check_ucs4_to_utf8 (ucs4, 3, utf8, 6, 0);
     614    check_utf8_to_utf16 (utf8, 6, utf16, 3, 0);
     615    check_utf16_to_utf8 (utf16, 3, utf8, 6, 0);
     616    check_ucs4_to_utf16 (ucs4, 3, utf16, 3, 0);
     617    check_utf16_to_ucs4 (utf16, 3, ucs4, 3, 0);
     618  
     619    /* partial utf8 character */
     620    utf8 = "abc\316";
     621    ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x63; ucs4[3] = 0;
     622    utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0x63; utf16[3] = 0;
     623  
     624    check_utf8_to_ucs4 (utf8, 4, ucs4, 3, 3);
     625    check_utf8_to_utf16 (utf8, 4, utf16, 3, 3);
     626  
     627    /* invalid utf8 */
     628    utf8 = "abc\316\316";
     629    ucs4[0] = 0; 
     630    utf16[0] = 0; 
     631  
     632    check_utf8_to_ucs4 (utf8, 5, ucs4, 0, 3);
     633    check_utf8_to_utf16 (utf8, 5, utf16, 0, 3);
     634  
     635    /* partial utf16 character */
     636    utf8 = "ab";
     637    ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0;
     638    utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0xd801; utf16[3] = 0;
     639    
     640    check_utf16_to_utf8 (utf16, 3, utf8, 2, 2);
     641    check_utf16_to_ucs4 (utf16, 3, ucs4, 2, 2);
     642  
     643    /* invalid utf16 */
     644    utf8 = NULL;
     645    ucs4[0] = 0;
     646    utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0xdc01; utf16[3] = 0;
     647  
     648    check_utf16_to_utf8 (utf16, 3, utf8, 0, 2);
     649    check_utf16_to_ucs4 (utf16, 3, ucs4, 0, 2);
     650  
     651    /* invalid ucs4 */
     652    utf8 = NULL;
     653    ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x80000000; ucs4[3] = 0;
     654    utf16[0] = 0;
     655  
     656    check_ucs4_to_utf8 (ucs4, 3, utf8, 0, 2);
     657    check_ucs4_to_utf16 (ucs4, 3, utf16, 0, 2);
     658  }
     659  
     660  static void
     661  test_filename_utf8 (void)
     662  {
     663    const gchar *filename = "/my/path/to/foo";
     664    gchar *utf8;
     665    gchar *back;
     666    GError *error;
     667  
     668    error = NULL;
     669    utf8 = g_filename_to_utf8 (filename, -1, NULL, NULL, &error);
     670    g_assert_no_error (error);
     671    back = g_filename_from_utf8 (utf8, -1, NULL, NULL, &error);
     672    g_assert_no_error (error);
     673    g_assert_cmpstr (back, ==, filename);
     674  
     675    g_free (utf8);
     676    g_free (back);
     677  }
     678  
     679  static void
     680  test_filename_display (void)
     681  {
     682    const gchar *filename = "/my/path/to/foo";
     683    char *display;
     684  
     685    display = g_filename_display_basename (filename);
     686    g_assert_cmpstr (display, ==, "foo");
     687  
     688    g_free (display);
     689  }
     690  
     691  /* g_convert() should accept and produce text buffers with embedded
     692   * nul bytes/characters.
     693   */
     694  static void
     695  test_convert_embedded_nul (void)
     696  {
     697    gchar *res;
     698    gsize bytes_read, bytes_written;
     699    GError *error = NULL;
     700  
     701    res = g_convert ("ab\0\xf6", 4, "UTF-8", "ISO-8859-1",
     702                     &bytes_read, &bytes_written, &error);
     703    g_assert_no_error (error);
     704    g_assert_cmpuint (bytes_read, ==, 4);
     705    g_assert_cmpmem (res, bytes_written, "ab\0\xc3\xb6", 5);
     706    g_free (res);
     707  }
     708  
     709  static void
     710  test_locale_to_utf8_embedded_nul (void)
     711  {
     712    g_test_trap_subprocess ("/conversion/locale-to-utf8/embedded-nul/subprocess/utf8",
     713                            0, G_TEST_SUBPROCESS_DEFAULT);
     714    g_test_trap_assert_passed ();
     715    g_test_trap_subprocess ("/conversion/locale-to-utf8/embedded-nul/subprocess/iconv",
     716                            0, G_TEST_SUBPROCESS_DEFAULT);
     717    g_test_trap_assert_passed ();
     718  }
     719  
     720  /* Test that embedded nul characters in UTF-8 input to g_locale_to_utf8()
     721   * result in an error.
     722   */
     723  static void
     724  test_locale_to_utf8_embedded_nul_utf8 (void)
     725  {
     726    gchar *res;
     727    gsize bytes_read;
     728    GError *error = NULL;
     729  
     730    setlocale (LC_ALL, "");
     731    g_setenv ("CHARSET", "UTF-8", TRUE);
     732    g_assert_true (g_get_charset (NULL));
     733  
     734    res = g_locale_to_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
     735  
     736    g_assert_null (res);
     737    g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
     738    g_assert_cmpuint (bytes_read, ==, 2);
     739    g_error_free (error);
     740  }
     741  
     742  /* Test that embedded nul characters in output of g_locale_to_utf8(),
     743   * when converted from non-UTF8 input, result in an error.
     744   */
     745  static void
     746  test_locale_to_utf8_embedded_nul_iconv (void)
     747  {
     748    gchar *res;
     749    GError *error = NULL;
     750  
     751    setlocale (LC_ALL, "C");
     752    g_setenv ("CHARSET", "US-ASCII", TRUE);
     753    g_assert_false (g_get_charset (NULL));
     754  
     755    res = g_locale_to_utf8 ("ab\0c", 4, NULL, NULL, &error);
     756  
     757    g_assert_null (res);
     758    g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_EMBEDDED_NUL);
     759    g_error_free (error);
     760  }
     761  
     762  static void
     763  test_locale_from_utf8_embedded_nul (void)
     764  {
     765    g_test_trap_subprocess ("/conversion/locale-from-utf8/embedded-nul/subprocess/utf8",
     766                            0, G_TEST_SUBPROCESS_DEFAULT);
     767    g_test_trap_assert_passed ();
     768    g_test_trap_subprocess ("/conversion/locale-from-utf8/embedded-nul/subprocess/iconv",
     769                            0, G_TEST_SUBPROCESS_DEFAULT);
     770    g_test_trap_assert_passed ();
     771  }
     772  
     773  /* Test that embedded nul characters in input to g_locale_from_utf8(),
     774   * when converting (copying) to UTF-8 output, result in an error.
     775   */
     776  static void
     777  test_locale_from_utf8_embedded_nul_utf8 (void)
     778  {
     779    gchar *res;
     780    gsize bytes_read;
     781    GError *error = NULL;
     782  
     783    setlocale (LC_ALL, "");
     784    g_setenv ("CHARSET", "UTF-8", TRUE);
     785    g_assert_true (g_get_charset (NULL));
     786  
     787    res = g_locale_from_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
     788  
     789    g_assert_null (res);
     790    g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
     791    g_assert_cmpuint (bytes_read, ==, 2);
     792    g_error_free (error);
     793  }
     794  
     795  /* Test that embedded nul characters in input to g_locale_from_utf8(),
     796   * when converting to non-UTF-8 output, result in an error.
     797   */
     798  static void
     799  test_locale_from_utf8_embedded_nul_iconv (void)
     800  {
     801    gchar *res;
     802    gsize bytes_read;
     803    GError *error = NULL;
     804  
     805    setlocale (LC_ALL, "C");
     806    g_setenv ("CHARSET", "US-ASCII", TRUE);
     807    g_assert_false (g_get_charset (NULL));
     808  
     809    res = g_locale_from_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
     810  
     811    g_assert_null (res);
     812    g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
     813    g_assert_cmpuint (bytes_read, ==, 2);
     814    g_error_free (error);
     815  }
     816  
     817  static void
     818  test_filename_to_utf8_embedded_nul (void)
     819  {
     820    g_test_trap_subprocess ("/conversion/filename-to-utf8/embedded-nul/subprocess/utf8",
     821                            0, G_TEST_SUBPROCESS_DEFAULT);
     822    g_test_trap_assert_passed ();
     823    g_test_trap_subprocess ("/conversion/filename-to-utf8/embedded-nul/subprocess/iconv",
     824                            0, G_TEST_SUBPROCESS_DEFAULT);
     825    g_test_trap_assert_passed ();
     826  }
     827  
     828  /* Test that embedded nul characters in UTF-8 input to g_filename_to_utf8()
     829   * result in an error.
     830   */
     831  static void
     832  test_filename_to_utf8_embedded_nul_utf8 (void)
     833  {
     834    gchar *res;
     835    gsize bytes_read;
     836    GError *error = NULL;
     837  
     838  #ifndef G_OS_WIN32
     839    /* G_FILENAME_ENCODING has no effect on Windows for g_get_filename_charsets() */
     840    g_setenv ("G_FILENAME_ENCODING", "UTF-8", TRUE);
     841    g_assert_true (g_get_filename_charsets (NULL));
     842  #endif
     843  
     844    res = g_filename_to_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
     845  
     846    g_assert_null (res);
     847    g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
     848    g_assert_cmpuint (bytes_read, ==, 2);
     849    g_error_free (error);
     850  }
     851  
     852  /* Test that embedded nul characters in non-UTF-8 input of g_filename_to_utf8()
     853   * result in an error.
     854   */
     855  static void
     856  test_filename_to_utf8_embedded_nul_iconv (void)
     857  {
     858    gchar *res;
     859    gsize bytes_read;
     860    GError *error = NULL;
     861  
     862  #ifndef G_OS_WIN32
     863    /* G_FILENAME_ENCODING has no effect on Windows for g_get_filename_charsets() */
     864    g_setenv ("G_FILENAME_ENCODING", "US-ASCII", TRUE);
     865    g_assert_false (g_get_filename_charsets (NULL));
     866  #endif
     867  
     868    res = g_filename_to_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
     869  
     870    g_assert_null (res);
     871    g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
     872    g_assert_cmpuint (bytes_read, ==, 2);
     873    g_error_free (error);
     874  }
     875  
     876  static void
     877  test_filename_from_utf8_embedded_nul (void)
     878  {
     879    g_test_trap_subprocess ("/conversion/filename-from-utf8/embedded-nul/subprocess/utf8",
     880                            0, G_TEST_SUBPROCESS_DEFAULT);
     881    g_test_trap_assert_passed ();
     882    g_test_trap_subprocess ("/conversion/filename-from-utf8/embedded-nul/subprocess/iconv",
     883                            0, G_TEST_SUBPROCESS_DEFAULT);
     884    g_test_trap_assert_passed ();
     885  }
     886  
     887  /* Test that embedded nul characters in input to g_filename_from_utf8(),
     888   * when converting (copying) to UTF-8 output, result in an error.
     889   */
     890  static void
     891  test_filename_from_utf8_embedded_nul_utf8 (void)
     892  {
     893    gchar *res;
     894    gsize bytes_read;
     895    GError *error = NULL;
     896  
     897  #ifndef G_OS_WIN32
     898    /* G_FILENAME_ENCODING has no effect on Windows for g_get_filename_charsets() */
     899    g_setenv ("G_FILENAME_ENCODING", "UTF-8", TRUE);
     900    g_assert_true (g_get_filename_charsets (NULL));
     901  #endif
     902  
     903    res = g_filename_from_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
     904  
     905    g_assert_null (res);
     906    g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
     907    g_assert_cmpuint (bytes_read, ==, 2);
     908    g_error_free (error);
     909  }
     910  
     911  /* Test that embedded nul characters in input to g_filename_from_utf8(),
     912   * when converting to non-UTF-8 output, result in an error.
     913   */
     914  static void
     915  test_filename_from_utf8_embedded_nul_iconv (void)
     916  {
     917    gchar *res;
     918    gsize bytes_read;
     919    GError *error = NULL;
     920  
     921  #ifndef G_OS_WIN32
     922    /* G_FILENAME_ENCODING has no effect on Windows for g_get_filename_charsets() */
     923    g_setenv ("G_FILENAME_ENCODING", "US-ASCII", TRUE);
     924    g_assert_false (g_get_filename_charsets (NULL));
     925  #endif
     926  
     927    res = g_filename_from_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
     928  
     929    g_assert_null (res);
     930    g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
     931    g_assert_cmpuint (bytes_read, ==, 2);
     932    g_error_free (error);
     933  }
     934  
     935  static void
     936  test_no_conv (void)
     937  {
     938    const gchar *in = "";
     939    gchar *out G_GNUC_UNUSED;
     940    gsize bytes_read = 0;
     941    gsize bytes_written = 0;
     942    GError *error = NULL;
     943  
     944    out = g_convert (in, -1, "XXX", "UVZ",
     945                     &bytes_read, &bytes_written, &error);
     946  
     947    /* error code is unreliable, since we mishandle errno there */
     948    g_assert (error && error->domain == G_CONVERT_ERROR);
     949    g_error_free (error);
     950  }
     951  
     952  static void
     953  test_filename_from_uri_helper (const gchar *uri,
     954  			       const gchar *expected_filename)
     955  {
     956    gchar *filename;
     957    gchar *expected_platform_filename;
     958    GError *error = NULL;
     959  
     960    expected_platform_filename = g_strdup (expected_filename);
     961  #ifdef G_OS_WIN32
     962    for (gchar *p = expected_platform_filename; *p; p++)
     963      {
     964        if (*p == '/')
     965  	*p = '\\';
     966      }
     967  #endif
     968  
     969    filename = g_filename_from_uri (uri, NULL, &error);
     970    g_assert_no_error (error);
     971    g_assert_cmpstr (filename, ==, expected_platform_filename);
     972    g_free (filename);
     973    g_free (expected_platform_filename);
     974  }
     975  
     976  static void
     977  test_filename_from_uri_query_is_ignored (void)
     978  {
     979    test_filename_from_uri_helper ("file:///tmp/foo?bar", "/tmp/foo");
     980    test_filename_from_uri_helper ("file:///tmp/foo?bar#baz", "/tmp/foo");
     981  }
     982  
     983  static void
     984  test_filename_from_uri_fragment_is_ignored (void)
     985  {
     986    test_filename_from_uri_helper ("file:///tmp/foo#bar", "/tmp/foo");
     987    /* this doesn't have a query, only a bizarre anchor */
     988    test_filename_from_uri_helper ("file:///tmp/foo#bar?baz", "/tmp/foo");
     989  }
     990  
     991  int
     992  main (int argc, char *argv[])
     993  {
     994    g_test_init (&argc, &argv, NULL);
     995  
     996    g_test_add_func ("/conversion/no-conv", test_no_conv);
     997    g_test_add_func ("/conversion/iconv-state", test_iconv_state);
     998    g_test_add_func ("/conversion/illegal-sequence", test_one_half);
     999    g_test_add_func ("/conversion/byte-order", test_byte_order);
    1000    g_test_add_func ("/conversion/unicode", test_unicode_conversions);
    1001    g_test_add_func ("/conversion/filename-utf8", test_filename_utf8);
    1002    g_test_add_func ("/conversion/filename-display", test_filename_display);
    1003    g_test_add_func ("/conversion/convert-embedded-nul", test_convert_embedded_nul);
    1004    g_test_add_func ("/conversion/locale-to-utf8/embedded-nul", test_locale_to_utf8_embedded_nul);
    1005    g_test_add_func ("/conversion/locale-to-utf8/embedded-nul/subprocess/utf8", test_locale_to_utf8_embedded_nul_utf8);
    1006    g_test_add_func ("/conversion/locale-to-utf8/embedded-nul/subprocess/iconv", test_locale_to_utf8_embedded_nul_iconv);
    1007    g_test_add_func ("/conversion/locale-from-utf8/embedded-nul", test_locale_from_utf8_embedded_nul);
    1008    g_test_add_func ("/conversion/locale-from-utf8/embedded-nul/subprocess/utf8", test_locale_from_utf8_embedded_nul_utf8);
    1009    g_test_add_func ("/conversion/locale-from-utf8/embedded-nul/subprocess/iconv", test_locale_from_utf8_embedded_nul_iconv);
    1010    g_test_add_func ("/conversion/filename-to-utf8/embedded-nul", test_filename_to_utf8_embedded_nul);
    1011    g_test_add_func ("/conversion/filename-to-utf8/embedded-nul/subprocess/utf8", test_filename_to_utf8_embedded_nul_utf8);
    1012    g_test_add_func ("/conversion/filename-to-utf8/embedded-nul/subprocess/iconv", test_filename_to_utf8_embedded_nul_iconv);
    1013    g_test_add_func ("/conversion/filename-from-utf8/embedded-nul", test_filename_from_utf8_embedded_nul);
    1014    g_test_add_func ("/conversion/filename-from-utf8/embedded-nul/subprocess/utf8", test_filename_from_utf8_embedded_nul_utf8);
    1015    g_test_add_func ("/conversion/filename-from-utf8/embedded-nul/subprocess/iconv", test_filename_from_utf8_embedded_nul_iconv);
    1016    g_test_add_func ("/conversion/filename-from-uri/query-is-ignored", test_filename_from_uri_query_is_ignored);
    1017    g_test_add_func ("/conversion/filename-from-uri/fragment-is-ignored", test_filename_from_uri_fragment_is_ignored);
    1018  
    1019    return g_test_run ();
    1020  }