(root)/
glibc-2.38/
wcsmbs/
test-c8rtomb.c
       1  /* Test c8rtomb.
       2     Copyright (C) 2022-2023 Free Software Foundation, Inc.
       3     This file is part of the GNU C Library.
       4  
       5     The GNU C Library is free software; you can redistribute it and/or
       6     modify it under the terms of the GNU Lesser General Public
       7     License as published by the Free Software Foundation; either
       8     version 2.1 of the License, or (at your option) any later version.
       9  
      10     The GNU C Library is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      13     Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public
      16     License along with the GNU C Library; if not, see
      17     <https://www.gnu.org/licenses/>.  */
      18  
      19  #include <errno.h>
      20  #include <limits.h>
      21  #include <locale.h>
      22  #include <stdio.h>
      23  #include <stdlib.h>
      24  #include <string.h>
      25  #include <uchar.h>
      26  #include <wchar.h>
      27  #include <support/check.h>
      28  #include <support/support.h>
      29  
      30  static int
      31  test_truncated_code_unit_sequence (void)
      32  {
      33    /* Missing trailing code unit for a two code byte unit sequence.  */
      34    {
      35      const char8_t *u8s = (const char8_t*) u8"\xC2";
      36      char buf[MB_LEN_MAX] = { 0 };
      37      mbstate_t s = { 0 };
      38  
      39      TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t)  0);
      40      errno = 0;
      41      TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) -1);
      42      TEST_COMPARE (errno, EILSEQ);
      43    }
      44  
      45    /* Missing first trailing code unit for a three byte code unit sequence.  */
      46    {
      47      const char8_t *u8s = (const char8_t*) u8"\xE0";
      48      char buf[MB_LEN_MAX] = { 0 };
      49      mbstate_t s = { 0 };
      50  
      51      TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t)  0);
      52      errno = 0;
      53      TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) -1);
      54      TEST_COMPARE (errno, EILSEQ);
      55    }
      56  
      57    /* Missing second trailing code unit for a three byte code unit sequence.  */
      58    {
      59      const char8_t *u8s = (const char8_t*) u8"\xE0\xA0";
      60      char buf[MB_LEN_MAX] = { 0 };
      61      mbstate_t s = { 0 };
      62  
      63      TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t)  0);
      64      TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t)  0);
      65      errno = 0;
      66      TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) -1);
      67      TEST_COMPARE (errno, EILSEQ);
      68    }
      69  
      70    /* Missing first trailing code unit for a four byte code unit sequence.  */
      71    {
      72      const char8_t *u8s = (const char8_t*) u8"\xF0";
      73      char buf[MB_LEN_MAX] = { 0 };
      74      mbstate_t s = { 0 };
      75  
      76      TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t)  0);
      77      errno = 0;
      78      TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) -1);
      79      TEST_COMPARE (errno, EILSEQ);
      80    }
      81  
      82    /* Missing second trailing code unit for a four byte code unit sequence.  */
      83    {
      84      const char8_t *u8s = (const char8_t*) u8"\xF0\x90";
      85      char buf[MB_LEN_MAX] = { 0 };
      86      mbstate_t s = { 0 };
      87  
      88      TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t)  0);
      89      TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t)  0);
      90      errno = 0;
      91      TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) -1);
      92      TEST_COMPARE (errno, EILSEQ);
      93    }
      94  
      95    /* Missing third trailing code unit for a four byte code unit sequence.  */
      96    {
      97      const char8_t *u8s = (const char8_t*) u8"\xF0\x90\x80";
      98      char buf[MB_LEN_MAX] = { 0 };
      99      mbstate_t s = { 0 };
     100  
     101      TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t)  0);
     102      TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t)  0);
     103      TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t)  0);
     104      errno = 0;
     105      TEST_COMPARE (c8rtomb (buf, u8s[3], &s), (size_t) -1);
     106      TEST_COMPARE (errno, EILSEQ);
     107    }
     108  
     109    return 0;
     110  }
     111  
     112  static int
     113  test_invalid_trailing_code_unit_sequence (void)
     114  {
     115    /* Invalid trailing code unit for a two code byte unit sequence.  */
     116    {
     117      const char8_t *u8s = (const char8_t*) u8"\xC2\xC0";
     118      char buf[MB_LEN_MAX] = { 0 };
     119      mbstate_t s = { 0 };
     120  
     121      TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t)  0);
     122      errno = 0;
     123      TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) -1);
     124      TEST_COMPARE (errno, EILSEQ);
     125    }
     126  
     127    /* Invalid first trailing code unit for a three byte code unit sequence.  */
     128    {
     129      const char8_t *u8s = (const char8_t*) u8"\xE0\xC0";
     130      char buf[MB_LEN_MAX] = { 0 };
     131      mbstate_t s = { 0 };
     132  
     133      TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t)  0);
     134      errno = 0;
     135      TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) -1);
     136      TEST_COMPARE (errno, EILSEQ);
     137    }
     138  
     139    /* Invalid second trailing code unit for a three byte code unit sequence.  */
     140    {
     141      const char8_t *u8s = (const char8_t*) u8"\xE0\xA0\xC0";
     142      char buf[MB_LEN_MAX] = { 0 };
     143      mbstate_t s = { 0 };
     144  
     145      TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t)  0);
     146      TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t)  0);
     147      errno = 0;
     148      TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) -1);
     149      TEST_COMPARE (errno, EILSEQ);
     150    }
     151  
     152    /* Invalid first trailing code unit for a four byte code unit sequence.  */
     153    {
     154      const char8_t *u8s = (const char8_t*) u8"\xF0\xC0";
     155      char buf[MB_LEN_MAX] = { 0 };
     156      mbstate_t s = { 0 };
     157  
     158      TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t)  0);
     159      errno = 0;
     160      TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) -1);
     161      TEST_COMPARE (errno, EILSEQ);
     162    }
     163  
     164    /* Invalid second trailing code unit for a four byte code unit sequence.  */
     165    {
     166      const char8_t *u8s = (const char8_t*) u8"\xF0\x90\xC0";
     167      char buf[MB_LEN_MAX] = { 0 };
     168      mbstate_t s = { 0 };
     169  
     170      TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t)  0);
     171      TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t)  0);
     172      errno = 0;
     173      TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) -1);
     174      TEST_COMPARE (errno, EILSEQ);
     175    }
     176  
     177    /* Invalid third trailing code unit for a four byte code unit sequence.  */
     178    {
     179      const char8_t *u8s = (const char8_t*) u8"\xF0\x90\x80\xC0";
     180      char buf[MB_LEN_MAX] = { 0 };
     181      mbstate_t s = { 0 };
     182  
     183      TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t)  0);
     184      TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t)  0);
     185      TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t)  0);
     186      errno = 0;
     187      TEST_COMPARE (c8rtomb (buf, u8s[3], &s), (size_t) -1);
     188      TEST_COMPARE (errno, EILSEQ);
     189    }
     190  
     191    return 0;
     192  }
     193  
     194  static int
     195  test_lone_trailing_code_units (void)
     196  {
     197    /* Lone trailing code unit.  */
     198    const char8_t *u8s = (const char8_t*) u8"\x80";
     199    char buf[MB_LEN_MAX] = { 0 };
     200    mbstate_t s = { 0 };
     201  
     202    errno = 0;
     203    TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) -1);
     204    TEST_COMPARE (errno, EILSEQ);
     205  
     206    return 0;
     207  }
     208  
     209  static int
     210  test_overlong_encoding (void)
     211  {
     212    /* Two byte overlong encoding.  */
     213    {
     214      const char8_t *u8s = (const char8_t*) u8"\xC0\x80";
     215      char buf[MB_LEN_MAX] = { 0 };
     216      mbstate_t s = { 0 };
     217  
     218      errno = 0;
     219      TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) -1);
     220      TEST_COMPARE (errno, EILSEQ);
     221    }
     222  
     223    /* Two byte overlong encoding.  */
     224    {
     225      const char8_t *u8s = (const char8_t*) u8"\xC1\x80";
     226      char buf[MB_LEN_MAX] = { 0 };
     227      mbstate_t s = { 0 };
     228  
     229      errno = 0;
     230      TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) -1);
     231      TEST_COMPARE (errno, EILSEQ);
     232    }
     233  
     234    /* Three byte overlong encoding.  */
     235    {
     236      const char8_t *u8s = (const char8_t*) u8"\xE0\x9F\xBF";
     237      char buf[MB_LEN_MAX] = { 0 };
     238      mbstate_t s = { 0 };
     239  
     240      TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t)  0);
     241      errno = 0;
     242      TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) -1);
     243      TEST_COMPARE (errno, EILSEQ);
     244    }
     245  
     246    /* Four byte overlong encoding.  */
     247    {
     248      const char8_t *u8s = (const char8_t*) u8"\xF0\x8F\xBF\xBF";
     249      char buf[MB_LEN_MAX] = { 0 };
     250      mbstate_t s = { 0 };
     251  
     252      TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t)  0);
     253      errno = 0;
     254      TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) -1);
     255      TEST_COMPARE (errno, EILSEQ);
     256    }
     257  
     258    return 0;
     259  }
     260  
     261  static int
     262  test_surrogate_range (void)
     263  {
     264    /* Would encode U+D800.  */
     265    {
     266      const char8_t *u8s = (const char8_t*) u8"\xED\xA0\x80";
     267      char buf[MB_LEN_MAX] = { 0 };
     268      mbstate_t s = { 0 };
     269  
     270      TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t)  0);
     271      errno = 0;
     272      TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) -1);
     273      TEST_COMPARE (errno, EILSEQ);
     274    }
     275  
     276    /* Would encode U+DFFF.  */
     277    {
     278      const char8_t *u8s = (const char8_t*) u8"\xED\xBF\xBF";
     279      char buf[MB_LEN_MAX] = { 0 };
     280      mbstate_t s = { 0 };
     281  
     282      TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t)  0);
     283      errno = 0;
     284      TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) -1);
     285      TEST_COMPARE (errno, EILSEQ);
     286    }
     287  
     288    return 0;
     289  }
     290  
     291  static int
     292  test_out_of_range_encoding (void)
     293  {
     294    /* Would encode U+00110000.  */
     295    {
     296      const char8_t *u8s = (const char8_t*) u8"\xF4\x90\x80\x80";
     297      char buf[MB_LEN_MAX] = { 0 };
     298      mbstate_t s = { 0 };
     299  
     300      TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t)  0);
     301      errno = 0;
     302      TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) -1);
     303      TEST_COMPARE (errno, EILSEQ);
     304    }
     305  
     306    /* Would encode U+00140000.  */
     307    {
     308      const char8_t *u8s = (const char8_t*) u8"\xF5\x90\x80\x80";
     309      char buf[MB_LEN_MAX] = { 0 };
     310      mbstate_t s = { 0 };
     311  
     312      errno = 0;
     313      TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) -1);
     314      TEST_COMPARE (errno, EILSEQ);
     315    }
     316  
     317    return 0;
     318  }
     319  
     320  static int
     321  test_null_output_buffer (void)
     322  {
     323    /* Null character with an initial state.  */
     324    {
     325      mbstate_t s = { 0 };
     326  
     327      TEST_COMPARE (c8rtomb (NULL, u8"X"[0], &s), (size_t) 1);
     328      /* Assert the state is now an initial state.  */
     329      TEST_VERIFY (mbsinit (&s));
     330    }
     331  
     332    /* Null buffer with a state corresponding to an incompletely read code
     333       unit sequence.  In this case, an error occurs since insufficient
     334       information is available to complete the already started code unit
     335       sequence and return to the initial state.  */
     336    {
     337      char buf[MB_LEN_MAX] = { 0 };
     338      mbstate_t s = { 0 };
     339  
     340      TEST_COMPARE (c8rtomb (buf, u8"\xC2"[0], &s), (size_t)  0);
     341      errno = 0;
     342      TEST_COMPARE (c8rtomb (NULL, u8"\x80"[0], &s), (size_t) -1);
     343      TEST_COMPARE (errno, EILSEQ);
     344    }
     345  
     346    return 0;
     347  }
     348  
     349  static int
     350  test_utf8 (void)
     351  {
     352    xsetlocale (LC_ALL, "de_DE.UTF-8");
     353  
     354    /* Null character.  */
     355    {
     356      /* U+0000 => 0x00 */
     357      const char8_t *u8s = (const char8_t*) u8"\x00";
     358      char buf[MB_LEN_MAX] = { 0 };
     359      mbstate_t s = { 0 };
     360  
     361      TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 1);
     362      TEST_COMPARE (buf[0], (char) 0x00);
     363      TEST_VERIFY (mbsinit (&s));
     364    }
     365  
     366    /* First non-null character in the code point range that maps to a single
     367       code unit.  */
     368    {
     369      /* U+0001 => 0x01 */
     370      const char8_t *u8s = (const char8_t*) u8"\x01";
     371      char buf[MB_LEN_MAX] = { 0 };
     372      mbstate_t s = { 0 };
     373  
     374      TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 1);
     375      TEST_COMPARE (buf[0], (char) 0x01);
     376      TEST_VERIFY (mbsinit (&s));
     377    }
     378  
     379    /* Last character in the code point range that maps to a single code unit.  */
     380    {
     381      /* U+007F => 0x7F */
     382      const char8_t *u8s = (const char8_t*) u8"\x7F";
     383      char buf[MB_LEN_MAX] = { 0 };
     384      mbstate_t s = { 0 };
     385  
     386      TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 1);
     387      TEST_COMPARE (buf[0], (char) 0x7F);
     388      TEST_VERIFY (mbsinit (&s));
     389    }
     390  
     391    /* First character in the code point range that maps to two code units.  */
     392    {
     393      /* U+0080 => 0xC2 0x80 */
     394      const char8_t *u8s = (const char8_t*) u8"\xC2\x80";
     395      char buf[MB_LEN_MAX] = { 0 };
     396      mbstate_t s = { 0 };
     397  
     398      TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0);
     399      TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 2);
     400      TEST_COMPARE (buf[0], (char) 0xC2);
     401      TEST_COMPARE (buf[1], (char) 0x80);
     402      TEST_VERIFY (mbsinit (&s));
     403    }
     404  
     405    /* Last character in the code point range that maps to two code units.  */
     406    {
     407      /* U+07FF => 0xDF 0xBF */
     408      const char8_t *u8s = (const char8_t*) u8"\u07FF";
     409      char buf[MB_LEN_MAX] = { 0 };
     410      mbstate_t s = { 0 };
     411  
     412      TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0);
     413      TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 2);
     414      TEST_COMPARE (buf[0], (char) 0xDF);
     415      TEST_COMPARE (buf[1], (char) 0xBF);
     416      TEST_VERIFY (mbsinit (&s));
     417    }
     418  
     419    /* First character in the code point range that maps to three code units.  */
     420    {
     421      /* U+0800 => 0xE0 0xA0 0x80 */
     422      const char8_t *u8s = (const char8_t*) u8"\u0800";
     423      char buf[MB_LEN_MAX] = { 0 };
     424      mbstate_t s = { 0 };
     425  
     426      TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0);
     427      TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0);
     428      TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) 3);
     429      TEST_COMPARE (buf[0], (char) 0xE0);
     430      TEST_COMPARE (buf[1], (char) 0xA0);
     431      TEST_COMPARE (buf[2], (char) 0x80);
     432      TEST_VERIFY (mbsinit (&s));
     433    }
     434  
     435    /* Last character in the code point range that maps to three code units
     436       before the surrogate code point range.  */
     437    {
     438      /* U+D7FF => 0xED 0x9F 0xBF */
     439      const char8_t *u8s = (const char8_t*) u8"\uD7FF";
     440      char buf[MB_LEN_MAX] = { 0 };
     441      mbstate_t s = { 0 };
     442  
     443      TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0);
     444      TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0);
     445      TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) 3);
     446      TEST_COMPARE (buf[0], (char) 0xED);
     447      TEST_COMPARE (buf[1], (char) 0x9F);
     448      TEST_COMPARE (buf[2], (char) 0xBF);
     449      TEST_VERIFY (mbsinit (&s));
     450    }
     451  
     452    /* First character in the code point range that maps to three code units
     453       after the surrogate code point range.  */
     454    {
     455      /* U+E000 => 0xEE 0x80 0x80 */
     456      const char8_t *u8s = (const char8_t*) u8"\uE000";
     457      char buf[MB_LEN_MAX] = { 0 };
     458      mbstate_t s = { 0 };
     459  
     460      TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0);
     461      TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0);
     462      TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) 3);
     463      TEST_COMPARE (buf[0], (char) 0xEE);
     464      TEST_COMPARE (buf[1], (char) 0x80);
     465      TEST_COMPARE (buf[2], (char) 0x80);
     466      TEST_VERIFY (mbsinit (&s));
     467    }
     468  
     469    /* Not a BOM.  */
     470    {
     471      /* U+FEFF => 0xEF 0xBB 0xBF */
     472      const char8_t *u8s = (const char8_t*) u8"\uFEFF";
     473      char buf[MB_LEN_MAX] = { 0 };
     474      mbstate_t s = { 0 };
     475  
     476      TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0);
     477      TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0);
     478      TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) 3);
     479      TEST_COMPARE (buf[0], (char) 0xEF);
     480      TEST_COMPARE (buf[1], (char) 0xBB);
     481      TEST_COMPARE (buf[2], (char) 0xBF);
     482      TEST_VERIFY (mbsinit (&s));
     483    }
     484  
     485    /* Replacement character.  */
     486    {
     487      /* U+FFFD => 0xEF 0xBF 0xBD */
     488      const char8_t *u8s = (const char8_t*) u8"\uFFFD";
     489      char buf[MB_LEN_MAX] = { 0 };
     490      mbstate_t s = { 0 };
     491  
     492      TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0);
     493      TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0);
     494      TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) 3);
     495      TEST_COMPARE (buf[0], (char) 0xEF);
     496      TEST_COMPARE (buf[1], (char) 0xBF);
     497      TEST_COMPARE (buf[2], (char) 0xBD);
     498      TEST_VERIFY (mbsinit (&s));
     499    }
     500  
     501    /* Last character in the code point range that maps to three code units.  */
     502    {
     503      /* U+FFFF => 0xEF 0xBF 0xBF */
     504      const char8_t *u8s = (const char8_t*) u8"\uFFFF";
     505      char buf[MB_LEN_MAX] = { 0 };
     506      mbstate_t s = { 0 };
     507  
     508      TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0);
     509      TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0);
     510      TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) 3);
     511      TEST_COMPARE (buf[0], (char) 0xEF);
     512      TEST_COMPARE (buf[1], (char) 0xBF);
     513      TEST_COMPARE (buf[2], (char) 0xBF);
     514      TEST_VERIFY (mbsinit (&s));
     515    }
     516  
     517    /* First character in the code point range that maps to four code units.  */
     518    {
     519      /* U+10000 => 0xF0 0x90 0x80 0x80 */
     520      const char8_t *u8s = (const char8_t*) u8"\U00010000";
     521      char buf[MB_LEN_MAX] = { 0 };
     522      mbstate_t s = { 0 };
     523  
     524      TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0);
     525      TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0);
     526      TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) 0);
     527      TEST_COMPARE (c8rtomb (buf, u8s[3], &s), (size_t) 4);
     528      TEST_COMPARE (buf[0], (char) 0xF0);
     529      TEST_COMPARE (buf[1], (char) 0x90);
     530      TEST_COMPARE (buf[2], (char) 0x80);
     531      TEST_COMPARE (buf[3], (char) 0x80);
     532      TEST_VERIFY (mbsinit (&s));
     533    }
     534  
     535    /* Last character in the code point range that maps to four code units.  */
     536    {
     537      /* U+10FFFF => 0xF4 0x8F 0xBF 0xBF */
     538      const char8_t *u8s = (const char8_t*) u8"\U0010FFFF";
     539      char buf[MB_LEN_MAX] = { 0 };
     540      mbstate_t s = { 0 };
     541  
     542      TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0);
     543      TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0);
     544      TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) 0);
     545      TEST_COMPARE (c8rtomb (buf, u8s[3], &s), (size_t) 4);
     546      TEST_COMPARE (buf[0], (char) 0xF4);
     547      TEST_COMPARE (buf[1], (char) 0x8F);
     548      TEST_COMPARE (buf[2], (char) 0xBF);
     549      TEST_COMPARE (buf[3], (char) 0xBF);
     550      TEST_VERIFY (mbsinit (&s));
     551    }
     552  
     553    return 0;
     554  }
     555  
     556  static int
     557  test_big5_hkscs (void)
     558  {
     559    xsetlocale (LC_ALL, "zh_HK.BIG5-HKSCS");
     560  
     561    /* A pair of two byte UTF-8 code unit sequences that map a Unicode code
     562       point and combining character to a single double byte character.  */
     563    {
     564      /* U+00CA U+0304 => 0x88 0x62 */
     565      const char8_t *u8s = (const char8_t*) u8"\u00CA\u0304";
     566      char buf[MB_LEN_MAX] = { 0 };
     567      mbstate_t s = { 0 };
     568  
     569      TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0);
     570      TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0);
     571      TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) 0);
     572      TEST_COMPARE (c8rtomb (buf, u8s[3], &s), (size_t) 2);
     573      TEST_COMPARE (buf[0], (char) 0x88);
     574      TEST_COMPARE (buf[1], (char) 0x62);
     575      TEST_VERIFY (mbsinit (&s));
     576    }
     577  
     578    /* Another pair of two byte UTF-8 code unit sequences that map a Unicode code
     579       point and combining character to a single double byte character.  */
     580    {
     581      /* U+00EA U+030C => 0x88 0xA5 */
     582      const char8_t *u8s = (const char8_t*) u8"\u00EA\u030C";
     583      char buf[MB_LEN_MAX] = { 0 };
     584      mbstate_t s = { 0 };
     585  
     586      TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0);
     587      TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0);
     588      TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) 0);
     589      TEST_COMPARE (c8rtomb (buf, u8s[3], &s), (size_t) 2);
     590      TEST_COMPARE (buf[0], (char) 0x88);
     591      TEST_COMPARE (buf[1], (char) 0xA5);
     592      TEST_VERIFY (mbsinit (&s));
     593    }
     594  
     595    return 0;
     596  }
     597  
     598  static int
     599  do_test (void)
     600  {
     601    test_truncated_code_unit_sequence ();
     602    test_invalid_trailing_code_unit_sequence ();
     603    test_lone_trailing_code_units ();
     604    test_overlong_encoding ();
     605    test_surrogate_range ();
     606    test_out_of_range_encoding ();
     607    test_null_output_buffer ();
     608    test_utf8 ();
     609    test_big5_hkscs ();
     610    return 0;
     611  }
     612  
     613  #include <support/test-driver.c>