(root)/
coreutils-9.4/
gnulib-tests/
test-mbrtoc32.c
       1  /* Test of conversion of multibyte character to 32-bit wide character.
       2     Copyright (C) 2008-2023 Free Software Foundation, Inc.
       3  
       4     This program is free software: you can redistribute it and/or modify
       5     it under the terms of the GNU General Public License as published by
       6     the Free Software Foundation, either version 3 of the License, or
       7     (at your option) any later version.
       8  
       9     This program is distributed in the hope that it will be useful,
      10     but WITHOUT ANY WARRANTY; without even the implied warranty of
      11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12     GNU General Public License for more details.
      13  
      14     You should have received a copy of the GNU General Public License
      15     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      16  
      17  /* Written by Bruno Haible <bruno@clisp.org>, 2008.  */
      18  
      19  #include <config.h>
      20  
      21  #include <uchar.h>
      22  
      23  #include "signature.h"
      24  SIGNATURE_CHECK (mbrtoc32, size_t,
      25                   (char32_t *, const char *, size_t, mbstate_t *));
          /* NOTE(review): SIGNATURE_CHECK is presumably supplied by signature.h and
             verifies at compile time that mbrtoc32 is declared with return type
             size_t and the parameter list given above -- confirm in signature.h.  */
      26  
      27  #include <locale.h>
      28  #include <stdio.h>
      29  #include <stdlib.h>
      30  #include <string.h>
      31  #include <wchar.h>
      32  
      33  #include "macros.h"
      34  
      35  int
      36  main (int argc, char *argv[])
      37  {
            /* Test driver for mbrtoc32.  It first runs checks that do not look at
               the command line, then one locale-specific group selected by the
               first character of argv[1]:
                 '1' C or POSIX locale, '2' ISO-8859-1 or ISO-8859-15, '3' UTF-8,
                 '4' EUC-JP, '5' GB18030.
               Returns 0 when the selected group runs to completion, 77 together
               with a "Skipping test" message for the GB18030 group on the platforms
               named in its #if, and 1 when setlocale fails or when no group was
               selected.
               NOTE(review): ASSERT presumably comes from macros.h and terminates
               the program when its condition is false -- confirm there.  */
      38    mbstate_t state;
      39    char32_t wc;
      40    size_t ret;
      41  
      42    /* configure should already have checked that the locale is supported.  */
      43    if (setlocale (LC_ALL, "") == NULL)
      44      return 1;
      45  
      46    /* Test zero-length input.  */
            /* With n == 0 the call is expected to return (size_t)(-2) and to leave
               the freshly zeroed state in its initial condition.  */
      47    {
      48      memset (&state, '\0', sizeof (mbstate_t));
              /* 0xBADFACE is a sentinel value: later checks compare wc against it
                 to tell whether the call stored anything through its first
                 argument.  */
      49      wc = (char32_t) 0xBADFACE;
      50      ret = mbrtoc32 (&wc, "x", 0, &state);
      51      ASSERT (ret == (size_t)(-2));
      52      ASSERT (mbsinit (&state));
      53    }
      54  
      55    /* Test NUL byte input.  */
            /* Expected: return value 0 and wc == 0; the return value must also be 0
               when the destination pointer is NULL.  */
      56    {
      57      memset (&state, '\0', sizeof (mbstate_t));
      58      wc = (char32_t) 0xBADFACE;
      59      ret = mbrtoc32 (&wc, "", 1, &state);
      60      ASSERT (ret == 0);
      61      ASSERT (wc == 0);
      62      ASSERT (mbsinit (&state));
      63      ret = mbrtoc32 (NULL, "", 1, &state);
      64      ASSERT (ret == 0);
      65      ASSERT (mbsinit (&state));
      66    }
      67  
      68    /* Test single-byte input.  */
            /* The loop visits every byte value 0..0xFF, but only the characters
               listed in the case labels are converted; all other values fall
               through to the empty default branch.  The state is zeroed once before
               the loop and asserted to be initial after every call.  */
      69    {
      70      int c;
      71      char buf[1];
      72  
      73      memset (&state, '\0', sizeof (mbstate_t));
      74      for (c = 0; c < 0x100; c++)
      75        switch (c)
      76          {
      77          case '\t': case '\v': case '\f':
      78          case ' ': case '!': case '"': case '#': case '%':
      79          case '&': case '\'': case '(': case ')': case '*':
      80          case '+': case ',': case '-': case '.': case '/':
      81          case '0': case '1': case '2': case '3': case '4':
      82          case '5': case '6': case '7': case '8': case '9':
      83          case ':': case ';': case '<': case '=': case '>':
      84          case '?':
      85          case 'A': case 'B': case 'C': case 'D': case 'E':
      86          case 'F': case 'G': case 'H': case 'I': case 'J':
      87          case 'K': case 'L': case 'M': case 'N': case 'O':
      88          case 'P': case 'Q': case 'R': case 'S': case 'T':
      89          case 'U': case 'V': case 'W': case 'X': case 'Y':
      90          case 'Z':
      91          case '[': case '\\': case ']': case '^': case '_':
      92          case 'a': case 'b': case 'c': case 'd': case 'e':
      93          case 'f': case 'g': case 'h': case 'i': case 'j':
      94          case 'k': case 'l': case 'm': case 'n': case 'o':
      95          case 'p': case 'q': case 'r': case 's': case 't':
      96          case 'u': case 'v': case 'w': case 'x': case 'y':
      97          case 'z': case '{': case '|': case '}': case '~':
      98            /* c is in the ISO C "basic character set".  */
      99            ASSERT (c < 0x80);
     100            /* c is an ASCII character.  */
     101            buf[0] = c;
     102  
                    /* Expected: one byte consumed and wc equal to the byte value.  */
     103            wc = (char32_t) 0xBADFACE;
     104            ret = mbrtoc32 (&wc, buf, 1, &state);
     105            ASSERT (ret == 1);
     106            ASSERT (wc == c);
     107            ASSERT (mbsinit (&state));
     108  
                    /* Same conversion with a NULL destination: only the return
                       value and the state are checked.  */
     109            ret = mbrtoc32 (NULL, buf, 1, &state);
     110            ASSERT (ret == 1);
     111            ASSERT (mbsinit (&state));
     112  
     113            break;
     114          default:
     115            break;
     116          }
     117    }
     118  
     119    /* Test special calling convention, passing a NULL pointer.  */
            /* Here the source pointer is NULL.  Expected: return value 0, wc still
               holding the sentinel, and the state still initial.  */
     120    {
     121      memset (&state, '\0', sizeof (mbstate_t));
     122      wc = (char32_t) 0xBADFACE;
     123      ret = mbrtoc32 (&wc, NULL, 5, &state);
     124      ASSERT (ret == 0);
     125      ASSERT (wc == (char32_t) 0xBADFACE);
     126      ASSERT (mbsinit (&state));
     127    }
     128  
     129  #ifdef __ANDROID__
     130    /* On Android ≥ 5.0, the default locale is the "C.UTF-8" locale, not the
     131       "C" locale.  Furthermore, when you attempt to set the "C" or "POSIX"
     132       locale via setlocale(), what you get is a "C" locale with UTF-8 encoding,
     133       that is, effectively the "C.UTF-8" locale.  */
            /* Hence a request for group "1" is redirected to the UTF-8 group "3"
               whenever the current locale is multibyte (MB_CUR_MAX > 1).  */
     134    if (argc > 1 && strcmp (argv[1], "1") == 0 && MB_CUR_MAX > 1)
     135      argv[1] = "3";
     136  #endif
     137  
            /* Locale-specific groups.  Only the first character of argv[1] is
               examined; every case returns from main itself.  */
     138    if (argc > 1)
     139      switch (argv[1][0])
     140        {
     141        case '1':
     142          /* C or POSIX locale.  */
     143          {
     144            int c;
     145            char buf[1];
     146  
                    /* The state is zeroed once; each iteration asserts that it is
                       back in the initial condition.  */
     147            memset (&state, '\0', sizeof (mbstate_t));
     148            for (c = 0; c < 0x100; c++)
     149              if (c != 0)
     150                {
     151                  /* We are testing all nonnull bytes.  */
     152                  buf[0] = c;
     153  
     154                  wc = (char32_t) 0xBADFACE;
     155                  ret = mbrtoc32 (&wc, buf, 1, &state);
     156                  /* POSIX:2018 says regarding mbrtowc: "In the POSIX locale an
     157                     [EILSEQ] error cannot occur since all byte values are valid
     158                     characters."  It is reasonable to expect mbrtoc32 to behave
     159                     in the same way.  */
     160                  ASSERT (ret == 1);
     161                  if (c < 0x80)
     162                    /* c is an ASCII character.  */
     163                    ASSERT (wc == c);
     164                  else
     165                    /* On most platforms, the bytes 0x80..0xFF map to U+0080..U+00FF.
     166                       But on musl libc, the bytes 0x80..0xFF map to U+DF80..U+DFFF.  */
                          /* The expected value is therefore btoc32 (c) when that
                             equals 0xDF00 + c, and c itself otherwise.  */
     167                    ASSERT (wc == (btoc32 (c) == 0xDF00 + c ? btoc32 (c) : c));
     168                  ASSERT (mbsinit (&state));
     169  
     170                  ret = mbrtoc32 (NULL, buf, 1, &state);
     171                  ASSERT (ret == 1);
     172                  ASSERT (mbsinit (&state));
     173                }
     174          }
     175          return 0;
     176  
     177        case '2':
     178          /* Locale encoding is ISO-8859-1 or ISO-8859-15.  */
                  /* Every character of the input is one byte long, so each call is
                     expected to return 1 regardless of how many bytes it is
                     offered.  */
     179          {
     180            char input[] = "B\374\337er"; /* "Büßer" */
     181            memset (&state, '\0', sizeof (mbstate_t));
     182  
     183            wc = (char32_t) 0xBADFACE;
     184            ret = mbrtoc32 (&wc, input, 1, &state);
     185            ASSERT (ret == 1);
     186            ASSERT (wc == 'B');
     187            ASSERT (mbsinit (&state));
     188            input[0] = '\0';
                    /* NOTE(review): each consumed byte is overwritten with NUL before
                       the next call, presumably so that a later call cannot succeed
                       by re-reading earlier input -- confirm the intent.  The same
                       pattern recurs in the cases below.  */
     189  
     190            wc = (char32_t) 0xBADFACE;
     191            ret = mbrtoc32 (&wc, input + 1, 1, &state);
     192            ASSERT (ret == 1);
                    /* The result must map back to the original byte via c32tob; the
                       exact code point is only checked when char32_t is known to
                       hold Unicode values.  */
     193            ASSERT (c32tob (wc) == (unsigned char) '\374');
     194            #if GL_CHAR32_T_IS_UNICODE
     195            ASSERT (wc == 0x00FC); /* expect Unicode encoding */
     196            #endif
     197            ASSERT (mbsinit (&state));
     198            input[1] = '\0';
     199  
     200            /* Test support of NULL first argument.  */
     201            ret = mbrtoc32 (NULL, input + 2, 3, &state);
     202            ASSERT (ret == 1);
     203            ASSERT (mbsinit (&state));
     204  
                    /* Same bytes again, this time storing the result.  */
     205            wc = (char32_t) 0xBADFACE;
     206            ret = mbrtoc32 (&wc, input + 2, 3, &state);
     207            ASSERT (ret == 1);
     208            ASSERT (c32tob (wc) == (unsigned char) '\337');
     209            #if GL_CHAR32_T_IS_UNICODE
     210            ASSERT (wc == 0x00DF); /* expect Unicode encoding */
     211            #endif
     212            ASSERT (mbsinit (&state));
     213            input[2] = '\0';
     214  
     215            wc = (char32_t) 0xBADFACE;
     216            ret = mbrtoc32 (&wc, input + 3, 2, &state);
     217            ASSERT (ret == 1);
     218            ASSERT (wc == 'e');
     219            ASSERT (mbsinit (&state));
     220            input[3] = '\0';
     221  
     222            wc = (char32_t) 0xBADFACE;
     223            ret = mbrtoc32 (&wc, input + 4, 1, &state);
     224            ASSERT (ret == 1);
     225            ASSERT (wc == 'r');
     226            ASSERT (mbsinit (&state));
     227          }
     228          return 0;
     229  
     230        case '3':
     231          /* Locale encoding is UTF-8.  */
                  /* The input holds 1-, 2-, 2-, 4- and 1-byte characters.  The code
                     points are asserted unconditionally in this case.  */
     232          {
     233            char input[] = "s\303\274\303\237\360\237\230\213!"; /* "süß😋!" */
     234            memset (&state, '\0', sizeof (mbstate_t));
     235  
     236            wc = (char32_t) 0xBADFACE;
     237            ret = mbrtoc32 (&wc, input, 1, &state);
     238            ASSERT (ret == 1);
     239            ASSERT (wc == 's');
     240            ASSERT (mbsinit (&state));
     241            input[0] = '\0';
     242  
                    /* Offer only the first byte of the 2-byte sequence \303\274.
                       Expected: (size_t)(-2), wc untouched, and a state that is no
                       longer initial.  */
     243            wc = (char32_t) 0xBADFACE;
     244            ret = mbrtoc32 (&wc, input + 1, 1, &state);
     245            ASSERT (ret == (size_t)(-2));
     246            ASSERT (wc == (char32_t) 0xBADFACE);
     247            ASSERT (!mbsinit (&state));
     248            input[1] = '\0';
     249  
                    /* Continue from the pending state.  Although up to 7 bytes are
                       offered, exactly 1 further byte is expected to be consumed,
                       completing the character and returning the state to
                       initial.  */
     250            wc = (char32_t) 0xBADFACE;
     251            ret = mbrtoc32 (&wc, input + 2, 7, &state);
     252            ASSERT (ret == 1);
     253            ASSERT (c32tob (wc) == EOF);
     254            ASSERT (wc == 0x00FC); /* expect Unicode encoding */
     255            ASSERT (mbsinit (&state));
     256            input[2] = '\0';
     257  
     258            /* Test support of NULL first argument.  */
     259            ret = mbrtoc32 (NULL, input + 3, 6, &state);
     260            ASSERT (ret == 2);
     261            ASSERT (mbsinit (&state));
     262  
                    /* Same 2-byte sequence \303\237 again, storing the result.  */
     263            wc = (char32_t) 0xBADFACE;
     264            ret = mbrtoc32 (&wc, input + 3, 6, &state);
     265            ASSERT (ret == 2);
     266            ASSERT (c32tob (wc) == EOF);
     267            ASSERT (wc == 0x00DF); /* expect Unicode encoding */
     268            ASSERT (mbsinit (&state));
     269            input[3] = '\0';
     270            input[4] = '\0';
     271  
     272            /* Test support of NULL first argument.  */
     273            ret = mbrtoc32 (NULL, input + 5, 4, &state);
     274            ASSERT (ret == 4);
     275            ASSERT (mbsinit (&state));
     276  
                    /* 4-byte sequence; the expected code point 0x1F60B lies above
                       0xFFFF.  */
     277            wc = (char32_t) 0xBADFACE;
     278            ret = mbrtoc32 (&wc, input + 5, 4, &state);
     279            ASSERT (ret == 4);
     280            ASSERT (c32tob (wc) == EOF);
     281            ASSERT (wc == 0x1F60B); /* expect Unicode encoding */
     282            ASSERT (mbsinit (&state));
     283            input[5] = '\0';
     284            input[6] = '\0';
     285            input[7] = '\0';
     286            input[8] = '\0';
     287  
     288            wc = (char32_t) 0xBADFACE;
     289            ret = mbrtoc32 (&wc, input + 9, 1, &state);
     290            ASSERT (ret == 1);
     291            ASSERT (wc == '!');
     292            ASSERT (mbsinit (&state));
     293          }
     294          return 0;
     295  
     296        case '4':
     297          /* Locale encoding is EUC-JP.  */
                  /* The input is '<', three 2-byte characters, and '>'.  Code points
                     are only asserted when GL_CHAR32_T_IS_UNICODE is true.  */
     298          {
     299            char input[] = "<\306\374\313\334\270\354>"; /* "<日本語>" */
     300            memset (&state, '\0', sizeof (mbstate_t));
     301  
     302            wc = (char32_t) 0xBADFACE;
     303            ret = mbrtoc32 (&wc, input, 1, &state);
     304            ASSERT (ret == 1);
     305            ASSERT (wc == '<');
     306            ASSERT (mbsinit (&state));
     307            input[0] = '\0';
     308  
     309            wc = (char32_t) 0xBADFACE;
     310            ret = mbrtoc32 (&wc, input + 1, 2, &state);
     311            ASSERT (ret == 2);
     312            ASSERT (c32tob (wc) == EOF);
     313            #if GL_CHAR32_T_IS_UNICODE
     314            ASSERT (wc == 0x65E5); /* expect Unicode encoding */
     315            #endif
     316            ASSERT (mbsinit (&state));
     317            input[1] = '\0';
     318            input[2] = '\0';
     319  
                    /* Offer only the first byte of the 2-byte sequence \313\334.
                       Expected: (size_t)(-2), wc untouched, state not initial.  */
     320            wc = (char32_t) 0xBADFACE;
     321            ret = mbrtoc32 (&wc, input + 3, 1, &state);
     322            ASSERT (ret == (size_t)(-2));
     323            ASSERT (wc == (char32_t) 0xBADFACE);
     324            ASSERT (!mbsinit (&state));
     325            input[3] = '\0';
     326  
                    /* Continue from the pending state: exactly 1 further byte is
                       expected to be consumed out of the 4 offered.  */
     327            wc = (char32_t) 0xBADFACE;
     328            ret = mbrtoc32 (&wc, input + 4, 4, &state);
     329            ASSERT (ret == 1);
     330            ASSERT (c32tob (wc) == EOF);
     331            #if GL_CHAR32_T_IS_UNICODE
     332            ASSERT (wc == 0x672C); /* expect Unicode encoding */
     333            #endif
     334            ASSERT (mbsinit (&state));
     335            input[4] = '\0';
     336  
     337            /* Test support of NULL first argument.  */
     338            ret = mbrtoc32 (NULL, input + 5, 3, &state);
     339            ASSERT (ret == 2);
     340            ASSERT (mbsinit (&state));
     341  
                    /* Same 2-byte sequence \270\354 again, storing the result.  */
     342            wc = (char32_t) 0xBADFACE;
     343            ret = mbrtoc32 (&wc, input + 5, 3, &state);
     344            ASSERT (ret == 2);
     345            ASSERT (c32tob (wc) == EOF);
     346            #if GL_CHAR32_T_IS_UNICODE
     347            ASSERT (wc == 0x8A9E); /* expect Unicode encoding */
     348            #endif
     349            ASSERT (mbsinit (&state));
     350            input[5] = '\0';
     351            input[6] = '\0';
     352  
     353            wc = (char32_t) 0xBADFACE;
     354            ret = mbrtoc32 (&wc, input + 7, 1, &state);
     355            ASSERT (ret == 1);
     356            ASSERT (wc == '>');
     357            ASSERT (mbsinit (&state));
     358          }
     359          return 0;
     360  
     361        case '5':
     362          /* Locale encoding is GB18030.  */
                  /* On the platforms matched by the #if below the group is not run:
                     a message is written to stderr and main returns 77.  The block
                     that follows is still compiled there but cannot be reached.  */
     363          #if (defined __GLIBC__ && __GLIBC__ == 2 && __GLIBC_MINOR__ >= 13 && __GLIBC_MINOR__ <= 15) || (GL_CHAR32_T_IS_UNICODE && (defined __NetBSD__ || defined __sun))
     364          fputs ("Skipping test: The GB18030 converter in this system's iconv is broken.\n", stderr);
     365          return 77;
     366          #endif
                  /* The input holds 1-, 2-, 4-, 4- and 1-byte characters.  Code
                     points are only asserted when GL_CHAR32_T_IS_UNICODE is true.  */
     367          {
     368            char input[] = "s\250\271\201\060\211\070\224\071\375\067!"; /* "süß😋!" */
     369            memset (&state, '\0', sizeof (mbstate_t));
     370  
     371            wc = (char32_t) 0xBADFACE;
     372            ret = mbrtoc32 (&wc, input, 1, &state);
     373            ASSERT (ret == 1);
     374            ASSERT (wc == 's');
     375            ASSERT (mbsinit (&state));
     376            input[0] = '\0';
     377  
                    /* Offer only the first byte of the 2-byte sequence \250\271.
                       Expected: (size_t)(-2), wc untouched, state not initial.  */
     378            wc = (char32_t) 0xBADFACE;
     379            ret = mbrtoc32 (&wc, input + 1, 1, &state);
     380            ASSERT (ret == (size_t)(-2));
     381            ASSERT (wc == (char32_t) 0xBADFACE);
     382            ASSERT (!mbsinit (&state));
     383            input[1] = '\0';
     384  
                    /* Continue from the pending state: exactly 1 further byte is
                       expected to be consumed out of the 9 offered.  */
     385            wc = (char32_t) 0xBADFACE;
     386            ret = mbrtoc32 (&wc, input + 2, 9, &state);
     387            ASSERT (ret == 1);
     388            ASSERT (c32tob (wc) == EOF);
     389            #if GL_CHAR32_T_IS_UNICODE
     390            ASSERT (wc == 0x00FC); /* expect Unicode encoding */
     391            #endif
     392            ASSERT (mbsinit (&state));
     393            input[2] = '\0';
     394  
     395            /* Test support of NULL first argument.  */
     396            ret = mbrtoc32 (NULL, input + 3, 8, &state);
     397            ASSERT (ret == 4);
     398            ASSERT (mbsinit (&state));
     399  
                    /* Same 4-byte sequence \201\060\211\070 again, storing the
                       result.  */
     400            wc = (char32_t) 0xBADFACE;
     401            ret = mbrtoc32 (&wc, input + 3, 8, &state);
     402            ASSERT (ret == 4);
     403            ASSERT (c32tob (wc) == EOF);
     404            #if GL_CHAR32_T_IS_UNICODE
     405            ASSERT (wc == 0x00DF); /* expect Unicode encoding */
     406            #endif
     407            ASSERT (mbsinit (&state));
     408            input[3] = '\0';
     409            input[4] = '\0';
     410            input[5] = '\0';
     411            input[6] = '\0';
     412  
     413            /* Test support of NULL first argument.  */
     414            ret = mbrtoc32 (NULL, input + 7, 4, &state);
     415            ASSERT (ret == 4);
     416            ASSERT (mbsinit (&state));
     417  
                    /* Same 4-byte sequence \224\071\375\067 again, storing the
                       result.  */
     418            wc = (char32_t) 0xBADFACE;
     419            ret = mbrtoc32 (&wc, input + 7, 4, &state);
     420            ASSERT (ret == 4);
     421            ASSERT (c32tob (wc) == EOF);
     422            #if GL_CHAR32_T_IS_UNICODE
     423            ASSERT (wc == 0x1F60B); /* expect Unicode encoding */
     424            #endif
     425            ASSERT (mbsinit (&state));
     426            input[7] = '\0';
     427            input[8] = '\0';
     428            input[9] = '\0';
     429            input[10] = '\0';
     430  
     431            wc = (char32_t) 0xBADFACE;
     432            ret = mbrtoc32 (&wc, input + 11, 1, &state);
     433            ASSERT (ret == 1);
     434            ASSERT (wc == '!');
     435            ASSERT (mbsinit (&state));
     436          }
     437          return 0;
     438        }
     439  
            /* Reached when no argument was given or when argv[1] does not start
               with one of '1'..'5'.  */
     440    return 1;
     441  }