(root)/
diffutils-3.10/
gnulib-tests/
test-mbsrtowcs.c
       1  /* Test of conversion of string to wide string.
       2     Copyright (C) 2008-2023 Free Software Foundation, Inc.
       3  
       4     This program is free software: you can redistribute it and/or modify
       5     it under the terms of the GNU General Public License as published by
       6     the Free Software Foundation, either version 3 of the License, or
       7     (at your option) any later version.
       8  
       9     This program is distributed in the hope that it will be useful,
      10     but WITHOUT ANY WARRANTY; without even the implied warranty of
      11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12     GNU General Public License for more details.
      13  
      14     You should have received a copy of the GNU General Public License
      15     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      16  
      17  /* Written by Bruno Haible <bruno@clisp.org>, 2008.  */
      18  
      19  #include <config.h>
      20  
      21  #include <wchar.h>
      22  
      23  #include "signature.h"
/* NOTE(review): SIGNATURE_CHECK is defined in "signature.h", which is not
   visible here; presumably it verifies that mbsrtowcs is declared with the
   return type and parameter types listed below -- confirm against that
   header.  */
SIGNATURE_CHECK (mbsrtowcs, size_t, (wchar_t *, char const **, size_t,
                                     mbstate_t *));
      26  
      27  #include <locale.h>
      28  #include <stdio.h>
      29  #include <string.h>
      30  
      31  #include "macros.h"
      32  
/* Test driver for mbsrtowcs.

   The locale is taken from the environment via setlocale (LC_ALL, "").
   The first character of argv[1] selects the group of checks to run:
     '1'  locale encoding is ISO-8859-1 or ISO-8859-15
     '2'  locale encoding is UTF-8
     '3'  locale encoding is EUC-JP
     '4'  locale encoding is GB18030
     '5'  C or POSIX locale

   Returns 0 when the selected group has been executed twice (once with a
   small output limit, once with a limit of BUFSIZE).  Returns 1 when
   setlocale fails, when no argument is given, or when the selector is not
   one of the characters above.

   NOTE(review): ASSERT comes from "macros.h", which is not visible here;
   presumably a failing ASSERT terminates the test -- confirm.  */
int
main (int argc, char *argv[])
{
  mbstate_t state;
  wchar_t wc;
  size_t ret;

  /* configure should already have checked that the locale is supported.  */
  if (setlocale (LC_ALL, "") == NULL)
    return 1;

  /* Test NUL byte input.  */
  /* The source is the empty string in all four calls; what varies is
     whether a destination is supplied and whether the length limit is
     0 or 1.  Each call must return 0 and leave STATE initial.  */
  {
    const char *src;

    memset (&state, '\0', sizeof (mbstate_t));

    /* No destination, limit 0.  */
    src = "";
    ret = mbsrtowcs (NULL, &src, 0, &state);
    ASSERT (ret == 0);
    ASSERT (mbsinit (&state));

    /* No destination, limit 1.  */
    src = "";
    ret = mbsrtowcs (NULL, &src, 1, &state);
    ASSERT (ret == 0);
    ASSERT (mbsinit (&state));

    /* Destination given but limit 0: the sentinel in WC must survive.  */
    wc = (wchar_t) 0xBADFACE;
    src = "";
    ret = mbsrtowcs (&wc, &src, 0, &state);
    ASSERT (ret == 0);
    ASSERT (wc == (wchar_t) 0xBADFACE);
    ASSERT (mbsinit (&state));

    /* Destination given and limit 1: the terminating null wide character
       must have been stored, replacing the sentinel.  */
    wc = (wchar_t) 0xBADFACE;
    src = "";
    ret = mbsrtowcs (&wc, &src, 1, &state);
    ASSERT (ret == 0);
    ASSERT (wc == 0);
    ASSERT (mbsinit (&state));
  }

#ifdef __ANDROID__
  /* On Android ≥ 5.0, the default locale is the "C.UTF-8" locale, not the
     "C" locale.  Furthermore, when you attempt to set the "C" or "POSIX"
     locale via setlocale(), what you get is a "C" locale with UTF-8 encoding,
     that is, effectively the "C.UTF-8" locale.  */
  /* Hence a request for the C/POSIX checks ("5") is redirected to the UTF-8
     checks ("2") when the locale turns out to be multibyte.  */
  if (argc > 1 && strcmp (argv[1], "5") == 0 && MB_CUR_MAX > 1)
    argv[1] = "2";
#endif

  if (argc > 1)
    {
      int unlimited;

      /* Run the selected checks twice: first (unlimited == 0) with an
         output limit of 1 or 2 wide characters, so that the conversion
         stops early; then (unlimited == 1) with a limit of BUFSIZE, which
         is larger than any of the converted strings.  */
      for (unlimited = 0; unlimited < 2; unlimited++)
        {
          #define BUFSIZE 10
          wchar_t buf[BUFSIZE];
          const char *src;
          mbstate_t temp_state;

          /* Fill the output buffer with a sentinel so that the checks below
             can tell which elements mbsrtowcs has written.  */
          {
            size_t i;
            for (i = 0; i < BUFSIZE; i++)
              buf[i] = (wchar_t) 0xBADFACE;
          }

          /* The cases '1' to '4' share one pattern:
               1. Consume the leading part of INPUT with mbrtowc, in cases
                  '2' to '4' ending in the middle of a multibyte character,
                  so that STATE is not initial.  Bytes already consumed are
                  overwritten with NUL (presumably so that an implementation
                  which wrongly re-reads them would be detected).
               2. Call mbsrtowcs with a NULL destination on a copy of STATE.
                  It must return the number of wide characters in the whole
                  remaining string, whatever the length argument is, and
                  must not advance SRC.  STATE itself is checked to be as it
                  was before the call.
               3. Call mbsrtowcs with BUF as destination and the real STATE,
                  then check the return value, the final SRC (NULL when the
                  terminating NUL was reached, otherwise the position just
                  past the last converted character), the stored wide
                  characters, the untouched sentinel behind them, and that
                  STATE has returned to the initial state.  */
          switch (argv[1][0])
            {
            case '1':
              /* Locale encoding is ISO-8859-1 or ISO-8859-15.  */
              {
                char input[] = "B\374\337er"; /* "Büßer" */
                memset (&state, '\0', sizeof (mbstate_t));

                /* Consume 'B'.  */
                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input, 1, &state);
                ASSERT (ret == 1);
                ASSERT (wc == 'B');
                ASSERT (mbsinit (&state));
                input[0] = '\0';

                /* Consume the single byte \374.  */
                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input + 1, 1, &state);
                ASSERT (ret == 1);
                ASSERT (wctob (wc) == (unsigned char) '\374');
                ASSERT (mbsinit (&state));
                input[1] = '\0';

                /* Count only: three characters remain ("\337er").  */
                src = input + 2;
                temp_state = state;
                ret = mbsrtowcs (NULL, &src, unlimited ? BUFSIZE : 1, &temp_state);
                ASSERT (ret == 3);
                ASSERT (src == input + 2);
                ASSERT (mbsinit (&state));

                /* Real conversion; with limit 1 it stops after \337.  */
                src = input + 2;
                ret = mbsrtowcs (buf, &src, unlimited ? BUFSIZE : 1, &state);
                ASSERT (ret == (unlimited ? 3 : 1));
                ASSERT (src == (unlimited ? NULL : input + 3));
                ASSERT (wctob (buf[0]) == (unsigned char) '\337');
                if (unlimited)
                  {
                    ASSERT (buf[1] == 'e');
                    ASSERT (buf[2] == 'r');
                    ASSERT (buf[3] == 0);
                    ASSERT (buf[4] == (wchar_t) 0xBADFACE);
                  }
                else
                  ASSERT (buf[1] == (wchar_t) 0xBADFACE);
                ASSERT (mbsinit (&state));
              }
              break;

            case '2':
              /* Locale encoding is UTF-8.  */
              {
                char input[] = "B\303\274\303\237er"; /* "Büßer" */
                memset (&state, '\0', sizeof (mbstate_t));

                /* Consume 'B'.  */
                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input, 1, &state);
                ASSERT (ret == 1);
                ASSERT (wc == 'B');
                ASSERT (mbsinit (&state));
                input[0] = '\0';

                /* Feed only the first byte of the two-byte sequence
                   \303\274: the result is (size_t)(-2) (incomplete), WC is
                   not written, and STATE is no longer initial.  */
                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input + 1, 1, &state);
                ASSERT (ret == (size_t)(-2));
                ASSERT (wc == (wchar_t) 0xBADFACE);
                ASSERT (!mbsinit (&state));
                input[1] = '\0';

                /* Count only, continuing from the pending byte: four
                   characters remain.  */
                src = input + 2;
                temp_state = state;
                ret = mbsrtowcs (NULL, &src, unlimited ? BUFSIZE : 2, &temp_state);
                ASSERT (ret == 4);
                ASSERT (src == input + 2);
                ASSERT (!mbsinit (&state));

                /* Real conversion; with limit 2 it stops after the two
                   non-ASCII characters, i.e. at input + 5.  */
                src = input + 2;
                ret = mbsrtowcs (buf, &src, unlimited ? BUFSIZE : 2, &state);
                ASSERT (ret == (unlimited ? 4 : 2));
                ASSERT (src == (unlimited ? NULL : input + 5));
                /* wctob yields EOF for a wide character that has no
                   single-byte representation in this locale.  */
                ASSERT (wctob (buf[0]) == EOF);
                ASSERT (wctob (buf[1]) == EOF);
                if (unlimited)
                  {
                    ASSERT (buf[2] == 'e');
                    ASSERT (buf[3] == 'r');
                    ASSERT (buf[4] == 0);
                    ASSERT (buf[5] == (wchar_t) 0xBADFACE);
                  }
                else
                  ASSERT (buf[2] == (wchar_t) 0xBADFACE);
                ASSERT (mbsinit (&state));
              }
              break;

            case '3':
              /* Locale encoding is EUC-JP.  */
              {
                char input[] = "<\306\374\313\334\270\354>"; /* "<日本語>" */
                memset (&state, '\0', sizeof (mbstate_t));

                /* Consume '<'.  */
                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input, 1, &state);
                ASSERT (ret == 1);
                ASSERT (wc == '<');
                ASSERT (mbsinit (&state));
                input[0] = '\0';

                /* Consume the complete two-byte character \306\374.  */
                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input + 1, 2, &state);
                ASSERT (ret == 2);
                ASSERT (wctob (wc) == EOF);
                ASSERT (mbsinit (&state));
                input[1] = '\0';
                input[2] = '\0';

                /* Feed only the first byte of \313\334: incomplete, WC is
                   not written, and STATE is no longer initial.  */
                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input + 3, 1, &state);
                ASSERT (ret == (size_t)(-2));
                ASSERT (wc == (wchar_t) 0xBADFACE);
                ASSERT (!mbsinit (&state));
                input[3] = '\0';

                /* Count only, continuing from the pending byte: three
                   characters remain.  */
                src = input + 4;
                temp_state = state;
                ret = mbsrtowcs (NULL, &src, unlimited ? BUFSIZE : 2, &temp_state);
                ASSERT (ret == 3);
                ASSERT (src == input + 4);
                ASSERT (!mbsinit (&state));

                /* Real conversion; with limit 2 it stops before '>', i.e.
                   at input + 7.  */
                src = input + 4;
                ret = mbsrtowcs (buf, &src, unlimited ? BUFSIZE : 2, &state);
                ASSERT (ret == (unlimited ? 3 : 2));
                ASSERT (src == (unlimited ? NULL : input + 7));
                ASSERT (wctob (buf[0]) == EOF);
                ASSERT (wctob (buf[1]) == EOF);
                if (unlimited)
                  {
                    ASSERT (buf[2] == '>');
                    ASSERT (buf[3] == 0);
                    ASSERT (buf[4] == (wchar_t) 0xBADFACE);
                  }
                else
                  ASSERT (buf[2] == (wchar_t) 0xBADFACE);
                ASSERT (mbsinit (&state));
              }
              break;

            case '4':
              /* Locale encoding is GB18030.  */
              {
                char input[] = "B\250\271\201\060\211\070er"; /* "Büßer" */
                memset (&state, '\0', sizeof (mbstate_t));

                /* Consume 'B'.  */
                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input, 1, &state);
                ASSERT (ret == 1);
                ASSERT (wc == 'B');
                ASSERT (mbsinit (&state));
                input[0] = '\0';

                /* Feed only the first byte of the two-byte sequence
                   \250\271: incomplete, WC is not written, and STATE is no
                   longer initial.  */
                wc = (wchar_t) 0xBADFACE;
                ret = mbrtowc (&wc, input + 1, 1, &state);
                ASSERT (ret == (size_t)(-2));
                ASSERT (wc == (wchar_t) 0xBADFACE);
                ASSERT (!mbsinit (&state));
                input[1] = '\0';

                /* Count only, continuing from the pending byte: four
                   characters remain.  */
                src = input + 2;
                temp_state = state;
                ret = mbsrtowcs (NULL, &src, unlimited ? BUFSIZE : 2, &temp_state);
                ASSERT (ret == 4);
                ASSERT (src == input + 2);
                ASSERT (!mbsinit (&state));

                /* Real conversion; with limit 2 it stops after the
                   four-byte sequence \201\060\211\070, i.e. at
                   input + 7.  */
                src = input + 2;
                ret = mbsrtowcs (buf, &src, unlimited ? BUFSIZE : 2, &state);
                ASSERT (ret == (unlimited ? 4 : 2));
                ASSERT (src == (unlimited ? NULL : input + 7));
                ASSERT (wctob (buf[0]) == EOF);
                ASSERT (wctob (buf[1]) == EOF);
                if (unlimited)
                  {
                    ASSERT (buf[2] == 'e');
                    ASSERT (buf[3] == 'r');
                    ASSERT (buf[4] == 0);
                    ASSERT (buf[5] == (wchar_t) 0xBADFACE);
                  }
                else
                  ASSERT (buf[2] == (wchar_t) 0xBADFACE);
                ASSERT (mbsinit (&state));
              }
              break;

            case '5':
              /* C or POSIX locale.  */
              /* First part: a plain ASCII string, converted from its start
                 with an initial STATE.  */
              {
                char input[] = "n/a";
                memset (&state, '\0', sizeof (mbstate_t));

                /* Count only: three characters, SRC not advanced.  */
                src = input;
                temp_state = state;
                ret = mbsrtowcs (NULL, &src, unlimited ? BUFSIZE : 1, &temp_state);
                ASSERT (ret == 3);
                ASSERT (src == input);
                ASSERT (mbsinit (&state));

                /* Real conversion; with limit 1 it stops after 'n'.  */
                src = input;
                ret = mbsrtowcs (buf, &src, unlimited ? BUFSIZE : 1, &state);
                ASSERT (ret == (unlimited ? 3 : 1));
                ASSERT (src == (unlimited ? NULL : input + 1));
                ASSERT (buf[0] == 'n');
                if (unlimited)
                  {
                    ASSERT (buf[1] == '/');
                    ASSERT (buf[2] == 'a');
                    ASSERT (buf[3] == 0);
                    ASSERT (buf[4] == (wchar_t) 0xBADFACE);
                  }
                else
                  ASSERT (buf[1] == (wchar_t) 0xBADFACE);
                ASSERT (mbsinit (&state));
              }
              /* Second part: every byte value 1..255 as a one-character
                 string.  STATE is reset once before the loop and is
                 asserted to be initial after every call.  */
              {
                int c;
                char input[2];

                memset (&state, '\0', sizeof (mbstate_t));
                for (c = 0; c < 0x100; c++)
                  if (c != 0)
                    {
                      /* We are testing all nonnull bytes.  */
                      input[0] = c;
                      input[1] = '\0';

                      /* Count only; here STATE is passed directly rather
                         than a copy.  */
                      src = input;
                      ret = mbsrtowcs (NULL, &src, unlimited ? BUFSIZE : 1, &state);
                      ASSERT (ret == 1);
                      ASSERT (src == input);
                      ASSERT (mbsinit (&state));

                      /* Reset the first two sentinels, which the previous
                         iteration may have overwritten.  */
                      buf[0] = buf[1] = (wchar_t) 0xBADFACE;
                      src = input;
                      ret = mbsrtowcs (buf, &src, unlimited ? BUFSIZE : 1, &state);
                      /* POSIX:2018 says: "In the POSIX locale an [EILSEQ] error
                         cannot occur since all byte values are valid characters."  */
                      ASSERT (ret == 1);
                      ASSERT (src == (unlimited ? NULL : input + 1));
                      if (c < 0x80)
                        /* c is an ASCII character.  */
                        ASSERT (buf[0] == c);
                      else
                        /* On most platforms, the bytes 0x80..0xFF map to U+0080..U+00FF.
                           But on musl libc, the bytes 0x80..0xFF map to U+DF80..U+DFFF.  */
                        ASSERT (buf[0] == (btowc (c) == 0xDF00 + c ? btowc (c) : c));
                      ASSERT (mbsinit (&state));
                    }
              }
              break;

            default:
              /* Unrecognised selector character.  */
              return 1;
            }
        }

      return 0;
    }

  /* No selector argument was given.  */
  return 1;
}