(root)/
gettext-0.22.4/
gettext-tools/
gnulib-tests/
test-mbsrtowcs.c
       1  /* Test of conversion of string to wide string.
       2     Copyright (C) 2008-2023 Free Software Foundation, Inc.
       3  
       4     This program is free software: you can redistribute it and/or modify
       5     it under the terms of the GNU General Public License as published by
       6     the Free Software Foundation, either version 3 of the License, or
       7     (at your option) any later version.
       8  
       9     This program is distributed in the hope that it will be useful,
      10     but WITHOUT ANY WARRANTY; without even the implied warranty of
      11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12     GNU General Public License for more details.
      13  
      14     You should have received a copy of the GNU General Public License
      15     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      16  
      17  /* Written by Bruno Haible <bruno@clisp.org>, 2008.  */
      18  
      19  #include <config.h>
      20  
      21  #include <wchar.h>
      22  
      23  #include "signature.h"
          /* Prototype check for mbsrtowcs.  SIGNATURE_CHECK is provided by
             "signature.h", which is not visible here; presumably it makes the build
             fail when the declared signature of mbsrtowcs differs from the return
             type and parameter list spelled out below -- TODO confirm.  */
      24  SIGNATURE_CHECK (mbsrtowcs, size_t, (wchar_t *, char const **, size_t,
      25                                       mbstate_t *));
      26  
      27  #include <locale.h>
      28  #include <stdio.h>
      29  #include <string.h>
      30  
      31  #include "macros.h"
      32  
      33  int
      34  main (int argc, char *argv[])
      35  {
            /* Test driver for mbsrtowcs.

               The locale is taken from the environment (setlocale (LC_ALL, "")).
               The first character of argv[1] selects the group of checks that
               matches the encoding of that locale:
                 '1'  C or POSIX locale
                 '2'  ISO-8859-1 or ISO-8859-15
                 '3'  UTF-8
                 '4'  EUC-JP
                 '5'  GB18030

               Returns 0 once the selected group has run for both values of
               'unlimited'.  Returns 1 if setlocale fails, if no argument was given,
               or if the selector is not one of the characters listed above.
               What happens when an ASSERT does not hold is decided by "macros.h",
               which is not visible here.  */
      36    mbstate_t state;
      37    wchar_t wc;
      38    size_t ret;
      39  
      40    /* configure should already have checked that the locale is supported.  */
      41    if (setlocale (LC_ALL, "") == NULL)
      42      return 1;
      43  
      44    /* Test NUL byte input.  */
            /* The empty string is converted with every combination of
               destination (NULL / &wc) and length limit (0 / 1).  In all four cases
               the expected result is 0 and the state must remain initial.  */
      45    {
      46      const char *src;
      47  
      48      memset (&state, '\0', sizeof (mbstate_t));
      49  
              /* No destination, length 0.  */
      50      src = "";
      51      ret = mbsrtowcs (NULL, &src, 0, &state);
      52      ASSERT (ret == 0);
      53      ASSERT (mbsinit (&state));
      54  
              /* No destination, length 1.  */
      55      src = "";
      56      ret = mbsrtowcs (NULL, &src, 1, &state);
      57      ASSERT (ret == 0);
      58      ASSERT (mbsinit (&state));
      59  
              /* Destination given but length 0: the sentinel value in wc must
                 survive, i.e. nothing may be stored.  */
      60      wc = (wchar_t) 0xBADFACE;
      61      src = "";
      62      ret = mbsrtowcs (&wc, &src, 0, &state);
      63      ASSERT (ret == 0);
      64      ASSERT (wc == (wchar_t) 0xBADFACE);
      65      ASSERT (mbsinit (&state));
      66  
              /* Destination given and length 1: the terminating null wide
                 character is stored, overwriting the sentinel.  */
      67      wc = (wchar_t) 0xBADFACE;
      68      src = "";
      69      ret = mbsrtowcs (&wc, &src, 1, &state);
      70      ASSERT (ret == 0);
      71      ASSERT (wc == 0);
      72      ASSERT (mbsinit (&state));
      73    }
      74  
      75  #ifdef __ANDROID__
      76    /* On Android ≥ 5.0, the default locale is the "C.UTF-8" locale, not the
      77       "C" locale.  Furthermore, when you attempt to set the "C" or "POSIX"
      78       locale via setlocale(), what you get is a "C" locale with UTF-8 encoding,
      79       that is, effectively the "C.UTF-8" locale.  */
            /* Hence, when the "1" group was requested but the locale turns out to
               be multibyte, run the UTF-8 group instead.  */
      80    if (argc > 1 && strcmp (argv[1], "1") == 0 && MB_CUR_MAX > 1)
      81      argv[1] = "3";
      82  #endif
      83  
      84    if (argc > 1)
      85      {
      86        int unlimited;
      87  
                /* Each group is run twice.  With unlimited == 0 the length argument
                   passed to mbsrtowcs is small (1 or 2), so a conversion into buf
                   stops before the end of the input.  With unlimited == 1 the
                   length argument is BUFSIZE, which is larger than any of the test
                   strings, so the whole input including its terminator is
                   converted.  */
      88        for (unlimited = 0; unlimited < 2; unlimited++)
      89          {
      90            #define BUFSIZE 10
      91            wchar_t buf[BUFSIZE];
      92            const char *src;
      93            mbstate_t temp_state;
      94  
                    /* Pre-fill the output buffer with a sentinel so that the
                       checks below can tell which elements were written.  */
      95            {
      96              size_t i;
      97              for (i = 0; i < BUFSIZE; i++)
      98                buf[i] = (wchar_t) 0xBADFACE;
      99            }
     100  
     101            switch (argv[1][0])
     102              {
     103              case '1':
     104                /* C or POSIX locale.  */
     105                {
     106                  char input[] = "n/a";
     107                  memset (&state, '\0', sizeof (mbstate_t));
     108  
                          /* Counting call (NULL destination).  It works on a
                             copy of the state, so 'state' itself stays initial.
                             The expected count is the full length, 3, for both
                             values of the length argument, and src must not be
                             modified.  */
     109                  src = input;
     110                  temp_state = state;
     111                  ret = mbsrtowcs (NULL, &src, unlimited ? BUFSIZE : 1, &temp_state);
     112                  ASSERT (ret == 3);
     113                  ASSERT (src == input);
     114                  ASSERT (mbsinit (&state));
     115  
                          /* Converting call.  Limited: one wide character is
                             stored and src points just past the consumed byte.
                             Unlimited: all three characters plus the terminator
                             are stored and src is set to NULL.  */
     116                  src = input;
     117                  ret = mbsrtowcs (buf, &src, unlimited ? BUFSIZE : 1, &state);
     118                  ASSERT (ret == (unlimited ? 3 : 1));
     119                  ASSERT (src == (unlimited ? NULL : input + 1));
     120                  ASSERT (buf[0] == 'n');
     121                  if (unlimited)
     122                    {
     123                      ASSERT (buf[1] == '/');
     124                      ASSERT (buf[2] == 'a');
     125                      ASSERT (buf[3] == 0);
     126                      ASSERT (buf[4] == (wchar_t) 0xBADFACE);
     127                    }
     128                  else
     129                    ASSERT (buf[1] == (wchar_t) 0xBADFACE);
     130                  ASSERT (mbsinit (&state));
     131                }
                        /* Second part: every single nonzero byte value, taken as a
                           one-character string, must convert to exactly one wide
                           character.  */
     132                {
     133                  int c;
     134                  char input[2];
     135  
     136                  memset (&state, '\0', sizeof (mbstate_t));
     137                  for (c = 0; c < 0x100; c++)
     138                    if (c != 0)
     139                      {
     140                        /* We are testing all nonnull bytes.  */
     141                        input[0] = c;
     142                        input[1] = '\0';
     143  
                                /* Counting call; here it operates on 'state'
                                   directly and must leave it initial.  */
     144                        src = input;
     145                        ret = mbsrtowcs (NULL, &src, unlimited ? BUFSIZE : 1, &state);
     146                        ASSERT (ret == 1);
     147                        ASSERT (src == input);
     148                        ASSERT (mbsinit (&state));
     149  
                                /* Converting call; reset the two elements that the
                                   previous iteration may have overwritten.  */
     150                        buf[0] = buf[1] = (wchar_t) 0xBADFACE;
     151                        src = input;
     152                        ret = mbsrtowcs (buf, &src, unlimited ? BUFSIZE : 1, &state);
     153                        /* POSIX:2018 says: "In the POSIX locale an [EILSEQ] error
     154                           cannot occur since all byte values are valid characters."  */
     155                        ASSERT (ret == 1);
     156                        ASSERT (src == (unlimited ? NULL : input + 1));
     157                        if (c < 0x80)
     158                          /* c is an ASCII character.  */
     159                          ASSERT (buf[0] == c);
     160                        else
     161                          /* On most platforms, the bytes 0x80..0xFF map to U+0080..U+00FF.
     162                             But on musl libc, the bytes 0x80..0xFF map to U+DF80..U+DFFF.  */
     163                          ASSERT (buf[0] == (btowc (c) == 0xDF00 + c ? btowc (c) : c));
     164                        ASSERT (mbsinit (&state));
     165                      }
     166                }
     167                break;
     168  
     169              case '2':
     170                /* Locale encoding is ISO-8859-1 or ISO-8859-15.  */
     171                {
     172                  char input[] = "B\374\337er"; /* "Büßer" */
     173                  memset (&state, '\0', sizeof (mbstate_t));
     174  
                          /* The first two characters are consumed one at a time
                             with mbrtowc.  Each consumed byte is then overwritten
                             with '\0' -- presumably so that a later call that
                             wrongly re-read consumed input would be noticed.  */
     175                  wc = (wchar_t) 0xBADFACE;
     176                  ret = mbrtowc (&wc, input, 1, &state);
     177                  ASSERT (ret == 1);
     178                  ASSERT (wc == 'B');
     179                  ASSERT (mbsinit (&state));
     180                  input[0] = '\0';
     181  
     182                  wc = (wchar_t) 0xBADFACE;
     183                  ret = mbrtowc (&wc, input + 1, 1, &state);
     184                  ASSERT (ret == 1);
     185                  ASSERT (wctob (wc) == (unsigned char) '\374');
     186                  ASSERT (mbsinit (&state));
     187                  input[1] = '\0';
     188  
                          /* Counting call on the remaining three characters,
                             using a copy of the state.  */
     189                  src = input + 2;
     190                  temp_state = state;
     191                  ret = mbsrtowcs (NULL, &src, unlimited ? BUFSIZE : 1, &temp_state);
     192                  ASSERT (ret == 3);
     193                  ASSERT (src == input + 2);
     194                  ASSERT (mbsinit (&state));
     195  
                          /* Converting call on the same remaining input.  */
     196                  src = input + 2;
     197                  ret = mbsrtowcs (buf, &src, unlimited ? BUFSIZE : 1, &state);
     198                  ASSERT (ret == (unlimited ? 3 : 1));
     199                  ASSERT (src == (unlimited ? NULL : input + 3));
     200                  ASSERT (wctob (buf[0]) == (unsigned char) '\337');
     201                  if (unlimited)
     202                    {
     203                      ASSERT (buf[1] == 'e');
     204                      ASSERT (buf[2] == 'r');
     205                      ASSERT (buf[3] == 0);
     206                      ASSERT (buf[4] == (wchar_t) 0xBADFACE);
     207                    }
     208                  else
     209                    ASSERT (buf[1] == (wchar_t) 0xBADFACE);
     210                  ASSERT (mbsinit (&state));
     211                }
     212                break;
     213  
     214              case '3':
     215                /* Locale encoding is UTF-8.  */
     216                {
     217                  char input[] = "B\303\274\303\237er"; /* "Büßer" */
     218                  memset (&state, '\0', sizeof (mbstate_t));
     219  
     220                  wc = (wchar_t) 0xBADFACE;
     221                  ret = mbrtowc (&wc, input, 1, &state);
     222                  ASSERT (ret == 1);
     223                  ASSERT (wc == 'B');
     224                  ASSERT (mbsinit (&state));
     225                  input[0] = '\0';
     226  
                          /* Feed only the first byte of the two-byte character at
                             input[1..2].  mbrtowc reports an incomplete character
                             ((size_t)(-2)), stores nothing in wc and leaves the
                             state non-initial.  */
     227                  wc = (wchar_t) 0xBADFACE;
     228                  ret = mbrtowc (&wc, input + 1, 1, &state);
     229                  ASSERT (ret == (size_t)(-2));
     230                  ASSERT (wc == (wchar_t) 0xBADFACE);
     231                  ASSERT (!mbsinit (&state));
     232                  input[1] = '\0';
     233  
                          /* mbsrtowcs now starts in the middle of that character.
                             Counting call on a copy of the state: the character
                             being completed, the next two-byte character, 'e' and
                             'r' make 4.  'state' itself is still mid-character.  */
     234                  src = input + 2;
     235                  temp_state = state;
     236                  ret = mbsrtowcs (NULL, &src, unlimited ? BUFSIZE : 2, &temp_state);
     237                  ASSERT (ret == 4);
     238                  ASSERT (src == input + 2);
     239                  ASSERT (!mbsinit (&state));
     240  
                          /* Converting call.  Limited to 2 wide characters it
                             stops at input + 5, right after the second two-byte
                             character.  wctob (...) == EOF checks that each of the
                             two results has no single-byte representation.  */
     241                  src = input + 2;
     242                  ret = mbsrtowcs (buf, &src, unlimited ? BUFSIZE : 2, &state);
     243                  ASSERT (ret == (unlimited ? 4 : 2));
     244                  ASSERT (src == (unlimited ? NULL : input + 5));
     245                  ASSERT (wctob (buf[0]) == EOF);
     246                  ASSERT (wctob (buf[1]) == EOF);
     247                  if (unlimited)
     248                    {
     249                      ASSERT (buf[2] == 'e');
     250                      ASSERT (buf[3] == 'r');
     251                      ASSERT (buf[4] == 0);
     252                      ASSERT (buf[5] == (wchar_t) 0xBADFACE);
     253                    }
     254                  else
     255                    ASSERT (buf[2] == (wchar_t) 0xBADFACE);
     256                  ASSERT (mbsinit (&state));
     257                }
     258                break;
     259  
     260              case '4':
     261                /* Locale encoding is EUC-JP.  */
     262                {
     263                  char input[] = "<\306\374\313\334\270\354>"; /* "<日本語>" */
     264                  memset (&state, '\0', sizeof (mbstate_t));
     265  
     266                  wc = (wchar_t) 0xBADFACE;
     267                  ret = mbrtowc (&wc, input, 1, &state);
     268                  ASSERT (ret == 1);
     269                  ASSERT (wc == '<');
     270                  ASSERT (mbsinit (&state));
     271                  input[0] = '\0';
     272  
                          /* The first two-byte character (input[1..2]) is
                             consumed completely.  */
     273                  wc = (wchar_t) 0xBADFACE;
     274                  ret = mbrtowc (&wc, input + 1, 2, &state);
     275                  ASSERT (ret == 2);
     276                  ASSERT (wctob (wc) == EOF);
     277                  ASSERT (mbsinit (&state));
     278                  input[1] = '\0';
     279                  input[2] = '\0';
     280  
                          /* Only the first byte of the second two-byte character
                             (input[3..4]) is fed, leaving the state
                             mid-character.  */
     281                  wc = (wchar_t) 0xBADFACE;
     282                  ret = mbrtowc (&wc, input + 3, 1, &state);
     283                  ASSERT (ret == (size_t)(-2));
     284                  ASSERT (wc == (wchar_t) 0xBADFACE);
     285                  ASSERT (!mbsinit (&state));
     286                  input[3] = '\0';
     287  
                          /* Counting call on a copy of the state: the character
                             being completed, the third two-byte character and '>'
                             make 3.  */
     288                  src = input + 4;
     289                  temp_state = state;
     290                  ret = mbsrtowcs (NULL, &src, unlimited ? BUFSIZE : 2, &temp_state);
     291                  ASSERT (ret == 3);
     292                  ASSERT (src == input + 4);
     293                  ASSERT (!mbsinit (&state));
     294  
                          /* Converting call.  Limited to 2 wide characters it
                             stops at input + 7, i.e. at the '>'.  */
     295                  src = input + 4;
     296                  ret = mbsrtowcs (buf, &src, unlimited ? BUFSIZE : 2, &state);
     297                  ASSERT (ret == (unlimited ? 3 : 2));
     298                  ASSERT (src == (unlimited ? NULL : input + 7));
     299                  ASSERT (wctob (buf[0]) == EOF);
     300                  ASSERT (wctob (buf[1]) == EOF);
     301                  if (unlimited)
     302                    {
     303                      ASSERT (buf[2] == '>');
     304                      ASSERT (buf[3] == 0);
     305                      ASSERT (buf[4] == (wchar_t) 0xBADFACE);
     306                    }
     307                  else
     308                    ASSERT (buf[2] == (wchar_t) 0xBADFACE);
     309                  ASSERT (mbsinit (&state));
     310                }
     311                break;
     312  
     313              case '5':
     314                /* Locale encoding is GB18030.  */
     315                {
     316                  char input[] = "B\250\271\201\060\211\070er"; /* "Büßer" */
     317                  memset (&state, '\0', sizeof (mbstate_t));
     318  
     319                  wc = (wchar_t) 0xBADFACE;
     320                  ret = mbrtowc (&wc, input, 1, &state);
     321                  ASSERT (ret == 1);
     322                  ASSERT (wc == 'B');
     323                  ASSERT (mbsinit (&state));
     324                  input[0] = '\0';
     325  
                          /* Feed only the first byte of the two-byte character at
                             input[1..2], leaving the state mid-character.  */
     326                  wc = (wchar_t) 0xBADFACE;
     327                  ret = mbrtowc (&wc, input + 1, 1, &state);
     328                  ASSERT (ret == (size_t)(-2));
     329                  ASSERT (wc == (wchar_t) 0xBADFACE);
     330                  ASSERT (!mbsinit (&state));
     331                  input[1] = '\0';
     332  
                          /* Counting call on a copy of the state: the character
                             being completed, the four-byte character at
                             input[3..6], 'e' and 'r' make 4.  */
     333                  src = input + 2;
     334                  temp_state = state;
     335                  ret = mbsrtowcs (NULL, &src, unlimited ? BUFSIZE : 2, &temp_state);
     336                  ASSERT (ret == 4);
     337                  ASSERT (src == input + 2);
     338                  ASSERT (!mbsinit (&state));
     339  
                          /* Converting call.  Limited to 2 wide characters it
                             stops at input + 7, right after the four-byte
                             character.  */
     340                  src = input + 2;
     341                  ret = mbsrtowcs (buf, &src, unlimited ? BUFSIZE : 2, &state);
     342                  ASSERT (ret == (unlimited ? 4 : 2));
     343                  ASSERT (src == (unlimited ? NULL : input + 7));
     344                  ASSERT (wctob (buf[0]) == EOF);
     345                  ASSERT (wctob (buf[1]) == EOF);
     346                  if (unlimited)
     347                    {
     348                      ASSERT (buf[2] == 'e');
     349                      ASSERT (buf[3] == 'r');
     350                      ASSERT (buf[4] == 0);
     351                      ASSERT (buf[5] == (wchar_t) 0xBADFACE);
     352                    }
     353                  else
     354                    ASSERT (buf[2] == (wchar_t) 0xBADFACE);
     355                  ASSERT (mbsinit (&state));
     356                }
     357                break;
     358  
     359              default:
                        /* Unknown selector character.  */
     360                return 1;
     361              }
     362          }
     363  
     364        return 0;
     365      }
     366  
            /* No selector argument was given.  */
     367    return 1;
     368  }