(root)/
coreutils-9.4/
gnulib-tests/
test-mbsrtoc32s.c
       1  /* Test of conversion of string to 32-bit wide string.
       2     Copyright (C) 2008-2023 Free Software Foundation, Inc.
       3  
       4     This program is free software: you can redistribute it and/or modify
       5     it under the terms of the GNU General Public License as published by
       6     the Free Software Foundation, either version 3 of the License, or
       7     (at your option) any later version.
       8  
       9     This program is distributed in the hope that it will be useful,
      10     but WITHOUT ANY WARRANTY; without even the implied warranty of
      11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12     GNU General Public License for more details.
      13  
      14     You should have received a copy of the GNU General Public License
      15     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      16  
      17  /* Written by Bruno Haible <bruno@clisp.org>, 2008.  */
      18  
      19  #include <config.h>
      20  
      21  #include <uchar.h>
      22  
      23  #include "signature.h"
      24  SIGNATURE_CHECK (mbsrtoc32s, size_t,
      25                   (char32_t *, const char **, size_t, mbstate_t *));
          /* NOTE(review): SIGNATURE_CHECK comes from "signature.h", which is not
             shown here; presumably it verifies at build time that mbsrtoc32s is
             declared with exactly this return type and parameter list -- confirm
             against that header.  */
      26  
      27  #include <locale.h>
      28  #include <stdio.h>
      29  #include <string.h>
      30  #include <wchar.h>
      31  
      32  #include "macros.h"
      33  
      34  int
      35  main (int argc, char *argv[])
      36  {
            /* Test driver for mbsrtoc32s.  It adopts the locale selected by the
               environment, runs the empty-string checks that do not depend on the
               encoding, and then uses the first character of argv[1] ('1' .. '5')
               to pick the group of checks matching the locale's encoding.
               Returns 0 after the selected group has run, 77 when the GB18030
               group is skipped, and 1 when setlocale fails, when no argument is
               given, or when the selector is not recognized.  */
      37    mbstate_t state;
      38    char32_t wc;
      39    size_t ret;
      40  
      41    /* configure should already have checked that the locale is supported.  */
      42    if (setlocale (LC_ALL, "") == NULL)
      43      return 1;
      44  
      45    /* Test NUL byte input.  */
      46    {
      47      const char *src;
      48  
      49      memset (&state, '\0', sizeof (mbstate_t));
      50  
              /* NULL destination, length limit 0.  */
      51      src = "";
      52      ret = mbsrtoc32s (NULL, &src, 0, &state);
      53      ASSERT (ret == 0);
      54      ASSERT (mbsinit (&state));
      55  
              /* NULL destination, length limit 1.  */
      56      src = "";
      57      ret = mbsrtoc32s (NULL, &src, 1, &state);
      58      ASSERT (ret == 0);
      59      ASSERT (mbsinit (&state));
      60  
              /* Real destination but length limit 0: the sentinel stored in wc
                 must be left alone.  */
      61      wc = (char32_t) 0xBADFACE;
      62      src = "";
      63      ret = mbsrtoc32s (&wc, &src, 0, &state);
      64      ASSERT (ret == 0);
      65      ASSERT (wc == (char32_t) 0xBADFACE);
      66      ASSERT (mbsinit (&state));
      67  
              /* Real destination, length limit 1: the terminating NUL is expected
                 to replace the sentinel in wc, while the return value stays 0.  */
      68      wc = (char32_t) 0xBADFACE;
      69      src = "";
      70      ret = mbsrtoc32s (&wc, &src, 1, &state);
      71      ASSERT (ret == 0);
      72      ASSERT (wc == 0);
      73      ASSERT (mbsinit (&state));
      74    }
      75  
      76  #ifdef __ANDROID__
      77    /* On Android ≥ 5.0, the default locale is the "C.UTF-8" locale, not the
      78       "C" locale.  Furthermore, when you attempt to set the "C" or "POSIX"
      79       locale via setlocale(), what you get is a "C" locale with UTF-8 encoding,
      80       that is, effectively the "C.UTF-8" locale.  */
            /* Hence redirect a request for group "1" to group "3" whenever the
               current locale turns out to be multibyte (MB_CUR_MAX > 1).  */
      81    if (argc > 1 && strcmp (argv[1], "1") == 0 && MB_CUR_MAX > 1)
      82      argv[1] = "3";
      83  #endif
      84  
            /* Encoding-specific checks, selected by argv[1].  */
      85    if (argc > 1)
      86      {
      87        int unlimited;
      88  
                /* Run the selected group twice: first with a small length limit
                   (unlimited == 0), then with the limit set to BUFSIZE
                   (unlimited == 1), which exceeds what any input below needs.  */
      89        for (unlimited = 0; unlimited < 2; unlimited++)
      90          {
                    /* Number of char32_t elements in buf.  */
      91            #define BUFSIZE 10
      92            char32_t buf[BUFSIZE];
      93            const char *src;
      94            mbstate_t temp_state;
      95  
                    /* Fill buf with a sentinel so that the checks below can tell
                       which elements a call has actually written.  */
      96            {
      97              size_t i;
      98              for (i = 0; i < BUFSIZE; i++)
      99                buf[i] = (char32_t) 0xBADFACE;
     100            }
     101  
                    /* The first character of argv[1] names the encoding of the
                       locale under test.  */
     102            switch (argv[1][0])
     103              {
     104              case '1':
     105                /* C or POSIX locale.  */
     106                {
     107                  char input[] = "n/a";
     108                  memset (&state, '\0', sizeof (mbstate_t));
     109  
                          /* NULL destination: the call runs on a copy of the state.
                             The full length (3) is expected whatever the limit, and
                             src must not move.  */
     110                  src = input;
     111                  temp_state = state;
     112                  ret = mbsrtoc32s (NULL, &src, unlimited ? BUFSIZE : 1, &temp_state);
     113                  ASSERT (ret == 3);
     114                  ASSERT (src == input);
     115                  ASSERT (mbsinit (&state));
     116  
                          /* Real destination: no more characters than the limit
                             allows are expected to be stored.  */
     117                  src = input;
     118                  ret = mbsrtoc32s (buf, &src, unlimited ? BUFSIZE : 1, &state);
     119                  ASSERT (ret == (unlimited ? 3 : 1));
                          /* src is expected to be NULL once the terminating NUL has
                             been converted, and otherwise to point just past the
                             last converted character.  */
     120                  ASSERT (src == (unlimited ? NULL : input + 1));
     121                  ASSERT (buf[0] == 'n');
     122                  if (unlimited)
     123                    {
     124                      ASSERT (buf[1] == '/');
     125                      ASSERT (buf[2] == 'a');
     126                      ASSERT (buf[3] == 0);
                              /* Nothing beyond the terminator may be touched.  */
     127                      ASSERT (buf[4] == (char32_t) 0xBADFACE);
     128                    }
     129                  else
     130                    ASSERT (buf[1] == (char32_t) 0xBADFACE);
     131                  ASSERT (mbsinit (&state));
     132                }
                        /* Second part: convert every nonzero byte value as a
                           one-character string.  */
     133                {
     134                  int c;
     135                  char input[2];
     136  
     137                  memset (&state, '\0', sizeof (mbstate_t));
     138                  for (c = 0; c < 0x100; c++)
     139                    if (c != 0)
     140                      {
     141                        /* We are testing all nonnull bytes.  */
     142                        input[0] = c;
     143                        input[1] = '\0';
     144  
     145                        src = input;
     146                        ret = mbsrtoc32s (NULL, &src, unlimited ? BUFSIZE : 1, &state);
     147                        ASSERT (ret == 1);
     148                        ASSERT (src == input);
     149                        ASSERT (mbsinit (&state));
     150  
                                /* Re-seed the two elements the next call may write
                                   (the character and, when unlimited, the NUL).  */
     151                        buf[0] = buf[1] = (char32_t) 0xBADFACE;
     152                        src = input;
     153                        ret = mbsrtoc32s (buf, &src, unlimited ? BUFSIZE : 1, &state);
     154                        /* POSIX:2018 says regarding mbsrtowcs: "In the POSIX locale an
     155                           [EILSEQ] error cannot occur since all byte values are valid
     156                           characters."  It is reasonable to expect mbsrtoc32s to behave
     157                           in the same way.  */
     158                        ASSERT (ret == 1);
     159                        ASSERT (src == (unlimited ? NULL : input + 1));
     160                        if (c < 0x80)
     161                          /* c is an ASCII character.  */
     162                          ASSERT (buf[0] == c);
     163                        else
     164                          /* On most platforms, the bytes 0x80..0xFF map to U+0080..U+00FF.
     165                             But on musl libc, the bytes 0x80..0xFF map to U+DF80..U+DFFF.  */
     166                          ASSERT (buf[0] == (btoc32 (c) == 0xDF00 + c ? btoc32 (c) : c));
     167                        ASSERT (mbsinit (&state));
     168                      }
     169                }
     170                break;
     171  
     172              case '2':
     173                /* Locale encoding is ISO-8859-1 or ISO-8859-15.  */
     174                {
     175                  char input[] = "B\374\337er"; /* "Büßer" */
     176                  memset (&state, '\0', sizeof (mbstate_t));
     177  
                          /* Consume 'B' with mbrtoc32, then overwrite it with NUL.
                             NOTE(review): presumably the overwriting ensures that
                             later calls cannot rely on bytes already consumed.  */
     178                  wc = (char32_t) 0xBADFACE;
     179                  ret = mbrtoc32 (&wc, input, 1, &state);
     180                  ASSERT (ret == 1);
     181                  ASSERT (wc == 'B');
     182                  ASSERT (mbsinit (&state));
     183                  input[0] = '\0';
     184  
                          /* Likewise for the single byte 0374 (octal).  */
     185                  wc = (char32_t) 0xBADFACE;
     186                  ret = mbrtoc32 (&wc, input + 1, 1, &state);
     187                  ASSERT (ret == 1);
     188                  ASSERT (c32tob (wc) == (unsigned char) '\374');
     189                  ASSERT (mbsinit (&state));
     190                  input[1] = '\0';
     191  
                          /* Convert the remaining three characters, starting at
                             input + 2: first with a NULL destination, on a copy of
                             the state.  */
     192                  src = input + 2;
     193                  temp_state = state;
     194                  ret = mbsrtoc32s (NULL, &src, unlimited ? BUFSIZE : 1, &temp_state);
     195                  ASSERT (ret == 3);
     196                  ASSERT (src == input + 2);
     197                  ASSERT (mbsinit (&state));
     198  
                          /* Then with a real destination.  */
     199                  src = input + 2;
     200                  ret = mbsrtoc32s (buf, &src, unlimited ? BUFSIZE : 1, &state);
     201                  ASSERT (ret == (unlimited ? 3 : 1));
     202                  ASSERT (src == (unlimited ? NULL : input + 3));
     203                  ASSERT (c32tob (buf[0]) == (unsigned char) '\337');
     204                  if (unlimited)
     205                    {
     206                      ASSERT (buf[1] == 'e');
     207                      ASSERT (buf[2] == 'r');
     208                      ASSERT (buf[3] == 0);
     209                      ASSERT (buf[4] == (char32_t) 0xBADFACE);
     210                    }
     211                  else
     212                    ASSERT (buf[1] == (char32_t) 0xBADFACE);
     213                  ASSERT (mbsinit (&state));
     214                }
     215                break;
     216  
     217              case '3':
     218                /* Locale encoding is UTF-8.  */
     219                {
     220                  char input[] = "s\303\274\303\237\360\237\230\213!"; /* "süß😋!" */
     221                  memset (&state, '\0', sizeof (mbstate_t));
     222  
                          /* Consume 's' with mbrtoc32, then overwrite it.  */
     223                  wc = (char32_t) 0xBADFACE;
     224                  ret = mbrtoc32 (&wc, input, 1, &state);
     225                  ASSERT (ret == 1);
     226                  ASSERT (wc == 's');
     227                  ASSERT (mbsinit (&state));
     228                  input[0] = '\0';
     229  
                          /* Feed only the first byte of the two-byte sequence
                             0303 0274 (octal): mbrtoc32 is expected to return
                             (size_t)(-2), store nothing in wc, and leave state in a
                             non-initial condition.  */
     230                  wc = (char32_t) 0xBADFACE;
     231                  ret = mbrtoc32 (&wc, input + 1, 1, &state);
     232                  ASSERT (ret == (size_t)(-2));
     233                  ASSERT (wc == (char32_t) 0xBADFACE);
     234                  ASSERT (!mbsinit (&state));
     235                  input[1] = '\0';
     236  
                          /* Resume in the middle of that character.  The
                             NULL-destination call works on a copy, so state itself
                             must still be non-initial afterwards.  */
     237                  src = input + 2;
     238                  temp_state = state;
     239                  ret = mbsrtoc32s (NULL, &src, unlimited ? BUFSIZE : 2, &temp_state);
     240                  ASSERT (ret == 4);
     241                  ASSERT (src == input + 2);
     242                  ASSERT (!mbsinit (&state));
     243  
                          /* Real destination.  With the limit of 2 the conversion
                             is expected to stop after the rest of the first
                             character (1 byte) and the next two-byte character,
                             that is, at input + 5.  */
     244                  src = input + 2;
     245                  ret = mbsrtoc32s (buf, &src, unlimited ? BUFSIZE : 2, &state);
     246                  ASSERT (ret == (unlimited ? 4 : 2));
     247                  ASSERT (src == (unlimited ? NULL : input + 5));
                          /* c32tob is expected to report EOF for both non-ASCII
                             characters.  */
     248                  ASSERT (c32tob (buf[0]) == EOF);
     249                  ASSERT (c32tob (buf[1]) == EOF);
     250                  if (unlimited)
     251                    {
     252                      ASSERT (buf[2] == 0x1F60B); /* expect Unicode encoding */
     253                      ASSERT (buf[3] == '!');
     254                      ASSERT (buf[4] == 0);
     255                      ASSERT (buf[5] == (char32_t) 0xBADFACE);
     256                    }
     257                  else
     258                    ASSERT (buf[2] == (char32_t) 0xBADFACE);
     259                  ASSERT (mbsinit (&state));
     260                }
     261                break;
     262  
     263              case '4':
     264                /* Locale encoding is EUC-JP.  */
     265                {
     266                  char input[] = "<\306\374\313\334\270\354>"; /* "<日本語>" */
     267                  memset (&state, '\0', sizeof (mbstate_t));
     268  
                          /* Consume '<' with mbrtoc32, then overwrite it.  */
     269                  wc = (char32_t) 0xBADFACE;
     270                  ret = mbrtoc32 (&wc, input, 1, &state);
     271                  ASSERT (ret == 1);
     272                  ASSERT (wc == '<');
     273                  ASSERT (mbsinit (&state));
     274                  input[0] = '\0';
     275  
                          /* Consume the complete two-byte character at input + 1.  */
     276                  wc = (char32_t) 0xBADFACE;
     277                  ret = mbrtoc32 (&wc, input + 1, 2, &state);
     278                  ASSERT (ret == 2);
     279                  ASSERT (c32tob (wc) == EOF);
     280                  ASSERT (mbsinit (&state));
     281                  input[1] = '\0';
     282                  input[2] = '\0';
     283  
                          /* Feed only the first byte of the next two-byte
                             character: (size_t)(-2) is expected, wc stays
                             untouched and state becomes non-initial.  */
     284                  wc = (char32_t) 0xBADFACE;
     285                  ret = mbrtoc32 (&wc, input + 3, 1, &state);
     286                  ASSERT (ret == (size_t)(-2));
     287                  ASSERT (wc == (char32_t) 0xBADFACE);
     288                  ASSERT (!mbsinit (&state));
     289                  input[3] = '\0';
     290  
                          /* Resume at the second byte of that character;
                             NULL-destination call on a copy of the state.  */
     291                  src = input + 4;
     292                  temp_state = state;
     293                  ret = mbsrtoc32s (NULL, &src, unlimited ? BUFSIZE : 2, &temp_state);
     294                  ASSERT (ret == 3);
     295                  ASSERT (src == input + 4);
     296                  ASSERT (!mbsinit (&state));
     297  
                          /* Real destination.  With the limit of 2: the rest of the
                             pending character (1 byte) plus one more two-byte
                             character, hence input + 7.  */
     298                  src = input + 4;
     299                  ret = mbsrtoc32s (buf, &src, unlimited ? BUFSIZE : 2, &state);
     300                  ASSERT (ret == (unlimited ? 3 : 2));
     301                  ASSERT (src == (unlimited ? NULL : input + 7));
     302                  ASSERT (c32tob (buf[0]) == EOF);
     303                  ASSERT (c32tob (buf[1]) == EOF);
     304                  if (unlimited)
     305                    {
     306                      ASSERT (buf[2] == '>');
     307                      ASSERT (buf[3] == 0);
     308                      ASSERT (buf[4] == (char32_t) 0xBADFACE);
     309                    }
     310                  else
     311                    ASSERT (buf[2] == (char32_t) 0xBADFACE);
     312                  ASSERT (mbsinit (&state));
     313                }
     314                break;
     315  
     316              case '5':
     317                /* Locale encoding is GB18030.  */
                        /* On the platforms matched by the condition below, this
                           group is skipped: a message goes to stderr and the
                           program exits with status 77.  */
     318                #if (defined __GLIBC__ && __GLIBC__ == 2 && __GLIBC_MINOR__ >= 13 && __GLIBC_MINOR__ <= 15) || (GL_CHAR32_T_IS_UNICODE && (defined __NetBSD__ || defined __sun))
     319                fputs ("Skipping test: The GB18030 converter in this system's iconv is broken.\n", stderr);
     320                return 77;
     321                #endif
     322                {
     323                  char input[] = "s\250\271\201\060\211\070\224\071\375\067!"; /* "süß😋!" */
     324                  memset (&state, '\0', sizeof (mbstate_t));
     325  
                          /* Consume 's' with mbrtoc32, then overwrite it.  */
     326                  wc = (char32_t) 0xBADFACE;
     327                  ret = mbrtoc32 (&wc, input, 1, &state);
     328                  ASSERT (ret == 1);
     329                  ASSERT (wc == 's');
     330                  ASSERT (mbsinit (&state));
     331                  input[0] = '\0';
     332  
                          /* Feed only the first byte of the two-byte sequence at
                             input + 1: (size_t)(-2) is expected, wc stays
                             untouched and state becomes non-initial.  */
     333                  wc = (char32_t) 0xBADFACE;
     334                  ret = mbrtoc32 (&wc, input + 1, 1, &state);
     335                  ASSERT (ret == (size_t)(-2));
     336                  ASSERT (wc == (char32_t) 0xBADFACE);
     337                  ASSERT (!mbsinit (&state));
     338                  input[1] = '\0';
     339  
                          /* Resume at the second byte of that character;
                             NULL-destination call on a copy of the state.  */
     340                  src = input + 2;
     341                  temp_state = state;
     342                  ret = mbsrtoc32s (NULL, &src, unlimited ? BUFSIZE : 2, &temp_state);
     343                  ASSERT (ret == 4);
     344                  ASSERT (src == input + 2);
     345                  ASSERT (!mbsinit (&state));
     346  
                          /* Real destination.  With the limit of 2: the rest of the
                             pending character (1 byte) plus the following four-byte
                             character, hence input + 7.  */
     347                  src = input + 2;
     348                  ret = mbsrtoc32s (buf, &src, unlimited ? BUFSIZE : 2, &state);
     349                  ASSERT (ret == (unlimited ? 4 : 2));
     350                  ASSERT (src == (unlimited ? NULL : input + 7));
     351                  ASSERT (c32tob (buf[0]) == EOF);
     352                  ASSERT (c32tob (buf[1]) == EOF);
     353                  if (unlimited)
     354                    {
                              /* Unlike the UTF-8 group, no specific code value is
                                 asserted for the third character here.  */
     355                      ASSERT (c32tob (buf[2]) == EOF);
     356                      ASSERT (buf[3] == '!');
     357                      ASSERT (buf[4] == 0);
     358                      ASSERT (buf[5] == (char32_t) 0xBADFACE);
     359                    }
     360                  else
     361                    ASSERT (buf[2] == (char32_t) 0xBADFACE);
     362                  ASSERT (mbsinit (&state));
     363                }
     364                break;
     365  
     366              default:
                        /* Unrecognized selector.  */
     367                return 1;
     368              }
     369          }
     370  
     371        return 0;
     372      }
     373  
            /* No selector argument was given.  */
     374    return 1;
     375  }