(root)/
coreutils-9.4/
gnulib-tests/
test-mbrlen-w32.c
       1  /* Test of determining the length of a multibyte character (mbrlen).
       2     Copyright (C) 2008-2023 Free Software Foundation, Inc.
       3  
       4     This program is free software: you can redistribute it and/or modify
       5     it under the terms of the GNU General Public License as published by
       6     the Free Software Foundation, either version 3 of the License, or
       7     (at your option) any later version.
       8  
       9     This program is distributed in the hope that it will be useful,
      10     but WITHOUT ANY WARRANTY; without even the implied warranty of
      11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12     GNU General Public License for more details.
      13  
      14     You should have received a copy of the GNU General Public License
      15     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      16  
      17  #include <config.h>
      18  
      19  #include <wchar.h>
      20  
      21  #include <errno.h>
      22  #include <locale.h>
      23  #include <stdio.h>
      24  #include <stdlib.h>
      25  #include <string.h>
      26  
      27  #include "localcharset.h"
      28  #include "macros.h"
      29  
      30  #if defined _WIN32 && !defined __CYGWIN__
      31  
      32  static int
      33  test_one_locale (const char *name, int codepage)
      34  {
      35    mbstate_t state;
      36    size_t ret;
      37  
      38  # if 1
      39    /* Portable code to set the locale.  */
      40    {
      41      char name_with_codepage[1024];
      42  
      43      sprintf (name_with_codepage, "%s.%d", name, codepage);
      44  
      45      /* Set the locale.  */
      46      if (setlocale (LC_ALL, name_with_codepage) == NULL)
      47        return 77;
      48    }
      49  # else
      50    /* Hacky way to set a locale.codepage combination that setlocale() refuses
      51       to set.  */
      52    {
      53      /* Codepage of the current locale, set with setlocale().
      54         Not necessarily the same as GetACP().  */
      55      extern __declspec(dllimport) unsigned int __lc_codepage;
      56  
      57      /* Set the locale.  */
      58      if (setlocale (LC_ALL, name) == NULL)
      59        return 77;
      60  
      61      /* Clobber the codepage and MB_CUR_MAX, both set by setlocale().  */
      62      __lc_codepage = codepage;
      63      switch (codepage)
      64        {
      65        case 1252:
      66        case 1256:
      67          MB_CUR_MAX = 1;
      68          break;
      69        case 932:
      70        case 950:
      71        case 936:
      72          MB_CUR_MAX = 2;
      73          break;
      74        case 54936:
      75        case 65001:
      76          MB_CUR_MAX = 4;
      77          break;
      78        }
      79  
      80      /* Test whether the codepage is really available.  */
      81      memset (&state, '\0', sizeof (mbstate_t));
      82      if (mbrlen (" ", 1, &state) == (size_t)(-1))
      83        return 77;
      84    }
      85  # endif
      86  
      87    /* Test zero-length input.  */
      88    {
      89      memset (&state, '\0', sizeof (mbstate_t));
      90      ret = mbrlen ("x", 0, &state);
      91      /* gnulib's implementation returns (size_t)(-2).
      92         The AIX 5.1 implementation returns (size_t)(-1).
      93         glibc's implementation returns 0.  */
      94      ASSERT (ret == (size_t)(-2) || ret == (size_t)(-1) || ret == 0);
      95      ASSERT (mbsinit (&state));
      96    }
      97  
      98    /* Test NUL byte input.  */
      99    {
     100      memset (&state, '\0', sizeof (mbstate_t));
     101      ret = mbrlen ("", 1, &state);
     102      ASSERT (ret == 0);
     103      ASSERT (mbsinit (&state));
     104    }
     105  
     106    /* Test single-byte input.  */
     107    {
     108      int c;
     109      char buf[1];
     110  
     111      memset (&state, '\0', sizeof (mbstate_t));
     112      for (c = 0; c < 0x100; c++)
     113        switch (c)
     114          {
     115          case '\t': case '\v': case '\f':
     116          case ' ': case '!': case '"': case '#': case '%':
     117          case '&': case '\'': case '(': case ')': case '*':
     118          case '+': case ',': case '-': case '.': case '/':
     119          case '0': case '1': case '2': case '3': case '4':
     120          case '5': case '6': case '7': case '8': case '9':
     121          case ':': case ';': case '<': case '=': case '>':
     122          case '?':
     123          case 'A': case 'B': case 'C': case 'D': case 'E':
     124          case 'F': case 'G': case 'H': case 'I': case 'J':
     125          case 'K': case 'L': case 'M': case 'N': case 'O':
     126          case 'P': case 'Q': case 'R': case 'S': case 'T':
     127          case 'U': case 'V': case 'W': case 'X': case 'Y':
     128          case 'Z':
     129          case '[': case '\\': case ']': case '^': case '_':
     130          case 'a': case 'b': case 'c': case 'd': case 'e':
     131          case 'f': case 'g': case 'h': case 'i': case 'j':
     132          case 'k': case 'l': case 'm': case 'n': case 'o':
     133          case 'p': case 'q': case 'r': case 's': case 't':
     134          case 'u': case 'v': case 'w': case 'x': case 'y':
     135          case 'z': case '{': case '|': case '}': case '~':
     136            /* c is in the ISO C "basic character set".  */
     137            buf[0] = c;
     138            ret = mbrlen (buf, 1, &state);
     139            ASSERT (ret == 1);
     140            ASSERT (mbsinit (&state));
     141            break;
     142          }
     143    }
     144  
     145    /* Test special calling convention, passing a NULL pointer.  */
     146    {
     147      memset (&state, '\0', sizeof (mbstate_t));
     148      ret = mbrlen (NULL, 5, &state);
     149      ASSERT (ret == 0);
     150      ASSERT (mbsinit (&state));
     151    }
     152  
     153    switch (codepage)
     154      {
     155      case 1252:
     156        /* Locale encoding is CP1252, an extension of ISO-8859-1.  */
     157        {
     158          char input[] = "B\374\337er"; /* "Büßer" */
     159          memset (&state, '\0', sizeof (mbstate_t));
     160  
     161          ret = mbrlen (input, 1, &state);
     162          ASSERT (ret == 1);
     163          ASSERT (mbsinit (&state));
     164          input[0] = '\0';
     165  
     166          ret = mbrlen (input + 1, 1, &state);
     167          ASSERT (ret == 1);
     168          ASSERT (mbsinit (&state));
     169          input[1] = '\0';
     170  
     171          ret = mbrlen (input + 2, 3, &state);
     172          ASSERT (ret == 1);
     173          ASSERT (mbsinit (&state));
     174          input[2] = '\0';
     175  
     176          ret = mbrlen (input + 3, 2, &state);
     177          ASSERT (ret == 1);
     178          ASSERT (mbsinit (&state));
     179          input[3] = '\0';
     180  
     181          ret = mbrlen (input + 4, 1, &state);
     182          ASSERT (ret == 1);
     183          ASSERT (mbsinit (&state));
     184        }
     185        return 0;
     186  
     187      case 1256:
     188        /* Locale encoding is CP1256, not the same as ISO-8859-6.  */
     189        {
     190          char input[] = "x\302\341\346y"; /* "xآلوy" */
     191          memset (&state, '\0', sizeof (mbstate_t));
     192  
     193          ret = mbrlen (input, 1, &state);
     194          ASSERT (ret == 1);
     195          ASSERT (mbsinit (&state));
     196          input[0] = '\0';
     197  
     198          ret = mbrlen (input + 1, 1, &state);
     199          ASSERT (ret == 1);
     200          ASSERT (mbsinit (&state));
     201          input[1] = '\0';
     202  
     203          ret = mbrlen (input + 2, 3, &state);
     204          ASSERT (ret == 1);
     205          ASSERT (mbsinit (&state));
     206          input[2] = '\0';
     207  
     208          ret = mbrlen (input + 3, 2, &state);
     209          ASSERT (ret == 1);
     210          ASSERT (mbsinit (&state));
     211          input[3] = '\0';
     212  
     213          ret = mbrlen (input + 4, 1, &state);
     214          ASSERT (ret == 1);
     215          ASSERT (mbsinit (&state));
     216        }
     217        return 0;
     218  
     219      case 65001:
     220        /* Locale encoding is CP65001 = UTF-8.  */
     221        if (strcmp (locale_charset (), "UTF-8") != 0)
     222          return 77;
     223        {
     224          char input[] = "B\303\274\303\237er"; /* "Büßer" */
     225          memset (&state, '\0', sizeof (mbstate_t));
     226  
     227          ret = mbrlen (input, 1, &state);
     228          ASSERT (ret == 1);
     229          ASSERT (mbsinit (&state));
     230          input[0] = '\0';
     231  
     232          ret = mbrlen (input + 1, 1, &state);
     233          ASSERT (ret == (size_t)(-2));
     234          ASSERT (!mbsinit (&state));
     235          input[1] = '\0';
     236  
     237          ret = mbrlen (input + 2, 5, &state);
     238          ASSERT (ret == 1);
     239          ASSERT (mbsinit (&state));
     240          input[2] = '\0';
     241  
     242          ret = mbrlen (input + 3, 4, &state);
     243          ASSERT (ret == 2);
     244          ASSERT (mbsinit (&state));
     245          input[3] = '\0';
     246          input[4] = '\0';
     247  
     248          ret = mbrlen (input + 5, 2, &state);
     249          ASSERT (ret == 1);
     250          ASSERT (mbsinit (&state));
     251          input[5] = '\0';
     252  
     253          ret = mbrlen (input + 6, 1, &state);
     254          ASSERT (ret == 1);
     255          ASSERT (mbsinit (&state));
     256  
     257          /* Test some invalid input.  */
     258          memset (&state, '\0', sizeof (mbstate_t));
     259          ret = mbrlen ("\377", 1, &state); /* 0xFF */
     260          ASSERT (ret == (size_t)-1);
     261          ASSERT (errno == EILSEQ);
     262  
     263          memset (&state, '\0', sizeof (mbstate_t));
     264          ret = mbrlen ("\303\300", 2, &state); /* 0xC3 0xC0 */
     265          ASSERT (ret == (size_t)-1);
     266          ASSERT (errno == EILSEQ);
     267  
     268          memset (&state, '\0', sizeof (mbstate_t));
     269          ret = mbrlen ("\343\300", 2, &state); /* 0xE3 0xC0 */
     270          ASSERT (ret == (size_t)-1);
     271          ASSERT (errno == EILSEQ);
     272  
     273          memset (&state, '\0', sizeof (mbstate_t));
     274          ret = mbrlen ("\343\300\200", 3, &state); /* 0xE3 0xC0 0x80 */
     275          ASSERT (ret == (size_t)-1);
     276          ASSERT (errno == EILSEQ);
     277  
     278          memset (&state, '\0', sizeof (mbstate_t));
     279          ret = mbrlen ("\343\200\300", 3, &state); /* 0xE3 0x80 0xC0 */
     280          ASSERT (ret == (size_t)-1);
     281          ASSERT (errno == EILSEQ);
     282  
     283          memset (&state, '\0', sizeof (mbstate_t));
     284          ret = mbrlen ("\363\300", 2, &state); /* 0xF3 0xC0 */
     285          ASSERT (ret == (size_t)-1);
     286          ASSERT (errno == EILSEQ);
     287  
     288          memset (&state, '\0', sizeof (mbstate_t));
     289          ret = mbrlen ("\363\300\200\200", 4, &state); /* 0xF3 0xC0 0x80 0x80 */
     290          ASSERT (ret == (size_t)-1);
     291          ASSERT (errno == EILSEQ);
     292  
     293          memset (&state, '\0', sizeof (mbstate_t));
     294          ret = mbrlen ("\363\200\300", 3, &state); /* 0xF3 0x80 0xC0 */
     295          ASSERT (ret == (size_t)-1);
     296          ASSERT (errno == EILSEQ);
     297  
     298          memset (&state, '\0', sizeof (mbstate_t));
     299          ret = mbrlen ("\363\200\300\200", 4, &state); /* 0xF3 0x80 0xC0 0x80 */
     300          ASSERT (ret == (size_t)-1);
     301          ASSERT (errno == EILSEQ);
     302  
     303          memset (&state, '\0', sizeof (mbstate_t));
     304          ret = mbrlen ("\363\200\200\300", 4, &state); /* 0xF3 0x80 0x80 0xC0 */
     305          ASSERT (ret == (size_t)-1);
     306          ASSERT (errno == EILSEQ);
     307        }
     308        return 0;
     309  
     310      case 932:
     311        /* Locale encoding is CP932, similar to Shift_JIS.  */
     312        {
     313          char input[] = "<\223\372\226\173\214\352>"; /* "<日本語>" */
     314          memset (&state, '\0', sizeof (mbstate_t));
     315  
     316          ret = mbrlen (input, 1, &state);
     317          ASSERT (ret == 1);
     318          ASSERT (mbsinit (&state));
     319          input[0] = '\0';
     320  
     321          ret = mbrlen (input + 1, 2, &state);
     322          ASSERT (ret == 2);
     323          ASSERT (mbsinit (&state));
     324          input[1] = '\0';
     325          input[2] = '\0';
     326  
     327          ret = mbrlen (input + 3, 1, &state);
     328          ASSERT (ret == (size_t)(-2));
     329          ASSERT (!mbsinit (&state));
     330          input[3] = '\0';
     331  
     332          ret = mbrlen (input + 4, 4, &state);
     333          ASSERT (ret == 1);
     334          ASSERT (mbsinit (&state));
     335          input[4] = '\0';
     336  
     337          ret = mbrlen (input + 5, 3, &state);
     338          ASSERT (ret == 2);
     339          ASSERT (mbsinit (&state));
     340          input[5] = '\0';
     341          input[6] = '\0';
     342  
     343          ret = mbrlen (input + 7, 1, &state);
     344          ASSERT (ret == 1);
     345          ASSERT (mbsinit (&state));
     346  
     347          /* Test some invalid input.  */
     348          memset (&state, '\0', sizeof (mbstate_t));
     349          ret = mbrlen ("\377", 1, &state); /* 0xFF */
     350          ASSERT ((ret == (size_t)-1 && errno == EILSEQ) || ret == (size_t)-2);
     351  
     352          memset (&state, '\0', sizeof (mbstate_t));
     353          ret = mbrlen ("\225\377", 2, &state); /* 0x95 0xFF */
     354          ASSERT ((ret == (size_t)-1 && errno == EILSEQ) || ret == 2);
     355        }
     356        return 0;
     357  
     358      case 950:
     359        /* Locale encoding is CP950, similar to Big5.  */
     360        {
     361          char input[] = "<\244\351\245\273\273\171>"; /* "<日本語>" */
     362          memset (&state, '\0', sizeof (mbstate_t));
     363  
     364          ret = mbrlen (input, 1, &state);
     365          ASSERT (ret == 1);
     366          ASSERT (mbsinit (&state));
     367          input[0] = '\0';
     368  
     369          ret = mbrlen (input + 1, 2, &state);
     370          ASSERT (ret == 2);
     371          ASSERT (mbsinit (&state));
     372          input[1] = '\0';
     373          input[2] = '\0';
     374  
     375          ret = mbrlen (input + 3, 1, &state);
     376          ASSERT (ret == (size_t)(-2));
     377          ASSERT (!mbsinit (&state));
     378          input[3] = '\0';
     379  
     380          ret = mbrlen (input + 4, 4, &state);
     381          ASSERT (ret == 1);
     382          ASSERT (mbsinit (&state));
     383          input[4] = '\0';
     384  
     385          ret = mbrlen (input + 5, 3, &state);
     386          ASSERT (ret == 2);
     387          ASSERT (mbsinit (&state));
     388          input[5] = '\0';
     389          input[6] = '\0';
     390  
     391          ret = mbrlen (input + 7, 1, &state);
     392          ASSERT (ret == 1);
     393          ASSERT (mbsinit (&state));
     394  
     395          /* Test some invalid input.  */
     396          memset (&state, '\0', sizeof (mbstate_t));
     397          ret = mbrlen ("\377", 1, &state); /* 0xFF */
     398          ASSERT ((ret == (size_t)-1 && errno == EILSEQ) || ret == (size_t)-2);
     399  
     400          memset (&state, '\0', sizeof (mbstate_t));
     401          ret = mbrlen ("\225\377", 2, &state); /* 0x95 0xFF */
     402          ASSERT ((ret == (size_t)-1 && errno == EILSEQ) || ret == 2);
     403        }
     404        return 0;
     405  
     406      case 936:
     407        /* Locale encoding is CP936 = GBK, an extension of GB2312.  */
     408        {
     409          char input[] = "<\310\325\261\276\325\132>"; /* "<日本語>" */
     410          memset (&state, '\0', sizeof (mbstate_t));
     411  
     412          ret = mbrlen (input, 1, &state);
     413          ASSERT (ret == 1);
     414          ASSERT (mbsinit (&state));
     415          input[0] = '\0';
     416  
     417          ret = mbrlen (input + 1, 2, &state);
     418          ASSERT (ret == 2);
     419          ASSERT (mbsinit (&state));
     420          input[1] = '\0';
     421          input[2] = '\0';
     422  
     423          ret = mbrlen (input + 3, 1, &state);
     424          ASSERT (ret == (size_t)(-2));
     425          ASSERT (!mbsinit (&state));
     426          input[3] = '\0';
     427  
     428          ret = mbrlen (input + 4, 4, &state);
     429          ASSERT (ret == 1);
     430          ASSERT (mbsinit (&state));
     431          input[4] = '\0';
     432  
     433          ret = mbrlen (input + 5, 3, &state);
     434          ASSERT (ret == 2);
     435          ASSERT (mbsinit (&state));
     436          input[5] = '\0';
     437          input[6] = '\0';
     438  
     439          ret = mbrlen (input + 7, 1, &state);
     440          ASSERT (ret == 1);
     441          ASSERT (mbsinit (&state));
     442  
     443          /* Test some invalid input.  */
     444          memset (&state, '\0', sizeof (mbstate_t));
     445          ret = mbrlen ("\377", 1, &state); /* 0xFF */
     446          ASSERT ((ret == (size_t)-1 && errno == EILSEQ) || ret == (size_t)-2);
     447  
     448          memset (&state, '\0', sizeof (mbstate_t));
     449          ret = mbrlen ("\225\377", 2, &state); /* 0x95 0xFF */
     450          ASSERT ((ret == (size_t)-1 && errno == EILSEQ) || ret == 2);
     451        }
     452        return 0;
     453  
     454      case 54936:
     455        /* Locale encoding is CP54936 = GB18030.  */
     456        if (strcmp (locale_charset (), "GB18030") != 0)
     457          return 77;
     458        {
     459          char input[] = "B\250\271\201\060\211\070er"; /* "Büßer" */
     460          memset (&state, '\0', sizeof (mbstate_t));
     461  
     462          ret = mbrlen (input, 1, &state);
     463          ASSERT (ret == 1);
     464          ASSERT (mbsinit (&state));
     465          input[0] = '\0';
     466  
     467          ret = mbrlen (input + 1, 1, &state);
     468          ASSERT (ret == (size_t)(-2));
     469          ASSERT (!mbsinit (&state));
     470          input[1] = '\0';
     471  
     472          ret = mbrlen (input + 2, 7, &state);
     473          ASSERT (ret == 1);
     474          ASSERT (mbsinit (&state));
     475          input[2] = '\0';
     476  
     477          ret = mbrlen (input + 3, 6, &state);
     478          ASSERT (ret == 4);
     479          ASSERT (mbsinit (&state));
     480          input[3] = '\0';
     481          input[4] = '\0';
     482          input[5] = '\0';
     483          input[6] = '\0';
     484  
     485          ret = mbrlen (input + 7, 2, &state);
     486          ASSERT (ret == 1);
     487          ASSERT (mbsinit (&state));
     488          input[7] = '\0';
     489  
     490          ret = mbrlen (input + 8, 1, &state);
     491          ASSERT (ret == 1);
     492          ASSERT (mbsinit (&state));
     493  
     494          /* Test some invalid input.  */
     495          memset (&state, '\0', sizeof (mbstate_t));
     496          ret = mbrlen ("\377", 1, &state); /* 0xFF */
     497          ASSERT (ret == (size_t)-1);
     498          ASSERT (errno == EILSEQ);
     499  
     500          memset (&state, '\0', sizeof (mbstate_t));
     501          ret = mbrlen ("\225\377", 2, &state); /* 0x95 0xFF */
     502          ASSERT (ret == (size_t)-1);
     503          ASSERT (errno == EILSEQ);
     504  
     505          memset (&state, '\0', sizeof (mbstate_t));
     506          ret = mbrlen ("\201\045", 2, &state); /* 0x81 0x25 */
     507          ASSERT (ret == (size_t)-1);
     508          ASSERT (errno == EILSEQ);
     509  
     510          memset (&state, '\0', sizeof (mbstate_t));
     511          ret = mbrlen ("\201\060\377", 3, &state); /* 0x81 0x30 0xFF */
     512          ASSERT (ret == (size_t)-1);
     513          ASSERT (errno == EILSEQ);
     514  
     515          memset (&state, '\0', sizeof (mbstate_t));
     516          ret = mbrlen ("\201\060\377\064", 4, &state); /* 0x81 0x30 0xFF 0x34 */
     517          ASSERT (ret == (size_t)-1);
     518          ASSERT (errno == EILSEQ);
     519  
     520          memset (&state, '\0', sizeof (mbstate_t));
     521          ret = mbrlen ("\201\060\211\072", 4, &state); /* 0x81 0x30 0x89 0x3A */
     522          ASSERT (ret == (size_t)-1);
     523          ASSERT (errno == EILSEQ);
     524        }
     525        return 0;
     526  
     527      default:
     528        return 1;
     529      }
     530  }
     531  
     532  int
     533  main (int argc, char *argv[])
     534  {
     535    int codepage = atoi (argv[argc - 1]);
     536    int result;
     537    int i;
     538  
     539    result = 77;
     540    for (i = 1; i < argc - 1; i++)
     541      {
     542        int ret = test_one_locale (argv[i], codepage);
     543  
     544        if (ret != 77)
     545          result = ret;
     546      }
     547  
     548    if (result == 77)
     549      {
     550        fprintf (stderr, "Skipping test: found no locale with codepage %d\n",
     551                 codepage);
     552      }
     553    return result;
     554  }
     555  
     556  #else
     557  
     558  int
     559  main (int argc, char *argv[])
     560  {
     561    fputs ("Skipping test: not a native Windows system\n", stderr);
     562    return 77;
     563  }
     564  
     565  #endif