(root)/
grep-3.11/
gnulib-tests/
test-mbrlen-w32.c
/* Test of mbrlen(), i.e. recognizing the length of a multibyte character, on native Windows.
       2     Copyright (C) 2008-2023 Free Software Foundation, Inc.
       3  
       4     This program is free software: you can redistribute it and/or modify
       5     it under the terms of the GNU General Public License as published by
       6     the Free Software Foundation, either version 3 of the License, or
       7     (at your option) any later version.
       8  
       9     This program is distributed in the hope that it will be useful,
      10     but WITHOUT ANY WARRANTY; without even the implied warranty of
      11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12     GNU General Public License for more details.
      13  
      14     You should have received a copy of the GNU General Public License
      15     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      16  
      17  #include <config.h>
      18  
      19  #include <wchar.h>
      20  
      21  #include <errno.h>
      22  #include <locale.h>
      23  #include <stdio.h>
      24  #include <stdlib.h>
      25  #include <string.h>
      26  
      27  #include "localcharset.h"
      28  #include "macros.h"
      29  
      30  #if defined _WIN32 && !defined __CYGWIN__
      31  
      32  static int
      33  test_one_locale (const char *name, int codepage)
      34  {
      35    mbstate_t state;
      36    size_t ret;
      37  
      38  # if 1
      39    /* Portable code to set the locale.  */
      40    {
      41      char name_with_codepage[1024];
      42  
      43      sprintf (name_with_codepage, "%s.%d", name, codepage);
      44  
      45      /* Set the locale.  */
      46      if (setlocale (LC_ALL, name_with_codepage) == NULL)
      47        return 77;
      48    }
      49  # else
      50    /* Hacky way to set a locale.codepage combination that setlocale() refuses
      51       to set.  */
      52    {
      53      /* Codepage of the current locale, set with setlocale().
      54         Not necessarily the same as GetACP().  */
      55      extern __declspec(dllimport) unsigned int __lc_codepage;
      56  
      57      /* Set the locale.  */
      58      if (setlocale (LC_ALL, name) == NULL)
      59        return 77;
      60  
      61      /* Clobber the codepage and MB_CUR_MAX, both set by setlocale().  */
      62      __lc_codepage = codepage;
      63      switch (codepage)
      64        {
      65        case 1252:
      66        case 1256:
      67          MB_CUR_MAX = 1;
      68          break;
      69        case 932:
      70        case 950:
      71        case 936:
      72          MB_CUR_MAX = 2;
      73          break;
      74        case 54936:
      75        case 65001:
      76          MB_CUR_MAX = 4;
      77          break;
      78        }
      79  
      80      /* Test whether the codepage is really available.  */
      81      memset (&state, '\0', sizeof (mbstate_t));
      82      if (mbrlen (" ", 1, &state) == (size_t)(-1))
      83        return 77;
      84    }
      85  # endif
      86  
      87    /* Test zero-length input.  */
      88    {
      89      memset (&state, '\0', sizeof (mbstate_t));
      90      ret = mbrlen ("x", 0, &state);
      91      /* gnulib's implementation returns (size_t)(-2).
      92         The AIX 5.1 implementation returns (size_t)(-1).
      93         glibc's implementation returns 0.  */
      94      ASSERT (ret == (size_t)(-2) || ret == (size_t)(-1) || ret == 0);
      95      ASSERT (mbsinit (&state));
      96    }
      97  
      98    /* Test NUL byte input.  */
      99    {
     100      memset (&state, '\0', sizeof (mbstate_t));
     101      ret = mbrlen ("", 1, &state);
     102      ASSERT (ret == 0);
     103      ASSERT (mbsinit (&state));
     104    }
     105  
     106    /* Test single-byte input.  */
     107    {
     108      int c;
     109      char buf[1];
     110  
     111      memset (&state, '\0', sizeof (mbstate_t));
     112      for (c = 0; c < 0x100; c++)
     113        switch (c)
     114          {
     115          case '\t': case '\v': case '\f':
     116          case ' ': case '!': case '"': case '#': case '%':
     117          case '&': case '\'': case '(': case ')': case '*':
     118          case '+': case ',': case '-': case '.': case '/':
     119          case '0': case '1': case '2': case '3': case '4':
     120          case '5': case '6': case '7': case '8': case '9':
     121          case ':': case ';': case '<': case '=': case '>':
     122          case '?':
     123          case 'A': case 'B': case 'C': case 'D': case 'E':
     124          case 'F': case 'G': case 'H': case 'I': case 'J':
     125          case 'K': case 'L': case 'M': case 'N': case 'O':
     126          case 'P': case 'Q': case 'R': case 'S': case 'T':
     127          case 'U': case 'V': case 'W': case 'X': case 'Y':
     128          case 'Z':
     129          case '[': case '\\': case ']': case '^': case '_':
     130          case 'a': case 'b': case 'c': case 'd': case 'e':
     131          case 'f': case 'g': case 'h': case 'i': case 'j':
     132          case 'k': case 'l': case 'm': case 'n': case 'o':
     133          case 'p': case 'q': case 'r': case 's': case 't':
     134          case 'u': case 'v': case 'w': case 'x': case 'y':
     135          case 'z': case '{': case '|': case '}': case '~':
     136            /* c is in the ISO C "basic character set".  */
     137            buf[0] = c;
     138            ret = mbrlen (buf, 1, &state);
     139            ASSERT (ret == 1);
     140            ASSERT (mbsinit (&state));
     141            break;
     142          }
     143    }
     144  
     145    /* Test special calling convention, passing a NULL pointer.  */
     146    {
     147      memset (&state, '\0', sizeof (mbstate_t));
     148      ret = mbrlen (NULL, 5, &state);
     149      ASSERT (ret == 0);
     150      ASSERT (mbsinit (&state));
     151    }
     152  
     153    switch (codepage)
     154      {
     155      case 1252:
     156        /* Locale encoding is CP1252, an extension of ISO-8859-1.  */
     157        {
     158          char input[] = "B\374\337er"; /* "Büßer" */
     159          memset (&state, '\0', sizeof (mbstate_t));
     160  
     161          ret = mbrlen (input, 1, &state);
     162          ASSERT (ret == 1);
     163          ASSERT (mbsinit (&state));
     164          input[0] = '\0';
     165  
     166          ret = mbrlen (input + 1, 1, &state);
     167          ASSERT (ret == 1);
     168          ASSERT (mbsinit (&state));
     169          input[1] = '\0';
     170  
     171          ret = mbrlen (input + 2, 3, &state);
     172          ASSERT (ret == 1);
     173          ASSERT (mbsinit (&state));
     174          input[2] = '\0';
     175  
     176          ret = mbrlen (input + 3, 2, &state);
     177          ASSERT (ret == 1);
     178          ASSERT (mbsinit (&state));
     179          input[3] = '\0';
     180  
     181          ret = mbrlen (input + 4, 1, &state);
     182          ASSERT (ret == 1);
     183          ASSERT (mbsinit (&state));
     184        }
     185        return 0;
     186  
     187      case 1256:
     188        /* Locale encoding is CP1256, not the same as ISO-8859-6.  */
     189        {
     190          char input[] = "x\302\341\346y"; /* "xآلوy" */
     191          memset (&state, '\0', sizeof (mbstate_t));
     192  
     193          ret = mbrlen (input, 1, &state);
     194          ASSERT (ret == 1);
     195          ASSERT (mbsinit (&state));
     196          input[0] = '\0';
     197  
     198          ret = mbrlen (input + 1, 1, &state);
     199          ASSERT (ret == 1);
     200          ASSERT (mbsinit (&state));
     201          input[1] = '\0';
     202  
     203          ret = mbrlen (input + 2, 3, &state);
     204          ASSERT (ret == 1);
     205          ASSERT (mbsinit (&state));
     206          input[2] = '\0';
     207  
     208          ret = mbrlen (input + 3, 2, &state);
     209          ASSERT (ret == 1);
     210          ASSERT (mbsinit (&state));
     211          input[3] = '\0';
     212  
     213          ret = mbrlen (input + 4, 1, &state);
     214          ASSERT (ret == 1);
     215          ASSERT (mbsinit (&state));
     216        }
     217        return 0;
     218  
     219      case 932:
     220        /* Locale encoding is CP932, similar to Shift_JIS.  */
     221        {
     222          char input[] = "<\223\372\226\173\214\352>"; /* "<日本語>" */
     223          memset (&state, '\0', sizeof (mbstate_t));
     224  
     225          ret = mbrlen (input, 1, &state);
     226          ASSERT (ret == 1);
     227          ASSERT (mbsinit (&state));
     228          input[0] = '\0';
     229  
     230          ret = mbrlen (input + 1, 2, &state);
     231          ASSERT (ret == 2);
     232          ASSERT (mbsinit (&state));
     233          input[1] = '\0';
     234          input[2] = '\0';
     235  
     236          ret = mbrlen (input + 3, 1, &state);
     237          ASSERT (ret == (size_t)(-2));
     238          ASSERT (!mbsinit (&state));
     239          input[3] = '\0';
     240  
     241          ret = mbrlen (input + 4, 4, &state);
     242          ASSERT (ret == 1);
     243          ASSERT (mbsinit (&state));
     244          input[4] = '\0';
     245  
     246          ret = mbrlen (input + 5, 3, &state);
     247          ASSERT (ret == 2);
     248          ASSERT (mbsinit (&state));
     249          input[5] = '\0';
     250          input[6] = '\0';
     251  
     252          ret = mbrlen (input + 7, 1, &state);
     253          ASSERT (ret == 1);
     254          ASSERT (mbsinit (&state));
     255  
     256          /* Test some invalid input.  */
     257          memset (&state, '\0', sizeof (mbstate_t));
     258          ret = mbrlen ("\377", 1, &state); /* 0xFF */
     259          ASSERT ((ret == (size_t)-1 && errno == EILSEQ) || ret == (size_t)-2);
     260  
     261          memset (&state, '\0', sizeof (mbstate_t));
     262          ret = mbrlen ("\225\377", 2, &state); /* 0x95 0xFF */
     263          ASSERT ((ret == (size_t)-1 && errno == EILSEQ) || ret == 2);
     264        }
     265        return 0;
     266  
     267      case 950:
     268        /* Locale encoding is CP950, similar to Big5.  */
     269        {
     270          char input[] = "<\244\351\245\273\273\171>"; /* "<日本語>" */
     271          memset (&state, '\0', sizeof (mbstate_t));
     272  
     273          ret = mbrlen (input, 1, &state);
     274          ASSERT (ret == 1);
     275          ASSERT (mbsinit (&state));
     276          input[0] = '\0';
     277  
     278          ret = mbrlen (input + 1, 2, &state);
     279          ASSERT (ret == 2);
     280          ASSERT (mbsinit (&state));
     281          input[1] = '\0';
     282          input[2] = '\0';
     283  
     284          ret = mbrlen (input + 3, 1, &state);
     285          ASSERT (ret == (size_t)(-2));
     286          ASSERT (!mbsinit (&state));
     287          input[3] = '\0';
     288  
     289          ret = mbrlen (input + 4, 4, &state);
     290          ASSERT (ret == 1);
     291          ASSERT (mbsinit (&state));
     292          input[4] = '\0';
     293  
     294          ret = mbrlen (input + 5, 3, &state);
     295          ASSERT (ret == 2);
     296          ASSERT (mbsinit (&state));
     297          input[5] = '\0';
     298          input[6] = '\0';
     299  
     300          ret = mbrlen (input + 7, 1, &state);
     301          ASSERT (ret == 1);
     302          ASSERT (mbsinit (&state));
     303  
     304          /* Test some invalid input.  */
     305          memset (&state, '\0', sizeof (mbstate_t));
     306          ret = mbrlen ("\377", 1, &state); /* 0xFF */
     307          ASSERT ((ret == (size_t)-1 && errno == EILSEQ) || ret == (size_t)-2);
     308  
     309          memset (&state, '\0', sizeof (mbstate_t));
     310          ret = mbrlen ("\225\377", 2, &state); /* 0x95 0xFF */
     311          ASSERT ((ret == (size_t)-1 && errno == EILSEQ) || ret == 2);
     312        }
     313        return 0;
     314  
     315      case 936:
     316        /* Locale encoding is CP936 = GBK, an extension of GB2312.  */
     317        {
     318          char input[] = "<\310\325\261\276\325\132>"; /* "<日本語>" */
     319          memset (&state, '\0', sizeof (mbstate_t));
     320  
     321          ret = mbrlen (input, 1, &state);
     322          ASSERT (ret == 1);
     323          ASSERT (mbsinit (&state));
     324          input[0] = '\0';
     325  
     326          ret = mbrlen (input + 1, 2, &state);
     327          ASSERT (ret == 2);
     328          ASSERT (mbsinit (&state));
     329          input[1] = '\0';
     330          input[2] = '\0';
     331  
     332          ret = mbrlen (input + 3, 1, &state);
     333          ASSERT (ret == (size_t)(-2));
     334          ASSERT (!mbsinit (&state));
     335          input[3] = '\0';
     336  
     337          ret = mbrlen (input + 4, 4, &state);
     338          ASSERT (ret == 1);
     339          ASSERT (mbsinit (&state));
     340          input[4] = '\0';
     341  
     342          ret = mbrlen (input + 5, 3, &state);
     343          ASSERT (ret == 2);
     344          ASSERT (mbsinit (&state));
     345          input[5] = '\0';
     346          input[6] = '\0';
     347  
     348          ret = mbrlen (input + 7, 1, &state);
     349          ASSERT (ret == 1);
     350          ASSERT (mbsinit (&state));
     351  
     352          /* Test some invalid input.  */
     353          memset (&state, '\0', sizeof (mbstate_t));
     354          ret = mbrlen ("\377", 1, &state); /* 0xFF */
     355          ASSERT ((ret == (size_t)-1 && errno == EILSEQ) || ret == (size_t)-2);
     356  
     357          memset (&state, '\0', sizeof (mbstate_t));
     358          ret = mbrlen ("\225\377", 2, &state); /* 0x95 0xFF */
     359          ASSERT ((ret == (size_t)-1 && errno == EILSEQ) || ret == 2);
     360        }
     361        return 0;
     362  
     363      case 54936:
     364        /* Locale encoding is CP54936 = GB18030.  */
     365        if (strcmp (locale_charset (), "GB18030") != 0)
     366          return 77;
     367        {
     368          char input[] = "B\250\271\201\060\211\070er"; /* "Büßer" */
     369          memset (&state, '\0', sizeof (mbstate_t));
     370  
     371          ret = mbrlen (input, 1, &state);
     372          ASSERT (ret == 1);
     373          ASSERT (mbsinit (&state));
     374          input[0] = '\0';
     375  
     376          ret = mbrlen (input + 1, 1, &state);
     377          ASSERT (ret == (size_t)(-2));
     378          ASSERT (!mbsinit (&state));
     379          input[1] = '\0';
     380  
     381          ret = mbrlen (input + 2, 7, &state);
     382          ASSERT (ret == 1);
     383          ASSERT (mbsinit (&state));
     384          input[2] = '\0';
     385  
     386          ret = mbrlen (input + 3, 6, &state);
     387          ASSERT (ret == 4);
     388          ASSERT (mbsinit (&state));
     389          input[3] = '\0';
     390          input[4] = '\0';
     391          input[5] = '\0';
     392          input[6] = '\0';
     393  
     394          ret = mbrlen (input + 7, 2, &state);
     395          ASSERT (ret == 1);
     396          ASSERT (mbsinit (&state));
     397          input[7] = '\0';
     398  
     399          ret = mbrlen (input + 8, 1, &state);
     400          ASSERT (ret == 1);
     401          ASSERT (mbsinit (&state));
     402  
     403          /* Test some invalid input.  */
     404          memset (&state, '\0', sizeof (mbstate_t));
     405          ret = mbrlen ("\377", 1, &state); /* 0xFF */
     406          ASSERT (ret == (size_t)-1);
     407          ASSERT (errno == EILSEQ);
     408  
     409          memset (&state, '\0', sizeof (mbstate_t));
     410          ret = mbrlen ("\225\377", 2, &state); /* 0x95 0xFF */
     411          ASSERT (ret == (size_t)-1);
     412          ASSERT (errno == EILSEQ);
     413  
     414          memset (&state, '\0', sizeof (mbstate_t));
     415          ret = mbrlen ("\201\045", 2, &state); /* 0x81 0x25 */
     416          ASSERT (ret == (size_t)-1);
     417          ASSERT (errno == EILSEQ);
     418  
     419          memset (&state, '\0', sizeof (mbstate_t));
     420          ret = mbrlen ("\201\060\377", 3, &state); /* 0x81 0x30 0xFF */
     421          ASSERT (ret == (size_t)-1);
     422          ASSERT (errno == EILSEQ);
     423  
     424          memset (&state, '\0', sizeof (mbstate_t));
     425          ret = mbrlen ("\201\060\377\064", 4, &state); /* 0x81 0x30 0xFF 0x34 */
     426          ASSERT (ret == (size_t)-1);
     427          ASSERT (errno == EILSEQ);
     428  
     429          memset (&state, '\0', sizeof (mbstate_t));
     430          ret = mbrlen ("\201\060\211\072", 4, &state); /* 0x81 0x30 0x89 0x3A */
     431          ASSERT (ret == (size_t)-1);
     432          ASSERT (errno == EILSEQ);
     433        }
     434        return 0;
     435  
     436      case 65001:
     437        /* Locale encoding is CP65001 = UTF-8.  */
     438        if (strcmp (locale_charset (), "UTF-8") != 0)
     439          return 77;
     440        {
     441          char input[] = "B\303\274\303\237er"; /* "Büßer" */
     442          memset (&state, '\0', sizeof (mbstate_t));
     443  
     444          ret = mbrlen (input, 1, &state);
     445          ASSERT (ret == 1);
     446          ASSERT (mbsinit (&state));
     447          input[0] = '\0';
     448  
     449          ret = mbrlen (input + 1, 1, &state);
     450          ASSERT (ret == (size_t)(-2));
     451          ASSERT (!mbsinit (&state));
     452          input[1] = '\0';
     453  
     454          ret = mbrlen (input + 2, 5, &state);
     455          ASSERT (ret == 1);
     456          ASSERT (mbsinit (&state));
     457          input[2] = '\0';
     458  
     459          ret = mbrlen (input + 3, 4, &state);
     460          ASSERT (ret == 2);
     461          ASSERT (mbsinit (&state));
     462          input[3] = '\0';
     463          input[4] = '\0';
     464  
     465          ret = mbrlen (input + 5, 2, &state);
     466          ASSERT (ret == 1);
     467          ASSERT (mbsinit (&state));
     468          input[5] = '\0';
     469  
     470          ret = mbrlen (input + 6, 1, &state);
     471          ASSERT (ret == 1);
     472          ASSERT (mbsinit (&state));
     473  
     474          /* Test some invalid input.  */
     475          memset (&state, '\0', sizeof (mbstate_t));
     476          ret = mbrlen ("\377", 1, &state); /* 0xFF */
     477          ASSERT (ret == (size_t)-1);
     478          ASSERT (errno == EILSEQ);
     479  
     480          memset (&state, '\0', sizeof (mbstate_t));
     481          ret = mbrlen ("\303\300", 2, &state); /* 0xC3 0xC0 */
     482          ASSERT (ret == (size_t)-1);
     483          ASSERT (errno == EILSEQ);
     484  
     485          memset (&state, '\0', sizeof (mbstate_t));
     486          ret = mbrlen ("\343\300", 2, &state); /* 0xE3 0xC0 */
     487          ASSERT (ret == (size_t)-1);
     488          ASSERT (errno == EILSEQ);
     489  
     490          memset (&state, '\0', sizeof (mbstate_t));
     491          ret = mbrlen ("\343\300\200", 3, &state); /* 0xE3 0xC0 0x80 */
     492          ASSERT (ret == (size_t)-1);
     493          ASSERT (errno == EILSEQ);
     494  
     495          memset (&state, '\0', sizeof (mbstate_t));
     496          ret = mbrlen ("\343\200\300", 3, &state); /* 0xE3 0x80 0xC0 */
     497          ASSERT (ret == (size_t)-1);
     498          ASSERT (errno == EILSEQ);
     499  
     500          memset (&state, '\0', sizeof (mbstate_t));
     501          ret = mbrlen ("\363\300", 2, &state); /* 0xF3 0xC0 */
     502          ASSERT (ret == (size_t)-1);
     503          ASSERT (errno == EILSEQ);
     504  
     505          memset (&state, '\0', sizeof (mbstate_t));
     506          ret = mbrlen ("\363\300\200\200", 4, &state); /* 0xF3 0xC0 0x80 0x80 */
     507          ASSERT (ret == (size_t)-1);
     508          ASSERT (errno == EILSEQ);
     509  
     510          memset (&state, '\0', sizeof (mbstate_t));
     511          ret = mbrlen ("\363\200\300", 3, &state); /* 0xF3 0x80 0xC0 */
     512          ASSERT (ret == (size_t)-1);
     513          ASSERT (errno == EILSEQ);
     514  
     515          memset (&state, '\0', sizeof (mbstate_t));
     516          ret = mbrlen ("\363\200\300\200", 4, &state); /* 0xF3 0x80 0xC0 0x80 */
     517          ASSERT (ret == (size_t)-1);
     518          ASSERT (errno == EILSEQ);
     519  
     520          memset (&state, '\0', sizeof (mbstate_t));
     521          ret = mbrlen ("\363\200\200\300", 4, &state); /* 0xF3 0x80 0x80 0xC0 */
     522          ASSERT (ret == (size_t)-1);
     523          ASSERT (errno == EILSEQ);
     524        }
     525        return 0;
     526  
     527      default:
     528        return 1;
     529      }
     530  }
     531  
     532  int
     533  main (int argc, char *argv[])
     534  {
     535    int codepage = atoi (argv[argc - 1]);
     536    int result;
     537    int i;
     538  
     539    result = 77;
     540    for (i = 1; i < argc - 1; i++)
     541      {
     542        int ret = test_one_locale (argv[i], codepage);
     543  
     544        if (ret != 77)
     545          result = ret;
     546      }
     547  
     548    if (result == 77)
     549      {
     550        fprintf (stderr, "Skipping test: found no locale with codepage %d\n",
     551                 codepage);
     552      }
     553    return result;
     554  }
     555  
     556  #else
     557  
     558  int
     559  main (int argc, char *argv[])
     560  {
     561    fputs ("Skipping test: not a native Windows system\n", stderr);
     562    return 77;
     563  }
     564  
     565  #endif