(root)/
sed-4.9/
gnulib-tests/
test-wcrtomb-w32.c
       1  /* Test of conversion of wide character to multibyte character.
       2     Copyright (C) 2008-2022 Free Software Foundation, Inc.
       3  
       4     This program is free software: you can redistribute it and/or modify
       5     it under the terms of the GNU General Public License as published by
       6     the Free Software Foundation, either version 3 of the License, or
       7     (at your option) any later version.
       8  
       9     This program is distributed in the hope that it will be useful,
      10     but WITHOUT ANY WARRANTY; without even the implied warranty of
      11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12     GNU General Public License for more details.
      13  
      14     You should have received a copy of the GNU General Public License
      15     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      16  
      17  #include <config.h>
      18  
      19  #include <wchar.h>
      20  
      21  #include <locale.h>
      22  #include <stdio.h>
      23  #include <stdlib.h>
      24  #include <string.h>
      25  
      26  #include "localcharset.h"
      27  #include "macros.h"
      28  
      29  #if defined _WIN32 && !defined __CYGWIN__
      30  
      31  static int
      32  test_one_locale (const char *name, int codepage)
      33  {
      34    char buf[64];
      35    size_t ret;
      36  
      37  # if 1
      38    /* Portable code to set the locale.  */
      39    {
      40      char name_with_codepage[1024];
      41  
      42      sprintf (name_with_codepage, "%s.%d", name, codepage);
      43  
      44      /* Set the locale.  */
      45      if (setlocale (LC_ALL, name_with_codepage) == NULL)
      46        return 77;
      47    }
      48  # else
      49    /* Hacky way to set a locale.codepage combination that setlocale() refuses
      50       to set.  */
      51    {
      52      /* Codepage of the current locale, set with setlocale().
      53         Not necessarily the same as GetACP().  */
      54      extern __declspec(dllimport) unsigned int __lc_codepage;
      55  
      56      /* Set the locale.  */
      57      if (setlocale (LC_ALL, name) == NULL)
      58        return 77;
      59  
      60      /* Clobber the codepage and MB_CUR_MAX, both set by setlocale().  */
      61      __lc_codepage = codepage;
      62      switch (codepage)
      63        {
      64        case 1252:
      65        case 1256:
      66          MB_CUR_MAX = 1;
      67          break;
      68        case 932:
      69        case 950:
      70        case 936:
      71          MB_CUR_MAX = 2;
      72          break;
      73        case 54936:
      74        case 65001:
      75          MB_CUR_MAX = 4;
      76          break;
      77        }
      78  
      79      /* Test whether the codepage is really available.  */
      80      {
      81        mbstate_t state;
      82        wchar_t wc;
      83  
      84        memset (&state, '\0', sizeof (mbstate_t));
      85        if (mbrtowc (&wc, " ", 1, &state) == (size_t)(-1))
      86          return 77;
      87      }
      88    }
      89  # endif
      90  
      91    /* Test NUL character.  */
      92    {
      93      buf[0] = 'x';
      94      ret = wcrtomb (buf, 0, NULL);
      95      ASSERT (ret == 1);
      96      ASSERT (buf[0] == '\0');
      97    }
      98  
      99    /* Test single bytes.  */
     100    {
     101      int c;
     102  
     103      for (c = 0; c < 0x100; c++)
     104        switch (c)
     105          {
     106          case '\t': case '\v': case '\f':
     107          case ' ': case '!': case '"': case '#': case '%':
     108          case '&': case '\'': case '(': case ')': case '*':
     109          case '+': case ',': case '-': case '.': case '/':
     110          case '0': case '1': case '2': case '3': case '4':
     111          case '5': case '6': case '7': case '8': case '9':
     112          case ':': case ';': case '<': case '=': case '>':
     113          case '?':
     114          case 'A': case 'B': case 'C': case 'D': case 'E':
     115          case 'F': case 'G': case 'H': case 'I': case 'J':
     116          case 'K': case 'L': case 'M': case 'N': case 'O':
     117          case 'P': case 'Q': case 'R': case 'S': case 'T':
     118          case 'U': case 'V': case 'W': case 'X': case 'Y':
     119          case 'Z':
     120          case '[': case '\\': case ']': case '^': case '_':
     121          case 'a': case 'b': case 'c': case 'd': case 'e':
     122          case 'f': case 'g': case 'h': case 'i': case 'j':
     123          case 'k': case 'l': case 'm': case 'n': case 'o':
     124          case 'p': case 'q': case 'r': case 's': case 't':
     125          case 'u': case 'v': case 'w': case 'x': case 'y':
     126          case 'z': case '{': case '|': case '}': case '~':
     127            /* c is in the ISO C "basic character set".  */
     128            ret = wcrtomb (buf, btowc (c), NULL);
     129            ASSERT (ret == 1);
     130            ASSERT (buf[0] == (char) c);
     131            break;
     132          }
     133    }
     134  
     135    /* Test special calling convention, passing a NULL pointer.  */
     136    {
     137      ret = wcrtomb (NULL, '\0', NULL);
     138      ASSERT (ret == 1);
     139      ret = wcrtomb (NULL, btowc ('x'), NULL);
     140      ASSERT (ret == 1);
     141    }
     142  
     143    switch (codepage)
     144      {
     145      case 1252:
     146        /* Locale encoding is CP1252, an extension of ISO-8859-1.  */
     147        {
     148          /* Convert "B\374\337er": "Büßer" */
     149          memset (buf, 'x', 8);
     150          ret = wcrtomb (buf, 0x00FC, NULL);
     151          ASSERT (ret == 1);
     152          ASSERT (memcmp (buf, "\374", 1) == 0);
     153          ASSERT (buf[1] == 'x');
     154  
     155          memset (buf, 'x', 8);
     156          ret = wcrtomb (buf, 0x00DF, NULL);
     157          ASSERT (ret == 1);
     158          ASSERT (memcmp (buf, "\337", 1) == 0);
     159          ASSERT (buf[1] == 'x');
     160        }
     161        return 0;
     162  
     163      case 1256:
     164        /* Locale encoding is CP1256, not the same as ISO-8859-6.  */
     165        {
     166          /* Convert "x\302\341\346y": "xآلوy" */
     167          memset (buf, 'x', 8);
     168          ret = wcrtomb (buf, 0x0622, NULL);
     169          ASSERT (ret == 1);
     170          ASSERT (memcmp (buf, "\302", 1) == 0);
     171          ASSERT (buf[1] == 'x');
     172  
     173          memset (buf, 'x', 8);
     174          ret = wcrtomb (buf, 0x0644, NULL);
     175          ASSERT (ret == 1);
     176          ASSERT (memcmp (buf, "\341", 1) == 0);
     177          ASSERT (buf[1] == 'x');
     178  
     179          memset (buf, 'x', 8);
     180          ret = wcrtomb (buf, 0x0648, NULL);
     181          ASSERT (ret == 1);
     182          ASSERT (memcmp (buf, "\346", 1) == 0);
     183          ASSERT (buf[1] == 'x');
     184        }
     185        return 0;
     186  
     187      case 932:
     188        /* Locale encoding is CP932, similar to Shift_JIS.  */
     189        {
     190          /* Convert "<\223\372\226\173\214\352>": "<日本語>" */
     191          memset (buf, 'x', 8);
     192          ret = wcrtomb (buf, 0x65E5, NULL);
     193          ASSERT (ret == 2);
     194          ASSERT (memcmp (buf, "\223\372", 2) == 0);
     195          ASSERT (buf[2] == 'x');
     196  
     197          memset (buf, 'x', 8);
     198          ret = wcrtomb (buf, 0x672C, NULL);
     199          ASSERT (ret == 2);
     200          ASSERT (memcmp (buf, "\226\173", 2) == 0);
     201          ASSERT (buf[2] == 'x');
     202  
     203          memset (buf, 'x', 8);
     204          ret = wcrtomb (buf, 0x8A9E, NULL);
     205          ASSERT (ret == 2);
     206          ASSERT (memcmp (buf, "\214\352", 2) == 0);
     207          ASSERT (buf[2] == 'x');
     208        }
     209        return 0;
     210  
     211      case 950:
     212        /* Locale encoding is CP950, similar to Big5.  */
     213        {
     214          /* Convert "<\244\351\245\273\273\171>": "<日本語>" */
     215          memset (buf, 'x', 8);
     216          ret = wcrtomb (buf, 0x65E5, NULL);
     217          ASSERT (ret == 2);
     218          ASSERT (memcmp (buf, "\244\351", 2) == 0);
     219          ASSERT (buf[2] == 'x');
     220  
     221          memset (buf, 'x', 8);
     222          ret = wcrtomb (buf, 0x672C, NULL);
     223          ASSERT (ret == 2);
     224          ASSERT (memcmp (buf, "\245\273", 2) == 0);
     225          ASSERT (buf[2] == 'x');
     226  
     227          memset (buf, 'x', 8);
     228          ret = wcrtomb (buf, 0x8A9E, NULL);
     229          ASSERT (ret == 2);
     230          ASSERT (memcmp (buf, "\273\171", 2) == 0);
     231          ASSERT (buf[2] == 'x');
     232        }
     233        return 0;
     234  
     235      case 936:
     236        /* Locale encoding is CP936 = GBK, an extension of GB2312.  */
     237        {
     238          /* Convert "<\310\325\261\276\325\132>": "<日本語>" */
     239          memset (buf, 'x', 8);
     240          ret = wcrtomb (buf, 0x65E5, NULL);
     241          ASSERT (ret == 2);
     242          ASSERT (memcmp (buf, "\310\325", 2) == 0);
     243          ASSERT (buf[2] == 'x');
     244  
     245          memset (buf, 'x', 8);
     246          ret = wcrtomb (buf, 0x672C, NULL);
     247          ASSERT (ret == 2);
     248          ASSERT (memcmp (buf, "\261\276", 2) == 0);
     249          ASSERT (buf[2] == 'x');
     250  
     251          memset (buf, 'x', 8);
     252          ret = wcrtomb (buf, 0x8A9E, NULL);
     253          ASSERT (ret == 2);
     254          ASSERT (memcmp (buf, "\325\132", 2) == 0);
     255          ASSERT (buf[2] == 'x');
     256        }
     257        return 0;
     258  
     259      case 54936:
     260        /* Locale encoding is CP54936 = GB18030.  */
     261        if (strcmp (locale_charset (), "GB18030") != 0)
     262          return 77;
     263        {
     264          /* Convert "B\250\271\201\060\211\070er": "Büßer" */
     265          memset (buf, 'x', 8);
     266          ret = wcrtomb (buf, 0x00FC, NULL);
     267          ASSERT (ret == 2);
     268          ASSERT (memcmp (buf, "\250\271", 2) == 0);
     269          ASSERT (buf[2] == 'x');
     270  
     271          memset (buf, 'x', 8);
     272          ret = wcrtomb (buf, 0x00DF, NULL);
     273          ASSERT (ret == 4);
     274          ASSERT (memcmp (buf, "\201\060\211\070", 4) == 0);
     275          ASSERT (buf[4] == 'x');
     276        }
     277        return 0;
     278  
     279      case 65001:
     280        /* Locale encoding is CP65001 = UTF-8.  */
     281        if (strcmp (locale_charset (), "UTF-8") != 0)
     282          return 77;
     283        {
     284          /* Convert "B\303\274\303\237er": "Büßer" */
     285          memset (buf, 'x', 8);
     286          ret = wcrtomb (buf, 0x00FC, NULL);
     287          ASSERT (ret == 2);
     288          ASSERT (memcmp (buf, "\303\274", 2) == 0);
     289          ASSERT (buf[2] == 'x');
     290  
     291          memset (buf, 'x', 8);
     292          ret = wcrtomb (buf, 0x00DF, NULL);
     293          ASSERT (ret == 2);
     294          ASSERT (memcmp (buf, "\303\237", 2) == 0);
     295          ASSERT (buf[2] == 'x');
     296        }
     297        return 0;
     298  
     299      default:
     300        return 1;
     301      }
     302  }
     303  
     304  int
     305  main (int argc, char *argv[])
     306  {
     307    int codepage = atoi (argv[argc - 1]);
     308    int result;
     309    int i;
     310  
     311    result = 77;
     312    for (i = 1; i < argc - 1; i++)
     313      {
     314        int ret = test_one_locale (argv[i], codepage);
     315  
     316        if (ret != 77)
     317          result = ret;
     318      }
     319  
     320    if (result == 77)
     321      {
     322        fprintf (stderr, "Skipping test: found no locale with codepage %d\n",
     323                 codepage);
     324      }
     325    return result;
     326  }
     327  
     328  #else
     329  
     330  int
     331  main (int argc, char *argv[])
     332  {
     333    fputs ("Skipping test: not a native Windows system\n", stderr);
     334    return 77;
     335  }
     336  
     337  #endif