(root)/
coreutils-9.4/
gnulib-tests/
test-c32tolower.c
       1  /* Test of c32tolower() function.
       2     Copyright (C) 2020-2023 Free Software Foundation, Inc.
       3  
       4     This program is free software: you can redistribute it and/or modify
       5     it under the terms of the GNU General Public License as published by
       6     the Free Software Foundation, either version 3 of the License, or
       7     (at your option) any later version.
       8  
       9     This program is distributed in the hope that it will be useful,
      10     but WITHOUT ANY WARRANTY; without even the implied warranty of
      11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12     GNU General Public License for more details.
      13  
      14     You should have received a copy of the GNU General Public License
      15     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      16  
      17  #include <config.h>
      18  
      19  #include <uchar.h>
      20  
      21  #include "signature.h"
      22  SIGNATURE_CHECK (c32tolower, wint_t, (wint_t));
      23  
      24  #include <locale.h>
      25  #include <stdlib.h>
      26  #include <string.h>
      27  #include <wchar.h>
      28  
      29  #include "macros.h"
      30  
      31  /* Representation of a multibyte character: a byte count plus a fixed buffer of MBCHAR_BUF_SIZE bytes.  */
      32  #define MBCHAR_BUF_SIZE 6
      33  struct multibyte
      34  {
      35    size_t nbytes;             /* number of bytes of current character; for_character stores 0 here when the character cannot be converted back */
      36    char buf[MBCHAR_BUF_SIZE]; /* room for the bytes; only the first nbytes are meaningful */
      37  };
      38  
      39  /* Decodes the multibyte character s[0..n-1], maps it through c32tolower,
      40     and returns the mapped character re-encoded as a multibyte character.  */
      41  static struct multibyte
      42  for_character (const char *s, size_t n)
      43  {
      44    struct multibyte lowered;
      45    mbstate_t decode_state, encode_state;
      46    char32_t code = (char32_t) 0xBADFACE; /* dummy initial value */
      47    char32_t mapped;
      48    size_t len;
      49  
      50    /* Multibyte -> char32_t; all n bytes must be consumed.  */
      51    memset (&decode_state, '\0', sizeof decode_state);
      52    len = mbrtoc32 (&code, s, n, &decode_state);
      53    ASSERT (len == n);
      54  
      55    mapped = c32tolower (code);
      56    ASSERT (mapped != WEOF);
      57  
      58    /* char32_t -> multibyte, starting from a zeroed conversion state.  */
      59    memset (&encode_state, '\0', sizeof encode_state);
      60    len = c32rtomb (lowered.buf, mapped, &encode_state);
      61    ASSERT (len != 0);
      62    if (len != (size_t)(-1))
      63      {
      64        ASSERT (len <= MBCHAR_BUF_SIZE);
      65      }
      66    /* (size_t)(-1) means mapped has no multibyte form; report 0 bytes.  */
      67    lowered.nbytes = (len == (size_t)(-1) ? 0 : len);
      68    return lowered;
      69  }
      70  /* Test driver.  argv[1][0] ('0' to '4') selects the locale encoding to check; returns 0 once the selected checks have run, 77 when the GB18030 case is skipped, and 1 if setlocale fails or no known selector is given.  */
      71  int
      72  main (int argc, char *argv[])
      73  {
      74    wint_t wc;                 /* result of the WEOF check */
      75    struct multibyte mb;       /* lowercased character returned by for_character */
      76    char buf[4];               /* only buf[0] is used, by the single-byte loop */
      77  
      78    /* configure should already have checked that the locale is supported.  */
      79    if (setlocale (LC_ALL, "") == NULL)
      80      return 1;
      81  
      82    /* Test WEOF.  */
      83    wc = c32tolower (WEOF);
      84    ASSERT (wc == WEOF);
      85  
      86    /* Test single-byte characters.
      87       POSIX specifies in
      88         <https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html>
      89       that
      90         - in all locales, the uppercase characters include the A ... Z
      91           characters, and the corresponding characters a ... z (if not in a
      92           Turkish locale) are lowercase,
      93         - in the "POSIX" locale (which is usually the same as the "C" locale),
      94           the uppercase characters include only the ASCII A ... Z characters,
      95           and the corresponding characters a ... z are lowercase.
      96     */
      97  #if defined __NetBSD__
      98    /* towlower is broken in the zh_CN.GB18030 locale on NetBSD 9.0.
      99       See <https://gnats.netbsd.org/cgi-bin/query-pr-single.pl?number=57339>.  */
     100    if (!(argc > 1 && argv[1][0] == '4'))
     101  #endif
     102    {
     103      int c;
     104  
     105      for (c = 0; c < 0x100; c++) /* walk every byte value; only the listed ones are checked */
     106        switch (c)
     107          {
     108          case '\t': case '\v': case '\f':
     109          case ' ': case '!': case '"': case '#': case '%':
     110          case '&': case '\'': case '(': case ')': case '*':
     111          case '+': case ',': case '-': case '.': case '/':
     112          case '0': case '1': case '2': case '3': case '4':
     113          case '5': case '6': case '7': case '8': case '9':
     114          case ':': case ';': case '<': case '=': case '>':
     115          case '?':
     116          case 'A': case 'B': case 'C': case 'D': case 'E':
     117          case 'F': case 'G': case 'H': case 'I': case 'J':
     118          case 'K': case 'L': case 'M': case 'N': case 'O':
     119          case 'P': case 'Q': case 'R': case 'S': case 'T':
     120          case 'U': case 'V': case 'W': case 'X': case 'Y':
     121          case 'Z':
     122          case '[': case '\\': case ']': case '^': case '_':
     123          case 'a': case 'b': case 'c': case 'd': case 'e':
     124          case 'f': case 'g': case 'h': case 'i': case 'j':
     125          case 'k': case 'l': case 'm': case 'n': case 'o':
     126          case 'p': case 'q': case 'r': case 's': case 't':
     127          case 'u': case 'v': case 'w': case 'x': case 'y':
     128          case 'z': case '{': case '|': case '}': case '~':
     129            /* c is in the ISO C "basic character set".  */
     130            buf[0] = (unsigned char) c;
     131            mb = for_character (buf, 1);
     132            switch (c)
     133              {
     134              case 'A': case 'B': case 'C': case 'D': case 'E':
     135              case 'F': case 'G': case 'H': case 'I': case 'J':
     136              case 'K': case 'L': case 'M': case 'N': case 'O':
     137              case 'P': case 'Q': case 'R': case 'S': case 'T':
     138              case 'U': case 'V': case 'W': case 'X': case 'Y':
     139              case 'Z': /* an uppercase ASCII letter must map to the matching lowercase letter */
     140                ASSERT (mb.nbytes == 1);
     141                ASSERT ((unsigned char) mb.buf[0] == (unsigned char) c - 'A' + 'a');
     142                break;
     143              default: /* every other listed character must map to itself */
     144                ASSERT (mb.nbytes == 1);
     145                ASSERT ((unsigned char) mb.buf[0] == c);
     146                break;
     147              }
     148            break;
     149          } /* no default: byte values not listed above are not checked */
     150    }
     151  
     152    if (argc > 1) /* argv[1][0] picks the encoding-specific checks; every case returns */
     153      switch (argv[1][0])
     154        {
     155        case '0':
     156          /* C locale; tested above.  */
     157          return 0;
     158  
     159        case '1':
     160          /* Locale encoding is ISO-8859-1 or ISO-8859-15.  */
     161          {
     162            /* U+00B2 SUPERSCRIPT TWO */
     163            mb = for_character ("\262", 1);
     164            ASSERT (mb.nbytes == 1);
     165            ASSERT (memcmp (mb.buf, "\262", 1) == 0);
     166            /* U+00B5 MICRO SIGN */
     167            mb = for_character ("\265", 1);
     168            ASSERT (mb.nbytes == 1);
     169            ASSERT (memcmp (mb.buf, "\265", 1) == 0);
     170            /* U+00C9 LATIN CAPITAL LETTER E WITH ACUTE */
     171            mb = for_character ("\311", 1);
     172            ASSERT (mb.nbytes == 1);
     173            ASSERT (memcmp (mb.buf, "\351", 1) == 0);
     174            /* U+00DF LATIN SMALL LETTER SHARP S */
     175            mb = for_character ("\337", 1);
     176            ASSERT (mb.nbytes == 1);
     177            ASSERT (memcmp (mb.buf, "\337", 1) == 0);
     178            /* U+00E9 LATIN SMALL LETTER E WITH ACUTE */
     179            mb = for_character ("\351", 1);
     180            ASSERT (mb.nbytes == 1);
     181            ASSERT (memcmp (mb.buf, "\351", 1) == 0);
     182            /* U+00FF LATIN SMALL LETTER Y WITH DIAERESIS */
     183            mb = for_character ("\377", 1);
     184            ASSERT (mb.nbytes == 1);
     185            ASSERT (memcmp (mb.buf, "\377", 1) == 0);
     186          }
     187          return 0;
     188  
     189        case '2':
     190          /* Locale encoding is EUC-JP.  */
     191          {
     192          #if !((defined __APPLE__ && defined __MACH__) || defined __DragonFly__) /* this uppercase check is compiled out on the named platforms */
     193            /* U+00C9 LATIN CAPITAL LETTER E WITH ACUTE */
     194            mb = for_character ("\217\252\261", 3);
     195            ASSERT (mb.nbytes == 3);
     196            ASSERT (memcmp (mb.buf, "\217\253\261", 3) == 0);
     197          #endif
     198            /* U+00DF LATIN SMALL LETTER SHARP S */
     199            mb = for_character ("\217\251\316", 3);
     200            ASSERT (mb.nbytes == 3);
     201            ASSERT (memcmp (mb.buf, "\217\251\316", 3) == 0);
     202            /* U+00E9 LATIN SMALL LETTER E WITH ACUTE */
     203            mb = for_character ("\217\253\261", 3);
     204            ASSERT (mb.nbytes == 3);
     205            ASSERT (memcmp (mb.buf, "\217\253\261", 3) == 0);
     206            /* U+00FF LATIN SMALL LETTER Y WITH DIAERESIS */
     207            mb = for_character ("\217\253\363", 3);
     208            ASSERT (mb.nbytes == 3);
     209            ASSERT (memcmp (mb.buf, "\217\253\363", 3) == 0);
     210          #if !((defined __APPLE__ && defined __MACH__) || defined __DragonFly__)
     211            /* U+0141 LATIN CAPITAL LETTER L WITH STROKE */
     212            mb = for_character ("\217\251\250", 3);
     213            ASSERT (mb.nbytes == 3);
     214            ASSERT (memcmp (mb.buf, "\217\251\310", 3) == 0);
     215          #endif
     216            /* U+0142 LATIN SMALL LETTER L WITH STROKE */
     217            mb = for_character ("\217\251\310", 3);
     218            ASSERT (mb.nbytes == 3);
     219            ASSERT (memcmp (mb.buf, "\217\251\310", 3) == 0);
     220          #if !defined __DragonFly__
     221            /* U+0429 CYRILLIC CAPITAL LETTER SHCHA */
     222            mb = for_character ("\247\273", 2);
     223            ASSERT (mb.nbytes == 2);
     224            ASSERT (memcmp (mb.buf, "\247\353", 2) == 0);
     225          #endif
     226            /* U+0449 CYRILLIC SMALL LETTER SHCHA */
     227            mb = for_character ("\247\353", 2);
     228            ASSERT (mb.nbytes == 2);
     229            ASSERT (memcmp (mb.buf, "\247\353", 2) == 0);
     230            /* U+3073 HIRAGANA LETTER BI */
     231            mb = for_character ("\244\323", 2);
     232            ASSERT (mb.nbytes == 2);
     233            ASSERT (memcmp (mb.buf, "\244\323", 2) == 0);
     234          #if !defined __DragonFly__
     235            /* U+FF27 FULLWIDTH LATIN CAPITAL LETTER G */
     236            mb = for_character ("\243\307", 2);
     237            ASSERT (mb.nbytes == 2);
     238            ASSERT (memcmp (mb.buf, "\243\347", 2) == 0);
     239          #endif
     240            /* U+FF47 FULLWIDTH LATIN SMALL LETTER G */
     241            mb = for_character ("\243\347", 2);
     242            ASSERT (mb.nbytes == 2);
     243            ASSERT (memcmp (mb.buf, "\243\347", 2) == 0);
     244          }
     245          return 0;
     246  
     247        case '3':
     248          /* Locale encoding is UTF-8.  */
     249          {
     250            /* U+00B2 SUPERSCRIPT TWO */
     251            mb = for_character ("\302\262", 2);
     252            ASSERT (mb.nbytes == 2);
     253            ASSERT (memcmp (mb.buf, "\302\262", 2) == 0);
     254            /* U+00B5 MICRO SIGN */
     255            mb = for_character ("\302\265", 2);
     256            ASSERT (mb.nbytes == 2);
     257            ASSERT (memcmp (mb.buf, "\302\265", 2) == 0);
     258          #if !(defined _WIN32 && !defined __CYGWIN__) /* this uppercase check is compiled out on native Windows */
     259            /* U+00C9 LATIN CAPITAL LETTER E WITH ACUTE */
     260            mb = for_character ("\303\211", 2);
     261            ASSERT (mb.nbytes == 2);
     262            ASSERT (memcmp (mb.buf, "\303\251", 2) == 0);
     263          #endif
     264            /* U+00DF LATIN SMALL LETTER SHARP S */
     265            mb = for_character ("\303\237", 2);
     266            ASSERT (mb.nbytes == 2);
     267            ASSERT (memcmp (mb.buf, "\303\237", 2) == 0);
     268            /* U+00E9 LATIN SMALL LETTER E WITH ACUTE */
     269            mb = for_character ("\303\251", 2);
     270            ASSERT (mb.nbytes == 2);
     271            ASSERT (memcmp (mb.buf, "\303\251", 2) == 0);
     272            /* U+00FF LATIN SMALL LETTER Y WITH DIAERESIS */
     273            mb = for_character ("\303\277", 2);
     274            ASSERT (mb.nbytes == 2);
     275            ASSERT (memcmp (mb.buf, "\303\277", 2) == 0);
     276            /* U+0141 LATIN CAPITAL LETTER L WITH STROKE */
     277            mb = for_character ("\305\201", 2);
     278            ASSERT (mb.nbytes == 2);
     279            ASSERT (memcmp (mb.buf, "\305\202", 2) == 0);
     280            /* U+0142 LATIN SMALL LETTER L WITH STROKE */
     281            mb = for_character ("\305\202", 2);
     282            ASSERT (mb.nbytes == 2);
     283            ASSERT (memcmp (mb.buf, "\305\202", 2) == 0);
     284            /* U+0429 CYRILLIC CAPITAL LETTER SHCHA */
     285            mb = for_character ("\320\251", 2);
     286            ASSERT (mb.nbytes == 2);
     287            ASSERT (memcmp (mb.buf, "\321\211", 2) == 0);
     288            /* U+0449 CYRILLIC SMALL LETTER SHCHA */
     289            mb = for_character ("\321\211", 2);
     290            ASSERT (mb.nbytes == 2);
     291            ASSERT (memcmp (mb.buf, "\321\211", 2) == 0);
     292            /* U+05D5 HEBREW LETTER VAV */
     293            mb = for_character ("\327\225", 2);
     294            ASSERT (mb.nbytes == 2);
     295            ASSERT (memcmp (mb.buf, "\327\225", 2) == 0);
     296            /* U+3073 HIRAGANA LETTER BI */
     297            mb = for_character ("\343\201\263", 3);
     298            ASSERT (mb.nbytes == 3);
     299            ASSERT (memcmp (mb.buf, "\343\201\263", 3) == 0);
     300            /* U+3162 HANGUL LETTER YI */
     301            mb = for_character ("\343\205\242", 3);
     302            ASSERT (mb.nbytes == 3);
     303            ASSERT (memcmp (mb.buf, "\343\205\242", 3) == 0);
     304            /* U+FF27 FULLWIDTH LATIN CAPITAL LETTER G */
     305            mb = for_character ("\357\274\247", 3);
     306            ASSERT (mb.nbytes == 3);
     307            ASSERT (memcmp (mb.buf, "\357\275\207", 3) == 0);
     308            /* U+FF47 FULLWIDTH LATIN SMALL LETTER G */
     309            mb = for_character ("\357\275\207", 3);
     310            ASSERT (mb.nbytes == 3);
     311            ASSERT (memcmp (mb.buf, "\357\275\207", 3) == 0);
     312            /* U+FFDB HALFWIDTH HANGUL LETTER YI */
     313            mb = for_character ("\357\277\233", 3);
     314            ASSERT (mb.nbytes == 3);
     315            ASSERT (memcmp (mb.buf, "\357\277\233", 3) == 0);
     316          #if !(defined __DragonFly__ || defined __sun)
     317            /* U+10419 DESERET CAPITAL LETTER EF */
     318            mb = for_character ("\360\220\220\231", 4);
     319            ASSERT (mb.nbytes == 4);
     320            ASSERT (memcmp (mb.buf, "\360\220\221\201", 4) == 0);
     321          #endif
     322            /* U+10441 DESERET SMALL LETTER EF */
     323            mb = for_character ("\360\220\221\201", 4);
     324            ASSERT (mb.nbytes == 4);
     325            ASSERT (memcmp (mb.buf, "\360\220\221\201", 4) == 0);
     326            /* U+E0041 TAG LATIN CAPITAL LETTER A */
     327            mb = for_character ("\363\240\201\201", 4);
     328            ASSERT (mb.nbytes == 4);
     329            ASSERT (memcmp (mb.buf, "\363\240\201\201", 4) == 0);
     330            /* U+E0061 TAG LATIN SMALL LETTER A */
     331            mb = for_character ("\363\240\201\241", 4);
     332            ASSERT (mb.nbytes == 4);
     333            ASSERT (memcmp (mb.buf, "\363\240\201\241", 4) == 0);
     334          }
     335          return 0;
     336  
     337        case '4':
     338          /* Locale encoding is GB18030.  */
     339          #if (defined __GLIBC__ && __GLIBC__ == 2 && __GLIBC_MINOR__ >= 13 && __GLIBC_MINOR__ <= 15) || (GL_CHAR32_T_IS_UNICODE && (defined __NetBSD__ || defined __sun))
     340          fputs ("Skipping test: The GB18030 converter in this system's iconv is broken.\n", stderr);
     341          return 77;
     342          #endif
     343          { /* not reached when the skip above is compiled in */
     344            /* U+00B2 SUPERSCRIPT TWO */
     345            mb = for_character ("\201\060\205\065", 4);
     346            ASSERT (mb.nbytes == 4);
     347            ASSERT (memcmp (mb.buf, "\201\060\205\065", 4) == 0);
     348            /* U+00B5 MICRO SIGN */
     349            mb = for_character ("\201\060\205\070", 4);
     350            ASSERT (mb.nbytes == 4);
     351            ASSERT (memcmp (mb.buf, "\201\060\205\070", 4) == 0);
     352          #if !(defined __FreeBSD__ || defined __DragonFly__ || defined __sun) /* this uppercase check is compiled out on the named platforms */
     353            /* U+00C9 LATIN CAPITAL LETTER E WITH ACUTE */
     354            mb = for_character ("\201\060\207\067", 4);
     355            ASSERT (mb.nbytes == 2); /* 4 bytes in, 2 bytes out: the expected result is the 2-byte form of U+00E9 checked below */
     356            ASSERT (memcmp (mb.buf, "\250\246", 2) == 0);
     357          #endif
     358            /* U+00DF LATIN SMALL LETTER SHARP S */
     359            mb = for_character ("\201\060\211\070", 4);
     360            ASSERT (mb.nbytes == 4);
     361            ASSERT (memcmp (mb.buf, "\201\060\211\070", 4) == 0);
     362            /* U+00E9 LATIN SMALL LETTER E WITH ACUTE */
     363            mb = for_character ("\250\246", 2);
     364            ASSERT (mb.nbytes == 2);
     365            ASSERT (memcmp (mb.buf, "\250\246", 2) == 0);
     366            /* U+00FF LATIN SMALL LETTER Y WITH DIAERESIS */
     367            mb = for_character ("\201\060\213\067", 4);
     368            ASSERT (mb.nbytes == 4);
     369            ASSERT (memcmp (mb.buf, "\201\060\213\067", 4) == 0);
     370          #if !(defined __FreeBSD__ || defined __DragonFly__ || defined __sun)
     371            /* U+0141 LATIN CAPITAL LETTER L WITH STROKE */
     372            mb = for_character ("\201\060\221\071", 4);
     373            ASSERT (mb.nbytes == 4);
     374            ASSERT (memcmp (mb.buf, "\201\060\222\060", 4) == 0);
     375          #endif
     376            /* U+0142 LATIN SMALL LETTER L WITH STROKE */
     377            mb = for_character ("\201\060\222\060", 4);
     378            ASSERT (mb.nbytes == 4);
     379            ASSERT (memcmp (mb.buf, "\201\060\222\060", 4) == 0);
     380          #if !(defined __FreeBSD__ || defined __DragonFly__)
     381            /* U+0429 CYRILLIC CAPITAL LETTER SHCHA */
     382            mb = for_character ("\247\273", 2);
     383            ASSERT (mb.nbytes == 2);
     384            ASSERT (memcmp (mb.buf, "\247\353", 2) == 0);
     385          #endif
     386            /* U+0449 CYRILLIC SMALL LETTER SHCHA */
     387            mb = for_character ("\247\353", 2);
     388            ASSERT (mb.nbytes == 2);
     389            ASSERT (memcmp (mb.buf, "\247\353", 2) == 0);
     390            /* U+05D5 HEBREW LETTER VAV */
     391            mb = for_character ("\201\060\371\067", 4);
     392            ASSERT (mb.nbytes == 4);
     393            ASSERT (memcmp (mb.buf, "\201\060\371\067", 4) == 0);
     394            /* U+3073 HIRAGANA LETTER BI */
     395            mb = for_character ("\244\323", 2);
     396            ASSERT (mb.nbytes == 2);
     397            ASSERT (memcmp (mb.buf, "\244\323", 2) == 0);
     398            /* U+3162 HANGUL LETTER YI */
     399            mb = for_character ("\201\071\256\062", 4);
     400            ASSERT (mb.nbytes == 4);
     401            ASSERT (memcmp (mb.buf, "\201\071\256\062", 4) == 0);
     402          #if !defined __DragonFly__
     403            /* U+FF27 FULLWIDTH LATIN CAPITAL LETTER G */
     404            mb = for_character ("\243\307", 2);
     405            ASSERT (mb.nbytes == 2);
     406            ASSERT (memcmp (mb.buf, "\243\347", 2) == 0);
     407          #endif
     408            /* U+FF47 FULLWIDTH LATIN SMALL LETTER G */
     409            mb = for_character ("\243\347", 2);
     410            ASSERT (mb.nbytes == 2);
     411            ASSERT (memcmp (mb.buf, "\243\347", 2) == 0);
     412            /* U+FFDB HALFWIDTH HANGUL LETTER YI */
     413            mb = for_character ("\204\061\241\071", 4);
     414            ASSERT (mb.nbytes == 4);
     415            ASSERT (memcmp (mb.buf, "\204\061\241\071", 4) == 0);
     416          #if !((defined __APPLE__ && defined __MACH__) || defined __FreeBSD__ || defined __DragonFly__ || defined __NetBSD__ || defined __sun)
     417            /* U+10419 DESERET CAPITAL LETTER EF */
     418            mb = for_character ("\220\060\351\071", 4);
     419            ASSERT (mb.nbytes == 4);
     420            ASSERT (memcmp (mb.buf, "\220\060\355\071", 4) == 0);
     421          #endif
     422            /* U+10441 DESERET SMALL LETTER EF */
     423            mb = for_character ("\220\060\355\071", 4);
     424            ASSERT (mb.nbytes == 4);
     425            ASSERT (memcmp (mb.buf, "\220\060\355\071", 4) == 0);
     426            /* U+E0041 TAG LATIN CAPITAL LETTER A */
     427            mb = for_character ("\323\066\234\063", 4);
     428            ASSERT (mb.nbytes == 4);
     429            ASSERT (memcmp (mb.buf, "\323\066\234\063", 4) == 0);
     430            /* U+E0061 TAG LATIN SMALL LETTER A */
     431            mb = for_character ("\323\066\237\065", 4);
     432            ASSERT (mb.nbytes == 4);
     433            ASSERT (memcmp (mb.buf, "\323\066\237\065", 4) == 0);
     434          }
     435          return 0;
     436  
     437        }
     438  
     439    return 1; /* no argument given, or argv[1][0] is not one of '0' to '4' */
     440  }